From 1d61912f1e34dc2798cc6fbbd85c4a7c7c01f469 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 28 Oct 2024 17:44:02 -0500
Subject: [PATCH 001/279] Move the existing "unstable" feature to
 "unstable-intrinsics"

Currently there is a single feature called "unstable" that is used to
control whether intrinsics may be called. In anticipation of adding
other unstable features that we will want to control separately, create
a new feature called "unstable-intrinsics" that is enabled by
"unstable". Then move everything gated by "unstable" to
"unstable-intrinsics".
---
 Cargo.toml                                     | 5 ++++-
 ci/run.sh                                      | 6 +++---
 crates/compiler-builtins-smoke-test/Cargo.toml | 2 ++
 src/lib.rs                                     | 4 ++--
 src/math/mod.rs                                | 8 +++++---
 5 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index aa6c08ddb..de450468a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -18,7 +18,10 @@ default = []
 
 # This tells the compiler to assume that a Nightly toolchain is being used and
 # that it should activate any useful Nightly things accordingly.
-unstable = []
+unstable = ["unstable-intrinsics"]
+
+# Enable calls to functions in `core::intrinsics`
+unstable-intrinsics = []
 
 # Used to prevent using any intrinsics or arch-specific code.
 force-soft-floats = []
diff --git a/ci/run.sh b/ci/run.sh
index f61fff843..f1ca4b0cb 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -49,7 +49,7 @@ fi
 if [ "${BUILD_ONLY:-}" = "1" ]; then
     cmd="cargo build --target $target --package libm"
     $cmd
-    $cmd --features 'unstable'
+    $cmd --features "unstable-intrinsics"
 
     echo "can't run tests on $target"
 else
@@ -60,6 +60,6 @@ else
     $cmd --release
 
     # unstable with a feature
-    $cmd --features 'unstable'
-    $cmd --release --features 'unstable'
+    $cmd --features "unstable-intrinsics"
+    $cmd --release --features "unstable-intrinsics"
 fi
diff --git a/crates/compiler-builtins-smoke-test/Cargo.toml b/crates/compiler-builtins-smoke-test/Cargo.toml
index 8d084ee34..2aa7c8371 100644
--- a/crates/compiler-builtins-smoke-test/Cargo.toml
+++ b/crates/compiler-builtins-smoke-test/Cargo.toml
@@ -10,6 +10,8 @@ test = false
 bench = false
 
 [features]
+# Duplicated from libm's Cargo.toml
 unstable = []
+unstable-intrinsics = []
 checked = []
 force-soft-floats = []
diff --git a/src/lib.rs b/src/lib.rs
index 6d95fa173..1305d35ab 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,7 +1,7 @@
 //! libm in pure Rust
 #![no_std]
-#![cfg_attr(feature = "unstable", allow(internal_features))]
-#![cfg_attr(feature = "unstable", feature(core_intrinsics))]
+#![cfg_attr(feature = "unstable-intrinsics", allow(internal_features))]
+#![cfg_attr(feature = "unstable-intrinsics", feature(core_intrinsics))]
 #![allow(clippy::assign_op_pattern)]
 #![allow(clippy::deprecated_cfg_attr)]
 #![allow(clippy::eq_op)]
diff --git a/src/math/mod.rs b/src/math/mod.rs
index 85c9fc5bf..17b9e6b4c 100644
--- a/src/math/mod.rs
+++ b/src/math/mod.rs
@@ -60,14 +60,14 @@ macro_rules! i {
 // the time of this writing this is only used in a few places, and once
 // rust-lang/rust#72751 is fixed then this macro will no longer be necessary and
 // the native `/` operator can be used and panics won't be codegen'd.
-#[cfg(any(debug_assertions, not(feature = "unstable")))]
+#[cfg(any(debug_assertions, not(feature = "unstable-intrinsics")))]
 macro_rules! div {
     ($a:expr, $b:expr) => {
         $a / $b
     };
 }
 
-#[cfg(all(not(debug_assertions), feature = "unstable"))]
+#[cfg(all(not(debug_assertions), feature = "unstable-intrinsics"))]
 macro_rules! div {
     ($a:expr, $b:expr) => {
         unsafe { core::intrinsics::unchecked_div($a, $b) }
@@ -76,7 +76,9 @@ macro_rules! div {
 
 macro_rules! llvm_intrinsically_optimized {
     (#[cfg($($clause:tt)*)] $e:expr) => {
-        #[cfg(all(feature = "unstable", not(feature = "force-soft-floats"), $($clause)*))]
+        #[cfg(all(
+            feature = "unstable-intrinsics", not(feature = "force-soft-floats"), $($clause)*
+        ))]
         {
             if true { // thwart the dead code lint
                 $e

From da5144f86b43731b2a6c6f4248b2f3ebfa26c283 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 28 Oct 2024 18:15:19 -0500
Subject: [PATCH 002/279] Replace `feature = "unstable-intrinsics"` with
 `intrinsics_enabled`

We currently have a non-additive feature, "force-soft-floats", and we
will need to gain another "no-f16-f128". This makes `cfg` usage in code
somewhat confusing and redundant.

Use `build.rs` to figure out if "unstable-intrinsics" is enabled while
"force-soft-floats" is not enabled and if so, emit a cfg
`intrinsics_enabled`. This is cleaner to use and should make adding more
features easier to reason about.

Also use this as an opportunity to eliminate the build.rs from the
compiler-builtins test crate, replaced with the `[lints]` table in
Cargo.toml.
---
 Cargo.toml                                     |  5 +++++
 build.rs                                       | 15 ++++++++++++++-
 ci/run.sh                                      |  4 ++++
 crates/compiler-builtins-smoke-test/Cargo.toml |  6 ++++++
 crates/compiler-builtins-smoke-test/build.rs   |  3 ---
 src/lib.rs                                     |  4 ++--
 src/math/mod.rs                                |  8 +++-----
 7 files changed, 34 insertions(+), 11 deletions(-)
 delete mode 100644 crates/compiler-builtins-smoke-test/build.rs

diff --git a/Cargo.toml b/Cargo.toml
index de450468a..5e4565556 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -24,6 +24,11 @@ unstable = ["unstable-intrinsics"]
 unstable-intrinsics = []
 
 # Used to prevent using any intrinsics or arch-specific code.
+#
+# HACK: this is a negative feature which is generally a bad idea in Cargo, but
+# we need it to be able to forbid other features when this crate is used in
+# Rust dependencies. Setting this overrides all features that may enable
+# hard float operations.
 force-soft-floats = []
 
 [workspace]
diff --git a/build.rs b/build.rs
index b683557e4..adb521407 100644
--- a/build.rs
+++ b/build.rs
@@ -3,7 +3,6 @@ use std::env;
 fn main() {
     println!("cargo:rerun-if-changed=build.rs");
     println!("cargo:rustc-check-cfg=cfg(assert_no_panic)");
-    println!("cargo:rustc-check-cfg=cfg(feature, values(\"unstable\"))");
 
     println!("cargo:rustc-check-cfg=cfg(feature, values(\"checked\"))");
 
@@ -14,4 +13,18 @@ fn main() {
             println!("cargo:rustc-cfg=assert_no_panic");
         }
     }
+
+    configure_intrinsics();
+}
+
+/// Simplify the feature logic for enabling intrinsics so code only needs to use
+/// `cfg(intrinsics_enabled)`.
+fn configure_intrinsics() {
+    println!("cargo:rustc-check-cfg=cfg(intrinsics_enabled)");
+
+    // Disabled by default; `unstable-intrinsics` enables again; `force-soft-floats` overrides
+    // to disable.
+    if cfg!(feature = "unstable-intrinsics") && !cfg!(feature = "force-soft-floats") {
+        println!("cargo:rustc-cfg=intrinsics_enabled");
+    }
 }
diff --git a/ci/run.sh b/ci/run.sh
index f1ca4b0cb..d3fc4ce24 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -46,6 +46,10 @@ if [ "$(uname -a)" = "Linux" ]; then
     extra_flags="$extra_flags --features libm-test/test-musl-serialized"
 fi
 
+# Make sure we can build with overriding features. We test the indibidual
+# features it controls separately.
+cargo check --features "force-soft-floats"
+
 if [ "${BUILD_ONLY:-}" = "1" ]; then
     cmd="cargo build --target $target --package libm"
     $cmd
diff --git a/crates/compiler-builtins-smoke-test/Cargo.toml b/crates/compiler-builtins-smoke-test/Cargo.toml
index 2aa7c8371..2a6c62961 100644
--- a/crates/compiler-builtins-smoke-test/Cargo.toml
+++ b/crates/compiler-builtins-smoke-test/Cargo.toml
@@ -15,3 +15,9 @@ unstable = []
 unstable-intrinsics = []
 checked = []
 force-soft-floats = []
+
+[lints.rust]
+unexpected_cfgs = { level = "warn", check-cfg = [
+  "cfg(assert_no_panic)",
+  "cfg(intrinsics_enabled)",
+] }
diff --git a/crates/compiler-builtins-smoke-test/build.rs b/crates/compiler-builtins-smoke-test/build.rs
deleted file mode 100644
index 27d4a0e89..000000000
--- a/crates/compiler-builtins-smoke-test/build.rs
+++ /dev/null
@@ -1,3 +0,0 @@
-fn main() {
-    println!("cargo::rustc-check-cfg=cfg(assert_no_panic)");
-}
diff --git a/src/lib.rs b/src/lib.rs
index 1305d35ab..98ac55988 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,7 +1,7 @@
 //! libm in pure Rust
 #![no_std]
-#![cfg_attr(feature = "unstable-intrinsics", allow(internal_features))]
-#![cfg_attr(feature = "unstable-intrinsics", feature(core_intrinsics))]
+#![cfg_attr(intrinsics_enabled, allow(internal_features))]
+#![cfg_attr(intrinsics_enabled, feature(core_intrinsics))]
 #![allow(clippy::assign_op_pattern)]
 #![allow(clippy::deprecated_cfg_attr)]
 #![allow(clippy::eq_op)]
diff --git a/src/math/mod.rs b/src/math/mod.rs
index 17b9e6b4c..9baa57fc8 100644
--- a/src/math/mod.rs
+++ b/src/math/mod.rs
@@ -60,14 +60,14 @@ macro_rules! i {
 // the time of this writing this is only used in a few places, and once
 // rust-lang/rust#72751 is fixed then this macro will no longer be necessary and
 // the native `/` operator can be used and panics won't be codegen'd.
-#[cfg(any(debug_assertions, not(feature = "unstable-intrinsics")))]
+#[cfg(any(debug_assertions, not(intrinsics_enabled)))]
 macro_rules! div {
     ($a:expr, $b:expr) => {
         $a / $b
     };
 }
 
-#[cfg(all(not(debug_assertions), feature = "unstable-intrinsics"))]
+#[cfg(all(not(debug_assertions), intrinsics_enabled))]
 macro_rules! div {
     ($a:expr, $b:expr) => {
         unsafe { core::intrinsics::unchecked_div($a, $b) }
@@ -76,9 +76,7 @@ macro_rules! div {
 
 macro_rules! llvm_intrinsically_optimized {
     (#[cfg($($clause:tt)*)] $e:expr) => {
-        #[cfg(all(
-            feature = "unstable-intrinsics", not(feature = "force-soft-floats"), $($clause)*
-        ))]
+        #[cfg(all(intrinsics_enabled, not(feature = "force-soft-floats"), $($clause)*))]
         {
             if true { // thwart the dead code lint
                 $e

From 049f942592d73caeafb3092d93e7784abeef436c Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 28 Oct 2024 19:30:04 -0500
Subject: [PATCH 003/279] Introduce `math::arch::intrinsics`

This module provides implementations of basic functions that defer to
LLVM for what to do, rather than either using a builtin operation or
calling another function in this library.

`math::arch` will become the home of anything architecture-specific in
the future.
---
 src/math/arch/intrinsics.rs | 52 +++++++++++++++++++++++++++++++++++++
 src/math/arch/mod.rs        |  9 +++++++
 src/math/mod.rs             |  1 +
 3 files changed, 62 insertions(+)
 create mode 100644 src/math/arch/intrinsics.rs
 create mode 100644 src/math/arch/mod.rs

diff --git a/src/math/arch/intrinsics.rs b/src/math/arch/intrinsics.rs
new file mode 100644
index 000000000..1cf9291f4
--- /dev/null
+++ b/src/math/arch/intrinsics.rs
@@ -0,0 +1,52 @@
+// Config is needed for times when this module is available but we don't call everything
+#![allow(dead_code)]
+
+pub fn ceil(x: f64) -> f64 {
+    // SAFETY: safe intrinsic with no preconditions
+    unsafe { core::intrinsics::ceilf64(x) }
+}
+
+pub fn ceilf(x: f32) -> f32 {
+    // SAFETY: safe intrinsic with no preconditions
+    unsafe { core::intrinsics::ceilf32(x) }
+}
+
+pub fn fabs(x: f64) -> f64 {
+    // SAFETY: safe intrinsic with no preconditions
+    unsafe { core::intrinsics::fabsf64(x) }
+}
+
+pub fn fabsf(x: f32) -> f32 {
+    // SAFETY: safe intrinsic with no preconditions
+    unsafe { core::intrinsics::fabsf32(x) }
+}
+
+pub fn floor(x: f64) -> f64 {
+    // SAFETY: safe intrinsic with no preconditions
+    unsafe { core::intrinsics::floorf64(x) }
+}
+
+pub fn floorf(x: f32) -> f32 {
+    // SAFETY: safe intrinsic with no preconditions
+    unsafe { core::intrinsics::floorf32(x) }
+}
+
+pub fn sqrt(x: f64) -> f64 {
+    // SAFETY: safe intrinsic with no preconditions
+    unsafe { core::intrinsics::sqrtf64(x) }
+}
+
+pub fn sqrtf(x: f32) -> f32 {
+    // SAFETY: safe intrinsic with no preconditions
+    unsafe { core::intrinsics::sqrtf32(x) }
+}
+
+pub fn trunc(x: f64) -> f64 {
+    // SAFETY: safe intrinsic with no preconditions
+    unsafe { core::intrinsics::truncf64(x) }
+}
+
+pub fn truncf(x: f32) -> f32 {
+    // SAFETY: safe intrinsic with no preconditions
+    unsafe { core::intrinsics::truncf32(x) }
+}
diff --git a/src/math/arch/mod.rs b/src/math/arch/mod.rs
new file mode 100644
index 000000000..a4bc218b7
--- /dev/null
+++ b/src/math/arch/mod.rs
@@ -0,0 +1,9 @@
+//! Architecture-specific routines and operations.
+//!
+//! LLVM will already optimize calls to some of these in cases that there are hardware
+//! instructions. Providing an implementation here just ensures that the faster implementation
+//! is used when calling the function directly. This helps anyone who uses `libm` directly, as
+//! well as improving things when these routines are called as part of other implementations.
+
+#[cfg(intrinsics_enabled)]
+pub mod intrinsics;
diff --git a/src/math/mod.rs b/src/math/mod.rs
index 9baa57fc8..e3e6846d3 100644
--- a/src/math/mod.rs
+++ b/src/math/mod.rs
@@ -302,6 +302,7 @@ pub use self::trunc::trunc;
 pub use self::truncf::truncf;
 
 // Private modules
+mod arch;
 mod expo2;
 mod fenv;
 mod k_cos;

From f216aab1734cac0bc62c9182df0e098a476e004d Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sat, 26 Oct 2024 02:56:22 -0500
Subject: [PATCH 004/279] Introduce a `select_implementation` macro

Currently there is a macro called `llvm_intrinsically_optimized` that
uses an intrinsic rather than the function implementation if the
configuration is correct. Add a new macro `select_implementation` that
is somewhat cleaner to use.

In the future, we can update this macro with more fields to specify
other implementations that may be selected, such as something
architecture-specific or e.g. using a generic implementation for `f32`
routines, rather than those that convert to `f64`.

This introduces a `macros` module within `math/support`. We will be able
to move more things here later.
---
 src/math/mod.rs            | 63 ++++++++++++++++++++------------------
 src/math/support/macros.rs | 34 ++++++++++++++++++++
 src/math/support/mod.rs    |  2 ++
 3 files changed, 69 insertions(+), 30 deletions(-)
 create mode 100644 src/math/support/macros.rs
 create mode 100644 src/math/support/mod.rs

diff --git a/src/math/mod.rs b/src/math/mod.rs
index e3e6846d3..a7e16bfc8 100644
--- a/src/math/mod.rs
+++ b/src/math/mod.rs
@@ -74,6 +74,7 @@ macro_rules! div {
     };
 }
 
+// FIXME: phase this out, to be replaced by the more flexible `select_implementation`
 macro_rules! llvm_intrinsically_optimized {
     (#[cfg($($clause:tt)*)] $e:expr) => {
         #[cfg(all(intrinsics_enabled, not(feature = "force-soft-floats"), $($clause)*))]
@@ -85,6 +86,38 @@ macro_rules! llvm_intrinsically_optimized {
     };
 }
 
+// Private modules
+#[macro_use]
+mod support;
+mod arch;
+mod expo2;
+mod fenv;
+mod k_cos;
+mod k_cosf;
+mod k_expo2;
+mod k_expo2f;
+mod k_sin;
+mod k_sinf;
+mod k_tan;
+mod k_tanf;
+mod rem_pio2;
+mod rem_pio2_large;
+mod rem_pio2f;
+
+// Private re-imports
+use self::expo2::expo2;
+use self::k_cos::k_cos;
+use self::k_cosf::k_cosf;
+use self::k_expo2::k_expo2;
+use self::k_expo2f::k_expo2f;
+use self::k_sin::k_sin;
+use self::k_sinf::k_sinf;
+use self::k_tan::k_tan;
+use self::k_tanf::k_tanf;
+use self::rem_pio2::rem_pio2;
+use self::rem_pio2_large::rem_pio2_large;
+use self::rem_pio2f::rem_pio2f;
+
 // Public modules
 mod acos;
 mod acosf;
@@ -301,36 +334,6 @@ pub use self::tgammaf::tgammaf;
 pub use self::trunc::trunc;
 pub use self::truncf::truncf;
 
-// Private modules
-mod arch;
-mod expo2;
-mod fenv;
-mod k_cos;
-mod k_cosf;
-mod k_expo2;
-mod k_expo2f;
-mod k_sin;
-mod k_sinf;
-mod k_tan;
-mod k_tanf;
-mod rem_pio2;
-mod rem_pio2_large;
-mod rem_pio2f;
-
-// Private re-imports
-use self::expo2::expo2;
-use self::k_cos::k_cos;
-use self::k_cosf::k_cosf;
-use self::k_expo2::k_expo2;
-use self::k_expo2f::k_expo2f;
-use self::k_sin::k_sin;
-use self::k_sinf::k_sinf;
-use self::k_tan::k_tan;
-use self::k_tanf::k_tanf;
-use self::rem_pio2::rem_pio2;
-use self::rem_pio2_large::rem_pio2_large;
-use self::rem_pio2f::rem_pio2f;
-
 #[inline]
 fn get_high_word(x: f64) -> u32 {
     (x.to_bits() >> 32) as u32
diff --git a/src/math/support/macros.rs b/src/math/support/macros.rs
new file mode 100644
index 000000000..6bc75837a
--- /dev/null
+++ b/src/math/support/macros.rs
@@ -0,0 +1,34 @@
+/// Choose among using an intrinsic (if available) and falling back to the default function body.
+/// Returns directly if the intrinsic version is used, otherwise continues to the rest of the
+/// function.
+///
+/// Use this if the intrinsic is likely to be more performant on the platform(s) specified
+/// in `intrinsic_available`.
+///
+/// The `cfg` used here is controlled by `build.rs` so the passed meta does not need to account
+/// for e.g. the `unstable-intrinsics` or `force-soft-float` features.
+macro_rules! select_implementation {
+    (
+        name: $fname:ident,
+        // Configuration meta for when to call intrinsics and let LLVM figure it out
+        $( use_intrinsic: $use_intrinsic:meta, )?
+        args: $($arg:ident),+ ,
+    ) => {
+        // FIXME: these use paths that are a pretty fragile (`super`). We should figure out
+        // something better w.r.t. how this is vendored into compiler-builtins.
+
+        // Never use intrinsics if we are forcing soft floats, and only enable with the
+        // `unstable-intrinsics` feature.
+        #[cfg(intrinsics_enabled)]
+        select_implementation! {
+            @cfg $( $use_intrinsic )?;
+            if true {
+                return  super::arch::intrinsics::$fname( $($arg),+ );
+            }
+        }
+    };
+
+    // Coalesce helper to construct an expression only if a config is provided
+    (@cfg ; $ex:expr) => { };
+    (@cfg $provided:meta; $ex:expr) => { #[cfg($provided)] $ex };
+}
diff --git a/src/math/support/mod.rs b/src/math/support/mod.rs
new file mode 100644
index 000000000..10532f0d1
--- /dev/null
+++ b/src/math/support/mod.rs
@@ -0,0 +1,2 @@
+#[macro_use]
+pub mod macros;

From 07a52fff9a6e8074f62619598f2d822d851fe0c9 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 28 Oct 2024 19:38:19 -0500
Subject: [PATCH 005/279] Make use of `select_implementation`

Replace all uses of `llvm_intrinsically` with select_implementation`.
---
 src/math/ceil.rs   | 12 +++++-------
 src/math/ceilf.rs  | 12 +++++-------
 src/math/fabs.rs   | 12 +++++-------
 src/math/fabsf.rs  | 12 +++++-------
 src/math/floor.rs  | 12 +++++-------
 src/math/floorf.rs | 12 +++++-------
 src/math/mod.rs    | 12 ------------
 src/math/sqrt.rs   | 16 +++++-----------
 src/math/sqrtf.rs  | 16 +++++-----------
 src/math/trunc.rs  | 12 +++++-------
 src/math/truncf.rs | 12 +++++-------
 11 files changed, 50 insertions(+), 90 deletions(-)

diff --git a/src/math/ceil.rs b/src/math/ceil.rs
index 1593fdaff..0da01b4d0 100644
--- a/src/math/ceil.rs
+++ b/src/math/ceil.rs
@@ -8,14 +8,12 @@ const TOINT: f64 = 1. / f64::EPSILON;
 /// Finds the nearest integer greater than or equal to `x`.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn ceil(x: f64) -> f64 {
-    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
-    // `f64.ceil` native instruction, so we can leverage this for both code size
-    // and speed.
-    llvm_intrinsically_optimized! {
-        #[cfg(target_arch = "wasm32")] {
-            return unsafe { ::core::intrinsics::ceilf64(x) }
-        }
+    select_implementation! {
+        name: ceil,
+        use_intrinsic: target_arch = "wasm32",
+        args: x,
     }
+
     #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
     {
         //use an alternative implementation on x86, because the
diff --git a/src/math/ceilf.rs b/src/math/ceilf.rs
index bf9ba1227..0da384350 100644
--- a/src/math/ceilf.rs
+++ b/src/math/ceilf.rs
@@ -5,14 +5,12 @@ use core::f32;
 /// Finds the nearest integer greater than or equal to `x`.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn ceilf(x: f32) -> f32 {
-    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
-    // `f32.ceil` native instruction, so we can leverage this for both code size
-    // and speed.
-    llvm_intrinsically_optimized! {
-        #[cfg(target_arch = "wasm32")] {
-            return unsafe { ::core::intrinsics::ceilf32(x) }
-        }
+    select_implementation! {
+        name: ceilf,
+        use_intrinsic: target_arch = "wasm32",
+        args: x,
     }
+
     let mut ui = x.to_bits();
     let e = (((ui >> 23) & 0xff).wrapping_sub(0x7f)) as i32;
 
diff --git a/src/math/fabs.rs b/src/math/fabs.rs
index 3b0628aa6..8d3ea2fd6 100644
--- a/src/math/fabs.rs
+++ b/src/math/fabs.rs
@@ -5,14 +5,12 @@ use core::u64;
 /// by direct manipulation of the bit representation of `x`.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn fabs(x: f64) -> f64 {
-    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
-    // `f64.abs` native instruction, so we can leverage this for both code size
-    // and speed.
-    llvm_intrinsically_optimized! {
-        #[cfg(target_arch = "wasm32")] {
-            return unsafe { ::core::intrinsics::fabsf64(x) }
-        }
+    select_implementation! {
+        name: fabs,
+        use_intrinsic: target_arch = "wasm32",
+        args: x,
     }
+
     f64::from_bits(x.to_bits() & (u64::MAX / 2))
 }
 
diff --git a/src/math/fabsf.rs b/src/math/fabsf.rs
index f81c8ca44..1dac6389d 100644
--- a/src/math/fabsf.rs
+++ b/src/math/fabsf.rs
@@ -3,14 +3,12 @@
 /// by direct manipulation of the bit representation of `x`.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn fabsf(x: f32) -> f32 {
-    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
-    // `f32.abs` native instruction, so we can leverage this for both code size
-    // and speed.
-    llvm_intrinsically_optimized! {
-        #[cfg(target_arch = "wasm32")] {
-            return unsafe { ::core::intrinsics::fabsf32(x) }
-        }
+    select_implementation! {
+        name: fabsf,
+        use_intrinsic: target_arch = "wasm32",
+        args: x,
     }
+
     f32::from_bits(x.to_bits() & 0x7fffffff)
 }
 
diff --git a/src/math/floor.rs b/src/math/floor.rs
index e8fb21e58..2b9955eba 100644
--- a/src/math/floor.rs
+++ b/src/math/floor.rs
@@ -8,14 +8,12 @@ const TOINT: f64 = 1. / f64::EPSILON;
 /// Finds the nearest integer less than or equal to `x`.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn floor(x: f64) -> f64 {
-    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
-    // `f64.floor` native instruction, so we can leverage this for both code size
-    // and speed.
-    llvm_intrinsically_optimized! {
-        #[cfg(target_arch = "wasm32")] {
-            return unsafe { ::core::intrinsics::floorf64(x) }
-        }
+    select_implementation! {
+        name: floor,
+        use_intrinsic: target_arch = "wasm32",
+        args: x,
     }
+
     #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
     {
         //use an alternative implementation on x86, because the
diff --git a/src/math/floorf.rs b/src/math/floorf.rs
index f66cab74f..4f38cb15b 100644
--- a/src/math/floorf.rs
+++ b/src/math/floorf.rs
@@ -5,14 +5,12 @@ use core::f32;
 /// Finds the nearest integer less than or equal to `x`.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn floorf(x: f32) -> f32 {
-    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
-    // `f32.floor` native instruction, so we can leverage this for both code size
-    // and speed.
-    llvm_intrinsically_optimized! {
-        #[cfg(target_arch = "wasm32")] {
-            return unsafe { ::core::intrinsics::floorf32(x) }
-        }
+    select_implementation! {
+        name: floorf,
+        use_intrinsic: target_arch = "wasm32",
+        args: x,
     }
+
     let mut ui = x.to_bits();
     let e = (((ui >> 23) as i32) & 0xff) - 0x7f;
 
diff --git a/src/math/mod.rs b/src/math/mod.rs
index a7e16bfc8..393bc5150 100644
--- a/src/math/mod.rs
+++ b/src/math/mod.rs
@@ -74,18 +74,6 @@ macro_rules! div {
     };
 }
 
-// FIXME: phase this out, to be replaced by the more flexible `select_implementation`
-macro_rules! llvm_intrinsically_optimized {
-    (#[cfg($($clause:tt)*)] $e:expr) => {
-        #[cfg(all(intrinsics_enabled, not(feature = "force-soft-floats"), $($clause)*))]
-        {
-            if true { // thwart the dead code lint
-                $e
-            }
-        }
-    };
-}
-
 // Private modules
 #[macro_use]
 mod support;
diff --git a/src/math/sqrt.rs b/src/math/sqrt.rs
index e2907384d..2e856100f 100644
--- a/src/math/sqrt.rs
+++ b/src/math/sqrt.rs
@@ -81,18 +81,12 @@ use core::f64;
 /// The square root of `x` (f64).
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn sqrt(x: f64) -> f64 {
-    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
-    // `f64.sqrt` native instruction, so we can leverage this for both code size
-    // and speed.
-    llvm_intrinsically_optimized! {
-        #[cfg(target_arch = "wasm32")] {
-            return if x < 0.0 {
-                f64::NAN
-            } else {
-                unsafe { ::core::intrinsics::sqrtf64(x) }
-            }
-        }
+    select_implementation! {
+        name: sqrt,
+        use_intrinsic: target_arch = "wasm32",
+        args: x,
     }
+
     #[cfg(all(target_feature = "sse2", not(feature = "force-soft-floats")))]
     {
         // Note: This path is unlikely since LLVM will usually have already
diff --git a/src/math/sqrtf.rs b/src/math/sqrtf.rs
index a738fc0b6..b2996b350 100644
--- a/src/math/sqrtf.rs
+++ b/src/math/sqrtf.rs
@@ -16,18 +16,12 @@
 /// The square root of `x` (f32).
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn sqrtf(x: f32) -> f32 {
-    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
-    // `f32.sqrt` native instruction, so we can leverage this for both code size
-    // and speed.
-    llvm_intrinsically_optimized! {
-        #[cfg(target_arch = "wasm32")] {
-            return if x < 0.0 {
-                ::core::f32::NAN
-            } else {
-                unsafe { ::core::intrinsics::sqrtf32(x) }
-            }
-        }
+    select_implementation! {
+        name: sqrtf,
+        use_intrinsic: target_arch = "wasm32",
+        args: x,
     }
+
     #[cfg(all(target_feature = "sse", not(feature = "force-soft-floats")))]
     {
         // Note: This path is unlikely since LLVM will usually have already
diff --git a/src/math/trunc.rs b/src/math/trunc.rs
index f7892a2c5..6961bb950 100644
--- a/src/math/trunc.rs
+++ b/src/math/trunc.rs
@@ -2,14 +2,12 @@ use core::f64;
 
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn trunc(x: f64) -> f64 {
-    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
-    // `f64.trunc` native instruction, so we can leverage this for both code size
-    // and speed.
-    llvm_intrinsically_optimized! {
-        #[cfg(target_arch = "wasm32")] {
-            return unsafe { ::core::intrinsics::truncf64(x) }
-        }
+    select_implementation! {
+        name: trunc,
+        use_intrinsic: target_arch = "wasm32",
+        args: x,
     }
+
     let x1p120 = f64::from_bits(0x4770000000000000); // 0x1p120f === 2 ^ 120
 
     let mut i: u64 = x.to_bits();
diff --git a/src/math/truncf.rs b/src/math/truncf.rs
index 20d5b73bd..8270c8eb3 100644
--- a/src/math/truncf.rs
+++ b/src/math/truncf.rs
@@ -2,14 +2,12 @@ use core::f32;
 
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn truncf(x: f32) -> f32 {
-    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
-    // `f32.trunc` native instruction, so we can leverage this for both code size
-    // and speed.
-    llvm_intrinsically_optimized! {
-        #[cfg(target_arch = "wasm32")] {
-            return unsafe { ::core::intrinsics::truncf32(x) }
-        }
+    select_implementation! {
+        name: truncf,
+        use_intrinsic: target_arch = "wasm32",
+        args: x,
     }
+
     let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120
 
     let mut i: u32 = x.to_bits();

From 4b89115c1a54ecc16bb50a7d277f8cded4ed89d9 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 28 Oct 2024 20:45:12 -0500
Subject: [PATCH 006/279] Combine the WASM CI job with the others

There isn't any reason to be distinct here, and it would be better to
test with all feature configurations in run.sh anyway.
---
 .github/workflows/main.yml | 23 +++++++----------------
 ci/run.sh                  |  2 +-
 2 files changed, 8 insertions(+), 17 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 15eba6e89..cc0d23ffc 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -9,7 +9,7 @@ env:
 
 jobs:
   test:
-    name: Docker
+    name: Build and test
     timeout-minutes: 20
     strategy:
       fail-fast: false
@@ -52,6 +52,9 @@ jobs:
           os: ubuntu-latest
         - target: x86_64-apple-darwin
           os: macos-13
+        - target: wasm32-unknown-unknown
+          os: ubuntu-latest
+          build_only: 1
         - target: i686-pc-windows-msvc
           os: windows-latest
         - target: x86_64-pc-windows-msvc
@@ -89,30 +92,19 @@ jobs:
 
     # Non-linux tests just use our raw script
     - name: Run locally
-      if: matrix.os != 'ubuntu-latest'
+      if: matrix.os != 'ubuntu-latest' || contains(matrix.target, 'wasm')
       shell: bash
       run: ./ci/run.sh ${{ matrix.target }}
 
     # Otherwise we use our docker containers to run builds
     - name: Run in Docker
-      if: matrix.os == 'ubuntu-latest'
+      if: matrix.os == 'ubuntu-latest' && !contains(matrix.target, 'wasm')
       run: |
         rustup target add x86_64-unknown-linux-musl
         cargo generate-lockfile && ./ci/run-docker.sh ${{ matrix.target }}
 
-  wasm:
-    name: WebAssembly
-    runs-on: ubuntu-latest
-    steps:
-    - uses: actions/checkout@master
-    - name: Install Rust
-      run: rustup update nightly --no-self-update && rustup default nightly
-    - run: rustup target add wasm32-unknown-unknown
-    - uses: Swatinem/rust-cache@v2
-    - run: cargo build --target wasm32-unknown-unknown
-
   builtins:
-    name: "The compiler-builtins crate works"
+    name: Check use with compiler-builtins
     runs-on: ubuntu-latest
     steps:
     - uses: actions/checkout@master
@@ -164,7 +156,6 @@ jobs:
   success:
     needs:
       - test
-      - wasm
       - builtins
       - benchmarks
       - msrv
diff --git a/ci/run.sh b/ci/run.sh
index d3fc4ce24..30265e513 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -55,7 +55,7 @@ if [ "${BUILD_ONLY:-}" = "1" ]; then
     $cmd
     $cmd --features "unstable-intrinsics"
 
-    echo "can't run tests on $target"
+    echo "can't run tests on $target; skipping"
 else
     cmd="cargo test --all --target $target $extra_flags"
 

From 92b365e4dff05ddf82ff9b716e82e1a85bac8ab0 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 21 Oct 2024 17:38:57 -0500
Subject: [PATCH 007/279] Create interfaces for testing against MPFR

Add a way to call MPFR versions of functions in a predictable way, using
the `MpOp` trait.

Everything new here is guarded by the feature `test-multiprecision`
since MPFR cannot easily build on Windows or any cross compiled targets.
---
 crates/libm-test/Cargo.toml     |   3 +
 crates/libm-test/src/lib.rs     |   2 +
 crates/libm-test/src/mpfloat.rs | 389 ++++++++++++++++++++++++++++++++
 3 files changed, 394 insertions(+)
 create mode 100644 crates/libm-test/src/mpfloat.rs

diff --git a/crates/libm-test/Cargo.toml b/crates/libm-test/Cargo.toml
index 703524bcd..72ac57232 100644
--- a/crates/libm-test/Cargo.toml
+++ b/crates/libm-test/Cargo.toml
@@ -10,18 +10,21 @@ default = []
 # Generate tests which are random inputs and the outputs are calculated with
 # musl libc.
 test-musl-serialized = ["rand"]
+test-multiprecision = ["dep:az", "dep:rug"]
 
 # Build our own musl for testing and benchmarks
 build-musl = ["dep:musl-math-sys"]
 
 [dependencies]
 anyhow = "1.0.90"
+az = { version = "1.2.1", optional = true }
 libm = { path = "../.." }
 libm-macros = { path = "../libm-macros" }
 musl-math-sys = { path = "../musl-math-sys", optional = true }
 paste = "1.0.15"
 rand = "0.8.5"
 rand_chacha = "0.3.1"
+rug = { version = "1.26.1", optional = true, default-features = false, features = ["float", "std"] }
 
 [target.'cfg(target_family = "wasm")'.dependencies]
 # Enable randomness on WASM
diff --git a/crates/libm-test/src/lib.rs b/crates/libm-test/src/lib.rs
index 2abe7f605..64343e00d 100644
--- a/crates/libm-test/src/lib.rs
+++ b/crates/libm-test/src/lib.rs
@@ -1,4 +1,6 @@
 pub mod gen;
+#[cfg(feature = "test-multiprecision")]
+pub mod mpfloat;
 mod num_traits;
 mod special_case;
 mod test_traits;
diff --git a/crates/libm-test/src/mpfloat.rs b/crates/libm-test/src/mpfloat.rs
new file mode 100644
index 000000000..db072780a
--- /dev/null
+++ b/crates/libm-test/src/mpfloat.rs
@@ -0,0 +1,389 @@
+//! Interfaces needed to support testing with multi-precision floating point numbers.
+//!
+//! Within this module, the macros create a submodule for each `libm` function. These contain
+//! a struct named `Operation` that implements [`MpOp`].
+
+use std::cmp::Ordering;
+
+use az::Az;
+use rug::Assign;
+pub use rug::Float as MpFloat;
+use rug::float::Round::Nearest;
+use rug::ops::{PowAssignRound, RemAssignRound};
+
+use crate::Float;
+
+/// Create a multiple-precision float with the correct number of bits for a concrete float type.
+fn new_mpfloat<F: Float>() -> MpFloat {
+    MpFloat::new(F::SIGNIFICAND_BITS + 1)
+}
+
+/// Set subnormal emulation and convert to a concrete float type.
+fn prep_retval<F: Float>(mp: &mut MpFloat, ord: Ordering) -> F
+where
+    for<'a> &'a MpFloat: az::Cast<F>,
+{
+    mp.subnormalize_ieee_round(ord, Nearest);
+    (&*mp).az::<F>()
+}
+
+/// Structures that represent a float operation.
+///
+/// The struct itself should hold any context that can be reused among calls to `run` (allocated
+/// `MpFloat`s).
+pub trait MpOp {
+    /// Inputs to the operation (concrete float types).
+    type Input;
+
+    /// Outputs from the operation (concrete float types).
+    type Output;
+
+    /// Create a new instance.
+    fn new() -> Self;
+
+    /// Perform the operation.
+    ///
+    /// Usually this means assigning inputs to cached floats, performing the operation, applying
+    /// subnormal approximation, and converting the result back to concrete values.
+    fn run(&mut self, input: Self::Input) -> Self::Output;
+}
+
+/// Implement `MpOp` for functions with a single return value.
+macro_rules! impl_mp_op {
+    // Matcher for unary functions
+    (
+        fn_name: $fn_name:ident,
+        CFn: $CFn:ty,
+        CArgs: $CArgs:ty,
+        CRet: $CRet:ty,
+        RustFn: fn($fty:ty,) -> $_ret:ty,
+        RustArgs: $RustArgs:ty,
+        RustRet: $RustRet:ty,
+        fn_extra: $fn_name_normalized:expr,
+    ) => {
+        paste::paste! {
+            pub mod $fn_name {
+                use super::*;
+                pub struct Operation(MpFloat);
+
+                impl MpOp for Operation {
+                    type Input = $RustArgs;
+                    type Output = $RustRet;
+
+                    fn new() -> Self {
+                        Self(new_mpfloat::<$fty>())
+                    }
+
+                    fn run(&mut self, input: Self::Input) -> Self::Output {
+                        self.0.assign(input.0);
+                        let ord = self.0.[< $fn_name_normalized _round >](Nearest);
+                        prep_retval::<Self::Output>(&mut self.0, ord)
+                    }
+                }
+            }
+        }
+    };
+    // Matcher for binary functions
+    (
+        fn_name: $fn_name:ident,
+        CFn: $CFn:ty,
+        CArgs: $CArgs:ty,
+        CRet: $CRet:ty,
+        RustFn: fn($fty:ty, $_fty2:ty,) -> $_ret:ty,
+        RustArgs: $RustArgs:ty,
+        RustRet: $RustRet:ty,
+        fn_extra: $fn_name_normalized:expr,
+    ) => {
+        paste::paste! {
+            pub mod $fn_name {
+                use super::*;
+                pub struct Operation(MpFloat, MpFloat);
+
+                impl MpOp for Operation {
+                    type Input = $RustArgs;
+                    type Output = $RustRet;
+
+                    fn new() -> Self {
+                        Self(new_mpfloat::<$fty>(), new_mpfloat::<$fty>())
+                    }
+
+                    fn run(&mut self, input: Self::Input) -> Self::Output {
+                        self.0.assign(input.0);
+                        self.1.assign(input.1);
+                        let ord = self.0.[< $fn_name_normalized _round >](&self.1, Nearest);
+                        prep_retval::<Self::Output>(&mut self.0, ord)
+                    }
+                }
+            }
+        }
+    };
+    // Matcher for ternary functions
+    (
+        fn_name: $fn_name:ident,
+        CFn: $CFn:ty,
+        CArgs: $CArgs:ty,
+        CRet: $CRet:ty,
+        RustFn: fn($fty:ty, $_fty2:ty, $_fty3:ty,) -> $_ret:ty,
+        RustArgs: $RustArgs:ty,
+        RustRet: $RustRet:ty,
+        fn_extra: $fn_name_normalized:expr,
+    ) => {
+        paste::paste! {
+            pub mod $fn_name {
+                use super::*;
+                pub struct Operation(MpFloat, MpFloat, MpFloat);
+
+                impl MpOp for Operation {
+                    type Input = $RustArgs;
+                    type Output = $RustRet;
+
+                    fn new() -> Self {
+                        Self(new_mpfloat::<$fty>(), new_mpfloat::<$fty>(), new_mpfloat::<$fty>())
+                    }
+
+                    fn run(&mut self, input: Self::Input) -> Self::Output {
+                        self.0.assign(input.0);
+                        self.1.assign(input.1);
+                        self.2.assign(input.2);
+                        let ord = self.0.[< $fn_name_normalized _round >](&self.1, &self.2, Nearest);
+                        prep_retval::<Self::Output>(&mut self.0, ord)
+                    }
+                }
+            }
+        }
+    };
+}
+
+libm_macros::for_each_function! {
+    callback: impl_mp_op,
+    skip: [
+        // Most of these need a manual implementation
+        fabs, ceil, copysign, floor, rint, round, trunc,
+        fabsf, ceilf, copysignf, floorf, rintf, roundf, truncf,
+        fmod, fmodf, frexp, frexpf, ilogb, ilogbf, jn, jnf, ldexp, ldexpf,
+        lgamma_r, lgammaf_r, modf, modff, nextafter, nextafterf, pow,powf,
+        remquo, remquof, scalbn, scalbnf, sincos, sincosf,
+    ],
+    fn_extra: match MACRO_FN_NAME {
+        // Remap function names that are different between mpfr and libm
+        expm1 | expm1f => exp_m1,
+        fabs | fabsf => abs,
+        fdim | fdimf => positive_diff,
+        fma | fmaf => mul_add,
+        fmax | fmaxf => max,
+        fmin | fminf => min,
+        lgamma | lgammaf => ln_gamma,
+        log | logf => ln,
+        log1p | log1pf => ln_1p,
+        tgamma | tgammaf => gamma,
+        _ => MACRO_FN_NAME_NORMALIZED
+    }
+}
+
+/// Implement unary functions that don't have a `_round` version
+macro_rules! impl_no_round {
+    // Unary matcher
+    ($($fn_name:ident, $rug_name:ident;)*) => {
+        paste::paste! {
+            // Implement for both f32 and f64
+            $( impl_no_round!{ @inner_unary [< $fn_name f >], (f32,), $rug_name } )*
+            $( impl_no_round!{ @inner_unary $fn_name, (f64,), $rug_name } )*
+        }
+    };
+
+    (@inner_unary $fn_name:ident, ($fty:ty,), $rug_name:ident) => {
+        pub mod $fn_name {
+            use super::*;
+            pub struct Operation(MpFloat);
+
+            impl MpOp for Operation {
+                type Input = ($fty,);
+                type Output = $fty;
+
+                fn new() -> Self {
+                    Self(new_mpfloat::<$fty>())
+                }
+
+                fn run(&mut self, input: Self::Input) -> Self::Output {
+                    self.0.assign(input.0);
+                    self.0.$rug_name();
+                    prep_retval::<Self::Output>(&mut self.0, Ordering::Equal)
+                }
+            }
+        }
+    };
+}
+
+impl_no_round! {
+    fabs, abs_mut;
+    ceil, ceil_mut;
+    floor, floor_mut;
+    rint, round_even_mut; // FIXME: respect rounding mode
+    round, round_mut;
+    trunc, trunc_mut;
+}
+
+/// Some functions are difficult to do in a generic way. Implement them here.
+macro_rules! impl_op_for_ty {
+    ($fty:ty, $suffix:literal) => {
+        paste::paste! {
+            pub mod [<copysign $suffix>] {
+                use super::*;
+                pub struct Operation(MpFloat, MpFloat);
+
+                impl MpOp for Operation {
+                    type Input = ($fty, $fty);
+                    type Output = $fty;
+
+                    fn new() -> Self {
+                        Self(new_mpfloat::<$fty>(), new_mpfloat::<$fty>())
+                    }
+
+                    fn run(&mut self, input: Self::Input) -> Self::Output {
+                        self.0.assign(input.0);
+                        self.1.assign(input.1);
+                        self.0.copysign_mut(&self.1);
+                        prep_retval::<Self::Output>(&mut self.0, Ordering::Equal)
+                    }
+                }
+            }
+
+            pub mod [<nextafter $suffix>] {
+                use super::*;
+                pub struct Operation(MpFloat, MpFloat);
+
+                impl MpOp for Operation {
+                    type Input = ($fty, $fty);
+                    type Output = $fty;
+
+                    fn new() -> Self {
+                        Self(new_mpfloat::<$fty>(), new_mpfloat::<$fty>())
+                    }
+
+                    fn run(&mut self, input: Self::Input) -> Self::Output {
+                        self.0.assign(input.0);
+                        self.1.assign(input.1);
+                        self.0.next_toward(&self.1);
+                        prep_retval::<Self::Output>(&mut self.0, Ordering::Equal)
+                    }
+                }
+            }
+
+            pub mod [<pow $suffix>] {
+                use super::*;
+                pub struct Operation(MpFloat, MpFloat);
+
+                impl MpOp for Operation {
+                    type Input = ($fty, $fty);
+                    type Output = $fty;
+
+                    fn new() -> Self {
+                        Self(new_mpfloat::<$fty>(), new_mpfloat::<$fty>())
+                    }
+
+                    fn run(&mut self, input: Self::Input) -> Self::Output {
+                        self.0.assign(input.0);
+                        self.1.assign(input.1);
+                        let ord = self.0.pow_assign_round(&self.1, Nearest);
+                        prep_retval::<Self::Output>(&mut self.0, ord)
+                    }
+                }
+            }
+
+            pub mod [<fmod $suffix>] {
+                use super::*;
+                pub struct Operation(MpFloat, MpFloat);
+
+                impl MpOp for Operation {
+                    type Input = ($fty, $fty);
+                    type Output = $fty;
+
+                    fn new() -> Self {
+                        Self(new_mpfloat::<$fty>(), new_mpfloat::<$fty>())
+                    }
+
+                    fn run(&mut self, input: Self::Input) -> Self::Output {
+                        self.0.assign(input.0);
+                        self.1.assign(input.1);
+                        let ord = self.0.rem_assign_round(&self.1, Nearest);
+                        prep_retval::<Self::Output>(&mut self.0, ord)
+                    }
+                }
+            }
+
+            pub mod [<lgamma_r $suffix>] {
+                use super::*;
+                pub struct Operation(MpFloat);
+
+                impl MpOp for Operation {
+                    type Input = ($fty,);
+                    type Output = ($fty, i32);
+
+                    fn new() -> Self {
+                        Self(new_mpfloat::<$fty>())
+                    }
+
+                    fn run(&mut self, input: Self::Input) -> Self::Output {
+                        self.0.assign(input.0);
+                        let (sign, ord) = self.0.ln_abs_gamma_round(Nearest);
+                        let ret = prep_retval::<$fty>(&mut self.0, ord);
+                        (ret, sign as i32)
+                    }
+                }
+            }
+
+            pub mod [<jn $suffix>] {
+                use super::*;
+                pub struct Operation(i32, MpFloat);
+
+                impl MpOp for Operation {
+                    type Input = (i32, $fty);
+                    type Output = $fty;
+
+                    fn new() -> Self {
+                        Self(0, new_mpfloat::<$fty>())
+                    }
+
+                    fn run(&mut self, input: Self::Input) -> Self::Output {
+                        self.0 = input.0;
+                        self.1.assign(input.1);
+                        let ord = self.1.jn_round(self.0, Nearest);
+                        prep_retval::<$fty>(&mut self.1, ord)
+                    }
+                }
+            }
+
+            pub mod [<sincos $suffix>] {
+                use super::*;
+                pub struct Operation(MpFloat, MpFloat);
+
+                impl MpOp for Operation {
+                    type Input = ($fty,);
+                    type Output = ($fty, $fty);
+
+                    fn new() -> Self {
+                        Self(new_mpfloat::<$fty>(), new_mpfloat::<$fty>())
+                    }
+
+                    fn run(&mut self, input: Self::Input) -> Self::Output {
+                        self.0.assign(input.0);
+                        self.1.assign(0.0);
+                        let (sord, cord) = self.0.sin_cos_round(&mut self.1, Nearest);
+                        (
+                            prep_retval::<$fty>(&mut self.0, sord),
+                            prep_retval::<$fty>(&mut self.1, cord)
+                        )
+                    }
+                }
+            }
+        }
+    };
+}
+
+impl_op_for_ty!(f32, "f");
+impl_op_for_ty!(f64, "");
+
+// Account for `lgamma_r` not having a simple `f` suffix
+pub mod lgammaf_r {
+    pub use super::lgamma_rf::*;
+}

From 319285b560ce6a5fdb8790ebf0214b997b89c0a6 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 21 Oct 2024 17:41:24 -0500
Subject: [PATCH 008/279] Add a test against MPFR using random inputs

---
 crates/libm-test/src/gen/random.rs           |  16 +-
 crates/libm-test/src/lib.rs                  |  29 +++-
 crates/libm-test/src/mpfloat.rs              |  21 ---
 crates/libm-test/src/special_case.rs         | 157 ++++++++++++++-----
 crates/libm-test/src/test_traits.rs          |   2 +
 crates/libm-test/tests/compare_built_musl.rs |   2 +-
 crates/libm-test/tests/multiprecision.rs     |  71 +++++++++
 7 files changed, 228 insertions(+), 70 deletions(-)
 create mode 100644 crates/libm-test/tests/multiprecision.rs

diff --git a/crates/libm-test/src/gen/random.rs b/crates/libm-test/src/gen/random.rs
index 601ef4f1d..c73937aac 100644
--- a/crates/libm-test/src/gen/random.rs
+++ b/crates/libm-test/src/gen/random.rs
@@ -7,7 +7,7 @@ use rand::{Rng, SeedableRng};
 use rand_chacha::ChaCha8Rng;
 
 use super::CachedInput;
-use crate::GenerateInput;
+use crate::{CheckCtx, GenerateInput};
 
 const SEED: [u8; 32] = *b"3.141592653589793238462643383279";
 
@@ -40,9 +40,10 @@ static TEST_CASES_JN: LazyLock<CachedInput> = LazyLock::new(|| {
     let mut cases = (&*TEST_CASES).clone();
 
     // These functions are extremely slow, limit them
-    cases.inputs_i32.truncate((NTESTS / 1000).max(80));
-    cases.inputs_f32.truncate((NTESTS / 1000).max(80));
-    cases.inputs_f64.truncate((NTESTS / 1000).max(80));
+    let ntests_jn = (NTESTS / 1000).max(80);
+    cases.inputs_i32.truncate(ntests_jn);
+    cases.inputs_f32.truncate(ntests_jn);
+    cases.inputs_f64.truncate(ntests_jn);
 
     // It is easy to overflow the stack with these in debug mode
     let max_iterations = if cfg!(optimizations_enabled) && cfg!(target_pointer_width = "64") {
@@ -105,11 +106,10 @@ fn make_test_cases(ntests: usize) -> CachedInput {
 }
 
 /// Create a test case iterator.
-pub fn get_test_cases<RustArgs>(fname: &str) -> impl Iterator<Item = RustArgs>
+pub fn get_test_cases<RustArgs>(ctx: &CheckCtx) -> impl Iterator<Item = RustArgs>
 where
     CachedInput: GenerateInput<RustArgs>,
 {
-    let inputs = if fname == "jn" || fname == "jnf" { &TEST_CASES_JN } else { &TEST_CASES };
-
-    CachedInput::get_cases(inputs)
+    let inputs = if ctx.fname == "jn" || ctx.fname == "jnf" { &TEST_CASES_JN } else { &TEST_CASES };
+    inputs.get_cases()
 }
diff --git a/crates/libm-test/src/lib.rs b/crates/libm-test/src/lib.rs
index 64343e00d..13b76d6c5 100644
--- a/crates/libm-test/src/lib.rs
+++ b/crates/libm-test/src/lib.rs
@@ -16,14 +16,18 @@ pub type TestResult<T = (), E = anyhow::Error> = Result<T, E>;
 // List of all files present in libm's source
 include!(concat!(env!("OUT_DIR"), "/all_files.rs"));
 
-/// ULP allowed to differ from musl (note that musl itself may not be accurate).
+/// Default ULP allowed to differ from musl (note that musl itself may not be accurate).
 const MUSL_DEFAULT_ULP: u32 = 2;
 
-/// Certain functions have different allowed ULP (consider these xfail).
+/// Default ULP allowed to differ from multiprecision (i.e. infinite) results.
+const MULTIPREC_DEFAULT_ULP: u32 = 1;
+
+/// ULP allowed to differ from muls results.
 ///
 /// Note that these results were obtained using 400,000,000 rounds of random inputs, which
 /// is not a value used by default.
 pub fn musl_allowed_ulp(name: &str) -> u32 {
+    // Consider overrides xfail
     match name {
         #[cfg(x86_no_sse)]
         "asinh" | "asinhf" => 6,
@@ -44,6 +48,27 @@ pub fn musl_allowed_ulp(name: &str) -> u32 {
     }
 }
 
+/// ULP allowed to differ from multiprecision results.
+pub fn multiprec_allowed_ulp(name: &str) -> u32 {
+    // Consider overrides xfail
+    match name {
+        "asinh" | "asinhf" => 2,
+        "acoshf" => 4,
+        "atanh" | "atanhf" => 2,
+        "exp10" | "exp10f" => 3,
+        "j0" | "j0f" | "j1" | "j1f" => {
+            // Results seem very target-dependent
+            if cfg!(target_arch = "x86_64") { 4000 } else { 800_000 }
+        }
+        "jn" | "jnf" => 1000,
+        "lgamma" | "lgammaf" | "lgamma_r" | "lgammaf_r" => 16,
+        "sinh" | "sinhf" => 2,
+        "tanh" | "tanhf" => 2,
+        "tgamma" => 20,
+        _ => MULTIPREC_DEFAULT_ULP,
+    }
+}
+
 /// Return the unsuffixed version of a function name; e.g. `abs` and `absf` both return `abs`,
 /// `lgamma_r` and `lgammaf_r` both return `lgamma_r`.
 pub fn canonical_name(name: &str) -> &str {
diff --git a/crates/libm-test/src/mpfloat.rs b/crates/libm-test/src/mpfloat.rs
index db072780a..44962d116 100644
--- a/crates/libm-test/src/mpfloat.rs
+++ b/crates/libm-test/src/mpfloat.rs
@@ -248,27 +248,6 @@ macro_rules! impl_op_for_ty {
                 }
             }
 
-            pub mod [<nextafter $suffix>] {
-                use super::*;
-                pub struct Operation(MpFloat, MpFloat);
-
-                impl MpOp for Operation {
-                    type Input = ($fty, $fty);
-                    type Output = $fty;
-
-                    fn new() -> Self {
-                        Self(new_mpfloat::<$fty>(), new_mpfloat::<$fty>())
-                    }
-
-                    fn run(&mut self, input: Self::Input) -> Self::Output {
-                        self.0.assign(input.0);
-                        self.1.assign(input.1);
-                        self.0.next_toward(&self.1);
-                        prep_retval::<Self::Output>(&mut self.0, Ordering::Equal)
-                    }
-                }
-            }
-
             pub mod [<pow $suffix>] {
                 use super::*;
                 pub struct Operation(MpFloat, MpFloat);
diff --git a/crates/libm-test/src/special_case.rs b/crates/libm-test/src/special_case.rs
index df263d742..dac7a349d 100644
--- a/crates/libm-test/src/special_case.rs
+++ b/crates/libm-test/src/special_case.rs
@@ -58,20 +58,6 @@ impl MaybeOverride<(f32,)> for SpecialCase {
         ctx: &CheckCtx,
     ) -> Option<TestResult> {
         if ctx.basis == CheckBasis::Musl {
-            if ctx.fname == "acoshf" && input.0 < -1.0 {
-                // acoshf is undefined for x <= 1.0, but we return a random result at lower
-                // values.
-                return XFAIL;
-            }
-
-            if ctx.fname == "sincosf" {
-                let factor_frac_pi_2 = input.0.abs() / f32::consts::FRAC_PI_2;
-                if (factor_frac_pi_2 - factor_frac_pi_2.round()).abs() < 1e-2 {
-                    // we have a bad approximation near multiples of pi/2
-                    return XFAIL;
-                }
-            }
-
             if ctx.fname == "expm1f" && input.0 > 80.0 && actual.is_infinite() {
                 // we return infinity but the number is representable
                 return XFAIL;
@@ -82,15 +68,40 @@ impl MaybeOverride<(f32,)> for SpecialCase {
                 // doesn't seem to happen on x86
                 return XFAIL;
             }
+        }
 
-            if ctx.fname == "lgammaf" || ctx.fname == "lgammaf_r" && input.0 < 0.0 {
-                // loggamma should not be defined for x < 0, yet we both return results
-                return XFAIL;
-            }
+        if ctx.fname == "acoshf" && input.0 < -1.0 {
+            // acoshf is undefined for x <= 1.0, but we return a random result at lower
+            // values.
+            return XFAIL;
+        }
+
+        if ctx.fname == "lgammaf" || ctx.fname == "lgammaf_r" && input.0 < 0.0 {
+            // loggamma should not be defined for x < 0, yet we both return results
+            return XFAIL;
         }
 
         maybe_check_nan_bits(actual, expected, ctx)
     }
+
+    fn check_int<I: Int>(
+        input: (f32,),
+        actual: I,
+        expected: I,
+        ctx: &CheckCtx,
+    ) -> Option<anyhow::Result<()>> {
+        // On MPFR for lgammaf_r, we set -1 as the integer result for negative infinity but MPFR
+        // sets +1
+        if ctx.basis == CheckBasis::Mpfr
+            && ctx.fname == "lgammaf_r"
+            && input.0 == f32::NEG_INFINITY
+            && actual.abs() == expected.abs()
+        {
+            XFAIL
+        } else {
+            None
+        }
+    }
 }
 
 impl MaybeOverride<(f64,)> for SpecialCase {
@@ -117,15 +128,40 @@ impl MaybeOverride<(f64,)> for SpecialCase {
                 // musl returns -0.0, we return +0.0
                 return XFAIL;
             }
+        }
 
-            if ctx.fname == "lgamma" || ctx.fname == "lgamma_r" && input.0 < 0.0 {
-                // loggamma should not be defined for x < 0, yet we both return results
-                return XFAIL;
-            }
+        if ctx.fname == "acosh" && input.0 < 1.0 {
+            // The function is undefined for the inputs, musl and our libm both return
+            // random results.
+            return XFAIL;
+        }
+
+        if ctx.fname == "lgamma" || ctx.fname == "lgamma_r" && input.0 < 0.0 {
+            // loggamma should not be defined for x < 0, yet we both return results
+            return XFAIL;
         }
 
         maybe_check_nan_bits(actual, expected, ctx)
     }
+
+    fn check_int<I: Int>(
+        input: (f64,),
+        actual: I,
+        expected: I,
+        ctx: &CheckCtx,
+    ) -> Option<anyhow::Result<()>> {
+        // On MPFR for lgamma_r, we set -1 as the integer result for negative infinity but MPFR
+        // sets +1
+        if ctx.basis == CheckBasis::Mpfr
+            && ctx.fname == "lgamma_r"
+            && input.0 == f64::NEG_INFINITY
+            && actual.abs() == expected.abs()
+        {
+            XFAIL
+        } else {
+            None
+        }
+    }
 }
 
 /// Check NaN bits if the function requires it
@@ -142,6 +178,11 @@ fn maybe_check_nan_bits<F: Float>(actual: F, expected: F, ctx: &CheckCtx) -> Opt
         return SKIP;
     }
 
+    // MPFR only has one NaN bitpattern; allow the default `.is_nan()` checks to validate.
+    if ctx.basis == CheckBasis::Mpfr {
+        return SKIP;
+    }
+
     // abs and copysign require signaling NaNs to be propagated, so verify bit equality.
     if actual.to_bits() == expected.to_bits() {
         return SKIP;
@@ -158,9 +199,10 @@ impl MaybeOverride<(f32, f32)> for SpecialCase {
         _ulp: &mut u32,
         ctx: &CheckCtx,
     ) -> Option<TestResult> {
-        maybe_skip_min_max_nan(input, expected, ctx)
+        maybe_skip_binop_nan(input, expected, ctx)
     }
 }
+
 impl MaybeOverride<(f64, f64)> for SpecialCase {
     fn check_float<F: Float>(
         input: (f64, f64),
@@ -169,47 +211,86 @@ impl MaybeOverride<(f64, f64)> for SpecialCase {
         _ulp: &mut u32,
         ctx: &CheckCtx,
     ) -> Option<TestResult> {
-        maybe_skip_min_max_nan(input, expected, ctx)
+        maybe_skip_binop_nan(input, expected, ctx)
     }
 }
 
 /// Musl propagates NaNs if one is provided as the input, but we return the other input.
 // F1 and F2 are always the same type, this is just to please generics
-fn maybe_skip_min_max_nan<F1: Float, F2: Float>(
+fn maybe_skip_binop_nan<F1: Float, F2: Float>(
     input: (F1, F1),
     expected: F2,
     ctx: &CheckCtx,
 ) -> Option<TestResult> {
-    if (ctx.canonical_name == "fmax" || ctx.canonical_name == "fmin")
-        && (input.0.is_nan() || input.1.is_nan())
-        && expected.is_nan()
-    {
-        return XFAIL;
-    } else {
-        None
+    match ctx.basis {
+        CheckBasis::Musl => {
+            if (ctx.canonical_name == "fmax" || ctx.canonical_name == "fmin")
+                && (input.0.is_nan() || input.1.is_nan())
+                && expected.is_nan()
+            {
+                XFAIL
+            } else {
+                None
+            }
+        }
+        CheckBasis::Mpfr => {
+            if ctx.canonical_name == "copysign" && input.1.is_nan() {
+                SKIP
+            } else {
+                None
+            }
+        }
     }
 }
 
 impl MaybeOverride<(i32, f32)> for SpecialCase {
     fn check_float<F: Float>(
         input: (i32, f32),
-        _actual: F,
-        _expected: F,
+        actual: F,
+        expected: F,
         ulp: &mut u32,
         ctx: &CheckCtx,
     ) -> Option<TestResult> {
-        bessel_prec_dropoff(input, ulp, ctx)
+        match ctx.basis {
+            CheckBasis::Musl => bessel_prec_dropoff(input, ulp, ctx),
+            CheckBasis::Mpfr => {
+                // We return +0.0, MPFR returns -0.0
+                if ctx.fname == "jnf"
+                    && input.1 == f32::NEG_INFINITY
+                    && actual == F::ZERO
+                    && expected == F::ZERO
+                {
+                    XFAIL
+                } else {
+                    None
+                }
+            }
+        }
     }
 }
 impl MaybeOverride<(i32, f64)> for SpecialCase {
     fn check_float<F: Float>(
         input: (i32, f64),
-        _actual: F,
-        _expected: F,
+        actual: F,
+        expected: F,
         ulp: &mut u32,
         ctx: &CheckCtx,
     ) -> Option<TestResult> {
-        bessel_prec_dropoff(input, ulp, ctx)
+        match ctx.basis {
+            CheckBasis::Musl => bessel_prec_dropoff(input, ulp, ctx),
+            CheckBasis::Mpfr => {
+                // We return +0.0, MPFR returns -0.0
+                if ctx.fname == "jn"
+                    && input.1 == f64::NEG_INFINITY
+                    && actual == F::ZERO
+                    && expected == F::ZERO
+                {
+                    XFAIL
+                } else {
+                    bessel_prec_dropoff(input, ulp, ctx)
+                }
+            }
+        }
     }
 }
 
diff --git a/crates/libm-test/src/test_traits.rs b/crates/libm-test/src/test_traits.rs
index c24ac6e43..deb837887 100644
--- a/crates/libm-test/src/test_traits.rs
+++ b/crates/libm-test/src/test_traits.rs
@@ -52,6 +52,8 @@ impl CheckCtx {
 pub enum CheckBasis {
     /// Check against Musl's math sources.
     Musl,
+    /// Check against infinite precision (MPFR).
+    Mpfr,
 }
 
 /// A trait to implement on any output type so we can verify it in a generic way.
diff --git a/crates/libm-test/tests/compare_built_musl.rs b/crates/libm-test/tests/compare_built_musl.rs
index 208b8e286..5a118f7c2 100644
--- a/crates/libm-test/tests/compare_built_musl.rs
+++ b/crates/libm-test/tests/compare_built_musl.rs
@@ -29,8 +29,8 @@ macro_rules! musl_rand_tests {
         fn [< musl_random_ $fn_name >]() {
             let fname = stringify!($fn_name);
             let ulp = musl_allowed_ulp(fname);
-            let cases = random::get_test_cases::<$RustArgs>(fname);
             let ctx = CheckCtx::new(ulp, fname, CheckBasis::Musl);
+            let cases = random::get_test_cases::<$RustArgs>(&ctx);
 
             for input in cases {
                 let musl_res = input.call(musl::$fn_name as $CFn);
diff --git a/crates/libm-test/tests/multiprecision.rs b/crates/libm-test/tests/multiprecision.rs
new file mode 100644
index 000000000..f8d94a160
--- /dev/null
+++ b/crates/libm-test/tests/multiprecision.rs
@@ -0,0 +1,71 @@
+//! Test with "infinite precision"
+
+#![cfg(feature = "test-multiprecision")]
+
+use libm_test::gen::random;
+use libm_test::mpfloat::{self, MpOp};
+use libm_test::{CheckBasis, CheckCtx, CheckOutput, TupleCall, multiprec_allowed_ulp};
+
+/// Implement a test against MPFR with random inputs.
+macro_rules! multiprec_rand_tests {
+    (
+        fn_name: $fn_name:ident,
+        CFn: $CFn:ty,
+        CArgs: $CArgs:ty,
+        CRet: $CRet:ty,
+        RustFn: $RustFn:ty,
+        RustArgs: $RustArgs:ty,
+        RustRet: $RustRet:ty,
+        attrs: [$($meta:meta)*]
+    ) => {
+        paste::paste! {
+            #[test]
+            $(#[$meta])*
+            fn [< multiprec_random_ $fn_name >]() {
+                type MpOpTy = mpfloat::$fn_name::Operation;
+
+                let fname = stringify!($fn_name);
+                let ulp = multiprec_allowed_ulp(fname);
+                let mut mp_vals = MpOpTy::new();
+                let ctx = CheckCtx::new(ulp, fname, CheckBasis::Mpfr);
+                let cases = random::get_test_cases::<$RustArgs>(&ctx);
+
+                for input in cases {
+                    let mp_res = mp_vals.run(input);
+                    let crate_res = input.call(libm::$fn_name as $RustFn);
+
+                    crate_res.validate(mp_res, input, &ctx).unwrap();
+                }
+            }
+        }
+    };
+}
+
+libm_macros::for_each_function! {
+    callback: multiprec_rand_tests,
+    attributes: [
+        // Also an assertion failure on i686: at `MPFR_ASSERTN (! mpfr_erangeflag_p ())`
+        #[ignore = "large values are infeasible in MPFR"]
+        [jn, jnf],
+    ],
+    skip: [
+        // FIXME: MPFR tests needed
+        frexp,
+        frexpf,
+        ilogb,
+        ilogbf,
+        ldexp,
+        ldexpf,
+        modf,
+        modff,
+        remquo,
+        remquof,
+        scalbn,
+        scalbnf,
+
+        // FIXME: test needed, see
+        // https://github.com/rust-lang/libm/pull/311#discussion_r1818273392
+        nextafter,
+        nextafterf,
+    ],
+}

From 2dbbebd55ea6bc0fcc5a373809d9695bd0aff81a Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 21 Oct 2024 17:41:42 -0500
Subject: [PATCH 009/279] Run tests against MPFR on CI where possible

This effectively gives us tests against infinite-precision results on
MacOS and x86+sse Linux.
---
 ci/docker/aarch64-unknown-linux-gnu/Dockerfile |  2 +-
 ci/docker/i686-unknown-linux-gnu/Dockerfile    |  2 +-
 ci/docker/x86_64-unknown-linux-gnu/Dockerfile  |  2 +-
 ci/run.sh                                      | 16 ++++++++++++++++
 4 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/ci/docker/aarch64-unknown-linux-gnu/Dockerfile b/ci/docker/aarch64-unknown-linux-gnu/Dockerfile
index a7b23cb9e..7fa06b286 100644
--- a/ci/docker/aarch64-unknown-linux-gnu/Dockerfile
+++ b/ci/docker/aarch64-unknown-linux-gnu/Dockerfile
@@ -3,7 +3,7 @@ FROM ubuntu:24.04
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
     gcc libc6-dev ca-certificates \
-    gcc-aarch64-linux-gnu libc6-dev-arm64-cross \
+    gcc-aarch64-linux-gnu m4 make libc6-dev-arm64-cross \
     qemu-user-static
 
 ENV TOOLCHAIN_PREFIX=aarch64-linux-gnu-
diff --git a/ci/docker/i686-unknown-linux-gnu/Dockerfile b/ci/docker/i686-unknown-linux-gnu/Dockerfile
index 3b0bfc0d3..37e206a84 100644
--- a/ci/docker/i686-unknown-linux-gnu/Dockerfile
+++ b/ci/docker/i686-unknown-linux-gnu/Dockerfile
@@ -2,4 +2,4 @@ FROM ubuntu:24.04
 
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
-    gcc-multilib libc6-dev ca-certificates
+    gcc-multilib m4 make libc6-dev ca-certificates
diff --git a/ci/docker/x86_64-unknown-linux-gnu/Dockerfile b/ci/docker/x86_64-unknown-linux-gnu/Dockerfile
index 15723ab57..c84a31c57 100644
--- a/ci/docker/x86_64-unknown-linux-gnu/Dockerfile
+++ b/ci/docker/x86_64-unknown-linux-gnu/Dockerfile
@@ -2,4 +2,4 @@ FROM ubuntu:24.04
 
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
-    gcc libc6-dev ca-certificates
+    gcc m4 make libc6-dev ca-certificates
diff --git a/ci/run.sh b/ci/run.sh
index 30265e513..94612adc7 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -35,6 +35,22 @@ case "$target" in
     *) extra_flags="$extra_flags --features libm-test/build-musl" ;;
 esac
 
+# Configure which targets test against MPFR
+case "$target" in
+    # MSVC cannot link MPFR
+    *windows-msvc*) ;;
+    # FIXME: MinGW should be able to build MPFR, but setup in CI is nontrivial.
+    *windows-gnu*) ;;
+    # Targets that aren't cross compiled work fine
+    # FIXME(ci): we should be able to enable aarch64 Linux here once GHA
+    # support rolls out.
+    x86_64*) extra_flags="$extra_flags --features libm-test/test-multiprecision" ;;
+    # i686 works fine, i586 does not
+    i686*) extra_flags="$extra_flags --features libm-test/test-multiprecision" ;;
+    # Apple aarch64 is native
+    aarch64*apple*) extra_flags="$extra_flags --features libm-test/test-multiprecision" ;;
+esac
+
 # FIXME: `STATUS_DLL_NOT_FOUND` testing macros on CI.
 # <https://github.com/rust-lang/rust/issues/128944>
 case "$target" in

From e299027edb23b03b98bbe55a627a720eb0b8c5c0 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 28 Oct 2024 21:44:28 -0500
Subject: [PATCH 010/279] Rename the `special_case` module to `precision` and
 move default ULP

Having the default ULP in lib.rs doesn't make much sense when everything
else precision-related is in special_case.rs. Rename `special_case` to
`precision` and move the `*_allowed_ulp` functions there.
---
 crates/libm-test/src/lib.rs                   | 57 +------------------
 .../src/{special_case.rs => precision.rs}     | 53 +++++++++++++++++
 2 files changed, 55 insertions(+), 55 deletions(-)
 rename crates/libm-test/src/{special_case.rs => precision.rs} (84%)

diff --git a/crates/libm-test/src/lib.rs b/crates/libm-test/src/lib.rs
index 13b76d6c5..31b95e46c 100644
--- a/crates/libm-test/src/lib.rs
+++ b/crates/libm-test/src/lib.rs
@@ -2,11 +2,11 @@ pub mod gen;
 #[cfg(feature = "test-multiprecision")]
 pub mod mpfloat;
 mod num_traits;
-mod special_case;
+mod precision;
 mod test_traits;
 
 pub use num_traits::{Float, Hex, Int};
-pub use special_case::{MaybeOverride, SpecialCase};
+pub use precision::{MaybeOverride, SpecialCase, multiprec_allowed_ulp, musl_allowed_ulp};
 pub use test_traits::{CheckBasis, CheckCtx, CheckOutput, GenerateInput, TupleCall};
 
 /// Result type for tests is usually from `anyhow`. Most times there is no success value to
@@ -16,59 +16,6 @@ pub type TestResult<T = (), E = anyhow::Error> = Result<T, E>;
 // List of all files present in libm's source
 include!(concat!(env!("OUT_DIR"), "/all_files.rs"));
 
-/// Default ULP allowed to differ from musl (note that musl itself may not be accurate).
-const MUSL_DEFAULT_ULP: u32 = 2;
-
-/// Default ULP allowed to differ from multiprecision (i.e. infinite) results.
-const MULTIPREC_DEFAULT_ULP: u32 = 1;
-
-/// ULP allowed to differ from muls results.
-///
-/// Note that these results were obtained using 400,000,000 rounds of random inputs, which
-/// is not a value used by default.
-pub fn musl_allowed_ulp(name: &str) -> u32 {
-    // Consider overrides xfail
-    match name {
-        #[cfg(x86_no_sse)]
-        "asinh" | "asinhf" => 6,
-        "lgamma" | "lgamma_r" | "lgammaf" | "lgammaf_r" => 400,
-        "tanh" | "tanhf" => 4,
-        "tgamma" => 20,
-        "j0" | "j0f" | "j1" | "j1f" => {
-            // Results seem very target-dependent
-            if cfg!(target_arch = "x86_64") { 4000 } else { 800_000 }
-        }
-        "jn" | "jnf" => 1000,
-        "sincosf" => 500,
-        #[cfg(not(target_pointer_width = "64"))]
-        "exp10" => 4,
-        #[cfg(not(target_pointer_width = "64"))]
-        "exp10f" => 4,
-        _ => MUSL_DEFAULT_ULP,
-    }
-}
-
-/// ULP allowed to differ from multiprecision results.
-pub fn multiprec_allowed_ulp(name: &str) -> u32 {
-    // Consider overrides xfail
-    match name {
-        "asinh" | "asinhf" => 2,
-        "acoshf" => 4,
-        "atanh" | "atanhf" => 2,
-        "exp10" | "exp10f" => 3,
-        "j0" | "j0f" | "j1" | "j1f" => {
-            // Results seem very target-dependent
-            if cfg!(target_arch = "x86_64") { 4000 } else { 800_000 }
-        }
-        "jn" | "jnf" => 1000,
-        "lgamma" | "lgammaf" | "lgamma_r" | "lgammaf_r" => 16,
-        "sinh" | "sinhf" => 2,
-        "tanh" | "tanhf" => 2,
-        "tgamma" => 20,
-        _ => MULTIPREC_DEFAULT_ULP,
-    }
-}
-
 /// Return the unsuffixed version of a function name; e.g. `abs` and `absf` both return `abs`,
 /// `lgamma_r` and `lgammaf_r` both return `lgamma_r`.
 pub fn canonical_name(name: &str) -> &str {
diff --git a/crates/libm-test/src/special_case.rs b/crates/libm-test/src/precision.rs
similarity index 84%
rename from crates/libm-test/src/special_case.rs
rename to crates/libm-test/src/precision.rs
index dac7a349d..e2ad638c4 100644
--- a/crates/libm-test/src/special_case.rs
+++ b/crates/libm-test/src/precision.rs
@@ -8,6 +8,59 @@ use crate::{CheckBasis, CheckCtx, Float, Int, TestResult};
 /// Type implementing [`IgnoreCase`].
 pub struct SpecialCase;
 
+/// Default ULP allowed to differ from musl (note that musl itself may not be accurate).
+const MUSL_DEFAULT_ULP: u32 = 2;
+
+/// Default ULP allowed to differ from multiprecision (i.e. infinite) results.
+const MULTIPREC_DEFAULT_ULP: u32 = 1;
+
+/// ULP allowed to differ from muls results.
+///
+/// Note that these results were obtained using 400,000,000 rounds of random inputs, which
+/// is not a value used by default.
+pub fn musl_allowed_ulp(name: &str) -> u32 {
+    // Consider overrides xfail
+    match name {
+        #[cfg(x86_no_sse)]
+        "asinh" | "asinhf" => 6,
+        "lgamma" | "lgamma_r" | "lgammaf" | "lgammaf_r" => 400,
+        "tanh" | "tanhf" => 4,
+        "tgamma" => 20,
+        "j0" | "j0f" | "j1" | "j1f" => {
+            // Results seem very target-dependent
+            if cfg!(target_arch = "x86_64") { 4000 } else { 800_000 }
+        }
+        "jn" | "jnf" => 1000,
+        "sincosf" => 500,
+        #[cfg(not(target_pointer_width = "64"))]
+        "exp10" => 4,
+        #[cfg(not(target_pointer_width = "64"))]
+        "exp10f" => 4,
+        _ => MUSL_DEFAULT_ULP,
+    }
+}
+
+/// ULP allowed to differ from multiprecision results.
+pub fn multiprec_allowed_ulp(name: &str) -> u32 {
+    // Consider overrides xfail
+    match name {
+        "asinh" | "asinhf" => 2,
+        "acoshf" => 4,
+        "atanh" | "atanhf" => 2,
+        "exp10" | "exp10f" => 3,
+        "j0" | "j0f" | "j1" | "j1f" => {
+            // Results seem very target-dependent
+            if cfg!(target_arch = "x86_64") { 4000 } else { 800_000 }
+        }
+        "jn" | "jnf" => 1000,
+        "lgamma" | "lgammaf" | "lgamma_r" | "lgammaf_r" => 16,
+        "sinh" | "sinhf" => 2,
+        "tanh" | "tanhf" => 2,
+        "tgamma" => 20,
+        _ => MULTIPREC_DEFAULT_ULP,
+    }
+}
+
 /// Don't run further validation on this test case.
 const SKIP: Option<TestResult> = Some(Ok(()));
 

From 0575358d0e9d344ed5e19913d039ee6c26490b36 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sat, 26 Oct 2024 03:05:01 -0500
Subject: [PATCH 011/279] Update `libm-test/build.rs` to skip directories

Don't try to generate tests for directories, or for files that contain
`f16` or `f128` (as these types are not provided by musl's math
implementations).

(cherry picked from commit fd7ad36b70d0bbc0f0b9bc7e54d10258423fda29)
---
 crates/libm-test/build.rs | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/crates/libm-test/build.rs b/crates/libm-test/build.rs
index 472dec9d3..40b3e56c0 100644
--- a/crates/libm-test/build.rs
+++ b/crates/libm-test/build.rs
@@ -156,7 +156,11 @@ mod musl_serialized_tests {
             return;
         }
 
-        let files = fs::read_dir(math_src).unwrap().map(|f| f.unwrap().path()).collect::<Vec<_>>();
+        let files = fs::read_dir(math_src)
+            .unwrap()
+            .map(|f| f.unwrap().path())
+            .filter(file_needs_test)
+            .collect::<Vec<_>>();
 
         let mut math = Vec::new();
         for file in files {
@@ -187,6 +191,19 @@ mod musl_serialized_tests {
         generate_unit_tests(&math);
     }
 
+    /// Check whether a path within `src/math` should get tests generated.
+    fn file_needs_test(path: &PathBuf) -> bool {
+        // Skip directories
+        if path.is_dir() {
+            return false;
+        }
+
+        let fname = path.file_name().unwrap().to_str().unwrap();
+
+        // Musl doesn't support `f16` or `f128`
+        !(fname.contains("f16") || fname.contains("f128"))
+    }
+
     /// A "poor man's" parser for the signature of a function
     fn parse(s: &str) -> Function {
         let s = eat(s, "pub fn ");

From 88f6b83cecb9484abaf42d9b84663135e9a987fc Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sat, 26 Oct 2024 02:56:22 -0500
Subject: [PATCH 012/279] Vendor `cfg_if::cfg_if!`

`cfg_if` is helpful for applying `cfg` attributes to groups of items,
like we will need to do with architecture-specific modules of `f16` and
`f128`. However, `libm` can't have dependencies.

The `cfg_if` macro is complex but small, so just vendor it here.
---
 src/math/support/macros.rs | 46 +++++++++++++++++++++++++++++++++++---
 1 file changed, 43 insertions(+), 3 deletions(-)

diff --git a/src/math/support/macros.rs b/src/math/support/macros.rs
index 6bc75837a..39a6fe827 100644
--- a/src/math/support/macros.rs
+++ b/src/math/support/macros.rs
@@ -1,6 +1,46 @@
-/// Choose among using an intrinsic (if available) and falling back to the default function body.
-/// Returns directly if the intrinsic version is used, otherwise continues to the rest of the
-/// function.
+/// `libm` cannot have dependencies, so this is vendored directly from the `cfg-if` crate
+/// (with some comments stripped for compactness).
+macro_rules! cfg_if {
+    // match if/else chains with a final `else`
+    ($(
+        if #[cfg($meta:meta)] { $($tokens:tt)* }
+    ) else * else {
+        $($tokens2:tt)*
+    }) => {
+        cfg_if! { @__items () ; $( ( ($meta) ($($tokens)*) ), )* ( () ($($tokens2)*) ), }
+    };
+
+    // match if/else chains lacking a final `else`
+    (
+        if #[cfg($i_met:meta)] { $($i_tokens:tt)* }
+        $( else if #[cfg($e_met:meta)] { $($e_tokens:tt)* } )*
+    ) => {
+        cfg_if! {
+            @__items
+            () ;
+            ( ($i_met) ($($i_tokens)*) ),
+            $( ( ($e_met) ($($e_tokens)*) ), )*
+            ( () () ),
+        }
+    };
+
+    // Internal and recursive macro to emit all the items
+    //
+    // Collects all the negated cfgs in a list at the beginning and after the
+    // semicolon is all the remaining items
+    (@__items ($($not:meta,)*) ; ) => {};
+    (@__items ($($not:meta,)*) ; ( ($($m:meta),*) ($($tokens:tt)*) ), $($rest:tt)*) => {
+        #[cfg(all($($m,)* not(any($($not),*))))] cfg_if! { @__identity $($tokens)* }
+        cfg_if! { @__items ($($not,)* $($m,)*) ; $($rest)* }
+    };
+
+    // Internal macro to make __apply work out right for different match types,
+    // because of how macros matching/expand stuff.
+    (@__identity $($tokens:tt)*) => { $($tokens)* };
+}
+
+/// Choose between using an intrinsic (if available) and the function body. Returns directly if
+/// the intrinsic is used, otherwise the rest of the function body is used.
 ///
 /// Use this if the intrinsic is likely to be more performant on the platform(s) specified
 /// in `intrinsic_available`.

From e8726efc213703b8e5a79f7ff351b83435f95b75 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 28 Oct 2024 20:24:46 -0500
Subject: [PATCH 013/279] Add an "arch" Cargo feature that is on by default

Introduce a Cargo feature to enable or disable architecture-specific
features (SIMD, assembly), which is on by default. This allows for more
fine grained control compared to relying on the `force-soft-floats`
feature.

Similar to "unstable-intrinsics", introduce a build.rs config option for
`unstable-intrinsics AND NOT force-soft-floats`, which makes this easier
to work with in code.

Effectively, this allows moving our non-additive Cargo feature
(force-soft-floats) to a positive one by default, allowing for an
override when needed.
---
 Cargo.toml                                     |  5 ++++-
 build.rs                                       | 12 ++++++++++++
 ci/run.sh                                      |  1 +
 crates/compiler-builtins-smoke-test/Cargo.toml |  1 +
 4 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/Cargo.toml b/Cargo.toml
index 5e4565556..2e74012ea 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -14,7 +14,10 @@ exclude = ["/ci/", "/.github/workflows/"]
 rust-version = "1.63"
 
 [features]
-default = []
+default = ["arch"]
+
+# Enable architecture-specific features such as SIMD or assembly routines.
+arch = []
 
 # This tells the compiler to assume that a Nightly toolchain is being used and
 # that it should activate any useful Nightly things accordingly.
diff --git a/build.rs b/build.rs
index adb521407..001029236 100644
--- a/build.rs
+++ b/build.rs
@@ -15,6 +15,7 @@ fn main() {
     }
 
     configure_intrinsics();
+    configure_arch();
 }
 
 /// Simplify the feature logic for enabling intrinsics so code only needs to use
@@ -28,3 +29,14 @@ fn configure_intrinsics() {
         println!("cargo:rustc-cfg=intrinsics_enabled");
     }
 }
+
+/// Simplify the feature logic for enabling arch-specific features so code only needs to use
+/// `cfg(arch_enabled)`.
+fn configure_arch() {
+    println!("cargo:rustc-check-cfg=cfg(arch_enabled)");
+
+    // Enabled by default via the "arch" feature, `force-soft-floats` overrides to disable.
+    if cfg!(feature = "arch") && !cfg!(feature = "force-soft-floats") {
+        println!("cargo:rustc-cfg=arch_enabled");
+    }
+}
diff --git a/ci/run.sh b/ci/run.sh
index 94612adc7..9f642326b 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -64,6 +64,7 @@ fi
 
 # Make sure we can build with overriding features. We test the indibidual
 # features it controls separately.
+cargo check --no-default-features
 cargo check --features "force-soft-floats"
 
 if [ "${BUILD_ONLY:-}" = "1" ]; then
diff --git a/crates/compiler-builtins-smoke-test/Cargo.toml b/crates/compiler-builtins-smoke-test/Cargo.toml
index 2a6c62961..7118bfe06 100644
--- a/crates/compiler-builtins-smoke-test/Cargo.toml
+++ b/crates/compiler-builtins-smoke-test/Cargo.toml
@@ -18,6 +18,7 @@ force-soft-floats = []
 
 [lints.rust]
 unexpected_cfgs = { level = "warn", check-cfg = [
+  "cfg(arch_enabled)",
   "cfg(assert_no_panic)",
   "cfg(intrinsics_enabled)",
 ] }

From ceb26d863b759aa11e5c606d08c5b74c0d3bf59c Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 28 Oct 2024 20:29:55 -0500
Subject: [PATCH 014/279] Update `select_implementation` to accept arch
 configuration

---
 src/math/support/macros.rs | 45 +++++++++++++++++++++++++++++++++-----
 1 file changed, 39 insertions(+), 6 deletions(-)

diff --git a/src/math/support/macros.rs b/src/math/support/macros.rs
index 39a6fe827..f85a6122e 100644
--- a/src/math/support/macros.rs
+++ b/src/math/support/macros.rs
@@ -39,17 +39,31 @@ macro_rules! cfg_if {
     (@__identity $($tokens:tt)*) => { $($tokens)* };
 }
 
-/// Choose between using an intrinsic (if available) and the function body. Returns directly if
-/// the intrinsic is used, otherwise the rest of the function body is used.
+/// Choose among using an intrinsic, an arch-specific implementation, and the function body.
+/// Returns directly if the intrinsic or arch is used, otherwise continue with the rest of the
+/// function.
 ///
-/// Use this if the intrinsic is likely to be more performant on the platform(s) specified
-/// in `intrinsic_available`.
+/// Specify a `use_intrinsic` meta field if the intrinsic is (1) available on the platforms (i.e.
+/// LLVM lowers it without libcalls that may recurse), (2) it is likely to be more performant.
+/// Intrinsics require wrappers in the `math::arch::intrinsics` module.
 ///
-/// The `cfg` used here is controlled by `build.rs` so the passed meta does not need to account
-/// for e.g. the `unstable-intrinsics` or `force-soft-float` features.
+/// Specify a `use_arch` meta field if an architecture-specific implementation is provided.
+/// These live in the `math::arch::some_target_arch` module.
+///
+/// Specify a `use_arch_required` meta field if something architecture-specific must be used
+/// regardless of feature configuration (`force-soft-floats`).
+///
+/// The passed meta options do not need to account for relevant Cargo features
+/// (`unstable-intrinsics`, `arch`, `force-soft-floats`), this macro handles that part.
 macro_rules! select_implementation {
     (
         name: $fname:ident,
+        // Configuration meta for when to use arch-specific implementation that requires hard
+        // float ops
+        $( use_arch: $use_arch:meta, )?
+        // Configuration meta for when to use the arch module regardless of whether softfloats
+        // have been requested.
+        $( use_arch_required: $use_arch_required:meta, )?
         // Configuration meta for when to call intrinsics and let LLVM figure it out
         $( use_intrinsic: $use_intrinsic:meta, )?
         args: $($arg:ident),+ ,
@@ -57,6 +71,25 @@ macro_rules! select_implementation {
         // FIXME: these use paths that are a pretty fragile (`super`). We should figure out
         // something better w.r.t. how this is vendored into compiler-builtins.
 
+        // However, we do need a few things from `arch` that are used even with soft floats.
+        //
+        select_implementation! {
+            @cfg $($use_arch_required)?;
+            if true {
+                return  super::arch::$fname( $($arg),+ );
+            }
+        }
+
+        // By default, never use arch-specific implementations if we have force-soft-floats
+        #[cfg(arch_enabled)]
+        select_implementation! {
+            @cfg $($use_arch)?;
+            // Wrap in `if true` to avoid unused warnings
+            if true {
+                return  super::arch::$fname( $($arg),+ );
+            }
+        }
+
         // Never use intrinsics if we are forcing soft floats, and only enable with the
         // `unstable-intrinsics` feature.
         #[cfg(intrinsics_enabled)]

From 586962f354eeff83df436e50a2db25570b3093d2 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 28 Oct 2024 20:31:48 -0500
Subject: [PATCH 015/279] Move architecture-specific code to `src/math/arch`

Move the code and call into its new location with
`select_implementation`.
---
 src/math/arch/i586.rs |  37 +++++++
 src/math/arch/i686.rs |  24 +++++
 src/math/arch/mod.rs  |  19 ++++
 src/math/ceil.rs      |  19 +---
 src/math/floor.rs     |  19 +---
 src/math/sqrt.rs      | 233 ++++++++++++++++++++----------------------
 src/math/sqrtf.rs     | 159 +++++++++++++---------------
 7 files changed, 261 insertions(+), 249 deletions(-)
 create mode 100644 src/math/arch/i586.rs
 create mode 100644 src/math/arch/i686.rs

diff --git a/src/math/arch/i586.rs b/src/math/arch/i586.rs
new file mode 100644
index 000000000..f92b9a2af
--- /dev/null
+++ b/src/math/arch/i586.rs
@@ -0,0 +1,37 @@
+//! Architecture-specific support for x86-32 without SSE2
+
+use super::super::fabs;
+
+/// Use an alternative implementation on x86, because the
+/// main implementation fails with the x87 FPU used by
+/// debian i386, probably due to excess precision issues.
+/// Basic implementation taken from https://github.com/rust-lang/libm/issues/219.
+pub fn ceil(x: f64) -> f64 {
+    if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() {
+        let truncated = x as i64 as f64;
+        if truncated < x {
+            return truncated + 1.0;
+        } else {
+            return truncated;
+        }
+    } else {
+        return x;
+    }
+}
+
+/// Use an alternative implementation on x86, because the
+/// main implementation fails with the x87 FPU used by
+/// debian i386, probably due to excess precision issues.
+/// Basic implementation taken from https://github.com/rust-lang/libm/issues/219.
+pub fn floor(x: f64) -> f64 {
+    if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() {
+        let truncated = x as i64 as f64;
+        if truncated > x {
+            return truncated - 1.0;
+        } else {
+            return truncated;
+        }
+    } else {
+        return x;
+    }
+}
diff --git a/src/math/arch/i686.rs b/src/math/arch/i686.rs
new file mode 100644
index 000000000..80f7face1
--- /dev/null
+++ b/src/math/arch/i686.rs
@@ -0,0 +1,24 @@
+//! Architecture-specific support for x86-32 and x86-64 with SSE2
+
+#![cfg(not(feature = "force-soft-floats"))]
+
+#[cfg(target_arch = "x86")]
+use core::arch::x86::*;
+#[cfg(target_arch = "x86_64")]
+use core::arch::x86_64::*;
+
+pub fn sqrtf(x: f32) -> f32 {
+    unsafe {
+        let m = _mm_set_ss(x);
+        let m_sqrt = _mm_sqrt_ss(m);
+        _mm_cvtss_f32(m_sqrt)
+    }
+}
+
+pub fn sqrt(x: f64) -> f64 {
+    unsafe {
+        let m = _mm_set_sd(x);
+        let m_sqrt = _mm_sqrt_pd(m);
+        _mm_cvtsd_f64(m_sqrt)
+    }
+}
diff --git a/src/math/arch/mod.rs b/src/math/arch/mod.rs
index a4bc218b7..cf9547117 100644
--- a/src/math/arch/mod.rs
+++ b/src/math/arch/mod.rs
@@ -7,3 +7,22 @@
 
 #[cfg(intrinsics_enabled)]
 pub mod intrinsics;
+
+// Most implementations should be defined here, to ensure they are not made available when
+// soft floats are required.
+#[cfg(arch_enabled)]
+cfg_if! {
+    if #[cfg(target_feature = "sse2")] {
+        mod i686;
+        pub use i686::{sqrt, sqrtf};
+    }
+}
+
+// There are certain architecture-specific implementations that are needed for correctness
+// even with `force-soft-float`. These are configured here.
+cfg_if! {
+    if #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] {
+        mod i586;
+        pub use i586::{ceil, floor};
+    }
+}
diff --git a/src/math/ceil.rs b/src/math/ceil.rs
index 0da01b4d0..c7e857dbb 100644
--- a/src/math/ceil.rs
+++ b/src/math/ceil.rs
@@ -10,28 +10,11 @@ const TOINT: f64 = 1. / f64::EPSILON;
 pub fn ceil(x: f64) -> f64 {
     select_implementation! {
         name: ceil,
+        use_arch_required: all(target_arch = "x86", not(target_feature = "sse2")),
         use_intrinsic: target_arch = "wasm32",
         args: x,
     }
 
-    #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
-    {
-        //use an alternative implementation on x86, because the
-        //main implementation fails with the x87 FPU used by
-        //debian i386, probably due to excess precision issues.
-        //basic implementation taken from https://github.com/rust-lang/libm/issues/219
-        use super::fabs;
-        if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() {
-            let truncated = x as i64 as f64;
-            if truncated < x {
-                return truncated + 1.0;
-            } else {
-                return truncated;
-            }
-        } else {
-            return x;
-        }
-    }
     let u: u64 = x.to_bits();
     let e: i64 = (u >> 52 & 0x7ff) as i64;
     let y: f64;
diff --git a/src/math/floor.rs b/src/math/floor.rs
index 2b9955eba..532226b9f 100644
--- a/src/math/floor.rs
+++ b/src/math/floor.rs
@@ -10,28 +10,11 @@ const TOINT: f64 = 1. / f64::EPSILON;
 pub fn floor(x: f64) -> f64 {
     select_implementation! {
         name: floor,
+        use_arch_required: all(target_arch = "x86", not(target_feature = "sse2")),
         use_intrinsic: target_arch = "wasm32",
         args: x,
     }
 
-    #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
-    {
-        //use an alternative implementation on x86, because the
-        //main implementation fails with the x87 FPU used by
-        //debian i386, probably due to excess precision issues.
-        //basic implementation taken from https://github.com/rust-lang/libm/issues/219
-        use super::fabs;
-        if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() {
-            let truncated = x as i64 as f64;
-            if truncated > x {
-                return truncated - 1.0;
-            } else {
-                return truncated;
-            }
-        } else {
-            return x;
-        }
-    }
     let ui = x.to_bits();
     let e = ((ui >> 52) & 0x7ff) as i32;
 
diff --git a/src/math/sqrt.rs b/src/math/sqrt.rs
index 2e856100f..a443b7e4c 100644
--- a/src/math/sqrt.rs
+++ b/src/math/sqrt.rs
@@ -83,156 +83,139 @@ use core::f64;
 pub fn sqrt(x: f64) -> f64 {
     select_implementation! {
         name: sqrt,
+        use_arch: target_feature = "sse2",
         use_intrinsic: target_arch = "wasm32",
         args: x,
     }
 
-    #[cfg(all(target_feature = "sse2", not(feature = "force-soft-floats")))]
-    {
-        // Note: This path is unlikely since LLVM will usually have already
-        // optimized sqrt calls into hardware instructions if sse2 is available,
-        // but if someone does end up here they'll appreciate the speed increase.
-        #[cfg(target_arch = "x86")]
-        use core::arch::x86::*;
-        #[cfg(target_arch = "x86_64")]
-        use core::arch::x86_64::*;
-        unsafe {
-            let m = _mm_set_sd(x);
-            let m_sqrt = _mm_sqrt_pd(m);
-            _mm_cvtsd_f64(m_sqrt)
-        }
-    }
-    #[cfg(any(not(target_feature = "sse2"), feature = "force-soft-floats"))]
-    {
-        use core::num::Wrapping;
+    use core::num::Wrapping;
 
-        const TINY: f64 = 1.0e-300;
+    const TINY: f64 = 1.0e-300;
 
-        let mut z: f64;
-        let sign: Wrapping<u32> = Wrapping(0x80000000);
-        let mut ix0: i32;
-        let mut s0: i32;
-        let mut q: i32;
-        let mut m: i32;
-        let mut t: i32;
-        let mut i: i32;
-        let mut r: Wrapping<u32>;
-        let mut t1: Wrapping<u32>;
-        let mut s1: Wrapping<u32>;
-        let mut ix1: Wrapping<u32>;
-        let mut q1: Wrapping<u32>;
+    let mut z: f64;
+    let sign: Wrapping<u32> = Wrapping(0x80000000);
+    let mut ix0: i32;
+    let mut s0: i32;
+    let mut q: i32;
+    let mut m: i32;
+    let mut t: i32;
+    let mut i: i32;
+    let mut r: Wrapping<u32>;
+    let mut t1: Wrapping<u32>;
+    let mut s1: Wrapping<u32>;
+    let mut ix1: Wrapping<u32>;
+    let mut q1: Wrapping<u32>;
 
-        ix0 = (x.to_bits() >> 32) as i32;
-        ix1 = Wrapping(x.to_bits() as u32);
+    ix0 = (x.to_bits() >> 32) as i32;
+    ix1 = Wrapping(x.to_bits() as u32);
 
-        /* take care of Inf and NaN */
-        if (ix0 & 0x7ff00000) == 0x7ff00000 {
-            return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */
+    /* take care of Inf and NaN */
+    if (ix0 & 0x7ff00000) == 0x7ff00000 {
+        return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */
+    }
+    /* take care of zero */
+    if ix0 <= 0 {
+        if ((ix0 & !(sign.0 as i32)) | ix1.0 as i32) == 0 {
+            return x; /* sqrt(+-0) = +-0 */
         }
-        /* take care of zero */
-        if ix0 <= 0 {
-            if ((ix0 & !(sign.0 as i32)) | ix1.0 as i32) == 0 {
-                return x; /* sqrt(+-0) = +-0 */
-            }
-            if ix0 < 0 {
-                return (x - x) / (x - x); /* sqrt(-ve) = sNaN */
-            }
+        if ix0 < 0 {
+            return (x - x) / (x - x); /* sqrt(-ve) = sNaN */
         }
-        /* normalize x */
-        m = ix0 >> 20;
-        if m == 0 {
-            /* subnormal x */
-            while ix0 == 0 {
-                m -= 21;
-                ix0 |= (ix1 >> 11).0 as i32;
-                ix1 <<= 21;
-            }
-            i = 0;
-            while (ix0 & 0x00100000) == 0 {
-                i += 1;
-                ix0 <<= 1;
-            }
-            m -= i - 1;
-            ix0 |= (ix1 >> (32 - i) as usize).0 as i32;
-            ix1 = ix1 << i as usize;
+    }
+    /* normalize x */
+    m = ix0 >> 20;
+    if m == 0 {
+        /* subnormal x */
+        while ix0 == 0 {
+            m -= 21;
+            ix0 |= (ix1 >> 11).0 as i32;
+            ix1 <<= 21;
         }
-        m -= 1023; /* unbias exponent */
-        ix0 = (ix0 & 0x000fffff) | 0x00100000;
-        if (m & 1) == 1 {
-            /* odd m, double x to make it even */
-            ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32;
-            ix1 += ix1;
+        i = 0;
+        while (ix0 & 0x00100000) == 0 {
+            i += 1;
+            ix0 <<= 1;
         }
-        m >>= 1; /* m = [m/2] */
-
-        /* generate sqrt(x) bit by bit */
+        m -= i - 1;
+        ix0 |= (ix1 >> (32 - i) as usize).0 as i32;
+        ix1 = ix1 << i as usize;
+    }
+    m -= 1023; /* unbias exponent */
+    ix0 = (ix0 & 0x000fffff) | 0x00100000;
+    if (m & 1) == 1 {
+        /* odd m, double x to make it even */
         ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32;
         ix1 += ix1;
-        q = 0; /* [q,q1] = sqrt(x) */
-        q1 = Wrapping(0);
-        s0 = 0;
-        s1 = Wrapping(0);
-        r = Wrapping(0x00200000); /* r = moving bit from right to left */
+    }
+    m >>= 1; /* m = [m/2] */
 
-        while r != Wrapping(0) {
-            t = s0 + r.0 as i32;
-            if t <= ix0 {
-                s0 = t + r.0 as i32;
-                ix0 -= t;
-                q += r.0 as i32;
-            }
-            ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32;
-            ix1 += ix1;
-            r >>= 1;
+    /* generate sqrt(x) bit by bit */
+    ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32;
+    ix1 += ix1;
+    q = 0; /* [q,q1] = sqrt(x) */
+    q1 = Wrapping(0);
+    s0 = 0;
+    s1 = Wrapping(0);
+    r = Wrapping(0x00200000); /* r = moving bit from right to left */
+
+    while r != Wrapping(0) {
+        t = s0 + r.0 as i32;
+        if t <= ix0 {
+            s0 = t + r.0 as i32;
+            ix0 -= t;
+            q += r.0 as i32;
         }
+        ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32;
+        ix1 += ix1;
+        r >>= 1;
+    }
 
-        r = sign;
-        while r != Wrapping(0) {
-            t1 = s1 + r;
-            t = s0;
-            if t < ix0 || (t == ix0 && t1 <= ix1) {
-                s1 = t1 + r;
-                if (t1 & sign) == sign && (s1 & sign) == Wrapping(0) {
-                    s0 += 1;
-                }
-                ix0 -= t;
-                if ix1 < t1 {
-                    ix0 -= 1;
-                }
-                ix1 -= t1;
-                q1 += r;
+    r = sign;
+    while r != Wrapping(0) {
+        t1 = s1 + r;
+        t = s0;
+        if t < ix0 || (t == ix0 && t1 <= ix1) {
+            s1 = t1 + r;
+            if (t1 & sign) == sign && (s1 & sign) == Wrapping(0) {
+                s0 += 1;
             }
-            ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32;
-            ix1 += ix1;
-            r >>= 1;
+            ix0 -= t;
+            if ix1 < t1 {
+                ix0 -= 1;
+            }
+            ix1 -= t1;
+            q1 += r;
         }
+        ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32;
+        ix1 += ix1;
+        r >>= 1;
+    }
 
-        /* use floating add to find out rounding direction */
-        if (ix0 as u32 | ix1.0) != 0 {
-            z = 1.0 - TINY; /* raise inexact flag */
-            if z >= 1.0 {
-                z = 1.0 + TINY;
-                if q1.0 == 0xffffffff {
-                    q1 = Wrapping(0);
+    /* use floating add to find out rounding direction */
+    if (ix0 as u32 | ix1.0) != 0 {
+        z = 1.0 - TINY; /* raise inexact flag */
+        if z >= 1.0 {
+            z = 1.0 + TINY;
+            if q1.0 == 0xffffffff {
+                q1 = Wrapping(0);
+                q += 1;
+            } else if z > 1.0 {
+                if q1.0 == 0xfffffffe {
                     q += 1;
-                } else if z > 1.0 {
-                    if q1.0 == 0xfffffffe {
-                        q += 1;
-                    }
-                    q1 += Wrapping(2);
-                } else {
-                    q1 += q1 & Wrapping(1);
                 }
+                q1 += Wrapping(2);
+            } else {
+                q1 += q1 & Wrapping(1);
             }
         }
-        ix0 = (q >> 1) + 0x3fe00000;
-        ix1 = q1 >> 1;
-        if (q & 1) == 1 {
-            ix1 |= sign;
-        }
-        ix0 += m << 20;
-        f64::from_bits((ix0 as u64) << 32 | ix1.0 as u64)
     }
+    ix0 = (q >> 1) + 0x3fe00000;
+    ix1 = q1 >> 1;
+    if (q & 1) == 1 {
+        ix1 |= sign;
+    }
+    ix0 += m << 20;
+    f64::from_bits((ix0 as u64) << 32 | ix1.0 as u64)
 }
 
 #[cfg(test)]
diff --git a/src/math/sqrtf.rs b/src/math/sqrtf.rs
index b2996b350..d2f7ae703 100644
--- a/src/math/sqrtf.rs
+++ b/src/math/sqrtf.rs
@@ -18,109 +18,92 @@
 pub fn sqrtf(x: f32) -> f32 {
     select_implementation! {
         name: sqrtf,
+        use_arch: target_feature = "sse2",
         use_intrinsic: target_arch = "wasm32",
         args: x,
     }
 
-    #[cfg(all(target_feature = "sse", not(feature = "force-soft-floats")))]
-    {
-        // Note: This path is unlikely since LLVM will usually have already
-        // optimized sqrt calls into hardware instructions if sse is available,
-        // but if someone does end up here they'll appreciate the speed increase.
-        #[cfg(target_arch = "x86")]
-        use core::arch::x86::*;
-        #[cfg(target_arch = "x86_64")]
-        use core::arch::x86_64::*;
-        unsafe {
-            let m = _mm_set_ss(x);
-            let m_sqrt = _mm_sqrt_ss(m);
-            _mm_cvtss_f32(m_sqrt)
-        }
-    }
-    #[cfg(any(not(target_feature = "sse"), feature = "force-soft-floats"))]
-    {
-        const TINY: f32 = 1.0e-30;
-
-        let mut z: f32;
-        let sign: i32 = 0x80000000u32 as i32;
-        let mut ix: i32;
-        let mut s: i32;
-        let mut q: i32;
-        let mut m: i32;
-        let mut t: i32;
-        let mut i: i32;
-        let mut r: u32;
-
-        ix = x.to_bits() as i32;
-
-        /* take care of Inf and NaN */
-        if (ix as u32 & 0x7f800000) == 0x7f800000 {
-            return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */
-        }
+    const TINY: f32 = 1.0e-30;
 
-        /* take care of zero */
-        if ix <= 0 {
-            if (ix & !sign) == 0 {
-                return x; /* sqrt(+-0) = +-0 */
-            }
-            if ix < 0 {
-                return (x - x) / (x - x); /* sqrt(-ve) = sNaN */
-            }
-        }
+    let mut z: f32;
+    let sign: i32 = 0x80000000u32 as i32;
+    let mut ix: i32;
+    let mut s: i32;
+    let mut q: i32;
+    let mut m: i32;
+    let mut t: i32;
+    let mut i: i32;
+    let mut r: u32;
 
-        /* normalize x */
-        m = ix >> 23;
-        if m == 0 {
-            /* subnormal x */
-            i = 0;
-            while ix & 0x00800000 == 0 {
-                ix <<= 1;
-                i = i + 1;
-            }
-            m -= i - 1;
+    ix = x.to_bits() as i32;
+
+    /* take care of Inf and NaN */
+    if (ix as u32 & 0x7f800000) == 0x7f800000 {
+        return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */
+    }
+
+    /* take care of zero */
+    if ix <= 0 {
+        if (ix & !sign) == 0 {
+            return x; /* sqrt(+-0) = +-0 */
         }
-        m -= 127; /* unbias exponent */
-        ix = (ix & 0x007fffff) | 0x00800000;
-        if m & 1 == 1 {
-            /* odd m, double x to make it even */
-            ix += ix;
+        if ix < 0 {
+            return (x - x) / (x - x); /* sqrt(-ve) = sNaN */
         }
-        m >>= 1; /* m = [m/2] */
+    }
 
-        /* generate sqrt(x) bit by bit */
+    /* normalize x */
+    m = ix >> 23;
+    if m == 0 {
+        /* subnormal x */
+        i = 0;
+        while ix & 0x00800000 == 0 {
+            ix <<= 1;
+            i = i + 1;
+        }
+        m -= i - 1;
+    }
+    m -= 127; /* unbias exponent */
+    ix = (ix & 0x007fffff) | 0x00800000;
+    if m & 1 == 1 {
+        /* odd m, double x to make it even */
         ix += ix;
-        q = 0;
-        s = 0;
-        r = 0x01000000; /* r = moving bit from right to left */
-
-        while r != 0 {
-            t = s + r as i32;
-            if t <= ix {
-                s = t + r as i32;
-                ix -= t;
-                q += r as i32;
-            }
-            ix += ix;
-            r >>= 1;
+    }
+    m >>= 1; /* m = [m/2] */
+
+    /* generate sqrt(x) bit by bit */
+    ix += ix;
+    q = 0;
+    s = 0;
+    r = 0x01000000; /* r = moving bit from right to left */
+
+    while r != 0 {
+        t = s + r as i32;
+        if t <= ix {
+            s = t + r as i32;
+            ix -= t;
+            q += r as i32;
         }
+        ix += ix;
+        r >>= 1;
+    }
 
-        /* use floating add to find out rounding direction */
-        if ix != 0 {
-            z = 1.0 - TINY; /* raise inexact flag */
-            if z >= 1.0 {
-                z = 1.0 + TINY;
-                if z > 1.0 {
-                    q += 2;
-                } else {
-                    q += q & 1;
-                }
+    /* use floating add to find out rounding direction */
+    if ix != 0 {
+        z = 1.0 - TINY; /* raise inexact flag */
+        if z >= 1.0 {
+            z = 1.0 + TINY;
+            if z > 1.0 {
+                q += 2;
+            } else {
+                q += q & 1;
             }
         }
-
-        ix = (q >> 1) + 0x3f000000;
-        ix += m << 23;
-        f32::from_bits(ix as u32)
     }
+
+    ix = (q >> 1) + 0x3f000000;
+    ix += m << 23;
+    f32::from_bits(ix as u32)
 }
 
 // PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520

From e86594bafa966c0573bc6dd64ca8ff8923913d61 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sat, 26 Oct 2024 00:42:02 -0500
Subject: [PATCH 016/279] Add float and integer traits from compiler-builtins

In preparation of adding generic algorithms to `libm`, add the traits
from `compiler-builtins`.

Eventually we should be able to unify the two crates so we don't have
duplicate implementations.
---
 src/math/mod.rs                  |   2 +
 src/math/support/float_traits.rs | 168 +++++++++++++++
 src/math/support/int_traits.rs   | 343 +++++++++++++++++++++++++++++++
 src/math/support/mod.rs          |   5 +
 4 files changed, 518 insertions(+)
 create mode 100644 src/math/support/float_traits.rs
 create mode 100644 src/math/support/int_traits.rs

diff --git a/src/math/mod.rs b/src/math/mod.rs
index 393bc5150..2cd77f132 100644
--- a/src/math/mod.rs
+++ b/src/math/mod.rs
@@ -105,6 +105,8 @@ use self::k_tanf::k_tanf;
 use self::rem_pio2::rem_pio2;
 use self::rem_pio2_large::rem_pio2_large;
 use self::rem_pio2f::rem_pio2f;
+#[allow(unused_imports)]
+use self::support::{CastFrom, CastInto, DInt, Float, HInt, Int, MinInt};
 
 // Public modules
 mod acos;
diff --git a/src/math/support/float_traits.rs b/src/math/support/float_traits.rs
new file mode 100644
index 000000000..4cf5d7c61
--- /dev/null
+++ b/src/math/support/float_traits.rs
@@ -0,0 +1,168 @@
+use core::ops;
+
+use super::int_traits::{Int, MinInt};
+
+/// Trait for some basic operations on floats
+#[allow(dead_code)]
+pub trait Float:
+    Copy
+    + core::fmt::Debug
+    + PartialEq
+    + PartialOrd
+    + ops::AddAssign
+    + ops::MulAssign
+    + ops::Add<Output = Self>
+    + ops::Sub<Output = Self>
+    + ops::Div<Output = Self>
+    + ops::Rem<Output = Self>
+{
+    /// A uint of the same width as the float
+    type Int: Int<OtherSign = Self::SignedInt, UnsignedInt = Self::Int>;
+
+    /// A int of the same width as the float
+    type SignedInt: Int + MinInt<OtherSign = Self::Int, UnsignedInt = Self::Int>;
+
+    /// An int capable of containing the exponent bits plus a sign bit. This is signed.
+    type ExpInt: Int;
+
+    const ZERO: Self;
+    const ONE: Self;
+
+    /// The bitwidth of the float type
+    const BITS: u32;
+
+    /// The bitwidth of the significand
+    const SIGNIFICAND_BITS: u32;
+
+    /// The bitwidth of the exponent
+    const EXPONENT_BITS: u32 = Self::BITS - Self::SIGNIFICAND_BITS - 1;
+
+    /// The saturated value of the exponent (infinite representation), in the rightmost postiion.
+    const EXPONENT_MAX: u32 = (1 << Self::EXPONENT_BITS) - 1;
+
+    /// The exponent bias value
+    const EXPONENT_BIAS: u32 = Self::EXPONENT_MAX >> 1;
+
+    /// A mask for the sign bit
+    const SIGN_MASK: Self::Int;
+
+    /// A mask for the significand
+    const SIGNIFICAND_MASK: Self::Int;
+
+    /// The implicit bit of the float format
+    const IMPLICIT_BIT: Self::Int;
+
+    /// A mask for the exponent
+    const EXPONENT_MASK: Self::Int;
+
+    /// Returns `self` transmuted to `Self::Int`
+    fn to_bits(self) -> Self::Int;
+
+    /// Returns `self` transmuted to `Self::SignedInt`
+    fn to_bits_signed(self) -> Self::SignedInt;
+
+    /// Checks if two floats have the same bit representation. *Except* for NaNs! NaN can be
+    /// represented in multiple different ways. This method returns `true` if two NaNs are
+    /// compared.
+    fn eq_repr(self, rhs: Self) -> bool;
+
+    /// Returns true if the sign is negative
+    fn is_sign_negative(self) -> bool;
+
+    /// Returns the exponent, not adjusting for bias.
+    fn exp(self) -> Self::ExpInt;
+
+    /// Returns the significand with no implicit bit (or the "fractional" part)
+    fn frac(self) -> Self::Int;
+
+    /// Returns the significand with implicit bit
+    fn imp_frac(self) -> Self::Int;
+
+    /// Returns a `Self::Int` transmuted back to `Self`
+    fn from_bits(a: Self::Int) -> Self;
+
+    /// Constructs a `Self` from its parts. Inputs are treated as bits and shifted into position.
+    fn from_parts(negative: bool, exponent: Self::Int, significand: Self::Int) -> Self;
+
+    fn abs(self) -> Self {
+        let abs_mask = !Self::SIGN_MASK;
+        Self::from_bits(self.to_bits() & abs_mask)
+    }
+
+    /// Returns (normalized exponent, normalized significand)
+    fn normalize(significand: Self::Int) -> (i32, Self::Int);
+
+    /// Returns if `self` is subnormal
+    fn is_subnormal(self) -> bool;
+}
+
+macro_rules! float_impl {
+    ($ty:ident, $ity:ident, $sity:ident, $expty:ident, $bits:expr, $significand_bits:expr) => {
+        impl Float for $ty {
+            type Int = $ity;
+            type SignedInt = $sity;
+            type ExpInt = $expty;
+
+            const ZERO: Self = 0.0;
+            const ONE: Self = 1.0;
+
+            const BITS: u32 = $bits;
+            const SIGNIFICAND_BITS: u32 = $significand_bits;
+
+            const SIGN_MASK: Self::Int = 1 << (Self::BITS - 1);
+            const SIGNIFICAND_MASK: Self::Int = (1 << Self::SIGNIFICAND_BITS) - 1;
+            const IMPLICIT_BIT: Self::Int = 1 << Self::SIGNIFICAND_BITS;
+            const EXPONENT_MASK: Self::Int = !(Self::SIGN_MASK | Self::SIGNIFICAND_MASK);
+
+            fn to_bits(self) -> Self::Int {
+                self.to_bits()
+            }
+            fn to_bits_signed(self) -> Self::SignedInt {
+                self.to_bits() as Self::SignedInt
+            }
+            fn eq_repr(self, rhs: Self) -> bool {
+                fn is_nan(x: $ty) -> bool {
+                    // When using mangled-names, the "real" compiler-builtins might not have the
+                    // necessary builtin (__unordtf2) to test whether `f128` is NaN.
+                    // FIXME(f16_f128): Remove once the nightly toolchain has the __unordtf2 builtin
+                    // x is NaN if all the bits of the exponent are set and the significand is non-0
+                    x.to_bits() & $ty::EXPONENT_MASK == $ty::EXPONENT_MASK
+                        && x.to_bits() & $ty::SIGNIFICAND_MASK != 0
+                }
+                if is_nan(self) && is_nan(rhs) { true } else { self.to_bits() == rhs.to_bits() }
+            }
+            fn is_sign_negative(self) -> bool {
+                self.is_sign_negative()
+            }
+            fn exp(self) -> Self::ExpInt {
+                ((self.to_bits() & Self::EXPONENT_MASK) >> Self::SIGNIFICAND_BITS) as Self::ExpInt
+            }
+            fn frac(self) -> Self::Int {
+                self.to_bits() & Self::SIGNIFICAND_MASK
+            }
+            fn imp_frac(self) -> Self::Int {
+                self.frac() | Self::IMPLICIT_BIT
+            }
+            fn from_bits(a: Self::Int) -> Self {
+                Self::from_bits(a)
+            }
+            fn from_parts(negative: bool, exponent: Self::Int, significand: Self::Int) -> Self {
+                Self::from_bits(
+                    ((negative as Self::Int) << (Self::BITS - 1))
+                        | ((exponent << Self::SIGNIFICAND_BITS) & Self::EXPONENT_MASK)
+                        | (significand & Self::SIGNIFICAND_MASK),
+                )
+            }
+            fn normalize(significand: Self::Int) -> (i32, Self::Int) {
+                let shift = significand.leading_zeros().wrapping_sub(Self::EXPONENT_BITS);
+                (1i32.wrapping_sub(shift as i32), significand << shift as Self::Int)
+            }
+            fn is_subnormal(self) -> bool {
+                (self.to_bits() & Self::EXPONENT_MASK) == Self::Int::ZERO
+            }
+        }
+    };
+}
+
+float_impl!(f32, u32, i32, i16, 32, 23);
+float_impl!(f64, u64, i64, i16, 64, 52);
diff --git a/src/math/support/int_traits.rs b/src/math/support/int_traits.rs
new file mode 100644
index 000000000..bdf3afd48
--- /dev/null
+++ b/src/math/support/int_traits.rs
@@ -0,0 +1,343 @@
+use core::{fmt, ops};
+
+/// Minimal integer implementations needed on all integer types, including wide integers.
+#[allow(dead_code)]
+pub trait MinInt:
+    Copy
+    + fmt::Debug
+    + ops::BitOr<Output = Self>
+    + ops::Not<Output = Self>
+    + ops::Shl<u32, Output = Self>
+{
+    /// Type with the same width but other signedness
+    type OtherSign: MinInt;
+    /// Unsigned version of Self
+    type UnsignedInt: MinInt;
+
+    /// If `Self` is a signed integer
+    const SIGNED: bool;
+
+    /// The bitwidth of the int type
+    const BITS: u32;
+
+    const ZERO: Self;
+    const ONE: Self;
+    const MIN: Self;
+    const MAX: Self;
+}
+
+/// Trait for some basic operations on integers
+#[allow(dead_code)]
+pub trait Int:
+    MinInt
+    + PartialEq
+    + PartialOrd
+    + ops::AddAssign
+    + ops::SubAssign
+    + ops::BitAndAssign
+    + ops::BitOrAssign
+    + ops::BitXorAssign
+    + ops::ShlAssign<i32>
+    + ops::ShrAssign<u32>
+    + ops::Add<Output = Self>
+    + ops::Sub<Output = Self>
+    + ops::Mul<Output = Self>
+    + ops::Div<Output = Self>
+    + ops::Shr<u32, Output = Self>
+    + ops::BitXor<Output = Self>
+    + ops::BitAnd<Output = Self>
+{
+    fn unsigned(self) -> Self::UnsignedInt;
+    fn from_unsigned(unsigned: Self::UnsignedInt) -> Self;
+
+    fn from_bool(b: bool) -> Self;
+
+    /// Prevents the need for excessive conversions between signed and unsigned
+    fn logical_shr(self, other: u32) -> Self;
+
+    /// Absolute difference between two integers.
+    fn abs_diff(self, other: Self) -> Self::UnsignedInt;
+
+    // copied from primitive integers, but put in a trait
+    fn is_zero(self) -> bool;
+    fn wrapping_neg(self) -> Self;
+    fn wrapping_add(self, other: Self) -> Self;
+    fn wrapping_mul(self, other: Self) -> Self;
+    fn wrapping_sub(self, other: Self) -> Self;
+    fn wrapping_shl(self, other: u32) -> Self;
+    fn wrapping_shr(self, other: u32) -> Self;
+    fn rotate_left(self, other: u32) -> Self;
+    fn overflowing_add(self, other: Self) -> (Self, bool);
+    fn leading_zeros(self) -> u32;
+    fn ilog2(self) -> u32;
+}
+
+macro_rules! int_impl_common {
+    ($ty:ty) => {
+        fn from_bool(b: bool) -> Self {
+            b as $ty
+        }
+
+        fn logical_shr(self, other: u32) -> Self {
+            Self::from_unsigned(self.unsigned().wrapping_shr(other))
+        }
+
+        fn is_zero(self) -> bool {
+            self == Self::ZERO
+        }
+
+        fn wrapping_neg(self) -> Self {
+            <Self>::wrapping_neg(self)
+        }
+
+        fn wrapping_add(self, other: Self) -> Self {
+            <Self>::wrapping_add(self, other)
+        }
+
+        fn wrapping_mul(self, other: Self) -> Self {
+            <Self>::wrapping_mul(self, other)
+        }
+
+        fn wrapping_sub(self, other: Self) -> Self {
+            <Self>::wrapping_sub(self, other)
+        }
+
+        fn wrapping_shl(self, other: u32) -> Self {
+            <Self>::wrapping_shl(self, other)
+        }
+
+        fn wrapping_shr(self, other: u32) -> Self {
+            <Self>::wrapping_shr(self, other)
+        }
+
+        fn rotate_left(self, other: u32) -> Self {
+            <Self>::rotate_left(self, other)
+        }
+
+        fn overflowing_add(self, other: Self) -> (Self, bool) {
+            <Self>::overflowing_add(self, other)
+        }
+
+        fn leading_zeros(self) -> u32 {
+            <Self>::leading_zeros(self)
+        }
+
+        fn ilog2(self) -> u32 {
+            <Self>::ilog2(self)
+        }
+    };
+}
+
+macro_rules! int_impl {
+    ($ity:ty, $uty:ty) => {
+        impl MinInt for $uty {
+            type OtherSign = $ity;
+            type UnsignedInt = $uty;
+
+            const BITS: u32 = <Self as MinInt>::ZERO.count_zeros();
+            const SIGNED: bool = Self::MIN != Self::ZERO;
+
+            const ZERO: Self = 0;
+            const ONE: Self = 1;
+            const MIN: Self = <Self>::MIN;
+            const MAX: Self = <Self>::MAX;
+        }
+
+        impl Int for $uty {
+            fn unsigned(self) -> $uty {
+                self
+            }
+
+            // It makes writing macros easier if this is implemented for both signed and unsigned
+            #[allow(clippy::wrong_self_convention)]
+            fn from_unsigned(me: $uty) -> Self {
+                me
+            }
+
+            fn abs_diff(self, other: Self) -> Self {
+                if self < other { other.wrapping_sub(self) } else { self.wrapping_sub(other) }
+            }
+
+            int_impl_common!($uty);
+        }
+
+        impl MinInt for $ity {
+            type OtherSign = $uty;
+            type UnsignedInt = $uty;
+
+            const BITS: u32 = <Self as MinInt>::ZERO.count_zeros();
+            const SIGNED: bool = Self::MIN != Self::ZERO;
+
+            const ZERO: Self = 0;
+            const ONE: Self = 1;
+            const MIN: Self = <Self>::MIN;
+            const MAX: Self = <Self>::MAX;
+        }
+
+        impl Int for $ity {
+            fn unsigned(self) -> $uty {
+                self as $uty
+            }
+
+            fn from_unsigned(me: $uty) -> Self {
+                me as $ity
+            }
+
+            fn abs_diff(self, other: Self) -> $uty {
+                self.wrapping_sub(other).wrapping_abs() as $uty
+            }
+
+            int_impl_common!($ity);
+        }
+    };
+}
+
+int_impl!(isize, usize);
+int_impl!(i8, u8);
+int_impl!(i16, u16);
+int_impl!(i32, u32);
+int_impl!(i64, u64);
+int_impl!(i128, u128);
+
+/// Trait for integers twice the bit width of another integer. This is implemented for all
+/// primitives except for `u8`, because there is not a smaller primitive.
+#[allow(unused)]
+pub trait DInt: MinInt {
+    /// Integer that is half the bit width of the integer this trait is implemented for
+    type H: HInt<D = Self>;
+
+    /// Returns the low half of `self`
+    fn lo(self) -> Self::H;
+    /// Returns the high half of `self`
+    fn hi(self) -> Self::H;
+    /// Returns the low and high halves of `self` as a tuple
+    fn lo_hi(self) -> (Self::H, Self::H) {
+        (self.lo(), self.hi())
+    }
+    /// Constructs an integer using lower and higher half parts
+    fn from_lo_hi(lo: Self::H, hi: Self::H) -> Self {
+        lo.zero_widen() | hi.widen_hi()
+    }
+}
+
+/// Trait for integers half the bit width of another integer. This is implemented for all
+/// primitives except for `u128`, because it there is not a larger primitive.
+#[allow(unused)]
+pub trait HInt: Int {
+    /// Integer that is double the bit width of the integer this trait is implemented for
+    type D: DInt<H = Self> + MinInt;
+
+    // NB: some of the below methods could have default implementations (e.g. `widen_hi`), but for
+    // unknown reasons this can cause infinite recursion when optimizations are disabled. See
+    // <https://github.com/rust-lang/compiler-builtins/pull/707> for context.
+
+    /// Widens (using default extension) the integer to have double bit width
+    fn widen(self) -> Self::D;
+    /// Widens (zero extension only) the integer to have double bit width. This is needed to get
+    /// around problems with associated type bounds (such as `Int<Othersign: DInt>`) being unstable
+    fn zero_widen(self) -> Self::D;
+    /// Widens the integer to have double bit width and shifts the integer into the higher bits
+    fn widen_hi(self) -> Self::D;
+    /// Widening multiplication with zero widening. This cannot overflow.
+    fn zero_widen_mul(self, rhs: Self) -> Self::D;
+    /// Widening multiplication. This cannot overflow.
+    fn widen_mul(self, rhs: Self) -> Self::D;
+}
+
+macro_rules! impl_d_int {
+    ($($X:ident $D:ident),*) => {
+        $(
+            impl DInt for $D {
+                type H = $X;
+
+                fn lo(self) -> Self::H {
+                    self as $X
+                }
+                fn hi(self) -> Self::H {
+                    (self >> <$X as MinInt>::BITS) as $X
+                }
+            }
+        )*
+    };
+}
+
+macro_rules! impl_h_int {
+    ($($H:ident $uH:ident $X:ident),*) => {
+        $(
+            impl HInt for $H {
+                type D = $X;
+
+                fn widen(self) -> Self::D {
+                    self as $X
+                }
+                fn zero_widen(self) -> Self::D {
+                    (self as $uH) as $X
+                }
+                fn zero_widen_mul(self, rhs: Self) -> Self::D {
+                    self.zero_widen().wrapping_mul(rhs.zero_widen())
+                }
+                fn widen_mul(self, rhs: Self) -> Self::D {
+                    self.widen().wrapping_mul(rhs.widen())
+                }
+                fn widen_hi(self) -> Self::D {
+                    (self as $X) << <Self as MinInt>::BITS
+                }
+            }
+        )*
+    };
+}
+
+impl_d_int!(u8 u16, u16 u32, u32 u64, u64 u128, i8 i16, i16 i32, i32 i64, i64 i128);
+impl_h_int!(
+    u8 u8 u16,
+    u16 u16 u32,
+    u32 u32 u64,
+    u64 u64 u128,
+    i8 u8 i16,
+    i16 u16 i32,
+    i32 u32 i64,
+    i64 u64 i128
+);
+
+/// Trait to express (possibly lossy) casting of integers
+#[allow(unused)]
+pub trait CastInto<T: Copy>: Copy {
+    fn cast(self) -> T;
+}
+
+#[allow(unused)]
+pub trait CastFrom<T: Copy>: Copy {
+    fn cast_from(value: T) -> Self;
+}
+
+impl<T: Copy, U: CastInto<T> + Copy> CastFrom<U> for T {
+    fn cast_from(value: U) -> Self {
+        value.cast()
+    }
+}
+
+macro_rules! cast_into {
+    ($ty:ty) => {
+        cast_into!($ty; usize, isize, u8, i8, u16, i16, u32, i32, u64, i64, u128, i128);
+    };
+    ($ty:ty; $($into:ty),*) => {$(
+        impl CastInto<$into> for $ty {
+            fn cast(self) -> $into {
+                self as $into
+            }
+        }
+    )*};
+}
+
+cast_into!(usize);
+cast_into!(isize);
+cast_into!(u8);
+cast_into!(i8);
+cast_into!(u16);
+cast_into!(i16);
+cast_into!(u32);
+cast_into!(i32);
+cast_into!(u64);
+cast_into!(i64);
+cast_into!(u128);
+cast_into!(i128);
diff --git a/src/math/support/mod.rs b/src/math/support/mod.rs
index 10532f0d1..f054df6cd 100644
--- a/src/math/support/mod.rs
+++ b/src/math/support/mod.rs
@@ -1,2 +1,7 @@
 #[macro_use]
 pub mod macros;
+mod float_traits;
+mod int_traits;
+
+pub use float_traits::Float;
+pub use int_traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt};

From 09dd0c83b40ee0f39e1a2e333a3f1629affdba57 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 30 Oct 2024 13:45:53 -0500
Subject: [PATCH 017/279] Rename `canonical_name` to `base_name`

"Canonical" isn't really the right word here, update to "base".
---
 crates/libm-test/src/gen/random.rs  |  3 ++-
 crates/libm-test/src/lib.rs         |  2 +-
 crates/libm-test/src/precision.rs   | 32 ++++++++++++++---------------
 crates/libm-test/src/test_traits.rs |  8 ++++----
 src/math/support/macros.rs          |  8 ++++----
 5 files changed, 27 insertions(+), 26 deletions(-)

diff --git a/crates/libm-test/src/gen/random.rs b/crates/libm-test/src/gen/random.rs
index c73937aac..d03d1ff79 100644
--- a/crates/libm-test/src/gen/random.rs
+++ b/crates/libm-test/src/gen/random.rs
@@ -110,6 +110,7 @@ pub fn get_test_cases<RustArgs>(ctx: &CheckCtx) -> impl Iterator<Item = RustArgs
 where
     CachedInput: GenerateInput<RustArgs>,
 {
-    let inputs = if ctx.fname == "jn" || ctx.fname == "jnf" { &TEST_CASES_JN } else { &TEST_CASES };
+    let inputs =
+        if ctx.fn_name == "jn" || ctx.fn_name == "jnf" { &TEST_CASES_JN } else { &TEST_CASES };
     inputs.get_cases()
 }
diff --git a/crates/libm-test/src/lib.rs b/crates/libm-test/src/lib.rs
index 31b95e46c..6c7a3f5ec 100644
--- a/crates/libm-test/src/lib.rs
+++ b/crates/libm-test/src/lib.rs
@@ -18,7 +18,7 @@ include!(concat!(env!("OUT_DIR"), "/all_files.rs"));
 
 /// Return the unsuffixed version of a function name; e.g. `abs` and `absf` both return `abs`,
 /// `lgamma_r` and `lgammaf_r` both return `lgamma_r`.
-pub fn canonical_name(name: &str) -> &str {
+pub fn base_name(name: &str) -> &str {
     let known_mappings = &[
         ("erff", "erf"),
         ("erf", "erf"),
diff --git a/crates/libm-test/src/precision.rs b/crates/libm-test/src/precision.rs
index e2ad638c4..9ef0e818d 100644
--- a/crates/libm-test/src/precision.rs
+++ b/crates/libm-test/src/precision.rs
@@ -111,25 +111,25 @@ impl MaybeOverride<(f32,)> for SpecialCase {
         ctx: &CheckCtx,
     ) -> Option<TestResult> {
         if ctx.basis == CheckBasis::Musl {
-            if ctx.fname == "expm1f" && input.0 > 80.0 && actual.is_infinite() {
+            if ctx.fn_name == "expm1f" && input.0 > 80.0 && actual.is_infinite() {
                 // we return infinity but the number is representable
                 return XFAIL;
             }
 
-            if ctx.fname == "sinhf" && input.0.abs() > 80.0 && actual.is_nan() {
+            if ctx.fn_name == "sinhf" && input.0.abs() > 80.0 && actual.is_nan() {
                 // we return some NaN that should be real values or infinite
                 // doesn't seem to happen on x86
                 return XFAIL;
             }
         }
 
-        if ctx.fname == "acoshf" && input.0 < -1.0 {
+        if ctx.fn_name == "acoshf" && input.0 < -1.0 {
             // acoshf is undefined for x <= 1.0, but we return a random result at lower
             // values.
             return XFAIL;
         }
 
-        if ctx.fname == "lgammaf" || ctx.fname == "lgammaf_r" && input.0 < 0.0 {
+        if ctx.fn_name == "lgammaf" || ctx.fn_name == "lgammaf_r" && input.0 < 0.0 {
             // loggamma should not be defined for x < 0, yet we both return results
             return XFAIL;
         }
@@ -146,7 +146,7 @@ impl MaybeOverride<(f32,)> for SpecialCase {
         // On MPFR for lgammaf_r, we set -1 as the integer result for negative infinity but MPFR
         // sets +1
         if ctx.basis == CheckBasis::Mpfr
-            && ctx.fname == "lgammaf_r"
+            && ctx.fn_name == "lgammaf_r"
             && input.0 == f32::NEG_INFINITY
             && actual.abs() == expected.abs()
         {
@@ -166,13 +166,13 @@ impl MaybeOverride<(f64,)> for SpecialCase {
         ctx: &CheckCtx,
     ) -> Option<TestResult> {
         if ctx.basis == CheckBasis::Musl {
-            if cfg!(target_arch = "x86") && ctx.fname == "acosh" && input.0 < 1.0 {
+            if cfg!(target_arch = "x86") && ctx.fn_name == "acosh" && input.0 < 1.0 {
                 // The function is undefined, both implementations return random results
                 return SKIP;
             }
 
             if cfg!(x86_no_sse)
-                && ctx.fname == "ceil"
+                && ctx.fn_name == "ceil"
                 && input.0 < 0.0
                 && input.0 > -1.0
                 && expected == F::ZERO
@@ -183,13 +183,13 @@ impl MaybeOverride<(f64,)> for SpecialCase {
             }
         }
 
-        if ctx.fname == "acosh" && input.0 < 1.0 {
+        if ctx.fn_name == "acosh" && input.0 < 1.0 {
             // The function is undefined for the inputs, musl and our libm both return
             // random results.
             return XFAIL;
         }
 
-        if ctx.fname == "lgamma" || ctx.fname == "lgamma_r" && input.0 < 0.0 {
+        if ctx.fn_name == "lgamma" || ctx.fn_name == "lgamma_r" && input.0 < 0.0 {
             // loggamma should not be defined for x < 0, yet we both return results
             return XFAIL;
         }
@@ -206,7 +206,7 @@ impl MaybeOverride<(f64,)> for SpecialCase {
         // On MPFR for lgamma_r, we set -1 as the integer result for negative infinity but MPFR
         // sets +1
         if ctx.basis == CheckBasis::Mpfr
-            && ctx.fname == "lgamma_r"
+            && ctx.fn_name == "lgamma_r"
             && input.0 == f64::NEG_INFINITY
             && actual.abs() == expected.abs()
         {
@@ -219,7 +219,7 @@ impl MaybeOverride<(f64,)> for SpecialCase {
 
 /// Check NaN bits if the function requires it
 fn maybe_check_nan_bits<F: Float>(actual: F, expected: F, ctx: &CheckCtx) -> Option<TestResult> {
-    if !(ctx.canonical_name == "fabs" || ctx.canonical_name == "copysign") {
+    if !(ctx.base_name == "fabs" || ctx.base_name == "copysign") {
         return None;
     }
 
@@ -277,7 +277,7 @@ fn maybe_skip_binop_nan<F1: Float, F2: Float>(
 ) -> Option<TestResult> {
     match ctx.basis {
         CheckBasis::Musl => {
-            if (ctx.canonical_name == "fmax" || ctx.canonical_name == "fmin")
+            if (ctx.base_name == "fmax" || ctx.base_name == "fmin")
                 && (input.0.is_nan() || input.1.is_nan())
                 && expected.is_nan()
             {
@@ -287,7 +287,7 @@ fn maybe_skip_binop_nan<F1: Float, F2: Float>(
             }
         }
         CheckBasis::Mpfr => {
-            if ctx.canonical_name == "copysign" && input.1.is_nan() {
+            if ctx.base_name == "copysign" && input.1.is_nan() {
                 SKIP
             } else {
                 None
@@ -308,7 +308,7 @@ impl MaybeOverride<(i32, f32)> for SpecialCase {
             CheckBasis::Musl => bessel_prec_dropoff(input, ulp, ctx),
             CheckBasis::Mpfr => {
                 // We return +0.0, MPFR returns -0.0
-                if ctx.fname == "jnf"
+                if ctx.fn_name == "jnf"
                     && input.1 == f32::NEG_INFINITY
                     && actual == F::ZERO
                     && expected == F::ZERO
@@ -333,7 +333,7 @@ impl MaybeOverride<(i32, f64)> for SpecialCase {
             CheckBasis::Musl => bessel_prec_dropoff(input, ulp, ctx),
             CheckBasis::Mpfr => {
                 // We return +0.0, MPFR returns -0.0
-                if ctx.fname == "jn"
+                if ctx.fn_name == "jn"
                     && input.1 == f64::NEG_INFINITY
                     && actual == F::ZERO
                     && expected == F::ZERO
@@ -353,7 +353,7 @@ fn bessel_prec_dropoff<F: Float>(
     ulp: &mut u32,
     ctx: &CheckCtx,
 ) -> Option<TestResult> {
-    if ctx.canonical_name == "jn" {
+    if ctx.base_name == "jn" {
         if input.0 > 4000 {
             return XFAIL;
         } else if input.0 > 2000 {
diff --git a/crates/libm-test/src/test_traits.rs b/crates/libm-test/src/test_traits.rs
index deb837887..34e15e0b2 100644
--- a/crates/libm-test/src/test_traits.rs
+++ b/crates/libm-test/src/test_traits.rs
@@ -33,17 +33,17 @@ pub struct CheckCtx {
     /// Allowed ULP deviation
     pub ulp: u32,
     /// Function name.
-    pub fname: &'static str,
+    pub fn_name: &'static str,
     /// Return the unsuffixed version of the function name.
-    pub canonical_name: &'static str,
+    pub base_name: &'static str,
     /// Source of truth for tests.
     pub basis: CheckBasis,
 }
 
 impl CheckCtx {
     pub fn new(ulp: u32, fname: &'static str, basis: CheckBasis) -> Self {
-        let canonical_fname = crate::canonical_name(fname);
-        Self { ulp, fname, canonical_name: canonical_fname, basis }
+        let base_name = crate::base_name(fname);
+        Self { ulp, fn_name: fname, base_name, basis }
     }
 }
 
diff --git a/src/math/support/macros.rs b/src/math/support/macros.rs
index f85a6122e..b14bbec38 100644
--- a/src/math/support/macros.rs
+++ b/src/math/support/macros.rs
@@ -57,7 +57,7 @@ macro_rules! cfg_if {
 /// (`unstable-intrinsics`, `arch`, `force-soft-floats`), this macro handles that part.
 macro_rules! select_implementation {
     (
-        name: $fname:ident,
+        name: $fn_name:ident,
         // Configuration meta for when to use arch-specific implementation that requires hard
         // float ops
         $( use_arch: $use_arch:meta, )?
@@ -76,7 +76,7 @@ macro_rules! select_implementation {
         select_implementation! {
             @cfg $($use_arch_required)?;
             if true {
-                return  super::arch::$fname( $($arg),+ );
+                return  super::arch::$fn_name( $($arg),+ );
             }
         }
 
@@ -86,7 +86,7 @@ macro_rules! select_implementation {
             @cfg $($use_arch)?;
             // Wrap in `if true` to avoid unused warnings
             if true {
-                return  super::arch::$fname( $($arg),+ );
+                return  super::arch::$fn_name( $($arg),+ );
             }
         }
 
@@ -96,7 +96,7 @@ macro_rules! select_implementation {
         select_implementation! {
             @cfg $( $use_intrinsic )?;
             if true {
-                return  super::arch::intrinsics::$fname( $($arg),+ );
+                return  super::arch::intrinsics::$fn_name( $($arg),+ );
             }
         }
     };

From f4c65b52ebec9403cefbd981d07c4cbe9315258f Mon Sep 17 00:00:00 2001
From: WANG Rui <wangrui@loongson.cn>
Date: Thu, 31 Oct 2024 19:29:13 +0800
Subject: [PATCH 018/279] ci: add support for loongarch64-unknown-linux-gnu

---
 .github/workflows/main.yml                         |  2 ++
 ci/docker/loongarch64-unknown-linux-gnu/Dockerfile | 13 +++++++++++++
 2 files changed, 15 insertions(+)
 create mode 100644 ci/docker/loongarch64-unknown-linux-gnu/Dockerfile

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index cc0d23ffc..f0c5fe7c0 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -32,6 +32,8 @@ jobs:
           os: ubuntu-latest
         - target: i686-unknown-linux-gnu
           os: ubuntu-latest
+        - target: loongarch64-unknown-linux-gnu
+          os: ubuntu-latest
         - target: powerpc-unknown-linux-gnu
           os: ubuntu-latest
         - target: powerpc64-unknown-linux-gnu
diff --git a/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile b/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile
new file mode 100644
index 000000000..a9ce320e8
--- /dev/null
+++ b/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile
@@ -0,0 +1,13 @@
+FROM ubuntu:24.04
+
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    gcc libc6-dev qemu-user-static ca-certificates \
+    gcc-14-loongarch64-linux-gnu libc6-dev-loong64-cross
+
+ENV CARGO_TARGET_LOONGARCH64_UNKNOWN_LINUX_GNU_LINKER=loongarch64-linux-gnu-gcc-14 \
+    CARGO_TARGET_LOONGARCH64_UNKNOWN_LINUX_GNU_RUNNER=qemu-loongarch64-static \
+    AR_loongarch64_unknown_linux_gnu=loongarch64-linux-gnu-ar \
+    CC_loongarch64_unknown_linux_gnu=loongarch64-linux-gnu-gcc-14 \
+    QEMU_LD_PREFIX=/usr/loongarch64-linux-gnu \
+    RUST_TEST_THREADS=1

From be2552e0ace0e9cf289db7d8202ffd82e4003bbd Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 21 Oct 2024 18:27:01 -0500
Subject: [PATCH 019/279] Add benchmarks against musl libm

Add a benchmark for each function that checks against `musl_math_sys`.
---
 crates/libm-test/Cargo.toml        |  13 ++++
 crates/libm-test/benches/random.rs | 119 +++++++++++++++++++++++++++++
 2 files changed, 132 insertions(+)
 create mode 100644 crates/libm-test/benches/random.rs

diff --git a/crates/libm-test/Cargo.toml b/crates/libm-test/Cargo.toml
index 72ac57232..fedf745ed 100644
--- a/crates/libm-test/Cargo.toml
+++ b/crates/libm-test/Cargo.toml
@@ -15,6 +15,12 @@ test-multiprecision = ["dep:az", "dep:rug"]
 # Build our own musl for testing and benchmarks
 build-musl = ["dep:musl-math-sys"]
 
+# Enable report generation without bringing in more dependencies by default
+benchmarking-reports = ["criterion/plotters", "criterion/html_reports"]
+
+# Run with a reduced set of benchmarks, such as for CI
+short-benchmarks = []
+
 [dependencies]
 anyhow = "1.0.90"
 az = { version = "1.2.1", optional = true }
@@ -32,3 +38,10 @@ getrandom = { version = "0.2", features = ["js"] }
 
 [build-dependencies]
 rand = { version = "0.8.5", optional = true }
+
+[dev-dependencies]
+criterion = { version = "0.5.1", default-features = false, features = ["cargo_bench_support"] }
+
+[[bench]]
+name = "random"
+harness = false
diff --git a/crates/libm-test/benches/random.rs b/crates/libm-test/benches/random.rs
new file mode 100644
index 000000000..6c9047c3c
--- /dev/null
+++ b/crates/libm-test/benches/random.rs
@@ -0,0 +1,119 @@
+use std::hint::black_box;
+use std::time::Duration;
+
+use criterion::{Criterion, criterion_main};
+use libm_test::gen::random;
+use libm_test::{CheckBasis, CheckCtx, TupleCall};
+
+/// Benchmark with this many items to get a variety
+const BENCH_ITER_ITEMS: usize = if cfg!(feature = "short-benchmarks") { 50 } else { 500 };
+
+macro_rules! musl_rand_benches {
+    (
+        fn_name: $fn_name:ident,
+        CFn: $CFn:ty,
+        CArgs: $CArgs:ty,
+        CRet: $CRet:ty,
+        RustFn: $RustFn:ty,
+        RustArgs: $RustArgs:ty,
+        RustRet: $RustRet:ty,
+        fn_extra: $skip_on_i586:expr,
+    ) => {
+        paste::paste! {
+            fn [< musl_bench_ $fn_name >](c: &mut Criterion) {
+                let fn_name = stringify!($fn_name);
+
+                let ulp = libm_test::musl_allowed_ulp(fn_name);
+                let ctx = CheckCtx::new(ulp, fn_name, CheckBasis::Musl);
+                let benchvec: Vec<_> = random::get_test_cases::<$RustArgs>(&ctx)
+                    .take(BENCH_ITER_ITEMS)
+                    .collect();
+
+                // Perform a sanity check that we are benchmarking the same thing
+                // Don't test against musl if it is not available
+                #[cfg(feature = "build-musl")]
+                for input in benchvec.iter().copied() {
+                    use anyhow::Context;
+                    use libm_test::{CheckBasis, CheckCtx, CheckOutput};
+
+                    if cfg!(x86_no_sse) && $skip_on_i586 {
+                        break;
+                    }
+
+                    let musl_res = input.call(musl_math_sys::$fn_name as $CFn);
+                    let crate_res = input.call(libm::$fn_name as $RustFn);
+
+                    let ctx = CheckCtx::new(ulp, fn_name, CheckBasis::Musl);
+                    crate_res.validate(musl_res, input, &ctx).context(fn_name).unwrap();
+                }
+
+                /* Function pointers are black boxed to avoid inlining in the benchmark loop */
+
+                let mut group = c.benchmark_group(fn_name);
+                group.bench_function("crate", |b| b.iter(|| {
+                    let f = black_box(libm::$fn_name as $RustFn);
+                    for input in benchvec.iter().copied() {
+                        input.call(f);
+                    }
+                }));
+
+                // Don't test against musl if it is not available
+                #[cfg(feature = "build-musl")]
+                group.bench_function("musl", |b| b.iter(|| {
+                    let f = black_box(musl_math_sys::$fn_name as $CFn);
+                    for input in benchvec.iter().copied() {
+                        input.call(f);
+                    }
+                }));
+            }
+        }
+    };
+}
+
+libm_macros::for_each_function! {
+    callback: musl_rand_benches,
+    skip: [],
+    fn_extra: match MACRO_FN_NAME {
+        // FIXME(correctness): wrong result on i586
+        exp10 | exp10f | exp2 | exp2f => true,
+        _ => false
+    }
+}
+
+macro_rules! run_callback {
+    (
+        fn_name: $fn_name:ident,
+        CFn: $_CFn:ty,
+        CArgs: $_CArgs:ty,
+        CRet: $_CRet:ty,
+        RustFn: $_RustFn:ty,
+        RustArgs: $_RustArgs:ty,
+        RustRet: $_RustRet:ty,
+        extra: [$criterion:ident],
+    ) => {
+        paste::paste! {
+            [< musl_bench_ $fn_name >](&mut $criterion)
+        }
+    };
+}
+
+pub fn musl_random() {
+    let mut criterion = Criterion::default();
+
+    // For CI, run a short 0.5s warmup and 1.0s tests. This makes benchmarks complete in
+    // about the same time as other tests.
+    if cfg!(feature = "short-benchmarks") {
+        criterion = criterion
+            .warm_up_time(Duration::from_millis(500))
+            .measurement_time(Duration::from_millis(1000));
+    }
+
+    criterion = criterion.configure_from_args();
+
+    libm_macros::for_each_function! {
+        callback: run_callback,
+        extra: [criterion],
+    };
+}
+
+criterion_main!(musl_random);

From b2a0b82cf9cf35e466551bc23775342968e4dc2e Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 21 Oct 2024 18:27:58 -0500
Subject: [PATCH 020/279] Remove `libm-bench`

This has been superseded by the benchmarks in `libm-test`.
---
 Cargo.toml                         |   1 -
 crates/libm-bench/Cargo.toml       |  16 ----
 crates/libm-bench/benches/bench.rs | 116 -----------------------------
 3 files changed, 133 deletions(-)
 delete mode 100644 crates/libm-bench/Cargo.toml
 delete mode 100644 crates/libm-bench/benches/bench.rs

diff --git a/Cargo.toml b/Cargo.toml
index 2e74012ea..178627766 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -38,7 +38,6 @@ force-soft-floats = []
 resolver = "2"
 members = [
   "crates/compiler-builtins-smoke-test",
-  "crates/libm-bench",
   "crates/libm-macros",
   "crates/libm-test",
   "crates/musl-math-sys",
diff --git a/crates/libm-bench/Cargo.toml b/crates/libm-bench/Cargo.toml
deleted file mode 100644
index ee8c58200..000000000
--- a/crates/libm-bench/Cargo.toml
+++ /dev/null
@@ -1,16 +0,0 @@
-[package]
-name = "libm-bench"
-version = "0.1.0"
-authors = ["Gonzalo Brito Gadeschi <gonzalobg88@gmail.com>"]
-edition = "2021"
-license = "MIT OR Apache-2.0"
-publish = false
-
-[dependencies]
-libm = { path = "../..", default-features = false }
-rand = "0.8.5"
-paste = "1.0.15"
-
-[features]
-default = []
-unstable = [ "libm/unstable" ]
diff --git a/crates/libm-bench/benches/bench.rs b/crates/libm-bench/benches/bench.rs
deleted file mode 100644
index ca999b90f..000000000
--- a/crates/libm-bench/benches/bench.rs
+++ /dev/null
@@ -1,116 +0,0 @@
-#![feature(test)]
-extern crate test;
-
-use rand::Rng;
-use test::Bencher;
-
-macro_rules! unary {
-  ($($func:ident),*) => ($(
-      paste::item! {
-        #[bench]
-        pub fn [<$func>](bh: &mut Bencher) {
-            let mut rng = rand::thread_rng();
-            let x = rng.gen::<f64>();
-            bh.iter(|| test::black_box(libm::[<$func>](x)))
-        }
-        #[bench]
-        pub fn [<$func f>](bh: &mut Bencher) {
-            let mut rng = rand::thread_rng();
-            let x = rng.gen::<f32>();
-            bh.iter(|| test::black_box(libm::[<$func f>](x)))
-        }
-    }
-  )*);
-}
-macro_rules! binary {
-  ($($func:ident),*) => ($(
-      paste::item! {
-        #[bench]
-        pub fn [<$func>](bh: &mut Bencher) {
-            let mut rng = rand::thread_rng();
-            let x = rng.gen::<f64>();
-            let y = rng.gen::<f64>();
-            bh.iter(|| test::black_box(libm::[<$func>](x, y)))
-        }
-        #[bench]
-        pub fn [<$func f>](bh: &mut Bencher) {
-            let mut rng = rand::thread_rng();
-            let x = rng.gen::<f32>();
-            let y = rng.gen::<f32>();
-            bh.iter(|| test::black_box(libm::[<$func f>](x, y)))
-        }
-    }
-  )*);
-  ($($func:ident);*) => ($(
-      paste::item! {
-        #[bench]
-        pub fn [<$func>](bh: &mut Bencher) {
-            let mut rng = rand::thread_rng();
-            let x = rng.gen::<f64>();
-            let n = rng.gen::<i32>();
-            bh.iter(|| test::black_box(libm::[<$func>](x, n)))
-        }
-        #[bench]
-        pub fn [<$func f>](bh: &mut Bencher) {
-            let mut rng = rand::thread_rng();
-            let x = rng.gen::<f32>();
-            let n = rng.gen::<i32>();
-            bh.iter(|| test::black_box(libm::[<$func f>](x, n)))
-        }
-    }
-  )*);
-}
-macro_rules! trinary {
-  ($($func:ident),*) => ($(
-      paste::item! {
-        #[bench]
-        pub fn [<$func>](bh: &mut Bencher) {
-            let mut rng = rand::thread_rng();
-            let x = rng.gen::<f64>();
-            let y = rng.gen::<f64>();
-            let z = rng.gen::<f64>();
-            bh.iter(|| test::black_box(libm::[<$func>](x, y, z)))
-        }
-        #[bench]
-        pub fn [<$func f>](bh: &mut Bencher) {
-            let mut rng = rand::thread_rng();
-            let x = rng.gen::<f32>();
-            let y = rng.gen::<f32>();
-            let z = rng.gen::<f32>();
-            bh.iter(|| test::black_box(libm::[<$func f>](x, y, z)))
-        }
-    }
-  )*);
-}
-macro_rules! bessel {
-  ($($func:ident),*) => ($(
-      paste::item! {
-        #[bench]
-        pub fn [<$func>](bh: &mut Bencher) {
-            let mut rng = rand::thread_rng();
-            let mut n = rng.gen::<i32>();
-            n &= 0xffff;
-            let x = rng.gen::<f64>();
-            bh.iter(|| test::black_box(libm::[<$func>](n, x)))
-        }
-        #[bench]
-        pub fn [<$func f>](bh: &mut Bencher) {
-            let mut rng = rand::thread_rng();
-            let mut n = rng.gen::<i32>();
-            n &= 0xffff;
-            let x = rng.gen::<f32>();
-            bh.iter(|| test::black_box(libm::[<$func f>](n, x)))
-        }
-    }
-  )*);
-}
-
-unary!(
-    acos, acosh, asin, atan, cbrt, ceil, cos, cosh, erf, exp, exp2, exp10, expm1, fabs, floor, j0,
-    j1, lgamma, log, log1p, log2, log10, rint, round, sin, sinh, sqrt, tan, tanh, tgamma, trunc,
-    y0, y1
-);
-binary!(atan2, copysign, fdim, fmax, fmin, fmod, hypot, pow);
-trinary!(fma);
-bessel!(jn, yn);
-binary!(ldexp; scalbn);

From f951c183983ec5184fd337c753bc4fb87ee4743b Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 21 Oct 2024 18:35:11 -0500
Subject: [PATCH 021/279] Check benchmarks in CI

---
 .github/workflows/main.yml | 2 +-
 ci/run.sh                  | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index f0c5fe7c0..bfd86497b 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -125,7 +125,7 @@ jobs:
     - uses: Swatinem/rust-cache@v2
     - name: Download musl source
       run: ./ci/download-musl.sh
-    - run: cargo bench --all
+    - run: cargo bench --all --features libm-test/short-benchmarks,libm-test/build-musl
 
   msrv:
     name: Check MSRV
diff --git a/ci/run.sh b/ci/run.sh
index 9f642326b..a211bc98c 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -83,4 +83,8 @@ else
     # unstable with a feature
     $cmd --features "unstable-intrinsics"
     $cmd --release --features "unstable-intrinsics"
+
+    # Make sure benchmarks have correct results
+    $cmd --benches
+    $cmd --benches --release
 fi

From 830ddc557e4d2246671a86cdbe4fdc43fca72ba0 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 31 Oct 2024 22:47:11 -0500
Subject: [PATCH 022/279] Change prefixes used by the `Float` trait

Change `EXPONENT_` to `EXP_` and `SIGNIFICAND_` to `SIG_`. These are
pretty unambiguous, and just makes for less to type once these get used.
---
 crates/libm-test/src/mpfloat.rs  |  2 +-
 src/math/support/float_traits.rs | 45 ++++++++++++++++++--------------
 2 files changed, 26 insertions(+), 21 deletions(-)

diff --git a/crates/libm-test/src/mpfloat.rs b/crates/libm-test/src/mpfloat.rs
index 44962d116..2e6fdae7f 100644
--- a/crates/libm-test/src/mpfloat.rs
+++ b/crates/libm-test/src/mpfloat.rs
@@ -15,7 +15,7 @@ use crate::Float;
 
 /// Create a multiple-precision float with the correct number of bits for a concrete float type.
 fn new_mpfloat<F: Float>() -> MpFloat {
-    MpFloat::new(F::SIGNIFICAND_BITS + 1)
+    MpFloat::new(F::SIG_BITS + 1)
 }
 
 /// Set subnormal emulation and convert to a concrete float type.
diff --git a/src/math/support/float_traits.rs b/src/math/support/float_traits.rs
index 4cf5d7c61..f90e99d52 100644
--- a/src/math/support/float_traits.rs
+++ b/src/math/support/float_traits.rs
@@ -27,34 +27,37 @@ pub trait Float:
 
     const ZERO: Self;
     const ONE: Self;
+    const INFINITY: Self;
+    const NEG_INFINITY: Self;
+    const NAN: Self;
 
     /// The bitwidth of the float type
     const BITS: u32;
 
     /// The bitwidth of the significand
-    const SIGNIFICAND_BITS: u32;
+    const SIG_BITS: u32;
 
     /// The bitwidth of the exponent
-    const EXPONENT_BITS: u32 = Self::BITS - Self::SIGNIFICAND_BITS - 1;
+    const EXP_BITS: u32 = Self::BITS - Self::SIG_BITS - 1;
 
     /// The saturated value of the exponent (infinite representation), in the rightmost postiion.
-    const EXPONENT_MAX: u32 = (1 << Self::EXPONENT_BITS) - 1;
+    const EXP_MAX: u32 = (1 << Self::EXP_BITS) - 1;
 
     /// The exponent bias value
-    const EXPONENT_BIAS: u32 = Self::EXPONENT_MAX >> 1;
+    const EXP_BIAS: u32 = Self::EXP_MAX >> 1;
 
     /// A mask for the sign bit
     const SIGN_MASK: Self::Int;
 
     /// A mask for the significand
-    const SIGNIFICAND_MASK: Self::Int;
+    const SIG_MASK: Self::Int;
+
+    /// A mask for the exponent
+    const EXP_MASK: Self::Int;
 
     /// The implicit bit of the float format
     const IMPLICIT_BIT: Self::Int;
 
-    /// A mask for the exponent
-    const EXPONENT_MASK: Self::Int;
-
     /// Returns `self` transmuted to `Self::Int`
     fn to_bits(self) -> Self::Int;
 
@@ -105,14 +108,17 @@ macro_rules! float_impl {
 
             const ZERO: Self = 0.0;
             const ONE: Self = 1.0;
+            const INFINITY: Self = Self::INFINITY;
+            const NEG_INFINITY: Self = Self::NEG_INFINITY;
+            const NAN: Self = Self::NAN;
 
             const BITS: u32 = $bits;
-            const SIGNIFICAND_BITS: u32 = $significand_bits;
+            const SIG_BITS: u32 = $significand_bits;
 
             const SIGN_MASK: Self::Int = 1 << (Self::BITS - 1);
-            const SIGNIFICAND_MASK: Self::Int = (1 << Self::SIGNIFICAND_BITS) - 1;
-            const IMPLICIT_BIT: Self::Int = 1 << Self::SIGNIFICAND_BITS;
-            const EXPONENT_MASK: Self::Int = !(Self::SIGN_MASK | Self::SIGNIFICAND_MASK);
+            const SIG_MASK: Self::Int = (1 << Self::SIG_BITS) - 1;
+            const EXP_MASK: Self::Int = !(Self::SIGN_MASK | Self::SIG_MASK);
+            const IMPLICIT_BIT: Self::Int = 1 << Self::SIG_BITS;
 
             fn to_bits(self) -> Self::Int {
                 self.to_bits()
@@ -126,8 +132,7 @@ macro_rules! float_impl {
                     // necessary builtin (__unordtf2) to test whether `f128` is NaN.
                     // FIXME(f16_f128): Remove once the nightly toolchain has the __unordtf2 builtin
                     // x is NaN if all the bits of the exponent are set and the significand is non-0
-                    x.to_bits() & $ty::EXPONENT_MASK == $ty::EXPONENT_MASK
-                        && x.to_bits() & $ty::SIGNIFICAND_MASK != 0
+                    x.to_bits() & $ty::EXP_MASK == $ty::EXP_MASK && x.to_bits() & $ty::SIG_MASK != 0
                 }
                 if is_nan(self) && is_nan(rhs) { true } else { self.to_bits() == rhs.to_bits() }
             }
@@ -135,10 +140,10 @@ macro_rules! float_impl {
                 self.is_sign_negative()
             }
             fn exp(self) -> Self::ExpInt {
-                ((self.to_bits() & Self::EXPONENT_MASK) >> Self::SIGNIFICAND_BITS) as Self::ExpInt
+                ((self.to_bits() & Self::EXP_MASK) >> Self::SIG_BITS) as Self::ExpInt
             }
             fn frac(self) -> Self::Int {
-                self.to_bits() & Self::SIGNIFICAND_MASK
+                self.to_bits() & Self::SIG_MASK
             }
             fn imp_frac(self) -> Self::Int {
                 self.frac() | Self::IMPLICIT_BIT
@@ -149,16 +154,16 @@ macro_rules! float_impl {
             fn from_parts(negative: bool, exponent: Self::Int, significand: Self::Int) -> Self {
                 Self::from_bits(
                     ((negative as Self::Int) << (Self::BITS - 1))
-                        | ((exponent << Self::SIGNIFICAND_BITS) & Self::EXPONENT_MASK)
-                        | (significand & Self::SIGNIFICAND_MASK),
+                        | ((exponent << Self::SIG_BITS) & Self::EXP_MASK)
+                        | (significand & Self::SIG_MASK),
                 )
             }
             fn normalize(significand: Self::Int) -> (i32, Self::Int) {
-                let shift = significand.leading_zeros().wrapping_sub(Self::EXPONENT_BITS);
+                let shift = significand.leading_zeros().wrapping_sub(Self::EXP_BITS);
                 (1i32.wrapping_sub(shift as i32), significand << shift as Self::Int)
             }
             fn is_subnormal(self) -> bool {
-                (self.to_bits() & Self::EXPONENT_MASK) == Self::Int::ZERO
+                (self.to_bits() & Self::EXP_MASK) == Self::Int::ZERO
             }
         }
     };

From 4384fc7f9a9e03565bc4ae22f9a06fafabc7152c Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 31 Oct 2024 23:47:41 -0500
Subject: [PATCH 023/279] Update libm `Float` and `Int` with functions from the
 test traits

The test versions of `Float` and `Int` have a few more methods and
constants availablee. Update the in `libm` with everything missing from
`libm_test` so we will be able to merge these.
---
 src/math/support/float_traits.rs | 50 ++++++++++++++++++++++++++------
 src/math/support/int_traits.rs   | 43 ++++++++++++++++++++++-----
 2 files changed, 77 insertions(+), 16 deletions(-)

diff --git a/src/math/support/float_traits.rs b/src/math/support/float_traits.rs
index f90e99d52..a1d84faf2 100644
--- a/src/math/support/float_traits.rs
+++ b/src/math/support/float_traits.rs
@@ -1,4 +1,4 @@
-use core::ops;
+use core::{fmt, ops};
 
 use super::int_traits::{Int, MinInt};
 
@@ -6,7 +6,8 @@ use super::int_traits::{Int, MinInt};
 #[allow(dead_code)]
 pub trait Float:
     Copy
-    + core::fmt::Debug
+    + fmt::Debug
+    + fmt::Display
     + PartialEq
     + PartialOrd
     + ops::AddAssign
@@ -17,16 +18,17 @@ pub trait Float:
     + ops::Rem<Output = Self>
 {
     /// A uint of the same width as the float
-    type Int: Int<OtherSign = Self::SignedInt, UnsignedInt = Self::Int>;
+    type Int: Int<OtherSign = Self::SignedInt, Unsigned = Self::Int>;
 
     /// A int of the same width as the float
-    type SignedInt: Int + MinInt<OtherSign = Self::Int, UnsignedInt = Self::Int>;
+    type SignedInt: Int + MinInt<OtherSign = Self::Int, Unsigned = Self::Int>;
 
     /// An int capable of containing the exponent bits plus a sign bit. This is signed.
     type ExpInt: Int;
 
     const ZERO: Self;
     const ONE: Self;
+    const NEG_ONE: Self;
     const INFINITY: Self;
     const NEG_INFINITY: Self;
     const NAN: Self;
@@ -69,9 +71,18 @@ pub trait Float:
     /// compared.
     fn eq_repr(self, rhs: Self) -> bool;
 
-    /// Returns true if the sign is negative
+    /// Returns true if the value is NaN.
+    fn is_nan(self) -> bool;
+
+    /// Returns true if the value is +inf or -inf.
+    fn is_infinite(self) -> bool;
+
+    /// Returns true if the sign is negative.
     fn is_sign_negative(self) -> bool;
 
+    /// Returns if `self` is subnormal
+    fn is_subnormal(self) -> bool;
+
     /// Returns the exponent, not adjusting for bias.
     fn exp(self) -> Self::ExpInt;
 
@@ -95,8 +106,11 @@ pub trait Float:
     /// Returns (normalized exponent, normalized significand)
     fn normalize(significand: Self::Int) -> (i32, Self::Int);
 
-    /// Returns if `self` is subnormal
-    fn is_subnormal(self) -> bool;
+    /// Returns a number composed of the magnitude of self and the sign of sign.
+    fn copysign(self, other: Self) -> Self;
+
+    /// Returns a number that represents the sign of self.
+    fn signum(self) -> Self;
 }
 
 macro_rules! float_impl {
@@ -108,6 +122,7 @@ macro_rules! float_impl {
 
             const ZERO: Self = 0.0;
             const ONE: Self = 1.0;
+            const NEG_ONE: Self = -1.0;
             const INFINITY: Self = Self::INFINITY;
             const NEG_INFINITY: Self = Self::NEG_INFINITY;
             const NAN: Self = Self::NAN;
@@ -136,9 +151,18 @@ macro_rules! float_impl {
                 }
                 if is_nan(self) && is_nan(rhs) { true } else { self.to_bits() == rhs.to_bits() }
             }
+            fn is_nan(self) -> bool {
+                self.is_nan()
+            }
+            fn is_infinite(self) -> bool {
+                self.is_infinite()
+            }
             fn is_sign_negative(self) -> bool {
                 self.is_sign_negative()
             }
+            fn is_subnormal(self) -> bool {
+                (self.to_bits() & Self::EXP_MASK) == Self::Int::ZERO
+            }
             fn exp(self) -> Self::ExpInt {
                 ((self.to_bits() & Self::EXP_MASK) >> Self::SIG_BITS) as Self::ExpInt
             }
@@ -162,8 +186,16 @@ macro_rules! float_impl {
                 let shift = significand.leading_zeros().wrapping_sub(Self::EXP_BITS);
                 (1i32.wrapping_sub(shift as i32), significand << shift as Self::Int)
             }
-            fn is_subnormal(self) -> bool {
-                (self.to_bits() & Self::EXP_MASK) == Self::Int::ZERO
+            fn copysign(self, other: Self) -> Self {
+                let mut x = self.to_bits();
+                let y = other.to_bits();
+                x &= !Self::SIGN_MASK;
+                x |= y & Self::SIGN_MASK;
+                Self::from_bits(x)
+            }
+
+            fn signum(self) -> Self {
+                if self.is_nan() { self } else { Self::ONE.copysign(self) }
             }
         }
     };
diff --git a/src/math/support/int_traits.rs b/src/math/support/int_traits.rs
index bdf3afd48..c5feef8d7 100644
--- a/src/math/support/int_traits.rs
+++ b/src/math/support/int_traits.rs
@@ -12,7 +12,7 @@ pub trait MinInt:
     /// Type with the same width but other signedness
     type OtherSign: MinInt;
     /// Unsigned version of Self
-    type UnsignedInt: MinInt;
+    type Unsigned: MinInt;
 
     /// If `Self` is a signed integer
     const SIGNED: bool;
@@ -30,6 +30,7 @@ pub trait MinInt:
 #[allow(dead_code)]
 pub trait Int:
     MinInt
+    + fmt::Display
     + PartialEq
     + PartialOrd
     + ops::AddAssign
@@ -47,8 +48,10 @@ pub trait Int:
     + ops::BitXor<Output = Self>
     + ops::BitAnd<Output = Self>
 {
-    fn unsigned(self) -> Self::UnsignedInt;
-    fn from_unsigned(unsigned: Self::UnsignedInt) -> Self;
+    fn signed(self) -> <Self::Unsigned as MinInt>::OtherSign;
+    fn unsigned(self) -> Self::Unsigned;
+    fn from_unsigned(unsigned: Self::Unsigned) -> Self;
+    fn abs(self) -> Self;
 
     fn from_bool(b: bool) -> Self;
 
@@ -56,10 +59,12 @@ pub trait Int:
     fn logical_shr(self, other: u32) -> Self;
 
     /// Absolute difference between two integers.
-    fn abs_diff(self, other: Self) -> Self::UnsignedInt;
+    fn abs_diff(self, other: Self) -> Self::Unsigned;
 
     // copied from primitive integers, but put in a trait
     fn is_zero(self) -> bool;
+    fn checked_add(self, other: Self) -> Option<Self>;
+    fn checked_sub(self, other: Self) -> Option<Self>;
     fn wrapping_neg(self) -> Self;
     fn wrapping_add(self, other: Self) -> Self;
     fn wrapping_mul(self, other: Self) -> Self;
@@ -86,6 +91,14 @@ macro_rules! int_impl_common {
             self == Self::ZERO
         }
 
+        fn checked_add(self, other: Self) -> Option<Self> {
+            self.checked_add(other)
+        }
+
+        fn checked_sub(self, other: Self) -> Option<Self> {
+            self.checked_sub(other)
+        }
+
         fn wrapping_neg(self) -> Self {
             <Self>::wrapping_neg(self)
         }
@@ -132,7 +145,7 @@ macro_rules! int_impl {
     ($ity:ty, $uty:ty) => {
         impl MinInt for $uty {
             type OtherSign = $ity;
-            type UnsignedInt = $uty;
+            type Unsigned = $uty;
 
             const BITS: u32 = <Self as MinInt>::ZERO.count_zeros();
             const SIGNED: bool = Self::MIN != Self::ZERO;
@@ -144,10 +157,18 @@ macro_rules! int_impl {
         }
 
         impl Int for $uty {
-            fn unsigned(self) -> $uty {
+            fn signed(self) -> $ity {
+                self as $ity
+            }
+
+            fn unsigned(self) -> Self {
                 self
             }
 
+            fn abs(self) -> Self {
+                unimplemented!()
+            }
+
             // It makes writing macros easier if this is implemented for both signed and unsigned
             #[allow(clippy::wrong_self_convention)]
             fn from_unsigned(me: $uty) -> Self {
@@ -163,7 +184,7 @@ macro_rules! int_impl {
 
         impl MinInt for $ity {
             type OtherSign = $uty;
-            type UnsignedInt = $uty;
+            type Unsigned = $uty;
 
             const BITS: u32 = <Self as MinInt>::ZERO.count_zeros();
             const SIGNED: bool = Self::MIN != Self::ZERO;
@@ -175,10 +196,18 @@ macro_rules! int_impl {
         }
 
         impl Int for $ity {
+            fn signed(self) -> Self {
+                self
+            }
+
             fn unsigned(self) -> $uty {
                 self as $uty
             }
 
+            fn abs(self) -> Self {
+                self.abs()
+            }
+
             fn from_unsigned(me: $uty) -> Self {
                 me as $ity
             }

From 80a0160fbb31f59de1b857cede75817348a8c4d7 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 31 Oct 2024 23:52:27 -0500
Subject: [PATCH 024/279] Expose the `support` module publicly with a test
 feature

---
 Cargo.toml                                     | 3 +++
 crates/compiler-builtins-smoke-test/Cargo.toml | 1 +
 src/math/mod.rs                                | 9 ++++++++-
 3 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/Cargo.toml b/Cargo.toml
index 178627766..98a60bfe3 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -26,6 +26,9 @@ unstable = ["unstable-intrinsics"]
 # Enable calls to functions in `core::intrinsics`
 unstable-intrinsics = []
 
+# Make some internal things public for testing.
+unstable-test-support = []
+
 # Used to prevent using any intrinsics or arch-specific code.
 #
 # HACK: this is a negative feature which is generally a bad idea in Cargo, but
diff --git a/crates/compiler-builtins-smoke-test/Cargo.toml b/crates/compiler-builtins-smoke-test/Cargo.toml
index 7118bfe06..e75c4f42b 100644
--- a/crates/compiler-builtins-smoke-test/Cargo.toml
+++ b/crates/compiler-builtins-smoke-test/Cargo.toml
@@ -13,6 +13,7 @@ bench = false
 # Duplicated from libm's Cargo.toml
 unstable = []
 unstable-intrinsics = []
+unstable-test-support = []
 checked = []
 force-soft-floats = []
 
diff --git a/src/math/mod.rs b/src/math/mod.rs
index 2cd77f132..afebdf586 100644
--- a/src/math/mod.rs
+++ b/src/math/mod.rs
@@ -74,9 +74,16 @@ macro_rules! div {
     };
 }
 
-// Private modules
+// `support` may be public for testing
+#[macro_use]
+#[cfg(feature = "unstable-test-support")]
+pub mod support;
+
 #[macro_use]
+#[cfg(not(feature = "unstable-test-support"))]
 mod support;
+
+// Private modules
 mod arch;
 mod expo2;
 mod fenv;

From 338d953aea588bece37631b56f5eee8a9fddd139 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 31 Oct 2024 23:53:03 -0500
Subject: [PATCH 025/279] Replace `libm_test::{Float, Int}` with `libm::{Float,
 Int}`

This involves moving some things from full generic implementations (e.g.
`impl<F: Float> SomeTrait for F { /* ... */ }` to generic functions and
macros to implement traits that call them, due to orphan rule violations
after `Float` became a not-in-crate trait.

`Hex` was moved to `test_traits` so we can eliminate `num_traits`.
---
 crates/libm-test/Cargo.toml         |   2 +-
 crates/libm-test/src/lib.rs         |   5 +-
 crates/libm-test/src/num_traits.rs  | 214 -----------------------
 crates/libm-test/src/test_traits.rs | 261 +++++++++++++++++++++-------
 4 files changed, 199 insertions(+), 283 deletions(-)
 delete mode 100644 crates/libm-test/src/num_traits.rs

diff --git a/crates/libm-test/Cargo.toml b/crates/libm-test/Cargo.toml
index fedf745ed..3587b44e6 100644
--- a/crates/libm-test/Cargo.toml
+++ b/crates/libm-test/Cargo.toml
@@ -24,7 +24,7 @@ short-benchmarks = []
 [dependencies]
 anyhow = "1.0.90"
 az = { version = "1.2.1", optional = true }
-libm = { path = "../.." }
+libm = { path = "../..", features = ["unstable-test-support"] }
 libm-macros = { path = "../libm-macros" }
 musl-math-sys = { path = "../musl-math-sys", optional = true }
 paste = "1.0.15"
diff --git a/crates/libm-test/src/lib.rs b/crates/libm-test/src/lib.rs
index 6c7a3f5ec..56a872779 100644
--- a/crates/libm-test/src/lib.rs
+++ b/crates/libm-test/src/lib.rs
@@ -1,13 +1,12 @@
 pub mod gen;
 #[cfg(feature = "test-multiprecision")]
 pub mod mpfloat;
-mod num_traits;
 mod precision;
 mod test_traits;
 
-pub use num_traits::{Float, Hex, Int};
+pub use libm::support::{Float, Int};
 pub use precision::{MaybeOverride, SpecialCase, multiprec_allowed_ulp, musl_allowed_ulp};
-pub use test_traits::{CheckBasis, CheckCtx, CheckOutput, GenerateInput, TupleCall};
+pub use test_traits::{CheckBasis, CheckCtx, CheckOutput, GenerateInput, Hex, TupleCall};
 
 /// Result type for tests is usually from `anyhow`. Most times there is no success value to
 /// propagate.
diff --git a/crates/libm-test/src/num_traits.rs b/crates/libm-test/src/num_traits.rs
deleted file mode 100644
index e16f4e4dc..000000000
--- a/crates/libm-test/src/num_traits.rs
+++ /dev/null
@@ -1,214 +0,0 @@
-use std::fmt;
-
-use crate::{MaybeOverride, SpecialCase, TestResult};
-
-/// Common types and methods for floating point numbers.
-pub trait Float: Copy + fmt::Display + fmt::Debug + PartialEq<Self> {
-    type Int: Int<OtherSign = Self::SignedInt, Unsigned = Self::Int>;
-    type SignedInt: Int + Int<OtherSign = Self::Int, Unsigned = Self::Int>;
-
-    const ZERO: Self;
-    const ONE: Self;
-
-    /// The bitwidth of the float type
-    const BITS: u32;
-
-    /// The bitwidth of the significand
-    const SIGNIFICAND_BITS: u32;
-
-    /// The bitwidth of the exponent
-    const EXPONENT_BITS: u32 = Self::BITS - Self::SIGNIFICAND_BITS - 1;
-
-    fn is_nan(self) -> bool;
-    fn is_infinite(self) -> bool;
-    fn to_bits(self) -> Self::Int;
-    fn from_bits(bits: Self::Int) -> Self;
-    fn signum(self) -> Self;
-}
-
-macro_rules! impl_float {
-    ($($fty:ty, $ui:ty, $si:ty, $significand_bits:expr;)+) => {
-        $(
-            impl Float for $fty {
-                type Int = $ui;
-                type SignedInt = $si;
-
-                const ZERO: Self = 0.0;
-                const ONE: Self = 1.0;
-
-                const BITS: u32 = <$ui>::BITS;
-                const SIGNIFICAND_BITS: u32 = $significand_bits;
-
-                fn is_nan(self) -> bool {
-                    self.is_nan()
-                }
-                fn is_infinite(self) -> bool {
-                    self.is_infinite()
-                }
-                fn to_bits(self) -> Self::Int {
-                    self.to_bits()
-                }
-                fn from_bits(bits: Self::Int) -> Self {
-                    Self::from_bits(bits)
-                }
-                fn signum(self) -> Self {
-                    self.signum()
-                }
-            }
-
-            impl Hex for $fty {
-                fn hex(self) -> String {
-                    self.to_bits().hex()
-                }
-            }
-        )+
-    }
-}
-
-impl_float!(
-    f32, u32, i32, 23;
-    f64, u64, i64, 52;
-);
-
-/// Common types and methods for integers.
-pub trait Int: Copy + fmt::Display + fmt::Debug + PartialEq<Self> {
-    type OtherSign: Int;
-    type Unsigned: Int;
-    const BITS: u32;
-    const SIGNED: bool;
-
-    fn signed(self) -> <Self::Unsigned as Int>::OtherSign;
-    fn unsigned(self) -> Self::Unsigned;
-    fn checked_sub(self, other: Self) -> Option<Self>;
-    fn abs(self) -> Self;
-}
-
-macro_rules! impl_int {
-    ($($ui:ty, $si:ty ;)+) => {
-        $(
-            impl Int for $ui {
-                type OtherSign = $si;
-                type Unsigned = Self;
-                const BITS: u32 = <$ui>::BITS;
-                const SIGNED: bool = false;
-                fn signed(self) -> Self::OtherSign {
-                    self as $si
-                }
-                fn unsigned(self) -> Self {
-                    self
-                }
-                fn checked_sub(self, other: Self) -> Option<Self> {
-                    self.checked_sub(other)
-                }
-                fn abs(self) -> Self {
-                    unimplemented!()
-                }
-            }
-
-            impl Int for $si {
-                type OtherSign = $ui;
-                type Unsigned = $ui;
-                const BITS: u32 = <$ui>::BITS;
-                const SIGNED: bool = true;
-                fn signed(self) -> Self {
-                    self
-                }
-                fn unsigned(self) -> $ui {
-                    self as $ui
-                }
-                fn checked_sub(self, other: Self) -> Option<Self> {
-                    self.checked_sub(other)
-                }
-                fn abs(self) -> Self {
-                    self.abs()
-                }
-            }
-
-            impl_int!(@for_both $si);
-            impl_int!(@for_both $ui);
-
-        )+
-    };
-
-    (@for_both $ty:ty) => {
-        impl Hex for $ty {
-            fn hex(self) -> String {
-                format!("{self:#0width$x}", width = ((Self::BITS / 4) + 2) as usize)
-            }
-        }
-
-        impl<Input> $crate::CheckOutput<Input> for $ty
-        where
-            Input: Hex + fmt::Debug,
-            SpecialCase: MaybeOverride<Input>,
-        {
-            fn validate<'a>(
-                self,
-                expected: Self,
-                input: Input,
-                ctx: &$crate::CheckCtx,
-            ) -> TestResult {
-                if let Some(res) = SpecialCase::check_int(input, self, expected, ctx) {
-                    return res;
-                }
-
-                anyhow::ensure!(
-                    self == expected,
-                    "\
-                    \n    input:    {input:?} {ibits}\
-                    \n    expected: {expected:<22?} {expbits}\
-                    \n    actual:   {self:<22?} {actbits}\
-                    ",
-                    actbits = self.hex(),
-                    expbits = expected.hex(),
-                    ibits = input.hex(),
-                );
-
-                Ok(())
-            }
-        }
-    }
-}
-
-impl_int!(
-    u32, i32;
-    u64, i64;
-);
-
-/// A helper trait to print something as hex with the correct number of nibbles, e.g. a `u32`
-/// will always print with `0x` followed by 8 digits.
-///
-/// This is only used for printing errors so allocating is okay.
-pub trait Hex: Copy {
-    fn hex(self) -> String;
-}
-
-impl<T1> Hex for (T1,)
-where
-    T1: Hex,
-{
-    fn hex(self) -> String {
-        format!("({},)", self.0.hex())
-    }
-}
-
-impl<T1, T2> Hex for (T1, T2)
-where
-    T1: Hex,
-    T2: Hex,
-{
-    fn hex(self) -> String {
-        format!("({}, {})", self.0.hex(), self.1.hex())
-    }
-}
-
-impl<T1, T2, T3> Hex for (T1, T2, T3)
-where
-    T1: Hex,
-    T2: Hex,
-    T3: Hex,
-{
-    fn hex(self) -> String {
-        format!("({}, {}, {})", self.0.hex(), self.1.hex(), self.2.hex())
-    }
-}
diff --git a/crates/libm-test/src/test_traits.rs b/crates/libm-test/src/test_traits.rs
index 34e15e0b2..67df83fb4 100644
--- a/crates/libm-test/src/test_traits.rs
+++ b/crates/libm-test/src/test_traits.rs
@@ -11,21 +11,7 @@ use std::fmt;
 
 use anyhow::{Context, bail, ensure};
 
-use crate::{Float, Hex, Int, MaybeOverride, SpecialCase, TestResult};
-
-/// Implement this on types that can generate a sequence of tuples for test input.
-pub trait GenerateInput<TupleArgs> {
-    fn get_cases(&self) -> impl Iterator<Item = TupleArgs>;
-}
-
-/// Trait for calling a function with a tuple as arguments.
-///
-/// Implemented on the tuple with the function signature as the generic (so we can use the same
-/// tuple for multiple signatures).
-pub trait TupleCall<Func>: fmt::Debug {
-    type Output;
-    fn call(self, f: Func) -> Self::Output;
-}
+use crate::{Float, Int, MaybeOverride, SpecialCase, TestResult};
 
 /// Context passed to [`CheckOutput`].
 #[derive(Clone, Debug, PartialEq, Eq)]
@@ -56,14 +42,38 @@ pub enum CheckBasis {
     Mpfr,
 }
 
+/// Implement this on types that can generate a sequence of tuples for test input.
+pub trait GenerateInput<TupleArgs> {
+    fn get_cases(&self) -> impl Iterator<Item = TupleArgs>;
+}
+
+/// Trait for calling a function with a tuple as arguments.
+///
+/// Implemented on the tuple with the function signature as the generic (so we can use the same
+/// tuple for multiple signatures).
+pub trait TupleCall<Func>: fmt::Debug {
+    type Output;
+    fn call(self, f: Func) -> Self::Output;
+}
+
 /// A trait to implement on any output type so we can verify it in a generic way.
 pub trait CheckOutput<Input>: Sized {
     /// Validate `self` (actual) and `expected` are the same.
     ///
     /// `input` is only used here for error messages.
-    fn validate<'a>(self, expected: Self, input: Input, ctx: &CheckCtx) -> TestResult;
+    fn validate(self, expected: Self, input: Input, ctx: &CheckCtx) -> TestResult;
+}
+
+/// A helper trait to print something as hex with the correct number of nibbles, e.g. a `u32`
+/// will always print with `0x` followed by 8 digits.
+///
+/// This is only used for printing errors so allocating is okay.
+pub trait Hex: Copy {
+    fn hex(self) -> String;
 }
 
+/* implement `TupleCall` */
+
 impl<T1, R> TupleCall<fn(T1) -> R> for (T1,)
 where
     T1: fmt::Debug,
@@ -143,72 +153,193 @@ where
     }
 }
 
-// Implement for floats
-impl<F, Input> CheckOutput<Input> for F
+/* implement `Hex` */
+
+impl<T1> Hex for (T1,)
 where
-    F: Float + Hex,
+    T1: Hex,
+{
+    fn hex(self) -> String {
+        format!("({},)", self.0.hex())
+    }
+}
+
+impl<T1, T2> Hex for (T1, T2)
+where
+    T1: Hex,
+    T2: Hex,
+{
+    fn hex(self) -> String {
+        format!("({}, {})", self.0.hex(), self.1.hex())
+    }
+}
+
+impl<T1, T2, T3> Hex for (T1, T2, T3)
+where
+    T1: Hex,
+    T2: Hex,
+    T3: Hex,
+{
+    fn hex(self) -> String {
+        format!("({}, {}, {})", self.0.hex(), self.1.hex(), self.2.hex())
+    }
+}
+
+/* trait implementations for ints */
+
+macro_rules! impl_int {
+    ($($ty:ty),*) => {
+        $(
+            impl Hex for $ty {
+                fn hex(self) -> String {
+                    format!("{self:#0width$x}", width = ((Self::BITS / 4) + 2) as usize)
+                }
+            }
+
+            impl<Input> $crate::CheckOutput<Input> for $ty
+            where
+                Input: Hex + fmt::Debug,
+                SpecialCase: MaybeOverride<Input>,
+            {
+                fn validate<'a>(
+                    self,
+                    expected: Self,
+                    input: Input,
+                    ctx: &$crate::CheckCtx,
+                ) -> TestResult {
+                    validate_int(self, expected, input, ctx)
+                }
+            }
+        )*
+    };
+}
+
+fn validate_int<'a, I, Input>(actual: I, expected: I, input: Input, ctx: &CheckCtx) -> TestResult
+where
+    I: Int + Hex,
     Input: Hex + fmt::Debug,
-    u32: TryFrom<F::SignedInt, Error: fmt::Debug>,
     SpecialCase: MaybeOverride<Input>,
 {
-    fn validate<'a>(self, expected: Self, input: Input, ctx: &CheckCtx) -> TestResult {
-        // Create a wrapper function so we only need to `.with_context` once.
-        let inner = || -> TestResult {
-            let mut allowed_ulp = ctx.ulp;
-
-            // If the tested function requires a nonstandard test, run it here.
-            if let Some(res) =
-                SpecialCase::check_float(input, self, expected, &mut allowed_ulp, ctx)
-            {
-                return res;
+    if let Some(res) = SpecialCase::check_int(input, actual, expected, ctx) {
+        return res;
+    }
+
+    anyhow::ensure!(
+        actual == expected,
+        "\
+        \n    input:    {input:?} {ibits}\
+        \n    expected: {expected:<22?} {expbits}\
+        \n    actual:   {actual:<22?} {actbits}\
+        ",
+        actbits = actual.hex(),
+        expbits = expected.hex(),
+        ibits = input.hex(),
+    );
+
+    Ok(())
+}
+
+impl_int!(u32, i32, u64, i64);
+
+/* trait implementations for floats */
+
+macro_rules! impl_float {
+    ($($ty:ty),*) => {
+        $(
+            impl Hex for $ty {
+                fn hex(self) -> String {
+                    format!(
+                        "{:#0width$x}",
+                        self.to_bits(),
+                        width = ((Self::BITS / 4) + 2) as usize
+                    )
+                }
             }
 
-            // Check when both are NaNs
-            if self.is_nan() && expected.is_nan() {
-                // By default, NaNs have nothing special to check.
-                return Ok(());
-            } else if self.is_nan() || expected.is_nan() {
-                // Check when only one is a NaN
-                bail!("real value != NaN")
+            impl<Input> $crate::CheckOutput<Input> for $ty
+            where
+                Input: Hex + fmt::Debug,
+                SpecialCase: MaybeOverride<Input>,
+            {
+                fn validate<'a>(
+                    self,
+                    expected: Self,
+                    input: Input,
+                    ctx: &$crate::CheckCtx,
+                ) -> TestResult {
+                    validate_float(self, expected, input, ctx)
+                }
             }
+        )*
+    };
+}
+
+fn validate_float<'a, F, Input>(actual: F, expected: F, input: Input, ctx: &CheckCtx) -> TestResult
+where
+    F: Float + Hex,
+    Input: Hex + fmt::Debug,
+    u32: TryFrom<F::SignedInt, Error: fmt::Debug>,
+    SpecialCase: MaybeOverride<Input>,
+{
+    // Create a wrapper function so we only need to `.with_context` once.
+    let inner = || -> TestResult {
+        let mut allowed_ulp = ctx.ulp;
 
-            // Make sure that the signs are the same before checing ULP to avoid wraparound
-            let act_sig = self.signum();
-            let exp_sig = expected.signum();
-            ensure!(act_sig == exp_sig, "mismatched signs {act_sig} {exp_sig}");
+        // If the tested function requires a nonstandard test, run it here.
+        if let Some(res) = SpecialCase::check_float(input, actual, expected, &mut allowed_ulp, ctx)
+        {
+            return res;
+        }
 
-            if self.is_infinite() ^ expected.is_infinite() {
-                bail!("mismatched infinities");
-            }
+        // Check when both are NaNs
+        if actual.is_nan() && expected.is_nan() {
+            // By default, NaNs have nothing special to check.
+            return Ok(());
+        } else if actual.is_nan() || expected.is_nan() {
+            // Check when only one is a NaN
+            bail!("real value != NaN")
+        }
 
-            let act_bits = self.to_bits().signed();
-            let exp_bits = expected.to_bits().signed();
+        // Make sure that the signs are the same before checing ULP to avoid wraparound
+        let act_sig = actual.signum();
+        let exp_sig = expected.signum();
+        ensure!(act_sig == exp_sig, "mismatched signs {act_sig} {exp_sig}");
 
-            let ulp_diff = act_bits.checked_sub(exp_bits).unwrap().abs();
+        if actual.is_infinite() ^ expected.is_infinite() {
+            bail!("mismatched infinities");
+        }
 
-            let ulp_u32 = u32::try_from(ulp_diff)
-                .map_err(|e| anyhow::anyhow!("{e:?}: ulp of {ulp_diff} exceeds u32::MAX"))?;
+        let act_bits = actual.to_bits().signed();
+        let exp_bits = expected.to_bits().signed();
 
-            ensure!(ulp_u32 <= allowed_ulp, "ulp {ulp_diff} > {allowed_ulp}",);
+        let ulp_diff = act_bits.checked_sub(exp_bits).unwrap().abs();
 
-            Ok(())
-        };
+        let ulp_u32 = u32::try_from(ulp_diff)
+            .map_err(|e| anyhow::anyhow!("{e:?}: ulp of {ulp_diff} exceeds u32::MAX"))?;
 
-        inner().with_context(|| {
-            format!(
-                "\
-                \n    input:    {input:?} {ibits}\
-                \n    expected: {expected:<22?} {expbits}\
-                \n    actual:   {self:<22?} {actbits}\
-                ",
-                actbits = self.hex(),
-                expbits = expected.hex(),
-                ibits = input.hex(),
-            )
-        })
-    }
+        ensure!(ulp_u32 <= allowed_ulp, "ulp {ulp_diff} > {allowed_ulp}",);
+
+        Ok(())
+    };
+
+    inner().with_context(|| {
+        format!(
+            "\
+            \n    input:    {input:?} {ibits}\
+            \n    expected: {expected:<22?} {expbits}\
+            \n    actual:   {actual:<22?} {actbits}\
+            ",
+            actbits = actual.hex(),
+            expbits = expected.hex(),
+            ibits = input.hex(),
+        )
+    })
 }
 
+impl_float!(f32, f64);
+
+/* trait implementations for compound types */
+
 /// Implement `CheckOutput` for combinations of types.
 macro_rules! impl_tuples {
     ($(($a:ty, $b:ty);)*) => {

From 8bf711d1bcde6180bfb93adbcf8b68ffda285aaa Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 1 Nov 2024 06:05:42 -0500
Subject: [PATCH 026/279] Fix errors reported by Clippy in `libm`

---
 src/lib.rs                     |   3 +
 src/math/asin.rs               |   2 +-
 src/math/asinf.rs              |   2 +-
 src/math/atan2f.rs             |  22 +--
 src/math/atanhf.rs             |   2 +-
 src/math/exp2f.rs              |   4 +-
 src/math/expm1.rs              |   2 +-
 src/math/expm1f.rs             |   2 +-
 src/math/fabs.rs               |   2 -
 src/math/fdim.rs               |   6 +-
 src/math/fdimf.rs              |   6 +-
 src/math/fmaf.rs               |   2 +-
 src/math/fmod.rs               |   4 +-
 src/math/fmodf.rs              |   2 +-
 src/math/ilogb.rs              |   2 +-
 src/math/ilogbf.rs             |   2 +-
 src/math/jn.rs                 | 238 ++++++++++++++++-----------------
 src/math/jnf.rs                | 230 ++++++++++++++++---------------
 src/math/lgamma_r.rs           |   3 +-
 src/math/lgammaf_r.rs          |   3 +-
 src/math/nextafter.rs          |   4 +-
 src/math/pow.rs                |   6 +-
 src/math/powf.rs               |  12 +-
 src/math/rem_pio2.rs           |   2 +-
 src/math/rem_pio2_large.rs     |   2 -
 src/math/sincosf.rs            |  26 ++--
 src/math/sqrt.rs               |  12 +-
 src/math/support/int_traits.rs |   3 +
 src/math/tgamma.rs             |   5 +-
 29 files changed, 306 insertions(+), 305 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index 98ac55988..511ab598d 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -5,12 +5,15 @@
 #![allow(clippy::assign_op_pattern)]
 #![allow(clippy::deprecated_cfg_attr)]
 #![allow(clippy::eq_op)]
+#![allow(clippy::excessive_precision)]
 #![allow(clippy::float_cmp)]
 #![allow(clippy::int_plus_one)]
 #![allow(clippy::many_single_char_names)]
 #![allow(clippy::mixed_case_hex_literals)]
+#![allow(clippy::needless_late_init)]
 #![allow(clippy::needless_return)]
 #![allow(clippy::unreadable_literal)]
+#![allow(clippy::zero_divided_by_zero)]
 
 mod libm_helper;
 mod math;
diff --git a/src/math/asin.rs b/src/math/asin.rs
index 12fe08fc7..12d0cd35f 100644
--- a/src/math/asin.rs
+++ b/src/math/asin.rs
@@ -90,7 +90,7 @@ pub fn asin(mut x: f64) -> f64 {
     /* |x| < 0.5 */
     if ix < 0x3fe00000 {
         /* if 0x1p-1022 <= |x| < 0x1p-26, avoid raising underflow */
-        if ix < 0x3e500000 && ix >= 0x00100000 {
+        if (0x00100000..0x3e500000).contains(&ix) {
             return x;
         } else {
             return x + x * comp_r(x * x);
diff --git a/src/math/asinf.rs b/src/math/asinf.rs
index 2c785abe2..0ea49c076 100644
--- a/src/math/asinf.rs
+++ b/src/math/asinf.rs
@@ -54,7 +54,7 @@ pub fn asinf(mut x: f32) -> f32 {
     if ix < 0x3f000000 {
         /* |x| < 0.5 */
         /* if 0x1p-126 <= |x| < 0x1p-12, avoid raising underflow */
-        if (ix < 0x39800000) && (ix >= 0x00800000) {
+        if (0x00800000..0x39800000).contains(&ix) {
             return x;
         }
         return x + x * r(x * x);
diff --git a/src/math/atan2f.rs b/src/math/atan2f.rs
index fa33f54f6..95b466fff 100644
--- a/src/math/atan2f.rs
+++ b/src/math/atan2f.rs
@@ -42,9 +42,9 @@ pub fn atan2f(y: f32, x: f32) -> f32 {
     /* when y = 0 */
     if iy == 0 {
         return match m {
-            0 | 1 => y,   /* atan(+-0,+anything)=+-0 */
-            2 => PI,      /* atan(+0,-anything) = pi */
-            3 | _ => -PI, /* atan(-0,-anything) =-pi */
+            0 | 1 => y, /* atan(+-0,+anything)=+-0 */
+            2 => PI,    /* atan(+0,-anything) = pi */
+            _ => -PI,   /* atan(-0,-anything) =-pi */
         };
     }
     /* when x = 0 */
@@ -55,17 +55,17 @@ pub fn atan2f(y: f32, x: f32) -> f32 {
     if ix == 0x7f800000 {
         return if iy == 0x7f800000 {
             match m {
-                0 => PI / 4.,           /* atan(+INF,+INF) */
-                1 => -PI / 4.,          /* atan(-INF,+INF) */
-                2 => 3. * PI / 4.,      /* atan(+INF,-INF)*/
-                3 | _ => -3. * PI / 4., /* atan(-INF,-INF)*/
+                0 => PI / 4.,       /* atan(+INF,+INF) */
+                1 => -PI / 4.,      /* atan(-INF,+INF) */
+                2 => 3. * PI / 4.,  /* atan(+INF,-INF)*/
+                _ => -3. * PI / 4., /* atan(-INF,-INF)*/
             }
         } else {
             match m {
-                0 => 0.,      /* atan(+...,+INF) */
-                1 => -0.,     /* atan(-...,+INF) */
-                2 => PI,      /* atan(+...,-INF) */
-                3 | _ => -PI, /* atan(-...,-INF) */
+                0 => 0.,  /* atan(+...,+INF) */
+                1 => -0., /* atan(-...,+INF) */
+                2 => PI,  /* atan(+...,-INF) */
+                _ => -PI, /* atan(-...,-INF) */
             }
         };
     }
diff --git a/src/math/atanhf.rs b/src/math/atanhf.rs
index 3545411bb..80ccec1f6 100644
--- a/src/math/atanhf.rs
+++ b/src/math/atanhf.rs
@@ -18,7 +18,7 @@ pub fn atanhf(mut x: f32) -> f32 {
         if u < 0x3f800000 - (32 << 23) {
             /* handle underflow */
             if u < (1 << 23) {
-                force_eval!((x * x) as f32);
+                force_eval!(x * x);
             }
         } else {
             /* |x| < 0.5, up to 1.7ulp error */
diff --git a/src/math/exp2f.rs b/src/math/exp2f.rs
index f4867b80e..f452b6a20 100644
--- a/src/math/exp2f.rs
+++ b/src/math/exp2f.rs
@@ -95,7 +95,7 @@ pub fn exp2f(mut x: f32) -> f32 {
             /* NaN */
             return x;
         }
-        if ui >= 0x43000000 && ui < 0x80000000 {
+        if (0x43000000..0x80000000).contains(&ui) {
             /* x >= 128 */
             x *= x1p127;
             return x;
@@ -127,7 +127,7 @@ pub fn exp2f(mut x: f32) -> f32 {
     let z: f64 = (x - uf) as f64;
     /* Compute r = exp2(y) = exp2ft[i0] * p(z). */
     let r: f64 = f64::from_bits(i!(EXP2FT, i0 as usize));
-    let t: f64 = r as f64 * z;
+    let t: f64 = r * z;
     let r: f64 = r + t * (p1 as f64 + z * p2 as f64) + t * (z * z) * (p3 as f64 + z * p4 as f64);
 
     /* Scale by 2**k */
diff --git a/src/math/expm1.rs b/src/math/expm1.rs
index 42608509a..f25153f32 100644
--- a/src/math/expm1.rs
+++ b/src/math/expm1.rs
@@ -115,7 +115,7 @@ pub fn expm1(mut x: f64) -> f64 {
     }
     ui = ((0x3ff + k) as u64) << 52; /* 2^k */
     let twopk = f64::from_bits(ui);
-    if k < 0 || k > 56 {
+    if !(0..=56).contains(&k) {
         /* suffice to return exp(x)-1 */
         y = x - e + 1.0;
         if k == 1024 {
diff --git a/src/math/expm1f.rs b/src/math/expm1f.rs
index a862fe255..12c6f532b 100644
--- a/src/math/expm1f.rs
+++ b/src/math/expm1f.rs
@@ -115,7 +115,7 @@ pub fn expm1f(mut x: f32) -> f32 {
         return 1. + 2. * (x - e);
     }
     let twopk = f32::from_bits(((0x7f + k) << 23) as u32); /* 2^k */
-    if (k < 0) || (k > 56) {
+    if !(0..=56).contains(&k) {
         /* suffice to return exp(x)-1 */
         let mut y = x - e + 1.;
         if k == 128 {
diff --git a/src/math/fabs.rs b/src/math/fabs.rs
index 8d3ea2fd6..d7980eb65 100644
--- a/src/math/fabs.rs
+++ b/src/math/fabs.rs
@@ -1,5 +1,3 @@
-use core::u64;
-
 /// Absolute value (magnitude) (f64)
 /// Calculates the absolute value (magnitude) of the argument `x`,
 /// by direct manipulation of the bit representation of `x`.
diff --git a/src/math/fdim.rs b/src/math/fdim.rs
index 014930097..7c58cb5a9 100644
--- a/src/math/fdim.rs
+++ b/src/math/fdim.rs
@@ -3,9 +3,9 @@ use core::f64;
 /// Positive difference (f64)
 ///
 /// Determines the positive difference between arguments, returning:
-/// * x - y	if x > y, or
-/// * +0	if x <= y, or
-/// * NAN	if either argument is NAN.
+/// * x - y if x > y, or
+/// * +0    if x <= y, or
+/// * NAN   if either argument is NAN.
 ///
 /// A range error may occur.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
diff --git a/src/math/fdimf.rs b/src/math/fdimf.rs
index ea0b592d7..2abd49a64 100644
--- a/src/math/fdimf.rs
+++ b/src/math/fdimf.rs
@@ -3,9 +3,9 @@ use core::f32;
 /// Positive difference (f32)
 ///
 /// Determines the positive difference between arguments, returning:
-/// * x - y	if x > y, or
-/// * +0	if x <= y, or
-/// * NAN	if either argument is NAN.
+/// * x - y if x > y, or
+/// * +0    if x <= y, or
+/// * NAN   if either argument is NAN.
 ///
 /// A range error may occur.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
diff --git a/src/math/fmaf.rs b/src/math/fmaf.rs
index 10bdaeab3..79371c836 100644
--- a/src/math/fmaf.rs
+++ b/src/math/fmaf.rs
@@ -71,7 +71,7 @@ pub fn fmaf(x: f32, y: f32, mut z: f32) -> f32 {
             underflow may not be raised correctly, example:
             fmaf(0x1p-120f, 0x1p-120f, 0x1p-149f)
         */
-        if e < 0x3ff - 126 && e >= 0x3ff - 149 && fetestexcept(FE_INEXACT) != 0 {
+        if ((0x3ff - 149)..(0x3ff - 126)).contains(&e) && fetestexcept(FE_INEXACT) != 0 {
             feclearexcept(FE_INEXACT);
             // prevent `xy + vz` from being CSE'd with `xy + z` above
             let vz: f32 = unsafe { read_volatile(&z) };
diff --git a/src/math/fmod.rs b/src/math/fmod.rs
index d892ffd8b..df16162bc 100644
--- a/src/math/fmod.rs
+++ b/src/math/fmod.rs
@@ -1,5 +1,3 @@
-use core::u64;
-
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn fmod(x: f64, y: f64) -> f64 {
     let mut uxi = x.to_bits();
@@ -74,7 +72,7 @@ pub fn fmod(x: f64, y: f64) -> f64 {
     } else {
         uxi >>= -ex + 1;
     }
-    uxi |= (sx as u64) << 63;
+    uxi |= sx << 63;
 
     f64::from_bits(uxi)
 }
diff --git a/src/math/fmodf.rs b/src/math/fmodf.rs
index 1d8001384..671af8580 100644
--- a/src/math/fmodf.rs
+++ b/src/math/fmodf.rs
@@ -1,4 +1,4 @@
-use core::{f32, u32};
+use core::f32;
 
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn fmodf(x: f32, y: f32) -> f32 {
diff --git a/src/math/ilogb.rs b/src/math/ilogb.rs
index 9d58d0608..ccc4914be 100644
--- a/src/math/ilogb.rs
+++ b/src/math/ilogb.rs
@@ -21,7 +21,7 @@ pub fn ilogb(x: f64) -> i32 {
         e
     } else if e == 0x7ff {
         force_eval!(0.0 / 0.0);
-        if (i << 12) != 0 { FP_ILOGBNAN } else { i32::max_value() }
+        if (i << 12) != 0 { FP_ILOGBNAN } else { i32::MAX }
     } else {
         e - 0x3ff
     }
diff --git a/src/math/ilogbf.rs b/src/math/ilogbf.rs
index 85deb43c8..3585d6d36 100644
--- a/src/math/ilogbf.rs
+++ b/src/math/ilogbf.rs
@@ -21,7 +21,7 @@ pub fn ilogbf(x: f32) -> i32 {
         e
     } else if e == 0xff {
         force_eval!(0.0 / 0.0);
-        if (i << 9) != 0 { FP_ILOGBNAN } else { i32::max_value() }
+        if (i << 9) != 0 { FP_ILOGBNAN } else { i32::MAX }
     } else {
         e - 0x7f
     }
diff --git a/src/math/jn.rs b/src/math/jn.rs
index aff051f24..7f98ddc05 100644
--- a/src/math/jn.rs
+++ b/src/math/jn.rs
@@ -104,7 +104,8 @@ pub fn jn(n: i32, mut x: f64) -> f64 {
                 0 => -cos(x) + sin(x),
                 1 => -cos(x) - sin(x),
                 2 => cos(x) - sin(x),
-                3 | _ => cos(x) + sin(x),
+                // 3
+                _ => cos(x) + sin(x),
             };
             b = INVSQRTPI * temp / sqrt(x);
         } else {
@@ -118,130 +119,128 @@ pub fn jn(n: i32, mut x: f64) -> f64 {
                 a = temp;
             }
         }
-    } else {
-        if ix < 0x3e100000 {
-            /* x < 2**-29 */
-            /* x is tiny, return the first Taylor expansion of J(n,x)
-             * J(n,x) = 1/n!*(x/2)^n  - ...
-             */
-            if nm1 > 32 {
-                /* underflow */
-                b = 0.0;
-            } else {
-                temp = x * 0.5;
-                b = temp;
-                a = 1.0;
-                i = 2;
-                while i <= nm1 + 1 {
-                    a *= i as f64; /* a = n! */
-                    b *= temp; /* b = (x/2)^n */
-                    i += 1;
-                }
-                b = b / a;
-            }
+    } else if ix < 0x3e100000 {
+        /* x < 2**-29 */
+        /* x is tiny, return the first Taylor expansion of J(n,x)
+         * J(n,x) = 1/n!*(x/2)^n  - ...
+         */
+        if nm1 > 32 {
+            /* underflow */
+            b = 0.0;
         } else {
-            /* use backward recurrence */
-            /*                      x      x^2      x^2
-             *  J(n,x)/J(n-1,x) =  ----   ------   ------   .....
-             *                      2n  - 2(n+1) - 2(n+2)
-             *
-             *                      1      1        1
-             *  (for large x)   =  ----  ------   ------   .....
-             *                      2n   2(n+1)   2(n+2)
-             *                      -- - ------ - ------ -
-             *                       x     x         x
-             *
-             * Let w = 2n/x and h=2/x, then the above quotient
-             * is equal to the continued fraction:
-             *                  1
-             *      = -----------------------
-             *                     1
-             *         w - -----------------
-             *                        1
-             *              w+h - ---------
-             *                     w+2h - ...
-             *
-             * To determine how many terms needed, let
-             * Q(0) = w, Q(1) = w(w+h) - 1,
-             * Q(k) = (w+k*h)*Q(k-1) - Q(k-2),
-             * When Q(k) > 1e4      good for single
-             * When Q(k) > 1e9      good for double
-             * When Q(k) > 1e17     good for quadruple
-             */
-            /* determine k */
-            let mut t: f64;
-            let mut q0: f64;
-            let mut q1: f64;
-            let mut w: f64;
-            let h: f64;
-            let mut z: f64;
-            let mut tmp: f64;
-            let nf: f64;
+            temp = x * 0.5;
+            b = temp;
+            a = 1.0;
+            i = 2;
+            while i <= nm1 + 1 {
+                a *= i as f64; /* a = n! */
+                b *= temp; /* b = (x/2)^n */
+                i += 1;
+            }
+            b = b / a;
+        }
+    } else {
+        /* use backward recurrence */
+        /*                      x      x^2      x^2
+         *  J(n,x)/J(n-1,x) =  ----   ------   ------   .....
+         *                      2n  - 2(n+1) - 2(n+2)
+         *
+         *                      1      1        1
+         *  (for large x)   =  ----  ------   ------   .....
+         *                      2n   2(n+1)   2(n+2)
+         *                      -- - ------ - ------ -
+         *                       x     x         x
+         *
+         * Let w = 2n/x and h=2/x, then the above quotient
+         * is equal to the continued fraction:
+         *                  1
+         *      = -----------------------
+         *                     1
+         *         w - -----------------
+         *                        1
+         *              w+h - ---------
+         *                     w+2h - ...
+         *
+         * To determine how many terms needed, let
+         * Q(0) = w, Q(1) = w(w+h) - 1,
+         * Q(k) = (w+k*h)*Q(k-1) - Q(k-2),
+         * When Q(k) > 1e4      good for single
+         * When Q(k) > 1e9      good for double
+         * When Q(k) > 1e17     good for quadruple
+         */
+        /* determine k */
+        let mut t: f64;
+        let mut q0: f64;
+        let mut q1: f64;
+        let mut w: f64;
+        let h: f64;
+        let mut z: f64;
+        let mut tmp: f64;
+        let nf: f64;
 
-            let mut k: i32;
+        let mut k: i32;
 
-            nf = (nm1 as f64) + 1.0;
-            w = 2.0 * nf / x;
-            h = 2.0 / x;
-            z = w + h;
-            q0 = w;
-            q1 = w * z - 1.0;
-            k = 1;
-            while q1 < 1.0e9 {
-                k += 1;
-                z += h;
-                tmp = z * q1 - q0;
-                q0 = q1;
-                q1 = tmp;
-            }
-            t = 0.0;
-            i = k;
-            while i >= 0 {
-                t = 1.0 / (2.0 * ((i as f64) + nf) / x - t);
+        nf = (nm1 as f64) + 1.0;
+        w = 2.0 * nf / x;
+        h = 2.0 / x;
+        z = w + h;
+        q0 = w;
+        q1 = w * z - 1.0;
+        k = 1;
+        while q1 < 1.0e9 {
+            k += 1;
+            z += h;
+            tmp = z * q1 - q0;
+            q0 = q1;
+            q1 = tmp;
+        }
+        t = 0.0;
+        i = k;
+        while i >= 0 {
+            t = 1.0 / (2.0 * ((i as f64) + nf) / x - t);
+            i -= 1;
+        }
+        a = t;
+        b = 1.0;
+        /*  estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n)
+         *  Hence, if n*(log(2n/x)) > ...
+         *  single 8.8722839355e+01
+         *  double 7.09782712893383973096e+02
+         *  long double 1.1356523406294143949491931077970765006170e+04
+         *  then recurrent value may overflow and the result is
+         *  likely underflow to zero
+         */
+        tmp = nf * log(fabs(w));
+        if tmp < 7.09782712893383973096e+02 {
+            i = nm1;
+            while i > 0 {
+                temp = b;
+                b = b * (2.0 * (i as f64)) / x - a;
+                a = temp;
                 i -= 1;
             }
-            a = t;
-            b = 1.0;
-            /*  estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n)
-             *  Hence, if n*(log(2n/x)) > ...
-             *  single 8.8722839355e+01
-             *  double 7.09782712893383973096e+02
-             *  long double 1.1356523406294143949491931077970765006170e+04
-             *  then recurrent value may overflow and the result is
-             *  likely underflow to zero
-             */
-            tmp = nf * log(fabs(w));
-            if tmp < 7.09782712893383973096e+02 {
-                i = nm1;
-                while i > 0 {
-                    temp = b;
-                    b = b * (2.0 * (i as f64)) / x - a;
-                    a = temp;
-                    i -= 1;
-                }
-            } else {
-                i = nm1;
-                while i > 0 {
-                    temp = b;
-                    b = b * (2.0 * (i as f64)) / x - a;
-                    a = temp;
-                    /* scale b to avoid spurious overflow */
-                    let x1p500 = f64::from_bits(0x5f30000000000000); // 0x1p500 == 2^500
-                    if b > x1p500 {
-                        a /= b;
-                        t /= b;
-                        b = 1.0;
-                    }
-                    i -= 1;
+        } else {
+            i = nm1;
+            while i > 0 {
+                temp = b;
+                b = b * (2.0 * (i as f64)) / x - a;
+                a = temp;
+                /* scale b to avoid spurious overflow */
+                let x1p500 = f64::from_bits(0x5f30000000000000); // 0x1p500 == 2^500
+                if b > x1p500 {
+                    a /= b;
+                    t /= b;
+                    b = 1.0;
                 }
+                i -= 1;
             }
-            z = j0(x);
-            w = j1(x);
-            if fabs(z) >= fabs(w) {
-                b = t * z / b;
-            } else {
-                b = t * w / a;
-            }
+        }
+        z = j0(x);
+        w = j1(x);
+        if fabs(z) >= fabs(w) {
+            b = t * z / b;
+        } else {
+            b = t * w / a;
         }
     }
 
@@ -315,7 +314,8 @@ pub fn yn(n: i32, x: f64) -> f64 {
             0 => -sin(x) - cos(x),
             1 => -sin(x) + cos(x),
             2 => sin(x) + cos(x),
-            3 | _ => sin(x) - cos(x),
+            // 3
+            _ => sin(x) - cos(x),
         };
         b = INVSQRTPI * temp / sqrt(x);
     } else {
diff --git a/src/math/jnf.rs b/src/math/jnf.rs
index e5afda448..754f8f33b 100644
--- a/src/math/jnf.rs
+++ b/src/math/jnf.rs
@@ -64,128 +64,126 @@ pub fn jnf(n: i32, mut x: f32) -> f32 {
             b = b * (2.0 * (i as f32) / x) - a;
             a = temp;
         }
+    } else if ix < 0x35800000 {
+        /* x < 2**-20 */
+        /* x is tiny, return the first Taylor expansion of J(n,x)
+         * J(n,x) = 1/n!*(x/2)^n  - ...
+         */
+        if nm1 > 8 {
+            /* underflow */
+            nm1 = 8;
+        }
+        temp = 0.5 * x;
+        b = temp;
+        a = 1.0;
+        i = 2;
+        while i <= nm1 + 1 {
+            a *= i as f32; /* a = n! */
+            b *= temp; /* b = (x/2)^n */
+            i += 1;
+        }
+        b = b / a;
     } else {
-        if ix < 0x35800000 {
-            /* x < 2**-20 */
-            /* x is tiny, return the first Taylor expansion of J(n,x)
-             * J(n,x) = 1/n!*(x/2)^n  - ...
-             */
-            if nm1 > 8 {
-                /* underflow */
-                nm1 = 8;
-            }
-            temp = 0.5 * x;
-            b = temp;
-            a = 1.0;
-            i = 2;
-            while i <= nm1 + 1 {
-                a *= i as f32; /* a = n! */
-                b *= temp; /* b = (x/2)^n */
-                i += 1;
-            }
-            b = b / a;
-        } else {
-            /* use backward recurrence */
-            /*                      x      x^2      x^2
-             *  J(n,x)/J(n-1,x) =  ----   ------   ------   .....
-             *                      2n  - 2(n+1) - 2(n+2)
-             *
-             *                      1      1        1
-             *  (for large x)   =  ----  ------   ------   .....
-             *                      2n   2(n+1)   2(n+2)
-             *                      -- - ------ - ------ -
-             *                       x     x         x
-             *
-             * Let w = 2n/x and h=2/x, then the above quotient
-             * is equal to the continued fraction:
-             *                  1
-             *      = -----------------------
-             *                     1
-             *         w - -----------------
-             *                        1
-             *              w+h - ---------
-             *                     w+2h - ...
-             *
-             * To determine how many terms needed, let
-             * Q(0) = w, Q(1) = w(w+h) - 1,
-             * Q(k) = (w+k*h)*Q(k-1) - Q(k-2),
-             * When Q(k) > 1e4      good for single
-             * When Q(k) > 1e9      good for double
-             * When Q(k) > 1e17     good for quadruple
-             */
-            /* determine k */
-            let mut t: f32;
-            let mut q0: f32;
-            let mut q1: f32;
-            let mut w: f32;
-            let h: f32;
-            let mut z: f32;
-            let mut tmp: f32;
-            let nf: f32;
-            let mut k: i32;
+        /* use backward recurrence */
+        /*                      x      x^2      x^2
+         *  J(n,x)/J(n-1,x) =  ----   ------   ------   .....
+         *                      2n  - 2(n+1) - 2(n+2)
+         *
+         *                      1      1        1
+         *  (for large x)   =  ----  ------   ------   .....
+         *                      2n   2(n+1)   2(n+2)
+         *                      -- - ------ - ------ -
+         *                       x     x         x
+         *
+         * Let w = 2n/x and h=2/x, then the above quotient
+         * is equal to the continued fraction:
+         *                  1
+         *      = -----------------------
+         *                     1
+         *         w - -----------------
+         *                        1
+         *              w+h - ---------
+         *                     w+2h - ...
+         *
+         * To determine how many terms needed, let
+         * Q(0) = w, Q(1) = w(w+h) - 1,
+         * Q(k) = (w+k*h)*Q(k-1) - Q(k-2),
+         * When Q(k) > 1e4      good for single
+         * When Q(k) > 1e9      good for double
+         * When Q(k) > 1e17     good for quadruple
+         */
+        /* determine k */
+        let mut t: f32;
+        let mut q0: f32;
+        let mut q1: f32;
+        let mut w: f32;
+        let h: f32;
+        let mut z: f32;
+        let mut tmp: f32;
+        let nf: f32;
+        let mut k: i32;
 
-            nf = (nm1 as f32) + 1.0;
-            w = 2.0 * (nf as f32) / x;
-            h = 2.0 / x;
-            z = w + h;
-            q0 = w;
-            q1 = w * z - 1.0;
-            k = 1;
-            while q1 < 1.0e4 {
-                k += 1;
-                z += h;
-                tmp = z * q1 - q0;
-                q0 = q1;
-                q1 = tmp;
-            }
-            t = 0.0;
-            i = k;
-            while i >= 0 {
-                t = 1.0 / (2.0 * ((i as f32) + nf) / x - t);
+        nf = (nm1 as f32) + 1.0;
+        w = 2.0 * nf / x;
+        h = 2.0 / x;
+        z = w + h;
+        q0 = w;
+        q1 = w * z - 1.0;
+        k = 1;
+        while q1 < 1.0e4 {
+            k += 1;
+            z += h;
+            tmp = z * q1 - q0;
+            q0 = q1;
+            q1 = tmp;
+        }
+        t = 0.0;
+        i = k;
+        while i >= 0 {
+            t = 1.0 / (2.0 * ((i as f32) + nf) / x - t);
+            i -= 1;
+        }
+        a = t;
+        b = 1.0;
+        /*  estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n)
+         *  Hence, if n*(log(2n/x)) > ...
+         *  single 8.8722839355e+01
+         *  double 7.09782712893383973096e+02
+         *  long double 1.1356523406294143949491931077970765006170e+04
+         *  then recurrent value may overflow and the result is
+         *  likely underflow to zero
+         */
+        tmp = nf * logf(fabsf(w));
+        if tmp < 88.721679688 {
+            i = nm1;
+            while i > 0 {
+                temp = b;
+                b = 2.0 * (i as f32) * b / x - a;
+                a = temp;
                 i -= 1;
             }
-            a = t;
-            b = 1.0;
-            /*  estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n)
-             *  Hence, if n*(log(2n/x)) > ...
-             *  single 8.8722839355e+01
-             *  double 7.09782712893383973096e+02
-             *  long double 1.1356523406294143949491931077970765006170e+04
-             *  then recurrent value may overflow and the result is
-             *  likely underflow to zero
-             */
-            tmp = nf * logf(fabsf(w));
-            if tmp < 88.721679688 {
-                i = nm1;
-                while i > 0 {
-                    temp = b;
-                    b = 2.0 * (i as f32) * b / x - a;
-                    a = temp;
-                    i -= 1;
-                }
-            } else {
-                i = nm1;
-                while i > 0 {
-                    temp = b;
-                    b = 2.0 * (i as f32) * b / x - a;
-                    a = temp;
-                    /* scale b to avoid spurious overflow */
-                    let x1p60 = f32::from_bits(0x5d800000); // 0x1p60 == 2^60
-                    if b > x1p60 {
-                        a /= b;
-                        t /= b;
-                        b = 1.0;
-                    }
-                    i -= 1;
+        } else {
+            i = nm1;
+            while i > 0 {
+                temp = b;
+                b = 2.0 * (i as f32) * b / x - a;
+                a = temp;
+                /* scale b to avoid spurious overflow */
+                let x1p60 = f32::from_bits(0x5d800000); // 0x1p60 == 2^60
+                if b > x1p60 {
+                    a /= b;
+                    t /= b;
+                    b = 1.0;
                 }
+                i -= 1;
             }
-            z = j0f(x);
-            w = j1f(x);
-            if fabsf(z) >= fabsf(w) {
-                b = t * z / b;
-            } else {
-                b = t * w / a;
-            }
+        }
+        z = j0f(x);
+        w = j1f(x);
+        if fabsf(z) >= fabsf(w) {
+            b = t * z / b;
+        } else {
+            b = t * w / a;
         }
     }
 
diff --git a/src/math/lgamma_r.rs b/src/math/lgamma_r.rs
index b26177e6e..6becaad2c 100644
--- a/src/math/lgamma_r.rs
+++ b/src/math/lgamma_r.rs
@@ -160,7 +160,8 @@ fn sin_pi(mut x: f64) -> f64 {
         1 => k_cos(x, 0.0),
         2 => k_sin(-x, 0.0, 0),
         3 => -k_cos(x, 0.0),
-        0 | _ => k_sin(x, 0.0, 0),
+        // 0
+        _ => k_sin(x, 0.0, 0),
     }
 }
 
diff --git a/src/math/lgammaf_r.rs b/src/math/lgammaf_r.rs
index 723c90daf..10cecee54 100644
--- a/src/math/lgammaf_r.rs
+++ b/src/math/lgammaf_r.rs
@@ -95,7 +95,8 @@ fn sin_pi(mut x: f32) -> f32 {
         1 => k_cosf(y),
         2 => k_sinf(-y),
         3 => -k_cosf(y),
-        0 | _ => k_sinf(y),
+        // 0
+        _ => k_sinf(y),
     }
 }
 
diff --git a/src/math/nextafter.rs b/src/math/nextafter.rs
index 057626191..422bd7496 100644
--- a/src/math/nextafter.rs
+++ b/src/math/nextafter.rs
@@ -10,8 +10,8 @@ pub fn nextafter(x: f64, y: f64) -> f64 {
         return y;
     }
 
-    let ax = ux_i & !1_u64 / 2;
-    let ay = uy_i & !1_u64 / 2;
+    let ax = ux_i & (!1_u64 / 2);
+    let ay = uy_i & (!1_u64 / 2);
     if ax == 0 {
         if ay == 0 {
             return y;
diff --git a/src/math/pow.rs b/src/math/pow.rs
index 7ecad291d..736465cd1 100644
--- a/src/math/pow.rs
+++ b/src/math/pow.rs
@@ -98,8 +98,8 @@ pub fn pow(x: f64, y: f64) -> f64 {
     let (hx, lx): (i32, u32) = ((x.to_bits() >> 32) as i32, x.to_bits() as u32);
     let (hy, ly): (i32, u32) = ((y.to_bits() >> 32) as i32, y.to_bits() as u32);
 
-    let mut ix: i32 = (hx & 0x7fffffff) as i32;
-    let iy: i32 = (hy & 0x7fffffff) as i32;
+    let mut ix: i32 = hx & 0x7fffffff_i32;
+    let iy: i32 = hy & 0x7fffffff_i32;
 
     /* x**0 = 1, even if x is NaN */
     if ((iy as u32) | ly) == 0 {
@@ -355,7 +355,7 @@ pub fn pow(x: f64, y: f64) -> f64 {
     }
 
     /* compute 2**(p_h+p_l) */
-    let i: i32 = j & (0x7fffffff as i32);
+    let i: i32 = j & 0x7fffffff_i32;
     k = (i >> 20) - 0x3ff;
     let mut n: i32 = 0;
 
diff --git a/src/math/powf.rs b/src/math/powf.rs
index 2d9d1e4bb..839c6c23d 100644
--- a/src/math/powf.rs
+++ b/src/math/powf.rs
@@ -13,6 +13,8 @@
  * ====================================================
  */
 
+use core::cmp::Ordering;
+
 use super::{fabsf, scalbnf, sqrtf};
 
 const BP: [f32; 2] = [1.0, 1.5];
@@ -115,15 +117,13 @@ pub fn powf(x: f32, y: f32) -> f32 {
     /* special value of y */
     if iy == 0x7f800000 {
         /* y is +-inf */
-        if ix == 0x3f800000 {
+        match ix.cmp(&0x3f800000) {
             /* (-1)**+-inf is 1 */
-            return 1.0;
-        } else if ix > 0x3f800000 {
+            Ordering::Equal => return 1.0,
             /* (|x|>1)**+-inf = inf,0 */
-            return if hy >= 0 { y } else { 0.0 };
-        } else {
+            Ordering::Greater => return if hy >= 0 { y } else { 0.0 },
             /* (|x|<1)**+-inf = 0,inf */
-            return if hy >= 0 { 0.0 } else { -y };
+            Ordering::Less => return if hy >= 0 { 0.0 } else { -y },
         }
     }
     if iy == 0x3f800000 {
diff --git a/src/math/rem_pio2.rs b/src/math/rem_pio2.rs
index 4dfb8c658..917e90819 100644
--- a/src/math/rem_pio2.rs
+++ b/src/math/rem_pio2.rs
@@ -50,7 +50,7 @@ pub(crate) fn rem_pio2(x: f64) -> (i32, f64, f64) {
 
     fn medium(x: f64, ix: u32) -> (i32, f64, f64) {
         /* rint(x/(pi/2)), Assume round-to-nearest. */
-        let tmp = x as f64 * INV_PIO2 + TO_INT;
+        let tmp = x * INV_PIO2 + TO_INT;
         // force rounding of tmp to it's storage format on x87 to avoid
         // excess precision issues.
         #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
diff --git a/src/math/rem_pio2_large.rs b/src/math/rem_pio2_large.rs
index 1dfbba3b1..ec8397f4b 100644
--- a/src/math/rem_pio2_large.rs
+++ b/src/math/rem_pio2_large.rs
@@ -425,8 +425,6 @@ pub(crate) fn rem_pio2_large(x: &[f64], y: &mut [f64], e0: i32, prec: usize) ->
             for i in (0..=jz).rev() {
                 fw += i!(fq, i);
             }
-            // TODO: drop excess precision here once double_t is used
-            fw = fw as f64;
             i!(y, 0, =, if ih == 0 { fw } else { -fw });
             fw = i!(fq, 0) - fw;
             for i in 1..=jz {
diff --git a/src/math/sincosf.rs b/src/math/sincosf.rs
index 423845e44..f33607676 100644
--- a/src/math/sincosf.rs
+++ b/src/math/sincosf.rs
@@ -67,14 +67,12 @@ pub fn sincosf(x: f32) -> (f32, f32) {
             }
         }
         /* -sin(x+c) is not correct if x+c could be 0: -0 vs +0 */
-        else {
-            if sign {
-                s = -k_sinf(x as f64 + S2PIO2);
-                c = -k_cosf(x as f64 + S2PIO2);
-            } else {
-                s = -k_sinf(x as f64 - S2PIO2);
-                c = -k_cosf(x as f64 - S2PIO2);
-            }
+        else if sign {
+            s = -k_sinf(x as f64 + S2PIO2);
+            c = -k_cosf(x as f64 + S2PIO2);
+        } else {
+            s = -k_sinf(x as f64 - S2PIO2);
+            c = -k_cosf(x as f64 - S2PIO2);
         }
 
         return (s, c);
@@ -91,14 +89,12 @@ pub fn sincosf(x: f32) -> (f32, f32) {
                 s = -k_cosf(x as f64 - S3PIO2);
                 c = k_sinf(x as f64 - S3PIO2);
             }
+        } else if sign {
+            s = k_sinf(x as f64 + S4PIO2);
+            c = k_cosf(x as f64 + S4PIO2);
         } else {
-            if sign {
-                s = k_sinf(x as f64 + S4PIO2);
-                c = k_cosf(x as f64 + S4PIO2);
-            } else {
-                s = k_sinf(x as f64 - S4PIO2);
-                c = k_cosf(x as f64 - S4PIO2);
-            }
+            s = k_sinf(x as f64 - S4PIO2);
+            c = k_cosf(x as f64 - S4PIO2);
         }
 
         return (s, c);
diff --git a/src/math/sqrt.rs b/src/math/sqrt.rs
index a443b7e4c..3eaf52cda 100644
--- a/src/math/sqrt.rs
+++ b/src/math/sqrt.rs
@@ -144,13 +144,15 @@ pub fn sqrt(x: f64) -> f64 {
     ix0 = (ix0 & 0x000fffff) | 0x00100000;
     if (m & 1) == 1 {
         /* odd m, double x to make it even */
-        ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32;
+        ix0 *= 2;
+        ix0 += ((ix1 & sign) >> 31).0 as i32;
         ix1 += ix1;
     }
     m >>= 1; /* m = [m/2] */
 
     /* generate sqrt(x) bit by bit */
-    ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32;
+    ix0 *= 2;
+    ix0 += ((ix1 & sign) >> 31).0 as i32;
     ix1 += ix1;
     q = 0; /* [q,q1] = sqrt(x) */
     q1 = Wrapping(0);
@@ -165,7 +167,8 @@ pub fn sqrt(x: f64) -> f64 {
             ix0 -= t;
             q += r.0 as i32;
         }
-        ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32;
+        ix0 *= 2;
+        ix0 += ((ix1 & sign) >> 31).0 as i32;
         ix1 += ix1;
         r >>= 1;
     }
@@ -186,7 +189,8 @@ pub fn sqrt(x: f64) -> f64 {
             ix1 -= t1;
             q1 += r;
         }
-        ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32;
+        ix0 *= 2;
+        ix0 += ((ix1 & sign) >> 31).0 as i32;
         ix1 += ix1;
         r >>= 1;
     }
diff --git a/src/math/support/int_traits.rs b/src/math/support/int_traits.rs
index c5feef8d7..b08907aa5 100644
--- a/src/math/support/int_traits.rs
+++ b/src/math/support/int_traits.rs
@@ -136,6 +136,9 @@ macro_rules! int_impl_common {
         }
 
         fn ilog2(self) -> u32 {
+            // On our older MSRV, this resolves to the trait method. Which won't actually work,
+            // but this is only called behind other gates.
+            #[allow(clippy::incompatible_msrv)]
             <Self>::ilog2(self)
         }
     };
diff --git a/src/math/tgamma.rs b/src/math/tgamma.rs
index 3f38c0b1d..60451416a 100644
--- a/src/math/tgamma.rs
+++ b/src/math/tgamma.rs
@@ -45,7 +45,8 @@ fn sinpi(mut x: f64) -> f64 {
         1 => k_cos(x, 0.0),
         2 => k_sin(-x, 0.0, 0),
         3 => -k_cos(x, 0.0),
-        0 | _ => k_sin(x, 0.0, 0),
+        // 0
+        _ => k_sin(x, 0.0, 0),
     }
 }
 
@@ -143,7 +144,7 @@ pub fn tgamma(mut x: f64) -> f64 {
     /* special cases */
     if ix >= 0x7ff00000 {
         /* tgamma(nan)=nan, tgamma(inf)=inf, tgamma(-inf)=nan with invalid */
-        return x + core::f64::INFINITY;
+        return x + f64::INFINITY;
     }
     if ix < ((0x3ff - 54) << 20) {
         /* |x| < 2^-54: tgamma(x) ~ 1/x, +-0 raises div-by-zero */

From e8c0f68aff1a9bc07a23c2da6de9f7c2c69573e3 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 1 Nov 2024 06:32:06 -0500
Subject: [PATCH 027/279] Enable clippy for `libm` in CI

---
 .github/workflows/main.yml | 10 ++++++++--
 ci/run.sh                  |  1 +
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index bfd86497b..14e557884 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -82,8 +82,8 @@ jobs:
         [ -n "${{ matrix.channel }}" ] && channel="${{ matrix.channel }}"
         rustup update "$channel" --no-self-update
         rustup default "$channel"
-        rustup target add ${{ matrix.target }}
-        rustup component add llvm-tools-preview
+        rustup target add "${{ matrix.target }}"
+        rustup component add clippy llvm-tools-preview
     - uses: Swatinem/rust-cache@v2
       with:
         key: ${{ matrix.target }}
@@ -105,6 +105,12 @@ jobs:
         rustup target add x86_64-unknown-linux-musl
         cargo generate-lockfile && ./ci/run-docker.sh ${{ matrix.target }}
 
+    - name: Clippy
+      run: |
+        # Run clippy on `libm`
+        cargo clippy --target "${{ matrix.target }}" --package libm
+        
+
   builtins:
     name: Check use with compiler-builtins
     runs-on: ubuntu-latest
diff --git a/ci/run.sh b/ci/run.sh
index a211bc98c..32453663e 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -88,3 +88,4 @@ else
     $cmd --benches
     $cmd --benches --release
 fi
+

From 9fa3bf3b87d9b1aa8f82f064e08dba3fcf139c08 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 31 Oct 2024 03:05:45 -0500
Subject: [PATCH 028/279] Introduce `hf32!` and `hf64!` macros for hex float
 support

Rust does not have any native way to parse hex floats, but they are
heavily used in the C algorithms that we derive from. Introduce a const
function that can parse these, as well as macros `hf32!` and `hf64!`
that ensure the string literals get handled at compiler time.

These are currently not used but making everything available now will
ease future development.

Co-authored-by: quaternic <57393910+quaternic@users.noreply.github.com>
---
 CONTRIBUTING.md               |  34 +--
 src/math/support/hex_float.rs | 399 ++++++++++++++++++++++++++++++++++
 src/math/support/macros.rs    |  20 ++
 src/math/support/mod.rs       |   3 +
 4 files changed, 430 insertions(+), 26 deletions(-)
 create mode 100644 src/math/support/hex_float.rs

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index a39623696..0a1741631 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -44,37 +44,19 @@ Check [PR #65] for an example.
   `mod.rs`.
 
 - You may encounter weird literals like `0x1p127f` in the MUSL code. These are hexadecimal floating
-  point literals. Rust (the language) doesn't support these kind of literals. The best way I have
-  found to deal with these literals is to turn them into their integer representation using the
-  [`hexf!`] macro and then turn them back into floats. See below:
+  point literals. Rust (the language) doesn't support these kind of literals. This crate provides
+  two macros, `hf32!` and `hf64!`, which convert string literals to floats at compile time.
 
-[`hexf!`]: https://crates.io/crates/hexf
-
-``` rust
-// Step 1: write a program to convert the float into its integer representation
-#[macro_use]
-extern crate hexf;
-
-fn main() {
-    println!("{:#x}", hexf32!("0x1.0p127").to_bits());
-}
-```
-
-``` console
-$ # Step 2: run the program
-$ cargo run
-0x7f000000
-```
-
-``` rust
-// Step 3: copy paste the output into libm
-let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 12
-```
+  ```rust
+  assert_eq!(hf32!("0x1.ffep+8").to_bits(), 0x43fff000);
+  assert_eq!(hf64!("0x1.ffep+8").to_bits(), 0x407ffe0000000000);
+  ```
 
 - Rust code panics on arithmetic overflows when not optimized. You may need to use the [`Wrapping`]
-  newtype to avoid this problem.
+  newtype to avoid this problem, or individual methods like [`wrapping_add`].
 
 [`Wrapping`]: https://doc.rust-lang.org/std/num/struct.Wrapping.html
+[`wrapping_add`]: https://doc.rust-lang.org/std/primitive.u32.html#method.wrapping_add
 
 ## Testing
 
diff --git a/src/math/support/hex_float.rs b/src/math/support/hex_float.rs
new file mode 100644
index 000000000..80434a5ec
--- /dev/null
+++ b/src/math/support/hex_float.rs
@@ -0,0 +1,399 @@
+//! Utilities for working with hex float formats.
+
+#![allow(dead_code)] // FIXME: remove once this gets used
+
+/// Construct a 32-bit float from hex float representation (C-style)
+pub const fn hf32(s: &str) -> f32 {
+    f32_from_bits(parse_any(s, 32, 23) as u32)
+}
+
+/// Construct a 64-bit float from hex float representation (C-style)
+pub const fn hf64(s: &str) -> f64 {
+    f64_from_bits(parse_any(s, 64, 52) as u64)
+}
+
+const fn parse_any(s: &str, bits: u32, sig_bits: u32) -> u128 {
+    let exp_bits: u32 = bits - sig_bits - 1;
+    let max_msb: i32 = (1 << (exp_bits - 1)) - 1;
+    // The exponent of one ULP in the subnormals
+    let min_lsb: i32 = 1 - max_msb - sig_bits as i32;
+
+    let (neg, mut sig, exp) = parse_hex(s.as_bytes());
+
+    if sig == 0 {
+        return (neg as u128) << (bits - 1);
+    }
+
+    // exponents of the least and most significant bits in the value
+    let lsb = sig.trailing_zeros() as i32;
+    let msb = u128_ilog2(sig) as i32;
+    let sig_bits = sig_bits as i32;
+
+    assert!(msb - lsb <= sig_bits, "the value is too precise");
+    assert!(msb + exp <= max_msb, "the value is too huge");
+    assert!(lsb + exp >= min_lsb, "the value is too tiny");
+
+    // The parsed value is X = sig * 2^exp
+    // Expressed as a multiple U of the smallest subnormal value:
+    // X = U * 2^min_lsb, so U = sig * 2^(exp-min_lsb)
+    let mut uexp = exp - min_lsb;
+
+    let shift = if uexp + msb >= sig_bits {
+        // normal, shift msb to position sig_bits
+        sig_bits - msb
+    } else {
+        // subnormal, shift so that uexp becomes 0
+        uexp
+    };
+
+    if shift >= 0 {
+        sig <<= shift;
+    } else {
+        sig >>= -shift;
+    }
+    uexp -= shift;
+
+    // the most significant bit is like having 1 in the exponent bits
+    // add any leftover exponent to that
+    assert!(uexp >= 0 && uexp < (1 << exp_bits) - 2);
+    sig += (uexp as u128) << sig_bits;
+
+    // finally, set the sign bit if necessary
+    sig | ((neg as u128) << (bits - 1))
+}
+
+/// Parse a hexadecimal float x
+/// returns (s,n,e):
+///     s == x.is_sign_negative()
+///     n * 2^e == x.abs()
+const fn parse_hex(mut b: &[u8]) -> (bool, u128, i32) {
+    let mut neg = false;
+    let mut sig: u128 = 0;
+    let mut exp: i32 = 0;
+
+    if let &[c @ (b'-' | b'+'), ref rest @ ..] = b {
+        b = rest;
+        neg = c == b'-';
+    }
+
+    if let &[b'0', b'x' | b'X', ref rest @ ..] = b {
+        b = rest;
+    } else {
+        panic!("no hex indicator");
+    }
+
+    let mut seen_point = false;
+    let mut some_digits = false;
+
+    while let &[c, ref rest @ ..] = b {
+        b = rest;
+
+        match c {
+            b'.' => {
+                assert!(!seen_point);
+                seen_point = true;
+                continue;
+            }
+            b'p' | b'P' => break,
+            c => {
+                let digit = hex_digit(c);
+                some_digits = true;
+                let of;
+                (sig, of) = sig.overflowing_mul(16);
+                assert!(!of, "too many digits");
+                sig |= digit as u128;
+                // up until the fractional point, the value grows
+                // with more digits, but after it the exponent is
+                // compensated to match.
+                if seen_point {
+                    exp -= 4;
+                }
+            }
+        }
+    }
+    assert!(some_digits, "at least one digit is required");
+    some_digits = false;
+
+    let mut negate_exp = false;
+    if let &[c @ (b'-' | b'+'), ref rest @ ..] = b {
+        b = rest;
+        negate_exp = c == b'-';
+    }
+
+    let mut pexp: i32 = 0;
+    while let &[c, ref rest @ ..] = b {
+        b = rest;
+        let digit = dec_digit(c);
+        some_digits = true;
+        let of;
+        (pexp, of) = pexp.overflowing_mul(10);
+        assert!(!of, "too many exponent digits");
+        pexp += digit as i32;
+    }
+    assert!(some_digits, "at least one exponent digit is required");
+
+    if negate_exp {
+        exp -= pexp;
+    } else {
+        exp += pexp;
+    }
+
+    (neg, sig, exp)
+}
+
+const fn dec_digit(c: u8) -> u8 {
+    match c {
+        b'0'..=b'9' => c - b'0',
+        _ => panic!("bad char"),
+    }
+}
+
+const fn hex_digit(c: u8) -> u8 {
+    match c {
+        b'0'..=b'9' => c - b'0',
+        b'a'..=b'f' => c - b'a' + 10,
+        b'A'..=b'F' => c - b'A' + 10,
+        _ => panic!("bad char"),
+    }
+}
+
+/* FIXME(msrv): vendor some things that are not const stable at our MSRV */
+
+/// `f32::from_bits`
+const fn f32_from_bits(v: u32) -> f32 {
+    unsafe { core::mem::transmute(v) }
+}
+
+/// `f64::from_bits`
+const fn f64_from_bits(v: u64) -> f64 {
+    unsafe { core::mem::transmute(v) }
+}
+
+/// `u128::ilog2`
+const fn u128_ilog2(v: u128) -> u32 {
+    assert!(v != 0);
+    u128::BITS - 1 - v.leading_zeros()
+}
+
+#[cfg(test)]
+mod tests {
+    extern crate std;
+    use std::{format, println};
+
+    use super::*;
+
+    #[test]
+    fn test_parse_any() {
+        for k in -149..=127 {
+            let s = format!("0x1p{k}");
+            let x = hf32(&s);
+            let y = if k < 0 { 0.5f32.powi(-k) } else { 2.0f32.powi(k) };
+            assert_eq!(x, y);
+        }
+
+        let mut s = *b"0x.0000000p-121";
+        for e in 0..40 {
+            for k in 0..(1 << 15) {
+                let expected = f32::from_bits(k) * 2.0f32.powi(e);
+                let x = hf32(std::str::from_utf8(&s).unwrap());
+                assert_eq!(
+                    x.to_bits(),
+                    expected.to_bits(),
+                    "\
+                    e={e}\n\
+                    k={k}\n\
+                    x={x}\n\
+                    expected={expected}\n\
+                    s={}\n\
+                    f32::from_bits(k)={}\n\
+                    2.0f32.powi(e)={}\
+                    ",
+                    std::str::from_utf8(&s).unwrap(),
+                    f32::from_bits(k),
+                    2.0f32.powi(e),
+                );
+                for i in (3..10).rev() {
+                    if s[i] == b'f' {
+                        s[i] = b'0';
+                    } else if s[i] == b'9' {
+                        s[i] = b'a';
+                        break;
+                    } else {
+                        s[i] += 1;
+                        break;
+                    }
+                }
+            }
+            for i in (12..15).rev() {
+                if s[i] == b'0' {
+                    s[i] = b'9';
+                } else {
+                    s[i] -= 1;
+                    break;
+                }
+            }
+            for i in (3..10).rev() {
+                s[i] = b'0';
+            }
+        }
+    }
+
+    #[test]
+    fn test_f32() {
+        let checks = [
+            ("0x.1234p+16", (0x1234 as f32).to_bits()),
+            ("0x1.234p+12", (0x1234 as f32).to_bits()),
+            ("0x12.34p+8", (0x1234 as f32).to_bits()),
+            ("0x123.4p+4", (0x1234 as f32).to_bits()),
+            ("0x1234p+0", (0x1234 as f32).to_bits()),
+            ("0x1234.p+0", (0x1234 as f32).to_bits()),
+            ("0x1234.0p+0", (0x1234 as f32).to_bits()),
+            ("0x1.fffffep+127", f32::MAX.to_bits()),
+            ("0x1.0p+1", 2.0f32.to_bits()),
+            ("0x1.0p+0", 1.0f32.to_bits()),
+            ("0x1.ffep+8", 0x43fff000),
+            ("+0x1.ffep+8", 0x43fff000),
+            ("0x1p+0", 0x3f800000),
+            ("0x1.99999ap-4", 0x3dcccccd),
+            ("0x1.9p+6", 0x42c80000),
+            ("0x1.2d5ed2p+20", 0x4996af69),
+            ("-0x1.348eb8p+10", 0xc49a475c),
+            ("-0x1.33dcfep-33", 0xaf19ee7f),
+            ("0x0.0p0", 0.0f32.to_bits()),
+            ("-0x0.0p0", (-0.0f32).to_bits()),
+            ("0x1.0p0", 1.0f32.to_bits()),
+            ("0x1.99999ap-4", (0.1f32).to_bits()),
+            ("-0x1.99999ap-4", (-0.1f32).to_bits()),
+            ("0x1.111114p-127", 0x00444445),
+            ("0x1.23456p-130", 0x00091a2b),
+            ("0x1p-149", 0x00000001),
+        ];
+        for (s, exp) in checks {
+            println!("parsing {s}");
+            let act = hf32(s).to_bits();
+            assert_eq!(
+                act, exp,
+                "parsing {s}: {act:#010x} != {exp:#010x}\nact: {act:#034b}\nexp: {exp:#034b}"
+            );
+        }
+    }
+
+    #[test]
+    fn test_f64() {
+        let checks = [
+            ("0x.1234p+16", (0x1234 as f64).to_bits()),
+            ("0x1.234p+12", (0x1234 as f64).to_bits()),
+            ("0x12.34p+8", (0x1234 as f64).to_bits()),
+            ("0x123.4p+4", (0x1234 as f64).to_bits()),
+            ("0x1234p+0", (0x1234 as f64).to_bits()),
+            ("0x1234.p+0", (0x1234 as f64).to_bits()),
+            ("0x1234.0p+0", (0x1234 as f64).to_bits()),
+            ("0x1.ffep+8", 0x407ffe0000000000),
+            ("0x1p+0", 0x3ff0000000000000),
+            ("0x1.999999999999ap-4", 0x3fb999999999999a),
+            ("0x1.9p+6", 0x4059000000000000),
+            ("0x1.2d5ed1fe1da7bp+20", 0x4132d5ed1fe1da7b),
+            ("-0x1.348eb851eb852p+10", 0xc09348eb851eb852),
+            ("-0x1.33dcfe54a3803p-33", 0xbde33dcfe54a3803),
+            ("0x1.0p0", 1.0f64.to_bits()),
+            ("0x0.0p0", 0.0f64.to_bits()),
+            ("-0x0.0p0", (-0.0f64).to_bits()),
+            ("0x1.999999999999ap-4", 0.1f64.to_bits()),
+            ("0x1.999999999998ap-4", (0.1f64 - f64::EPSILON).to_bits()),
+            ("-0x1.999999999999ap-4", (-0.1f64).to_bits()),
+            ("-0x1.999999999998ap-4", (-0.1f64 + f64::EPSILON).to_bits()),
+            ("0x0.8000000000001p-1022", 0x0008000000000001),
+            ("0x0.123456789abcdp-1022", 0x000123456789abcd),
+            ("0x0.0000000000002p-1022", 0x0000000000000002),
+        ];
+        for (s, exp) in checks {
+            println!("parsing {s}");
+            let act = hf64(s).to_bits();
+            assert_eq!(
+                act, exp,
+                "parsing {s}: {act:#018x} != {exp:#018x}\nact: {act:#066b}\nexp: {exp:#066b}"
+            );
+        }
+    }
+
+    #[test]
+    fn test_f32_almost_extra_precision() {
+        // Exact maximum precision allowed
+        hf32("0x1.abcdeep+0");
+    }
+
+    #[test]
+    fn test_macros() {
+        assert_eq!(hf32!("0x1.ffep+8").to_bits(), 0x43fff000u32);
+        assert_eq!(hf64!("0x1.ffep+8").to_bits(), 0x407ffe0000000000u64);
+    }
+}
+
+#[cfg(test)]
+// FIXME(ppc): something with `should_panic` tests cause a SIGILL with ppc64le
+#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
+mod tests_panicking {
+    extern crate std;
+    use super::*;
+
+    #[test]
+    #[should_panic]
+    fn test_f32_extra_precision2() {
+        // One bit more than the above.
+        hf32("0x1.ffffffp+127");
+    }
+
+    #[test]
+    #[should_panic(expected = "the value is too huge")]
+    fn test_f32_overflow() {
+        // One bit more than the above.
+        hf32("0x1p+128");
+    }
+
+    #[test]
+    #[should_panic(expected = "the value is too precise")]
+    fn test_f32_extra_precision() {
+        // One bit more than the above.
+        hf32("0x1.abcdefp+0");
+    }
+
+    #[test]
+    fn test_f32_tiniest() {
+        let x = hf32("0x1.p-149");
+        let y = hf32("0x0.0000000000000001p-85");
+        let z = hf32("0x0.8p-148");
+        assert_eq!(x, y);
+        assert_eq!(x, z);
+    }
+
+    #[test]
+    #[should_panic(expected = "the value is too tiny")]
+    fn test_f32_too_tiny() {
+        hf32("0x1.p-150");
+    }
+
+    #[test]
+    #[should_panic(expected = "the value is too tiny")]
+    fn test_f32_also_too_tiny() {
+        hf32("0x0.8p-149");
+    }
+
+    #[test]
+    #[should_panic(expected = "the value is too tiny")]
+    fn test_f32_again_too_tiny() {
+        hf32("0x0.0000000000000001p-86");
+    }
+
+    #[test]
+    fn test_f64_almost_extra_precision() {
+        // Exact maximum precision allowed
+        hf64("0x1.abcdabcdabcdfp+0");
+    }
+
+    #[test]
+    #[should_panic(expected = "the value is too precise")]
+    fn test_f64_extra_precision() {
+        // One bit more than the above.
+        hf64("0x1.abcdabcdabcdf8p+0");
+    }
+}
diff --git a/src/math/support/macros.rs b/src/math/support/macros.rs
index b14bbec38..9441eace5 100644
--- a/src/math/support/macros.rs
+++ b/src/math/support/macros.rs
@@ -105,3 +105,23 @@ macro_rules! select_implementation {
     (@cfg ; $ex:expr) => { };
     (@cfg $provided:meta; $ex:expr) => { #[cfg($provided)] $ex };
 }
+
+/// Construct a 32-bit float from hex float representation (C-style), guaranteed to
+/// evaluate at compile time.
+#[allow(unused_macros)]
+macro_rules! hf32 {
+    ($s:literal) => {{
+        const X: f32 = $crate::math::support::hf32($s);
+        X
+    }};
+}
+
+/// Construct a 64-bit float from hex float representation (C-style), guaranteed to
+/// evaluate at compile time.
+#[allow(unused_macros)]
+macro_rules! hf64 {
+    ($s:literal) => {{
+        const X: f64 = $crate::math::support::hf64($s);
+        X
+    }};
+}
diff --git a/src/math/support/mod.rs b/src/math/support/mod.rs
index f054df6cd..04a313abc 100644
--- a/src/math/support/mod.rs
+++ b/src/math/support/mod.rs
@@ -1,7 +1,10 @@
 #[macro_use]
 pub mod macros;
 mod float_traits;
+mod hex_float;
 mod int_traits;
 
 pub use float_traits::Float;
+#[allow(unused_imports)]
+pub use hex_float::{hf32, hf64};
 pub use int_traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt};

From af009cc000b189eda7b724b86adc0616772f7270 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johanna=20S=C3=B6rng=C3=A5rd?=
 <44257381+JSorngard@users.noreply.github.com>
Date: Fri, 1 Nov 2024 13:29:03 +0100
Subject: [PATCH 029/279] Add some more basic docstrings (#352)

* Add docstrings to the tgamma functions
* Add docstrings to the lgamma functions
* Add docstrings to trunc
* Add docstrings to exp10 functions
---
 src/math/exp10.rs   | 1 +
 src/math/exp10f.rs  | 1 +
 src/math/lgamma.rs  | 2 ++
 src/math/lgammaf.rs | 2 ++
 src/math/tgamma.rs  | 1 +
 src/math/tgammaf.rs | 1 +
 src/math/trunc.rs   | 3 +++
 src/math/truncf.rs  | 3 +++
 8 files changed, 14 insertions(+)

diff --git a/src/math/exp10.rs b/src/math/exp10.rs
index 559930e10..2c3df0173 100644
--- a/src/math/exp10.rs
+++ b/src/math/exp10.rs
@@ -6,6 +6,7 @@ const P10: &[f64] = &[
     1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15,
 ];
 
+/// Calculates 10 raised to the power of `x` (f64).
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn exp10(x: f64) -> f64 {
     let (mut y, n) = modf(x);
diff --git a/src/math/exp10f.rs b/src/math/exp10f.rs
index 786305481..e81d18380 100644
--- a/src/math/exp10f.rs
+++ b/src/math/exp10f.rs
@@ -5,6 +5,7 @@ const LN10_F64: f64 = 3.32192809488736234787031942948939;
 const P10: &[f32] =
     &[1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7];
 
+/// Calculates 10 raised to the power of `x` (f32).
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn exp10f(x: f32) -> f32 {
     let (mut y, n) = modff(x);
diff --git a/src/math/lgamma.rs b/src/math/lgamma.rs
index a08bc5b64..8312dc186 100644
--- a/src/math/lgamma.rs
+++ b/src/math/lgamma.rs
@@ -1,5 +1,7 @@
 use super::lgamma_r;
 
+/// The natural logarithm of the
+/// [Gamma function](https://en.wikipedia.org/wiki/Gamma_function) (f64).
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn lgamma(x: f64) -> f64 {
     lgamma_r(x).0
diff --git a/src/math/lgammaf.rs b/src/math/lgammaf.rs
index a9c2da75b..d37512397 100644
--- a/src/math/lgammaf.rs
+++ b/src/math/lgammaf.rs
@@ -1,5 +1,7 @@
 use super::lgammaf_r;
 
+/// The natural logarithm of the
+/// [Gamma function](https://en.wikipedia.org/wiki/Gamma_function) (f32).
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn lgammaf(x: f32) -> f32 {
     lgammaf_r(x).0
diff --git a/src/math/tgamma.rs b/src/math/tgamma.rs
index 60451416a..305986064 100644
--- a/src/math/tgamma.rs
+++ b/src/math/tgamma.rs
@@ -130,6 +130,7 @@ fn s(x: f64) -> f64 {
     return num / den;
 }
 
+/// The [Gamma function](https://en.wikipedia.org/wiki/Gamma_function) (f64).
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn tgamma(mut x: f64) -> f64 {
     let u: u64 = x.to_bits();
diff --git a/src/math/tgammaf.rs b/src/math/tgammaf.rs
index 23e3814f9..fe178f7a3 100644
--- a/src/math/tgammaf.rs
+++ b/src/math/tgammaf.rs
@@ -1,5 +1,6 @@
 use super::tgamma;
 
+/// The [Gamma function](https://en.wikipedia.org/wiki/Gamma_function) (f32).
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn tgammaf(x: f32) -> f32 {
     tgamma(x as f64) as f32
diff --git a/src/math/trunc.rs b/src/math/trunc.rs
index 6961bb950..34bc2fdfa 100644
--- a/src/math/trunc.rs
+++ b/src/math/trunc.rs
@@ -1,5 +1,8 @@
 use core::f64;
 
+/// Rounds the number toward 0 to the closest integral value (f64).
+///
+/// This effectively removes the decimal part of the number, leaving the integral part.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn trunc(x: f64) -> f64 {
     select_implementation! {
diff --git a/src/math/truncf.rs b/src/math/truncf.rs
index 8270c8eb3..a74f78987 100644
--- a/src/math/truncf.rs
+++ b/src/math/truncf.rs
@@ -1,5 +1,8 @@
 use core::f32;
 
+/// Rounds the number toward 0 to the closest integral value (f32).
+///
+/// This effectively removes the decimal part of the number, leaving the integral part.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn truncf(x: f32) -> f32 {
     select_implementation! {

From 4cd303da4510428eecc8cb51c6b7040c390548d8 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 1 Nov 2024 07:45:19 -0500
Subject: [PATCH 030/279] Resolve clippy errors in `libm` tests and check this
 in CI

---
 .github/workflows/main.yml |  5 ++-
 src/math/ceil.rs           |  6 +--
 src/math/ceilf.rs          |  6 +--
 src/math/fabs.rs           |  8 ++--
 src/math/fabsf.rs          |  8 ++--
 src/math/floor.rs          |  6 +--
 src/math/floorf.rs         |  6 +--
 src/math/pow.rs            | 85 ++++++++++++++++++--------------------
 src/math/sqrt.rs           |  9 ++--
 src/math/sqrtf.rs          |  9 ++--
 10 files changed, 65 insertions(+), 83 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 14e557884..c79d637ec 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -106,9 +106,10 @@ jobs:
         cargo generate-lockfile && ./ci/run-docker.sh ${{ matrix.target }}
 
     - name: Clippy
-      run: |
+      # Tests and utilities can't build on no_std targets
+      if: "!contains(matrix.target, 'thumb')"
         # Run clippy on `libm`
-        cargo clippy --target "${{ matrix.target }}" --package libm
+      run: cargo clippy --target "${{ matrix.target }}" --package libm --all-targets
         
 
   builtins:
diff --git a/src/math/ceil.rs b/src/math/ceil.rs
index c7e857dbb..c198ebcfe 100644
--- a/src/math/ceil.rs
+++ b/src/math/ceil.rs
@@ -34,8 +34,6 @@ pub fn ceil(x: f64) -> f64 {
 
 #[cfg(test)]
 mod tests {
-    use core::f64::*;
-
     use super::*;
 
     #[test]
@@ -48,8 +46,8 @@ mod tests {
     #[test]
     fn spec_tests() {
         // Not Asserted: that the current rounding mode has no effect.
-        assert!(ceil(NAN).is_nan());
-        for f in [0.0, -0.0, INFINITY, NEG_INFINITY].iter().copied() {
+        assert!(ceil(f64::NAN).is_nan());
+        for f in [0.0, -0.0, f64::INFINITY, f64::NEG_INFINITY].iter().copied() {
             assert_eq!(ceil(f), f);
         }
     }
diff --git a/src/math/ceilf.rs b/src/math/ceilf.rs
index 0da384350..9eb2ec07a 100644
--- a/src/math/ceilf.rs
+++ b/src/math/ceilf.rs
@@ -42,8 +42,6 @@ pub fn ceilf(x: f32) -> f32 {
 #[cfg(not(target_arch = "powerpc64"))]
 #[cfg(test)]
 mod tests {
-    use core::f32::*;
-
     use super::*;
 
     #[test]
@@ -56,8 +54,8 @@ mod tests {
     #[test]
     fn spec_tests() {
         // Not Asserted: that the current rounding mode has no effect.
-        assert!(ceilf(NAN).is_nan());
-        for f in [0.0, -0.0, INFINITY, NEG_INFINITY].iter().copied() {
+        assert!(ceilf(f32::NAN).is_nan());
+        for f in [0.0, -0.0, f32::INFINITY, f32::NEG_INFINITY].iter().copied() {
             assert_eq!(ceilf(f), f);
         }
     }
diff --git a/src/math/fabs.rs b/src/math/fabs.rs
index d7980eb65..d083053e1 100644
--- a/src/math/fabs.rs
+++ b/src/math/fabs.rs
@@ -14,8 +14,6 @@ pub fn fabs(x: f64) -> f64 {
 
 #[cfg(test)]
 mod tests {
-    use core::f64::*;
-
     use super::*;
 
     #[test]
@@ -27,12 +25,12 @@ mod tests {
     /// The spec: https://en.cppreference.com/w/cpp/numeric/math/fabs
     #[test]
     fn spec_tests() {
-        assert!(fabs(NAN).is_nan());
+        assert!(fabs(f64::NAN).is_nan());
         for f in [0.0, -0.0].iter().copied() {
             assert_eq!(fabs(f), 0.0);
         }
-        for f in [INFINITY, NEG_INFINITY].iter().copied() {
-            assert_eq!(fabs(f), INFINITY);
+        for f in [f64::INFINITY, f64::NEG_INFINITY].iter().copied() {
+            assert_eq!(fabs(f), f64::INFINITY);
         }
     }
 }
diff --git a/src/math/fabsf.rs b/src/math/fabsf.rs
index 1dac6389d..eabe87254 100644
--- a/src/math/fabsf.rs
+++ b/src/math/fabsf.rs
@@ -16,8 +16,6 @@ pub fn fabsf(x: f32) -> f32 {
 #[cfg(not(target_arch = "powerpc64"))]
 #[cfg(test)]
 mod tests {
-    use core::f32::*;
-
     use super::*;
 
     #[test]
@@ -29,12 +27,12 @@ mod tests {
     /// The spec: https://en.cppreference.com/w/cpp/numeric/math/fabs
     #[test]
     fn spec_tests() {
-        assert!(fabsf(NAN).is_nan());
+        assert!(fabsf(f32::NAN).is_nan());
         for f in [0.0, -0.0].iter().copied() {
             assert_eq!(fabsf(f), 0.0);
         }
-        for f in [INFINITY, NEG_INFINITY].iter().copied() {
-            assert_eq!(fabsf(f), INFINITY);
+        for f in [f32::INFINITY, f32::NEG_INFINITY].iter().copied() {
+            assert_eq!(fabsf(f), f32::INFINITY);
         }
     }
 }
diff --git a/src/math/floor.rs b/src/math/floor.rs
index 532226b9f..e478f6d54 100644
--- a/src/math/floor.rs
+++ b/src/math/floor.rs
@@ -33,8 +33,6 @@ pub fn floor(x: f64) -> f64 {
 
 #[cfg(test)]
 mod tests {
-    use core::f64::*;
-
     use super::*;
 
     #[test]
@@ -47,8 +45,8 @@ mod tests {
     #[test]
     fn spec_tests() {
         // Not Asserted: that the current rounding mode has no effect.
-        assert!(floor(NAN).is_nan());
-        for f in [0.0, -0.0, INFINITY, NEG_INFINITY].iter().copied() {
+        assert!(floor(f64::NAN).is_nan());
+        for f in [0.0, -0.0, f64::INFINITY, f64::NEG_INFINITY].iter().copied() {
             assert_eq!(floor(f), f);
         }
     }
diff --git a/src/math/floorf.rs b/src/math/floorf.rs
index 4f38cb15b..bd1570c86 100644
--- a/src/math/floorf.rs
+++ b/src/math/floorf.rs
@@ -42,8 +42,6 @@ pub fn floorf(x: f32) -> f32 {
 #[cfg(not(target_arch = "powerpc64"))]
 #[cfg(test)]
 mod tests {
-    use core::f32::*;
-
     use super::*;
 
     #[test]
@@ -57,8 +55,8 @@ mod tests {
     #[test]
     fn spec_tests() {
         // Not Asserted: that the current rounding mode has no effect.
-        assert!(floorf(NAN).is_nan());
-        for f in [0.0, -0.0, INFINITY, NEG_INFINITY].iter().copied() {
+        assert!(floorf(f32::NAN).is_nan());
+        for f in [0.0, -0.0, f32::INFINITY, f32::NEG_INFINITY].iter().copied() {
             assert_eq!(floorf(f), f);
         }
     }
diff --git a/src/math/pow.rs b/src/math/pow.rs
index 736465cd1..80b2a2499 100644
--- a/src/math/pow.rs
+++ b/src/math/pow.rs
@@ -398,7 +398,6 @@ mod tests {
     extern crate core;
 
     use self::core::f64::consts::{E, PI};
-    use self::core::f64::{EPSILON, INFINITY, MAX, MIN, MIN_POSITIVE, NAN, NEG_INFINITY};
     use super::pow;
 
     const POS_ZERO: &[f64] = &[0.0];
@@ -407,15 +406,15 @@ mod tests {
     const NEG_ONE: &[f64] = &[-1.0];
     const POS_FLOATS: &[f64] = &[99.0 / 70.0, E, PI];
     const NEG_FLOATS: &[f64] = &[-99.0 / 70.0, -E, -PI];
-    const POS_SMALL_FLOATS: &[f64] = &[(1.0 / 2.0), MIN_POSITIVE, EPSILON];
-    const NEG_SMALL_FLOATS: &[f64] = &[-(1.0 / 2.0), -MIN_POSITIVE, -EPSILON];
-    const POS_EVENS: &[f64] = &[2.0, 6.0, 8.0, 10.0, 22.0, 100.0, MAX];
-    const NEG_EVENS: &[f64] = &[MIN, -100.0, -22.0, -10.0, -8.0, -6.0, -2.0];
+    const POS_SMALL_FLOATS: &[f64] = &[(1.0 / 2.0), f64::MIN_POSITIVE, f64::EPSILON];
+    const NEG_SMALL_FLOATS: &[f64] = &[-(1.0 / 2.0), -f64::MIN_POSITIVE, -f64::EPSILON];
+    const POS_EVENS: &[f64] = &[2.0, 6.0, 8.0, 10.0, 22.0, 100.0, f64::MAX];
+    const NEG_EVENS: &[f64] = &[f64::MIN, -100.0, -22.0, -10.0, -8.0, -6.0, -2.0];
     const POS_ODDS: &[f64] = &[3.0, 7.0];
     const NEG_ODDS: &[f64] = &[-7.0, -3.0];
-    const NANS: &[f64] = &[NAN];
-    const POS_INF: &[f64] = &[INFINITY];
-    const NEG_INF: &[f64] = &[NEG_INFINITY];
+    const NANS: &[f64] = &[f64::NAN];
+    const POS_INF: &[f64] = &[f64::INFINITY];
+    const NEG_INF: &[f64] = &[f64::NEG_INFINITY];
 
     const ALL: &[&[f64]] = &[
         POS_ZERO,
@@ -492,83 +491,83 @@ mod tests {
     #[test]
     fn nan_inputs() {
         // NAN as the base:
-        // (NAN ^ anything *but 0* should be NAN)
-        test_sets_as_exponent(NAN, &ALL[2..], NAN);
+        // (f64::NAN ^ anything *but 0* should be f64::NAN)
+        test_sets_as_exponent(f64::NAN, &ALL[2..], f64::NAN);
 
-        // NAN as the exponent:
-        // (anything *but 1* ^ NAN should be NAN)
-        test_sets_as_base(&ALL[..(ALL.len() - 2)], NAN, NAN);
+        // f64::NAN as the exponent:
+        // (anything *but 1* ^ f64::NAN should be f64::NAN)
+        test_sets_as_base(&ALL[..(ALL.len() - 2)], f64::NAN, f64::NAN);
     }
 
     #[test]
     fn infinity_as_base() {
         // Positive Infinity as the base:
-        // (+Infinity ^ positive anything but 0 and NAN should be +Infinity)
-        test_sets_as_exponent(INFINITY, &POS[1..], INFINITY);
+        // (+Infinity ^ positive anything but 0 and f64::NAN should be +Infinity)
+        test_sets_as_exponent(f64::INFINITY, &POS[1..], f64::INFINITY);
 
-        // (+Infinity ^ negative anything except 0 and NAN should be 0.0)
-        test_sets_as_exponent(INFINITY, &NEG[1..], 0.0);
+        // (+Infinity ^ negative anything except 0 and f64::NAN should be 0.0)
+        test_sets_as_exponent(f64::INFINITY, &NEG[1..], 0.0);
 
         // Negative Infinity as the base:
         // (-Infinity ^ positive odd ints should be -Infinity)
-        test_sets_as_exponent(NEG_INFINITY, &[POS_ODDS], NEG_INFINITY);
+        test_sets_as_exponent(f64::NEG_INFINITY, &[POS_ODDS], f64::NEG_INFINITY);
 
         // (-Infinity ^ anything but odd ints should be == -0 ^ (-anything))
         // We can lump in pos/neg odd ints here because they don't seem to
         // cause panics (div by zero) in release mode (I think).
-        test_sets(ALL, &|v: f64| pow(NEG_INFINITY, v), &|v: f64| pow(-0.0, -v));
+        test_sets(ALL, &|v: f64| pow(f64::NEG_INFINITY, v), &|v: f64| pow(-0.0, -v));
     }
 
     #[test]
     fn infinity_as_exponent() {
         // Positive/Negative base greater than 1:
-        // (pos/neg > 1 ^ Infinity should be Infinity - note this excludes NAN as the base)
-        test_sets_as_base(&ALL[5..(ALL.len() - 2)], INFINITY, INFINITY);
+        // (pos/neg > 1 ^ Infinity should be Infinity - note this excludes f64::NAN as the base)
+        test_sets_as_base(&ALL[5..(ALL.len() - 2)], f64::INFINITY, f64::INFINITY);
 
         // (pos/neg > 1 ^ -Infinity should be 0.0)
-        test_sets_as_base(&ALL[5..ALL.len() - 2], NEG_INFINITY, 0.0);
+        test_sets_as_base(&ALL[5..ALL.len() - 2], f64::NEG_INFINITY, 0.0);
 
         // Positive/Negative base less than 1:
         let base_below_one = &[POS_ZERO, NEG_ZERO, NEG_SMALL_FLOATS, POS_SMALL_FLOATS];
 
-        // (pos/neg < 1 ^ Infinity should be 0.0 - this also excludes NAN as the base)
-        test_sets_as_base(base_below_one, INFINITY, 0.0);
+        // (pos/neg < 1 ^ Infinity should be 0.0 - this also excludes f64::NAN as the base)
+        test_sets_as_base(base_below_one, f64::INFINITY, 0.0);
 
         // (pos/neg < 1 ^ -Infinity should be Infinity)
-        test_sets_as_base(base_below_one, NEG_INFINITY, INFINITY);
+        test_sets_as_base(base_below_one, f64::NEG_INFINITY, f64::INFINITY);
 
         // Positive/Negative 1 as the base:
         // (pos/neg 1 ^ Infinity should be 1)
-        test_sets_as_base(&[NEG_ONE, POS_ONE], INFINITY, 1.0);
+        test_sets_as_base(&[NEG_ONE, POS_ONE], f64::INFINITY, 1.0);
 
         // (pos/neg 1 ^ -Infinity should be 1)
-        test_sets_as_base(&[NEG_ONE, POS_ONE], NEG_INFINITY, 1.0);
+        test_sets_as_base(&[NEG_ONE, POS_ONE], f64::NEG_INFINITY, 1.0);
     }
 
     #[test]
     fn zero_as_base() {
         // Positive Zero as the base:
-        // (+0 ^ anything positive but 0 and NAN should be +0)
+        // (+0 ^ anything positive but 0 and f64::NAN should be +0)
         test_sets_as_exponent(0.0, &POS[1..], 0.0);
 
-        // (+0 ^ anything negative but 0 and NAN should be Infinity)
+        // (+0 ^ anything negative but 0 and f64::NAN should be Infinity)
         // (this should panic because we're dividing by zero)
-        test_sets_as_exponent(0.0, &NEG[1..], INFINITY);
+        test_sets_as_exponent(0.0, &NEG[1..], f64::INFINITY);
 
         // Negative Zero as the base:
-        // (-0 ^ anything positive but 0, NAN, and odd ints should be +0)
+        // (-0 ^ anything positive but 0, f64::NAN, and odd ints should be +0)
         test_sets_as_exponent(-0.0, &POS[3..], 0.0);
 
-        // (-0 ^ anything negative but 0, NAN, and odd ints should be Infinity)
+        // (-0 ^ anything negative but 0, f64::NAN, and odd ints should be Infinity)
         // (should panic because of divide by zero)
-        test_sets_as_exponent(-0.0, &NEG[3..], INFINITY);
+        test_sets_as_exponent(-0.0, &NEG[3..], f64::INFINITY);
 
         // (-0 ^ positive odd ints should be -0)
         test_sets_as_exponent(-0.0, &[POS_ODDS], -0.0);
 
         // (-0 ^ negative odd ints should be -Infinity)
         // (should panic because of divide by zero)
-        test_sets_as_exponent(-0.0, &[NEG_ODDS], NEG_INFINITY);
+        test_sets_as_exponent(-0.0, &[NEG_ODDS], f64::NEG_INFINITY);
     }
 
     #[test]
@@ -583,21 +582,17 @@ mod tests {
 
         // Factoring -1 out:
         // (negative anything ^ integer should be (-1 ^ integer) * (positive anything ^ integer))
-        (&[POS_ZERO, NEG_ZERO, POS_ONE, NEG_ONE, POS_EVENS, NEG_EVENS]).iter().for_each(
-            |int_set| {
-                int_set.iter().for_each(|int| {
-                    test_sets(ALL, &|v: f64| pow(-v, *int), &|v: f64| {
-                        pow(-1.0, *int) * pow(v, *int)
-                    });
-                })
-            },
-        );
+        [POS_ZERO, NEG_ZERO, POS_ONE, NEG_ONE, POS_EVENS, NEG_EVENS].iter().for_each(|int_set| {
+            int_set.iter().for_each(|int| {
+                test_sets(ALL, &|v: f64| pow(-v, *int), &|v: f64| pow(-1.0, *int) * pow(v, *int));
+            })
+        });
 
         // Negative base (imaginary results):
         // (-anything except 0 and Infinity ^ non-integer should be NAN)
-        (&NEG[1..(NEG.len() - 1)]).iter().for_each(|set| {
+        NEG[1..(NEG.len() - 1)].iter().for_each(|set| {
             set.iter().for_each(|val| {
-                test_sets(&ALL[3..7], &|v: f64| pow(*val, v), &|_| NAN);
+                test_sets(&ALL[3..7], &|v: f64| pow(*val, v), &|_| f64::NAN);
             })
         });
     }
diff --git a/src/math/sqrt.rs b/src/math/sqrt.rs
index 3eaf52cda..d9a8f184c 100644
--- a/src/math/sqrt.rs
+++ b/src/math/sqrt.rs
@@ -224,8 +224,6 @@ pub fn sqrt(x: f64) -> f64 {
 
 #[cfg(test)]
 mod tests {
-    use core::f64::*;
-
     use super::*;
 
     #[test]
@@ -239,15 +237,16 @@ mod tests {
     fn spec_tests() {
         // Not Asserted: FE_INVALID exception is raised if argument is negative.
         assert!(sqrt(-1.0).is_nan());
-        assert!(sqrt(NAN).is_nan());
-        for f in [0.0, -0.0, INFINITY].iter().copied() {
+        assert!(sqrt(f64::NAN).is_nan());
+        for f in [0.0, -0.0, f64::INFINITY].iter().copied() {
             assert_eq!(sqrt(f), f);
         }
     }
 
     #[test]
+    #[allow(clippy::approx_constant)]
     fn conformance_tests() {
-        let values = [3.14159265359, 10000.0, f64::from_bits(0x0000000f), INFINITY];
+        let values = [3.14159265359, 10000.0, f64::from_bits(0x0000000f), f64::INFINITY];
         let results = [
             4610661241675116657u64,
             4636737291354636288u64,
diff --git a/src/math/sqrtf.rs b/src/math/sqrtf.rs
index d2f7ae703..23f9a8443 100644
--- a/src/math/sqrtf.rs
+++ b/src/math/sqrtf.rs
@@ -110,8 +110,6 @@ pub fn sqrtf(x: f32) -> f32 {
 #[cfg(not(target_arch = "powerpc64"))]
 #[cfg(test)]
 mod tests {
-    use core::f32::*;
-
     use super::*;
 
     #[test]
@@ -125,15 +123,16 @@ mod tests {
     fn spec_tests() {
         // Not Asserted: FE_INVALID exception is raised if argument is negative.
         assert!(sqrtf(-1.0).is_nan());
-        assert!(sqrtf(NAN).is_nan());
-        for f in [0.0, -0.0, INFINITY].iter().copied() {
+        assert!(sqrtf(f32::NAN).is_nan());
+        for f in [0.0, -0.0, f32::INFINITY].iter().copied() {
             assert_eq!(sqrtf(f), f);
         }
     }
 
     #[test]
+    #[allow(clippy::approx_constant)]
     fn conformance_tests() {
-        let values = [3.14159265359f32, 10000.0f32, f32::from_bits(0x0000000f), INFINITY];
+        let values = [3.14159265359f32, 10000.0f32, f32::from_bits(0x0000000f), f32::INFINITY];
         let results = [1071833029u32, 1120403456u32, 456082799u32, 2139095040u32];
 
         for i in 0..values.len() {

From 0d810ed72617a27ed3d0eeddf85a1535cf00ea52 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 1 Nov 2024 06:40:17 -0500
Subject: [PATCH 031/279] Fix clippy lints in `crates/` and enable this on CI

---
 .github/workflows/main.yml          | 24 ++++++++++++++++++------
 crates/libm-macros/src/lib.rs       |  2 +-
 crates/libm-test/src/gen/random.rs  |  2 +-
 crates/libm-test/src/precision.rs   |  2 +-
 crates/libm-test/src/test_traits.rs |  4 ++--
 crates/musl-math-sys/build.rs       |  2 +-
 crates/musl-math-sys/src/lib.rs     |  7 +++++++
 7 files changed, 31 insertions(+), 12 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index c79d637ec..866f0de9e 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -105,12 +105,24 @@ jobs:
         rustup target add x86_64-unknown-linux-musl
         cargo generate-lockfile && ./ci/run-docker.sh ${{ matrix.target }}
 
-    - name: Clippy
-      # Tests and utilities can't build on no_std targets
-      if: "!contains(matrix.target, 'thumb')"
-        # Run clippy on `libm`
-      run: cargo clippy --target "${{ matrix.target }}" --package libm --all-targets
-        
+  clippy:
+    name: Clippy
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@master
+    - name: Install Rust
+      run: |
+        rustup update nightly --no-self-update
+        rustup default nightly
+        rustup component add clippy
+    - uses: Swatinem/rust-cache@v2
+    - name: Download musl source
+      run: ./ci/download-musl.sh
+    - run: |
+        cargo clippy --all \
+          --exclude cb \
+          --features libm-test/build-musl,libm-test/test-multiprecision \
+          --all-targets
 
   builtins:
     name: Check use with compiler-builtins
diff --git a/crates/libm-macros/src/lib.rs b/crates/libm-macros/src/lib.rs
index dc78598ca..41d13035c 100644
--- a/crates/libm-macros/src/lib.rs
+++ b/crates/libm-macros/src/lib.rs
@@ -353,7 +353,7 @@ fn validate(input: &StructuredInput) -> syn::Result<Vec<&'static FunctionInfo>>
     if !input.skip.is_empty() && input.only.is_some() {
         let e = syn::Error::new(
             input.only_span.unwrap(),
-            format!("only one of `skip` or `only` may be specified"),
+            "only one of `skip` or `only` may be specified",
         );
         return Err(e);
     }
diff --git a/crates/libm-test/src/gen/random.rs b/crates/libm-test/src/gen/random.rs
index d03d1ff79..e347b3c63 100644
--- a/crates/libm-test/src/gen/random.rs
+++ b/crates/libm-test/src/gen/random.rs
@@ -37,7 +37,7 @@ static TEST_CASES: LazyLock<CachedInput> = LazyLock::new(|| make_test_cases(NTES
 /// value so tests don't run forever.
 static TEST_CASES_JN: LazyLock<CachedInput> = LazyLock::new(|| {
     // Start with regular test cases
-    let mut cases = (&*TEST_CASES).clone();
+    let mut cases = (*TEST_CASES).clone();
 
     // These functions are extremely slow, limit them
     let ntests_jn = (NTESTS / 1000).max(80);
diff --git a/crates/libm-test/src/precision.rs b/crates/libm-test/src/precision.rs
index 9ef0e818d..5b021e946 100644
--- a/crates/libm-test/src/precision.rs
+++ b/crates/libm-test/src/precision.rs
@@ -238,7 +238,7 @@ fn maybe_check_nan_bits<F: Float>(actual: F, expected: F, ctx: &CheckCtx) -> Opt
 
     // abs and copysign require signaling NaNs to be propagated, so verify bit equality.
     if actual.to_bits() == expected.to_bits() {
-        return SKIP;
+        SKIP
     } else {
         Some(Err(anyhow::anyhow!("NaNs have different bitpatterns")))
     }
diff --git a/crates/libm-test/src/test_traits.rs b/crates/libm-test/src/test_traits.rs
index 67df83fb4..e69e16d24 100644
--- a/crates/libm-test/src/test_traits.rs
+++ b/crates/libm-test/src/test_traits.rs
@@ -214,7 +214,7 @@ macro_rules! impl_int {
     };
 }
 
-fn validate_int<'a, I, Input>(actual: I, expected: I, input: Input, ctx: &CheckCtx) -> TestResult
+fn validate_int<I, Input>(actual: I, expected: I, input: Input, ctx: &CheckCtx) -> TestResult
 where
     I: Int + Hex,
     Input: Hex + fmt::Debug,
@@ -274,7 +274,7 @@ macro_rules! impl_float {
     };
 }
 
-fn validate_float<'a, F, Input>(actual: F, expected: F, input: Input, ctx: &CheckCtx) -> TestResult
+fn validate_float<F, Input>(actual: F, expected: F, input: Input, ctx: &CheckCtx) -> TestResult
 where
     F: Float + Hex,
     Input: Hex + fmt::Debug,
diff --git a/crates/musl-math-sys/build.rs b/crates/musl-math-sys/build.rs
index 03df06c79..03deb4ff0 100644
--- a/crates/musl-math-sys/build.rs
+++ b/crates/musl-math-sys/build.rs
@@ -124,7 +124,7 @@ fn build_musl_math(cfg: &Config) {
     // Run configuration steps. Usually done as part of the musl `Makefile`.
     let obj_include = cfg.out_dir.join("musl_obj/include");
     fs::create_dir_all(&obj_include).unwrap();
-    fs::create_dir_all(&obj_include.join("bits")).unwrap();
+    fs::create_dir_all(obj_include.join("bits")).unwrap();
     let sed_stat = Command::new("sed")
         .arg("-f")
         .arg(musl_dir.join("tools/mkalltypes.sed"))
diff --git a/crates/musl-math-sys/src/lib.rs b/crates/musl-math-sys/src/lib.rs
index fe3c89229..db352fab8 100644
--- a/crates/musl-math-sys/src/lib.rs
+++ b/crates/musl-math-sys/src/lib.rs
@@ -7,6 +7,7 @@ use std::ffi::{c_char, c_int, c_long};
 /// unsound.
 macro_rules! functions {
     ( $(
+        $( #[$meta:meta] )*
         $pfx_name:ident: $name:ident( $($arg:ident: $aty:ty),+ ) -> $rty:ty;
     )* ) => {
         extern "C" {
@@ -15,6 +16,7 @@ macro_rules! functions {
 
         $(
             // Expose a safe version
+            $( #[$meta] )*
             pub fn $name( $($arg: $aty),+ ) -> $rty {
                 // SAFETY: FFI calls with no preconditions
                 unsafe { $pfx_name( $($arg),+ ) }
@@ -231,8 +233,13 @@ functions! {
     musl_logf: logf(a: f32) -> f32;
     musl_modf: modf(a: f64, b: &mut f64) -> f64;
     musl_modff: modff(a: f32, b: &mut f32) -> f32;
+
+    // FIXME: these need to be unsafe
+    #[allow(clippy::not_unsafe_ptr_arg_deref)]
     musl_nan: nan(a: *const c_char) -> f64;
+    #[allow(clippy::not_unsafe_ptr_arg_deref)]
     musl_nanf: nanf(a: *const c_char) -> f32;
+
     musl_nearbyint: nearbyint(a: f64) -> f64;
     musl_nearbyintf: nearbyintf(a: f32) -> f32;
     musl_nextafter: nextafter(a: f64, b: f64) -> f64;

From 06809a28a5f6c633b7450d39441d5f7e4b40ab3e Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 31 Oct 2024 02:44:00 -0500
Subject: [PATCH 032/279] Adjust how the proc macro emits types and add an enum

Currently the macro always provides `CFn`, `RustFn`, `RustArgs`, etc.
Change this so that:

1. This information must be explicily requested in the invocation.
2. There is a new `FTy` field available that emits a single float type,
   rather than a tuple or signature.

Additionally, add two new macros that create enums representing function
names.
---
 crates/libm-macros/Cargo.toml     |   1 +
 crates/libm-macros/src/enums.rs   | 132 +++++++++++++++++++++
 crates/libm-macros/src/lib.rs     | 191 +++++++++++++++++++++++-------
 crates/libm-macros/src/parse.rs   |  24 +++-
 crates/libm-macros/tests/basic.rs |  50 ++++----
 crates/libm-macros/tests/enum.rs  |  19 +++
 6 files changed, 354 insertions(+), 63 deletions(-)
 create mode 100644 crates/libm-macros/src/enums.rs
 create mode 100644 crates/libm-macros/tests/enum.rs

diff --git a/crates/libm-macros/Cargo.toml b/crates/libm-macros/Cargo.toml
index 3da9d45a2..c9defb1c5 100644
--- a/crates/libm-macros/Cargo.toml
+++ b/crates/libm-macros/Cargo.toml
@@ -8,6 +8,7 @@ publish = false
 proc-macro = true
 
 [dependencies]
+heck = "0.5.0"
 proc-macro2 = "1.0.88"
 quote = "1.0.37"
 syn = { version = "2.0.79", features = ["full", "extra-traits", "visit-mut"] }
diff --git a/crates/libm-macros/src/enums.rs b/crates/libm-macros/src/enums.rs
new file mode 100644
index 000000000..d9017dff7
--- /dev/null
+++ b/crates/libm-macros/src/enums.rs
@@ -0,0 +1,132 @@
+use heck::ToUpperCamelCase;
+use proc_macro2 as pm2;
+use proc_macro2::{Ident, Span};
+use quote::quote;
+use syn::spanned::Spanned;
+use syn::{Fields, ItemEnum, Variant};
+
+use crate::{ALL_FUNCTIONS_FLAT, base_name};
+
+/// Implement `#[function_enum]`, see documentation in `lib.rs`.
+pub fn function_enum(
+    mut item: ItemEnum,
+    attributes: pm2::TokenStream,
+) -> syn::Result<pm2::TokenStream> {
+    expect_empty_enum(&item)?;
+    let attr_span = attributes.span();
+    let mut attr = attributes.into_iter();
+
+    // Attribute should be the identifier of the `BaseName` enum.
+    let Some(tt) = attr.next() else {
+        return Err(syn::Error::new(attr_span, "expected one attribute"));
+    };
+
+    let pm2::TokenTree::Ident(base_enum) = tt else {
+        return Err(syn::Error::new(tt.span(), "expected an identifier"));
+    };
+
+    if let Some(tt) = attr.next() {
+        return Err(syn::Error::new(tt.span(), "unexpected token after identifier"));
+    }
+
+    let enum_name = &item.ident;
+    let mut as_str_arms = Vec::new();
+    let mut base_arms = Vec::new();
+
+    for func in ALL_FUNCTIONS_FLAT.iter() {
+        let fn_name = func.name;
+        let ident = Ident::new(&fn_name.to_upper_camel_case(), Span::call_site());
+        let bname_ident = Ident::new(&base_name(fn_name).to_upper_camel_case(), Span::call_site());
+
+        // Match arm for `fn as_str(self)` matcher
+        as_str_arms.push(quote! { Self::#ident => #fn_name });
+
+        // Match arm for `fn base_name(self)` matcher
+        base_arms.push(quote! { Self::#ident => #base_enum::#bname_ident });
+
+        let variant =
+            Variant { attrs: Vec::new(), ident, fields: Fields::Unit, discriminant: None };
+
+        item.variants.push(variant);
+    }
+
+    let res = quote! {
+        // Instantiate the enum
+        #item
+
+        impl #enum_name {
+            /// The stringified version of this function name.
+            const fn as_str(self) -> &'static str {
+                match self {
+                    #( #as_str_arms , )*
+                }
+            }
+
+            /// The base name enum for this function.
+            const fn base_name(self) -> #base_enum {
+                match self {
+                    #( #base_arms, )*
+                }
+            }
+        }
+    };
+
+    Ok(res)
+}
+
+/// Implement `#[base_name_enum]`, see documentation in `lib.rs`.
+pub fn base_name_enum(
+    mut item: ItemEnum,
+    attributes: pm2::TokenStream,
+) -> syn::Result<pm2::TokenStream> {
+    expect_empty_enum(&item)?;
+    if !attributes.is_empty() {
+        let sp = attributes.span();
+        return Err(syn::Error::new(sp.span(), "no attributes expected"));
+    }
+
+    let mut base_names: Vec<_> =
+        ALL_FUNCTIONS_FLAT.iter().map(|func| base_name(func.name)).collect();
+    base_names.sort_unstable();
+    base_names.dedup();
+
+    let item_name = &item.ident;
+    let mut as_str_arms = Vec::new();
+
+    for base_name in base_names {
+        let ident = Ident::new(&base_name.to_upper_camel_case(), Span::call_site());
+
+        // Match arm for `fn as_str(self)` matcher
+        as_str_arms.push(quote! { Self::#ident => #base_name });
+
+        let variant =
+            Variant { attrs: Vec::new(), ident, fields: Fields::Unit, discriminant: None };
+
+        item.variants.push(variant);
+    }
+
+    let res = quote! {
+        // Instantiate the enum
+        #item
+
+        impl #item_name {
+            /// The stringified version of this base name.
+            const fn as_str(self) -> &'static str {
+                match self {
+                    #( #as_str_arms ),*
+                }
+            }
+        }
+    };
+
+    Ok(res)
+}
+
+/// Verify that an enum is empty, otherwise return an error
+fn expect_empty_enum(item: &ItemEnum) -> syn::Result<()> {
+    if !item.variants.is_empty() {
+        Err(syn::Error::new(item.variants.span(), "expected an empty enum"))
+    } else {
+        Ok(())
+    }
+}
diff --git a/crates/libm-macros/src/lib.rs b/crates/libm-macros/src/lib.rs
index 41d13035c..2db412e79 100644
--- a/crates/libm-macros/src/lib.rs
+++ b/crates/libm-macros/src/lib.rs
@@ -1,16 +1,20 @@
+mod enums;
 mod parse;
+
 use std::sync::LazyLock;
 
 use parse::{Invocation, StructuredInput};
 use proc_macro as pm;
 use proc_macro2::{self as pm2, Span};
 use quote::{ToTokens, quote};
-use syn::Ident;
+use syn::spanned::Spanned;
 use syn::visit_mut::VisitMut;
+use syn::{Ident, ItemEnum};
 
-const ALL_FUNCTIONS: &[(Signature, Option<Signature>, &[&str])] = &[
+const ALL_FUNCTIONS: &[(Ty, Signature, Option<Signature>, &[&str])] = &[
     (
         // `fn(f32) -> f32`
+        Ty::F32,
         Signature { args: &[Ty::F32], returns: &[Ty::F32] },
         None,
         &[
@@ -22,6 +26,7 @@ const ALL_FUNCTIONS: &[(Signature, Option<Signature>, &[&str])] = &[
     ),
     (
         // `(f64) -> f64`
+        Ty::F64,
         Signature { args: &[Ty::F64], returns: &[Ty::F64] },
         None,
         &[
@@ -33,6 +38,7 @@ const ALL_FUNCTIONS: &[(Signature, Option<Signature>, &[&str])] = &[
     ),
     (
         // `(f32, f32) -> f32`
+        Ty::F32,
         Signature { args: &[Ty::F32, Ty::F32], returns: &[Ty::F32] },
         None,
         &[
@@ -50,6 +56,7 @@ const ALL_FUNCTIONS: &[(Signature, Option<Signature>, &[&str])] = &[
     ),
     (
         // `(f64, f64) -> f64`
+        Ty::F64,
         Signature { args: &[Ty::F64, Ty::F64], returns: &[Ty::F64] },
         None,
         &[
@@ -67,102 +74,120 @@ const ALL_FUNCTIONS: &[(Signature, Option<Signature>, &[&str])] = &[
     ),
     (
         // `(f32, f32, f32) -> f32`
+        Ty::F32,
         Signature { args: &[Ty::F32, Ty::F32, Ty::F32], returns: &[Ty::F32] },
         None,
         &["fmaf"],
     ),
     (
         // `(f64, f64, f64) -> f64`
+        Ty::F64,
         Signature { args: &[Ty::F64, Ty::F64, Ty::F64], returns: &[Ty::F64] },
         None,
         &["fma"],
     ),
     (
         // `(f32) -> i32`
+        Ty::F32,
         Signature { args: &[Ty::F32], returns: &[Ty::I32] },
         None,
         &["ilogbf"],
     ),
     (
         // `(f64) -> i32`
+        Ty::F64,
         Signature { args: &[Ty::F64], returns: &[Ty::I32] },
         None,
         &["ilogb"],
     ),
     (
         // `(i32, f32) -> f32`
+        Ty::F32,
         Signature { args: &[Ty::I32, Ty::F32], returns: &[Ty::F32] },
         None,
         &["jnf"],
     ),
     (
         // `(i32, f64) -> f64`
+        Ty::F64,
         Signature { args: &[Ty::I32, Ty::F64], returns: &[Ty::F64] },
         None,
         &["jn"],
     ),
     (
         // `(f32, i32) -> f32`
+        Ty::F32,
         Signature { args: &[Ty::F32, Ty::I32], returns: &[Ty::F32] },
         None,
         &["scalbnf", "ldexpf"],
     ),
     (
         // `(f64, i64) -> f64`
+        Ty::F64,
         Signature { args: &[Ty::F64, Ty::I32], returns: &[Ty::F64] },
         None,
         &["scalbn", "ldexp"],
     ),
     (
         // `(f32, &mut f32) -> f32` as `(f32) -> (f32, f32)`
+        Ty::F32,
         Signature { args: &[Ty::F32], returns: &[Ty::F32, Ty::F32] },
         Some(Signature { args: &[Ty::F32, Ty::MutF32], returns: &[Ty::F32] }),
         &["modff"],
     ),
     (
         // `(f64, &mut f64) -> f64` as  `(f64) -> (f64, f64)`
+        Ty::F64,
         Signature { args: &[Ty::F64], returns: &[Ty::F64, Ty::F64] },
         Some(Signature { args: &[Ty::F64, Ty::MutF64], returns: &[Ty::F64] }),
         &["modf"],
     ),
     (
         // `(f32, &mut c_int) -> f32` as `(f32) -> (f32, i32)`
+        Ty::F32,
         Signature { args: &[Ty::F32], returns: &[Ty::F32, Ty::I32] },
         Some(Signature { args: &[Ty::F32, Ty::MutCInt], returns: &[Ty::F32] }),
         &["frexpf", "lgammaf_r"],
     ),
     (
         // `(f64, &mut c_int) -> f64` as `(f64) -> (f64, i32)`
+        Ty::F64,
         Signature { args: &[Ty::F64], returns: &[Ty::F64, Ty::I32] },
         Some(Signature { args: &[Ty::F64, Ty::MutCInt], returns: &[Ty::F64] }),
         &["frexp", "lgamma_r"],
     ),
     (
         // `(f32, f32, &mut c_int) -> f32` as `(f32, f32) -> (f32, i32)`
+        Ty::F32,
         Signature { args: &[Ty::F32, Ty::F32], returns: &[Ty::F32, Ty::I32] },
         Some(Signature { args: &[Ty::F32, Ty::F32, Ty::MutCInt], returns: &[Ty::F32] }),
         &["remquof"],
     ),
     (
         // `(f64, f64, &mut c_int) -> f64` as `(f64, f64) -> (f64, i32)`
+        Ty::F64,
         Signature { args: &[Ty::F64, Ty::F64], returns: &[Ty::F64, Ty::I32] },
         Some(Signature { args: &[Ty::F64, Ty::F64, Ty::MutCInt], returns: &[Ty::F64] }),
         &["remquo"],
     ),
     (
         // `(f32, &mut f32, &mut f32)` as `(f32) -> (f32, f32)`
+        Ty::F32,
         Signature { args: &[Ty::F32], returns: &[Ty::F32, Ty::F32] },
         Some(Signature { args: &[Ty::F32, Ty::MutF32, Ty::MutF32], returns: &[] }),
         &["sincosf"],
     ),
     (
         // `(f64, &mut f64, &mut f64)` as `(f64) -> (f64, f64)`
+        Ty::F64,
         Signature { args: &[Ty::F64], returns: &[Ty::F64, Ty::F64] },
         Some(Signature { args: &[Ty::F64, Ty::MutF64, Ty::MutF64], returns: &[] }),
         &["sincos"],
     ),
 ];
 
+const KNOWN_TYPES: &[&str] = &["FTy", "CFn", "CArgs", "CRet", "RustFn", "RustArgs", "RustRet"];
+
 /// A type used in a function signature.
 #[allow(dead_code)]
 #[derive(Debug, Clone, Copy)]
@@ -190,12 +215,12 @@ impl ToTokens for Ty {
             Ty::F128 => quote! { f128 },
             Ty::I32 => quote! { i32 },
             Ty::CInt => quote! { ::core::ffi::c_int },
-            Ty::MutF16 => quote! { &mut f16 },
-            Ty::MutF32 => quote! { &mut f32 },
-            Ty::MutF64 => quote! { &mut f64 },
-            Ty::MutF128 => quote! { &mut f128 },
-            Ty::MutI32 => quote! { &mut i32 },
-            Ty::MutCInt => quote! { &mut core::ffi::c_int },
+            Ty::MutF16 => quote! { &'a mut f16 },
+            Ty::MutF32 => quote! { &'a mut f32 },
+            Ty::MutF64 => quote! { &'a mut f64 },
+            Ty::MutF128 => quote! { &'a mut f128 },
+            Ty::MutI32 => quote! { &'a mut i32 },
+            Ty::MutCInt => quote! { &'a mut core::ffi::c_int },
         };
 
         tokens.extend(ts);
@@ -213,6 +238,7 @@ struct Signature {
 #[derive(Debug, Clone)]
 struct FunctionInfo {
     name: &'static str,
+    base_fty: Ty,
     /// Function signature for C implementations
     c_sig: Signature,
     /// Function signature for Rust implementations
@@ -223,10 +249,11 @@ struct FunctionInfo {
 static ALL_FUNCTIONS_FLAT: LazyLock<Vec<FunctionInfo>> = LazyLock::new(|| {
     let mut ret = Vec::new();
 
-    for (rust_sig, c_sig, names) in ALL_FUNCTIONS {
+    for (base_fty, rust_sig, c_sig, names) in ALL_FUNCTIONS {
         for name in *names {
             let api = FunctionInfo {
                 name,
+                base_fty: *base_fty,
                 rust_sig: rust_sig.clone(),
                 c_sig: c_sig.clone().unwrap_or_else(|| rust_sig.clone()),
             };
@@ -238,6 +265,37 @@ static ALL_FUNCTIONS_FLAT: LazyLock<Vec<FunctionInfo>> = LazyLock::new(|| {
     ret
 });
 
+/// Populate an enum with a variant representing function. Names are in upper camel case.
+///
+/// Applied to an empty enum. Expects one attribute `#[function_enum(BaseName)]` that provides
+/// the name of the `BaseName` enum.
+#[proc_macro_attribute]
+pub fn function_enum(attributes: pm::TokenStream, tokens: pm::TokenStream) -> pm::TokenStream {
+    let item = syn::parse_macro_input!(tokens as ItemEnum);
+    let res = enums::function_enum(item, attributes.into());
+
+    match res {
+        Ok(ts) => ts,
+        Err(e) => e.into_compile_error(),
+    }
+    .into()
+}
+
+/// Create an enum representing all possible base names, with names in upper camel case.
+///
+/// Applied to an empty enum.
+#[proc_macro_attribute]
+pub fn base_name_enum(attributes: pm::TokenStream, tokens: pm::TokenStream) -> pm::TokenStream {
+    let item = syn::parse_macro_input!(tokens as ItemEnum);
+    let res = enums::base_name_enum(item, attributes.into());
+
+    match res {
+        Ok(ts) => ts,
+        Err(e) => e.into_compile_error(),
+    }
+    .into()
+}
+
 /// Do something for each function present in this crate.
 ///
 /// Takes a callback macro and invokes it multiple times, once for each function that
@@ -258,6 +316,8 @@ static ALL_FUNCTIONS_FLAT: LazyLock<Vec<FunctionInfo>> = LazyLock::new(|| {
 ///     (
 ///         // Name of that function
 ///         fn_name: $fn_name:ident,
+///         // The basic float type for this function (e.g. `f32`, `f64`)
+///         FTy: $FTy:ty,
 ///         // Function signature of the C version (e.g. `fn(f32, &mut f32) -> f32`)
 ///         CFn: $CFn:ty,
 ///         // A tuple representing the C version's arguments (e.g. `(f32, &mut f32)`)
@@ -279,17 +339,16 @@ static ALL_FUNCTIONS_FLAT: LazyLock<Vec<FunctionInfo>> = LazyLock::new(|| {
 ///     ) => { };
 /// }
 ///
+/// // All fields except for `callback` are optional.
 /// libm_macros::for_each_function! {
 ///     // The macro to invoke as a callback
 ///     callback: callback_macro,
+///     // Which types to include either as a list (`[CFn, RustFn, RustArgs]`) or "all"
+///     emit_types: all,
 ///     // Functions to skip, i.e. `callback` shouldn't be called at all for these.
-///     //
-///     // This is an optional field.
 ///     skip: [sin, cos],
 ///     // Attributes passed as `attrs` for specific functions. For example, here the invocation
 ///     // with `sinf` and that with `cosf` will both get `meta1` and `meta2`, but no others will.
-///     //
-///     // This is an optional field.
 ///     attributes: [
 ///         #[meta1]
 ///         #[meta2]
@@ -297,8 +356,6 @@ static ALL_FUNCTIONS_FLAT: LazyLock<Vec<FunctionInfo>> = LazyLock::new(|| {
 ///     ],
 ///     // Any tokens that should be passed directly to all invocations of the callback. This can
 ///     // be used to pass local variables or other things the macro needs access to.
-///     //
-///     // This is an optional field.
 ///     extra: [foo],
 ///     // Similar to `extra`, but allow providing a pattern for only specific functions. Uses
 ///     // a simplified match-like syntax.
@@ -313,7 +370,7 @@ pub fn for_each_function(tokens: pm::TokenStream) -> pm::TokenStream {
     let input = syn::parse_macro_input!(tokens as Invocation);
 
     let res = StructuredInput::from_fields(input)
-        .and_then(|s_in| validate(&s_in).map(|fn_list| (s_in, fn_list)))
+        .and_then(|mut s_in| validate(&mut s_in).map(|fn_list| (s_in, fn_list)))
         .and_then(|(s_in, fn_list)| expand(s_in, &fn_list));
 
     match res {
@@ -325,7 +382,7 @@ pub fn for_each_function(tokens: pm::TokenStream) -> pm::TokenStream {
 /// Check for any input that is structurally correct but has other problems.
 ///
 /// Returns the list of function names that we should expand for.
-fn validate(input: &StructuredInput) -> syn::Result<Vec<&'static FunctionInfo>> {
+fn validate(input: &mut StructuredInput) -> syn::Result<Vec<&'static FunctionInfo>> {
     // Collect lists of all functions that are provied as macro inputs in various fields (only,
     // skip, attributes).
     let attr_mentions = input
@@ -376,6 +433,43 @@ fn validate(input: &StructuredInput) -> syn::Result<Vec<&'static FunctionInfo>>
         fn_list.push(func);
     }
 
+    // Types that the user would like us to provide in the macro
+    let mut add_all_types = false;
+    for ty in &input.emit_types {
+        let ty_name = ty.to_string();
+        if ty_name == "all" {
+            add_all_types = true;
+            continue;
+        }
+
+        // Check that all requested types are valid
+        if !KNOWN_TYPES.contains(&ty_name.as_str()) {
+            let e = syn::Error::new(
+                ty_name.span(),
+                format!("unrecognized type identifier `{ty_name}`"),
+            );
+            return Err(e);
+        }
+    }
+
+    if add_all_types {
+        // Ensure that if `all` was specified that nothing else was
+        if input.emit_types.len() > 1 {
+            let e = syn::Error::new(
+                input.emit_types_span.unwrap(),
+                "if `all` is specified, no other type identifiers may be given",
+            );
+            return Err(e);
+        }
+
+        // ...and then add all types
+        input.emit_types.clear();
+        for ty in KNOWN_TYPES {
+            let ident = Ident::new(ty, Span::call_site());
+            input.emit_types.push(ident);
+        }
+    }
+
     if let Some(map) = &input.fn_extra {
         if !map.keys().any(|key| key == "_") {
             // No default provided; make sure every expected function is covered
@@ -451,20 +545,31 @@ fn expand(input: StructuredInput, fn_list: &[&FunctionInfo]) -> syn::Result<pm2:
             None => pm2::TokenStream::new(),
         };
 
+        let base_fty = func.base_fty;
         let c_args = &func.c_sig.args;
         let c_ret = &func.c_sig.returns;
         let rust_args = &func.rust_sig.args;
         let rust_ret = &func.rust_sig.returns;
 
+        let mut ty_fields = Vec::new();
+        for ty in &input.emit_types {
+            let field = match ty.to_string().as_str() {
+                "FTy" => quote! { FTy: #base_fty, },
+                "CFn" => quote! { CFn: fn( #(#c_args),* ,) -> ( #(#c_ret),* ), },
+                "CArgs" => quote! { CArgs: ( #(#c_args),* ,), },
+                "CRet" => quote! { CRet: ( #(#c_ret),* ), },
+                "RustFn" => quote! { RustFn: fn( #(#rust_args),* ,) -> ( #(#rust_ret),* ), },
+                "RustArgs" => quote! { RustArgs: ( #(#rust_args),* ,), },
+                "RustRet" => quote! { RustRet: ( #(#rust_ret),* ), },
+                _ => unreachable!("checked in validation"),
+            };
+            ty_fields.push(field);
+        }
+
         let new = quote! {
             #callback! {
                 fn_name: #fn_name,
-                CFn: fn( #(#c_args),* ,) -> ( #(#c_ret),* ),
-                CArgs: ( #(#c_args),* ,),
-                CRet: ( #(#c_ret),* ),
-                RustFn: fn( #(#rust_args),* ,) -> ( #(#rust_ret),* ),
-                RustArgs: ( #(#rust_args),* ,),
-                RustRet: ( #(#rust_ret),* ),
+                #( #ty_fields )*
                 #meta_field
                 #extra_field
                 #fn_extra_field
@@ -488,24 +593,7 @@ struct MacroReplace {
 
 impl MacroReplace {
     fn new(name: &'static str) -> Self {
-        // Keep this in sync with `libm_test::canonical_name`
-        let known_mappings = &[
-            ("erff", "erf"),
-            ("erf", "erf"),
-            ("lgammaf_r", "lgamma_r"),
-            ("modff", "modf"),
-            ("modf", "modf"),
-        ];
-
-        let norm_name = match known_mappings.iter().find(|known| known.0 == name) {
-            Some(found) => found.1,
-            None => name
-                .strip_suffix("f")
-                .or_else(|| name.strip_suffix("f16"))
-                .or_else(|| name.strip_suffix("f128"))
-                .unwrap_or(name),
-        };
-
+        let norm_name = base_name(name);
         Self { fn_name: name, norm_name: norm_name.to_owned(), error: None }
     }
 
@@ -539,3 +627,24 @@ impl VisitMut for MacroReplace {
         syn::visit_mut::visit_ident_mut(self, i);
     }
 }
+
+/// Return the unsuffixed name of a function.
+fn base_name(name: &str) -> &str {
+    // Keep this in sync with `libm_test::base_name`
+    let known_mappings = &[
+        ("erff", "erf"),
+        ("erf", "erf"),
+        ("lgammaf_r", "lgamma_r"),
+        ("modff", "modf"),
+        ("modf", "modf"),
+    ];
+
+    match known_mappings.iter().find(|known| known.0 == name) {
+        Some(found) => found.1,
+        None => name
+            .strip_suffix("f")
+            .or_else(|| name.strip_suffix("f16"))
+            .or_else(|| name.strip_suffix("f128"))
+            .unwrap_or(name),
+    }
+}
diff --git a/crates/libm-macros/src/parse.rs b/crates/libm-macros/src/parse.rs
index ee9bd524b..369bbae2f 100644
--- a/crates/libm-macros/src/parse.rs
+++ b/crates/libm-macros/src/parse.rs
@@ -5,7 +5,7 @@ use quote::ToTokens;
 use syn::parse::{Parse, ParseStream, Parser};
 use syn::punctuated::Punctuated;
 use syn::spanned::Spanned;
-use syn::token::Comma;
+use syn::token::{self, Comma};
 use syn::{Arm, Attribute, Expr, ExprMatch, Ident, Meta, Token, bracketed};
 
 /// The input to our macro; just a list of `field: value` items.
@@ -39,6 +39,9 @@ impl Parse for Mapping {
 pub struct StructuredInput {
     /// Macro to invoke once per function
     pub callback: Ident,
+    /// Whether or not to provide `CFn` `CArgs` `RustFn` etc. This is really only needed
+    /// once for crate to set up the main trait.
+    pub emit_types: Vec<Ident>,
     /// Skip these functions
     pub skip: Vec<Ident>,
     /// Invoke only for these functions
@@ -50,6 +53,7 @@ pub struct StructuredInput {
     /// Per-function extra expressions to pass to the macro
     pub fn_extra: Option<BTreeMap<Ident, Expr>>,
     // For diagnostics
+    pub emit_types_span: Option<Span>,
     pub only_span: Option<Span>,
     pub fn_extra_span: Option<Span>,
 }
@@ -58,6 +62,7 @@ impl StructuredInput {
     pub fn from_fields(input: Invocation) -> syn::Result<Self> {
         let mut map: Vec<_> = input.fields.into_iter().collect();
         let cb_expr = expect_field(&mut map, "callback")?;
+        let emit_types_expr = expect_field(&mut map, "emit_types").ok();
         let skip_expr = expect_field(&mut map, "skip").ok();
         let only_expr = expect_field(&mut map, "only").ok();
         let attr_expr = expect_field(&mut map, "attributes").ok();
@@ -71,6 +76,12 @@ impl StructuredInput {
             ))?;
         }
 
+        let emit_types_span = emit_types_expr.as_ref().map(|expr| expr.span());
+        let emit_types = match emit_types_expr {
+            Some(expr) => Parser::parse2(parse_ident_or_array, expr.into_token_stream())?,
+            None => Vec::new(),
+        };
+
         let skip = match skip_expr {
             Some(expr) => Parser::parse2(parse_ident_array, expr.into_token_stream())?,
             None => Vec::new(),
@@ -103,6 +114,7 @@ impl StructuredInput {
 
         Ok(Self {
             callback: expect_ident(cb_expr)?,
+            emit_types,
             skip,
             only,
             only_span,
@@ -110,6 +122,7 @@ impl StructuredInput {
             extra,
             fn_extra,
             fn_extra_span,
+            emit_types_span,
         })
     }
 }
@@ -183,6 +196,15 @@ fn expect_ident(expr: Expr) -> syn::Result<Ident> {
     syn::parse2(expr.into_token_stream())
 }
 
+/// Parse either a single identifier (`foo`) or an array of identifiers (`[foo, bar, baz]`).
+fn parse_ident_or_array(input: ParseStream) -> syn::Result<Vec<Ident>> {
+    if !input.peek(token::Bracket) {
+        return Ok(vec![input.parse()?]);
+    }
+
+    parse_ident_array(input)
+}
+
 /// Parse an array of expressions.
 fn parse_expr_array(input: ParseStream) -> syn::Result<Vec<Expr>> {
     let content;
diff --git a/crates/libm-macros/tests/basic.rs b/crates/libm-macros/tests/basic.rs
index 8f8c09f1b..2eaba04f4 100644
--- a/crates/libm-macros/tests/basic.rs
+++ b/crates/libm-macros/tests/basic.rs
@@ -4,6 +4,7 @@
 macro_rules! basic {
     (
         fn_name: $fn_name:ident,
+        FTy: $FTy:ty,
         CFn: $CFn:ty,
         CArgs: $CArgs:ty,
         CRet: $CRet:ty,
@@ -17,9 +18,9 @@ macro_rules! basic {
         $(#[$meta])*
         mod $fn_name {
             #[allow(unused)]
-            type CFnTy = $CFn;
-            // type CArgsTy<'_> = $CArgs;
-            // type CRetTy<'_> = $CRet;
+            type FTy= $FTy;
+            #[allow(unused)]
+            type CFnTy<'a> = $CFn;
             #[allow(unused)]
             type RustFnTy = $RustFn;
             #[allow(unused)]
@@ -39,6 +40,7 @@ macro_rules! basic {
 mod test_basic {
     libm_macros::for_each_function! {
         callback: basic,
+        emit_types: all,
         skip: [sin, cos],
         attributes: [
             // just some random attributes
@@ -58,25 +60,8 @@ mod test_basic {
 macro_rules! basic_no_extra {
     (
         fn_name: $fn_name:ident,
-        CFn: $CFn:ty,
-        CArgs: $CArgs:ty,
-        CRet: $CRet:ty,
-        RustFn: $RustFn:ty,
-        RustArgs: $RustArgs:ty,
-        RustRet: $RustRet:ty,
     ) => {
-        mod $fn_name {
-            #[allow(unused)]
-            type CFnTy = $CFn;
-            // type CArgsTy<'_> = $CArgs;
-            // type CRetTy<'_> = $CRet;
-            #[allow(unused)]
-            type RustFnTy = $RustFn;
-            #[allow(unused)]
-            type RustArgsTy = $RustArgs;
-            #[allow(unused)]
-            type RustRetTy = $RustRet;
-        }
+        mod $fn_name {}
     };
 }
 
@@ -94,3 +79,26 @@ mod test_only {
         only: [sin, sinf],
     }
 }
+
+macro_rules! specified_types {
+    (
+        fn_name: $fn_name:ident,
+        RustFn: $RustFn:ty,
+        RustArgs: $RustArgs:ty,
+    ) => {
+        mod $fn_name {
+            #[allow(unused)]
+            type RustFnTy = $RustFn;
+            #[allow(unused)]
+            type RustArgsTy = $RustArgs;
+        }
+    };
+}
+
+mod test_emit_types {
+    // Test that we can specify a couple types to emit
+    libm_macros::for_each_function! {
+        callback: specified_types,
+        emit_types: [RustFn, RustArgs],
+    }
+}
diff --git a/crates/libm-macros/tests/enum.rs b/crates/libm-macros/tests/enum.rs
new file mode 100644
index 000000000..884b8d8d6
--- /dev/null
+++ b/crates/libm-macros/tests/enum.rs
@@ -0,0 +1,19 @@
+#[libm_macros::function_enum(BaseName)]
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+pub enum Function {}
+
+#[libm_macros::base_name_enum]
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+pub enum BaseName {}
+
+#[test]
+fn as_str() {
+    assert_eq!(Function::Sin.as_str(), "sin");
+    assert_eq!(Function::Sinf.as_str(), "sinf");
+}
+
+#[test]
+fn basename() {
+    assert_eq!(Function::Sin.base_name(), BaseName::Sin);
+    assert_eq!(Function::Sinf.base_name(), BaseName::Sin);
+}

From f2dcd2749a505b00e5a08fbc279b8711ac096188 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 31 Oct 2024 02:45:37 -0500
Subject: [PATCH 033/279] Introduce a `op` module with struct representations
 of each routine

This contains:

1. Per-function and per-operation enums created by the proc macro
2. The `MathOp` trait which is implemented once per struct representing
   a function
3. Submodules for each function, each containing a `Routine` struct that
   implements `MathOp`
---
 crates/libm-test/src/lib.rs         |   2 +
 crates/libm-test/src/op.rs          | 111 ++++++++++++++++++++++++++++
 crates/libm-test/src/test_traits.rs |   4 +-
 3 files changed, 115 insertions(+), 2 deletions(-)
 create mode 100644 crates/libm-test/src/op.rs

diff --git a/crates/libm-test/src/lib.rs b/crates/libm-test/src/lib.rs
index 56a872779..e64ad6264 100644
--- a/crates/libm-test/src/lib.rs
+++ b/crates/libm-test/src/lib.rs
@@ -1,10 +1,12 @@
 pub mod gen;
 #[cfg(feature = "test-multiprecision")]
 pub mod mpfloat;
+pub mod op;
 mod precision;
 mod test_traits;
 
 pub use libm::support::{Float, Int};
+pub use op::{BaseName, MathOp, Name};
 pub use precision::{MaybeOverride, SpecialCase, multiprec_allowed_ulp, musl_allowed_ulp};
 pub use test_traits::{CheckBasis, CheckCtx, CheckOutput, GenerateInput, Hex, TupleCall};
 
diff --git a/crates/libm-test/src/op.rs b/crates/libm-test/src/op.rs
new file mode 100644
index 000000000..fe0a08a28
--- /dev/null
+++ b/crates/libm-test/src/op.rs
@@ -0,0 +1,111 @@
+//! Types representing individual functions.
+//!
+//! Each routine gets a module with its name, e.g. `mod sinf { /* ... */ }`. The module
+//! contains a unit struct `Routine` which implements `MathOp`.
+//!
+//! Basically everything could be called a "function" here, so we loosely use the following
+//! terminology:
+//!
+//! - "Function": the math operation that does not have an associated precision. E.g. `f(x) = e^x`,
+//!   `f(x) = log(x)`.
+//! - "Routine": A code implementation of a math operation with a specific precision. E.g. `exp`,
+//!   `expf`, `expl`, `log`, `logf`.
+//! - "Operation" / "Op": Something that relates a routine to a function or is otherwise higher
+//!   level. `Op` is also used as the name for generic parameters since it is terse.
+
+use crate::{CheckOutput, Float, TupleCall};
+
+/// An enum representing each possible routine name.
+#[libm_macros::function_enum(BaseName)]
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+pub enum Name {}
+
+/// The name without any type specifier, e.g. `sin` and `sinf` both become `sin`.
+#[libm_macros::base_name_enum]
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+pub enum BaseName {}
+
+/// Attributes ascribed to a `libm` routine including signature, type information,
+/// and naming.
+pub trait MathOp {
+    /// The float type used for this operation.
+    type FTy: Float;
+
+    /// The function type representing the signature in a C library.
+    type CFn: Copy;
+
+    /// Arguments passed to the C library function as a tuple. These may include `&mut` return
+    /// values.
+    type CArgs<'a>
+    where
+        Self: 'a;
+
+    /// The type returned by C implementations.
+    type CRet;
+
+    /// The signature of the Rust function as a `fn(...) -> ...` type.
+    type RustFn: Copy;
+
+    /// Arguments passed to the Rust library function as a tuple.
+    ///
+    /// The required `TupleCall` bounds ensure this type can be passed either to the C function or
+    /// to the Rust function.
+    type RustArgs: Copy
+        + TupleCall<Self::RustFn, Output = Self::RustRet>
+        + TupleCall<Self::CFn, Output = Self::RustRet>;
+
+    /// Type returned from the Rust function.
+    type RustRet: CheckOutput<Self::RustArgs>;
+
+    /// The name of this function, including suffix (e.g. `sin`, `sinf`).
+    const NAME: Name;
+
+    /// The name as a string.
+    const NAME_STR: &'static str = Self::NAME.as_str();
+
+    /// The name of the function excluding the type suffix, e.g. `sin` and `sinf` are both `sin`.
+    const BASE_NAME: BaseName = Self::NAME.base_name();
+
+    /// The function in `libm` which can be called.
+    const ROUTINE: Self::RustFn;
+}
+
+macro_rules! do_thing {
+    // Matcher for unary functions
+    (
+        fn_name: $fn_name:ident,
+        FTy: $FTy:ty,
+        CFn: $CFn:ty,
+        CArgs: $CArgs:ty,
+        CRet: $CRet:ty,
+        RustFn: $RustFn:ty,
+        RustArgs: $RustArgs:ty,
+        RustRet: $RustRet:ty,
+    ) => {
+        paste::paste! {
+            pub mod $fn_name {
+                use super::*;
+                pub struct Routine;
+
+                impl MathOp for Routine {
+                    type FTy = $FTy;
+                    type CFn = for<'a> $CFn;
+                    type CArgs<'a> = $CArgs where Self: 'a;
+                    type CRet = $CRet;
+                    type RustFn = $RustFn;
+                    type RustArgs = $RustArgs;
+                    type RustRet = $RustRet;
+
+                    const NAME: Name = Name::[< $fn_name:camel >];
+                    const ROUTINE: Self::RustFn = libm::$fn_name;
+                }
+            }
+
+        }
+    };
+}
+
+libm_macros::for_each_function! {
+    callback: do_thing,
+    emit_types: all,
+}
diff --git a/crates/libm-test/src/test_traits.rs b/crates/libm-test/src/test_traits.rs
index e69e16d24..b9bec9a44 100644
--- a/crates/libm-test/src/test_traits.rs
+++ b/crates/libm-test/src/test_traits.rs
@@ -137,7 +137,7 @@ where
     }
 }
 
-impl<T1, T2, T3> TupleCall<fn(T1, &mut T2, &mut T3)> for (T1,)
+impl<T1, T2, T3> TupleCall<for<'a> fn(T1, &'a mut T2, &'a mut T3)> for (T1,)
 where
     T1: fmt::Debug,
     T2: fmt::Debug + Default,
@@ -145,7 +145,7 @@ where
 {
     type Output = (T2, T3);
 
-    fn call(self, f: fn(T1, &mut T2, &mut T3)) -> Self::Output {
+    fn call(self, f: for<'a> fn(T1, &'a mut T2, &'a mut T3)) -> Self::Output {
         let mut t2 = T2::default();
         let mut t3 = T3::default();
         f(self.0, &mut t2, &mut t3);

From 7c418306c90bf0180249b3f38761e416cfee86a9 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 31 Oct 2024 02:46:21 -0500
Subject: [PATCH 034/279] Rework tests to make use of the new `MathOp` trait

---
 crates/libm-test/benches/random.rs           | 129 ++++---
 crates/libm-test/src/mpfloat.rs              | 355 ++++++++-----------
 crates/libm-test/tests/check_coverage.rs     |   6 -
 crates/libm-test/tests/compare_built_musl.rs |  54 +--
 crates/libm-test/tests/multiprecision.rs     |  49 +--
 5 files changed, 278 insertions(+), 315 deletions(-)

diff --git a/crates/libm-test/benches/random.rs b/crates/libm-test/benches/random.rs
index 6c9047c3c..6f2305dd2 100644
--- a/crates/libm-test/benches/random.rs
+++ b/crates/libm-test/benches/random.rs
@@ -2,72 +2,103 @@ use std::hint::black_box;
 use std::time::Duration;
 
 use criterion::{Criterion, criterion_main};
-use libm_test::gen::random;
-use libm_test::{CheckBasis, CheckCtx, TupleCall};
+use libm_test::gen::{CachedInput, random};
+use libm_test::{CheckBasis, CheckCtx, GenerateInput, MathOp, TupleCall};
 
 /// Benchmark with this many items to get a variety
 const BENCH_ITER_ITEMS: usize = if cfg!(feature = "short-benchmarks") { 50 } else { 500 };
 
+/// Extra parameters we only care about if we are benchmarking against musl.
+#[allow(dead_code)]
+struct MuslExtra<F> {
+    musl_fn: Option<F>,
+    skip_on_i586: bool,
+}
+
 macro_rules! musl_rand_benches {
     (
         fn_name: $fn_name:ident,
-        CFn: $CFn:ty,
-        CArgs: $CArgs:ty,
-        CRet: $CRet:ty,
-        RustFn: $RustFn:ty,
-        RustArgs: $RustArgs:ty,
-        RustRet: $RustRet:ty,
         fn_extra: $skip_on_i586:expr,
     ) => {
         paste::paste! {
             fn [< musl_bench_ $fn_name >](c: &mut Criterion) {
-                let fn_name = stringify!($fn_name);
-
-                let ulp = libm_test::musl_allowed_ulp(fn_name);
-                let ctx = CheckCtx::new(ulp, fn_name, CheckBasis::Musl);
-                let benchvec: Vec<_> = random::get_test_cases::<$RustArgs>(&ctx)
-                    .take(BENCH_ITER_ITEMS)
-                    .collect();
+                type Op = libm_test::op::$fn_name::Routine;
 
-                // Perform a sanity check that we are benchmarking the same thing
-                // Don't test against musl if it is not available
                 #[cfg(feature = "build-musl")]
-                for input in benchvec.iter().copied() {
-                    use anyhow::Context;
-                    use libm_test::{CheckBasis, CheckCtx, CheckOutput};
+                let musl_extra = MuslExtra {
+                    musl_fn: Some(musl_math_sys::$fn_name as <Op as MathOp>::CFn),
+                    skip_on_i586: $skip_on_i586
+                };
+
+                #[cfg(not(feature = "build-musl"))]
+                let musl_extra = MuslExtra {
+                    musl_fn: None,
+                    skip_on_i586: $skip_on_i586
+                };
+
+                bench_one::<Op>(c, musl_extra);
+            }
+        }
+    };
+}
 
-                    if cfg!(x86_no_sse) && $skip_on_i586 {
-                        break;
-                    }
+fn bench_one<Op>(c: &mut Criterion, musl_extra: MuslExtra<Op::CFn>)
+where
+    Op: MathOp,
+    CachedInput: GenerateInput<Op::RustArgs>,
+{
+    let name = Op::NAME_STR;
+
+    let ulp = libm_test::musl_allowed_ulp(name);
+    let ctx = CheckCtx::new(ulp, name, CheckBasis::Musl);
+    let benchvec: Vec<_> =
+        random::get_test_cases::<Op::RustArgs>(&ctx).take(BENCH_ITER_ITEMS).collect();
+
+    // Perform a sanity check that we are benchmarking the same thing
+    // Don't test against musl if it is not available
+    #[cfg(feature = "build-musl")]
+    for input in benchvec.iter().copied() {
+        use anyhow::Context;
+        use libm_test::CheckOutput;
+
+        if cfg!(x86_no_sse) && musl_extra.skip_on_i586 {
+            break;
+        }
 
-                    let musl_res = input.call(musl_math_sys::$fn_name as $CFn);
-                    let crate_res = input.call(libm::$fn_name as $RustFn);
+        let musl_res = input.call(musl_extra.musl_fn.unwrap());
+        let crate_res = input.call(Op::ROUTINE);
 
-                    let ctx = CheckCtx::new(ulp, fn_name, CheckBasis::Musl);
-                    crate_res.validate(musl_res, input, &ctx).context(fn_name).unwrap();
-                }
+        crate_res.validate(musl_res, input, &ctx).context(name).unwrap();
+    }
 
-                /* Function pointers are black boxed to avoid inlining in the benchmark loop */
+    #[cfg(not(feature = "build-musl"))]
+    let _ = musl_extra; // silence unused warnings
 
-                let mut group = c.benchmark_group(fn_name);
-                group.bench_function("crate", |b| b.iter(|| {
-                    let f = black_box(libm::$fn_name as $RustFn);
-                    for input in benchvec.iter().copied() {
-                        input.call(f);
-                    }
-                }));
+    /* Option pointers are black boxed to avoid inlining in the benchmark loop */
 
-                // Don't test against musl if it is not available
-                #[cfg(feature = "build-musl")]
-                group.bench_function("musl", |b| b.iter(|| {
-                    let f = black_box(musl_math_sys::$fn_name as $CFn);
-                    for input in benchvec.iter().copied() {
-                        input.call(f);
-                    }
-                }));
+    let mut group = c.benchmark_group(name);
+    group.bench_function("crate", |b| {
+        b.iter(|| {
+            let f = black_box(Op::ROUTINE);
+            for input in benchvec.iter().copied() {
+                input.call(f);
             }
-        }
-    };
+        })
+    });
+
+    // Don't test against musl if it is not available
+    #[cfg(feature = "build-musl")]
+    {
+        let musl_fn = musl_extra.musl_fn.unwrap();
+        group.bench_function("musl", |b| {
+            b.iter(|| {
+                let f = black_box(musl_fn);
+                for input in benchvec.iter().copied() {
+                    input.call(f);
+                }
+            })
+        });
+    }
 }
 
 libm_macros::for_each_function! {
@@ -83,12 +114,6 @@ libm_macros::for_each_function! {
 macro_rules! run_callback {
     (
         fn_name: $fn_name:ident,
-        CFn: $_CFn:ty,
-        CArgs: $_CArgs:ty,
-        CRet: $_CRet:ty,
-        RustFn: $_RustFn:ty,
-        RustArgs: $_RustArgs:ty,
-        RustRet: $_RustRet:ty,
         extra: [$criterion:ident],
     ) => {
         paste::paste! {
diff --git a/crates/libm-test/src/mpfloat.rs b/crates/libm-test/src/mpfloat.rs
index 2e6fdae7f..507b077b3 100644
--- a/crates/libm-test/src/mpfloat.rs
+++ b/crates/libm-test/src/mpfloat.rs
@@ -11,7 +11,7 @@ pub use rug::Float as MpFloat;
 use rug::float::Round::Nearest;
 use rug::ops::{PowAssignRound, RemAssignRound};
 
-use crate::Float;
+use crate::{Float, MathOp};
 
 /// Create a multiple-precision float with the correct number of bits for a concrete float type.
 fn new_mpfloat<F: Float>() -> MpFloat {
@@ -29,23 +29,19 @@ where
 
 /// Structures that represent a float operation.
 ///
-/// The struct itself should hold any context that can be reused among calls to `run` (allocated
-/// `MpFloat`s).
-pub trait MpOp {
-    /// Inputs to the operation (concrete float types).
-    type Input;
-
-    /// Outputs from the operation (concrete float types).
-    type Output;
+pub trait MpOp: MathOp {
+    /// The struct itself should hold any context that can be reused among calls to `run` (allocated
+    /// `MpFloat`s).
+    type MpTy;
 
     /// Create a new instance.
-    fn new() -> Self;
+    fn new_mp() -> Self::MpTy;
 
     /// Perform the operation.
     ///
     /// Usually this means assigning inputs to cached floats, performing the operation, applying
     /// subnormal approximation, and converting the result back to concrete values.
-    fn run(&mut self, input: Self::Input) -> Self::Output;
+    fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet;
 }
 
 /// Implement `MpOp` for functions with a single return value.
@@ -53,32 +49,21 @@ macro_rules! impl_mp_op {
     // Matcher for unary functions
     (
         fn_name: $fn_name:ident,
-        CFn: $CFn:ty,
-        CArgs: $CArgs:ty,
-        CRet: $CRet:ty,
-        RustFn: fn($fty:ty,) -> $_ret:ty,
-        RustArgs: $RustArgs:ty,
-        RustRet: $RustRet:ty,
+        RustFn: fn($_fty:ty,) -> $_ret:ty,
         fn_extra: $fn_name_normalized:expr,
     ) => {
         paste::paste! {
-            pub mod $fn_name {
-                use super::*;
-                pub struct Operation(MpFloat);
-
-                impl MpOp for Operation {
-                    type Input = $RustArgs;
-                    type Output = $RustRet;
-
-                    fn new() -> Self {
-                        Self(new_mpfloat::<$fty>())
-                    }
-
-                    fn run(&mut self, input: Self::Input) -> Self::Output {
-                        self.0.assign(input.0);
-                        let ord = self.0.[< $fn_name_normalized _round >](Nearest);
-                        prep_retval::<Self::Output>(&mut self.0, ord)
-                    }
+            impl MpOp for crate::op::$fn_name::Routine {
+                type MpTy = MpFloat;
+
+                fn new_mp() -> Self::MpTy {
+                    new_mpfloat::<Self::FTy>()
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.assign(input.0);
+                    let ord = this.[< $fn_name_normalized _round >](Nearest);
+                    prep_retval::<Self::RustRet>(this, ord)
                 }
             }
         }
@@ -86,33 +71,22 @@ macro_rules! impl_mp_op {
     // Matcher for binary functions
     (
         fn_name: $fn_name:ident,
-        CFn: $CFn:ty,
-        CArgs: $CArgs:ty,
-        CRet: $CRet:ty,
-        RustFn: fn($fty:ty, $_fty2:ty,) -> $_ret:ty,
-        RustArgs: $RustArgs:ty,
-        RustRet: $RustRet:ty,
+        RustFn: fn($_fty:ty, $_fty2:ty,) -> $_ret:ty,
         fn_extra: $fn_name_normalized:expr,
     ) => {
         paste::paste! {
-            pub mod $fn_name {
-                use super::*;
-                pub struct Operation(MpFloat, MpFloat);
-
-                impl MpOp for Operation {
-                    type Input = $RustArgs;
-                    type Output = $RustRet;
-
-                    fn new() -> Self {
-                        Self(new_mpfloat::<$fty>(), new_mpfloat::<$fty>())
-                    }
-
-                    fn run(&mut self, input: Self::Input) -> Self::Output {
-                        self.0.assign(input.0);
-                        self.1.assign(input.1);
-                        let ord = self.0.[< $fn_name_normalized _round >](&self.1, Nearest);
-                        prep_retval::<Self::Output>(&mut self.0, ord)
-                    }
+            impl MpOp for crate::op::$fn_name::Routine {
+                type MpTy = (MpFloat, MpFloat);
+
+                fn new_mp() -> Self::MpTy {
+                    (new_mpfloat::<Self::FTy>(), new_mpfloat::<Self::FTy>())
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.0.assign(input.0);
+                    this.1.assign(input.1);
+                    let ord = this.0.[< $fn_name_normalized _round >](&this.1, Nearest);
+                    prep_retval::<Self::RustRet>(&mut this.0, ord)
                 }
             }
         }
@@ -120,34 +94,27 @@ macro_rules! impl_mp_op {
     // Matcher for ternary functions
     (
         fn_name: $fn_name:ident,
-        CFn: $CFn:ty,
-        CArgs: $CArgs:ty,
-        CRet: $CRet:ty,
-        RustFn: fn($fty:ty, $_fty2:ty, $_fty3:ty,) -> $_ret:ty,
-        RustArgs: $RustArgs:ty,
-        RustRet: $RustRet:ty,
+        RustFn: fn($_fty:ty, $_fty2:ty, $_fty3:ty,) -> $_ret:ty,
         fn_extra: $fn_name_normalized:expr,
     ) => {
         paste::paste! {
-            pub mod $fn_name {
-                use super::*;
-                pub struct Operation(MpFloat, MpFloat, MpFloat);
-
-                impl MpOp for Operation {
-                    type Input = $RustArgs;
-                    type Output = $RustRet;
-
-                    fn new() -> Self {
-                        Self(new_mpfloat::<$fty>(), new_mpfloat::<$fty>(), new_mpfloat::<$fty>())
-                    }
-
-                    fn run(&mut self, input: Self::Input) -> Self::Output {
-                        self.0.assign(input.0);
-                        self.1.assign(input.1);
-                        self.2.assign(input.2);
-                        let ord = self.0.[< $fn_name_normalized _round >](&self.1, &self.2, Nearest);
-                        prep_retval::<Self::Output>(&mut self.0, ord)
-                    }
+            impl MpOp for crate::op::$fn_name::Routine {
+                type MpTy = (MpFloat, MpFloat, MpFloat);
+
+                fn new_mp() -> Self::MpTy {
+                    (
+                        new_mpfloat::<Self::FTy>(),
+                        new_mpfloat::<Self::FTy>(),
+                        new_mpfloat::<Self::FTy>(),
+                    )
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.0.assign(input.0);
+                    this.1.assign(input.1);
+                    this.2.assign(input.2);
+                    let ord = this.0.[< $fn_name_normalized _round >](&this.1, &this.2, Nearest);
+                    prep_retval::<Self::RustRet>(&mut this.0, ord)
                 }
             }
         }
@@ -156,6 +123,7 @@ macro_rules! impl_mp_op {
 
 libm_macros::for_each_function! {
     callback: impl_mp_op,
+    emit_types: [RustFn],
     skip: [
         // Most of these need a manual implementation
         fabs, ceil, copysign, floor, rint, round, trunc,
@@ -186,29 +154,23 @@ macro_rules! impl_no_round {
     ($($fn_name:ident, $rug_name:ident;)*) => {
         paste::paste! {
             // Implement for both f32 and f64
-            $( impl_no_round!{ @inner_unary [< $fn_name f >], (f32,), $rug_name } )*
-            $( impl_no_round!{ @inner_unary $fn_name, (f64,), $rug_name } )*
+            $( impl_no_round!{ @inner_unary [< $fn_name f >], $rug_name } )*
+            $( impl_no_round!{ @inner_unary $fn_name, $rug_name } )*
         }
     };
 
-    (@inner_unary $fn_name:ident, ($fty:ty,), $rug_name:ident) => {
-        pub mod $fn_name {
-            use super::*;
-            pub struct Operation(MpFloat);
+    (@inner_unary $fn_name:ident, $rug_name:ident) => {
+        impl MpOp for crate::op::$fn_name::Routine {
+            type MpTy = MpFloat;
 
-            impl MpOp for Operation {
-                type Input = ($fty,);
-                type Output = $fty;
-
-                fn new() -> Self {
-                    Self(new_mpfloat::<$fty>())
-                }
+            fn new_mp() -> Self::MpTy {
+                new_mpfloat::<Self::FTy>()
+            }
 
-                fn run(&mut self, input: Self::Input) -> Self::Output {
-                    self.0.assign(input.0);
-                    self.0.$rug_name();
-                    prep_retval::<Self::Output>(&mut self.0, Ordering::Equal)
-                }
+            fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                this.assign(input.0);
+                this.$rug_name();
+                prep_retval::<Self::RustRet>(this, Ordering::Equal)
             }
         }
     };
@@ -227,132 +189,81 @@ impl_no_round! {
 macro_rules! impl_op_for_ty {
     ($fty:ty, $suffix:literal) => {
         paste::paste! {
-            pub mod [<copysign $suffix>] {
-                use super::*;
-                pub struct Operation(MpFloat, MpFloat);
-
-                impl MpOp for Operation {
-                    type Input = ($fty, $fty);
-                    type Output = $fty;
-
-                    fn new() -> Self {
-                        Self(new_mpfloat::<$fty>(), new_mpfloat::<$fty>())
-                    }
-
-                    fn run(&mut self, input: Self::Input) -> Self::Output {
-                        self.0.assign(input.0);
-                        self.1.assign(input.1);
-                        self.0.copysign_mut(&self.1);
-                        prep_retval::<Self::Output>(&mut self.0, Ordering::Equal)
-                    }
+            impl MpOp for crate::op::[<copysign $suffix>]::Routine {
+                type MpTy = (MpFloat, MpFloat);
+
+                fn new_mp() -> Self::MpTy {
+                    (new_mpfloat::<Self::FTy>(), new_mpfloat::<Self::FTy>())
                 }
-            }
 
-            pub mod [<pow $suffix>] {
-                use super::*;
-                pub struct Operation(MpFloat, MpFloat);
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.0.assign(input.0);
+                    this.1.assign(input.1);
+                    this.0.copysign_mut(&this.1);
+                    prep_retval::<Self::RustRet>(&mut this.0, Ordering::Equal)
+                }
+            }
 
-                impl MpOp for Operation {
-                    type Input = ($fty, $fty);
-                    type Output = $fty;
+            impl MpOp for crate::op::[<pow $suffix>]::Routine {
+                type MpTy = (MpFloat, MpFloat);
 
-                    fn new() -> Self {
-                        Self(new_mpfloat::<$fty>(), new_mpfloat::<$fty>())
-                    }
+                fn new_mp() -> Self::MpTy {
+                    (new_mpfloat::<Self::FTy>(), new_mpfloat::<Self::FTy>())
+                }
 
-                    fn run(&mut self, input: Self::Input) -> Self::Output {
-                        self.0.assign(input.0);
-                        self.1.assign(input.1);
-                        let ord = self.0.pow_assign_round(&self.1, Nearest);
-                        prep_retval::<Self::Output>(&mut self.0, ord)
-                    }
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.0.assign(input.0);
+                    this.1.assign(input.1);
+                    let ord = this.0.pow_assign_round(&this.1, Nearest);
+                    prep_retval::<Self::RustRet>(&mut this.0, ord)
                 }
             }
 
-            pub mod [<fmod $suffix>] {
-                use super::*;
-                pub struct Operation(MpFloat, MpFloat);
+            impl MpOp for crate::op::[<fmod $suffix>]::Routine {
+                type MpTy = (MpFloat, MpFloat);
 
-                impl MpOp for Operation {
-                    type Input = ($fty, $fty);
-                    type Output = $fty;
-
-                    fn new() -> Self {
-                        Self(new_mpfloat::<$fty>(), new_mpfloat::<$fty>())
-                    }
+                fn new_mp() -> Self::MpTy {
+                    (new_mpfloat::<Self::FTy>(), new_mpfloat::<Self::FTy>())
+                }
 
-                    fn run(&mut self, input: Self::Input) -> Self::Output {
-                        self.0.assign(input.0);
-                        self.1.assign(input.1);
-                        let ord = self.0.rem_assign_round(&self.1, Nearest);
-                        prep_retval::<Self::Output>(&mut self.0, ord)
-                    }
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.0.assign(input.0);
+                    this.1.assign(input.1);
+                    let ord = this.0.rem_assign_round(&this.1, Nearest);
+                    prep_retval::<Self::RustRet>(&mut this.0, ord)
                 }
             }
 
-            pub mod [<lgamma_r $suffix>] {
-                use super::*;
-                pub struct Operation(MpFloat);
-
-                impl MpOp for Operation {
-                    type Input = ($fty,);
-                    type Output = ($fty, i32);
+            impl MpOp for crate::op::[<jn $suffix>]::Routine {
+                type MpTy = (i32, MpFloat);
 
-                    fn new() -> Self {
-                        Self(new_mpfloat::<$fty>())
-                    }
+                fn new_mp() -> Self::MpTy {
+                    (0, new_mpfloat::<Self::FTy>())
+                }
 
-                    fn run(&mut self, input: Self::Input) -> Self::Output {
-                        self.0.assign(input.0);
-                        let (sign, ord) = self.0.ln_abs_gamma_round(Nearest);
-                        let ret = prep_retval::<$fty>(&mut self.0, ord);
-                        (ret, sign as i32)
-                    }
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.0 = input.0;
+                    this.1.assign(input.1);
+                    let ord = this.1.jn_round(this.0, Nearest);
+                    prep_retval::<Self::FTy>(&mut this.1, ord)
                 }
             }
 
-            pub mod [<jn $suffix>] {
-                use super::*;
-                pub struct Operation(i32, MpFloat);
-
-                impl MpOp for Operation {
-                    type Input = (i32, $fty);
-                    type Output = $fty;
+            impl MpOp for crate::op::[<sincos $suffix>]::Routine {
+                type MpTy = (MpFloat, MpFloat);
 
-                    fn new() -> Self {
-                        Self(0, new_mpfloat::<$fty>())
-                    }
-
-                    fn run(&mut self, input: Self::Input) -> Self::Output {
-                        self.0 = input.0;
-                        self.1.assign(input.1);
-                        let ord = self.1.jn_round(self.0, Nearest);
-                        prep_retval::<$fty>(&mut self.1, ord)
-                    }
+                fn new_mp() -> Self::MpTy {
+                    (new_mpfloat::<Self::FTy>(), new_mpfloat::<Self::FTy>())
                 }
-            }
 
-            pub mod [<sincos $suffix>] {
-                use super::*;
-                pub struct Operation(MpFloat, MpFloat);
-
-                impl MpOp for Operation {
-                    type Input = ($fty,);
-                    type Output = ($fty, $fty);
-
-                    fn new() -> Self {
-                        Self(new_mpfloat::<$fty>(), new_mpfloat::<$fty>())
-                    }
-
-                    fn run(&mut self, input: Self::Input) -> Self::Output {
-                        self.0.assign(input.0);
-                        self.1.assign(0.0);
-                        let (sord, cord) = self.0.sin_cos_round(&mut self.1, Nearest);
-                        (
-                            prep_retval::<$fty>(&mut self.0, sord),
-                            prep_retval::<$fty>(&mut self.1, cord)
-                        )
-                    }
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.0.assign(input.0);
+                    this.1.assign(0.0);
+                    let (sord, cord) = this.0.sin_cos_round(&mut this.1, Nearest);
+                    (
+                        prep_retval::<Self::FTy>(&mut this.0, sord),
+                        prep_retval::<Self::FTy>(&mut this.1, cord)
+                    )
                 }
             }
         }
@@ -362,7 +273,33 @@ macro_rules! impl_op_for_ty {
 impl_op_for_ty!(f32, "f");
 impl_op_for_ty!(f64, "");
 
-// Account for `lgamma_r` not having a simple `f` suffix
-pub mod lgammaf_r {
-    pub use super::lgamma_rf::*;
+// `lgamma_r` is not a simple suffix so we can't use the above macro.
+impl MpOp for crate::op::lgamma_r::Routine {
+    type MpTy = MpFloat;
+
+    fn new_mp() -> Self::MpTy {
+        new_mpfloat::<Self::FTy>()
+    }
+
+    fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+        this.assign(input.0);
+        let (sign, ord) = this.ln_abs_gamma_round(Nearest);
+        let ret = prep_retval::<Self::FTy>(this, ord);
+        (ret, sign as i32)
+    }
+}
+
+impl MpOp for crate::op::lgammaf_r::Routine {
+    type MpTy = MpFloat;
+
+    fn new_mp() -> Self::MpTy {
+        new_mpfloat::<Self::FTy>()
+    }
+
+    fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+        this.assign(input.0);
+        let (sign, ord) = this.ln_abs_gamma_round(Nearest);
+        let ret = prep_retval::<Self::FTy>(this, ord);
+        (ret, sign as i32)
+    }
 }
diff --git a/crates/libm-test/tests/check_coverage.rs b/crates/libm-test/tests/check_coverage.rs
index ef6d21fdb..b7988660e 100644
--- a/crates/libm-test/tests/check_coverage.rs
+++ b/crates/libm-test/tests/check_coverage.rs
@@ -22,12 +22,6 @@ const ALLOWED_SKIPS: &[&str] = &[
 macro_rules! callback {
     (
         fn_name: $name:ident,
-        CFn: $_CFn:ty,
-        CArgs: $_CArgs:ty,
-        CRet: $_CRet:ty,
-        RustFn: $_RustFn:ty,
-        RustArgs: $_RustArgs:ty,
-        RustRet: $_RustRet:ty,
         extra: [$push_to:ident],
     ) => {
         $push_to.push(stringify!($name));
diff --git a/crates/libm-test/tests/compare_built_musl.rs b/crates/libm-test/tests/compare_built_musl.rs
index 5a118f7c2..d4ba9e900 100644
--- a/crates/libm-test/tests/compare_built_musl.rs
+++ b/crates/libm-test/tests/compare_built_musl.rs
@@ -9,42 +9,46 @@
 // There are some targets we can't build musl for
 #![cfg(feature = "build-musl")]
 
-use libm_test::gen::random;
-use libm_test::{CheckBasis, CheckCtx, CheckOutput, TupleCall, musl_allowed_ulp};
-use musl_math_sys as musl;
+use libm_test::gen::{CachedInput, random};
+use libm_test::{
+    CheckBasis, CheckCtx, CheckOutput, GenerateInput, MathOp, TupleCall, musl_allowed_ulp,
+};
 
 macro_rules! musl_rand_tests {
     (
         fn_name: $fn_name:ident,
-        CFn: $CFn:ty,
-        CArgs: $CArgs:ty,
-        CRet: $CRet:ty,
-        RustFn: $RustFn:ty,
-        RustArgs: $RustArgs:ty,
-        RustRet: $RustRet:ty,
         attrs: [$($meta:meta)*]
-    ) => { paste::paste! {
-        #[test]
-        $(#[$meta])*
-        fn [< musl_random_ $fn_name >]() {
-            let fname = stringify!($fn_name);
-            let ulp = musl_allowed_ulp(fname);
-            let ctx = CheckCtx::new(ulp, fname, CheckBasis::Musl);
-            let cases = random::get_test_cases::<$RustArgs>(&ctx);
-
-            for input in cases {
-                let musl_res = input.call(musl::$fn_name as $CFn);
-                let crate_res = input.call(libm::$fn_name as $RustFn);
-
-                crate_res.validate(musl_res, input, &ctx).unwrap();
+    ) => {
+        paste::paste! {
+            #[test]
+            $(#[$meta])*
+            fn [< musl_random_ $fn_name >]() {
+                test_one::<libm_test::op::$fn_name::Routine>(musl_math_sys::$fn_name);
             }
         }
-    } };
+    };
+}
+
+fn test_one<Op>(musl_fn: Op::CFn)
+where
+    Op: MathOp,
+    CachedInput: GenerateInput<Op::RustArgs>,
+{
+    let name = Op::NAME_STR;
+    let ulp = musl_allowed_ulp(name);
+    let ctx = CheckCtx::new(ulp, name, CheckBasis::Musl);
+    let cases = random::get_test_cases::<Op::RustArgs>(&ctx);
+
+    for input in cases {
+        let musl_res = input.call(musl_fn);
+        let crate_res = input.call(Op::ROUTINE);
+
+        crate_res.validate(musl_res, input, &ctx).unwrap();
+    }
 }
 
 libm_macros::for_each_function! {
     callback: musl_rand_tests,
-    skip: [],
     attributes: [
         #[cfg_attr(x86_no_sse, ignore)] // FIXME(correctness): wrong result on i586
         [exp10, exp10f, exp2, exp2f, rint]
diff --git a/crates/libm-test/tests/multiprecision.rs b/crates/libm-test/tests/multiprecision.rs
index f8d94a160..676ee86a0 100644
--- a/crates/libm-test/tests/multiprecision.rs
+++ b/crates/libm-test/tests/multiprecision.rs
@@ -2,45 +2,48 @@
 
 #![cfg(feature = "test-multiprecision")]
 
-use libm_test::gen::random;
-use libm_test::mpfloat::{self, MpOp};
-use libm_test::{CheckBasis, CheckCtx, CheckOutput, TupleCall, multiprec_allowed_ulp};
+use libm_test::gen::{CachedInput, random};
+use libm_test::mpfloat::MpOp;
+use libm_test::{
+    CheckBasis, CheckCtx, CheckOutput, GenerateInput, MathOp, TupleCall, multiprec_allowed_ulp,
+};
 
 /// Implement a test against MPFR with random inputs.
 macro_rules! multiprec_rand_tests {
     (
         fn_name: $fn_name:ident,
-        CFn: $CFn:ty,
-        CArgs: $CArgs:ty,
-        CRet: $CRet:ty,
-        RustFn: $RustFn:ty,
-        RustArgs: $RustArgs:ty,
-        RustRet: $RustRet:ty,
         attrs: [$($meta:meta)*]
     ) => {
         paste::paste! {
             #[test]
             $(#[$meta])*
             fn [< multiprec_random_ $fn_name >]() {
-                type MpOpTy = mpfloat::$fn_name::Operation;
-
-                let fname = stringify!($fn_name);
-                let ulp = multiprec_allowed_ulp(fname);
-                let mut mp_vals = MpOpTy::new();
-                let ctx = CheckCtx::new(ulp, fname, CheckBasis::Mpfr);
-                let cases = random::get_test_cases::<$RustArgs>(&ctx);
-
-                for input in cases {
-                    let mp_res = mp_vals.run(input);
-                    let crate_res = input.call(libm::$fn_name as $RustFn);
-
-                    crate_res.validate(mp_res, input, &ctx).unwrap();
-                }
+                test_one::<libm_test::op::$fn_name::Routine>();
             }
         }
     };
 }
 
+fn test_one<Op>()
+where
+    Op: MathOp + MpOp,
+    CachedInput: GenerateInput<Op::RustArgs>,
+{
+    let name = Op::NAME_STR;
+
+    let ulp = multiprec_allowed_ulp(name);
+    let mut mp_vals = Op::new_mp();
+    let ctx = CheckCtx::new(ulp, name, CheckBasis::Mpfr);
+    let cases = random::get_test_cases::<Op::RustArgs>(&ctx);
+
+    for input in cases {
+        let mp_res = Op::run(&mut mp_vals, input);
+        let crate_res = input.call(Op::ROUTINE);
+
+        crate_res.validate(mp_res, input, &ctx).unwrap();
+    }
+}
+
 libm_macros::for_each_function! {
     callback: multiprec_rand_tests,
     attributes: [

From 2e9bd1d531f10395ea5d0418d15c75f7530e2033 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sat, 2 Nov 2024 16:52:28 -0500
Subject: [PATCH 035/279] Correct the proc macro to emit `pub` functions

---
 crates/libm-macros/src/enums.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/crates/libm-macros/src/enums.rs b/crates/libm-macros/src/enums.rs
index d9017dff7..1f9fca2ef 100644
--- a/crates/libm-macros/src/enums.rs
+++ b/crates/libm-macros/src/enums.rs
@@ -56,14 +56,14 @@ pub fn function_enum(
 
         impl #enum_name {
             /// The stringified version of this function name.
-            const fn as_str(self) -> &'static str {
+            pub const fn as_str(self) -> &'static str {
                 match self {
                     #( #as_str_arms , )*
                 }
             }
 
             /// The base name enum for this function.
-            const fn base_name(self) -> #base_enum {
+            pub const fn base_name(self) -> #base_enum {
                 match self {
                     #( #base_arms, )*
                 }
@@ -111,7 +111,7 @@ pub fn base_name_enum(
 
         impl #item_name {
             /// The stringified version of this base name.
-            const fn as_str(self) -> &'static str {
+            pub const fn as_str(self) -> &'static str {
                 match self {
                     #( #as_str_arms ),*
                 }

From 1e1830745d5907b8340ffc196c02e4e839b38adb Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sat, 2 Nov 2024 22:35:30 -0500
Subject: [PATCH 036/279] Change the `CheckCtx` constructor to take a `Name`
 enum

This prepares to eliminate some reliance on string matching but does not
yet make those changes.
---
 crates/libm-macros/src/lib.rs                |  4 +--
 crates/libm-test/benches/random.rs           |  2 +-
 crates/libm-test/src/gen/random.rs           |  7 +++--
 crates/libm-test/src/lib.rs                  | 21 -------------
 crates/libm-test/src/precision.rs            | 32 ++++++++++----------
 crates/libm-test/src/test_traits.rs          | 20 ++++++++----
 crates/libm-test/tests/compare_built_musl.rs |  2 +-
 crates/libm-test/tests/multiprecision.rs     |  2 +-
 8 files changed, 40 insertions(+), 50 deletions(-)

diff --git a/crates/libm-macros/src/lib.rs b/crates/libm-macros/src/lib.rs
index 2db412e79..1e7cd08b9 100644
--- a/crates/libm-macros/src/lib.rs
+++ b/crates/libm-macros/src/lib.rs
@@ -628,9 +628,9 @@ impl VisitMut for MacroReplace {
     }
 }
 
-/// Return the unsuffixed name of a function.
+/// Return the unsuffixed version of a function name; e.g. `abs` and `absf` both return `abs`,
+/// `lgamma_r` and `lgammaf_r` both return `lgamma_r`.
 fn base_name(name: &str) -> &str {
-    // Keep this in sync with `libm_test::base_name`
     let known_mappings = &[
         ("erff", "erf"),
         ("erf", "erf"),
diff --git a/crates/libm-test/benches/random.rs b/crates/libm-test/benches/random.rs
index 6f2305dd2..72ace5d53 100644
--- a/crates/libm-test/benches/random.rs
+++ b/crates/libm-test/benches/random.rs
@@ -50,7 +50,7 @@ where
     let name = Op::NAME_STR;
 
     let ulp = libm_test::musl_allowed_ulp(name);
-    let ctx = CheckCtx::new(ulp, name, CheckBasis::Musl);
+    let ctx = CheckCtx::new(ulp, Op::NAME, CheckBasis::Musl);
     let benchvec: Vec<_> =
         random::get_test_cases::<Op::RustArgs>(&ctx).take(BENCH_ITER_ITEMS).collect();
 
diff --git a/crates/libm-test/src/gen/random.rs b/crates/libm-test/src/gen/random.rs
index e347b3c63..b72247a4e 100644
--- a/crates/libm-test/src/gen/random.rs
+++ b/crates/libm-test/src/gen/random.rs
@@ -110,7 +110,10 @@ pub fn get_test_cases<RustArgs>(ctx: &CheckCtx) -> impl Iterator<Item = RustArgs
 where
     CachedInput: GenerateInput<RustArgs>,
 {
-    let inputs =
-        if ctx.fn_name == "jn" || ctx.fn_name == "jnf" { &TEST_CASES_JN } else { &TEST_CASES };
+    let inputs = if ctx.fn_name_str == "jn" || ctx.fn_name_str == "jnf" {
+        &TEST_CASES_JN
+    } else {
+        &TEST_CASES
+    };
     inputs.get_cases()
 }
diff --git a/crates/libm-test/src/lib.rs b/crates/libm-test/src/lib.rs
index e64ad6264..af6ad6da5 100644
--- a/crates/libm-test/src/lib.rs
+++ b/crates/libm-test/src/lib.rs
@@ -17,27 +17,6 @@ pub type TestResult<T = (), E = anyhow::Error> = Result<T, E>;
 // List of all files present in libm's source
 include!(concat!(env!("OUT_DIR"), "/all_files.rs"));
 
-/// Return the unsuffixed version of a function name; e.g. `abs` and `absf` both return `abs`,
-/// `lgamma_r` and `lgammaf_r` both return `lgamma_r`.
-pub fn base_name(name: &str) -> &str {
-    let known_mappings = &[
-        ("erff", "erf"),
-        ("erf", "erf"),
-        ("lgammaf_r", "lgamma_r"),
-        ("modff", "modf"),
-        ("modf", "modf"),
-    ];
-
-    match known_mappings.iter().find(|known| known.0 == name) {
-        Some(found) => found.1,
-        None => name
-            .strip_suffix("f")
-            .or_else(|| name.strip_suffix("f16"))
-            .or_else(|| name.strip_suffix("f128"))
-            .unwrap_or(name),
-    }
-}
-
 /// True if `EMULATED` is set and nonempty. Used to determine how many iterations to run.
 pub const fn emulated() -> bool {
     match option_env!("EMULATED") {
diff --git a/crates/libm-test/src/precision.rs b/crates/libm-test/src/precision.rs
index 5b021e946..5b5743020 100644
--- a/crates/libm-test/src/precision.rs
+++ b/crates/libm-test/src/precision.rs
@@ -111,25 +111,25 @@ impl MaybeOverride<(f32,)> for SpecialCase {
         ctx: &CheckCtx,
     ) -> Option<TestResult> {
         if ctx.basis == CheckBasis::Musl {
-            if ctx.fn_name == "expm1f" && input.0 > 80.0 && actual.is_infinite() {
+            if ctx.fn_name_str == "expm1f" && input.0 > 80.0 && actual.is_infinite() {
                 // we return infinity but the number is representable
                 return XFAIL;
             }
 
-            if ctx.fn_name == "sinhf" && input.0.abs() > 80.0 && actual.is_nan() {
+            if ctx.fn_name_str == "sinhf" && input.0.abs() > 80.0 && actual.is_nan() {
                 // we return some NaN that should be real values or infinite
                 // doesn't seem to happen on x86
                 return XFAIL;
             }
         }
 
-        if ctx.fn_name == "acoshf" && input.0 < -1.0 {
+        if ctx.fn_name_str == "acoshf" && input.0 < -1.0 {
             // acoshf is undefined for x <= 1.0, but we return a random result at lower
             // values.
             return XFAIL;
         }
 
-        if ctx.fn_name == "lgammaf" || ctx.fn_name == "lgammaf_r" && input.0 < 0.0 {
+        if ctx.fn_name_str == "lgammaf" || ctx.fn_name_str == "lgammaf_r" && input.0 < 0.0 {
             // loggamma should not be defined for x < 0, yet we both return results
             return XFAIL;
         }
@@ -146,7 +146,7 @@ impl MaybeOverride<(f32,)> for SpecialCase {
         // On MPFR for lgammaf_r, we set -1 as the integer result for negative infinity but MPFR
         // sets +1
         if ctx.basis == CheckBasis::Mpfr
-            && ctx.fn_name == "lgammaf_r"
+            && ctx.fn_name_str == "lgammaf_r"
             && input.0 == f32::NEG_INFINITY
             && actual.abs() == expected.abs()
         {
@@ -166,13 +166,13 @@ impl MaybeOverride<(f64,)> for SpecialCase {
         ctx: &CheckCtx,
     ) -> Option<TestResult> {
         if ctx.basis == CheckBasis::Musl {
-            if cfg!(target_arch = "x86") && ctx.fn_name == "acosh" && input.0 < 1.0 {
+            if cfg!(target_arch = "x86") && ctx.fn_name_str == "acosh" && input.0 < 1.0 {
                 // The function is undefined, both implementations return random results
                 return SKIP;
             }
 
             if cfg!(x86_no_sse)
-                && ctx.fn_name == "ceil"
+                && ctx.fn_name_str == "ceil"
                 && input.0 < 0.0
                 && input.0 > -1.0
                 && expected == F::ZERO
@@ -183,13 +183,13 @@ impl MaybeOverride<(f64,)> for SpecialCase {
             }
         }
 
-        if ctx.fn_name == "acosh" && input.0 < 1.0 {
+        if ctx.fn_name_str == "acosh" && input.0 < 1.0 {
             // The function is undefined for the inputs, musl and our libm both return
             // random results.
             return XFAIL;
         }
 
-        if ctx.fn_name == "lgamma" || ctx.fn_name == "lgamma_r" && input.0 < 0.0 {
+        if ctx.fn_name_str == "lgamma" || ctx.fn_name_str == "lgamma_r" && input.0 < 0.0 {
             // loggamma should not be defined for x < 0, yet we both return results
             return XFAIL;
         }
@@ -206,7 +206,7 @@ impl MaybeOverride<(f64,)> for SpecialCase {
         // On MPFR for lgamma_r, we set -1 as the integer result for negative infinity but MPFR
         // sets +1
         if ctx.basis == CheckBasis::Mpfr
-            && ctx.fn_name == "lgamma_r"
+            && ctx.fn_name_str == "lgamma_r"
             && input.0 == f64::NEG_INFINITY
             && actual.abs() == expected.abs()
         {
@@ -219,7 +219,7 @@ impl MaybeOverride<(f64,)> for SpecialCase {
 
 /// Check NaN bits if the function requires it
 fn maybe_check_nan_bits<F: Float>(actual: F, expected: F, ctx: &CheckCtx) -> Option<TestResult> {
-    if !(ctx.base_name == "fabs" || ctx.base_name == "copysign") {
+    if !(ctx.base_name_str == "fabs" || ctx.base_name_str == "copysign") {
         return None;
     }
 
@@ -277,7 +277,7 @@ fn maybe_skip_binop_nan<F1: Float, F2: Float>(
 ) -> Option<TestResult> {
     match ctx.basis {
         CheckBasis::Musl => {
-            if (ctx.base_name == "fmax" || ctx.base_name == "fmin")
+            if (ctx.base_name_str == "fmax" || ctx.base_name_str == "fmin")
                 && (input.0.is_nan() || input.1.is_nan())
                 && expected.is_nan()
             {
@@ -287,7 +287,7 @@ fn maybe_skip_binop_nan<F1: Float, F2: Float>(
             }
         }
         CheckBasis::Mpfr => {
-            if ctx.base_name == "copysign" && input.1.is_nan() {
+            if ctx.base_name_str == "copysign" && input.1.is_nan() {
                 SKIP
             } else {
                 None
@@ -308,7 +308,7 @@ impl MaybeOverride<(i32, f32)> for SpecialCase {
             CheckBasis::Musl => bessel_prec_dropoff(input, ulp, ctx),
             CheckBasis::Mpfr => {
                 // We return +0.0, MPFR returns -0.0
-                if ctx.fn_name == "jnf"
+                if ctx.fn_name_str == "jnf"
                     && input.1 == f32::NEG_INFINITY
                     && actual == F::ZERO
                     && expected == F::ZERO
@@ -333,7 +333,7 @@ impl MaybeOverride<(i32, f64)> for SpecialCase {
             CheckBasis::Musl => bessel_prec_dropoff(input, ulp, ctx),
             CheckBasis::Mpfr => {
                 // We return +0.0, MPFR returns -0.0
-                if ctx.fn_name == "jn"
+                if ctx.fn_name_str == "jn"
                     && input.1 == f64::NEG_INFINITY
                     && actual == F::ZERO
                     && expected == F::ZERO
@@ -353,7 +353,7 @@ fn bessel_prec_dropoff<F: Float>(
     ulp: &mut u32,
     ctx: &CheckCtx,
 ) -> Option<TestResult> {
-    if ctx.base_name == "jn" {
+    if ctx.base_name_str == "jn" {
         if input.0 > 4000 {
             return XFAIL;
         } else if input.0 > 2000 {
diff --git a/crates/libm-test/src/test_traits.rs b/crates/libm-test/src/test_traits.rs
index b9bec9a44..65faefd33 100644
--- a/crates/libm-test/src/test_traits.rs
+++ b/crates/libm-test/src/test_traits.rs
@@ -11,25 +11,33 @@ use std::fmt;
 
 use anyhow::{Context, bail, ensure};
 
-use crate::{Float, Int, MaybeOverride, SpecialCase, TestResult};
+use crate::{BaseName, Float, Int, MaybeOverride, Name, SpecialCase, TestResult};
 
 /// Context passed to [`CheckOutput`].
 #[derive(Clone, Debug, PartialEq, Eq)]
 pub struct CheckCtx {
     /// Allowed ULP deviation
     pub ulp: u32,
+    pub fn_name: Name,
+    pub base_name: BaseName,
     /// Function name.
-    pub fn_name: &'static str,
+    pub fn_name_str: &'static str,
     /// Return the unsuffixed version of the function name.
-    pub base_name: &'static str,
+    pub base_name_str: &'static str,
     /// Source of truth for tests.
     pub basis: CheckBasis,
 }
 
 impl CheckCtx {
-    pub fn new(ulp: u32, fname: &'static str, basis: CheckBasis) -> Self {
-        let base_name = crate::base_name(fname);
-        Self { ulp, fn_name: fname, base_name, basis }
+    pub fn new(ulp: u32, fn_name: Name, basis: CheckBasis) -> Self {
+        Self {
+            ulp,
+            fn_name,
+            fn_name_str: fn_name.as_str(),
+            base_name: fn_name.base_name(),
+            base_name_str: fn_name.base_name().as_str(),
+            basis,
+        }
     }
 }
 
diff --git a/crates/libm-test/tests/compare_built_musl.rs b/crates/libm-test/tests/compare_built_musl.rs
index d4ba9e900..f4c827fc9 100644
--- a/crates/libm-test/tests/compare_built_musl.rs
+++ b/crates/libm-test/tests/compare_built_musl.rs
@@ -36,7 +36,7 @@ where
 {
     let name = Op::NAME_STR;
     let ulp = musl_allowed_ulp(name);
-    let ctx = CheckCtx::new(ulp, name, CheckBasis::Musl);
+    let ctx = CheckCtx::new(ulp, Op::NAME, CheckBasis::Musl);
     let cases = random::get_test_cases::<Op::RustArgs>(&ctx);
 
     for input in cases {
diff --git a/crates/libm-test/tests/multiprecision.rs b/crates/libm-test/tests/multiprecision.rs
index 676ee86a0..5f38d8462 100644
--- a/crates/libm-test/tests/multiprecision.rs
+++ b/crates/libm-test/tests/multiprecision.rs
@@ -33,7 +33,7 @@ where
 
     let ulp = multiprec_allowed_ulp(name);
     let mut mp_vals = Op::new_mp();
-    let ctx = CheckCtx::new(ulp, name, CheckBasis::Mpfr);
+    let ctx = CheckCtx::new(ulp, Op::NAME, CheckBasis::Mpfr);
     let cases = random::get_test_cases::<Op::RustArgs>(&ctx);
 
     for input in cases {

From 655e8122f8a68637524e4b2d8445969d189ed71d Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sat, 2 Nov 2024 22:40:09 -0500
Subject: [PATCH 037/279] Rename `Name` to `Identifier` to avoid some ambiguity
 of "name"

---
 crates/libm-test/benches/random.rs           |  4 ++--
 crates/libm-test/src/gen/random.rs           |  7 ++----
 crates/libm-test/src/lib.rs                  |  2 +-
 crates/libm-test/src/op.rs                   | 12 +++++-----
 crates/libm-test/src/precision.rs            | 24 ++++++++++----------
 crates/libm-test/src/test_traits.rs          | 16 ++++++-------
 crates/libm-test/tests/compare_built_musl.rs |  4 ++--
 crates/libm-test/tests/multiprecision.rs     |  4 ++--
 8 files changed, 35 insertions(+), 38 deletions(-)

diff --git a/crates/libm-test/benches/random.rs b/crates/libm-test/benches/random.rs
index 72ace5d53..5eea43319 100644
--- a/crates/libm-test/benches/random.rs
+++ b/crates/libm-test/benches/random.rs
@@ -47,10 +47,10 @@ where
     Op: MathOp,
     CachedInput: GenerateInput<Op::RustArgs>,
 {
-    let name = Op::NAME_STR;
+    let name = Op::NAME;
 
     let ulp = libm_test::musl_allowed_ulp(name);
-    let ctx = CheckCtx::new(ulp, Op::NAME, CheckBasis::Musl);
+    let ctx = CheckCtx::new(ulp, Op::IDENTIFIER, CheckBasis::Musl);
     let benchvec: Vec<_> =
         random::get_test_cases::<Op::RustArgs>(&ctx).take(BENCH_ITER_ITEMS).collect();
 
diff --git a/crates/libm-test/src/gen/random.rs b/crates/libm-test/src/gen/random.rs
index b72247a4e..e347b3c63 100644
--- a/crates/libm-test/src/gen/random.rs
+++ b/crates/libm-test/src/gen/random.rs
@@ -110,10 +110,7 @@ pub fn get_test_cases<RustArgs>(ctx: &CheckCtx) -> impl Iterator<Item = RustArgs
 where
     CachedInput: GenerateInput<RustArgs>,
 {
-    let inputs = if ctx.fn_name_str == "jn" || ctx.fn_name_str == "jnf" {
-        &TEST_CASES_JN
-    } else {
-        &TEST_CASES
-    };
+    let inputs =
+        if ctx.fn_name == "jn" || ctx.fn_name == "jnf" { &TEST_CASES_JN } else { &TEST_CASES };
     inputs.get_cases()
 }
diff --git a/crates/libm-test/src/lib.rs b/crates/libm-test/src/lib.rs
index af6ad6da5..914e58244 100644
--- a/crates/libm-test/src/lib.rs
+++ b/crates/libm-test/src/lib.rs
@@ -6,7 +6,7 @@ mod precision;
 mod test_traits;
 
 pub use libm::support::{Float, Int};
-pub use op::{BaseName, MathOp, Name};
+pub use op::{BaseName, Identifier, MathOp};
 pub use precision::{MaybeOverride, SpecialCase, multiprec_allowed_ulp, musl_allowed_ulp};
 pub use test_traits::{CheckBasis, CheckCtx, CheckOutput, GenerateInput, Hex, TupleCall};
 
diff --git a/crates/libm-test/src/op.rs b/crates/libm-test/src/op.rs
index fe0a08a28..50b455d3a 100644
--- a/crates/libm-test/src/op.rs
+++ b/crates/libm-test/src/op.rs
@@ -15,10 +15,10 @@
 
 use crate::{CheckOutput, Float, TupleCall};
 
-/// An enum representing each possible routine name.
+/// An enum representing each possible symbol name (`sin`, `sinf`, `sinl`, etc).
 #[libm_macros::function_enum(BaseName)]
 #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
-pub enum Name {}
+pub enum Identifier {}
 
 /// The name without any type specifier, e.g. `sin` and `sinf` both become `sin`.
 #[libm_macros::base_name_enum]
@@ -58,13 +58,13 @@ pub trait MathOp {
     type RustRet: CheckOutput<Self::RustArgs>;
 
     /// The name of this function, including suffix (e.g. `sin`, `sinf`).
-    const NAME: Name;
+    const IDENTIFIER: Identifier;
 
     /// The name as a string.
-    const NAME_STR: &'static str = Self::NAME.as_str();
+    const NAME: &'static str = Self::IDENTIFIER.as_str();
 
     /// The name of the function excluding the type suffix, e.g. `sin` and `sinf` are both `sin`.
-    const BASE_NAME: BaseName = Self::NAME.base_name();
+    const BASE_NAME: BaseName = Self::IDENTIFIER.base_name();
 
     /// The function in `libm` which can be called.
     const ROUTINE: Self::RustFn;
@@ -96,7 +96,7 @@ macro_rules! do_thing {
                     type RustArgs = $RustArgs;
                     type RustRet = $RustRet;
 
-                    const NAME: Name = Name::[< $fn_name:camel >];
+                    const IDENTIFIER: Identifier = Identifier::[< $fn_name:camel >];
                     const ROUTINE: Self::RustFn = libm::$fn_name;
                 }
             }
diff --git a/crates/libm-test/src/precision.rs b/crates/libm-test/src/precision.rs
index 5b5743020..afe8c1fb7 100644
--- a/crates/libm-test/src/precision.rs
+++ b/crates/libm-test/src/precision.rs
@@ -111,25 +111,25 @@ impl MaybeOverride<(f32,)> for SpecialCase {
         ctx: &CheckCtx,
     ) -> Option<TestResult> {
         if ctx.basis == CheckBasis::Musl {
-            if ctx.fn_name_str == "expm1f" && input.0 > 80.0 && actual.is_infinite() {
+            if ctx.fn_name == "expm1f" && input.0 > 80.0 && actual.is_infinite() {
                 // we return infinity but the number is representable
                 return XFAIL;
             }
 
-            if ctx.fn_name_str == "sinhf" && input.0.abs() > 80.0 && actual.is_nan() {
+            if ctx.fn_name == "sinhf" && input.0.abs() > 80.0 && actual.is_nan() {
                 // we return some NaN that should be real values or infinite
                 // doesn't seem to happen on x86
                 return XFAIL;
             }
         }
 
-        if ctx.fn_name_str == "acoshf" && input.0 < -1.0 {
+        if ctx.fn_name == "acoshf" && input.0 < -1.0 {
             // acoshf is undefined for x <= 1.0, but we return a random result at lower
             // values.
             return XFAIL;
         }
 
-        if ctx.fn_name_str == "lgammaf" || ctx.fn_name_str == "lgammaf_r" && input.0 < 0.0 {
+        if ctx.fn_name == "lgammaf" || ctx.fn_name == "lgammaf_r" && input.0 < 0.0 {
             // loggamma should not be defined for x < 0, yet we both return results
             return XFAIL;
         }
@@ -146,7 +146,7 @@ impl MaybeOverride<(f32,)> for SpecialCase {
         // On MPFR for lgammaf_r, we set -1 as the integer result for negative infinity but MPFR
         // sets +1
         if ctx.basis == CheckBasis::Mpfr
-            && ctx.fn_name_str == "lgammaf_r"
+            && ctx.fn_name == "lgammaf_r"
             && input.0 == f32::NEG_INFINITY
             && actual.abs() == expected.abs()
         {
@@ -166,13 +166,13 @@ impl MaybeOverride<(f64,)> for SpecialCase {
         ctx: &CheckCtx,
     ) -> Option<TestResult> {
         if ctx.basis == CheckBasis::Musl {
-            if cfg!(target_arch = "x86") && ctx.fn_name_str == "acosh" && input.0 < 1.0 {
+            if cfg!(target_arch = "x86") && ctx.fn_name == "acosh" && input.0 < 1.0 {
                 // The function is undefined, both implementations return random results
                 return SKIP;
             }
 
             if cfg!(x86_no_sse)
-                && ctx.fn_name_str == "ceil"
+                && ctx.fn_name == "ceil"
                 && input.0 < 0.0
                 && input.0 > -1.0
                 && expected == F::ZERO
@@ -183,13 +183,13 @@ impl MaybeOverride<(f64,)> for SpecialCase {
             }
         }
 
-        if ctx.fn_name_str == "acosh" && input.0 < 1.0 {
+        if ctx.fn_name == "acosh" && input.0 < 1.0 {
             // The function is undefined for the inputs, musl and our libm both return
             // random results.
             return XFAIL;
         }
 
-        if ctx.fn_name_str == "lgamma" || ctx.fn_name_str == "lgamma_r" && input.0 < 0.0 {
+        if ctx.fn_name == "lgamma" || ctx.fn_name == "lgamma_r" && input.0 < 0.0 {
             // loggamma should not be defined for x < 0, yet we both return results
             return XFAIL;
         }
@@ -206,7 +206,7 @@ impl MaybeOverride<(f64,)> for SpecialCase {
         // On MPFR for lgamma_r, we set -1 as the integer result for negative infinity but MPFR
         // sets +1
         if ctx.basis == CheckBasis::Mpfr
-            && ctx.fn_name_str == "lgamma_r"
+            && ctx.fn_name == "lgamma_r"
             && input.0 == f64::NEG_INFINITY
             && actual.abs() == expected.abs()
         {
@@ -308,7 +308,7 @@ impl MaybeOverride<(i32, f32)> for SpecialCase {
             CheckBasis::Musl => bessel_prec_dropoff(input, ulp, ctx),
             CheckBasis::Mpfr => {
                 // We return +0.0, MPFR returns -0.0
-                if ctx.fn_name_str == "jnf"
+                if ctx.fn_name == "jnf"
                     && input.1 == f32::NEG_INFINITY
                     && actual == F::ZERO
                     && expected == F::ZERO
@@ -333,7 +333,7 @@ impl MaybeOverride<(i32, f64)> for SpecialCase {
             CheckBasis::Musl => bessel_prec_dropoff(input, ulp, ctx),
             CheckBasis::Mpfr => {
                 // We return +0.0, MPFR returns -0.0
-                if ctx.fn_name_str == "jn"
+                if ctx.fn_name == "jn"
                     && input.1 == f64::NEG_INFINITY
                     && actual == F::ZERO
                     && expected == F::ZERO
diff --git a/crates/libm-test/src/test_traits.rs b/crates/libm-test/src/test_traits.rs
index 65faefd33..180330058 100644
--- a/crates/libm-test/src/test_traits.rs
+++ b/crates/libm-test/src/test_traits.rs
@@ -11,17 +11,17 @@ use std::fmt;
 
 use anyhow::{Context, bail, ensure};
 
-use crate::{BaseName, Float, Int, MaybeOverride, Name, SpecialCase, TestResult};
+use crate::{BaseName, Float, Identifier, Int, MaybeOverride, SpecialCase, TestResult};
 
 /// Context passed to [`CheckOutput`].
 #[derive(Clone, Debug, PartialEq, Eq)]
 pub struct CheckCtx {
     /// Allowed ULP deviation
     pub ulp: u32,
-    pub fn_name: Name,
+    pub fn_ident: Identifier,
     pub base_name: BaseName,
     /// Function name.
-    pub fn_name_str: &'static str,
+    pub fn_name: &'static str,
     /// Return the unsuffixed version of the function name.
     pub base_name_str: &'static str,
     /// Source of truth for tests.
@@ -29,13 +29,13 @@ pub struct CheckCtx {
 }
 
 impl CheckCtx {
-    pub fn new(ulp: u32, fn_name: Name, basis: CheckBasis) -> Self {
+    pub fn new(ulp: u32, fn_ident: Identifier, basis: CheckBasis) -> Self {
         Self {
             ulp,
-            fn_name,
-            fn_name_str: fn_name.as_str(),
-            base_name: fn_name.base_name(),
-            base_name_str: fn_name.base_name().as_str(),
+            fn_ident,
+            fn_name: fn_ident.as_str(),
+            base_name: fn_ident.base_name(),
+            base_name_str: fn_ident.base_name().as_str(),
             basis,
         }
     }
diff --git a/crates/libm-test/tests/compare_built_musl.rs b/crates/libm-test/tests/compare_built_musl.rs
index f4c827fc9..c029a5d97 100644
--- a/crates/libm-test/tests/compare_built_musl.rs
+++ b/crates/libm-test/tests/compare_built_musl.rs
@@ -34,9 +34,9 @@ where
     Op: MathOp,
     CachedInput: GenerateInput<Op::RustArgs>,
 {
-    let name = Op::NAME_STR;
+    let name = Op::NAME;
     let ulp = musl_allowed_ulp(name);
-    let ctx = CheckCtx::new(ulp, Op::NAME, CheckBasis::Musl);
+    let ctx = CheckCtx::new(ulp, Op::IDENTIFIER, CheckBasis::Musl);
     let cases = random::get_test_cases::<Op::RustArgs>(&ctx);
 
     for input in cases {
diff --git a/crates/libm-test/tests/multiprecision.rs b/crates/libm-test/tests/multiprecision.rs
index 5f38d8462..00c6278f6 100644
--- a/crates/libm-test/tests/multiprecision.rs
+++ b/crates/libm-test/tests/multiprecision.rs
@@ -29,11 +29,11 @@ where
     Op: MathOp + MpOp,
     CachedInput: GenerateInput<Op::RustArgs>,
 {
-    let name = Op::NAME_STR;
+    let name = Op::NAME;
 
     let ulp = multiprec_allowed_ulp(name);
     let mut mp_vals = Op::new_mp();
-    let ctx = CheckCtx::new(ulp, Op::NAME, CheckBasis::Mpfr);
+    let ctx = CheckCtx::new(ulp, Op::IDENTIFIER, CheckBasis::Mpfr);
     let cases = random::get_test_cases::<Op::RustArgs>(&ctx);
 
     for input in cases {

From 2a52f3248b36a41c405c5ed613811d3253569c5d Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sat, 2 Nov 2024 23:03:41 -0500
Subject: [PATCH 038/279] Change default ULP to use enum matching

Migrate from string to enum matching and tie this to `CheckCtx::new`, so
no tests need to explicitly set ULP.
---
 crates/libm-test/benches/random.rs           |  3 +-
 crates/libm-test/src/lib.rs                  |  2 +-
 crates/libm-test/src/precision.rs            | 73 +++++++++-----------
 crates/libm-test/src/test_traits.rs          | 11 +--
 crates/libm-test/tests/compare_built_musl.rs |  8 +--
 crates/libm-test/tests/multiprecision.rs     |  9 +--
 6 files changed, 47 insertions(+), 59 deletions(-)

diff --git a/crates/libm-test/benches/random.rs b/crates/libm-test/benches/random.rs
index 5eea43319..d77d57908 100644
--- a/crates/libm-test/benches/random.rs
+++ b/crates/libm-test/benches/random.rs
@@ -49,8 +49,7 @@ where
 {
     let name = Op::NAME;
 
-    let ulp = libm_test::musl_allowed_ulp(name);
-    let ctx = CheckCtx::new(ulp, Op::IDENTIFIER, CheckBasis::Musl);
+    let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Musl);
     let benchvec: Vec<_> =
         random::get_test_cases::<Op::RustArgs>(&ctx).take(BENCH_ITER_ITEMS).collect();
 
diff --git a/crates/libm-test/src/lib.rs b/crates/libm-test/src/lib.rs
index 914e58244..7f0d9aa75 100644
--- a/crates/libm-test/src/lib.rs
+++ b/crates/libm-test/src/lib.rs
@@ -7,7 +7,7 @@ mod test_traits;
 
 pub use libm::support::{Float, Int};
 pub use op::{BaseName, Identifier, MathOp};
-pub use precision::{MaybeOverride, SpecialCase, multiprec_allowed_ulp, musl_allowed_ulp};
+pub use precision::{MaybeOverride, SpecialCase, default_ulp};
 pub use test_traits::{CheckBasis, CheckCtx, CheckOutput, GenerateInput, Hex, TupleCall};
 
 /// Result type for tests is usually from `anyhow`. Most times there is no success value to
diff --git a/crates/libm-test/src/precision.rs b/crates/libm-test/src/precision.rs
index afe8c1fb7..c761709b8 100644
--- a/crates/libm-test/src/precision.rs
+++ b/crates/libm-test/src/precision.rs
@@ -3,7 +3,10 @@
 
 use core::f32;
 
-use crate::{CheckBasis, CheckCtx, Float, Int, TestResult};
+use CheckBasis::{Mpfr, Musl};
+use Identifier as Id;
+
+use crate::{CheckBasis, CheckCtx, Float, Identifier, Int, TestResult};
 
 /// Type implementing [`IgnoreCase`].
 pub struct SpecialCase;
@@ -14,50 +17,42 @@ const MUSL_DEFAULT_ULP: u32 = 2;
 /// Default ULP allowed to differ from multiprecision (i.e. infinite) results.
 const MULTIPREC_DEFAULT_ULP: u32 = 1;
 
-/// ULP allowed to differ from muls results.
+/// ULP allowed to differ from the results returned by a test basis.
 ///
-/// Note that these results were obtained using 400,000,000 rounds of random inputs, which
+/// Note that these results were obtained using 400M rounds of random inputs, which
 /// is not a value used by default.
-pub fn musl_allowed_ulp(name: &str) -> u32 {
-    // Consider overrides xfail
-    match name {
-        #[cfg(x86_no_sse)]
-        "asinh" | "asinhf" => 6,
-        "lgamma" | "lgamma_r" | "lgammaf" | "lgammaf_r" => 400,
-        "tanh" | "tanhf" => 4,
-        "tgamma" => 20,
-        "j0" | "j0f" | "j1" | "j1f" => {
+pub fn default_ulp(ctx: &CheckCtx) -> u32 {
+    match (&ctx.basis, ctx.fn_ident) {
+        // Overrides that apply to either basis
+        (_, Id::J0 | Id::J0f | Id::J1 | Id::J1f) => {
             // Results seem very target-dependent
             if cfg!(target_arch = "x86_64") { 4000 } else { 800_000 }
         }
-        "jn" | "jnf" => 1000,
-        "sincosf" => 500,
-        #[cfg(not(target_pointer_width = "64"))]
-        "exp10" => 4,
-        #[cfg(not(target_pointer_width = "64"))]
-        "exp10f" => 4,
-        _ => MUSL_DEFAULT_ULP,
-    }
-}
+        (_, Id::Jn | Id::Jnf) => 1000,
 
-/// ULP allowed to differ from multiprecision results.
-pub fn multiprec_allowed_ulp(name: &str) -> u32 {
-    // Consider overrides xfail
-    match name {
-        "asinh" | "asinhf" => 2,
-        "acoshf" => 4,
-        "atanh" | "atanhf" => 2,
-        "exp10" | "exp10f" => 3,
-        "j0" | "j0f" | "j1" | "j1f" => {
-            // Results seem very target-dependent
-            if cfg!(target_arch = "x86_64") { 4000 } else { 800_000 }
-        }
-        "jn" | "jnf" => 1000,
-        "lgamma" | "lgammaf" | "lgamma_r" | "lgammaf_r" => 16,
-        "sinh" | "sinhf" => 2,
-        "tanh" | "tanhf" => 2,
-        "tgamma" => 20,
-        _ => MULTIPREC_DEFAULT_ULP,
+        // Overrides for musl
+        #[cfg(x86_no_sse)]
+        (Musl, Id::Asinh | Id::Asinhf) => 6,
+        #[cfg(not(target_pointer_width = "64"))]
+        (Musl, Id::Exp10 | Id::Exp10f) => 4,
+        (Musl, Id::Lgamma | Id::LgammaR | Id::Lgammaf | Id::LgammafR) => 400,
+        (Musl, Id::Sincosf) => 500,
+        (Musl, Id::Tanh | Id::Tanhf) => 4,
+        (Musl, Id::Tgamma) => 20,
+
+        // Overrides for MPFR
+        (Mpfr, Id::Acoshf) => 4,
+        (Mpfr, Id::Asinh | Id::Asinhf) => 2,
+        (Mpfr, Id::Atanh | Id::Atanhf) => 2,
+        (Mpfr, Id::Exp10 | Id::Exp10f) => 3,
+        (Mpfr, Id::Lgamma | Id::LgammaR | Id::Lgammaf | Id::LgammafR) => 16,
+        (Mpfr, Id::Sinh | Id::Sinhf) => 2,
+        (Mpfr, Id::Tanh | Id::Tanhf) => 2,
+        (Mpfr, Id::Tgamma) => 20,
+
+        // Defaults
+        (Musl, _) => MUSL_DEFAULT_ULP,
+        (Mpfr, _) => MULTIPREC_DEFAULT_ULP,
     }
 }
 
diff --git a/crates/libm-test/src/test_traits.rs b/crates/libm-test/src/test_traits.rs
index 180330058..ec14a8cf2 100644
--- a/crates/libm-test/src/test_traits.rs
+++ b/crates/libm-test/src/test_traits.rs
@@ -29,15 +29,18 @@ pub struct CheckCtx {
 }
 
 impl CheckCtx {
-    pub fn new(ulp: u32, fn_ident: Identifier, basis: CheckBasis) -> Self {
-        Self {
-            ulp,
+    /// Create a new check context, using the default ULP for the function.
+    pub fn new(fn_ident: Identifier, basis: CheckBasis) -> Self {
+        let mut ret = Self {
+            ulp: 0,
             fn_ident,
             fn_name: fn_ident.as_str(),
             base_name: fn_ident.base_name(),
             base_name_str: fn_ident.base_name().as_str(),
             basis,
-        }
+        };
+        ret.ulp = crate::default_ulp(&ret);
+        ret
     }
 }
 
diff --git a/crates/libm-test/tests/compare_built_musl.rs b/crates/libm-test/tests/compare_built_musl.rs
index c029a5d97..0022ee03c 100644
--- a/crates/libm-test/tests/compare_built_musl.rs
+++ b/crates/libm-test/tests/compare_built_musl.rs
@@ -10,9 +10,7 @@
 #![cfg(feature = "build-musl")]
 
 use libm_test::gen::{CachedInput, random};
-use libm_test::{
-    CheckBasis, CheckCtx, CheckOutput, GenerateInput, MathOp, TupleCall, musl_allowed_ulp,
-};
+use libm_test::{CheckBasis, CheckCtx, CheckOutput, GenerateInput, MathOp, TupleCall};
 
 macro_rules! musl_rand_tests {
     (
@@ -34,9 +32,7 @@ where
     Op: MathOp,
     CachedInput: GenerateInput<Op::RustArgs>,
 {
-    let name = Op::NAME;
-    let ulp = musl_allowed_ulp(name);
-    let ctx = CheckCtx::new(ulp, Op::IDENTIFIER, CheckBasis::Musl);
+    let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Musl);
     let cases = random::get_test_cases::<Op::RustArgs>(&ctx);
 
     for input in cases {
diff --git a/crates/libm-test/tests/multiprecision.rs b/crates/libm-test/tests/multiprecision.rs
index 00c6278f6..47a85bdb3 100644
--- a/crates/libm-test/tests/multiprecision.rs
+++ b/crates/libm-test/tests/multiprecision.rs
@@ -4,9 +4,7 @@
 
 use libm_test::gen::{CachedInput, random};
 use libm_test::mpfloat::MpOp;
-use libm_test::{
-    CheckBasis, CheckCtx, CheckOutput, GenerateInput, MathOp, TupleCall, multiprec_allowed_ulp,
-};
+use libm_test::{CheckBasis, CheckCtx, CheckOutput, GenerateInput, MathOp, TupleCall};
 
 /// Implement a test against MPFR with random inputs.
 macro_rules! multiprec_rand_tests {
@@ -29,11 +27,8 @@ where
     Op: MathOp + MpOp,
     CachedInput: GenerateInput<Op::RustArgs>,
 {
-    let name = Op::NAME;
-
-    let ulp = multiprec_allowed_ulp(name);
     let mut mp_vals = Op::new_mp();
-    let ctx = CheckCtx::new(ulp, Op::IDENTIFIER, CheckBasis::Mpfr);
+    let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Mpfr);
     let cases = random::get_test_cases::<Op::RustArgs>(&ctx);
 
     for input in cases {

From 523c8ca3ad4e9ad3da3974fef40fe6c1a8f78ff7 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sun, 3 Nov 2024 20:24:26 -0600
Subject: [PATCH 039/279] Change the `multiprec_` prefix to `mp_`

Currently there is a combination of names starting with
`multiprecision_`, `mp_` and `multiprec_`. Update so `multiprecision_`
is always used when a long form makes sense, `mp_` otherwise
(eliminating `multiprec_`).
---
 crates/libm-test/src/precision.rs        | 4 ++--
 crates/libm-test/tests/multiprecision.rs | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/crates/libm-test/src/precision.rs b/crates/libm-test/src/precision.rs
index c761709b8..cf9115430 100644
--- a/crates/libm-test/src/precision.rs
+++ b/crates/libm-test/src/precision.rs
@@ -15,7 +15,7 @@ pub struct SpecialCase;
 const MUSL_DEFAULT_ULP: u32 = 2;
 
 /// Default ULP allowed to differ from multiprecision (i.e. infinite) results.
-const MULTIPREC_DEFAULT_ULP: u32 = 1;
+const MP_DEFAULT_ULP: u32 = 1;
 
 /// ULP allowed to differ from the results returned by a test basis.
 ///
@@ -52,7 +52,7 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 {
 
         // Defaults
         (Musl, _) => MUSL_DEFAULT_ULP,
-        (Mpfr, _) => MULTIPREC_DEFAULT_ULP,
+        (Mpfr, _) => MP_DEFAULT_ULP,
     }
 }
 
diff --git a/crates/libm-test/tests/multiprecision.rs b/crates/libm-test/tests/multiprecision.rs
index 47a85bdb3..0b41fba82 100644
--- a/crates/libm-test/tests/multiprecision.rs
+++ b/crates/libm-test/tests/multiprecision.rs
@@ -7,7 +7,7 @@ use libm_test::mpfloat::MpOp;
 use libm_test::{CheckBasis, CheckCtx, CheckOutput, GenerateInput, MathOp, TupleCall};
 
 /// Implement a test against MPFR with random inputs.
-macro_rules! multiprec_rand_tests {
+macro_rules! mp_rand_tests {
     (
         fn_name: $fn_name:ident,
         attrs: [$($meta:meta)*]
@@ -15,7 +15,7 @@ macro_rules! multiprec_rand_tests {
         paste::paste! {
             #[test]
             $(#[$meta])*
-            fn [< multiprec_random_ $fn_name >]() {
+            fn [< mp_random_ $fn_name >]() {
                 test_one::<libm_test::op::$fn_name::Routine>();
             }
         }
@@ -40,7 +40,7 @@ where
 }
 
 libm_macros::for_each_function! {
-    callback: multiprec_rand_tests,
+    callback: mp_rand_tests,
     attributes: [
         // Also an assertion failure on i686: at `MPFR_ASSERTN (! mpfr_erangeflag_p ())`
         #[ignore = "large values are infeasible in MPFR"]

From 2809ddf92db2e62920478e9a0f843d5250b42cae Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sun, 3 Nov 2024 20:52:28 -0600
Subject: [PATCH 040/279] Move some numeric trait logic to default
 implementations

There are a handful of functions we can move out of the macro and to the
numeric traits as default implementations; do that here.

Additionally, add some bounds that make sense for completeness.
---
 crates/libm-test/src/test_traits.rs |   2 +-
 src/math/support/float_traits.rs    | 106 ++++++++++++++++------------
 src/math/support/int_traits.rs      |   7 +-
 3 files changed, 67 insertions(+), 48 deletions(-)

diff --git a/crates/libm-test/src/test_traits.rs b/crates/libm-test/src/test_traits.rs
index ec14a8cf2..b8e0aa108 100644
--- a/crates/libm-test/src/test_traits.rs
+++ b/crates/libm-test/src/test_traits.rs
@@ -314,7 +314,7 @@ where
         // Make sure that the signs are the same before checing ULP to avoid wraparound
         let act_sig = actual.signum();
         let exp_sig = expected.signum();
-        ensure!(act_sig == exp_sig, "mismatched signs {act_sig} {exp_sig}");
+        ensure!(act_sig == exp_sig, "mismatched signs {act_sig:?} {exp_sig:?}");
 
         if actual.is_infinite() ^ expected.is_infinite() {
             bail!("mismatched infinities");
diff --git a/src/math/support/float_traits.rs b/src/math/support/float_traits.rs
index a1d84faf2..0047ba368 100644
--- a/src/math/support/float_traits.rs
+++ b/src/math/support/float_traits.rs
@@ -1,4 +1,4 @@
-use core::{fmt, ops};
+use core::{fmt, mem, ops};
 
 use super::int_traits::{Int, MinInt};
 
@@ -7,15 +7,17 @@ use super::int_traits::{Int, MinInt};
 pub trait Float:
     Copy
     + fmt::Debug
-    + fmt::Display
     + PartialEq
     + PartialOrd
     + ops::AddAssign
     + ops::MulAssign
     + ops::Add<Output = Self>
     + ops::Sub<Output = Self>
+    + ops::Mul<Output = Self>
     + ops::Div<Output = Self>
     + ops::Rem<Output = Self>
+    + ops::Neg<Output = Self>
+    + 'static
 {
     /// A uint of the same width as the float
     type Int: Int<OtherSign = Self::SignedInt, Unsigned = Self::Int>;
@@ -27,11 +29,16 @@ pub trait Float:
     type ExpInt: Int;
 
     const ZERO: Self;
+    const NEG_ZERO: Self;
     const ONE: Self;
     const NEG_ONE: Self;
     const INFINITY: Self;
     const NEG_INFINITY: Self;
     const NAN: Self;
+    const MAX: Self;
+    const MIN: Self;
+    const PI: Self;
+    const FRAC_PI_2: Self;
 
     /// The bitwidth of the float type
     const BITS: u32;
@@ -69,7 +76,19 @@ pub trait Float:
     /// Checks if two floats have the same bit representation. *Except* for NaNs! NaN can be
     /// represented in multiple different ways. This method returns `true` if two NaNs are
     /// compared.
-    fn eq_repr(self, rhs: Self) -> bool;
+    fn eq_repr(self, rhs: Self) -> bool {
+        let is_nan = |x: Self| -> bool {
+            // }
+            // fn is_nan(x: Self) -> bool {
+            // When using mangled-names, the "real" compiler-builtins might not have the
+            // necessary builtin (__unordtf2) to test whether `f128` is NaN.
+            // FIXME(f16_f128): Remove once the nightly toolchain has the __unordtf2 builtin
+            // x is NaN if all the bits of the exponent are set and the significand is non-0
+            x.to_bits() & Self::EXP_MASK == Self::EXP_MASK
+                && x.to_bits() & Self::SIG_MASK != Self::Int::ZERO
+        };
+        if is_nan(self) && is_nan(rhs) { true } else { self.to_bits() == rhs.to_bits() }
+    }
 
     /// Returns true if the value is NaN.
     fn is_nan(self) -> bool;
@@ -81,22 +100,35 @@ pub trait Float:
     fn is_sign_negative(self) -> bool;
 
     /// Returns if `self` is subnormal
-    fn is_subnormal(self) -> bool;
+    fn is_subnormal(self) -> bool {
+        (self.to_bits() & Self::EXP_MASK) == Self::Int::ZERO
+    }
 
     /// Returns the exponent, not adjusting for bias.
     fn exp(self) -> Self::ExpInt;
 
     /// Returns the significand with no implicit bit (or the "fractional" part)
-    fn frac(self) -> Self::Int;
+    fn frac(self) -> Self::Int {
+        self.to_bits() & Self::SIG_MASK
+    }
 
     /// Returns the significand with implicit bit
-    fn imp_frac(self) -> Self::Int;
+    fn imp_frac(self) -> Self::Int {
+        self.frac() | Self::IMPLICIT_BIT
+    }
 
     /// Returns a `Self::Int` transmuted back to `Self`
     fn from_bits(a: Self::Int) -> Self;
 
     /// Constructs a `Self` from its parts. Inputs are treated as bits and shifted into position.
-    fn from_parts(negative: bool, exponent: Self::Int, significand: Self::Int) -> Self;
+    fn from_parts(negative: bool, exponent: Self::Int, significand: Self::Int) -> Self {
+        let sign = if negative { Self::Int::ONE } else { Self::Int::ZERO };
+        Self::from_bits(
+            (sign << (Self::BITS - 1))
+                | ((exponent << Self::SIG_BITS) & Self::EXP_MASK)
+                | (significand & Self::SIG_MASK),
+        )
+    }
 
     fn abs(self) -> Self {
         let abs_mask = !Self::SIGN_MASK;
@@ -107,10 +139,18 @@ pub trait Float:
     fn normalize(significand: Self::Int) -> (i32, Self::Int);
 
     /// Returns a number composed of the magnitude of self and the sign of sign.
-    fn copysign(self, other: Self) -> Self;
+    fn copysign(self, other: Self) -> Self {
+        let mut x = self.to_bits();
+        let y = other.to_bits();
+        x &= !Self::SIGN_MASK;
+        x |= y & Self::SIGN_MASK;
+        Self::from_bits(x)
+    }
 
     /// Returns a number that represents the sign of self.
-    fn signum(self) -> Self;
+    fn signum(self) -> Self {
+        if self.is_nan() { self } else { Self::ONE.copysign(self) }
+    }
 }
 
 macro_rules! float_impl {
@@ -121,11 +161,22 @@ macro_rules! float_impl {
             type ExpInt = $expty;
 
             const ZERO: Self = 0.0;
+            const NEG_ZERO: Self = -0.0;
             const ONE: Self = 1.0;
             const NEG_ONE: Self = -1.0;
             const INFINITY: Self = Self::INFINITY;
             const NEG_INFINITY: Self = Self::NEG_INFINITY;
             const NAN: Self = Self::NAN;
+            const MAX: Self = -Self::MIN;
+            // Sign bit set, saturated mantissa, saturated exponent with last bit zeroed
+            // FIXME(msrv): just use `from_bits` when available
+            // SAFETY: POD cast with no preconditions
+            const MIN: Self = unsafe {
+                mem::transmute::<Self::Int, Self>(Self::Int::MAX & !(1 << Self::SIG_BITS))
+            };
+
+            const PI: Self = core::$ty::consts::PI;
+            const FRAC_PI_2: Self = core::$ty::consts::FRAC_PI_2;
 
             const BITS: u32 = $bits;
             const SIG_BITS: u32 = $significand_bits;
@@ -141,16 +192,6 @@ macro_rules! float_impl {
             fn to_bits_signed(self) -> Self::SignedInt {
                 self.to_bits() as Self::SignedInt
             }
-            fn eq_repr(self, rhs: Self) -> bool {
-                fn is_nan(x: $ty) -> bool {
-                    // When using mangled-names, the "real" compiler-builtins might not have the
-                    // necessary builtin (__unordtf2) to test whether `f128` is NaN.
-                    // FIXME(f16_f128): Remove once the nightly toolchain has the __unordtf2 builtin
-                    // x is NaN if all the bits of the exponent are set and the significand is non-0
-                    x.to_bits() & $ty::EXP_MASK == $ty::EXP_MASK && x.to_bits() & $ty::SIG_MASK != 0
-                }
-                if is_nan(self) && is_nan(rhs) { true } else { self.to_bits() == rhs.to_bits() }
-            }
             fn is_nan(self) -> bool {
                 self.is_nan()
             }
@@ -160,43 +201,16 @@ macro_rules! float_impl {
             fn is_sign_negative(self) -> bool {
                 self.is_sign_negative()
             }
-            fn is_subnormal(self) -> bool {
-                (self.to_bits() & Self::EXP_MASK) == Self::Int::ZERO
-            }
             fn exp(self) -> Self::ExpInt {
                 ((self.to_bits() & Self::EXP_MASK) >> Self::SIG_BITS) as Self::ExpInt
             }
-            fn frac(self) -> Self::Int {
-                self.to_bits() & Self::SIG_MASK
-            }
-            fn imp_frac(self) -> Self::Int {
-                self.frac() | Self::IMPLICIT_BIT
-            }
             fn from_bits(a: Self::Int) -> Self {
                 Self::from_bits(a)
             }
-            fn from_parts(negative: bool, exponent: Self::Int, significand: Self::Int) -> Self {
-                Self::from_bits(
-                    ((negative as Self::Int) << (Self::BITS - 1))
-                        | ((exponent << Self::SIG_BITS) & Self::EXP_MASK)
-                        | (significand & Self::SIG_MASK),
-                )
-            }
             fn normalize(significand: Self::Int) -> (i32, Self::Int) {
                 let shift = significand.leading_zeros().wrapping_sub(Self::EXP_BITS);
                 (1i32.wrapping_sub(shift as i32), significand << shift as Self::Int)
             }
-            fn copysign(self, other: Self) -> Self {
-                let mut x = self.to_bits();
-                let y = other.to_bits();
-                x &= !Self::SIGN_MASK;
-                x |= y & Self::SIGN_MASK;
-                Self::from_bits(x)
-            }
-
-            fn signum(self) -> Self {
-                if self.is_nan() { self } else { Self::ONE.copysign(self) }
-            }
         }
     };
 }
diff --git a/src/math/support/int_traits.rs b/src/math/support/int_traits.rs
index b08907aa5..c72c1d5cb 100644
--- a/src/math/support/int_traits.rs
+++ b/src/math/support/int_traits.rs
@@ -1,4 +1,4 @@
-use core::{fmt, ops};
+use core::{cmp, fmt, ops};
 
 /// Minimal integer implementations needed on all integer types, including wide integers.
 #[allow(dead_code)]
@@ -31,6 +31,8 @@ pub trait MinInt:
 pub trait Int:
     MinInt
     + fmt::Display
+    + fmt::Binary
+    + fmt::LowerHex
     + PartialEq
     + PartialOrd
     + ops::AddAssign
@@ -47,6 +49,9 @@ pub trait Int:
     + ops::Shr<u32, Output = Self>
     + ops::BitXor<Output = Self>
     + ops::BitAnd<Output = Self>
+    + cmp::Ord
+    + CastInto<usize>
+    + CastFrom<u8>
 {
     fn signed(self) -> <Self::Unsigned as MinInt>::OtherSign;
     fn unsigned(self) -> Self::Unsigned;

From a0cf291c87f927f7ab31193284621c25e7c7a71a Mon Sep 17 00:00:00 2001
From: beetrees <b@beetr.ee>
Date: Thu, 14 Nov 2024 18:13:10 +0000
Subject: [PATCH 041/279] Use `https:` links in `README.md`

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index e5d64bd2d..52d760a4f 100644
--- a/README.md
+++ b/README.md
@@ -41,14 +41,14 @@ This crate supports rustc 1.63 and newer.
 ## License
 
 Usage is licensed under the MIT license ([LICENSE-MIT](LICENSE-MIT) or
-http://opensource.org/licenses/MIT).
+https://opensource.org/licenses/MIT).
 
 
 ### Contribution
 
 Contributions are licensed under both the MIT license and the Apache License,
 Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or
-http://www.apache.org/licenses/LICENSE-2.0). Unless you explicitly state
+https://www.apache.org/licenses/LICENSE-2.0). Unless you explicitly state
 otherwise, any contribution intentionally submitted for inclusion in the work
 by you, as defined in the Apache-2.0 license, shall be dual licensed as
 mentioned, without any additional terms or conditions.

From 0ac65122ebdcb00c760bd04644032d54c967c25f Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sun, 22 Dec 2024 05:03:21 +0000
Subject: [PATCH 042/279] Remove tests against system musl

We now have tests against our custom-built musl as well as tests against
MPFR. The tests against system musl covers less than those against
custom-built musl, and are less portable; there isn't much benefit to
keeping them around so just remove them.
---
 CONTRIBUTING.md                      |  20 +-
 ci/run.sh                            |   5 -
 crates/libm-test/Cargo.toml          |   1 -
 crates/libm-test/build.rs            | 456 ---------------------------
 crates/libm-test/tests/musl_biteq.rs |   6 -
 src/lib.rs                           |  25 --
 6 files changed, 12 insertions(+), 501 deletions(-)
 delete mode 100644 crates/libm-test/tests/musl_biteq.rs

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 0a1741631..aadcdf036 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -6,9 +6,8 @@
   `src/math/mod.rs` accordingly. Also, uncomment the corresponding trait method
   in `src/lib.rs`.
 - Write some simple tests in your module (using `#[test]`)
-- Run `cargo test` to make sure it works
-- Run `cargo test --features libm-test/test-musl-serialized` to compare your
-  implementation against musl's
+- Run `cargo test` to make sure it works. Full tests are only run when enabling
+  features, see [Testing](#testing) below.
 - Send us a pull request! Make sure to run `cargo fmt` on your code before
   sending the PR. Also include "closes #42" in the PR description to close the
   corresponding issue.
@@ -66,12 +65,17 @@ Normal tests can be executed with:
 cargo test
 ```
 
-If you'd like to run tests with randomized inputs that get compared against musl
-itself, you'll need to be on a Linux system and then you can execute:
+If you'd like to run tests with randomized inputs that get compared against
+infinite-precision results, run:
 
 ```sh
-cargo test --features libm-test/test-musl-serialized
+cargo test --features libm-test/test-multiprecision,libm-test/build-musl --release
 ```
 
-Note that you may need to pass `--release` to Cargo if there are errors related
-to integer overflow.
+The multiprecision tests use the [`rug`] crate for bindings to MPFR. MPFR can
+be difficult to build on non-Unix systems, refer to [`gmp_mpfr_sys`] for help.
+
+`build-musl` does not build with MSVC, Wasm, or Thumb.
+
+[`rug`]: https://docs.rs/rug/latest/rug/
+[`gmp_mpfr_sys`]: https://docs.rs/gmp-mpfr-sys/1.6.4/gmp_mpfr_sys/
diff --git a/ci/run.sh b/ci/run.sh
index 32453663e..d89c8bdf0 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -57,11 +57,6 @@ case "$target" in
     *windows-gnu) extra_flags="$extra_flags --exclude libm-macros" ;;
 esac
 
-if [ "$(uname -a)" = "Linux" ]; then
-    # also run the reference tests when we can. requires a Linux host.
-    extra_flags="$extra_flags --features libm-test/test-musl-serialized"
-fi
-
 # Make sure we can build with overriding features. We test the indibidual
 # features it controls separately.
 cargo check --no-default-features
diff --git a/crates/libm-test/Cargo.toml b/crates/libm-test/Cargo.toml
index 3587b44e6..4d75b25f8 100644
--- a/crates/libm-test/Cargo.toml
+++ b/crates/libm-test/Cargo.toml
@@ -9,7 +9,6 @@ default = []
 
 # Generate tests which are random inputs and the outputs are calculated with
 # musl libc.
-test-musl-serialized = ["rand"]
 test-multiprecision = ["dep:az", "dep:rug"]
 
 # Build our own musl for testing and benchmarks
diff --git a/crates/libm-test/build.rs b/crates/libm-test/build.rs
index 40b3e56c0..dc3126dbb 100644
--- a/crates/libm-test/build.rs
+++ b/crates/libm-test/build.rs
@@ -8,9 +8,6 @@ fn main() {
     emit_optimization_cfg(&cfg);
     emit_cfg_shorthands(&cfg);
     list_all_tests(&cfg);
-
-    #[cfg(feature = "test-musl-serialized")]
-    musl_serialized_tests::generate();
 }
 
 #[allow(dead_code)]
@@ -93,456 +90,3 @@ fn list_all_tests(cfg: &Config) {
     let outfile = cfg.out_dir.join("all_files.rs");
     fs::write(outfile, s).unwrap();
 }
-
-/// At build time, generate the output of what the corresponding `*musl` target does with a range
-/// of inputs.
-///
-/// Serialize that target's output, run the same thing with our symbols, then load and compare
-/// the resulting values.
-#[cfg(feature = "test-musl-serialized")]
-mod musl_serialized_tests {
-    use std::path::PathBuf;
-    use std::process::Command;
-    use std::{env, fs};
-
-    use rand::Rng;
-    use rand::seq::SliceRandom;
-
-    // Number of tests to generate for each function
-    const NTESTS: usize = 500;
-
-    // These files are all internal functions or otherwise miscellaneous, not
-    // defining a function we want to test.
-    const IGNORED_FILES: &[&str] = &[
-        "fenv.rs",
-        // These are giving slightly different results compared to musl
-        "lgamma.rs",
-        "lgammaf.rs",
-        "tgamma.rs",
-        "j0.rs",
-        "j0f.rs",
-        "jn.rs",
-        "jnf.rs",
-        "j1.rs",
-        "j1f.rs",
-    ];
-
-    struct Function {
-        name: String,
-        args: Vec<Ty>,
-        ret: Vec<Ty>,
-        tests: Vec<Test>,
-    }
-
-    enum Ty {
-        F32,
-        F64,
-        I32,
-        Bool,
-    }
-
-    struct Test {
-        inputs: Vec<i64>,
-        outputs: Vec<i64>,
-    }
-
-    pub fn generate() {
-        // PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520
-        let target_arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap();
-        let libm_test = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap());
-        let math_src = libm_test.join("../../src/math");
-
-        if target_arch == "powerpc64" {
-            return;
-        }
-
-        let files = fs::read_dir(math_src)
-            .unwrap()
-            .map(|f| f.unwrap().path())
-            .filter(file_needs_test)
-            .collect::<Vec<_>>();
-
-        let mut math = Vec::new();
-        for file in files {
-            if IGNORED_FILES.iter().any(|f| file.ends_with(f)) {
-                continue;
-            }
-
-            println!("generating musl reference tests in {:?}", file);
-
-            let contents = fs::read_to_string(file).unwrap();
-            let mut functions = contents.lines().filter(|f| f.starts_with("pub fn"));
-            while let Some(function_to_test) = functions.next() {
-                math.push(parse(function_to_test));
-            }
-        }
-
-        // Generate a bunch of random inputs for each function. This will
-        // attempt to generate a good set of uniform test cases for exercising
-        // all the various functionality.
-        generate_random_tests(&mut math, &mut rand::thread_rng());
-
-        // After we have all our inputs, use the x86_64-unknown-linux-musl
-        // target to generate the expected output.
-        generate_test_outputs(&mut math);
-        //panic!("Boo");
-        // ... and now that we have both inputs and expected outputs, do a bunch
-        // of codegen to create the unit tests which we'll actually execute.
-        generate_unit_tests(&math);
-    }
-
-    /// Check whether a path within `src/math` should get tests generated.
-    fn file_needs_test(path: &PathBuf) -> bool {
-        // Skip directories
-        if path.is_dir() {
-            return false;
-        }
-
-        let fname = path.file_name().unwrap().to_str().unwrap();
-
-        // Musl doesn't support `f16` or `f128`
-        !(fname.contains("f16") || fname.contains("f128"))
-    }
-
-    /// A "poor man's" parser for the signature of a function
-    fn parse(s: &str) -> Function {
-        let s = eat(s, "pub fn ");
-        let pos = s.find('(').unwrap();
-        let name = &s[..pos];
-        let s = &s[pos + 1..];
-        let end = s.find(')').unwrap();
-        let args = s[..end]
-            .split(',')
-            .map(|arg| {
-                let colon = arg.find(':').unwrap();
-                parse_ty(arg[colon + 1..].trim())
-            })
-            .collect::<Vec<_>>();
-        let tail = &s[end + 1..];
-        let tail = eat(tail, " -> ");
-        let ret = parse_retty(tail.replace("{", "").trim());
-
-        return Function { name: name.to_string(), args, ret, tests: Vec::new() };
-
-        fn parse_ty(s: &str) -> Ty {
-            match s {
-                "f32" => Ty::F32,
-                "f64" => Ty::F64,
-                "i32" => Ty::I32,
-                "bool" => Ty::Bool,
-                other => panic!("unknown type `{}`", other),
-            }
-        }
-
-        fn parse_retty(s: &str) -> Vec<Ty> {
-            match s {
-                "(f32, f32)" => vec![Ty::F32, Ty::F32],
-                "(f32, i32)" => vec![Ty::F32, Ty::I32],
-                "(f64, f64)" => vec![Ty::F64, Ty::F64],
-                "(f64, i32)" => vec![Ty::F64, Ty::I32],
-                other => vec![parse_ty(other)],
-            }
-        }
-
-        fn eat<'a>(s: &'a str, prefix: &str) -> &'a str {
-            if s.starts_with(prefix) {
-                &s[prefix.len()..]
-            } else {
-                panic!("{:?} didn't start with {:?}", s, prefix)
-            }
-        }
-    }
-
-    fn generate_random_tests<R: Rng>(functions: &mut [Function], rng: &mut R) {
-        for function in functions {
-            for _ in 0..NTESTS {
-                function.tests.push(generate_test(function, rng));
-            }
-        }
-
-        fn generate_test<R: Rng>(function: &Function, rng: &mut R) -> Test {
-            let mut inputs = function.args.iter().map(|ty| ty.gen_i64(rng)).collect::<Vec<_>>();
-
-            // First argument to this function appears to be a number of
-            // iterations, so passing in massive random numbers causes it to
-            // take forever to execute, so make sure we're not running random
-            // math code until the heat death of the universe.
-            if function.name == "jn" || function.name == "jnf" {
-                inputs[0] &= 0xffff;
-            }
-
-            Test {
-                inputs,
-                // zero output for now since we'll generate it later
-                outputs: vec![],
-            }
-        }
-    }
-
-    impl Ty {
-        fn gen_i64<R: Rng>(&self, r: &mut R) -> i64 {
-            use std::{f32, f64};
-
-            return match self {
-                Ty::F32 => {
-                    if r.gen_range(0..20) < 1 {
-                        let i = *[f32::NAN, f32::INFINITY, f32::NEG_INFINITY].choose(r).unwrap();
-                        i.to_bits().into()
-                    } else {
-                        r.gen::<f32>().to_bits().into()
-                    }
-                }
-                Ty::F64 => {
-                    if r.gen_range(0..20) < 1 {
-                        let i = *[f64::NAN, f64::INFINITY, f64::NEG_INFINITY].choose(r).unwrap();
-                        i.to_bits() as i64
-                    } else {
-                        r.gen::<f64>().to_bits() as i64
-                    }
-                }
-                Ty::I32 => {
-                    if r.gen_range(0..10) < 1 {
-                        let i = *[i32::max_value(), 0, i32::min_value()].choose(r).unwrap();
-                        i.into()
-                    } else {
-                        r.gen::<i32>().into()
-                    }
-                }
-                Ty::Bool => r.gen::<bool>() as i64,
-            };
-        }
-
-        fn libc_ty(&self) -> &'static str {
-            match self {
-                Ty::F32 => "f32",
-                Ty::F64 => "f64",
-                Ty::I32 => "i32",
-                Ty::Bool => "i32",
-            }
-        }
-
-        fn libc_pty(&self) -> &'static str {
-            match self {
-                Ty::F32 => "*mut f32",
-                Ty::F64 => "*mut f64",
-                Ty::I32 => "*mut i32",
-                Ty::Bool => "*mut i32",
-            }
-        }
-
-        fn default(&self) -> &'static str {
-            match self {
-                Ty::F32 => "0_f32",
-                Ty::F64 => "0_f64",
-                Ty::I32 => "0_i32",
-                Ty::Bool => "false",
-            }
-        }
-
-        fn to_i64(&self) -> &'static str {
-            match self {
-                Ty::F32 => ".to_bits() as i64",
-                Ty::F64 => ".to_bits() as i64",
-                Ty::I32 => " as i64",
-                Ty::Bool => " as i64",
-            }
-        }
-    }
-
-    fn generate_test_outputs(functions: &mut [Function]) {
-        let mut src = String::new();
-        let dst = std::env::var("OUT_DIR").unwrap();
-
-        // Generate a program which will run all tests with all inputs in
-        // `functions`. This program will write all outputs to stdout (in a
-        // binary format).
-        src.push_str("use std::io::Write;");
-        src.push_str("fn main() {");
-        src.push_str("let mut result = Vec::new();");
-        for function in functions.iter_mut() {
-            src.push_str("unsafe {");
-            src.push_str("extern { fn ");
-            src.push_str(&function.name);
-            src.push_str("(");
-
-            let (ret, retptr) = match function.name.as_str() {
-                "sincos" | "sincosf" => (None, &function.ret[..]),
-                _ => (Some(&function.ret[0]), &function.ret[1..]),
-            };
-            for (i, arg) in function.args.iter().enumerate() {
-                src.push_str(&format!("arg{}: {},", i, arg.libc_ty()));
-            }
-            for (i, ret) in retptr.iter().enumerate() {
-                src.push_str(&format!("argret{}: {},", i, ret.libc_pty()));
-            }
-            src.push_str(")");
-            if let Some(ty) = ret {
-                src.push_str(" -> ");
-                src.push_str(ty.libc_ty());
-            }
-            src.push_str("; }");
-
-            src.push_str(&format!("static TESTS: &[[i64; {}]]", function.args.len()));
-            src.push_str(" = &[");
-            for test in function.tests.iter() {
-                src.push_str("[");
-                for val in test.inputs.iter() {
-                    src.push_str(&val.to_string());
-                    src.push_str(",");
-                }
-                src.push_str("],");
-            }
-            src.push_str("];");
-
-            src.push_str("for test in TESTS {");
-            for (i, arg) in retptr.iter().enumerate() {
-                src.push_str(&format!("let mut argret{} = {};", i, arg.default()));
-            }
-            src.push_str("let output = ");
-            src.push_str(&function.name);
-            src.push_str("(");
-            for (i, arg) in function.args.iter().enumerate() {
-                src.push_str(&match arg {
-                    Ty::F32 => format!("f32::from_bits(test[{}] as u32)", i),
-                    Ty::F64 => format!("f64::from_bits(test[{}] as u64)", i),
-                    Ty::I32 => format!("test[{}] as i32", i),
-                    Ty::Bool => format!("test[{}] as i32", i),
-                });
-                src.push_str(",");
-            }
-            for (i, _) in retptr.iter().enumerate() {
-                src.push_str(&format!("&mut argret{},", i));
-            }
-            src.push_str(");");
-            if let Some(ty) = &ret {
-                src.push_str(&format!("let output = output{};", ty.to_i64()));
-                src.push_str("result.extend_from_slice(&output.to_le_bytes());");
-            }
-
-            for (i, ret) in retptr.iter().enumerate() {
-                src.push_str(&format!(
-                    "result.extend_from_slice(&(argret{}{}).to_le_bytes());",
-                    i,
-                    ret.to_i64(),
-                ));
-            }
-            src.push_str("}");
-
-            src.push_str("}");
-        }
-
-        src.push_str("std::io::stdout().write_all(&result).unwrap();");
-
-        src.push_str("}");
-
-        let path = format!("{}/gen.rs", dst);
-        fs::write(&path, src).unwrap();
-
-        // Make it somewhat pretty if something goes wrong
-        drop(Command::new("rustfmt").arg(&path).status());
-
-        // Compile and execute this tests for the musl target, assuming we're an
-        // x86_64 host effectively.
-        let status = Command::new("rustc")
-            .current_dir(&dst)
-            .arg(&path)
-            .arg("--target=x86_64-unknown-linux-musl")
-            .status()
-            .unwrap();
-        assert!(status.success());
-        let output = Command::new("./gen").current_dir(&dst).output().unwrap();
-        assert!(output.status.success());
-        assert!(output.stderr.is_empty());
-
-        // Map all the output bytes back to an `i64` and then shove it all into
-        // the expected results.
-        let mut results = output.stdout.chunks_exact(8).map(|buf| {
-            let mut exact = [0; 8];
-            exact.copy_from_slice(buf);
-            i64::from_le_bytes(exact)
-        });
-
-        for f in functions.iter_mut() {
-            for test in f.tests.iter_mut() {
-                test.outputs = (0..f.ret.len()).map(|_| results.next().unwrap()).collect();
-            }
-        }
-        assert!(results.next().is_none());
-    }
-
-    /// Codegens a file which has a ton of `#[test]` annotations for all the
-    /// tests that we generated above.
-    fn generate_unit_tests(functions: &[Function]) {
-        let mut src = String::new();
-        let dst = std::env::var("OUT_DIR").unwrap();
-
-        for function in functions {
-            src.push_str("#[test]");
-            src.push_str("fn ");
-            src.push_str(&function.name);
-            src.push_str("_matches_musl() {");
-            src.push_str(&format!(
-                "static TESTS: &[([i64; {}], [i64; {}])]",
-                function.args.len(),
-                function.ret.len(),
-            ));
-            src.push_str(" = &[");
-            for test in function.tests.iter() {
-                src.push_str("([");
-                for val in test.inputs.iter() {
-                    src.push_str(&val.to_string());
-                    src.push_str(",");
-                }
-                src.push_str("],");
-                src.push_str("[");
-                for val in test.outputs.iter() {
-                    src.push_str(&val.to_string());
-                    src.push_str(",");
-                }
-                src.push_str("],");
-                src.push_str("),");
-            }
-            src.push_str("];");
-
-            src.push_str("for (test, expected) in TESTS {");
-            src.push_str("let output = libm::");
-            src.push_str(&function.name);
-            src.push_str("(");
-            for (i, arg) in function.args.iter().enumerate() {
-                src.push_str(&match arg {
-                    Ty::F32 => format!("f32::from_bits(test[{}] as u32)", i),
-                    Ty::F64 => format!("f64::from_bits(test[{}] as u64)", i),
-                    Ty::I32 => format!("test[{}] as i32", i),
-                    Ty::Bool => format!("test[{}] as i32", i),
-                });
-                src.push_str(",");
-            }
-            src.push_str(");");
-
-            for (i, ret) in function.ret.iter().enumerate() {
-                let get = if function.ret.len() == 1 { String::new() } else { format!(".{}", i) };
-                src.push_str(&(match ret {
-                    Ty::F32 => format!("if libm::_eqf(output{}, f32::from_bits(expected[{}] as u32)).is_ok() {{ continue }}", get, i),
-                    Ty::F64 => format!("if libm::_eq(output{}, f64::from_bits(expected[{}] as u64)).is_ok() {{ continue }}", get, i),
-                    Ty::I32 => format!("if output{} as i64 == expected[{}] {{ continue }}", get, i),
-                    Ty::Bool => unreachable!(),
-                }));
-            }
-
-            src.push_str(
-                r#"
-                panic!("INPUT: {:?} EXPECTED: {:?} ACTUAL {:?}", test, expected, output);
-            "#,
-            );
-            src.push_str("}");
-
-            src.push_str("}");
-        }
-
-        let path = format!("{}/musl-tests.rs", dst);
-        fs::write(&path, src).unwrap();
-
-        // Try to make it somewhat pretty
-        drop(Command::new("rustfmt").arg(&path).status());
-    }
-}
diff --git a/crates/libm-test/tests/musl_biteq.rs b/crates/libm-test/tests/musl_biteq.rs
deleted file mode 100644
index f586fd03d..000000000
--- a/crates/libm-test/tests/musl_biteq.rs
+++ /dev/null
@@ -1,6 +0,0 @@
-//! compare
-
-// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520
-#[cfg(not(target_arch = "powerpc64"))]
-#[cfg(all(test, feature = "test-musl-serialized"))]
-include!(concat!(env!("OUT_DIR"), "/musl-tests.rs"));
diff --git a/src/lib.rs b/src/lib.rs
index 511ab598d..6bb06b5b8 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -23,28 +23,3 @@ use core::{f32, f64};
 pub use libm_helper::*;
 
 pub use self::math::*;
-
-/// Approximate equality with 1 ULP of tolerance
-#[doc(hidden)]
-#[inline]
-pub fn _eqf(a: f32, b: f32) -> Result<(), u32> {
-    if a.is_nan() && b.is_nan() {
-        Ok(())
-    } else {
-        let err = (a.to_bits() as i32).wrapping_sub(b.to_bits() as i32).abs();
-
-        if err <= 1 { Ok(()) } else { Err(err as u32) }
-    }
-}
-
-#[doc(hidden)]
-#[inline]
-pub fn _eq(a: f64, b: f64) -> Result<(), u64> {
-    if a.is_nan() && b.is_nan() {
-        Ok(())
-    } else {
-        let err = (a.to_bits() as i64).wrapping_sub(b.to_bits() as i64).abs();
-
-        if err <= 1 { Ok(()) } else { Err(err as u64) }
-    }
-}

From e9782cecbb8b3c8a8fa7c816c8798597b8f69648 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sun, 22 Dec 2024 22:24:38 +0000
Subject: [PATCH 043/279] Fix a bug in `abs_diff`

These were taken from `compiler-builtins` but the implementation has a
bug near the integer limits. Fixed in `compiler-builtins` by using
`core`'s implementation at [1], this is the corresponding fix for
`libm`.

[1]: https://github.com/rust-lang/compiler-builtins/pull/736
---
 src/math/support/int_traits.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/math/support/int_traits.rs b/src/math/support/int_traits.rs
index c72c1d5cb..ded990bdf 100644
--- a/src/math/support/int_traits.rs
+++ b/src/math/support/int_traits.rs
@@ -184,7 +184,7 @@ macro_rules! int_impl {
             }
 
             fn abs_diff(self, other: Self) -> Self {
-                if self < other { other.wrapping_sub(self) } else { self.wrapping_sub(other) }
+                self.abs_diff(other)
             }
 
             int_impl_common!($uty);
@@ -221,7 +221,7 @@ macro_rules! int_impl {
             }
 
             fn abs_diff(self, other: Self) -> $uty {
-                self.wrapping_sub(other).wrapping_abs() as $uty
+                self.abs_diff(other)
             }
 
             int_impl_common!($ity);

From 498a11fa411dfff58c9bb52fdd154e2338909d29 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sun, 22 Dec 2024 23:15:16 +0000
Subject: [PATCH 044/279] Introduce helper types for accessing trait items

The ambiguous associated types error sometimes fires in cases where it
shouldn't be ambiguous ([1]), which can make things clunky when working
with chained associated types (e.g. `Op::FTy::Int::*` does not work).
Add helper types that we can use instead of the full syntax.

There aren't too many cases in-crate now but this is relevant for some
open PRs.

[1]: https://github.com/rust-lang/rust/issues/38078
---
 crates/libm-test/benches/random.rs | 2 +-
 crates/libm-test/src/lib.rs        | 4 ++--
 crates/libm-test/src/op.rs         | 9 +++++++++
 src/math/support/float_traits.rs   | 4 ++++
 src/math/support/int_traits.rs     | 6 +++++-
 src/math/support/mod.rs            | 3 ++-
 6 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/crates/libm-test/benches/random.rs b/crates/libm-test/benches/random.rs
index d77d57908..9ccc38fe3 100644
--- a/crates/libm-test/benches/random.rs
+++ b/crates/libm-test/benches/random.rs
@@ -26,7 +26,7 @@ macro_rules! musl_rand_benches {
 
                 #[cfg(feature = "build-musl")]
                 let musl_extra = MuslExtra {
-                    musl_fn: Some(musl_math_sys::$fn_name as <Op as MathOp>::CFn),
+                    musl_fn: Some(musl_math_sys::$fn_name as libm_test::CFn<Op>),
                     skip_on_i586: $skip_on_i586
                 };
 
diff --git a/crates/libm-test/src/lib.rs b/crates/libm-test/src/lib.rs
index 7f0d9aa75..bc96b466b 100644
--- a/crates/libm-test/src/lib.rs
+++ b/crates/libm-test/src/lib.rs
@@ -5,8 +5,8 @@ pub mod op;
 mod precision;
 mod test_traits;
 
-pub use libm::support::{Float, Int};
-pub use op::{BaseName, Identifier, MathOp};
+pub use libm::support::{Float, Int, IntTy};
+pub use op::{BaseName, CFn, FTy, Identifier, MathOp, RustFn, RustRet};
 pub use precision::{MaybeOverride, SpecialCase, default_ulp};
 pub use test_traits::{CheckBasis, CheckCtx, CheckOutput, GenerateInput, Hex, TupleCall};
 
diff --git a/crates/libm-test/src/op.rs b/crates/libm-test/src/op.rs
index 50b455d3a..0faeceb09 100644
--- a/crates/libm-test/src/op.rs
+++ b/crates/libm-test/src/op.rs
@@ -70,6 +70,15 @@ pub trait MathOp {
     const ROUTINE: Self::RustFn;
 }
 
+/// Access the associated `FTy` type from an op (helper to avoid ambiguous associated types).
+pub type FTy<Op> = <Op as MathOp>::FTy;
+/// Access the associated `CFn` type from an op (helper to avoid ambiguous associated types).
+pub type CFn<Op> = <Op as MathOp>::CFn;
+/// Access the associated `RustFn` type from an op (helper to avoid ambiguous associated types).
+pub type RustFn<Op> = <Op as MathOp>::RustFn;
+/// Access the associated `RustRet` type from an op (helper to avoid ambiguous associated types).
+pub type RustRet<Op> = <Op as MathOp>::RustRet;
+
 macro_rules! do_thing {
     // Matcher for unary functions
     (
diff --git a/src/math/support/float_traits.rs b/src/math/support/float_traits.rs
index 0047ba368..5808aeebc 100644
--- a/src/math/support/float_traits.rs
+++ b/src/math/support/float_traits.rs
@@ -153,6 +153,10 @@ pub trait Float:
     }
 }
 
+/// Access the associated `Int` type from a float (helper to avoid ambiguous associated types).
+#[allow(dead_code)]
+pub type IntTy<F> = <F as Float>::Int;
+
 macro_rules! float_impl {
     ($ty:ident, $ity:ident, $sity:ident, $expty:ident, $bits:expr, $significand_bits:expr) => {
         impl Float for $ty {
diff --git a/src/math/support/int_traits.rs b/src/math/support/int_traits.rs
index ded990bdf..380313c1e 100644
--- a/src/math/support/int_traits.rs
+++ b/src/math/support/int_traits.rs
@@ -26,6 +26,10 @@ pub trait MinInt:
     const MAX: Self;
 }
 
+/// Access the associated `OtherSign` type from an int (helper to avoid ambiguous associated
+/// types).
+pub type OtherSign<I> = <I as MinInt>::OtherSign;
+
 /// Trait for some basic operations on integers
 #[allow(dead_code)]
 pub trait Int:
@@ -53,7 +57,7 @@ pub trait Int:
     + CastInto<usize>
     + CastFrom<u8>
 {
-    fn signed(self) -> <Self::Unsigned as MinInt>::OtherSign;
+    fn signed(self) -> OtherSign<Self::Unsigned>;
     fn unsigned(self) -> Self::Unsigned;
     fn from_unsigned(unsigned: Self::Unsigned) -> Self;
     fn abs(self) -> Self;
diff --git a/src/math/support/mod.rs b/src/math/support/mod.rs
index 04a313abc..25681c307 100644
--- a/src/math/support/mod.rs
+++ b/src/math/support/mod.rs
@@ -4,7 +4,8 @@ mod float_traits;
 mod hex_float;
 mod int_traits;
 
-pub use float_traits::Float;
+#[allow(unused_imports)]
+pub use float_traits::{Float, IntTy};
 #[allow(unused_imports)]
 pub use hex_float::{hf32, hf64};
 pub use int_traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt};

From 80def21ead365e7b338d2cf9371d2be7d34cba7b Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sun, 22 Dec 2024 23:56:45 +0000
Subject: [PATCH 045/279] Rename associated type helpers, add `OpITy`

Change the names to make them less ambiguous. Additionally add `OpITy`
for accessing the same-sized integer of an operation's float type.
---
 crates/libm-test/benches/random.rs |  2 +-
 crates/libm-test/src/lib.rs        |  2 +-
 crates/libm-test/src/op.rs         | 10 ++++++----
 3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/crates/libm-test/benches/random.rs b/crates/libm-test/benches/random.rs
index 9ccc38fe3..b9c39334c 100644
--- a/crates/libm-test/benches/random.rs
+++ b/crates/libm-test/benches/random.rs
@@ -26,7 +26,7 @@ macro_rules! musl_rand_benches {
 
                 #[cfg(feature = "build-musl")]
                 let musl_extra = MuslExtra {
-                    musl_fn: Some(musl_math_sys::$fn_name as libm_test::CFn<Op>),
+                    musl_fn: Some(musl_math_sys::$fn_name as libm_test::OpCFn<Op>),
                     skip_on_i586: $skip_on_i586
                 };
 
diff --git a/crates/libm-test/src/lib.rs b/crates/libm-test/src/lib.rs
index bc96b466b..17a06b3be 100644
--- a/crates/libm-test/src/lib.rs
+++ b/crates/libm-test/src/lib.rs
@@ -6,7 +6,7 @@ mod precision;
 mod test_traits;
 
 pub use libm::support::{Float, Int, IntTy};
-pub use op::{BaseName, CFn, FTy, Identifier, MathOp, RustFn, RustRet};
+pub use op::{BaseName, Identifier, MathOp, OpCFn, OpFTy, OpRustFn, OpRustRet};
 pub use precision::{MaybeOverride, SpecialCase, default_ulp};
 pub use test_traits::{CheckBasis, CheckCtx, CheckOutput, GenerateInput, Hex, TupleCall};
 
diff --git a/crates/libm-test/src/op.rs b/crates/libm-test/src/op.rs
index 0faeceb09..bcea31c22 100644
--- a/crates/libm-test/src/op.rs
+++ b/crates/libm-test/src/op.rs
@@ -71,13 +71,15 @@ pub trait MathOp {
 }
 
 /// Access the associated `FTy` type from an op (helper to avoid ambiguous associated types).
-pub type FTy<Op> = <Op as MathOp>::FTy;
+pub type OpFTy<Op> = <Op as MathOp>::FTy;
+/// Access the associated `FTy::Int` type from an op (helper to avoid ambiguous associated types).
+pub type OpITy<Op> = <<Op as MathOp>::FTy as Float>::Int;
 /// Access the associated `CFn` type from an op (helper to avoid ambiguous associated types).
-pub type CFn<Op> = <Op as MathOp>::CFn;
+pub type OpCFn<Op> = <Op as MathOp>::CFn;
 /// Access the associated `RustFn` type from an op (helper to avoid ambiguous associated types).
-pub type RustFn<Op> = <Op as MathOp>::RustFn;
+pub type OpRustFn<Op> = <Op as MathOp>::RustFn;
 /// Access the associated `RustRet` type from an op (helper to avoid ambiguous associated types).
-pub type RustRet<Op> = <Op as MathOp>::RustRet;
+pub type OpRustRet<Op> = <Op as MathOp>::RustRet;
 
 macro_rules! do_thing {
     // Matcher for unary functions

From ff185c63b37ed91f96254de81632fe54613ae45f Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 26 Dec 2024 09:13:58 +0000
Subject: [PATCH 046/279] Replace string function name matching with enums
 where possible

---
 crates/libm-test/src/gen/random.rs  |   5 +-
 crates/libm-test/src/precision.rs   | 100 ++++++++++++----------------
 crates/libm-test/src/test_traits.rs |   3 -
 3 files changed, 46 insertions(+), 62 deletions(-)

diff --git a/crates/libm-test/src/gen/random.rs b/crates/libm-test/src/gen/random.rs
index e347b3c63..527cd1351 100644
--- a/crates/libm-test/src/gen/random.rs
+++ b/crates/libm-test/src/gen/random.rs
@@ -7,7 +7,7 @@ use rand::{Rng, SeedableRng};
 use rand_chacha::ChaCha8Rng;
 
 use super::CachedInput;
-use crate::{CheckCtx, GenerateInput};
+use crate::{BaseName, CheckCtx, GenerateInput};
 
 const SEED: [u8; 32] = *b"3.141592653589793238462643383279";
 
@@ -110,7 +110,6 @@ pub fn get_test_cases<RustArgs>(ctx: &CheckCtx) -> impl Iterator<Item = RustArgs
 where
     CachedInput: GenerateInput<RustArgs>,
 {
-    let inputs =
-        if ctx.fn_name == "jn" || ctx.fn_name == "jnf" { &TEST_CASES_JN } else { &TEST_CASES };
+    let inputs = if ctx.base_name == BaseName::Jn { &TEST_CASES_JN } else { &TEST_CASES };
     inputs.get_cases()
 }
diff --git a/crates/libm-test/src/precision.rs b/crates/libm-test/src/precision.rs
index cf9115430..c7f9d9e30 100644
--- a/crates/libm-test/src/precision.rs
+++ b/crates/libm-test/src/precision.rs
@@ -6,7 +6,7 @@ use core::f32;
 use CheckBasis::{Mpfr, Musl};
 use Identifier as Id;
 
-use crate::{CheckBasis, CheckCtx, Float, Identifier, Int, TestResult};
+use crate::{BaseName, CheckBasis, CheckCtx, Float, Identifier, Int, TestResult};
 
 /// Type implementing [`IgnoreCase`].
 pub struct SpecialCase;
@@ -106,25 +106,26 @@ impl MaybeOverride<(f32,)> for SpecialCase {
         ctx: &CheckCtx,
     ) -> Option<TestResult> {
         if ctx.basis == CheckBasis::Musl {
-            if ctx.fn_name == "expm1f" && input.0 > 80.0 && actual.is_infinite() {
+            if ctx.base_name == BaseName::Expm1 && input.0 > 80.0 && actual.is_infinite() {
                 // we return infinity but the number is representable
                 return XFAIL;
             }
 
-            if ctx.fn_name == "sinhf" && input.0.abs() > 80.0 && actual.is_nan() {
+            if ctx.base_name == BaseName::Sinh && input.0.abs() > 80.0 && actual.is_nan() {
                 // we return some NaN that should be real values or infinite
                 // doesn't seem to happen on x86
                 return XFAIL;
             }
         }
 
-        if ctx.fn_name == "acoshf" && input.0 < -1.0 {
+        if ctx.base_name == BaseName::Acosh && input.0 < -1.0 {
             // acoshf is undefined for x <= 1.0, but we return a random result at lower
             // values.
             return XFAIL;
         }
 
-        if ctx.fn_name == "lgammaf" || ctx.fn_name == "lgammaf_r" && input.0 < 0.0 {
+        if ctx.base_name == BaseName::Lgamma || ctx.base_name == BaseName::LgammaR && input.0 < 0.0
+        {
             // loggamma should not be defined for x < 0, yet we both return results
             return XFAIL;
         }
@@ -141,7 +142,7 @@ impl MaybeOverride<(f32,)> for SpecialCase {
         // On MPFR for lgammaf_r, we set -1 as the integer result for negative infinity but MPFR
         // sets +1
         if ctx.basis == CheckBasis::Mpfr
-            && ctx.fn_name == "lgammaf_r"
+            && ctx.base_name == BaseName::LgammaR
             && input.0 == f32::NEG_INFINITY
             && actual.abs() == expected.abs()
         {
@@ -161,13 +162,13 @@ impl MaybeOverride<(f64,)> for SpecialCase {
         ctx: &CheckCtx,
     ) -> Option<TestResult> {
         if ctx.basis == CheckBasis::Musl {
-            if cfg!(target_arch = "x86") && ctx.fn_name == "acosh" && input.0 < 1.0 {
+            if cfg!(target_arch = "x86") && ctx.base_name == BaseName::Acosh && input.0 < 1.0 {
                 // The function is undefined, both implementations return random results
                 return SKIP;
             }
 
             if cfg!(x86_no_sse)
-                && ctx.fn_name == "ceil"
+                && ctx.base_name == BaseName::Ceil
                 && input.0 < 0.0
                 && input.0 > -1.0
                 && expected == F::ZERO
@@ -178,13 +179,14 @@ impl MaybeOverride<(f64,)> for SpecialCase {
             }
         }
 
-        if ctx.fn_name == "acosh" && input.0 < 1.0 {
+        if ctx.base_name == BaseName::Acosh && input.0 < 1.0 {
             // The function is undefined for the inputs, musl and our libm both return
             // random results.
             return XFAIL;
         }
 
-        if ctx.fn_name == "lgamma" || ctx.fn_name == "lgamma_r" && input.0 < 0.0 {
+        if ctx.base_name == BaseName::Lgamma || ctx.base_name == BaseName::LgammaR && input.0 < 0.0
+        {
             // loggamma should not be defined for x < 0, yet we both return results
             return XFAIL;
         }
@@ -201,7 +203,7 @@ impl MaybeOverride<(f64,)> for SpecialCase {
         // On MPFR for lgamma_r, we set -1 as the integer result for negative infinity but MPFR
         // sets +1
         if ctx.basis == CheckBasis::Mpfr
-            && ctx.fn_name == "lgamma_r"
+            && ctx.base_name == BaseName::LgammaR
             && input.0 == f64::NEG_INFINITY
             && actual.abs() == expected.abs()
         {
@@ -214,7 +216,7 @@ impl MaybeOverride<(f64,)> for SpecialCase {
 
 /// Check NaN bits if the function requires it
 fn maybe_check_nan_bits<F: Float>(actual: F, expected: F, ctx: &CheckCtx) -> Option<TestResult> {
-    if !(ctx.base_name_str == "fabs" || ctx.base_name_str == "copysign") {
+    if !(ctx.base_name == BaseName::Fabs || ctx.base_name == BaseName::Copysign) {
         return None;
     }
 
@@ -270,24 +272,16 @@ fn maybe_skip_binop_nan<F1: Float, F2: Float>(
     expected: F2,
     ctx: &CheckCtx,
 ) -> Option<TestResult> {
-    match ctx.basis {
-        CheckBasis::Musl => {
-            if (ctx.base_name_str == "fmax" || ctx.base_name_str == "fmin")
-                && (input.0.is_nan() || input.1.is_nan())
-                && expected.is_nan()
-            {
-                XFAIL
-            } else {
-                None
-            }
-        }
-        CheckBasis::Mpfr => {
-            if ctx.base_name_str == "copysign" && input.1.is_nan() {
-                SKIP
-            } else {
-                None
-            }
+    match (&ctx.basis, ctx.base_name) {
+        (Musl, BaseName::Fmin | BaseName::Fmax)
+            if (input.0.is_nan() || input.1.is_nan()) && expected.is_nan() =>
+        {
+            XFAIL
         }
+
+        (Mpfr, BaseName::Copysign) if input.1.is_nan() => SKIP,
+
+        _ => None,
     }
 }
 
@@ -299,20 +293,17 @@ impl MaybeOverride<(i32, f32)> for SpecialCase {
         ulp: &mut u32,
         ctx: &CheckCtx,
     ) -> Option<TestResult> {
-        match ctx.basis {
-            CheckBasis::Musl => bessel_prec_dropoff(input, ulp, ctx),
-            CheckBasis::Mpfr => {
-                // We return +0.0, MPFR returns -0.0
-                if ctx.fn_name == "jnf"
-                    && input.1 == f32::NEG_INFINITY
-                    && actual == F::ZERO
-                    && expected == F::ZERO
-                {
-                    XFAIL
-                } else {
-                    None
-                }
+        match (&ctx.basis, ctx.base_name) {
+            (Musl, _) => bessel_prec_dropoff(input, ulp, ctx),
+
+            // We return +0.0, MPFR returns -0.0
+            (Mpfr, BaseName::Jn)
+                if input.1 == f32::NEG_INFINITY && actual == F::ZERO && expected == F::ZERO =>
+            {
+                XFAIL
             }
+
+            _ => None,
         }
     }
 }
@@ -324,20 +315,17 @@ impl MaybeOverride<(i32, f64)> for SpecialCase {
         ulp: &mut u32,
         ctx: &CheckCtx,
     ) -> Option<TestResult> {
-        match ctx.basis {
-            CheckBasis::Musl => bessel_prec_dropoff(input, ulp, ctx),
-            CheckBasis::Mpfr => {
-                // We return +0.0, MPFR returns -0.0
-                if ctx.fn_name == "jn"
-                    && input.1 == f64::NEG_INFINITY
-                    && actual == F::ZERO
-                    && expected == F::ZERO
-                {
-                    XFAIL
-                } else {
-                    bessel_prec_dropoff(input, ulp, ctx)
-                }
+        match (&ctx.basis, ctx.base_name) {
+            (Musl, _) => bessel_prec_dropoff(input, ulp, ctx),
+
+            // We return +0.0, MPFR returns -0.0
+            (Mpfr, BaseName::Jn)
+                if input.1 == f64::NEG_INFINITY && actual == F::ZERO && expected == F::ZERO =>
+            {
+                XFAIL
             }
+
+            _ => None,
         }
     }
 }
@@ -348,7 +336,7 @@ fn bessel_prec_dropoff<F: Float>(
     ulp: &mut u32,
     ctx: &CheckCtx,
 ) -> Option<TestResult> {
-    if ctx.base_name_str == "jn" {
+    if ctx.base_name == BaseName::Jn {
         if input.0 > 4000 {
             return XFAIL;
         } else if input.0 > 2000 {
diff --git a/crates/libm-test/src/test_traits.rs b/crates/libm-test/src/test_traits.rs
index b8e0aa108..ca933bbda 100644
--- a/crates/libm-test/src/test_traits.rs
+++ b/crates/libm-test/src/test_traits.rs
@@ -22,8 +22,6 @@ pub struct CheckCtx {
     pub base_name: BaseName,
     /// Function name.
     pub fn_name: &'static str,
-    /// Return the unsuffixed version of the function name.
-    pub base_name_str: &'static str,
     /// Source of truth for tests.
     pub basis: CheckBasis,
 }
@@ -36,7 +34,6 @@ impl CheckCtx {
             fn_ident,
             fn_name: fn_ident.as_str(),
             base_name: fn_ident.base_name(),
-            base_name_str: fn_ident.base_name().as_str(),
             basis,
         };
         ret.ulp = crate::default_ulp(&ret);

From ef3cc6be6a10ec24af9dba26a76a7831c1d11c70 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 27 Dec 2024 10:58:36 +0000
Subject: [PATCH 047/279] Fix new `clippy::precedence` lints

[1] extends Clippy's `precedence` lint to cover `&`, `|`, and bitshifts.
Update cases that are flagged by this in the most recent nightly.

[1]: https://github.com/rust-lang/rust-clippy/pull/13743
---
 src/math/ceil.rs      |  2 +-
 src/math/exp10.rs     |  2 +-
 src/math/exp10f.rs    |  2 +-
 src/math/exp2.rs      |  2 +-
 src/math/fma.rs       | 12 ++++++------
 src/math/fmod.rs      |  4 ++--
 src/math/fmodf.rs     |  4 ++--
 src/math/j1.rs        |  4 ++--
 src/math/jn.rs        |  4 ++--
 src/math/log10.rs     |  2 +-
 src/math/log1p.rs     |  2 +-
 src/math/log2.rs      |  2 +-
 src/math/mod.rs       |  2 +-
 src/math/modf.rs      |  2 +-
 src/math/modff.rs     |  2 +-
 src/math/nextafter.rs |  6 +++---
 src/math/rint.rs      |  2 +-
 src/math/rintf.rs     |  2 +-
 src/math/sqrt.rs      |  2 +-
 src/math/trunc.rs     |  2 +-
 src/math/truncf.rs    |  2 +-
 21 files changed, 32 insertions(+), 32 deletions(-)

diff --git a/src/math/ceil.rs b/src/math/ceil.rs
index c198ebcfe..b0576f3dc 100644
--- a/src/math/ceil.rs
+++ b/src/math/ceil.rs
@@ -16,7 +16,7 @@ pub fn ceil(x: f64) -> f64 {
     }
 
     let u: u64 = x.to_bits();
-    let e: i64 = (u >> 52 & 0x7ff) as i64;
+    let e: i64 = ((u >> 52) & 0x7ff) as i64;
     let y: f64;
 
     if e >= 0x3ff + 52 || x == 0. {
diff --git a/src/math/exp10.rs b/src/math/exp10.rs
index 2c3df0173..7c33c92b6 100644
--- a/src/math/exp10.rs
+++ b/src/math/exp10.rs
@@ -12,7 +12,7 @@ pub fn exp10(x: f64) -> f64 {
     let (mut y, n) = modf(x);
     let u: u64 = n.to_bits();
     /* fabs(n) < 16 without raising invalid on nan */
-    if (u >> 52 & 0x7ff) < 0x3ff + 4 {
+    if ((u >> 52) & 0x7ff) < 0x3ff + 4 {
         if y == 0.0 {
             return i!(P10, ((n as isize) + 15) as usize);
         }
diff --git a/src/math/exp10f.rs b/src/math/exp10f.rs
index e81d18380..0520a41f2 100644
--- a/src/math/exp10f.rs
+++ b/src/math/exp10f.rs
@@ -11,7 +11,7 @@ pub fn exp10f(x: f32) -> f32 {
     let (mut y, n) = modff(x);
     let u = n.to_bits();
     /* fabsf(n) < 8 without raising invalid on nan */
-    if (u >> 23 & 0xff) < 0x7f + 3 {
+    if ((u >> 23) & 0xff) < 0x7f + 3 {
         if y == 0.0 {
             return i!(P10, ((n as isize) + 7) as usize);
         }
diff --git a/src/math/exp2.rs b/src/math/exp2.rs
index dce2ab4df..6e98d066c 100644
--- a/src/math/exp2.rs
+++ b/src/math/exp2.rs
@@ -341,7 +341,7 @@ pub fn exp2(mut x: f64) -> f64 {
 
     /* Filter out exceptional cases. */
     let ui = f64::to_bits(x);
-    let ix = ui >> 32 & 0x7fffffff;
+    let ix = (ui >> 32) & 0x7fffffff;
     if ix >= 0x408ff000 {
         /* |x| >= 1022 or nan */
         if ix >= 0x40900000 && ui >> 63 == 0 {
diff --git a/src/math/fma.rs b/src/math/fma.rs
index bb2028fa7..826143d5a 100644
--- a/src/math/fma.rs
+++ b/src/math/fma.rs
@@ -82,7 +82,7 @@ pub fn fma(x: f64, y: f64, z: f64) -> f64 {
             d -= 64;
             if d == 0 {
             } else if d < 64 {
-                rlo = rhi << (64 - d) | rlo >> d | ((rlo << (64 - d)) != 0) as u64;
+                rlo = (rhi << (64 - d)) | (rlo >> d) | ((rlo << (64 - d)) != 0) as u64;
                 rhi = rhi >> d;
             } else {
                 rlo = 1;
@@ -95,7 +95,7 @@ pub fn fma(x: f64, y: f64, z: f64) -> f64 {
         if d == 0 {
             zlo = nz.m;
         } else if d < 64 {
-            zlo = nz.m >> d | ((nz.m << (64 - d)) != 0) as u64;
+            zlo = (nz.m >> d) | ((nz.m << (64 - d)) != 0) as u64;
         } else {
             zlo = 1;
         }
@@ -127,11 +127,11 @@ pub fn fma(x: f64, y: f64, z: f64) -> f64 {
         e += 64;
         d = rhi.leading_zeros() as i32 - 1;
         /* note: d > 0 */
-        rhi = rhi << d | rlo >> (64 - d) | ((rlo << d) != 0) as u64;
+        rhi = (rhi << d) | (rlo >> (64 - d)) | ((rlo << d) != 0) as u64;
     } else if rlo != 0 {
         d = rlo.leading_zeros() as i32 - 1;
         if d < 0 {
-            rhi = rlo >> 1 | (rlo & 1);
+            rhi = (rlo >> 1) | (rlo & 1);
         } else {
             rhi = rlo << d;
         }
@@ -165,7 +165,7 @@ pub fn fma(x: f64, y: f64, z: f64) -> f64 {
             /* one bit is lost when scaled, add another top bit to
             only round once at conversion if it is inexact */
             if (rhi << 53) != 0 {
-                i = (rhi >> 1 | (rhi & 1) | 1 << 62) as i64;
+                i = ((rhi >> 1) | (rhi & 1) | (1 << 62)) as i64;
                 if sign != 0 {
                     i = -i;
                 }
@@ -182,7 +182,7 @@ pub fn fma(x: f64, y: f64, z: f64) -> f64 {
         } else {
             /* only round once when scaled */
             d = 10;
-            i = ((rhi >> d | ((rhi << (64 - d)) != 0) as u64) << d) as i64;
+            i = (((rhi >> d) | ((rhi << (64 - d)) != 0) as u64) << d) as i64;
             if sign != 0 {
                 i = -i;
             }
diff --git a/src/math/fmod.rs b/src/math/fmod.rs
index df16162bc..b68e6b0ea 100644
--- a/src/math/fmod.rs
+++ b/src/math/fmod.rs
@@ -2,8 +2,8 @@
 pub fn fmod(x: f64, y: f64) -> f64 {
     let mut uxi = x.to_bits();
     let mut uyi = y.to_bits();
-    let mut ex = (uxi >> 52 & 0x7ff) as i64;
-    let mut ey = (uyi >> 52 & 0x7ff) as i64;
+    let mut ex = ((uxi >> 52) & 0x7ff) as i64;
+    let mut ey = ((uyi >> 52) & 0x7ff) as i64;
     let sx = uxi >> 63;
     let mut i;
 
diff --git a/src/math/fmodf.rs b/src/math/fmodf.rs
index 671af8580..4de181957 100644
--- a/src/math/fmodf.rs
+++ b/src/math/fmodf.rs
@@ -4,8 +4,8 @@ use core::f32;
 pub fn fmodf(x: f32, y: f32) -> f32 {
     let mut uxi = x.to_bits();
     let mut uyi = y.to_bits();
-    let mut ex = (uxi >> 23 & 0xff) as i32;
-    let mut ey = (uyi >> 23 & 0xff) as i32;
+    let mut ex = ((uxi >> 23) & 0xff) as i32;
+    let mut ey = ((uyi >> 23) & 0xff) as i32;
     let sx = uxi & 0x80000000;
     let mut i;
 
diff --git a/src/math/j1.rs b/src/math/j1.rs
index cef17a63e..578ae59d3 100644
--- a/src/math/j1.rs
+++ b/src/math/j1.rs
@@ -171,10 +171,10 @@ pub fn y1(x: f64) -> f64 {
     lx = get_low_word(x);
 
     /* y1(nan)=nan, y1(<0)=nan, y1(0)=-inf, y1(inf)=0 */
-    if (ix << 1 | lx) == 0 {
+    if (ix << 1) | lx == 0 {
         return -1.0 / 0.0;
     }
-    if (ix >> 31) != 0 {
+    if ix >> 31 != 0 {
         return 0.0 / 0.0;
     }
     if ix >= 0x7ff00000 {
diff --git a/src/math/jn.rs b/src/math/jn.rs
index 7f98ddc05..d228781d1 100644
--- a/src/math/jn.rs
+++ b/src/math/jn.rs
@@ -55,7 +55,7 @@ pub fn jn(n: i32, mut x: f64) -> f64 {
     ix &= 0x7fffffff;
 
     // -lx == !lx + 1
-    if (ix | (lx | ((!lx).wrapping_add(1))) >> 31) > 0x7ff00000 {
+    if ix | ((lx | (!lx).wrapping_add(1)) >> 31) > 0x7ff00000 {
         /* nan */
         return x;
     }
@@ -265,7 +265,7 @@ pub fn yn(n: i32, x: f64) -> f64 {
     ix &= 0x7fffffff;
 
     // -lx == !lx + 1
-    if (ix | (lx | ((!lx).wrapping_add(1))) >> 31) > 0x7ff00000 {
+    if ix | ((lx | (!lx).wrapping_add(1)) >> 31) > 0x7ff00000 {
         /* nan */
         return x;
     }
diff --git a/src/math/log10.rs b/src/math/log10.rs
index f9d118f12..8c9d68c49 100644
--- a/src/math/log10.rs
+++ b/src/math/log10.rs
@@ -78,7 +78,7 @@ pub fn log10(mut x: f64) -> f64 {
     hx += 0x3ff00000 - 0x3fe6a09e;
     k += (hx >> 20) as i32 - 0x3ff;
     hx = (hx & 0x000fffff) + 0x3fe6a09e;
-    ui = (hx as u64) << 32 | (ui & 0xffffffff);
+    ui = ((hx as u64) << 32) | (ui & 0xffffffff);
     x = f64::from_bits(ui);
 
     f = x - 1.0;
diff --git a/src/math/log1p.rs b/src/math/log1p.rs
index 80561ec74..b7f3fb09e 100644
--- a/src/math/log1p.rs
+++ b/src/math/log1p.rs
@@ -125,7 +125,7 @@ pub fn log1p(x: f64) -> f64 {
         }
         /* reduce u into [sqrt(2)/2, sqrt(2)] */
         hu = (hu & 0x000fffff) + 0x3fe6a09e;
-        ui = (hu as u64) << 32 | (ui & 0xffffffff);
+        ui = ((hu as u64) << 32) | (ui & 0xffffffff);
         f = f64::from_bits(ui) - 1.;
     }
     hfsq = 0.5 * f * f;
diff --git a/src/math/log2.rs b/src/math/log2.rs
index 59533340b..701f63c25 100644
--- a/src/math/log2.rs
+++ b/src/math/log2.rs
@@ -75,7 +75,7 @@ pub fn log2(mut x: f64) -> f64 {
     hx += 0x3ff00000 - 0x3fe6a09e;
     k += (hx >> 20) as i32 - 0x3ff;
     hx = (hx & 0x000fffff) + 0x3fe6a09e;
-    ui = (hx as u64) << 32 | (ui & 0xffffffff);
+    ui = ((hx as u64) << 32) | (ui & 0xffffffff);
     x = f64::from_bits(ui);
 
     f = x - 1.0;
diff --git a/src/math/mod.rs b/src/math/mod.rs
index afebdf586..3852c774e 100644
--- a/src/math/mod.rs
+++ b/src/math/mod.rs
@@ -359,5 +359,5 @@ fn with_set_low_word(f: f64, lo: u32) -> f64 {
 
 #[inline]
 fn combine_words(hi: u32, lo: u32) -> f64 {
-    f64::from_bits((hi as u64) << 32 | lo as u64)
+    f64::from_bits(((hi as u64) << 32) | lo as u64)
 }
diff --git a/src/math/modf.rs b/src/math/modf.rs
index bcab33a81..e29e80ccf 100644
--- a/src/math/modf.rs
+++ b/src/math/modf.rs
@@ -2,7 +2,7 @@ pub fn modf(x: f64) -> (f64, f64) {
     let rv2: f64;
     let mut u = x.to_bits();
     let mask: u64;
-    let e = ((u >> 52 & 0x7ff) as i32) - 0x3ff;
+    let e = (((u >> 52) & 0x7ff) as i32) - 0x3ff;
 
     /* no fractional part */
     if e >= 52 {
diff --git a/src/math/modff.rs b/src/math/modff.rs
index 56ece12e3..fac60abaa 100644
--- a/src/math/modff.rs
+++ b/src/math/modff.rs
@@ -2,7 +2,7 @@ pub fn modff(x: f32) -> (f32, f32) {
     let rv2: f32;
     let mut u: u32 = x.to_bits();
     let mask: u32;
-    let e = ((u >> 23 & 0xff) as i32) - 0x7f;
+    let e = (((u >> 23) & 0xff) as i32) - 0x7f;
 
     /* no fractional part */
     if e >= 23 {
diff --git a/src/math/nextafter.rs b/src/math/nextafter.rs
index 422bd7496..c991ff6f2 100644
--- a/src/math/nextafter.rs
+++ b/src/math/nextafter.rs
@@ -16,14 +16,14 @@ pub fn nextafter(x: f64, y: f64) -> f64 {
         if ay == 0 {
             return y;
         }
-        ux_i = (uy_i & 1_u64 << 63) | 1;
-    } else if ax > ay || ((ux_i ^ uy_i) & 1_u64 << 63) != 0 {
+        ux_i = (uy_i & (1_u64 << 63)) | 1;
+    } else if ax > ay || ((ux_i ^ uy_i) & (1_u64 << 63)) != 0 {
         ux_i -= 1;
     } else {
         ux_i += 1;
     }
 
-    let e = ux_i >> 52 & 0x7ff;
+    let e = (ux_i >> 52) & 0x7ff;
     // raise overflow if ux.f is infinite and x is finite
     if e == 0x7ff {
         force_eval!(x + x);
diff --git a/src/math/rint.rs b/src/math/rint.rs
index 618b26e54..cbdc3c2b9 100644
--- a/src/math/rint.rs
+++ b/src/math/rint.rs
@@ -2,7 +2,7 @@
 pub fn rint(x: f64) -> f64 {
     let one_over_e = 1.0 / f64::EPSILON;
     let as_u64: u64 = x.to_bits();
-    let exponent: u64 = as_u64 >> 52 & 0x7ff;
+    let exponent: u64 = (as_u64 >> 52) & 0x7ff;
     let is_positive = (as_u64 >> 63) == 0;
     if exponent >= 0x3ff + 52 {
         x
diff --git a/src/math/rintf.rs b/src/math/rintf.rs
index 0726d83ba..2d22c9393 100644
--- a/src/math/rintf.rs
+++ b/src/math/rintf.rs
@@ -2,7 +2,7 @@
 pub fn rintf(x: f32) -> f32 {
     let one_over_e = 1.0 / f32::EPSILON;
     let as_u32: u32 = x.to_bits();
-    let exponent: u32 = as_u32 >> 23 & 0xff;
+    let exponent: u32 = (as_u32 >> 23) & 0xff;
     let is_positive = (as_u32 >> 31) == 0;
     if exponent >= 0x7f + 23 {
         x
diff --git a/src/math/sqrt.rs b/src/math/sqrt.rs
index d9a8f184c..3f1a10fdd 100644
--- a/src/math/sqrt.rs
+++ b/src/math/sqrt.rs
@@ -219,7 +219,7 @@ pub fn sqrt(x: f64) -> f64 {
         ix1 |= sign;
     }
     ix0 += m << 20;
-    f64::from_bits((ix0 as u64) << 32 | ix1.0 as u64)
+    f64::from_bits(((ix0 as u64) << 32) | ix1.0 as u64)
 }
 
 #[cfg(test)]
diff --git a/src/math/trunc.rs b/src/math/trunc.rs
index 34bc2fdfa..d85bffb40 100644
--- a/src/math/trunc.rs
+++ b/src/math/trunc.rs
@@ -14,7 +14,7 @@ pub fn trunc(x: f64) -> f64 {
     let x1p120 = f64::from_bits(0x4770000000000000); // 0x1p120f === 2 ^ 120
 
     let mut i: u64 = x.to_bits();
-    let mut e: i64 = (i >> 52 & 0x7ff) as i64 - 0x3ff + 12;
+    let mut e: i64 = ((i >> 52) & 0x7ff) as i64 - 0x3ff + 12;
     let m: u64;
 
     if e >= 52 + 12 {
diff --git a/src/math/truncf.rs b/src/math/truncf.rs
index a74f78987..82017b87b 100644
--- a/src/math/truncf.rs
+++ b/src/math/truncf.rs
@@ -14,7 +14,7 @@ pub fn truncf(x: f32) -> f32 {
     let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120
 
     let mut i: u32 = x.to_bits();
-    let mut e: i32 = (i >> 23 & 0xff) as i32 - 0x7f + 9;
+    let mut e: i32 = ((i >> 23) & 0xff) as i32 - 0x7f + 9;
     let m: u32;
 
     if e >= 23 + 9 {

From b5a1aafe450ab420f4dcc86cbcbf3d29489971ab Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 27 Dec 2024 11:02:40 +0000
Subject: [PATCH 048/279] Allow Clippy lints in `compiler-builtins-smoke-test`

Rather than always needing to exclude `cb` when running `cargo clippy`,
just disable Clippy for the included module.
---
 .github/workflows/main.yml                     | 1 -
 crates/compiler-builtins-smoke-test/src/lib.rs | 3 ++-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 866f0de9e..d290d09a5 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -120,7 +120,6 @@ jobs:
       run: ./ci/download-musl.sh
     - run: |
         cargo clippy --all \
-          --exclude cb \
           --features libm-test/build-musl,libm-test/test-multiprecision \
           --all-targets
 
diff --git a/crates/compiler-builtins-smoke-test/src/lib.rs b/crates/compiler-builtins-smoke-test/src/lib.rs
index e65cb8da3..e3a51a575 100644
--- a/crates/compiler-builtins-smoke-test/src/lib.rs
+++ b/crates/compiler-builtins-smoke-test/src/lib.rs
@@ -4,8 +4,9 @@
 
 #![feature(core_intrinsics)]
 #![allow(internal_features)]
-#![allow(dead_code)]
 #![no_std]
 
+#[allow(dead_code)]
+#[allow(clippy::all)] // We don't get `libm`'s list of `allow`s, so just ignore Clippy.
 #[path = "../../../src/math/mod.rs"]
 pub mod libm;

From 3c7a832660f2ef807fd2707a9f64692c64dbd07a Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sun, 29 Dec 2024 06:46:22 +0000
Subject: [PATCH 049/279] Change from `-latest` to named CI images

GitHub will be upgrading the `-latest` tags of these images in the near
future. Change all images to specify the latest version.
---
 .github/workflows/main.yml | 62 +++++++++++++++++++-------------------
 1 file changed, 31 insertions(+), 31 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index d290d09a5..93cd541f8 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -16,56 +16,56 @@ jobs:
       matrix:
         include:
         - target: aarch64-apple-darwin
-          os: macos-latest
+          os: macos-15
         - target: aarch64-unknown-linux-gnu
-          os: ubuntu-latest
+          os: ubuntu-24.04
         - target: aarch64-pc-windows-msvc
-          os: windows-latest
+          os: windows-2025
           build_only: 1 # Can't run on x86 hosts
         - target: arm-unknown-linux-gnueabi
-          os: ubuntu-latest
+          os: ubuntu-24.04
         - target: arm-unknown-linux-gnueabihf
-          os: ubuntu-latest
+          os: ubuntu-24.04
         - target: armv7-unknown-linux-gnueabihf
-          os: ubuntu-latest
+          os: ubuntu-24.04
         - target: i586-unknown-linux-gnu
-          os: ubuntu-latest
+          os: ubuntu-24.04
         - target: i686-unknown-linux-gnu
-          os: ubuntu-latest
+          os: ubuntu-24.04
         - target: loongarch64-unknown-linux-gnu
-          os: ubuntu-latest
+          os: ubuntu-24.04
         - target: powerpc-unknown-linux-gnu
-          os: ubuntu-latest
+          os: ubuntu-24.04
         - target: powerpc64-unknown-linux-gnu
-          os: ubuntu-latest
+          os: ubuntu-24.04
         - target: powerpc64le-unknown-linux-gnu
-          os: ubuntu-latest
+          os: ubuntu-24.04
         - target: riscv64gc-unknown-linux-gnu
-          os: ubuntu-latest
+          os: ubuntu-24.04
         - target: thumbv6m-none-eabi
-          os: ubuntu-latest
+          os: ubuntu-24.04
         - target: thumbv7em-none-eabi
-          os: ubuntu-latest
+          os: ubuntu-24.04
         - target: thumbv7em-none-eabihf
-          os: ubuntu-latest
+          os: ubuntu-24.04
         - target: thumbv7m-none-eabi
-          os: ubuntu-latest
+          os: ubuntu-24.04
         - target: x86_64-unknown-linux-gnu
-          os: ubuntu-latest
+          os: ubuntu-24.04
         - target: x86_64-apple-darwin
           os: macos-13
         - target: wasm32-unknown-unknown
-          os: ubuntu-latest
+          os: ubuntu-24.04
           build_only: 1
         - target: i686-pc-windows-msvc
-          os: windows-latest
+          os: windows-2025
         - target: x86_64-pc-windows-msvc
-          os: windows-latest
+          os: windows-2025
         - target: i686-pc-windows-gnu
-          os: windows-latest
+          os: windows-2025
           channel: nightly-i686-gnu
         - target: x86_64-pc-windows-gnu
-          os: windows-latest
+          os: windows-2025
           channel: nightly-x86_64-gnu
     runs-on: ${{ matrix.os }}
     env:
@@ -94,20 +94,20 @@ jobs:
 
     # Non-linux tests just use our raw script
     - name: Run locally
-      if: matrix.os != 'ubuntu-latest' || contains(matrix.target, 'wasm')
+      if: matrix.os != 'ubuntu-24.04' || contains(matrix.target, 'wasm')
       shell: bash
       run: ./ci/run.sh ${{ matrix.target }}
 
     # Otherwise we use our docker containers to run builds
     - name: Run in Docker
-      if: matrix.os == 'ubuntu-latest' && !contains(matrix.target, 'wasm')
+      if: matrix.os == 'ubuntu-24.04' && !contains(matrix.target, 'wasm')
       run: |
         rustup target add x86_64-unknown-linux-musl
         cargo generate-lockfile && ./ci/run-docker.sh ${{ matrix.target }}
 
   clippy:
     name: Clippy
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-24.04
     steps:
     - uses: actions/checkout@master
     - name: Install Rust
@@ -125,7 +125,7 @@ jobs:
 
   builtins:
     name: Check use with compiler-builtins
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-24.04
     steps:
     - uses: actions/checkout@master
     - name: Install Rust
@@ -135,7 +135,7 @@ jobs:
 
   benchmarks:
     name: Benchmarks
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-24.04
     steps:
     - uses: actions/checkout@master
     - name: Install Rust
@@ -147,7 +147,7 @@ jobs:
 
   msrv:
     name: Check MSRV
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-24.04
     env:
       RUSTFLAGS: # No need to check warnings on old MSRV, unset `-Dwarnings`
     steps:
@@ -163,7 +163,7 @@ jobs:
 
   rustfmt:
     name: Rustfmt
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-24.04
     steps:
     - uses: actions/checkout@master
     - name: Install Rust
@@ -180,7 +180,7 @@ jobs:
       - benchmarks
       - msrv
       - rustfmt
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-24.04
     # GitHub branch protection is exceedingly silly and treats "jobs skipped because a dependency
     # failed" as success. So we have to do some contortions to ensure the job fails if any of its
     # dependencies fails.

From c447e5f9babd0658cf794e5fe5a77e721ab36f55 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sat, 26 Oct 2024 03:07:58 -0500
Subject: [PATCH 050/279] Introduce generic `abs` and `copysign`

Add generic versions of `abs` and `copysign`, which will provide an
entrypoint for adding `f16` and `f128`. Since this implementation is
identical to the existing type-specific implementations, make use of it
for `f32` and `f64`.
---
 src/math/copysign.rs         |  6 +-----
 src/math/copysignf.rs        |  6 +-----
 src/math/fabs.rs             |  2 +-
 src/math/fabsf.rs            |  2 +-
 src/math/generic/abs.rs      |  6 ++++++
 src/math/generic/copysign.rs | 10 ++++++++++
 src/math/generic/mod.rs      |  5 +++++
 src/math/mod.rs              |  1 +
 8 files changed, 26 insertions(+), 12 deletions(-)
 create mode 100644 src/math/generic/abs.rs
 create mode 100644 src/math/generic/copysign.rs
 create mode 100644 src/math/generic/mod.rs

diff --git a/src/math/copysign.rs b/src/math/copysign.rs
index 1f4a35a33..552bf3975 100644
--- a/src/math/copysign.rs
+++ b/src/math/copysign.rs
@@ -4,9 +4,5 @@
 /// first argument, `x`, and the sign of its second argument, `y`.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn copysign(x: f64, y: f64) -> f64 {
-    let mut ux = x.to_bits();
-    let uy = y.to_bits();
-    ux &= (!0) >> 1;
-    ux |= uy & (1 << 63);
-    f64::from_bits(ux)
+    super::generic::copysign(x, y)
 }
diff --git a/src/math/copysignf.rs b/src/math/copysignf.rs
index 6c346e3a5..8b9bed4c0 100644
--- a/src/math/copysignf.rs
+++ b/src/math/copysignf.rs
@@ -4,9 +4,5 @@
 /// first argument, `x`, and the sign of its second argument, `y`.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn copysignf(x: f32, y: f32) -> f32 {
-    let mut ux = x.to_bits();
-    let uy = y.to_bits();
-    ux &= 0x7fffffff;
-    ux |= uy & 0x80000000;
-    f32::from_bits(ux)
+    super::generic::copysign(x, y)
 }
diff --git a/src/math/fabs.rs b/src/math/fabs.rs
index d083053e1..2163637e7 100644
--- a/src/math/fabs.rs
+++ b/src/math/fabs.rs
@@ -9,7 +9,7 @@ pub fn fabs(x: f64) -> f64 {
         args: x,
     }
 
-    f64::from_bits(x.to_bits() & (u64::MAX / 2))
+    super::generic::abs(x)
 }
 
 #[cfg(test)]
diff --git a/src/math/fabsf.rs b/src/math/fabsf.rs
index eabe87254..ac77c9201 100644
--- a/src/math/fabsf.rs
+++ b/src/math/fabsf.rs
@@ -9,7 +9,7 @@ pub fn fabsf(x: f32) -> f32 {
         args: x,
     }
 
-    f32::from_bits(x.to_bits() & 0x7fffffff)
+    super::generic::abs(x)
 }
 
 // PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520
diff --git a/src/math/generic/abs.rs b/src/math/generic/abs.rs
new file mode 100644
index 000000000..2c9a43c12
--- /dev/null
+++ b/src/math/generic/abs.rs
@@ -0,0 +1,6 @@
+use super::super::Float;
+
+/// Absolute value.
+pub fn abs<F: Float>(x: F) -> F {
+    x.abs()
+}
diff --git a/src/math/generic/copysign.rs b/src/math/generic/copysign.rs
new file mode 100644
index 000000000..d6b814891
--- /dev/null
+++ b/src/math/generic/copysign.rs
@@ -0,0 +1,10 @@
+use super::super::Float;
+
+/// Copy the sign of `y` to `x`.
+pub fn copysign<F: Float>(x: F, y: F) -> F {
+    let mut ux = x.to_bits();
+    let uy = y.to_bits();
+    ux &= !F::SIGN_MASK;
+    ux |= uy & (F::SIGN_MASK);
+    F::from_bits(ux)
+}
diff --git a/src/math/generic/mod.rs b/src/math/generic/mod.rs
new file mode 100644
index 000000000..1ddd08f0e
--- /dev/null
+++ b/src/math/generic/mod.rs
@@ -0,0 +1,5 @@
+mod abs;
+mod copysign;
+
+pub use abs::abs;
+pub use copysign::copysign;
diff --git a/src/math/mod.rs b/src/math/mod.rs
index 3852c774e..ba1995228 100644
--- a/src/math/mod.rs
+++ b/src/math/mod.rs
@@ -87,6 +87,7 @@ mod support;
 mod arch;
 mod expo2;
 mod fenv;
+mod generic;
 mod k_cos;
 mod k_cosf;
 mod k_expo2;

From 400e196e1931c2ff3bd89095ae7ebf822096eb2d Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sat, 26 Oct 2024 00:44:50 -0500
Subject: [PATCH 051/279] Add `f16` and `f128` configuration from
 `compiler-builtins`

In preparation of adding routines from these two types, duplicate the
`compiler-builtins` configuration here.
---
 Cargo.toml                                    |   5 +-
 build.rs                                      |  30 +---
 configure.rs                                  | 168 ++++++++++++++++++
 .../compiler-builtins-smoke-test/Cargo.toml   |   2 +
 crates/libm-macros/Cargo.toml                 |   7 +
 crates/libm-test/Cargo.toml                   |  11 +-
 crates/libm-test/build.rs                     |  62 +------
 src/lib.rs                                    |   2 +
 src/math/support/float_traits.rs              |   4 +
 9 files changed, 208 insertions(+), 83 deletions(-)
 create mode 100644 configure.rs

diff --git a/Cargo.toml b/Cargo.toml
index 98a60bfe3..bfc11509e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -21,7 +21,7 @@ arch = []
 
 # This tells the compiler to assume that a Nightly toolchain is being used and
 # that it should activate any useful Nightly things accordingly.
-unstable = ["unstable-intrinsics"]
+unstable = ["unstable-intrinsics", "unstable-float"]
 
 # Enable calls to functions in `core::intrinsics`
 unstable-intrinsics = []
@@ -29,6 +29,9 @@ unstable-intrinsics = []
 # Make some internal things public for testing.
 unstable-test-support = []
 
+# Enable the nightly-only `f16` and `f128`.
+unstable-float = []
+
 # Used to prevent using any intrinsics or arch-specific code.
 #
 # HACK: this is a negative feature which is generally a bad idea in Cargo, but
diff --git a/build.rs b/build.rs
index 001029236..9c9e0e723 100644
--- a/build.rs
+++ b/build.rs
@@ -1,6 +1,10 @@
 use std::env;
 
+mod configure;
+
 fn main() {
+    let cfg = configure::Config::from_env();
+
     println!("cargo:rerun-if-changed=build.rs");
     println!("cargo:rustc-check-cfg=cfg(assert_no_panic)");
 
@@ -14,29 +18,5 @@ fn main() {
         }
     }
 
-    configure_intrinsics();
-    configure_arch();
-}
-
-/// Simplify the feature logic for enabling intrinsics so code only needs to use
-/// `cfg(intrinsics_enabled)`.
-fn configure_intrinsics() {
-    println!("cargo:rustc-check-cfg=cfg(intrinsics_enabled)");
-
-    // Disabled by default; `unstable-intrinsics` enables again; `force-soft-floats` overrides
-    // to disable.
-    if cfg!(feature = "unstable-intrinsics") && !cfg!(feature = "force-soft-floats") {
-        println!("cargo:rustc-cfg=intrinsics_enabled");
-    }
-}
-
-/// Simplify the feature logic for enabling arch-specific features so code only needs to use
-/// `cfg(arch_enabled)`.
-fn configure_arch() {
-    println!("cargo:rustc-check-cfg=cfg(arch_enabled)");
-
-    // Enabled by default via the "arch" feature, `force-soft-floats` overrides to disable.
-    if cfg!(feature = "arch") && !cfg!(feature = "force-soft-floats") {
-        println!("cargo:rustc-cfg=arch_enabled");
-    }
+    configure::emit_libm_config(&cfg);
 }
diff --git a/configure.rs b/configure.rs
new file mode 100644
index 000000000..389e86c33
--- /dev/null
+++ b/configure.rs
@@ -0,0 +1,168 @@
+// Configuration shared with both libm and libm-test
+
+use std::env;
+use std::path::PathBuf;
+
+#[allow(dead_code)]
+pub struct Config {
+    pub manifest_dir: PathBuf,
+    pub out_dir: PathBuf,
+    pub opt_level: u8,
+    pub target_arch: String,
+    pub target_env: String,
+    pub target_family: Option<String>,
+    pub target_os: String,
+    pub target_string: String,
+    pub target_vendor: String,
+    pub target_features: Vec<String>,
+}
+
+impl Config {
+    pub fn from_env() -> Self {
+        let target_features = env::var("CARGO_CFG_TARGET_FEATURE")
+            .map(|feats| feats.split(',').map(ToOwned::to_owned).collect())
+            .unwrap_or_default();
+
+        Self {
+            manifest_dir: PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()),
+            out_dir: PathBuf::from(env::var("OUT_DIR").unwrap()),
+            opt_level: env::var("OPT_LEVEL").unwrap().parse().unwrap(),
+            target_arch: env::var("CARGO_CFG_TARGET_ARCH").unwrap(),
+            target_env: env::var("CARGO_CFG_TARGET_ENV").unwrap(),
+            target_family: env::var("CARGO_CFG_TARGET_FAMILY").ok(),
+            target_os: env::var("CARGO_CFG_TARGET_OS").unwrap(),
+            target_string: env::var("TARGET").unwrap(),
+            target_vendor: env::var("CARGO_CFG_TARGET_VENDOR").unwrap(),
+            target_features,
+        }
+    }
+}
+
+/// Libm gets most config options made available.
+#[allow(dead_code)]
+pub fn emit_libm_config(cfg: &Config) {
+    emit_intrinsics_cfg();
+    emit_arch_cfg();
+    emit_optimization_cfg(cfg);
+    emit_cfg_shorthands(cfg);
+    emit_f16_f128_cfg(cfg);
+}
+
+/// Tests don't need most feature-related config.
+#[allow(dead_code)]
+pub fn emit_test_config(cfg: &Config) {
+    emit_optimization_cfg(cfg);
+    emit_cfg_shorthands(cfg);
+    emit_f16_f128_cfg(cfg);
+}
+
+/// Simplify the feature logic for enabling intrinsics so code only needs to use
+/// `cfg(intrinsics_enabled)`.
+fn emit_intrinsics_cfg() {
+    println!("cargo:rustc-check-cfg=cfg(intrinsics_enabled)");
+
+    // Disabled by default; `unstable-intrinsics` enables again; `force-soft-floats` overrides
+    // to disable.
+    if cfg!(feature = "unstable-intrinsics") && !cfg!(feature = "force-soft-floats") {
+        println!("cargo:rustc-cfg=intrinsics_enabled");
+    }
+}
+
+/// Simplify the feature logic for enabling arch-specific features so code only needs to use
+/// `cfg(arch_enabled)`.
+fn emit_arch_cfg() {
+    println!("cargo:rustc-check-cfg=cfg(arch_enabled)");
+
+    // Enabled by default via the "arch" feature, `force-soft-floats` overrides to disable.
+    if cfg!(feature = "arch") && !cfg!(feature = "force-soft-floats") {
+        println!("cargo:rustc-cfg=arch_enabled");
+    }
+}
+
+/// Some tests are extremely slow. Emit a config option based on optimization level.
+fn emit_optimization_cfg(cfg: &Config) {
+    println!("cargo:rustc-check-cfg=cfg(optimizations_enabled)");
+
+    if cfg.opt_level >= 2 {
+        println!("cargo:rustc-cfg=optimizations_enabled");
+    }
+}
+
+/// Provide an alias for common longer config combinations.
+fn emit_cfg_shorthands(cfg: &Config) {
+    println!("cargo:rustc-check-cfg=cfg(x86_no_sse)");
+    if cfg.target_arch == "x86" && !cfg.target_features.iter().any(|f| f == "sse") {
+        // Shorthand to detect i586 targets
+        println!("cargo:rustc-cfg=x86_no_sse");
+    }
+}
+
+/// Configure whether or not `f16` and `f128` support should be enabled.
+fn emit_f16_f128_cfg(cfg: &Config) {
+    println!("cargo:rustc-check-cfg=cfg(f16_enabled)");
+    println!("cargo:rustc-check-cfg=cfg(f128_enabled)");
+
+    // `unstable-float` enables these features.
+    if !cfg!(feature = "unstable-float") {
+        return;
+    }
+
+    // Set whether or not `f16` and `f128` are supported at a basic level by LLVM. This only means
+    // that the backend will not crash when using these types and generates code that can be called
+    // without crashing (no infinite recursion). This does not mean that the platform doesn't have
+    // ABI or other bugs.
+    //
+    // We do this here rather than in `rust-lang/rust` because configuring via cargo features is
+    // not straightforward.
+    //
+    // Original source of this list:
+    // <https://github.com/rust-lang/compiler-builtins/pull/652#issuecomment-2266151350>
+    let f16_enabled = match cfg.target_arch.as_str() {
+        // Unsupported <https://github.com/llvm/llvm-project/issues/94434>
+        "arm64ec" => false,
+        // Selection failure <https://github.com/llvm/llvm-project/issues/50374>
+        "s390x" => false,
+        // Infinite recursion <https://github.com/llvm/llvm-project/issues/97981>
+        // FIXME(llvm): loongarch fixed by <https://github.com/llvm/llvm-project/pull/107791>
+        "csky" => false,
+        "hexagon" => false,
+        "loongarch64" => false,
+        "mips" | "mips64" | "mips32r6" | "mips64r6" => false,
+        "powerpc" | "powerpc64" => false,
+        "sparc" | "sparc64" => false,
+        "wasm32" | "wasm64" => false,
+        // Most everything else works as of LLVM 19
+        _ => true,
+    };
+
+    let f128_enabled = match cfg.target_arch.as_str() {
+        // Unsupported (libcall is not supported) <https://github.com/llvm/llvm-project/issues/121122>
+        "amdgpu" => false,
+        // Unsupported <https://github.com/llvm/llvm-project/issues/94434>
+        "arm64ec" => false,
+        // Selection failure <https://github.com/llvm/llvm-project/issues/96432>
+        "mips64" | "mips64r6" => false,
+        // Selection failure <https://github.com/llvm/llvm-project/issues/95471>
+        "nvptx64" => false,
+        // Selection failure <https://github.com/llvm/llvm-project/issues/101545>
+        "powerpc64" if &cfg.target_os == "aix" => false,
+        // Selection failure <https://github.com/llvm/llvm-project/issues/41838>
+        "sparc" => false,
+        // Most everything else works as of LLVM 19
+        _ => true,
+    };
+
+    // If the feature is set, disable these types.
+    let disable_both = env::var_os("CARGO_FEATURE_NO_F16_F128").is_some();
+
+    println!("cargo:rustc-check-cfg=cfg(f16_enabled)");
+    println!("cargo:rustc-check-cfg=cfg(f128_enabled)");
+
+    if f16_enabled && !disable_both {
+        println!("cargo:rustc-cfg=f16_enabled");
+    }
+
+    if f128_enabled && !disable_both {
+        println!("cargo:rustc-cfg=f128_enabled");
+    }
+}
diff --git a/crates/compiler-builtins-smoke-test/Cargo.toml b/crates/compiler-builtins-smoke-test/Cargo.toml
index e75c4f42b..82cfeecb9 100644
--- a/crates/compiler-builtins-smoke-test/Cargo.toml
+++ b/crates/compiler-builtins-smoke-test/Cargo.toml
@@ -21,5 +21,7 @@ force-soft-floats = []
 unexpected_cfgs = { level = "warn", check-cfg = [
   "cfg(arch_enabled)",
   "cfg(assert_no_panic)",
+  "cfg(f128_enabled)",
+  "cfg(f16_enabled)",
   "cfg(intrinsics_enabled)",
 ] }
diff --git a/crates/libm-macros/Cargo.toml b/crates/libm-macros/Cargo.toml
index c9defb1c5..9194232b2 100644
--- a/crates/libm-macros/Cargo.toml
+++ b/crates/libm-macros/Cargo.toml
@@ -12,3 +12,10 @@ heck = "0.5.0"
 proc-macro2 = "1.0.88"
 quote = "1.0.37"
 syn = { version = "2.0.79", features = ["full", "extra-traits", "visit-mut"] }
+
+[lints.rust]
+# Values used during testing
+unexpected_cfgs = { level = "warn", check-cfg = [
+  'cfg(f16_enabled)',
+  'cfg(f128_enabled)',
+] }
diff --git a/crates/libm-test/Cargo.toml b/crates/libm-test/Cargo.toml
index 4d75b25f8..f2dd88fa1 100644
--- a/crates/libm-test/Cargo.toml
+++ b/crates/libm-test/Cargo.toml
@@ -5,7 +5,10 @@ edition = "2021"
 publish = false
 
 [features]
-default = []
+default = ["unstable-float"]
+
+# Propagated from libm because this affects which functions we test.
+unstable-float = ["libm/unstable-float"]
 
 # Generate tests which are random inputs and the outputs are calculated with
 # musl libc.
@@ -44,3 +47,9 @@ criterion = { version = "0.5.1", default-features = false, features = ["cargo_be
 [[bench]]
 name = "random"
 harness = false
+
+[lints.rust]
+# Values from the chared config.rs used by `libm` but not the test crate
+unexpected_cfgs = { level = "warn", check-cfg = [
+  'cfg(feature, values("arch", "force-soft-floats", "unstable-intrinsics"))',
+] }
diff --git a/crates/libm-test/build.rs b/crates/libm-test/build.rs
index dc3126dbb..f2cd298ba 100644
--- a/crates/libm-test/build.rs
+++ b/crates/libm-test/build.rs
@@ -1,66 +1,16 @@
 use std::fmt::Write;
-use std::path::PathBuf;
-use std::{env, fs};
+use std::fs;
+
+#[path = "../../configure.rs"]
+mod configure;
+use configure::Config;
 
 fn main() {
     let cfg = Config::from_env();
 
-    emit_optimization_cfg(&cfg);
-    emit_cfg_shorthands(&cfg);
     list_all_tests(&cfg);
-}
-
-#[allow(dead_code)]
-struct Config {
-    manifest_dir: PathBuf,
-    out_dir: PathBuf,
-    opt_level: u8,
-    target_arch: String,
-    target_env: String,
-    target_family: Option<String>,
-    target_os: String,
-    target_string: String,
-    target_vendor: String,
-    target_features: Vec<String>,
-}
-
-impl Config {
-    fn from_env() -> Self {
-        let target_features = env::var("CARGO_CFG_TARGET_FEATURE")
-            .map(|feats| feats.split(',').map(ToOwned::to_owned).collect())
-            .unwrap_or_default();
-
-        Self {
-            manifest_dir: PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()),
-            out_dir: PathBuf::from(env::var("OUT_DIR").unwrap()),
-            opt_level: env::var("OPT_LEVEL").unwrap().parse().unwrap(),
-            target_arch: env::var("CARGO_CFG_TARGET_ARCH").unwrap(),
-            target_env: env::var("CARGO_CFG_TARGET_ENV").unwrap(),
-            target_family: env::var("CARGO_CFG_TARGET_FAMILY").ok(),
-            target_os: env::var("CARGO_CFG_TARGET_OS").unwrap(),
-            target_string: env::var("TARGET").unwrap(),
-            target_vendor: env::var("CARGO_CFG_TARGET_VENDOR").unwrap(),
-            target_features,
-        }
-    }
-}
 
-/// Some tests are extremely slow. Emit a config option based on optimization level.
-fn emit_optimization_cfg(cfg: &Config) {
-    println!("cargo::rustc-check-cfg=cfg(optimizations_enabled)");
-
-    if cfg.opt_level >= 2 {
-        println!("cargo::rustc-cfg=optimizations_enabled");
-    }
-}
-
-/// Provide an alias for common longer config combinations.
-fn emit_cfg_shorthands(cfg: &Config) {
-    println!("cargo::rustc-check-cfg=cfg(x86_no_sse)");
-    if cfg.target_arch == "x86" && !cfg.target_features.iter().any(|f| f == "sse") {
-        // Shorthand to detect i586 targets
-        println!("cargo::rustc-cfg=x86_no_sse");
-    }
+    configure::emit_test_config(&cfg);
 }
 
 /// Create a list of all source files in an array. This can be used for making sure that
diff --git a/src/lib.rs b/src/lib.rs
index 6bb06b5b8..327e3d6e6 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -2,6 +2,8 @@
 #![no_std]
 #![cfg_attr(intrinsics_enabled, allow(internal_features))]
 #![cfg_attr(intrinsics_enabled, feature(core_intrinsics))]
+#![cfg_attr(f128_enabled, feature(f128))]
+#![cfg_attr(f16_enabled, feature(f16))]
 #![allow(clippy::assign_op_pattern)]
 #![allow(clippy::deprecated_cfg_attr)]
 #![allow(clippy::eq_op)]
diff --git a/src/math/support/float_traits.rs b/src/math/support/float_traits.rs
index 5808aeebc..7b3f6904b 100644
--- a/src/math/support/float_traits.rs
+++ b/src/math/support/float_traits.rs
@@ -219,5 +219,9 @@ macro_rules! float_impl {
     };
 }
 
+#[cfg(f16_enabled)]
+float_impl!(f16, u16, i16, i8, 16, 10);
 float_impl!(f32, u32, i32, i16, 32, 23);
 float_impl!(f64, u64, i64, i16, 64, 52);
+#[cfg(f128_enabled)]
+float_impl!(f128, u128, i128, i16, 128, 112);

From e80dad0eb512ad5c350d758ec108769bac3fdb55 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sun, 29 Dec 2024 07:37:01 +0000
Subject: [PATCH 052/279] Always enable `unstable-float` in CI

Since these add new API but do not affect runtime, we can enable it for
all tests that run with nightly.
---
 ci/run.sh | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/ci/run.sh b/ci/run.sh
index d89c8bdf0..7e514a1cd 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -62,22 +62,26 @@ esac
 cargo check --no-default-features
 cargo check --features "force-soft-floats"
 
+# Always enable `unstable-float` since it expands available API but does not
+# change any implementations.
+extra_flags="$extra_flags --features unstable-float"
+
 if [ "${BUILD_ONLY:-}" = "1" ]; then
     cmd="cargo build --target $target --package libm"
     $cmd
-    $cmd --features "unstable-intrinsics"
+    $cmd --features unstable-intrinsics
 
     echo "can't run tests on $target; skipping"
 else
     cmd="cargo test --all --target $target $extra_flags"
 
-    # stable by default
+    # Test without intrinsics
     $cmd
     $cmd --release
 
-    # unstable with a feature
-    $cmd --features "unstable-intrinsics"
-    $cmd --release --features "unstable-intrinsics"
+    # Test with intrinsic use
+    $cmd --features unstable-intrinsics
+    $cmd --release --features unstable-intrinsics
 
     # Make sure benchmarks have correct results
     $cmd --benches

From b1b30caa2111131b06b9d773a7ba2fb79edf4a7f Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 19 Dec 2024 11:47:04 +0000
Subject: [PATCH 053/279] Update and slightly refactor some of the `Float`
 trait

Add a constant for negative pi and provide a standalone const
`from_bits`, which can be combined with what we already had in
`hex_float`. Also provide another default method to reduce what needs to
be provided by the macro.
---
 src/math/support/float_traits.rs | 47 ++++++++++++++++++++++----------
 src/math/support/hex_float.rs    | 12 ++------
 src/math/support/mod.rs          |  1 +
 3 files changed, 36 insertions(+), 24 deletions(-)

diff --git a/src/math/support/float_traits.rs b/src/math/support/float_traits.rs
index 7b3f6904b..68ba60030 100644
--- a/src/math/support/float_traits.rs
+++ b/src/math/support/float_traits.rs
@@ -38,6 +38,7 @@ pub trait Float:
     const MAX: Self;
     const MIN: Self;
     const PI: Self;
+    const NEG_PI: Self;
     const FRAC_PI_2: Self;
 
     /// The bitwidth of the float type
@@ -71,7 +72,9 @@ pub trait Float:
     fn to_bits(self) -> Self::Int;
 
     /// Returns `self` transmuted to `Self::SignedInt`
-    fn to_bits_signed(self) -> Self::SignedInt;
+    fn to_bits_signed(self) -> Self::SignedInt {
+        self.to_bits().signed()
+    }
 
     /// Checks if two floats have the same bit representation. *Except* for NaNs! NaN can be
     /// represented in multiple different ways. This method returns `true` if two NaNs are
@@ -158,7 +161,15 @@ pub trait Float:
 pub type IntTy<F> = <F as Float>::Int;
 
 macro_rules! float_impl {
-    ($ty:ident, $ity:ident, $sity:ident, $expty:ident, $bits:expr, $significand_bits:expr) => {
+    (
+        $ty:ident,
+        $ity:ident,
+        $sity:ident,
+        $expty:ident,
+        $bits:expr,
+        $significand_bits:expr,
+        $from_bits:path
+    ) => {
         impl Float for $ty {
             type Int = $ity;
             type SignedInt = $sity;
@@ -173,13 +184,10 @@ macro_rules! float_impl {
             const NAN: Self = Self::NAN;
             const MAX: Self = -Self::MIN;
             // Sign bit set, saturated mantissa, saturated exponent with last bit zeroed
-            // FIXME(msrv): just use `from_bits` when available
-            // SAFETY: POD cast with no preconditions
-            const MIN: Self = unsafe {
-                mem::transmute::<Self::Int, Self>(Self::Int::MAX & !(1 << Self::SIG_BITS))
-            };
+            const MIN: Self = $from_bits(Self::Int::MAX & !(1 << Self::SIG_BITS));
 
             const PI: Self = core::$ty::consts::PI;
+            const NEG_PI: Self = -Self::PI;
             const FRAC_PI_2: Self = core::$ty::consts::FRAC_PI_2;
 
             const BITS: u32 = $bits;
@@ -193,9 +201,6 @@ macro_rules! float_impl {
             fn to_bits(self) -> Self::Int {
                 self.to_bits()
             }
-            fn to_bits_signed(self) -> Self::SignedInt {
-                self.to_bits() as Self::SignedInt
-            }
             fn is_nan(self) -> bool {
                 self.is_nan()
             }
@@ -220,8 +225,22 @@ macro_rules! float_impl {
 }
 
 #[cfg(f16_enabled)]
-float_impl!(f16, u16, i16, i8, 16, 10);
-float_impl!(f32, u32, i32, i16, 32, 23);
-float_impl!(f64, u64, i64, i16, 64, 52);
+float_impl!(f16, u16, i16, i8, 16, 10, f16::from_bits);
+float_impl!(f32, u32, i32, i16, 32, 23, f32_from_bits);
+float_impl!(f64, u64, i64, i16, 64, 52, f64_from_bits);
 #[cfg(f128_enabled)]
-float_impl!(f128, u128, i128, i16, 128, 112);
+float_impl!(f128, u128, i128, i16, 128, 112, f128::from_bits);
+
+/* FIXME(msrv): vendor some things that are not const stable at our MSRV */
+
+/// `f32::from_bits`
+pub const fn f32_from_bits(bits: u32) -> f32 {
+    // SAFETY: POD cast with no preconditions
+    unsafe { mem::transmute::<u32, f32>(bits) }
+}
+
+/// `f64::from_bits`
+pub const fn f64_from_bits(bits: u64) -> f64 {
+    // SAFETY: POD cast with no preconditions
+    unsafe { mem::transmute::<u64, f64>(bits) }
+}
diff --git a/src/math/support/hex_float.rs b/src/math/support/hex_float.rs
index 80434a5ec..1666c6153 100644
--- a/src/math/support/hex_float.rs
+++ b/src/math/support/hex_float.rs
@@ -2,6 +2,8 @@
 
 #![allow(dead_code)] // FIXME: remove once this gets used
 
+use super::{f32_from_bits, f64_from_bits};
+
 /// Construct a 32-bit float from hex float representation (C-style)
 pub const fn hf32(s: &str) -> f32 {
     f32_from_bits(parse_any(s, 32, 23) as u32)
@@ -159,16 +161,6 @@ const fn hex_digit(c: u8) -> u8 {
 
 /* FIXME(msrv): vendor some things that are not const stable at our MSRV */
 
-/// `f32::from_bits`
-const fn f32_from_bits(v: u32) -> f32 {
-    unsafe { core::mem::transmute(v) }
-}
-
-/// `f64::from_bits`
-const fn f64_from_bits(v: u64) -> f64 {
-    unsafe { core::mem::transmute(v) }
-}
-
 /// `u128::ilog2`
 const fn u128_ilog2(v: u128) -> u32 {
     assert!(v != 0);
diff --git a/src/math/support/mod.rs b/src/math/support/mod.rs
index 25681c307..e2f4e0e98 100644
--- a/src/math/support/mod.rs
+++ b/src/math/support/mod.rs
@@ -6,6 +6,7 @@ mod int_traits;
 
 #[allow(unused_imports)]
 pub use float_traits::{Float, IntTy};
+pub(crate) use float_traits::{f32_from_bits, f64_from_bits};
 #[allow(unused_imports)]
 pub use hex_float::{hf32, hf64};
 pub use int_traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt};

From f0817728e58d4b0d6e8e03d06cab82ce6ec2b289 Mon Sep 17 00:00:00 2001
From: beetrees <b@beetr.ee>
Date: Thu, 19 Dec 2024 11:59:09 +0000
Subject: [PATCH 054/279] Remove an `is_nan` workaround that is no longer
 needed

---
 src/math/support/float_traits.rs | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/src/math/support/float_traits.rs b/src/math/support/float_traits.rs
index 68ba60030..e64640a0d 100644
--- a/src/math/support/float_traits.rs
+++ b/src/math/support/float_traits.rs
@@ -80,17 +80,7 @@ pub trait Float:
     /// represented in multiple different ways. This method returns `true` if two NaNs are
     /// compared.
     fn eq_repr(self, rhs: Self) -> bool {
-        let is_nan = |x: Self| -> bool {
-            // }
-            // fn is_nan(x: Self) -> bool {
-            // When using mangled-names, the "real" compiler-builtins might not have the
-            // necessary builtin (__unordtf2) to test whether `f128` is NaN.
-            // FIXME(f16_f128): Remove once the nightly toolchain has the __unordtf2 builtin
-            // x is NaN if all the bits of the exponent are set and the significand is non-0
-            x.to_bits() & Self::EXP_MASK == Self::EXP_MASK
-                && x.to_bits() & Self::SIG_MASK != Self::Int::ZERO
-        };
-        if is_nan(self) && is_nan(rhs) { true } else { self.to_bits() == rhs.to_bits() }
+        if self.is_nan() && rhs.is_nan() { true } else { self.to_bits() == rhs.to_bits() }
     }
 
     /// Returns true if the value is NaN.

From 69cf64f8f5f5275802ca2d0c2ac3d7ec27ac3f6f Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 19 Dec 2024 11:18:33 +0000
Subject: [PATCH 055/279] Add an 8-bit float type for testing purposes

Introduce `f8`, which is an 8-bit float compliant with IEEE-754. This
type is useful for testing since it is easily possible to enumerate all
values.
---
 crates/libm-test/src/f8_impl.rs | 487 ++++++++++++++++++++++++++++++++
 crates/libm-test/src/lib.rs     |   4 +
 2 files changed, 491 insertions(+)
 create mode 100644 crates/libm-test/src/f8_impl.rs

diff --git a/crates/libm-test/src/f8_impl.rs b/crates/libm-test/src/f8_impl.rs
new file mode 100644
index 000000000..babcc6357
--- /dev/null
+++ b/crates/libm-test/src/f8_impl.rs
@@ -0,0 +1,487 @@
+//! An IEEE-compliant 8-bit float type for testing purposes.
+
+use std::cmp::{self, Ordering};
+use std::{fmt, ops};
+
+use crate::Float;
+
+/// Sometimes verifying float logic is easiest when all values can quickly be checked exhaustively
+/// or by hand.
+///
+/// IEEE-754 compliant type that includes a 1 bit sign, 4 bit exponent, and 3 bit significand.
+/// Bias is -7.
+///
+/// Based on <https://en.wikipedia.org/wiki/Minifloat#Example_8-bit_float_(1.4.3)>.
+#[derive(Clone, Copy)]
+#[repr(transparent)]
+#[allow(non_camel_case_types)]
+pub struct f8(u8);
+
+impl Float for f8 {
+    type Int = u8;
+    type SignedInt = i8;
+    type ExpInt = i8;
+
+    const ZERO: Self = Self(0b0_0000_000);
+    const NEG_ZERO: Self = Self(0b1_0000_000);
+    const ONE: Self = Self(0b0_0111_000);
+    const NEG_ONE: Self = Self(0b1_0111_000);
+    const MAX: Self = Self(0b0_1110_111);
+    const MIN: Self = Self(0b1_1110_111);
+    const INFINITY: Self = Self(0b0_1111_000);
+    const NEG_INFINITY: Self = Self(0b1_1111_000);
+    const NAN: Self = Self(0b0_1111_100);
+    const PI: Self = Self::ZERO;
+    const NEG_PI: Self = Self::ZERO;
+    const FRAC_PI_2: Self = Self::ZERO;
+
+    const BITS: u32 = 8;
+    const SIG_BITS: u32 = 3;
+    const SIGN_MASK: Self::Int = 0b1_0000_000;
+    const SIG_MASK: Self::Int = 0b0_0000_111;
+    const EXP_MASK: Self::Int = 0b0_1111_000;
+    const IMPLICIT_BIT: Self::Int = 0b0_0001_000;
+
+    fn to_bits(self) -> Self::Int {
+        self.0
+    }
+
+    fn to_bits_signed(self) -> Self::SignedInt {
+        self.0 as i8
+    }
+
+    fn is_nan(self) -> bool {
+        self.0 & Self::EXP_MASK == Self::EXP_MASK && self.0 & Self::SIG_MASK != 0
+    }
+
+    fn is_infinite(self) -> bool {
+        self.0 & Self::EXP_MASK == Self::EXP_MASK && self.0 & Self::SIG_MASK == 0
+    }
+
+    fn is_sign_negative(self) -> bool {
+        self.0 & Self::SIGN_MASK != 0
+    }
+
+    fn exp(self) -> Self::ExpInt {
+        unimplemented!()
+    }
+
+    fn from_bits(a: Self::Int) -> Self {
+        Self(a)
+    }
+
+    fn normalize(_significand: Self::Int) -> (i32, Self::Int) {
+        unimplemented!()
+    }
+}
+
+impl f8 {
+    pub const ALL_LEN: usize = 240;
+
+    /// All non-infinite non-NaN values of `f8`
+    pub const ALL: [Self; Self::ALL_LEN] = [
+        // -m*2^7
+        Self(0b1_1110_111), // -240
+        Self(0b1_1110_110),
+        Self(0b1_1110_101),
+        Self(0b1_1110_100),
+        Self(0b1_1110_011),
+        Self(0b1_1110_010),
+        Self(0b1_1110_001),
+        Self(0b1_1110_000), // -128
+        // -m*2^6
+        Self(0b1_1101_111), // -120
+        Self(0b1_1101_110),
+        Self(0b1_1101_101),
+        Self(0b1_1101_100),
+        Self(0b1_1101_011),
+        Self(0b1_1101_010),
+        Self(0b1_1101_001),
+        Self(0b1_1101_000), // -64
+        // -m*2^5
+        Self(0b1_1100_111), // -60
+        Self(0b1_1100_110),
+        Self(0b1_1100_101),
+        Self(0b1_1100_100),
+        Self(0b1_1100_011),
+        Self(0b1_1100_010),
+        Self(0b1_1100_001),
+        Self(0b1_1100_000), // -32
+        // -m*2^4
+        Self(0b1_1011_111), // -30
+        Self(0b1_1011_110),
+        Self(0b1_1011_101),
+        Self(0b1_1011_100),
+        Self(0b1_1011_011),
+        Self(0b1_1011_010),
+        Self(0b1_1011_001),
+        Self(0b1_1011_000), // -16
+        // -m*2^3
+        Self(0b1_1010_111), // -15
+        Self(0b1_1010_110),
+        Self(0b1_1010_101),
+        Self(0b1_1010_100),
+        Self(0b1_1010_011),
+        Self(0b1_1010_010),
+        Self(0b1_1010_001),
+        Self(0b1_1010_000), // -8
+        // -m*2^2
+        Self(0b1_1001_111), // -7.5
+        Self(0b1_1001_110),
+        Self(0b1_1001_101),
+        Self(0b1_1001_100),
+        Self(0b1_1001_011),
+        Self(0b1_1001_010),
+        Self(0b1_1001_001),
+        Self(0b1_1001_000), // -4
+        // -m*2^1
+        Self(0b1_1000_111), // -3.75
+        Self(0b1_1000_110),
+        Self(0b1_1000_101),
+        Self(0b1_1000_100),
+        Self(0b1_1000_011),
+        Self(0b1_1000_010),
+        Self(0b1_1000_001),
+        Self(0b1_1000_000), // -2
+        // -m*2^0
+        Self(0b1_0111_111), // -1.875
+        Self(0b1_0111_110),
+        Self(0b1_0111_101),
+        Self(0b1_0111_100),
+        Self(0b1_0111_011),
+        Self(0b1_0111_010),
+        Self(0b1_0111_001),
+        Self(0b1_0111_000), // -1
+        // -m*2^-1
+        Self(0b1_0110_111), // −0.9375
+        Self(0b1_0110_110),
+        Self(0b1_0110_101),
+        Self(0b1_0110_100),
+        Self(0b1_0110_011),
+        Self(0b1_0110_010),
+        Self(0b1_0110_001),
+        Self(0b1_0110_000), // -0.5
+        // -m*2^-2
+        Self(0b1_0101_111), // −0.46875
+        Self(0b1_0101_110),
+        Self(0b1_0101_101),
+        Self(0b1_0101_100),
+        Self(0b1_0101_011),
+        Self(0b1_0101_010),
+        Self(0b1_0101_001),
+        Self(0b1_0101_000), // -0.25
+        // -m*2^-3
+        Self(0b1_0100_111), // −0.234375
+        Self(0b1_0100_110),
+        Self(0b1_0100_101),
+        Self(0b1_0100_100),
+        Self(0b1_0100_011),
+        Self(0b1_0100_010),
+        Self(0b1_0100_001),
+        Self(0b1_0100_000), // -0.125
+        // -m*2^-4
+        Self(0b1_0011_111), // −0.1171875
+        Self(0b1_0011_110),
+        Self(0b1_0011_101),
+        Self(0b1_0011_100),
+        Self(0b1_0011_011),
+        Self(0b1_0011_010),
+        Self(0b1_0011_001),
+        Self(0b1_0011_000), // −0.0625
+        // -m*2^-5
+        Self(0b1_0010_111), // −0.05859375
+        Self(0b1_0010_110),
+        Self(0b1_0010_101),
+        Self(0b1_0010_100),
+        Self(0b1_0010_011),
+        Self(0b1_0010_010),
+        Self(0b1_0010_001),
+        Self(0b1_0010_000), // −0.03125
+        // -m*2^-6
+        Self(0b1_0001_111), // −0.029296875
+        Self(0b1_0001_110),
+        Self(0b1_0001_101),
+        Self(0b1_0001_100),
+        Self(0b1_0001_011),
+        Self(0b1_0001_010),
+        Self(0b1_0001_001),
+        Self(0b1_0001_000), // −0.015625
+        // -m*2^-7 subnormal numbers
+        Self(0b1_0000_111), // −0.013671875
+        Self(0b1_0000_110),
+        Self(0b1_0000_101),
+        Self(0b1_0000_100),
+        Self(0b1_0000_011),
+        Self(0b1_0000_010),
+        Self(0b1_0000_001), // −0.001953125
+        // Zeroes
+        Self(0b1_0000_000), // -0.0
+        Self(0b0_0000_000), // 0.0
+        // m*2^-7 // subnormal numbers
+        Self(0b0_0000_001),
+        Self(0b0_0000_010),
+        Self(0b0_0000_011),
+        Self(0b0_0000_100),
+        Self(0b0_0000_101),
+        Self(0b0_0000_110),
+        Self(0b0_0000_111), // 0.013671875
+        // m*2^-6
+        Self(0b0_0001_000), // 0.015625
+        Self(0b0_0001_001),
+        Self(0b0_0001_010),
+        Self(0b0_0001_011),
+        Self(0b0_0001_100),
+        Self(0b0_0001_101),
+        Self(0b0_0001_110),
+        Self(0b0_0001_111), // 0.029296875
+        // m*2^-5
+        Self(0b0_0010_000), // 0.03125
+        Self(0b0_0010_001),
+        Self(0b0_0010_010),
+        Self(0b0_0010_011),
+        Self(0b0_0010_100),
+        Self(0b0_0010_101),
+        Self(0b0_0010_110),
+        Self(0b0_0010_111), // 0.05859375
+        // m*2^-4
+        Self(0b0_0011_000), // 0.0625
+        Self(0b0_0011_001),
+        Self(0b0_0011_010),
+        Self(0b0_0011_011),
+        Self(0b0_0011_100),
+        Self(0b0_0011_101),
+        Self(0b0_0011_110),
+        Self(0b0_0011_111), // 0.1171875
+        // m*2^-3
+        Self(0b0_0100_000), // 0.125
+        Self(0b0_0100_001),
+        Self(0b0_0100_010),
+        Self(0b0_0100_011),
+        Self(0b0_0100_100),
+        Self(0b0_0100_101),
+        Self(0b0_0100_110),
+        Self(0b0_0100_111), // 0.234375
+        // m*2^-2
+        Self(0b0_0101_000), // 0.25
+        Self(0b0_0101_001),
+        Self(0b0_0101_010),
+        Self(0b0_0101_011),
+        Self(0b0_0101_100),
+        Self(0b0_0101_101),
+        Self(0b0_0101_110),
+        Self(0b0_0101_111), // 0.46875
+        // m*2^-1
+        Self(0b0_0110_000), // 0.5
+        Self(0b0_0110_001),
+        Self(0b0_0110_010),
+        Self(0b0_0110_011),
+        Self(0b0_0110_100),
+        Self(0b0_0110_101),
+        Self(0b0_0110_110),
+        Self(0b0_0110_111), // 0.9375
+        // m*2^0
+        Self(0b0_0111_000), // 1
+        Self(0b0_0111_001),
+        Self(0b0_0111_010),
+        Self(0b0_0111_011),
+        Self(0b0_0111_100),
+        Self(0b0_0111_101),
+        Self(0b0_0111_110),
+        Self(0b0_0111_111), // 1.875
+        // m*2^1
+        Self(0b0_1000_000), // 2
+        Self(0b0_1000_001),
+        Self(0b0_1000_010),
+        Self(0b0_1000_011),
+        Self(0b0_1000_100),
+        Self(0b0_1000_101),
+        Self(0b0_1000_110),
+        Self(0b0_1000_111), // 3.75
+        // m*2^2
+        Self(0b0_1001_000), // 4
+        Self(0b0_1001_001),
+        Self(0b0_1001_010),
+        Self(0b0_1001_011),
+        Self(0b0_1001_100),
+        Self(0b0_1001_101),
+        Self(0b0_1001_110),
+        Self(0b0_1001_111), // 7.5
+        // m*2^3
+        Self(0b0_1010_000), // 8
+        Self(0b0_1010_001),
+        Self(0b0_1010_010),
+        Self(0b0_1010_011),
+        Self(0b0_1010_100),
+        Self(0b0_1010_101),
+        Self(0b0_1010_110),
+        Self(0b0_1010_111), // 15
+        // m*2^4
+        Self(0b0_1011_000), // 16
+        Self(0b0_1011_001),
+        Self(0b0_1011_010),
+        Self(0b0_1011_011),
+        Self(0b0_1011_100),
+        Self(0b0_1011_101),
+        Self(0b0_1011_110),
+        Self(0b0_1011_111), // 30
+        // m*2^5
+        Self(0b0_1100_000), // 32
+        Self(0b0_1100_001),
+        Self(0b0_1100_010),
+        Self(0b0_1100_011),
+        Self(0b0_1100_100),
+        Self(0b0_1100_101),
+        Self(0b0_1100_110),
+        Self(0b0_1100_111), // 60
+        // m*2^6
+        Self(0b0_1101_000), // 64
+        Self(0b0_1101_001),
+        Self(0b0_1101_010),
+        Self(0b0_1101_011),
+        Self(0b0_1101_100),
+        Self(0b0_1101_101),
+        Self(0b0_1101_110),
+        Self(0b0_1101_111), // 120
+        // m*2^7
+        Self(0b0_1110_000), // 128
+        Self(0b0_1110_001),
+        Self(0b0_1110_010),
+        Self(0b0_1110_011),
+        Self(0b0_1110_100),
+        Self(0b0_1110_101),
+        Self(0b0_1110_110),
+        Self(0b0_1110_111), // 240
+    ];
+}
+
+impl ops::Add for f8 {
+    type Output = Self;
+    fn add(self, _rhs: Self) -> Self::Output {
+        unimplemented!()
+    }
+}
+
+impl ops::Sub for f8 {
+    type Output = Self;
+    fn sub(self, _rhs: Self) -> Self::Output {
+        unimplemented!()
+    }
+}
+impl ops::Mul for f8 {
+    type Output = Self;
+    fn mul(self, _rhs: Self) -> Self::Output {
+        unimplemented!()
+    }
+}
+impl ops::Div for f8 {
+    type Output = Self;
+    fn div(self, _rhs: Self) -> Self::Output {
+        unimplemented!()
+    }
+}
+
+impl ops::Neg for f8 {
+    type Output = Self;
+    fn neg(self) -> Self::Output {
+        Self(self.0 ^ Self::SIGN_MASK)
+    }
+}
+
+impl ops::Rem for f8 {
+    type Output = Self;
+    fn rem(self, _rhs: Self) -> Self::Output {
+        unimplemented!()
+    }
+}
+
+impl ops::AddAssign for f8 {
+    fn add_assign(&mut self, _rhs: Self) {
+        unimplemented!()
+    }
+}
+
+impl ops::SubAssign for f8 {
+    fn sub_assign(&mut self, _rhs: Self) {
+        unimplemented!()
+    }
+}
+
+impl ops::MulAssign for f8 {
+    fn mul_assign(&mut self, _rhs: Self) {
+        unimplemented!()
+    }
+}
+
+impl cmp::PartialEq for f8 {
+    fn eq(&self, other: &Self) -> bool {
+        if self.is_nan() || other.is_nan() {
+            false
+        } else if self.abs().to_bits() | other.abs().to_bits() == 0 {
+            true
+        } else {
+            self.0 == other.0
+        }
+    }
+}
+impl cmp::PartialOrd for f8 {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        let inf_rep = f8::EXP_MASK;
+
+        let a_abs = self.abs().to_bits();
+        let b_abs = other.abs().to_bits();
+
+        // If either a or b is NaN, they are unordered.
+        if a_abs > inf_rep || b_abs > inf_rep {
+            return None;
+        }
+
+        // If a and b are both zeros, they are equal.
+        if a_abs | b_abs == 0 {
+            return Some(Ordering::Equal);
+        }
+
+        let a_srep = self.to_bits_signed();
+        let b_srep = other.to_bits_signed();
+        let res = a_srep.cmp(&b_srep);
+
+        if a_srep & b_srep >= 0 {
+            // If at least one of a and b is positive, we get the same result comparing
+            // a and b as signed integers as we would with a fp_ting-point compare.
+            Some(res)
+        } else {
+            // Otherwise, both are negative, so we need to flip the sense of the
+            // comparison to get the correct result.
+            Some(res.reverse())
+        }
+    }
+}
+impl fmt::Display for f8 {
+    fn fmt(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        unimplemented!()
+    }
+}
+
+impl fmt::Debug for f8 {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        fmt::Binary::fmt(self, f)
+    }
+}
+
+impl fmt::Binary for f8 {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let v = self.0;
+        write!(
+            f,
+            "0b{:b}_{:04b}_{:03b}",
+            v >> 7,
+            (v & Self::EXP_MASK) >> Self::SIG_BITS,
+            v & Self::SIG_MASK
+        )
+    }
+}
+
+impl fmt::LowerHex for f8 {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        self.0.fmt(f)
+    }
+}
diff --git a/crates/libm-test/src/lib.rs b/crates/libm-test/src/lib.rs
index 17a06b3be..ed7131713 100644
--- a/crates/libm-test/src/lib.rs
+++ b/crates/libm-test/src/lib.rs
@@ -1,3 +1,6 @@
+#![allow(clippy::unusual_byte_groupings)] // sometimes we group by sign_exp_sig
+
+mod f8_impl;
 pub mod gen;
 #[cfg(feature = "test-multiprecision")]
 pub mod mpfloat;
@@ -5,6 +8,7 @@ pub mod op;
 mod precision;
 mod test_traits;
 
+pub use f8_impl::f8;
 pub use libm::support::{Float, Int, IntTy};
 pub use op::{BaseName, Identifier, MathOp, OpCFn, OpFTy, OpRustFn, OpRustRet};
 pub use precision::{MaybeOverride, SpecialCase, default_ulp};

From 842b08b4bd78ad04329c17298703203219654f3f Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 19 Dec 2024 15:10:35 +0000
Subject: [PATCH 056/279] Introduce a float extension trait and some numerical
 routines

---
 crates/libm-test/src/lib.rs |   4 +-
 crates/libm-test/src/num.rs | 458 ++++++++++++++++++++++++++++++++++++
 2 files changed, 461 insertions(+), 1 deletion(-)
 create mode 100644 crates/libm-test/src/num.rs

diff --git a/crates/libm-test/src/lib.rs b/crates/libm-test/src/lib.rs
index ed7131713..48b382d20 100644
--- a/crates/libm-test/src/lib.rs
+++ b/crates/libm-test/src/lib.rs
@@ -4,12 +4,14 @@ mod f8_impl;
 pub mod gen;
 #[cfg(feature = "test-multiprecision")]
 pub mod mpfloat;
+mod num;
 pub mod op;
 mod precision;
 mod test_traits;
 
 pub use f8_impl::f8;
-pub use libm::support::{Float, Int, IntTy};
+pub use libm::support::{Float, Int, IntTy, MinInt};
+pub use num::{FloatExt, logspace};
 pub use op::{BaseName, Identifier, MathOp, OpCFn, OpFTy, OpRustFn, OpRustRet};
 pub use precision::{MaybeOverride, SpecialCase, default_ulp};
 pub use test_traits::{CheckBasis, CheckCtx, CheckOutput, GenerateInput, Hex, TupleCall};
diff --git a/crates/libm-test/src/num.rs b/crates/libm-test/src/num.rs
new file mode 100644
index 000000000..4aa7f61b0
--- /dev/null
+++ b/crates/libm-test/src/num.rs
@@ -0,0 +1,458 @@
+//! Helpful numeric operations.
+
+use std::cmp::min;
+
+use libm::support::{CastInto, Float};
+
+use crate::{Int, MinInt};
+
+/// Extension to `libm`'s `Float` trait with methods that are useful for tests but not
+/// needed in `libm` itself.
+pub trait FloatExt: Float {
+    /// The minimum subnormal number.
+    const TINY_BITS: Self::Int = Self::Int::ONE;
+
+    /// Retrieve additional constants for this float type.
+    fn consts() -> Consts<Self> {
+        Consts::new()
+    }
+
+    /// Increment by one ULP, saturating at infinity.
+    fn next_up(self) -> Self {
+        let bits = self.to_bits();
+        if self.is_nan() || bits == Self::INFINITY.to_bits() {
+            return self;
+        }
+
+        let abs = self.abs().to_bits();
+        let next_bits = if abs == Self::Int::ZERO {
+            // Next up from 0 is the smallest subnormal
+            Self::TINY_BITS
+        } else if bits == abs {
+            // Positive: counting up is more positive
+            bits + Self::Int::ONE
+        } else {
+            // Negative: counting down is more positive
+            bits - Self::Int::ONE
+        };
+        Self::from_bits(next_bits)
+    }
+
+    /// A faster way to effectively call `next_up` `n` times.
+    fn n_up(self, n: Self::Int) -> Self {
+        let bits = self.to_bits();
+        if self.is_nan() || bits == Self::INFINITY.to_bits() || n == Self::Int::ZERO {
+            return self;
+        }
+
+        let abs = self.abs().to_bits();
+        let is_positive = bits == abs;
+        let crosses_zero = !is_positive && n > abs;
+        let inf_bits = Self::INFINITY.to_bits();
+
+        let next_bits = if abs == Self::Int::ZERO {
+            min(n, inf_bits)
+        } else if crosses_zero {
+            min(n - abs, inf_bits)
+        } else if is_positive {
+            // Positive, counting up is more positive but this may overflow
+            match bits.checked_add(n) {
+                Some(v) if v >= inf_bits => inf_bits,
+                Some(v) => v,
+                None => inf_bits,
+            }
+        } else {
+            // Negative, counting down is more positive
+            bits - n
+        };
+        Self::from_bits(next_bits)
+    }
+
+    /// Decrement by one ULP, saturating at negative infinity.
+    fn next_down(self) -> Self {
+        let bits = self.to_bits();
+        if self.is_nan() || bits == Self::NEG_INFINITY.to_bits() {
+            return self;
+        }
+
+        let abs = self.abs().to_bits();
+        let next_bits = if abs == Self::Int::ZERO {
+            // Next up from 0 is the smallest negative subnormal
+            Self::TINY_BITS | Self::SIGN_MASK
+        } else if bits == abs {
+            // Positive: counting down is more negative
+            bits - Self::Int::ONE
+        } else {
+            // Negative: counting up is more negative
+            bits + Self::Int::ONE
+        };
+        Self::from_bits(next_bits)
+    }
+
+    /// A faster way to effectively call `next_down` `n` times.
+    fn n_down(self, n: Self::Int) -> Self {
+        let bits = self.to_bits();
+        if self.is_nan() || bits == Self::NEG_INFINITY.to_bits() || n == Self::Int::ZERO {
+            return self;
+        }
+
+        let abs = self.abs().to_bits();
+        let is_positive = bits == abs;
+        let crosses_zero = is_positive && n > abs;
+        let inf_bits = Self::INFINITY.to_bits();
+        let ninf_bits = Self::NEG_INFINITY.to_bits();
+
+        let next_bits = if abs == Self::Int::ZERO {
+            min(n, inf_bits) | Self::SIGN_MASK
+        } else if crosses_zero {
+            min(n - abs, inf_bits) | Self::SIGN_MASK
+        } else if is_positive {
+            // Positive, counting down is more negative
+            bits - n
+        } else {
+            // Negative, counting up is more negative but this may overflow
+            match bits.checked_add(n) {
+                Some(v) if v > ninf_bits => ninf_bits,
+                Some(v) => v,
+                None => ninf_bits,
+            }
+        };
+        Self::from_bits(next_bits)
+    }
+}
+
+impl<F> FloatExt for F where F: Float {}
+
+/// Extra constants that are useful for tests.
+#[derive(Debug, Clone, Copy)]
+pub struct Consts<F> {
+    /// The default quiet NaN, which is also the minimum quiet NaN.
+    pub pos_nan: F,
+    /// The default quiet NaN with negative sign.
+    pub neg_nan: F,
+    /// NaN with maximum (unsigned) significand to be a quiet NaN. The significand is saturated.
+    pub max_qnan: F,
+    /// NaN with minimum (unsigned) significand to be a signaling NaN.
+    pub min_snan: F,
+    /// NaN with maximum (unsigned) significand to be a signaling NaN.
+    pub max_snan: F,
+    pub neg_max_qnan: F,
+    pub neg_min_snan: F,
+    pub neg_max_snan: F,
+}
+
+impl<F: FloatExt> Consts<F> {
+    fn new() -> Self {
+        let top_sigbit_mask = F::Int::ONE << (F::SIG_BITS - 1);
+        let pos_nan = F::EXP_MASK | top_sigbit_mask;
+        let max_qnan = F::EXP_MASK | F::SIG_MASK;
+        let min_snan = F::EXP_MASK | F::Int::ONE;
+        let max_snan = (F::EXP_MASK | F::SIG_MASK) ^ top_sigbit_mask;
+
+        let neg_nan = pos_nan | F::SIGN_MASK;
+        let neg_max_qnan = max_qnan | F::SIGN_MASK;
+        let neg_min_snan = min_snan | F::SIGN_MASK;
+        let neg_max_snan = max_snan | F::SIGN_MASK;
+
+        Self {
+            pos_nan: F::from_bits(pos_nan),
+            neg_nan: F::from_bits(neg_nan),
+            max_qnan: F::from_bits(max_qnan),
+            min_snan: F::from_bits(min_snan),
+            max_snan: F::from_bits(max_snan),
+            neg_max_qnan: F::from_bits(neg_max_qnan),
+            neg_min_snan: F::from_bits(neg_min_snan),
+            neg_max_snan: F::from_bits(neg_max_snan),
+        }
+    }
+
+    pub fn iter(self) -> impl Iterator<Item = F> {
+        // Destructure so we get unused warnings if we forget a list entry.
+        let Self {
+            pos_nan,
+            neg_nan,
+            max_qnan,
+            min_snan,
+            max_snan,
+            neg_max_qnan,
+            neg_min_snan,
+            neg_max_snan,
+        } = self;
+
+        [pos_nan, neg_nan, max_qnan, min_snan, max_snan, neg_max_qnan, neg_min_snan, neg_max_snan]
+            .into_iter()
+    }
+}
+
+/// Return the number of steps between two floats, returning `None` if either input is NaN.
+///
+/// This is the number of steps needed for `n_up` or `n_down` to go between values. Infinities
+/// are treated the same as those functions (will return the nearest finite value), and only one
+/// of `-0` or `+0` is counted. It does not matter which value is greater.
+pub fn ulp_between<F: Float>(x: F, y: F) -> Option<F::Int> {
+    let a = as_ulp_steps(x)?;
+    let b = as_ulp_steps(y)?;
+    Some(a.abs_diff(b))
+}
+
+/// Return the (signed) number of steps from zero to `x`.
+fn as_ulp_steps<F: Float>(x: F) -> Option<F::SignedInt> {
+    let s = x.to_bits_signed();
+    let val = if s >= F::SignedInt::ZERO {
+        // each increment from `s = 0` is one step up from `x = 0.0`
+        s
+    } else {
+        // each increment from `s = F::SignedInt::MIN` is one step down from `x = -0.0`
+        F::SignedInt::MIN - s
+    };
+
+    // If `x` is NaN, return `None`
+    (!x.is_nan()).then_some(val)
+}
+
+/// An iterator that returns floats with linearly spaced integer representations, which translates
+/// to logarithmic spacing of their values.
+///
+/// Note that this tends to skip negative zero, so that needs to be checked explicitly.
+pub fn logspace<F: FloatExt>(start: F, end: F, steps: F::Int) -> impl Iterator<Item = F> {
+    assert!(!start.is_nan());
+    assert!(!end.is_nan());
+    assert!(end >= start);
+
+    let mut steps = steps.checked_sub(F::Int::ONE).expect("`steps` must be at least 2");
+    let between = ulp_between(start, end).expect("`start` or `end` is NaN");
+    let spacing = (between / steps).max(F::Int::ONE);
+    steps = steps.min(between); // At maximum, one step per ULP
+
+    let mut x = start;
+    (0..=steps.cast()).map(move |_| {
+        let ret = x;
+        x = x.n_up(spacing);
+        ret
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use std::cmp::max;
+
+    use super::*;
+    use crate::f8;
+
+    #[test]
+    fn test_next_up_down() {
+        for (i, v) in f8::ALL.into_iter().enumerate() {
+            let down = v.next_down().to_bits();
+            let up = v.next_up().to_bits();
+
+            if i == 0 {
+                assert_eq!(down, f8::NEG_INFINITY.to_bits(), "{i} next_down({v:#010b})");
+            } else {
+                let expected =
+                    if v == f8::ZERO { 1 | f8::SIGN_MASK } else { f8::ALL[i - 1].to_bits() };
+                assert_eq!(down, expected, "{i} next_down({v:#010b})");
+            }
+
+            if i == f8::ALL_LEN - 1 {
+                assert_eq!(up, f8::INFINITY.to_bits(), "{i} next_up({v:#010b})");
+            } else {
+                let expected = if v == f8::NEG_ZERO { 1 } else { f8::ALL[i + 1].to_bits() };
+                assert_eq!(up, expected, "{i} next_up({v:#010b})");
+            }
+        }
+    }
+
+    #[test]
+    fn test_next_up_down_inf_nan() {
+        assert_eq!(f8::NEG_INFINITY.next_up().to_bits(), f8::ALL[0].to_bits(),);
+        assert_eq!(f8::NEG_INFINITY.next_down().to_bits(), f8::NEG_INFINITY.to_bits(),);
+        assert_eq!(f8::INFINITY.next_down().to_bits(), f8::ALL[f8::ALL_LEN - 1].to_bits(),);
+        assert_eq!(f8::INFINITY.next_up().to_bits(), f8::INFINITY.to_bits(),);
+        assert_eq!(f8::NAN.next_up().to_bits(), f8::NAN.to_bits(),);
+        assert_eq!(f8::NAN.next_down().to_bits(), f8::NAN.to_bits(),);
+    }
+
+    #[test]
+    fn test_n_up_down_quick() {
+        assert_eq!(f8::ALL[0].n_up(4).to_bits(), f8::ALL[4].to_bits(),);
+        assert_eq!(
+            f8::ALL[f8::ALL_LEN - 1].n_down(4).to_bits(),
+            f8::ALL[f8::ALL_LEN - 5].to_bits(),
+        );
+
+        // Check around zero
+        assert_eq!(f8::from_bits(0b0).n_up(7).to_bits(), 0b0_0000_111);
+        assert_eq!(f8::from_bits(0b0).n_down(7).to_bits(), 0b1_0000_111);
+
+        // Check across zero
+        assert_eq!(f8::from_bits(0b1_0000_111).n_up(8).to_bits(), 0b0_0000_001);
+        assert_eq!(f8::from_bits(0b0_0000_111).n_down(8).to_bits(), 0b1_0000_001);
+    }
+
+    #[test]
+    fn test_n_up_down_one() {
+        // Verify that `n_up(1)` and `n_down(1)` are the same as `next_up()` and next_down()`.`
+        for i in 0..u8::MAX {
+            let v = f8::from_bits(i);
+            assert_eq!(v.next_up().to_bits(), v.n_up(1).to_bits());
+            assert_eq!(v.next_down().to_bits(), v.n_down(1).to_bits());
+        }
+    }
+
+    #[test]
+    fn test_n_up_down_inf_nan_zero() {
+        assert_eq!(f8::NEG_INFINITY.n_up(1).to_bits(), f8::ALL[0].to_bits());
+        assert_eq!(f8::NEG_INFINITY.n_up(239).to_bits(), f8::ALL[f8::ALL_LEN - 1].to_bits());
+        assert_eq!(f8::NEG_INFINITY.n_up(240).to_bits(), f8::INFINITY.to_bits());
+        assert_eq!(f8::NEG_INFINITY.n_down(u8::MAX).to_bits(), f8::NEG_INFINITY.to_bits());
+
+        assert_eq!(f8::INFINITY.n_down(1).to_bits(), f8::ALL[f8::ALL_LEN - 1].to_bits());
+        assert_eq!(f8::INFINITY.n_down(239).to_bits(), f8::ALL[0].to_bits());
+        assert_eq!(f8::INFINITY.n_down(240).to_bits(), f8::NEG_INFINITY.to_bits());
+        assert_eq!(f8::INFINITY.n_up(u8::MAX).to_bits(), f8::INFINITY.to_bits());
+
+        assert_eq!(f8::NAN.n_up(u8::MAX).to_bits(), f8::NAN.to_bits());
+        assert_eq!(f8::NAN.n_down(u8::MAX).to_bits(), f8::NAN.to_bits());
+
+        assert_eq!(f8::ZERO.n_down(1).to_bits(), f8::TINY_BITS | f8::SIGN_MASK);
+        assert_eq!(f8::NEG_ZERO.n_up(1).to_bits(), f8::TINY_BITS);
+    }
+
+    /// True if the specified range of `f8::ALL` includes both +0 and -0
+    fn crossed_zero(start: usize, end: usize) -> bool {
+        let crossed = &f8::ALL[start..=end];
+        crossed.iter().any(|f| f8::eq_repr(*f, f8::ZERO))
+            && crossed.iter().any(|f| f8::eq_repr(*f, f8::NEG_ZERO))
+    }
+
+    #[test]
+    fn test_n_up_down() {
+        for (i, v) in f8::ALL.into_iter().enumerate() {
+            for n in 0..f8::ALL_LEN {
+                let down = v.n_down(n as u8).to_bits();
+                let up = v.n_up(n as u8).to_bits();
+
+                if let Some(down_exp_idx) = i.checked_sub(n) {
+                    // No overflow
+                    let mut expected = f8::ALL[down_exp_idx].to_bits();
+                    if n >= 1 && crossed_zero(down_exp_idx, i) {
+                        // If both -0 and +0 are included, we need to adjust our expected value
+                        match down_exp_idx.checked_sub(1) {
+                            Some(v) => expected = f8::ALL[v].to_bits(),
+                            // Saturate to -inf if we are out of values
+                            None => expected = f8::NEG_INFINITY.to_bits(),
+                        }
+                    }
+                    assert_eq!(down, expected, "{i} {n} n_down({v:#010b})");
+                } else {
+                    // Overflow to -inf
+                    assert_eq!(down, f8::NEG_INFINITY.to_bits(), "{i} {n} n_down({v:#010b})");
+                }
+
+                let mut up_exp_idx = i + n;
+                if up_exp_idx < f8::ALL_LEN {
+                    // No overflow
+                    if n >= 1 && up_exp_idx < f8::ALL_LEN && crossed_zero(i, up_exp_idx) {
+                        // If both -0 and +0 are included, we need to adjust our expected value
+                        up_exp_idx += 1;
+                    }
+
+                    let expected = if up_exp_idx >= f8::ALL_LEN {
+                        f8::INFINITY.to_bits()
+                    } else {
+                        f8::ALL[up_exp_idx].to_bits()
+                    };
+
+                    assert_eq!(up, expected, "{i} {n} n_up({v:#010b})");
+                } else {
+                    // Overflow to +inf
+                    assert_eq!(up, f8::INFINITY.to_bits(), "{i} {n} n_up({v:#010b})");
+                }
+            }
+        }
+    }
+
+    #[test]
+    fn test_ulp_between() {
+        for (i, x) in f8::ALL.into_iter().enumerate() {
+            for (j, y) in f8::ALL.into_iter().enumerate() {
+                let ulp = ulp_between(x, y).unwrap();
+                let make_msg = || format!("i: {i} j: {j} x: {x:b} y: {y:b} ulp {ulp}");
+
+                let i_low = min(i, j);
+                let i_hi = max(i, j);
+                let mut expected = u8::try_from(i_hi - i_low).unwrap();
+                if crossed_zero(i_low, i_hi) {
+                    expected -= 1;
+                }
+
+                assert_eq!(ulp, expected, "{}", make_msg());
+
+                // Skip if either are zero since `next_{up,down}` will count over it
+                let either_zero = x == f8::ZERO || y == f8::ZERO;
+                if x < y && !either_zero {
+                    assert_eq!(x.n_up(ulp).to_bits(), y.to_bits(), "{}", make_msg());
+                    assert_eq!(y.n_down(ulp).to_bits(), x.to_bits(), "{}", make_msg());
+                } else if !either_zero {
+                    assert_eq!(y.n_up(ulp).to_bits(), x.to_bits(), "{}", make_msg());
+                    assert_eq!(x.n_down(ulp).to_bits(), y.to_bits(), "{}", make_msg());
+                }
+            }
+        }
+    }
+
+    #[test]
+    fn test_ulp_between_inf_nan_zero() {
+        assert_eq!(ulp_between(f8::NEG_INFINITY, f8::INFINITY).unwrap(), f8::ALL_LEN as u8);
+        assert_eq!(ulp_between(f8::INFINITY, f8::NEG_INFINITY).unwrap(), f8::ALL_LEN as u8);
+        assert_eq!(
+            ulp_between(f8::NEG_INFINITY, f8::ALL[f8::ALL_LEN - 1]).unwrap(),
+            f8::ALL_LEN as u8 - 1
+        );
+        assert_eq!(ulp_between(f8::INFINITY, f8::ALL[0]).unwrap(), f8::ALL_LEN as u8 - 1);
+
+        assert_eq!(ulp_between(f8::ZERO, f8::NEG_ZERO).unwrap(), 0);
+        assert_eq!(ulp_between(f8::NAN, f8::ZERO), None);
+        assert_eq!(ulp_between(f8::ZERO, f8::NAN), None);
+    }
+
+    #[test]
+    fn test_logspace() {
+        let ls: Vec<_> = logspace(f8::from_bits(0x0), f8::from_bits(0x4), 2).collect();
+        let exp = [f8::from_bits(0x0), f8::from_bits(0x4)];
+        assert_eq!(ls, exp);
+
+        let ls: Vec<_> = logspace(f8::from_bits(0x0), f8::from_bits(0x4), 3).collect();
+        let exp = [f8::from_bits(0x0), f8::from_bits(0x2), f8::from_bits(0x4)];
+        assert_eq!(ls, exp);
+
+        // Check that we include all values with no repeats if `steps` exceeds the maximum number
+        // of steps.
+        let ls: Vec<_> = logspace(f8::from_bits(0x0), f8::from_bits(0x3), 10).collect();
+        let exp = [f8::from_bits(0x0), f8::from_bits(0x1), f8::from_bits(0x2), f8::from_bits(0x3)];
+        assert_eq!(ls, exp);
+    }
+
+    #[test]
+    fn test_consts() {
+        let Consts {
+            pos_nan,
+            neg_nan,
+            max_qnan,
+            min_snan,
+            max_snan,
+            neg_max_qnan,
+            neg_min_snan,
+            neg_max_snan,
+        } = f8::consts();
+
+        assert_eq!(pos_nan.to_bits(), 0b0_1111_100);
+        assert_eq!(neg_nan.to_bits(), 0b1_1111_100);
+        assert_eq!(max_qnan.to_bits(), 0b0_1111_111);
+        assert_eq!(min_snan.to_bits(), 0b0_1111_001);
+        assert_eq!(max_snan.to_bits(), 0b0_1111_011);
+        assert_eq!(neg_max_qnan.to_bits(), 0b1_1111_111);
+        assert_eq!(neg_min_snan.to_bits(), 0b1_1111_001);
+        assert_eq!(neg_max_snan.to_bits(), 0b1_1111_011);
+    }
+}

From a114878d53f470b5a76f8905318392e18f617c01 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 19 Dec 2024 11:19:01 +0000
Subject: [PATCH 057/279] Add interfaces and tests based on function domains

Create a type representing a function's domain and a test that does a
logarithmic sweep of points within the domain.
---
 crates/libm-test/src/domain.rs              | 186 ++++++++++++++++++++
 crates/libm-test/src/gen.rs                 |   1 +
 crates/libm-test/src/gen/domain_logspace.rs |  43 +++++
 crates/libm-test/src/lib.rs                 |   1 +
 crates/libm-test/tests/multiprecision.rs    | 101 ++++++++++-
 5 files changed, 327 insertions(+), 5 deletions(-)
 create mode 100644 crates/libm-test/src/domain.rs
 create mode 100644 crates/libm-test/src/gen/domain_logspace.rs

diff --git a/crates/libm-test/src/domain.rs b/crates/libm-test/src/domain.rs
new file mode 100644
index 000000000..43ba21974
--- /dev/null
+++ b/crates/libm-test/src/domain.rs
@@ -0,0 +1,186 @@
+//! Traits and operations related to bounds of a function.
+
+use std::fmt;
+use std::ops::{self, Bound};
+
+use crate::Float;
+
+/// Representation of a function's domain.
+#[derive(Clone, Debug)]
+pub struct Domain<T> {
+    /// Start of the region for which a function is defined (ignoring poles).
+    pub start: Bound<T>,
+    /// Endof the region for which a function is defined (ignoring poles).
+    pub end: Bound<T>,
+    /// Additional points to check closer around. These can be e.g. undefined asymptotes or
+    /// inflection points.
+    pub check_points: Option<fn() -> BoxIter<T>>,
+}
+
+type BoxIter<T> = Box<dyn Iterator<Item = T>>;
+
+impl<F: Float> Domain<F> {
+    /// The start of this domain, saturating at negative infinity.
+    pub fn range_start(&self) -> F {
+        match self.start {
+            Bound::Included(v) => v,
+            Bound::Excluded(v) => v.next_up(),
+            Bound::Unbounded => F::NEG_INFINITY,
+        }
+    }
+
+    /// The end of this domain, saturating at infinity.
+    pub fn range_end(&self) -> F {
+        match self.end {
+            Bound::Included(v) => v,
+            Bound::Excluded(v) => v.next_down(),
+            Bound::Unbounded => F::INFINITY,
+        }
+    }
+}
+
+impl<F: Float> Domain<F> {
+    /// x ∈ ℝ
+    pub const UNBOUNDED: Self =
+        Self { start: Bound::Unbounded, end: Bound::Unbounded, check_points: None };
+
+    /// x ∈ ℝ >= 0
+    pub const POSITIVE: Self =
+        Self { start: Bound::Included(F::ZERO), end: Bound::Unbounded, check_points: None };
+
+    /// x ∈ ℝ > 0
+    pub const STRICTLY_POSITIVE: Self =
+        Self { start: Bound::Excluded(F::ZERO), end: Bound::Unbounded, check_points: None };
+
+    /// Used for versions of `asin` and `acos`.
+    pub const INVERSE_TRIG_PERIODIC: Self = Self {
+        start: Bound::Included(F::NEG_ONE),
+        end: Bound::Included(F::ONE),
+        check_points: None,
+    };
+
+    /// Domain for `acosh`
+    pub const ACOSH: Self =
+        Self { start: Bound::Included(F::ONE), end: Bound::Unbounded, check_points: None };
+
+    /// Domain for `atanh`
+    pub const ATANH: Self = Self {
+        start: Bound::Excluded(F::NEG_ONE),
+        end: Bound::Excluded(F::ONE),
+        check_points: None,
+    };
+
+    /// Domain for `sin`, `cos`, and `tan`
+    pub const TRIG: Self = Self {
+        // TODO
+        check_points: Some(|| Box::new([-F::PI, -F::FRAC_PI_2, F::FRAC_PI_2, F::PI].into_iter())),
+        ..Self::UNBOUNDED
+    };
+
+    /// Domain for `log` in various bases
+    pub const LOG: Self = Self::STRICTLY_POSITIVE;
+
+    /// Domain for `log1p` i.e. `log(1 + x)`
+    pub const LOG1P: Self =
+        Self { start: Bound::Excluded(F::NEG_ONE), end: Bound::Unbounded, check_points: None };
+
+    /// Domain for `sqrt`
+    pub const SQRT: Self = Self::POSITIVE;
+
+    /// Domain for `gamma`
+    pub const GAMMA: Self = Self {
+        check_points: Some(|| {
+            // Negative integers are asymptotes
+            Box::new((0..u8::MAX).map(|scale| {
+                let mut base = F::ZERO;
+                for _ in 0..scale {
+                    base = base - F::ONE;
+                }
+                base
+            }))
+        }),
+        // Whether or not gamma is defined for negative numbers is implementation dependent
+        ..Self::UNBOUNDED
+    };
+
+    /// Domain for `loggamma`
+    pub const LGAMMA: Self = Self::STRICTLY_POSITIVE;
+}
+
+/// Implement on `op::*` types to indicate how they are bounded.
+pub trait HasDomain<T>
+where
+    T: Copy + fmt::Debug + ops::Add<Output = T> + ops::Sub<Output = T> + PartialOrd + 'static,
+{
+    const DOMAIN: Domain<T>;
+}
+
+/// Implement [`HasDomain`] for both the `f32` and `f64` variants of a function.
+macro_rules! impl_has_domain {
+    ($($fn_name:ident => $domain:expr;)*) => {
+        paste::paste! {
+            $(
+                // Implement for f64 functions
+                impl HasDomain<f64> for $crate::op::$fn_name::Routine {
+                    const DOMAIN: Domain<f64> = Domain::<f64>::$domain;
+                }
+
+                // Implement for f32 functions
+                impl HasDomain<f32> for $crate::op::[< $fn_name f >]::Routine {
+                    const DOMAIN: Domain<f32> = Domain::<f32>::$domain;
+                }
+            )*
+        }
+    };
+}
+
+// Tie functions together with their domains.
+impl_has_domain! {
+    acos => INVERSE_TRIG_PERIODIC;
+    acosh => ACOSH;
+    asin => INVERSE_TRIG_PERIODIC;
+    asinh => UNBOUNDED;
+    atan => UNBOUNDED;
+    atanh => ATANH;
+    cbrt => UNBOUNDED;
+    ceil => UNBOUNDED;
+    cos => TRIG;
+    cosh => UNBOUNDED;
+    erf => UNBOUNDED;
+    exp => UNBOUNDED;
+    exp10 => UNBOUNDED;
+    exp2 => UNBOUNDED;
+    expm1 => UNBOUNDED;
+    fabs => UNBOUNDED;
+    floor => UNBOUNDED;
+    frexp => UNBOUNDED;
+    ilogb => UNBOUNDED;
+    j0 => UNBOUNDED;
+    j1 => UNBOUNDED;
+    lgamma => LGAMMA;
+    log => LOG;
+    log10 => LOG;
+    log1p => LOG1P;
+    log2 => LOG;
+    modf => UNBOUNDED;
+    rint => UNBOUNDED;
+    round => UNBOUNDED;
+    sin => TRIG;
+    sincos => TRIG;
+    sinh => UNBOUNDED;
+    sqrt => SQRT;
+    tan => TRIG;
+    tanh => UNBOUNDED;
+    tgamma => GAMMA;
+    trunc => UNBOUNDED;
+}
+
+/* Manual implementations, these functions don't follow `foo`->`foof` naming */
+
+impl HasDomain<f32> for crate::op::lgammaf_r::Routine {
+    const DOMAIN: Domain<f32> = Domain::<f32>::LGAMMA;
+}
+
+impl HasDomain<f64> for crate::op::lgamma_r::Routine {
+    const DOMAIN: Domain<f64> = Domain::<f64>::LGAMMA;
+}
diff --git a/crates/libm-test/src/gen.rs b/crates/libm-test/src/gen.rs
index 3e9eca37a..e3c88c44a 100644
--- a/crates/libm-test/src/gen.rs
+++ b/crates/libm-test/src/gen.rs
@@ -1,6 +1,7 @@
 //! Different generators that can create random or systematic bit patterns.
 
 use crate::GenerateInput;
+pub mod domain_logspace;
 pub mod random;
 
 /// Helper type to turn any reusable input into a generator.
diff --git a/crates/libm-test/src/gen/domain_logspace.rs b/crates/libm-test/src/gen/domain_logspace.rs
new file mode 100644
index 000000000..e8cdb9d2b
--- /dev/null
+++ b/crates/libm-test/src/gen/domain_logspace.rs
@@ -0,0 +1,43 @@
+//! A generator that produces logarithmically spaced values within domain bounds.
+
+use libm::support::{IntTy, MinInt};
+
+use crate::domain::HasDomain;
+use crate::op::OpITy;
+use crate::{MathOp, logspace};
+
+/// Number of tests to run.
+// FIXME(ntests): replace this with a more logical algorithm
+const NTESTS: usize = {
+    if cfg!(optimizations_enabled) {
+        if crate::emulated()
+            || !cfg!(target_pointer_width = "64")
+            || cfg!(all(target_arch = "x86_64", target_vendor = "apple"))
+        {
+            // Tests are pretty slow on non-64-bit targets, x86 MacOS, and targets that run
+            // in QEMU.
+            100_000
+        } else {
+            5_000_000
+        }
+    } else {
+        // Without optimizations just run a quick check
+        800
+    }
+};
+
+/// Create a range of logarithmically spaced inputs within a function's domain.
+///
+/// This allows us to get reasonably thorough coverage without wasting time on values that are
+/// NaN or out of range. Random tests will still cover values that are excluded here.
+pub fn get_test_cases<Op>() -> impl Iterator<Item = (Op::FTy,)>
+where
+    Op: MathOp + HasDomain<Op::FTy>,
+    IntTy<Op::FTy>: TryFrom<usize>,
+{
+    let domain = Op::DOMAIN;
+    let start = domain.range_start();
+    let end = domain.range_end();
+    let steps = OpITy::<Op>::try_from(NTESTS).unwrap_or(OpITy::<Op>::MAX);
+    logspace(start, end, steps).map(|v| (v,))
+}
diff --git a/crates/libm-test/src/lib.rs b/crates/libm-test/src/lib.rs
index 48b382d20..622b2dec9 100644
--- a/crates/libm-test/src/lib.rs
+++ b/crates/libm-test/src/lib.rs
@@ -1,5 +1,6 @@
 #![allow(clippy::unusual_byte_groupings)] // sometimes we group by sign_exp_sig
 
+pub mod domain;
 mod f8_impl;
 pub mod gen;
 #[cfg(feature = "test-multiprecision")]
diff --git a/crates/libm-test/tests/multiprecision.rs b/crates/libm-test/tests/multiprecision.rs
index 0b41fba82..e643f3c9c 100644
--- a/crates/libm-test/tests/multiprecision.rs
+++ b/crates/libm-test/tests/multiprecision.rs
@@ -2,11 +2,14 @@
 
 #![cfg(feature = "test-multiprecision")]
 
-use libm_test::gen::{CachedInput, random};
+use libm_test::domain::HasDomain;
+use libm_test::gen::{CachedInput, domain_logspace, random};
 use libm_test::mpfloat::MpOp;
-use libm_test::{CheckBasis, CheckCtx, CheckOutput, GenerateInput, MathOp, TupleCall};
+use libm_test::{
+    CheckBasis, CheckCtx, CheckOutput, GenerateInput, MathOp, OpFTy, OpRustFn, OpRustRet, TupleCall,
+};
 
-/// Implement a test against MPFR with random inputs.
+/// Test against MPFR with random inputs.
 macro_rules! mp_rand_tests {
     (
         fn_name: $fn_name:ident,
@@ -16,13 +19,14 @@ macro_rules! mp_rand_tests {
             #[test]
             $(#[$meta])*
             fn [< mp_random_ $fn_name >]() {
-                test_one::<libm_test::op::$fn_name::Routine>();
+                test_one_random::<libm_test::op::$fn_name::Routine>();
             }
         }
     };
 }
 
-fn test_one<Op>()
+/// Test a single routine with random inputs
+fn test_one_random<Op>()
 where
     Op: MathOp + MpOp,
     CachedInput: GenerateInput<Op::RustArgs>,
@@ -67,3 +71,90 @@ libm_macros::for_each_function! {
         nextafterf,
     ],
 }
+
+/// Test against MPFR with generators from a domain.
+macro_rules! mp_domain_tests {
+    (
+        fn_name: $fn_name:ident,
+        attrs: [$($meta:meta)*]
+    ) => {
+        paste::paste! {
+            #[test]
+            $(#[$meta])*
+            fn [< mp_logspace_ $fn_name >]() {
+                type Op = libm_test::op::$fn_name::Routine;
+                domain_test_runner::<Op>(domain_logspace::get_test_cases::<Op>());
+            }
+        }
+    };
+}
+
+/// Test a single routine against domaine-aware inputs.
+fn domain_test_runner<Op>(cases: impl Iterator<Item = (Op::FTy,)>)
+where
+    // Complicated generics...
+    // The operation must take a single float argument (unary only)
+    Op: MathOp<RustArgs = (<Op as MathOp>::FTy,)>,
+    // It must also support multiprecision operations
+    Op: MpOp,
+    // And it must have a domain specified
+    Op: HasDomain<Op::FTy>,
+    // The single float argument tuple must be able to call the `RustFn` and return `RustRet`
+    (OpFTy<Op>,): TupleCall<OpRustFn<Op>, Output = OpRustRet<Op>>,
+{
+    let mut mp_vals = Op::new_mp();
+    let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Mpfr);
+
+    for input in cases {
+        let mp_res = Op::run(&mut mp_vals, input);
+        let crate_res = input.call(Op::ROUTINE);
+
+        crate_res.validate(mp_res, input, &ctx).unwrap();
+    }
+}
+
+libm_macros::for_each_function! {
+    callback: mp_domain_tests,
+    attributes: [],
+    skip: [
+        // Functions with multiple inputs
+        atan2,
+        atan2f,
+        copysign,
+        copysignf,
+        fdim,
+        fdimf,
+        fma,
+        fmaf,
+        fmax,
+        fmaxf,
+        fmin,
+        fminf,
+        fmod,
+        fmodf,
+        hypot,
+        hypotf,
+        jn,
+        jnf,
+        ldexp,
+        ldexpf,
+        nextafter,
+        nextafterf,
+        pow,
+        powf,
+        remainder,
+        remainderf,
+        remquo,
+        remquof,
+        scalbn,
+        scalbnf,
+
+        // FIXME: MPFR tests needed
+        frexp,
+        frexpf,
+        ilogb,
+        ilogbf,
+        modf,
+        modff,
+    ],
+}

From e1935690fe724669209ce286a29a0e74bfd59a54 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 19 Dec 2024 11:22:02 +0000
Subject: [PATCH 058/279] Add tests for edge cases

Introduce a generator that will tests various points of interest
including zeros, infinities, and NaNs.
---
 crates/libm-test/src/domain.rs           |  4 +-
 crates/libm-test/src/gen.rs              |  1 +
 crates/libm-test/src/gen/edge_cases.rs   | 90 ++++++++++++++++++++++++
 crates/libm-test/tests/multiprecision.rs |  9 ++-
 4 files changed, 101 insertions(+), 3 deletions(-)
 create mode 100644 crates/libm-test/src/gen/edge_cases.rs

diff --git a/crates/libm-test/src/domain.rs b/crates/libm-test/src/domain.rs
index 43ba21974..9ee8a19b9 100644
--- a/crates/libm-test/src/domain.rs
+++ b/crates/libm-test/src/domain.rs
@@ -3,7 +3,7 @@
 use std::fmt;
 use std::ops::{self, Bound};
 
-use crate::Float;
+use crate::{Float, FloatExt};
 
 /// Representation of a function's domain.
 #[derive(Clone, Debug)]
@@ -19,7 +19,7 @@ pub struct Domain<T> {
 
 type BoxIter<T> = Box<dyn Iterator<Item = T>>;
 
-impl<F: Float> Domain<F> {
+impl<F: FloatExt> Domain<F> {
     /// The start of this domain, saturating at negative infinity.
     pub fn range_start(&self) -> F {
         match self.start {
diff --git a/crates/libm-test/src/gen.rs b/crates/libm-test/src/gen.rs
index e3c88c44a..2d15915d9 100644
--- a/crates/libm-test/src/gen.rs
+++ b/crates/libm-test/src/gen.rs
@@ -2,6 +2,7 @@
 
 use crate::GenerateInput;
 pub mod domain_logspace;
+pub mod edge_cases;
 pub mod random;
 
 /// Helper type to turn any reusable input into a generator.
diff --git a/crates/libm-test/src/gen/edge_cases.rs b/crates/libm-test/src/gen/edge_cases.rs
new file mode 100644
index 000000000..625e18bc7
--- /dev/null
+++ b/crates/libm-test/src/gen/edge_cases.rs
@@ -0,0 +1,90 @@
+//! A generator that checks a handful of cases near infinities, zeros, asymptotes, and NaNs.
+
+use libm::support::Float;
+
+use crate::domain::HasDomain;
+use crate::{FloatExt, MathOp};
+
+/// Number of values near an interesting point to check.
+// FIXME(ntests): replace this with a more logical algorithm
+const AROUND: usize = 100;
+
+/// Functions have infinite asymptotes, limit how many we check.
+// FIXME(ntests): replace this with a more logical algorithm
+const MAX_CHECK_POINTS: usize = 10;
+
+/// Create a list of values around interesting points (infinities, zeroes, NaNs).
+pub fn get_test_cases<Op, F>() -> impl Iterator<Item = (F,)>
+where
+    Op: MathOp<FTy = F> + HasDomain<F>,
+    F: Float,
+{
+    let mut ret = Vec::new();
+    let values = &mut ret;
+    let domain = Op::DOMAIN;
+    let domain_start = domain.range_start();
+    let domain_end = domain.range_end();
+
+    // Check near some notable constants
+    count_up(F::ONE, values);
+    count_up(F::ZERO, values);
+    count_up(F::NEG_ONE, values);
+    count_down(F::ONE, values);
+    count_down(F::ZERO, values);
+    count_down(F::NEG_ONE, values);
+    values.push(F::NEG_ZERO);
+
+    // Check values near the extremes
+    count_up(F::NEG_INFINITY, values);
+    count_down(F::INFINITY, values);
+    count_down(domain_end, values);
+    count_up(domain_start, values);
+    count_down(domain_start, values);
+    count_up(domain_end, values);
+    count_down(domain_end, values);
+
+    // Check some special values that aren't included in the above ranges
+    values.push(F::NAN);
+    values.extend(F::consts().iter());
+
+    // Check around asymptotes
+    if let Some(f) = domain.check_points {
+        let iter = f();
+        for x in iter.take(MAX_CHECK_POINTS) {
+            count_up(x, values);
+            count_down(x, values);
+        }
+    }
+
+    // Some results may overlap so deduplicate the vector to save test cycles.
+    values.sort_by_key(|x| x.to_bits());
+    values.dedup_by_key(|x| x.to_bits());
+
+    ret.into_iter().map(|v| (v,))
+}
+
+/// Add `AROUND` values starting at and including `x` and counting up. Uses the smallest possible
+/// increments (1 ULP).
+fn count_up<F: Float>(mut x: F, values: &mut Vec<F>) {
+    assert!(!x.is_nan());
+
+    let mut count = 0;
+    while x < F::INFINITY && count < AROUND {
+        values.push(x);
+        x = x.next_up();
+        count += 1;
+    }
+}
+
+/// Add `AROUND` values starting at and including `x` and counting down. Uses the smallest possible
+/// increments (1 ULP).
+fn count_down<F: Float>(mut x: F, values: &mut Vec<F>) {
+    assert!(!x.is_nan());
+
+    let mut count = 0;
+    while x > F::NEG_INFINITY && count < AROUND {
+        values.push(x);
+        x = x.next_down();
+        count += 1;
+    }
+}
diff --git a/crates/libm-test/tests/multiprecision.rs b/crates/libm-test/tests/multiprecision.rs
index e643f3c9c..5255dc1cf 100644
--- a/crates/libm-test/tests/multiprecision.rs
+++ b/crates/libm-test/tests/multiprecision.rs
@@ -3,7 +3,7 @@
 #![cfg(feature = "test-multiprecision")]
 
 use libm_test::domain::HasDomain;
-use libm_test::gen::{CachedInput, domain_logspace, random};
+use libm_test::gen::{CachedInput, domain_logspace, edge_cases, random};
 use libm_test::mpfloat::MpOp;
 use libm_test::{
     CheckBasis, CheckCtx, CheckOutput, GenerateInput, MathOp, OpFTy, OpRustFn, OpRustRet, TupleCall,
@@ -79,6 +79,13 @@ macro_rules! mp_domain_tests {
         attrs: [$($meta:meta)*]
     ) => {
         paste::paste! {
+            #[test]
+            $(#[$meta])*
+            fn [< mp_edge_case_ $fn_name >]() {
+                type Op = libm_test::op::$fn_name::Routine;
+                domain_test_runner::<Op>(edge_cases::get_test_cases::<Op, _>());
+            }
+
             #[test]
             $(#[$meta])*
             fn [< mp_logspace_ $fn_name >]() {

From 5b26d7901e65cd5ef109fe117305010663f40bc6 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 19 Dec 2024 14:23:05 +0000
Subject: [PATCH 059/279] Update allowed precision to account for new tests

---
 crates/libm-test/src/precision.rs | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/crates/libm-test/src/precision.rs b/crates/libm-test/src/precision.rs
index c7f9d9e30..b878212fa 100644
--- a/crates/libm-test/src/precision.rs
+++ b/crates/libm-test/src/precision.rs
@@ -41,10 +41,11 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 {
         (Musl, Id::Tgamma) => 20,
 
         // Overrides for MPFR
+        (Mpfr, Id::Acosh) => 4,
         (Mpfr, Id::Acoshf) => 4,
         (Mpfr, Id::Asinh | Id::Asinhf) => 2,
         (Mpfr, Id::Atanh | Id::Atanhf) => 2,
-        (Mpfr, Id::Exp10 | Id::Exp10f) => 3,
+        (Mpfr, Id::Exp10 | Id::Exp10f) => 6,
         (Mpfr, Id::Lgamma | Id::LgammaR | Id::Lgammaf | Id::LgammafR) => 16,
         (Mpfr, Id::Sinh | Id::Sinhf) => 2,
         (Mpfr, Id::Tanh | Id::Tanhf) => 2,
@@ -105,17 +106,14 @@ impl MaybeOverride<(f32,)> for SpecialCase {
         _ulp: &mut u32,
         ctx: &CheckCtx,
     ) -> Option<TestResult> {
-        if ctx.basis == CheckBasis::Musl {
-            if ctx.base_name == BaseName::Expm1 && input.0 > 80.0 && actual.is_infinite() {
-                // we return infinity but the number is representable
-                return XFAIL;
-            }
+        if ctx.base_name == BaseName::Expm1 && input.0 > 80.0 && actual.is_infinite() {
+            // we return infinity but the number is representable
+            return XFAIL;
+        }
 
-            if ctx.base_name == BaseName::Sinh && input.0.abs() > 80.0 && actual.is_nan() {
-                // we return some NaN that should be real values or infinite
-                // doesn't seem to happen on x86
-                return XFAIL;
-            }
+        if ctx.base_name == BaseName::Sinh && input.0.abs() > 80.0 && actual.is_nan() {
+            // we return some NaN that should be real values or infinite
+            return XFAIL;
         }
 
         if ctx.base_name == BaseName::Acosh && input.0 < -1.0 {

From f4d97cd3f64d7a6682a2cbde6ed6233cc20c2e0f Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 19 Dec 2024 11:22:22 +0000
Subject: [PATCH 060/279] Add a way to plot the output from generators

For visualization, add a simple script for generating scatter plots and
a binary (via examples) to plot the inputs given various domains.
---
 crates/libm-test/examples/plot_domains.rs | 105 +++++++++++++++
 crates/libm-test/examples/plot_file.jl    | 157 ++++++++++++++++++++++
 2 files changed, 262 insertions(+)
 create mode 100644 crates/libm-test/examples/plot_domains.rs
 create mode 100644 crates/libm-test/examples/plot_file.jl

diff --git a/crates/libm-test/examples/plot_domains.rs b/crates/libm-test/examples/plot_domains.rs
new file mode 100644
index 000000000..630a0c233
--- /dev/null
+++ b/crates/libm-test/examples/plot_domains.rs
@@ -0,0 +1,105 @@
+//! Program to write all inputs from a generator to a file, then invoke a Julia script to plot
+//! them. Output is in `target/plots`.
+//!
+//! Requires Julia with the `CairoMakie` dependency.
+//!
+//! Note that running in release mode by default generates a _lot_ more datapoints, which
+//! causes plotting to be extremely slow (some simplification to be done in the script).
+
+use std::fmt::Write as _;
+use std::io::{BufWriter, Write};
+use std::path::Path;
+use std::process::Command;
+use std::{env, fs};
+
+use libm_test::domain::HasDomain;
+use libm_test::gen::{domain_logspace, edge_cases};
+use libm_test::{MathOp, op};
+
+const JL_PLOT: &str = "examples/plot_file.jl";
+
+fn main() {
+    let manifest_env = env::var("CARGO_MANIFEST_DIR").unwrap();
+    let manifest_dir = Path::new(&manifest_env);
+    let out_dir = manifest_dir.join("../../target/plots");
+    if !out_dir.exists() {
+        fs::create_dir(&out_dir).unwrap();
+    }
+
+    let jl_script = manifest_dir.join(JL_PLOT);
+    let mut config = format!(r#"out_dir = "{}""#, out_dir.display());
+    config.write_str("\n\n").unwrap();
+
+    // Plot a few domains with some functions that use them.
+    plot_one_operator::<op::sqrtf::Routine>(&out_dir, &mut config);
+    plot_one_operator::<op::cosf::Routine>(&out_dir, &mut config);
+    plot_one_operator::<op::cbrtf::Routine>(&out_dir, &mut config);
+
+    let config_path = out_dir.join("config.toml");
+    fs::write(&config_path, config).unwrap();
+
+    // The script expects a path to `config.toml` to be passed as its only argument
+    let mut cmd = Command::new("julia");
+    if cfg!(optimizations_enabled) {
+        cmd.arg("-O3");
+    }
+    cmd.arg(jl_script).arg(config_path);
+
+    println!("launching script... {cmd:?}");
+    cmd.status().unwrap();
+}
+
+/// Run multiple generators for a single operator.
+fn plot_one_operator<Op>(out_dir: &Path, config: &mut String)
+where
+    Op: MathOp<FTy = f32> + HasDomain<f32>,
+{
+    plot_one_generator(
+        out_dir,
+        Op::BASE_NAME.as_str(),
+        "logspace",
+        config,
+        domain_logspace::get_test_cases::<Op>(),
+    );
+    plot_one_generator(
+        out_dir,
+        Op::BASE_NAME.as_str(),
+        "edge_cases",
+        config,
+        edge_cases::get_test_cases::<Op, _>(),
+    );
+}
+
+/// Plot the output of a single generator.
+fn plot_one_generator(
+    out_dir: &Path,
+    fn_name: &str,
+    gen_name: &str,
+    config: &mut String,
+    gen: impl Iterator<Item = (f32,)>,
+) {
+    let text_file = out_dir.join(format!("input-{fn_name}-{gen_name}.txt"));
+
+    let f = fs::File::create(&text_file).unwrap();
+    let mut w = BufWriter::new(f);
+    let mut count = 0u64;
+
+    for input in gen {
+        writeln!(w, "{:e}", input.0).unwrap();
+        count += 1;
+    }
+
+    w.flush().unwrap();
+    println!("generated {count} inputs for {fn_name}-{gen_name}");
+
+    writeln!(
+        config,
+        r#"[[input]]
+function = "{fn_name}"
+generator = "{gen_name}"
+input_file = "{}"
+"#,
+        text_file.to_str().unwrap()
+    )
+    .unwrap()
+}
diff --git a/crates/libm-test/examples/plot_file.jl b/crates/libm-test/examples/plot_file.jl
new file mode 100644
index 000000000..14a128303
--- /dev/null
+++ b/crates/libm-test/examples/plot_file.jl
@@ -0,0 +1,157 @@
+"A quick script for plotting a list of floats.
+
+Takes a path to a TOML file (Julia has builtin TOML support but not JSON) which
+specifies a list of source files to plot. Plots are done with both a linear and
+a log scale.
+
+Requires [Makie] (specifically CairoMakie) for plotting.
+
+[Makie]: https://docs.makie.org/stable/
+"
+
+using CairoMakie
+using TOML
+
+function main()::Nothing
+    CairoMakie.activate!(px_per_unit=10)
+    config_path = ARGS[1]
+
+    cfg = Dict()
+    open(config_path, "r") do f
+        cfg = TOML.parse(f)
+    end
+
+    out_dir = cfg["out_dir"]
+    for input in cfg["input"]
+        fn_name = input["function"]
+        gen_name = input["generator"]
+        input_file = input["input_file"]
+
+        plot_one(input_file, out_dir, fn_name, gen_name)
+    end
+end
+
+"Read inputs from a file, create both linear and log plots for one function"
+function plot_one(
+    input_file::String,
+    out_dir::String,
+    fn_name::String,
+    gen_name::String,
+)::Nothing
+    fig = Figure()
+
+    lin_out_file = joinpath(out_dir, "plot-$fn_name-$gen_name.png")
+    log_out_file = joinpath(out_dir, "plot-$fn_name-$gen_name-log.png")
+
+    # Map string function names to callable functions
+    if fn_name == "cos"
+        orig_func = cos
+        xlims = (-6.0, 6.0)
+        xlims_log = (-pi * 10, pi * 10)
+    elseif fn_name == "cbrt"
+        orig_func = cbrt
+        xlims = (-2.0, 2.0)
+        xlims_log = (-1000.0, 1000.0)
+    elseif fn_name == "sqrt"
+        orig_func = sqrt
+        xlims = (-1.1, 6.0)
+        xlims_log = (-1.1, 5000.0)
+    else
+        println("unrecognized function name `$fn_name`; update plot_file.jl")
+        exit(1)
+    end
+
+    # Edge cases don't do much beyond +/-1, except for infinity.
+    if gen_name == "edge_cases"
+        xlims = (-1.1, 1.1)
+        xlims_log = (-1.1, 1.1)
+    end
+
+    # Turn domain errors into NaN
+    func(x) = map_or(x, orig_func, NaN)
+
+    # Parse a series of X values produced by the generator
+    inputs = readlines(input_file)
+    gen_x = map((v) -> parse(Float32, v), inputs)
+
+    do_plot(
+        fig, gen_x, func, xlims[1], xlims[2],
+        "$fn_name $gen_name (linear scale)",
+        lin_out_file, false,
+    )
+
+    do_plot(
+        fig, gen_x, func, xlims_log[1], xlims_log[2],
+        "$fn_name $gen_name (log scale)",
+        log_out_file, true,
+    )
+end
+
+"Create a single plot"
+function do_plot(
+    fig::Figure,
+    gen_x::Vector{F},
+    func::Function,
+    xmin::AbstractFloat,
+    xmax::AbstractFloat,
+    title::String,
+    out_file::String,
+    logscale::Bool,
+)::Nothing where F<:AbstractFloat
+    println("plotting $title")
+
+    # `gen_x` is the values the generator produces. `actual_x` is for plotting a
+    # continuous function.
+    input_min = xmin - 1.0
+    input_max = xmax + 1.0
+    gen_x = filter((v) -> v >= input_min && v <= input_max, gen_x)
+    markersize = length(gen_x) < 10_000 ? 6.0 : 4.0
+
+    steps = 10_000
+    if logscale
+        r = LinRange(symlog10(input_min), symlog10(input_max), steps)
+        actual_x = sympow10.(r)
+        xscale = Makie.pseudolog10
+    else
+        actual_x = LinRange(input_min, input_max, steps)
+        xscale = identity
+    end
+    
+    gen_y = @. func(gen_x)
+    actual_y = @. func(actual_x)
+
+    ax = Axis(fig[1, 1], xscale=xscale, title=title)
+
+    lines!(
+        ax, actual_x, actual_y, color=(:lightblue, 0.6),
+        linewidth=6.0, label="true function",
+    )
+    scatter!(
+        ax, gen_x, gen_y, color=(:darkblue, 0.9),
+        markersize=markersize, label="checked inputs",
+    )
+    axislegend(ax, position=:rb, framevisible=false)
+
+    save(out_file, fig)
+    delete!(ax)
+end
+
+"Apply a function, returning the default if there is a domain error"
+function map_or(
+    input::AbstractFloat,
+    f::Function,
+    default::Any
+)::Union{AbstractFloat,Any}
+    try
+        return f(input)
+    catch
+        return default
+    end
+end
+
+# Operations for logarithms that are symmetric about 0
+C = 10
+symlog10(x::Number) = sign(x) * (log10(1 + abs(x)/(10^C)))
+sympow10(x::Number) = (10^C) * (10^x - 1)
+
+main()

From 971bafaafdf13a5b3384584ced5fad9ed6ed9f9a Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sun, 22 Dec 2024 11:22:02 +0000
Subject: [PATCH 061/279] Move the macro's input function list to a new module
 `shared`

This will enable us to `include!` the file to access these types in
`libm-test`, rather than somehow reproducing the types as part of the
macro. Ideally `libm-test` would just `use` the types from `libm-macros`
but proc macro crates cannot currently export anything else.

This also adjusts naming to closer match the scheme described in
`libm_test::op`.
---
 crates/libm-macros/src/enums.rs  |   7 +-
 crates/libm-macros/src/lib.rs    | 299 +++++--------------------------
 crates/libm-macros/src/shared.rs | 277 ++++++++++++++++++++++++++++
 3 files changed, 320 insertions(+), 263 deletions(-)
 create mode 100644 crates/libm-macros/src/shared.rs

diff --git a/crates/libm-macros/src/enums.rs b/crates/libm-macros/src/enums.rs
index 1f9fca2ef..82dedc66e 100644
--- a/crates/libm-macros/src/enums.rs
+++ b/crates/libm-macros/src/enums.rs
@@ -5,7 +5,7 @@ use quote::quote;
 use syn::spanned::Spanned;
 use syn::{Fields, ItemEnum, Variant};
 
-use crate::{ALL_FUNCTIONS_FLAT, base_name};
+use crate::{ALL_OPERATIONS, base_name};
 
 /// Implement `#[function_enum]`, see documentation in `lib.rs`.
 pub fn function_enum(
@@ -33,7 +33,7 @@ pub fn function_enum(
     let mut as_str_arms = Vec::new();
     let mut base_arms = Vec::new();
 
-    for func in ALL_FUNCTIONS_FLAT.iter() {
+    for func in ALL_OPERATIONS.iter() {
         let fn_name = func.name;
         let ident = Ident::new(&fn_name.to_upper_camel_case(), Span::call_site());
         let bname_ident = Ident::new(&base_name(fn_name).to_upper_camel_case(), Span::call_site());
@@ -85,8 +85,7 @@ pub fn base_name_enum(
         return Err(syn::Error::new(sp.span(), "no attributes expected"));
     }
 
-    let mut base_names: Vec<_> =
-        ALL_FUNCTIONS_FLAT.iter().map(|func| base_name(func.name)).collect();
+    let mut base_names: Vec<_> = ALL_OPERATIONS.iter().map(|func| base_name(func.name)).collect();
     base_names.sort_unstable();
     base_names.dedup();
 
diff --git a/crates/libm-macros/src/lib.rs b/crates/libm-macros/src/lib.rs
index 1e7cd08b9..916b539ed 100644
--- a/crates/libm-macros/src/lib.rs
+++ b/crates/libm-macros/src/lib.rs
@@ -1,270 +1,18 @@
 mod enums;
 mod parse;
-
-use std::sync::LazyLock;
+mod shared;
 
 use parse::{Invocation, StructuredInput};
 use proc_macro as pm;
 use proc_macro2::{self as pm2, Span};
 use quote::{ToTokens, quote};
+pub(crate) use shared::{ALL_OPERATIONS, FloatTy, MathOpInfo, Ty};
 use syn::spanned::Spanned;
 use syn::visit_mut::VisitMut;
 use syn::{Ident, ItemEnum};
 
-const ALL_FUNCTIONS: &[(Ty, Signature, Option<Signature>, &[&str])] = &[
-    (
-        // `fn(f32) -> f32`
-        Ty::F32,
-        Signature { args: &[Ty::F32], returns: &[Ty::F32] },
-        None,
-        &[
-            "acosf", "acoshf", "asinf", "asinhf", "atanf", "atanhf", "cbrtf", "ceilf", "cosf",
-            "coshf", "erff", "exp10f", "exp2f", "expf", "expm1f", "fabsf", "floorf", "j0f", "j1f",
-            "lgammaf", "log10f", "log1pf", "log2f", "logf", "rintf", "roundf", "sinf", "sinhf",
-            "sqrtf", "tanf", "tanhf", "tgammaf", "truncf",
-        ],
-    ),
-    (
-        // `(f64) -> f64`
-        Ty::F64,
-        Signature { args: &[Ty::F64], returns: &[Ty::F64] },
-        None,
-        &[
-            "acos", "acosh", "asin", "asinh", "atan", "atanh", "cbrt", "ceil", "cos", "cosh",
-            "erf", "exp10", "exp2", "exp", "expm1", "fabs", "floor", "j0", "j1", "lgamma", "log10",
-            "log1p", "log2", "log", "rint", "round", "sin", "sinh", "sqrt", "tan", "tanh",
-            "tgamma", "trunc",
-        ],
-    ),
-    (
-        // `(f32, f32) -> f32`
-        Ty::F32,
-        Signature { args: &[Ty::F32, Ty::F32], returns: &[Ty::F32] },
-        None,
-        &[
-            "atan2f",
-            "copysignf",
-            "fdimf",
-            "fmaxf",
-            "fminf",
-            "fmodf",
-            "hypotf",
-            "nextafterf",
-            "powf",
-            "remainderf",
-        ],
-    ),
-    (
-        // `(f64, f64) -> f64`
-        Ty::F64,
-        Signature { args: &[Ty::F64, Ty::F64], returns: &[Ty::F64] },
-        None,
-        &[
-            "atan2",
-            "copysign",
-            "fdim",
-            "fmax",
-            "fmin",
-            "fmod",
-            "hypot",
-            "nextafter",
-            "pow",
-            "remainder",
-        ],
-    ),
-    (
-        // `(f32, f32, f32) -> f32`
-        Ty::F32,
-        Signature { args: &[Ty::F32, Ty::F32, Ty::F32], returns: &[Ty::F32] },
-        None,
-        &["fmaf"],
-    ),
-    (
-        // `(f64, f64, f64) -> f64`
-        Ty::F64,
-        Signature { args: &[Ty::F64, Ty::F64, Ty::F64], returns: &[Ty::F64] },
-        None,
-        &["fma"],
-    ),
-    (
-        // `(f32) -> i32`
-        Ty::F32,
-        Signature { args: &[Ty::F32], returns: &[Ty::I32] },
-        None,
-        &["ilogbf"],
-    ),
-    (
-        // `(f64) -> i32`
-        Ty::F64,
-        Signature { args: &[Ty::F64], returns: &[Ty::I32] },
-        None,
-        &["ilogb"],
-    ),
-    (
-        // `(i32, f32) -> f32`
-        Ty::F32,
-        Signature { args: &[Ty::I32, Ty::F32], returns: &[Ty::F32] },
-        None,
-        &["jnf"],
-    ),
-    (
-        // `(i32, f64) -> f64`
-        Ty::F64,
-        Signature { args: &[Ty::I32, Ty::F64], returns: &[Ty::F64] },
-        None,
-        &["jn"],
-    ),
-    (
-        // `(f32, i32) -> f32`
-        Ty::F32,
-        Signature { args: &[Ty::F32, Ty::I32], returns: &[Ty::F32] },
-        None,
-        &["scalbnf", "ldexpf"],
-    ),
-    (
-        // `(f64, i64) -> f64`
-        Ty::F64,
-        Signature { args: &[Ty::F64, Ty::I32], returns: &[Ty::F64] },
-        None,
-        &["scalbn", "ldexp"],
-    ),
-    (
-        // `(f32, &mut f32) -> f32` as `(f32) -> (f32, f32)`
-        Ty::F32,
-        Signature { args: &[Ty::F32], returns: &[Ty::F32, Ty::F32] },
-        Some(Signature { args: &[Ty::F32, Ty::MutF32], returns: &[Ty::F32] }),
-        &["modff"],
-    ),
-    (
-        // `(f64, &mut f64) -> f64` as  `(f64) -> (f64, f64)`
-        Ty::F64,
-        Signature { args: &[Ty::F64], returns: &[Ty::F64, Ty::F64] },
-        Some(Signature { args: &[Ty::F64, Ty::MutF64], returns: &[Ty::F64] }),
-        &["modf"],
-    ),
-    (
-        // `(f32, &mut c_int) -> f32` as `(f32) -> (f32, i32)`
-        Ty::F32,
-        Signature { args: &[Ty::F32], returns: &[Ty::F32, Ty::I32] },
-        Some(Signature { args: &[Ty::F32, Ty::MutCInt], returns: &[Ty::F32] }),
-        &["frexpf", "lgammaf_r"],
-    ),
-    (
-        // `(f64, &mut c_int) -> f64` as `(f64) -> (f64, i32)`
-        Ty::F64,
-        Signature { args: &[Ty::F64], returns: &[Ty::F64, Ty::I32] },
-        Some(Signature { args: &[Ty::F64, Ty::MutCInt], returns: &[Ty::F64] }),
-        &["frexp", "lgamma_r"],
-    ),
-    (
-        // `(f32, f32, &mut c_int) -> f32` as `(f32, f32) -> (f32, i32)`
-        Ty::F32,
-        Signature { args: &[Ty::F32, Ty::F32], returns: &[Ty::F32, Ty::I32] },
-        Some(Signature { args: &[Ty::F32, Ty::F32, Ty::MutCInt], returns: &[Ty::F32] }),
-        &["remquof"],
-    ),
-    (
-        // `(f64, f64, &mut c_int) -> f64` as `(f64, f64) -> (f64, i32)`
-        Ty::F64,
-        Signature { args: &[Ty::F64, Ty::F64], returns: &[Ty::F64, Ty::I32] },
-        Some(Signature { args: &[Ty::F64, Ty::F64, Ty::MutCInt], returns: &[Ty::F64] }),
-        &["remquo"],
-    ),
-    (
-        // `(f32, &mut f32, &mut f32)` as `(f32) -> (f32, f32)`
-        Ty::F32,
-        Signature { args: &[Ty::F32], returns: &[Ty::F32, Ty::F32] },
-        Some(Signature { args: &[Ty::F32, Ty::MutF32, Ty::MutF32], returns: &[] }),
-        &["sincosf"],
-    ),
-    (
-        // `(f64, &mut f64, &mut f64)` as `(f64) -> (f64, f64)`
-        Ty::F64,
-        Signature { args: &[Ty::F64], returns: &[Ty::F64, Ty::F64] },
-        Some(Signature { args: &[Ty::F64, Ty::MutF64, Ty::MutF64], returns: &[] }),
-        &["sincos"],
-    ),
-];
-
 const KNOWN_TYPES: &[&str] = &["FTy", "CFn", "CArgs", "CRet", "RustFn", "RustArgs", "RustRet"];
 
-/// A type used in a function signature.
-#[allow(dead_code)]
-#[derive(Debug, Clone, Copy)]
-enum Ty {
-    F16,
-    F32,
-    F64,
-    F128,
-    I32,
-    CInt,
-    MutF16,
-    MutF32,
-    MutF64,
-    MutF128,
-    MutI32,
-    MutCInt,
-}
-
-impl ToTokens for Ty {
-    fn to_tokens(&self, tokens: &mut pm2::TokenStream) {
-        let ts = match self {
-            Ty::F16 => quote! { f16 },
-            Ty::F32 => quote! { f32 },
-            Ty::F64 => quote! { f64 },
-            Ty::F128 => quote! { f128 },
-            Ty::I32 => quote! { i32 },
-            Ty::CInt => quote! { ::core::ffi::c_int },
-            Ty::MutF16 => quote! { &'a mut f16 },
-            Ty::MutF32 => quote! { &'a mut f32 },
-            Ty::MutF64 => quote! { &'a mut f64 },
-            Ty::MutF128 => quote! { &'a mut f128 },
-            Ty::MutI32 => quote! { &'a mut i32 },
-            Ty::MutCInt => quote! { &'a mut core::ffi::c_int },
-        };
-
-        tokens.extend(ts);
-    }
-}
-
-/// Representation of e.g. `(f32, f32) -> f32`
-#[derive(Debug, Clone)]
-struct Signature {
-    args: &'static [Ty],
-    returns: &'static [Ty],
-}
-
-/// Combined information about a function implementation.
-#[derive(Debug, Clone)]
-struct FunctionInfo {
-    name: &'static str,
-    base_fty: Ty,
-    /// Function signature for C implementations
-    c_sig: Signature,
-    /// Function signature for Rust implementations
-    rust_sig: Signature,
-}
-
-/// A flat representation of `ALL_FUNCTIONS`.
-static ALL_FUNCTIONS_FLAT: LazyLock<Vec<FunctionInfo>> = LazyLock::new(|| {
-    let mut ret = Vec::new();
-
-    for (base_fty, rust_sig, c_sig, names) in ALL_FUNCTIONS {
-        for name in *names {
-            let api = FunctionInfo {
-                name,
-                base_fty: *base_fty,
-                rust_sig: rust_sig.clone(),
-                c_sig: c_sig.clone().unwrap_or_else(|| rust_sig.clone()),
-            };
-            ret.push(api);
-        }
-    }
-
-    ret.sort_by_key(|item| item.name);
-    ret
-});
-
 /// Populate an enum with a variant representing function. Names are in upper camel case.
 ///
 /// Applied to an empty enum. Expects one attribute `#[function_enum(BaseName)]` that provides
@@ -382,7 +130,7 @@ pub fn for_each_function(tokens: pm::TokenStream) -> pm::TokenStream {
 /// Check for any input that is structurally correct but has other problems.
 ///
 /// Returns the list of function names that we should expand for.
-fn validate(input: &mut StructuredInput) -> syn::Result<Vec<&'static FunctionInfo>> {
+fn validate(input: &mut StructuredInput) -> syn::Result<Vec<&'static MathOpInfo>> {
     // Collect lists of all functions that are provied as macro inputs in various fields (only,
     // skip, attributes).
     let attr_mentions = input
@@ -398,7 +146,7 @@ fn validate(input: &mut StructuredInput) -> syn::Result<Vec<&'static FunctionInf
 
     // Make sure that every function mentioned is a real function
     for mentioned in all_mentioned_fns {
-        if !ALL_FUNCTIONS_FLAT.iter().any(|func| mentioned == func.name) {
+        if !ALL_OPERATIONS.iter().any(|func| mentioned == func.name) {
             let e = syn::Error::new(
                 mentioned.span(),
                 format!("unrecognized function name `{mentioned}`"),
@@ -417,7 +165,7 @@ fn validate(input: &mut StructuredInput) -> syn::Result<Vec<&'static FunctionInf
 
     // Construct a list of what we intend to expand
     let mut fn_list = Vec::new();
-    for func in ALL_FUNCTIONS_FLAT.iter() {
+    for func in ALL_OPERATIONS.iter() {
         let fn_name = func.name;
         // If we have an `only` list and it does _not_ contain this function name, skip it
         if input.only.as_ref().is_some_and(|only| !only.iter().any(|o| o == fn_name)) {
@@ -498,7 +246,7 @@ fn validate(input: &mut StructuredInput) -> syn::Result<Vec<&'static FunctionInf
 }
 
 /// Expand our structured macro input into invocations of the callback macro.
-fn expand(input: StructuredInput, fn_list: &[&FunctionInfo]) -> syn::Result<pm2::TokenStream> {
+fn expand(input: StructuredInput, fn_list: &[&MathOpInfo]) -> syn::Result<pm2::TokenStream> {
     let mut out = pm2::TokenStream::new();
     let default_ident = Ident::new("_", Span::call_site());
     let callback = input.callback;
@@ -545,7 +293,7 @@ fn expand(input: StructuredInput, fn_list: &[&FunctionInfo]) -> syn::Result<pm2:
             None => pm2::TokenStream::new(),
         };
 
-        let base_fty = func.base_fty;
+        let base_fty = func.float_ty;
         let c_args = &func.c_sig.args;
         let c_ret = &func.c_sig.returns;
         let rust_args = &func.rust_sig.args;
@@ -648,3 +396,36 @@ fn base_name(name: &str) -> &str {
             .unwrap_or(name),
     }
 }
+
+impl ToTokens for Ty {
+    fn to_tokens(&self, tokens: &mut pm2::TokenStream) {
+        let ts = match self {
+            Ty::F16 => quote! { f16 },
+            Ty::F32 => quote! { f32 },
+            Ty::F64 => quote! { f64 },
+            Ty::F128 => quote! { f128 },
+            Ty::I32 => quote! { i32 },
+            Ty::CInt => quote! { ::core::ffi::c_int },
+            Ty::MutF16 => quote! { &'a mut f16 },
+            Ty::MutF32 => quote! { &'a mut f32 },
+            Ty::MutF64 => quote! { &'a mut f64 },
+            Ty::MutF128 => quote! { &'a mut f128 },
+            Ty::MutI32 => quote! { &'a mut i32 },
+            Ty::MutCInt => quote! { &'a mut core::ffi::c_int },
+        };
+
+        tokens.extend(ts);
+    }
+}
+impl ToTokens for FloatTy {
+    fn to_tokens(&self, tokens: &mut pm2::TokenStream) {
+        let ts = match self {
+            FloatTy::F16 => quote! { f16 },
+            FloatTy::F32 => quote! { f32 },
+            FloatTy::F64 => quote! { f64 },
+            FloatTy::F128 => quote! { f128 },
+        };
+
+        tokens.extend(ts);
+    }
+}
diff --git a/crates/libm-macros/src/shared.rs b/crates/libm-macros/src/shared.rs
new file mode 100644
index 000000000..100bcc7ad
--- /dev/null
+++ b/crates/libm-macros/src/shared.rs
@@ -0,0 +1,277 @@
+/* List of all functions that is shared between `libm-macros` and `libm-test`. */
+
+use std::fmt;
+use std::sync::LazyLock;
+
+const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])] = &[
+    (
+        // `fn(f32) -> f32`
+        FloatTy::F32,
+        Signature { args: &[Ty::F32], returns: &[Ty::F32] },
+        None,
+        &[
+            "acosf", "acoshf", "asinf", "asinhf", "atanf", "atanhf", "cbrtf", "ceilf", "cosf",
+            "coshf", "erff", "exp10f", "exp2f", "expf", "expm1f", "fabsf", "floorf", "j0f", "j1f",
+            "lgammaf", "log10f", "log1pf", "log2f", "logf", "rintf", "roundf", "sinf", "sinhf",
+            "sqrtf", "tanf", "tanhf", "tgammaf", "truncf",
+        ],
+    ),
+    (
+        // `(f64) -> f64`
+        FloatTy::F64,
+        Signature { args: &[Ty::F64], returns: &[Ty::F64] },
+        None,
+        &[
+            "acos", "acosh", "asin", "asinh", "atan", "atanh", "cbrt", "ceil", "cos", "cosh",
+            "erf", "exp10", "exp2", "exp", "expm1", "fabs", "floor", "j0", "j1", "lgamma", "log10",
+            "log1p", "log2", "log", "rint", "round", "sin", "sinh", "sqrt", "tan", "tanh",
+            "tgamma", "trunc",
+        ],
+    ),
+    (
+        // `(f32, f32) -> f32`
+        FloatTy::F32,
+        Signature { args: &[Ty::F32, Ty::F32], returns: &[Ty::F32] },
+        None,
+        &[
+            "atan2f",
+            "copysignf",
+            "fdimf",
+            "fmaxf",
+            "fminf",
+            "fmodf",
+            "hypotf",
+            "nextafterf",
+            "powf",
+            "remainderf",
+        ],
+    ),
+    (
+        // `(f64, f64) -> f64`
+        FloatTy::F64,
+        Signature { args: &[Ty::F64, Ty::F64], returns: &[Ty::F64] },
+        None,
+        &[
+            "atan2",
+            "copysign",
+            "fdim",
+            "fmax",
+            "fmin",
+            "fmod",
+            "hypot",
+            "nextafter",
+            "pow",
+            "remainder",
+        ],
+    ),
+    (
+        // `(f32, f32, f32) -> f32`
+        FloatTy::F32,
+        Signature { args: &[Ty::F32, Ty::F32, Ty::F32], returns: &[Ty::F32] },
+        None,
+        &["fmaf"],
+    ),
+    (
+        // `(f64, f64, f64) -> f64`
+        FloatTy::F64,
+        Signature { args: &[Ty::F64, Ty::F64, Ty::F64], returns: &[Ty::F64] },
+        None,
+        &["fma"],
+    ),
+    (
+        // `(f32) -> i32`
+        FloatTy::F32,
+        Signature { args: &[Ty::F32], returns: &[Ty::I32] },
+        None,
+        &["ilogbf"],
+    ),
+    (
+        // `(f64) -> i32`
+        FloatTy::F64,
+        Signature { args: &[Ty::F64], returns: &[Ty::I32] },
+        None,
+        &["ilogb"],
+    ),
+    (
+        // `(i32, f32) -> f32`
+        FloatTy::F32,
+        Signature { args: &[Ty::I32, Ty::F32], returns: &[Ty::F32] },
+        None,
+        &["jnf"],
+    ),
+    (
+        // `(i32, f64) -> f64`
+        FloatTy::F64,
+        Signature { args: &[Ty::I32, Ty::F64], returns: &[Ty::F64] },
+        None,
+        &["jn"],
+    ),
+    (
+        // `(f32, i32) -> f32`
+        FloatTy::F32,
+        Signature { args: &[Ty::F32, Ty::I32], returns: &[Ty::F32] },
+        None,
+        &["scalbnf", "ldexpf"],
+    ),
+    (
+        // `(f64, i64) -> f64`
+        FloatTy::F64,
+        Signature { args: &[Ty::F64, Ty::I32], returns: &[Ty::F64] },
+        None,
+        &["scalbn", "ldexp"],
+    ),
+    (
+        // `(f32, &mut f32) -> f32` as `(f32) -> (f32, f32)`
+        FloatTy::F32,
+        Signature { args: &[Ty::F32], returns: &[Ty::F32, Ty::F32] },
+        Some(Signature { args: &[Ty::F32, Ty::MutF32], returns: &[Ty::F32] }),
+        &["modff"],
+    ),
+    (
+        // `(f64, &mut f64) -> f64` as  `(f64) -> (f64, f64)`
+        FloatTy::F64,
+        Signature { args: &[Ty::F64], returns: &[Ty::F64, Ty::F64] },
+        Some(Signature { args: &[Ty::F64, Ty::MutF64], returns: &[Ty::F64] }),
+        &["modf"],
+    ),
+    (
+        // `(f32, &mut c_int) -> f32` as `(f32) -> (f32, i32)`
+        FloatTy::F32,
+        Signature { args: &[Ty::F32], returns: &[Ty::F32, Ty::I32] },
+        Some(Signature { args: &[Ty::F32, Ty::MutCInt], returns: &[Ty::F32] }),
+        &["frexpf", "lgammaf_r"],
+    ),
+    (
+        // `(f64, &mut c_int) -> f64` as `(f64) -> (f64, i32)`
+        FloatTy::F64,
+        Signature { args: &[Ty::F64], returns: &[Ty::F64, Ty::I32] },
+        Some(Signature { args: &[Ty::F64, Ty::MutCInt], returns: &[Ty::F64] }),
+        &["frexp", "lgamma_r"],
+    ),
+    (
+        // `(f32, f32, &mut c_int) -> f32` as `(f32, f32) -> (f32, i32)`
+        FloatTy::F32,
+        Signature { args: &[Ty::F32, Ty::F32], returns: &[Ty::F32, Ty::I32] },
+        Some(Signature { args: &[Ty::F32, Ty::F32, Ty::MutCInt], returns: &[Ty::F32] }),
+        &["remquof"],
+    ),
+    (
+        // `(f64, f64, &mut c_int) -> f64` as `(f64, f64) -> (f64, i32)`
+        FloatTy::F64,
+        Signature { args: &[Ty::F64, Ty::F64], returns: &[Ty::F64, Ty::I32] },
+        Some(Signature { args: &[Ty::F64, Ty::F64, Ty::MutCInt], returns: &[Ty::F64] }),
+        &["remquo"],
+    ),
+    (
+        // `(f32, &mut f32, &mut f32)` as `(f32) -> (f32, f32)`
+        FloatTy::F32,
+        Signature { args: &[Ty::F32], returns: &[Ty::F32, Ty::F32] },
+        Some(Signature { args: &[Ty::F32, Ty::MutF32, Ty::MutF32], returns: &[] }),
+        &["sincosf"],
+    ),
+    (
+        // `(f64, &mut f64, &mut f64)` as `(f64) -> (f64, f64)`
+        FloatTy::F64,
+        Signature { args: &[Ty::F64], returns: &[Ty::F64, Ty::F64] },
+        Some(Signature { args: &[Ty::F64, Ty::MutF64, Ty::MutF64], returns: &[] }),
+        &["sincos"],
+    ),
+];
+
+/// A type used in a function signature.
+#[allow(dead_code)]
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+pub enum Ty {
+    F16,
+    F32,
+    F64,
+    F128,
+    I32,
+    CInt,
+    MutF16,
+    MutF32,
+    MutF64,
+    MutF128,
+    MutI32,
+    MutCInt,
+}
+
+/// A subset of [`Ty`] representing only floats.
+#[allow(dead_code)]
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+pub enum FloatTy {
+    F16,
+    F32,
+    F64,
+    F128,
+}
+
+impl fmt::Display for Ty {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let s = match self {
+            Ty::F16 => "f16",
+            Ty::F32 => "f32",
+            Ty::F64 => "f64",
+            Ty::F128 => "f128",
+            Ty::I32 => "i32",
+            Ty::CInt => "::core::ffi::c_int",
+            Ty::MutF16 => "&mut f16",
+            Ty::MutF32 => "&mut f32",
+            Ty::MutF64 => "&mut f64",
+            Ty::MutF128 => "&mut f128",
+            Ty::MutI32 => "&mut i32",
+            Ty::MutCInt => "&mut ::core::ffi::c_int",
+        };
+        f.write_str(s)
+    }
+}
+
+impl fmt::Display for FloatTy {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let s = match self {
+            FloatTy::F16 => "f16",
+            FloatTy::F32 => "f32",
+            FloatTy::F64 => "f64",
+            FloatTy::F128 => "f128",
+        };
+        f.write_str(s)
+    }
+}
+
+/// Representation of e.g. `(f32, f32) -> f32`
+#[derive(Debug, Clone)]
+pub struct Signature {
+    pub args: &'static [Ty],
+    pub returns: &'static [Ty],
+}
+
+/// Combined information about a function implementation.
+#[derive(Debug, Clone)]
+pub struct MathOpInfo {
+    pub name: &'static str,
+    pub float_ty: FloatTy,
+    /// Function signature for C implementations
+    pub c_sig: Signature,
+    /// Function signature for Rust implementations
+    pub rust_sig: Signature,
+}
+
+/// A flat representation of `ALL_FUNCTIONS`.
+pub static ALL_OPERATIONS: LazyLock<Vec<MathOpInfo>> = LazyLock::new(|| {
+    let mut ret = Vec::new();
+
+    for (base_fty, rust_sig, c_sig, names) in ALL_OPERATIONS_NESTED {
+        for name in *names {
+            let api = MathOpInfo {
+                name,
+                float_ty: *base_fty,
+                rust_sig: rust_sig.clone(),
+                c_sig: c_sig.clone().unwrap_or_else(|| rust_sig.clone()),
+            };
+            ret.push(api);
+        }
+    }
+
+    ret.sort_by_key(|item| item.name);
+    ret
+});

From dc15fdd9f0b380e1cdfbfadb3c36a93e322fb910 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sun, 22 Dec 2024 11:41:43 +0000
Subject: [PATCH 062/279] Include `shared.rs` in `libm_test::op`

These types from `libm-macros` provide a way to get information about an
operation at runtime, rather than only being encoded in the type system.
Include the file and reexport relevant types.
---
 crates/libm-test/src/lib.rs | 2 +-
 crates/libm-test/src/op.rs  | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/crates/libm-test/src/lib.rs b/crates/libm-test/src/lib.rs
index 622b2dec9..e3a690678 100644
--- a/crates/libm-test/src/lib.rs
+++ b/crates/libm-test/src/lib.rs
@@ -13,7 +13,7 @@ mod test_traits;
 pub use f8_impl::f8;
 pub use libm::support::{Float, Int, IntTy, MinInt};
 pub use num::{FloatExt, logspace};
-pub use op::{BaseName, Identifier, MathOp, OpCFn, OpFTy, OpRustFn, OpRustRet};
+pub use op::{BaseName, FloatTy, Identifier, MathOp, OpCFn, OpFTy, OpRustFn, OpRustRet, Ty};
 pub use precision::{MaybeOverride, SpecialCase, default_ulp};
 pub use test_traits::{CheckBasis, CheckCtx, CheckOutput, GenerateInput, Hex, TupleCall};
 
diff --git a/crates/libm-test/src/op.rs b/crates/libm-test/src/op.rs
index bcea31c22..a2f21d3c1 100644
--- a/crates/libm-test/src/op.rs
+++ b/crates/libm-test/src/op.rs
@@ -13,8 +13,16 @@
 //! - "Operation" / "Op": Something that relates a routine to a function or is otherwise higher
 //!   level. `Op` is also used as the name for generic parameters since it is terse.
 
+use std::fmt;
+
+pub use shared::{ALL_OPERATIONS, FloatTy, MathOpInfo, Ty};
+
 use crate::{CheckOutput, Float, TupleCall};
 
+mod shared {
+    include!("../../libm-macros/src/shared.rs");
+}
+
 /// An enum representing each possible symbol name (`sin`, `sinf`, `sinl`, etc).
 #[libm_macros::function_enum(BaseName)]
 #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]

From 6e101af42c8aabcfe859767c611206192b3c62a5 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sun, 29 Dec 2024 08:06:53 +0000
Subject: [PATCH 063/279] Add new trait implementations for `Identifier` and
 `BaseName`

These allow for more convenient printing, as well as storage in map
types.
---
 crates/libm-test/src/op.rs | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/crates/libm-test/src/op.rs b/crates/libm-test/src/op.rs
index a2f21d3c1..e58c28903 100644
--- a/crates/libm-test/src/op.rs
+++ b/crates/libm-test/src/op.rs
@@ -25,14 +25,26 @@ mod shared {
 
 /// An enum representing each possible symbol name (`sin`, `sinf`, `sinl`, etc).
 #[libm_macros::function_enum(BaseName)]
-#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub enum Identifier {}
 
+impl fmt::Display for Identifier {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.write_str(self.as_str())
+    }
+}
+
 /// The name without any type specifier, e.g. `sin` and `sinf` both become `sin`.
 #[libm_macros::base_name_enum]
-#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub enum BaseName {}
 
+impl fmt::Display for BaseName {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.write_str(self.as_str())
+    }
+}
+
 /// Attributes ascribed to a `libm` routine including signature, type information,
 /// and naming.
 pub trait MathOp {

From a210d4a6864bffe9fde6b7f8abafb8fd03847b93 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sun, 22 Dec 2024 11:26:22 +0000
Subject: [PATCH 064/279] Add `ALL`, `from_str` and `math_op` to `Identifier`

Introduce new API to iterate the function list and associate items with
their `MathOp`.
---
 crates/libm-macros/src/enums.rs  | 23 +++++++++++++++++++++++
 crates/libm-macros/tests/enum.rs | 29 ++++++++++++++++++++++++-----
 2 files changed, 47 insertions(+), 5 deletions(-)

diff --git a/crates/libm-macros/src/enums.rs b/crates/libm-macros/src/enums.rs
index 82dedc66e..864b625ea 100644
--- a/crates/libm-macros/src/enums.rs
+++ b/crates/libm-macros/src/enums.rs
@@ -31,6 +31,7 @@ pub fn function_enum(
 
     let enum_name = &item.ident;
     let mut as_str_arms = Vec::new();
+    let mut from_str_arms = Vec::new();
     let mut base_arms = Vec::new();
 
     for func in ALL_OPERATIONS.iter() {
@@ -40,6 +41,7 @@ pub fn function_enum(
 
         // Match arm for `fn as_str(self)` matcher
         as_str_arms.push(quote! { Self::#ident => #fn_name });
+        from_str_arms.push(quote! { #fn_name => Self::#ident });
 
         // Match arm for `fn base_name(self)` matcher
         base_arms.push(quote! { Self::#ident => #base_enum::#bname_ident });
@@ -50,11 +52,18 @@ pub fn function_enum(
         item.variants.push(variant);
     }
 
+    let variants = item.variants.iter();
+
     let res = quote! {
         // Instantiate the enum
         #item
 
         impl #enum_name {
+            /// All variants of this enum.
+            pub const ALL: &[Self] = &[
+                #( Self::#variants, )*
+            ];
+
             /// The stringified version of this function name.
             pub const fn as_str(self) -> &'static str {
                 match self {
@@ -62,12 +71,26 @@ pub fn function_enum(
                 }
             }
 
+            /// If `s` is the name of a function, return it.
+            pub fn from_str(s: &str) -> Option<Self> {
+                let ret = match s {
+                    #( #from_str_arms , )*
+                    _ => return None,
+                };
+                Some(ret)
+            }
+
             /// The base name enum for this function.
             pub const fn base_name(self) -> #base_enum {
                 match self {
                     #( #base_arms, )*
                 }
             }
+
+            /// Return information about this operation.
+            pub fn math_op(self) -> &'static crate::op::MathOpInfo {
+                crate::op::ALL_OPERATIONS.iter().find(|op| op.name == self.as_str()).unwrap()
+            }
         }
     };
 
diff --git a/crates/libm-macros/tests/enum.rs b/crates/libm-macros/tests/enum.rs
index 884b8d8d6..93e209a0d 100644
--- a/crates/libm-macros/tests/enum.rs
+++ b/crates/libm-macros/tests/enum.rs
@@ -1,6 +1,6 @@
 #[libm_macros::function_enum(BaseName)]
 #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
-pub enum Function {}
+pub enum Identifier {}
 
 #[libm_macros::base_name_enum]
 #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
@@ -8,12 +8,31 @@ pub enum BaseName {}
 
 #[test]
 fn as_str() {
-    assert_eq!(Function::Sin.as_str(), "sin");
-    assert_eq!(Function::Sinf.as_str(), "sinf");
+    assert_eq!(Identifier::Sin.as_str(), "sin");
+    assert_eq!(Identifier::Sinf.as_str(), "sinf");
+}
+
+#[test]
+fn from_str() {
+    assert_eq!(Identifier::from_str("sin").unwrap(), Identifier::Sin);
+    assert_eq!(Identifier::from_str("sinf").unwrap(), Identifier::Sinf);
 }
 
 #[test]
 fn basename() {
-    assert_eq!(Function::Sin.base_name(), BaseName::Sin);
-    assert_eq!(Function::Sinf.base_name(), BaseName::Sin);
+    assert_eq!(Identifier::Sin.base_name(), BaseName::Sin);
+    assert_eq!(Identifier::Sinf.base_name(), BaseName::Sin);
 }
+
+#[test]
+fn math_op() {
+    assert_eq!(Identifier::Sin.math_op().float_ty, FloatTy::F64);
+    assert_eq!(Identifier::Sinf.math_op().float_ty, FloatTy::F32);
+}
+
+// Replicate the structure that we have in `libm-test`
+mod op {
+    include!("../../libm-macros/src/shared.rs");
+}
+
+use op::FloatTy;

From 1d1d7de2c236fcbee9c36b1cac5ca9cf4abfd584 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 26 Dec 2024 07:43:56 +0000
Subject: [PATCH 065/279] Move `CheckBasis` and `CheckCtx` to a new `run_cfg`
 module

These are used more places than just test traits, so this new module
should be a better home. `run_cfg` will also be expanded in the near
future.
---
 crates/libm-test/src/lib.rs         |  4 ++-
 crates/libm-test/src/run_cfg.rs     | 51 +++++++++++++++++++++++++++++
 crates/libm-test/src/test_traits.rs | 39 +---------------------
 3 files changed, 55 insertions(+), 39 deletions(-)
 create mode 100644 crates/libm-test/src/run_cfg.rs

diff --git a/crates/libm-test/src/lib.rs b/crates/libm-test/src/lib.rs
index e3a690678..eb457b0ae 100644
--- a/crates/libm-test/src/lib.rs
+++ b/crates/libm-test/src/lib.rs
@@ -8,6 +8,7 @@ pub mod mpfloat;
 mod num;
 pub mod op;
 mod precision;
+mod run_cfg;
 mod test_traits;
 
 pub use f8_impl::f8;
@@ -15,7 +16,8 @@ pub use libm::support::{Float, Int, IntTy, MinInt};
 pub use num::{FloatExt, logspace};
 pub use op::{BaseName, FloatTy, Identifier, MathOp, OpCFn, OpFTy, OpRustFn, OpRustRet, Ty};
 pub use precision::{MaybeOverride, SpecialCase, default_ulp};
-pub use test_traits::{CheckBasis, CheckCtx, CheckOutput, GenerateInput, Hex, TupleCall};
+pub use run_cfg::{CheckBasis, CheckCtx};
+pub use test_traits::{CheckOutput, GenerateInput, Hex, TupleCall};
 
 /// Result type for tests is usually from `anyhow`. Most times there is no success value to
 /// propagate.
diff --git a/crates/libm-test/src/run_cfg.rs b/crates/libm-test/src/run_cfg.rs
new file mode 100644
index 000000000..eb7e0e2c1
--- /dev/null
+++ b/crates/libm-test/src/run_cfg.rs
@@ -0,0 +1,51 @@
+//! Configuration for how tests get run.
+
+#![allow(unused)]
+
+use std::collections::BTreeMap;
+use std::env;
+use std::sync::LazyLock;
+
+use crate::{BaseName, FloatTy, Identifier, op};
+
+pub const EXTENSIVE_ENV: &str = "LIBM_EXTENSIVE_TESTS";
+
+/// Context passed to [`CheckOutput`].
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct CheckCtx {
+    /// Allowed ULP deviation
+    pub ulp: u32,
+    pub fn_ident: Identifier,
+    pub base_name: BaseName,
+    /// Function name.
+    pub fn_name: &'static str,
+    /// Return the unsuffixed version of the function name.
+    pub base_name_str: &'static str,
+    /// Source of truth for tests.
+    pub basis: CheckBasis,
+}
+
+impl CheckCtx {
+    /// Create a new check context, using the default ULP for the function.
+    pub fn new(fn_ident: Identifier, basis: CheckBasis) -> Self {
+        let mut ret = Self {
+            ulp: 0,
+            fn_ident,
+            fn_name: fn_ident.as_str(),
+            base_name: fn_ident.base_name(),
+            base_name_str: fn_ident.base_name().as_str(),
+            basis,
+        };
+        ret.ulp = crate::default_ulp(&ret);
+        ret
+    }
+}
+
+/// Possible items to test against
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub enum CheckBasis {
+    /// Check against Musl's math sources.
+    Musl,
+    /// Check against infinite precision (MPFR).
+    Mpfr,
+}
diff --git a/crates/libm-test/src/test_traits.rs b/crates/libm-test/src/test_traits.rs
index ca933bbda..6b833dfb5 100644
--- a/crates/libm-test/src/test_traits.rs
+++ b/crates/libm-test/src/test_traits.rs
@@ -11,44 +11,7 @@ use std::fmt;
 
 use anyhow::{Context, bail, ensure};
 
-use crate::{BaseName, Float, Identifier, Int, MaybeOverride, SpecialCase, TestResult};
-
-/// Context passed to [`CheckOutput`].
-#[derive(Clone, Debug, PartialEq, Eq)]
-pub struct CheckCtx {
-    /// Allowed ULP deviation
-    pub ulp: u32,
-    pub fn_ident: Identifier,
-    pub base_name: BaseName,
-    /// Function name.
-    pub fn_name: &'static str,
-    /// Source of truth for tests.
-    pub basis: CheckBasis,
-}
-
-impl CheckCtx {
-    /// Create a new check context, using the default ULP for the function.
-    pub fn new(fn_ident: Identifier, basis: CheckBasis) -> Self {
-        let mut ret = Self {
-            ulp: 0,
-            fn_ident,
-            fn_name: fn_ident.as_str(),
-            base_name: fn_ident.base_name(),
-            basis,
-        };
-        ret.ulp = crate::default_ulp(&ret);
-        ret
-    }
-}
-
-/// Possible items to test against
-#[derive(Clone, Debug, PartialEq, Eq)]
-pub enum CheckBasis {
-    /// Check against Musl's math sources.
-    Musl,
-    /// Check against infinite precision (MPFR).
-    Mpfr,
-}
+use crate::{CheckCtx, Float, Int, MaybeOverride, SpecialCase, TestResult};
 
 /// Implement this on types that can generate a sequence of tuples for test input.
 pub trait GenerateInput<TupleArgs> {

From c0d9cc3fdfcaec2d6b2eb58c8c6048fa3992379d Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 26 Dec 2024 07:42:13 +0000
Subject: [PATCH 066/279] Use `CheckCtx` in more places

Rather than passing names or identifiers, just pass `CheckCtx` in a few
more places.
---
 crates/libm-test/examples/plot_domains.rs   | 14 ++++++++------
 crates/libm-test/src/gen/domain_logspace.rs |  4 ++--
 crates/libm-test/src/gen/edge_cases.rs      |  4 ++--
 crates/libm-test/tests/multiprecision.rs    |  8 +++++---
 4 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/crates/libm-test/examples/plot_domains.rs b/crates/libm-test/examples/plot_domains.rs
index 630a0c233..626511245 100644
--- a/crates/libm-test/examples/plot_domains.rs
+++ b/crates/libm-test/examples/plot_domains.rs
@@ -14,7 +14,7 @@ use std::{env, fs};
 
 use libm_test::domain::HasDomain;
 use libm_test::gen::{domain_logspace, edge_cases};
-use libm_test::{MathOp, op};
+use libm_test::{CheckBasis, CheckCtx, MathOp, op};
 
 const JL_PLOT: &str = "examples/plot_file.jl";
 
@@ -54,30 +54,32 @@ fn plot_one_operator<Op>(out_dir: &Path, config: &mut String)
 where
     Op: MathOp<FTy = f32> + HasDomain<f32>,
 {
+    let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Mpfr);
     plot_one_generator(
         out_dir,
-        Op::BASE_NAME.as_str(),
+        &ctx,
         "logspace",
         config,
-        domain_logspace::get_test_cases::<Op>(),
+        domain_logspace::get_test_cases::<Op>(&ctx),
     );
     plot_one_generator(
         out_dir,
-        Op::BASE_NAME.as_str(),
+        &ctx,
         "edge_cases",
         config,
-        edge_cases::get_test_cases::<Op, _>(),
+        edge_cases::get_test_cases::<Op, _>(&ctx),
     );
 }
 
 /// Plot the output of a single generator.
 fn plot_one_generator(
     out_dir: &Path,
-    fn_name: &str,
+    ctx: &CheckCtx,
     gen_name: &str,
     config: &mut String,
     gen: impl Iterator<Item = (f32,)>,
 ) {
+    let fn_name = ctx.base_name_str;
     let text_file = out_dir.join(format!("input-{fn_name}-{gen_name}.txt"));
 
     let f = fs::File::create(&text_file).unwrap();
diff --git a/crates/libm-test/src/gen/domain_logspace.rs b/crates/libm-test/src/gen/domain_logspace.rs
index e8cdb9d2b..3e69bee34 100644
--- a/crates/libm-test/src/gen/domain_logspace.rs
+++ b/crates/libm-test/src/gen/domain_logspace.rs
@@ -4,7 +4,7 @@ use libm::support::{IntTy, MinInt};
 
 use crate::domain::HasDomain;
 use crate::op::OpITy;
-use crate::{MathOp, logspace};
+use crate::{CheckCtx, MathOp, logspace};
 
 /// Number of tests to run.
 // FIXME(ntests): replace this with a more logical algorithm
@@ -30,7 +30,7 @@ const NTESTS: usize = {
 ///
 /// This allows us to get reasonably thorough coverage without wasting time on values that are
 /// NaN or out of range. Random tests will still cover values that are excluded here.
-pub fn get_test_cases<Op>() -> impl Iterator<Item = (Op::FTy,)>
+pub fn get_test_cases<Op>(_ctx: &CheckCtx) -> impl Iterator<Item = (Op::FTy,)>
 where
     Op: MathOp + HasDomain<Op::FTy>,
     IntTy<Op::FTy>: TryFrom<usize>,
diff --git a/crates/libm-test/src/gen/edge_cases.rs b/crates/libm-test/src/gen/edge_cases.rs
index 625e18bc7..3387f6c48 100644
--- a/crates/libm-test/src/gen/edge_cases.rs
+++ b/crates/libm-test/src/gen/edge_cases.rs
@@ -3,7 +3,7 @@
 use libm::support::Float;
 
 use crate::domain::HasDomain;
-use crate::{FloatExt, MathOp};
+use crate::{CheckCtx, FloatExt, MathOp};
 
 /// Number of values near an interesting point to check.
 // FIXME(ntests): replace this with a more logical algorithm
@@ -14,7 +14,7 @@ const AROUND: usize = 100;
 const MAX_CHECK_POINTS: usize = 10;
 
 /// Create a list of values around interesting points (infinities, zeroes, NaNs).
-pub fn get_test_cases<Op, F>() -> impl Iterator<Item = (F,)>
+pub fn get_test_cases<Op, F>(_ctx: &CheckCtx) -> impl Iterator<Item = (F,)>
 where
     Op: MathOp<FTy = F> + HasDomain<F>,
     F: Float,
diff --git a/crates/libm-test/tests/multiprecision.rs b/crates/libm-test/tests/multiprecision.rs
index 5255dc1cf..2675ca018 100644
--- a/crates/libm-test/tests/multiprecision.rs
+++ b/crates/libm-test/tests/multiprecision.rs
@@ -83,21 +83,21 @@ macro_rules! mp_domain_tests {
             $(#[$meta])*
             fn [< mp_edge_case_ $fn_name >]() {
                 type Op = libm_test::op::$fn_name::Routine;
-                domain_test_runner::<Op>(edge_cases::get_test_cases::<Op, _>());
+                domain_test_runner::<Op, _>(edge_cases::get_test_cases::<Op, _>);
             }
 
             #[test]
             $(#[$meta])*
             fn [< mp_logspace_ $fn_name >]() {
                 type Op = libm_test::op::$fn_name::Routine;
-                domain_test_runner::<Op>(domain_logspace::get_test_cases::<Op>());
+                domain_test_runner::<Op, _>(domain_logspace::get_test_cases::<Op>);
             }
         }
     };
 }
 
 /// Test a single routine against domaine-aware inputs.
-fn domain_test_runner<Op>(cases: impl Iterator<Item = (Op::FTy,)>)
+fn domain_test_runner<Op, I>(gen: impl FnOnce(&CheckCtx) -> I)
 where
     // Complicated generics...
     // The operation must take a single float argument (unary only)
@@ -108,9 +108,11 @@ where
     Op: HasDomain<Op::FTy>,
     // The single float argument tuple must be able to call the `RustFn` and return `RustRet`
     (OpFTy<Op>,): TupleCall<OpRustFn<Op>, Output = OpRustRet<Op>>,
+    I: Iterator<Item = (Op::FTy,)>,
 {
     let mut mp_vals = Op::new_mp();
     let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Mpfr);
+    let cases = gen(&ctx);
 
     for input in cases {
         let mp_res = Op::run(&mut mp_vals, input);

From c0e0827aad15bbdf60b4be9a6cca8eb4919c4d26 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sun, 29 Dec 2024 09:04:18 +0000
Subject: [PATCH 067/279] Don't run `push` CI on anything other than `master`

---
 .github/workflows/main.yml | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 93cd541f8..83875f368 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -1,5 +1,9 @@
 name: CI
-on: [push, pull_request]
+on:
+  push:
+    branches:
+      - master
+  pull_request:
 
 env:
   CARGO_TERM_VERBOSE: true

From 186f864c0eb3c4274e0926dc0529470f08aea2c0 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 30 Dec 2024 01:53:29 +0000
Subject: [PATCH 068/279] Set the allowed FMA ULP to 0

It is currently getting the default of 1 or 2. Since this operation
should always be infinite precision, no deviation is allowed.
---
 crates/libm-test/src/precision.rs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/crates/libm-test/src/precision.rs b/crates/libm-test/src/precision.rs
index b878212fa..058d01c6e 100644
--- a/crates/libm-test/src/precision.rs
+++ b/crates/libm-test/src/precision.rs
@@ -24,6 +24,8 @@ const MP_DEFAULT_ULP: u32 = 1;
 pub fn default_ulp(ctx: &CheckCtx) -> u32 {
     match (&ctx.basis, ctx.fn_ident) {
         // Overrides that apply to either basis
+        // FMA is expected to be infinite precision.
+        (_, Id::Fma | Id::Fmaf) => 0,
         (_, Id::J0 | Id::J0f | Id::J1 | Id::J1f) => {
             // Results seem very target-dependent
             if cfg!(target_arch = "x86_64") { 4000 } else { 800_000 }

From 766e21c288831a0caaa1993fdb0b3826cb9bbae7 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 30 Dec 2024 05:55:41 +0000
Subject: [PATCH 069/279] Remove lossy casting in `logspace`

Currently `logspace` does a lossy cast from `F::Int` to `usize`. This
could be problematic in the rare cases that this is called with a step
count exceeding what is representable in `usize`.

Resolve this by instead adding bounds so the float's integer type itself
can be iterated.
---
 crates/libm-test/src/gen/domain_logspace.rs |  3 +++
 crates/libm-test/src/num.rs                 | 10 +++++++---
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/crates/libm-test/src/gen/domain_logspace.rs b/crates/libm-test/src/gen/domain_logspace.rs
index 3e69bee34..5e37170fa 100644
--- a/crates/libm-test/src/gen/domain_logspace.rs
+++ b/crates/libm-test/src/gen/domain_logspace.rs
@@ -1,5 +1,7 @@
 //! A generator that produces logarithmically spaced values within domain bounds.
 
+use std::ops::RangeInclusive;
+
 use libm::support::{IntTy, MinInt};
 
 use crate::domain::HasDomain;
@@ -34,6 +36,7 @@ pub fn get_test_cases<Op>(_ctx: &CheckCtx) -> impl Iterator<Item = (Op::FTy,)>
 where
     Op: MathOp + HasDomain<Op::FTy>,
     IntTy<Op::FTy>: TryFrom<usize>,
+    RangeInclusive<IntTy<Op::FTy>>: Iterator,
 {
     let domain = Op::DOMAIN;
     let start = domain.range_start();
diff --git a/crates/libm-test/src/num.rs b/crates/libm-test/src/num.rs
index 4aa7f61b0..eff2fbc1f 100644
--- a/crates/libm-test/src/num.rs
+++ b/crates/libm-test/src/num.rs
@@ -1,8 +1,9 @@
 //! Helpful numeric operations.
 
 use std::cmp::min;
+use std::ops::RangeInclusive;
 
-use libm::support::{CastInto, Float};
+use libm::support::Float;
 
 use crate::{Int, MinInt};
 
@@ -214,7 +215,10 @@ fn as_ulp_steps<F: Float>(x: F) -> Option<F::SignedInt> {
 /// to logarithmic spacing of their values.
 ///
 /// Note that this tends to skip negative zero, so that needs to be checked explicitly.
-pub fn logspace<F: FloatExt>(start: F, end: F, steps: F::Int) -> impl Iterator<Item = F> {
+pub fn logspace<F: FloatExt>(start: F, end: F, steps: F::Int) -> impl Iterator<Item = F>
+where
+    RangeInclusive<F::Int>: Iterator,
+{
     assert!(!start.is_nan());
     assert!(!end.is_nan());
     assert!(end >= start);
@@ -225,7 +229,7 @@ pub fn logspace<F: FloatExt>(start: F, end: F, steps: F::Int) -> impl Iterator<I
     steps = steps.min(between); // At maximum, one step per ULP
 
     let mut x = start;
-    (0..=steps.cast()).map(move |_| {
+    (F::Int::ZERO..=steps).map(move |_| {
         let ret = x;
         x = x.n_up(spacing);
         ret

From 37859022dd1339a2d6cab6927d1fb67b3653e005 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sun, 22 Dec 2024 10:14:56 +0000
Subject: [PATCH 070/279] Forward the `CI` environment variable when running in
 Docker

We want to be able to adjust our configuration based on whether we are
running in CI, propagate this so our tests can use it.
---
 ci/run-docker.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ci/run-docker.sh b/ci/run-docker.sh
index a040126df..d9f29656d 100755
--- a/ci/run-docker.sh
+++ b/ci/run-docker.sh
@@ -28,6 +28,7 @@ run() {
     docker run \
         --rm \
         --user "$(id -u):$(id -g)" \
+        -e CI \
         -e RUSTFLAGS \
         -e CARGO_HOME=/cargo \
         -e CARGO_TARGET_DIR=/target \

From 0840d59d3a543095f89e923b0ac8bbd92f3607ae Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Tue, 31 Dec 2024 22:56:36 +0000
Subject: [PATCH 071/279] Use `rustdoc` output to create a list of public API

Rather than collecting a list of file names in `libm-test/build.rs`,
just use a script to parse rustdoc's JSON output.
---
 .github/workflows/main.yml               |   4 +
 crates/libm-test/build.rs                |  34 -------
 crates/libm-test/src/lib.rs              |  12 ++-
 crates/libm-test/tests/check_coverage.rs |  70 +++++++-------
 etc/function-list.txt                    | 115 ++++++++++++++++++++++
 etc/update-api-list.py                   | 117 +++++++++++++++++++++++
 6 files changed, 283 insertions(+), 69 deletions(-)
 create mode 100644 etc/function-list.txt
 create mode 100755 etc/update-api-list.py

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 83875f368..0f5becf73 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -96,6 +96,10 @@ jobs:
       run: ./ci/download-musl.sh
       shell: bash
 
+    - name: Verify API list
+      if: matrix.os == 'ubuntu-24.04'
+      run: python3 etc/update-api-list.py --check
+
     # Non-linux tests just use our raw script
     - name: Run locally
       if: matrix.os != 'ubuntu-24.04' || contains(matrix.target, 'wasm')
diff --git a/crates/libm-test/build.rs b/crates/libm-test/build.rs
index f2cd298ba..134fb11ce 100644
--- a/crates/libm-test/build.rs
+++ b/crates/libm-test/build.rs
@@ -1,42 +1,8 @@
-use std::fmt::Write;
-use std::fs;
-
 #[path = "../../configure.rs"]
 mod configure;
 use configure::Config;
 
 fn main() {
     let cfg = Config::from_env();
-
-    list_all_tests(&cfg);
-
     configure::emit_test_config(&cfg);
 }
-
-/// Create a list of all source files in an array. This can be used for making sure that
-/// all functions are tested or otherwise covered in some way.
-// FIXME: it would probably be better to use rustdoc JSON output to get public functions.
-fn list_all_tests(cfg: &Config) {
-    let math_src = cfg.manifest_dir.join("../../src/math");
-
-    let mut files = fs::read_dir(math_src)
-        .unwrap()
-        .map(|f| f.unwrap().path())
-        .filter(|entry| entry.is_file())
-        .map(|f| f.file_stem().unwrap().to_str().unwrap().to_owned())
-        .collect::<Vec<_>>();
-    files.sort();
-
-    let mut s = "pub const ALL_FUNCTIONS: &[&str] = &[".to_owned();
-    for f in files {
-        if f == "mod" {
-            // skip mod.rs
-            continue;
-        }
-        write!(s, "\"{f}\",").unwrap();
-    }
-    write!(s, "];").unwrap();
-
-    let outfile = cfg.out_dir.join("all_files.rs");
-    fs::write(outfile, s).unwrap();
-}
diff --git a/crates/libm-test/src/lib.rs b/crates/libm-test/src/lib.rs
index eb457b0ae..fdba0357f 100644
--- a/crates/libm-test/src/lib.rs
+++ b/crates/libm-test/src/lib.rs
@@ -23,9 +23,6 @@ pub use test_traits::{CheckOutput, GenerateInput, Hex, TupleCall};
 /// propagate.
 pub type TestResult<T = (), E = anyhow::Error> = Result<T, E>;
 
-// List of all files present in libm's source
-include!(concat!(env!("OUT_DIR"), "/all_files.rs"));
-
 /// True if `EMULATED` is set and nonempty. Used to determine how many iterations to run.
 pub const fn emulated() -> bool {
     match option_env!("EMULATED") {
@@ -34,3 +31,12 @@ pub const fn emulated() -> bool {
         Some(_) => true,
     }
 }
+
+/// True if `CI` is set and nonempty.
+pub const fn ci() -> bool {
+    match option_env!("CI") {
+        Some(s) if s.is_empty() => false,
+        None => false,
+        Some(_) => true,
+    }
+}
diff --git a/crates/libm-test/tests/check_coverage.rs b/crates/libm-test/tests/check_coverage.rs
index b7988660e..9f85d6424 100644
--- a/crates/libm-test/tests/check_coverage.rs
+++ b/crates/libm-test/tests/check_coverage.rs
@@ -1,54 +1,60 @@
 //! Ensure that `for_each_function!` isn't missing any symbols.
 
-/// Files in `src/` that do not export a testable symbol.
-const ALLOWED_SKIPS: &[&str] = &[
-    // Not a generic test function
-    "fenv",
-    // Nonpublic functions
-    "expo2",
-    "k_cos",
-    "k_cosf",
-    "k_expo2",
-    "k_expo2f",
-    "k_sin",
-    "k_sinf",
-    "k_tan",
-    "k_tanf",
-    "rem_pio2",
-    "rem_pio2_large",
-    "rem_pio2f",
-];
+use std::collections::HashSet;
+use std::env;
+use std::path::Path;
+use std::process::Command;
 
 macro_rules! callback {
     (
         fn_name: $name:ident,
-        extra: [$push_to:ident],
+        extra: [$set:ident],
     ) => {
-        $push_to.push(stringify!($name));
+        let name = stringify!($name);
+        let new = $set.insert(name);
+        assert!(new, "duplicate function `{name}` in `ALL_OPERATIONS`");
     };
 }
 
 #[test]
 fn test_for_each_function_all_included() {
-    let mut included = Vec::new();
-    let mut missing = Vec::new();
+    let all_functions: HashSet<_> = include_str!("../../../etc/function-list.txt")
+        .lines()
+        .filter(|line| !line.starts_with("#"))
+        .collect();
+
+    let mut tested = HashSet::new();
 
     libm_macros::for_each_function! {
         callback: callback,
-        extra: [included],
+        extra: [tested],
     };
 
-    for f in libm_test::ALL_FUNCTIONS {
-        if !included.contains(f) && !ALLOWED_SKIPS.contains(f) {
-            missing.push(f)
-        }
-    }
-
-    if !missing.is_empty() {
+    let untested = all_functions.difference(&tested);
+    if untested.clone().next().is_some() {
         panic!(
-            "missing tests for the following: {missing:#?} \
+            "missing tests for the following: {untested:#?} \
             \nmake sure any new functions are entered in \
-            `ALL_FUNCTIONS` (in `libm-macros`)."
+            `ALL_OPERATIONS` (in `libm-macros`)."
         );
     }
+    assert_eq!(all_functions, tested);
+}
+
+#[test]
+fn ensure_list_updated() {
+    if libm_test::ci() {
+        // Most CI tests run in Docker where we don't have Python or Rustdoc, so it's easiest
+        // to just run the python file directly when it is available.
+        eprintln!("skipping test; CI runs the python file directly");
+        return;
+    }
+
+    let res = Command::new("python3")
+        .arg(Path::new(env!("CARGO_MANIFEST_DIR")).join("../../etc/update-api-list.py"))
+        .arg("--check")
+        .status()
+        .unwrap();
+
+    assert!(res.success(), "May need to run `./etc/update-api-list.py`");
 }
diff --git a/etc/function-list.txt b/etc/function-list.txt
new file mode 100644
index 000000000..51f5b221c
--- /dev/null
+++ b/etc/function-list.txt
@@ -0,0 +1,115 @@
+# autogenerated by update-api-list.py
+acos
+acosf
+acosh
+acoshf
+asin
+asinf
+asinh
+asinhf
+atan
+atan2
+atan2f
+atanf
+atanh
+atanhf
+cbrt
+cbrtf
+ceil
+ceilf
+copysign
+copysignf
+cos
+cosf
+cosh
+coshf
+erf
+erfc
+erfcf
+erff
+exp
+exp10
+exp10f
+exp2
+exp2f
+expf
+expm1
+expm1f
+fabs
+fabsf
+fdim
+fdimf
+floor
+floorf
+fma
+fmaf
+fmax
+fmaxf
+fmin
+fminf
+fmod
+fmodf
+frexp
+frexpf
+hypot
+hypotf
+ilogb
+ilogbf
+j0
+j0f
+j1
+j1f
+jn
+jnf
+ldexp
+ldexpf
+lgamma
+lgamma_r
+lgammaf
+lgammaf_r
+log
+log10
+log10f
+log1p
+log1pf
+log2
+log2f
+logf
+modf
+modff
+nextafter
+nextafterf
+pow
+powf
+remainder
+remainderf
+remquo
+remquof
+rint
+rintf
+round
+roundf
+scalbn
+scalbnf
+sin
+sincos
+sincosf
+sinf
+sinh
+sinhf
+sqrt
+sqrtf
+tan
+tanf
+tanh
+tanhf
+tgamma
+tgammaf
+trunc
+truncf
+y0
+y0f
+y1
+y1f
+yn
+ynf
diff --git a/etc/update-api-list.py b/etc/update-api-list.py
new file mode 100755
index 000000000..7284a628c
--- /dev/null
+++ b/etc/update-api-list.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python3
+"""Create a text file listing all public API. This can be used to ensure that all
+functions are covered by our macros.
+"""
+
+import json
+import subprocess as sp
+import sys
+import difflib
+from pathlib import Path
+from typing import Any
+
+ETC_DIR = Path(__file__).parent
+
+
+def get_rustdoc_json() -> dict[Any, Any]:
+    """Get rustdoc's JSON output for the `libm` crate."""
+
+    librs_path = ETC_DIR.joinpath("../src/lib.rs")
+    j = sp.check_output(
+        [
+            "rustdoc",
+            librs_path,
+            "--edition=2021",
+            "--output-format=json",
+            "-Zunstable-options",
+            "-o-",
+        ],
+        text=True,
+    )
+    j = json.loads(j)
+    return j
+
+
+def list_public_functions() -> list[str]:
+    """Get a list of public functions from rustdoc JSON output.
+
+    Note that this only finds functions that are reexported in `lib.rs`, this will
+    need to be adjusted if we need to account for functions that are defined there.
+    """
+    names = []
+    index: dict[str, dict[str, Any]] = get_rustdoc_json()["index"]
+    for item in index.values():
+        # Find public items
+        if item["visibility"] != "public":
+            continue
+
+        # Find only reexports
+        if "use" not in item["inner"].keys():
+            continue
+
+        # Locate the item that is reexported
+        id = item["inner"]["use"]["id"]
+        srcitem = index.get(str(id))
+
+        # External crate
+        if srcitem is None:
+            continue
+
+        # Skip if not a function
+        if "function" not in srcitem["inner"].keys():
+            continue
+
+        names.append(srcitem["name"])
+
+    names.sort()
+    return names
+
+
+def diff_and_exit(actual: str, expected: str):
+    """If the two strings are different, print a diff between them and then exit
+    with an error.
+    """
+    if actual == expected:
+        print("output matches expected; success")
+        return
+
+    a = [f"{line}\n" for line in actual.splitlines()]
+    b = [f"{line}\n" for line in expected.splitlines()]
+
+    diff = difflib.unified_diff(a, b, "actual", "expected")
+    sys.stdout.writelines(diff)
+    print("mismatched function list")
+    exit(1)
+
+
+def main():
+    """By default overwrite the file. If `--check` is passed, print a diff instead and
+    error if the files are different.
+    """
+    match sys.argv:
+        case [_]:
+            check = False
+        case [_, "--check"]:
+            check = True
+        case _:
+            print("unrecognized arguments")
+            exit(1)
+
+    names = list_public_functions()
+    output = "# autogenerated by update-api-list.py\n"
+    for name in names:
+        output += f"{name}\n"
+
+    out_file = ETC_DIR.joinpath("function-list.txt")
+
+    if check:
+        with open(out_file, "r") as f:
+            current = f.read()
+        diff_and_exit(current, output)
+    else:
+        with open(out_file, "w") as f:
+            f.write(output)
+
+
+if __name__ == "__main__":
+    main()

From 780519d753c517c27144cfabc48cd9f00246c2a6 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Tue, 31 Dec 2024 22:57:21 +0000
Subject: [PATCH 072/279] Add missing functions to the macro list

Now that we are using rustdoc output to locate public functions, the
test is indicating a few that were missed since they don't have their
own function. Update everything to now include the following routines:

* `erfc`
* `erfcf`
* `y0`
* `y0f`
* `y1`
* `y1f`
* `yn`
* `ynf`
---
 crates/libm-macros/src/shared.rs         | 16 ++++++++--------
 crates/libm-test/src/domain.rs           |  3 +++
 crates/libm-test/src/gen/random.rs       |  6 +++++-
 crates/libm-test/src/mpfloat.rs          | 17 ++++++++++++++++-
 crates/libm-test/src/precision.rs        | 14 ++++++--------
 crates/libm-test/tests/multiprecision.rs |  4 +++-
 crates/musl-math-sys/src/lib.rs          |  1 +
 7 files changed, 42 insertions(+), 19 deletions(-)

diff --git a/crates/libm-macros/src/shared.rs b/crates/libm-macros/src/shared.rs
index 100bcc7ad..ef0f18801 100644
--- a/crates/libm-macros/src/shared.rs
+++ b/crates/libm-macros/src/shared.rs
@@ -11,9 +11,9 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
         None,
         &[
             "acosf", "acoshf", "asinf", "asinhf", "atanf", "atanhf", "cbrtf", "ceilf", "cosf",
-            "coshf", "erff", "exp10f", "exp2f", "expf", "expm1f", "fabsf", "floorf", "j0f", "j1f",
-            "lgammaf", "log10f", "log1pf", "log2f", "logf", "rintf", "roundf", "sinf", "sinhf",
-            "sqrtf", "tanf", "tanhf", "tgammaf", "truncf",
+            "coshf", "erff", "erfcf", "exp10f", "exp2f", "expf", "expm1f", "fabsf", "floorf",
+            "j0f", "j1f", "lgammaf", "log10f", "log1pf", "log2f", "logf", "rintf", "roundf",
+            "sinf", "sinhf", "sqrtf", "tanf", "tanhf", "tgammaf", "truncf", "y0f", "y1f",
         ],
     ),
     (
@@ -23,9 +23,9 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
         None,
         &[
             "acos", "acosh", "asin", "asinh", "atan", "atanh", "cbrt", "ceil", "cos", "cosh",
-            "erf", "exp10", "exp2", "exp", "expm1", "fabs", "floor", "j0", "j1", "lgamma", "log10",
-            "log1p", "log2", "log", "rint", "round", "sin", "sinh", "sqrt", "tan", "tanh",
-            "tgamma", "trunc",
+            "erf", "erfc", "exp10", "exp2", "exp", "expm1", "fabs", "floor", "j0", "j1", "lgamma",
+            "log10", "log1p", "log2", "log", "rint", "round", "sin", "sinh", "sqrt", "tan", "tanh",
+            "tgamma", "trunc", "y0", "y1",
         ],
     ),
     (
@@ -97,14 +97,14 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
         FloatTy::F32,
         Signature { args: &[Ty::I32, Ty::F32], returns: &[Ty::F32] },
         None,
-        &["jnf"],
+        &["jnf", "ynf"],
     ),
     (
         // `(i32, f64) -> f64`
         FloatTy::F64,
         Signature { args: &[Ty::I32, Ty::F64], returns: &[Ty::F64] },
         None,
-        &["jn"],
+        &["jn", "yn"],
     ),
     (
         // `(f32, i32) -> f32`
diff --git a/crates/libm-test/src/domain.rs b/crates/libm-test/src/domain.rs
index 9ee8a19b9..7b5a01b96 100644
--- a/crates/libm-test/src/domain.rs
+++ b/crates/libm-test/src/domain.rs
@@ -147,6 +147,7 @@ impl_has_domain! {
     cos => TRIG;
     cosh => UNBOUNDED;
     erf => UNBOUNDED;
+    erfc => UNBOUNDED;
     exp => UNBOUNDED;
     exp10 => UNBOUNDED;
     exp2 => UNBOUNDED;
@@ -173,6 +174,8 @@ impl_has_domain! {
     tanh => UNBOUNDED;
     tgamma => GAMMA;
     trunc => UNBOUNDED;
+    y0 => UNBOUNDED;
+    y1 => UNBOUNDED;
 }
 
 /* Manual implementations, these functions don't follow `foo`->`foof` naming */
diff --git a/crates/libm-test/src/gen/random.rs b/crates/libm-test/src/gen/random.rs
index 527cd1351..4f75da07b 100644
--- a/crates/libm-test/src/gen/random.rs
+++ b/crates/libm-test/src/gen/random.rs
@@ -110,6 +110,10 @@ pub fn get_test_cases<RustArgs>(ctx: &CheckCtx) -> impl Iterator<Item = RustArgs
 where
     CachedInput: GenerateInput<RustArgs>,
 {
-    let inputs = if ctx.base_name == BaseName::Jn { &TEST_CASES_JN } else { &TEST_CASES };
+    let inputs = if ctx.base_name == BaseName::Jn || ctx.base_name == BaseName::Yn {
+        &TEST_CASES_JN
+    } else {
+        &TEST_CASES
+    };
     inputs.get_cases()
 }
diff --git a/crates/libm-test/src/mpfloat.rs b/crates/libm-test/src/mpfloat.rs
index 507b077b3..28df916bd 100644
--- a/crates/libm-test/src/mpfloat.rs
+++ b/crates/libm-test/src/mpfloat.rs
@@ -130,7 +130,7 @@ libm_macros::for_each_function! {
         fabsf, ceilf, copysignf, floorf, rintf, roundf, truncf,
         fmod, fmodf, frexp, frexpf, ilogb, ilogbf, jn, jnf, ldexp, ldexpf,
         lgamma_r, lgammaf_r, modf, modff, nextafter, nextafterf, pow,powf,
-        remquo, remquof, scalbn, scalbnf, sincos, sincosf,
+        remquo, remquof, scalbn, scalbnf, sincos, sincosf, yn, ynf,
     ],
     fn_extra: match MACRO_FN_NAME {
         // Remap function names that are different between mpfr and libm
@@ -266,6 +266,21 @@ macro_rules! impl_op_for_ty {
                     )
                 }
             }
+
+            impl MpOp for crate::op::[<yn $suffix>]::Routine {
+                type MpTy = (i32, MpFloat);
+
+                fn new_mp() -> Self::MpTy {
+                    (0, new_mpfloat::<Self::FTy>())
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.0 = input.0;
+                    this.1.assign(input.1);
+                    let ord = this.1.yn_round(this.0, Nearest);
+                    prep_retval::<Self::FTy>(&mut this.1, ord)
+                }
+            }
         }
     };
 }
diff --git a/crates/libm-test/src/precision.rs b/crates/libm-test/src/precision.rs
index 058d01c6e..6d4561c43 100644
--- a/crates/libm-test/src/precision.rs
+++ b/crates/libm-test/src/precision.rs
@@ -26,11 +26,9 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 {
         // Overrides that apply to either basis
         // FMA is expected to be infinite precision.
         (_, Id::Fma | Id::Fmaf) => 0,
-        (_, Id::J0 | Id::J0f | Id::J1 | Id::J1f) => {
-            // Results seem very target-dependent
-            if cfg!(target_arch = "x86_64") { 4000 } else { 800_000 }
-        }
-        (_, Id::Jn | Id::Jnf) => 1000,
+        (_, Id::J0 | Id::J0f | Id::J1 | Id::J1f | Id::Y0 | Id::Y0f | Id::Y1 | Id::Y1f) => 800_000,
+        (_, Id::Jn | Id::Jnf | Id::Yn | Id::Ynf) => 1000,
+        (_, Id::Erfc | Id::Erfcf) => 4,
 
         // Overrides for musl
         #[cfg(x86_no_sse)]
@@ -297,7 +295,7 @@ impl MaybeOverride<(i32, f32)> for SpecialCase {
             (Musl, _) => bessel_prec_dropoff(input, ulp, ctx),
 
             // We return +0.0, MPFR returns -0.0
-            (Mpfr, BaseName::Jn)
+            (Mpfr, BaseName::Jn | BaseName::Yn)
                 if input.1 == f32::NEG_INFINITY && actual == F::ZERO && expected == F::ZERO =>
             {
                 XFAIL
@@ -319,7 +317,7 @@ impl MaybeOverride<(i32, f64)> for SpecialCase {
             (Musl, _) => bessel_prec_dropoff(input, ulp, ctx),
 
             // We return +0.0, MPFR returns -0.0
-            (Mpfr, BaseName::Jn)
+            (Mpfr, BaseName::Jn | BaseName::Yn)
                 if input.1 == f64::NEG_INFINITY && actual == F::ZERO && expected == F::ZERO =>
             {
                 XFAIL
@@ -336,7 +334,7 @@ fn bessel_prec_dropoff<F: Float>(
     ulp: &mut u32,
     ctx: &CheckCtx,
 ) -> Option<TestResult> {
-    if ctx.base_name == BaseName::Jn {
+    if ctx.base_name == BaseName::Jn || ctx.base_name == BaseName::Yn {
         if input.0 > 4000 {
             return XFAIL;
         } else if input.0 > 2000 {
diff --git a/crates/libm-test/tests/multiprecision.rs b/crates/libm-test/tests/multiprecision.rs
index 2675ca018..4821f7446 100644
--- a/crates/libm-test/tests/multiprecision.rs
+++ b/crates/libm-test/tests/multiprecision.rs
@@ -48,7 +48,7 @@ libm_macros::for_each_function! {
     attributes: [
         // Also an assertion failure on i686: at `MPFR_ASSERTN (! mpfr_erangeflag_p ())`
         #[ignore = "large values are infeasible in MPFR"]
-        [jn, jnf],
+        [jn, jnf, yn, ynf],
     ],
     skip: [
         // FIXME: MPFR tests needed
@@ -157,6 +157,8 @@ libm_macros::for_each_function! {
         remquof,
         scalbn,
         scalbnf,
+        yn,
+        ynf,
 
         // FIXME: MPFR tests needed
         frexp,
diff --git a/crates/musl-math-sys/src/lib.rs b/crates/musl-math-sys/src/lib.rs
index db352fab8..07277ef3e 100644
--- a/crates/musl-math-sys/src/lib.rs
+++ b/crates/musl-math-sys/src/lib.rs
@@ -282,5 +282,6 @@ functions! {
     musl_y0f: y0f(a: f32) -> f32;
     musl_y1: y1(a: f64) -> f64;
     musl_y1f: y1f(a: f32) -> f32;
+    musl_yn: yn(a: c_int, b: f64) -> f64;
     musl_ynf: ynf(a: c_int, b: f32) -> f32;
 }

From 0cb855b1de6f2282fddae2e01e1ed97b923a64e1 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 2 Jan 2025 21:23:28 +0000
Subject: [PATCH 073/279] macros: Always emit `f16_enabled` and `f128_enabled`
 attributes

Once we start addinf `f16` and `f128` routines, we will need to have
this cfg for almost all uses of `for_each_function`. Rather than needing
to specify this each time, always emit `#[cfg(f16_enabled)]` or
`#[cfg(f128_enabled)]` for each function that uses `f16` or `f128`,
respectively.
---
 crates/libm-macros/src/lib.rs                | 37 ++++++++++++++------
 crates/libm-macros/tests/basic.rs            | 10 ++++--
 crates/libm-test/benches/random.rs           |  4 +++
 crates/libm-test/src/lib.rs                  |  2 ++
 crates/libm-test/src/mpfloat.rs              |  6 ++++
 crates/libm-test/src/op.rs                   |  3 ++
 crates/libm-test/tests/check_coverage.rs     |  1 +
 crates/libm-test/tests/compare_built_musl.rs |  4 +--
 crates/libm-test/tests/multiprecision.rs     | 10 +++---
 9 files changed, 57 insertions(+), 20 deletions(-)

diff --git a/crates/libm-macros/src/lib.rs b/crates/libm-macros/src/lib.rs
index 916b539ed..3cee5385b 100644
--- a/crates/libm-macros/src/lib.rs
+++ b/crates/libm-macros/src/lib.rs
@@ -79,7 +79,7 @@ pub fn base_name_enum(attributes: pm::TokenStream, tokens: pm::TokenStream) -> p
 ///         // The Rust version's return type (e.g. `(f32, f32)`)
 ///         RustRet: $RustRet:ty,
 ///         // Attributes for the current function, if any
-///         attrs: [$($meta:meta)*]
+///         attrs: [$($attr:meta),*],
 ///         // Extra tokens passed directly (if any)
 ///         extra: [$extra:ident],
 ///         // Extra function-tokens passed directly (if any)
@@ -97,6 +97,9 @@ pub fn base_name_enum(attributes: pm::TokenStream, tokens: pm::TokenStream) -> p
 ///     skip: [sin, cos],
 ///     // Attributes passed as `attrs` for specific functions. For example, here the invocation
 ///     // with `sinf` and that with `cosf` will both get `meta1` and `meta2`, but no others will.
+///     //
+///     // Note that `f16_enabled` and `f128_enabled` will always get emitted regardless of whether
+///     // or not this is specified.
 ///     attributes: [
 ///         #[meta1]
 ///         #[meta2]
@@ -255,16 +258,28 @@ fn expand(input: StructuredInput, fn_list: &[&MathOpInfo]) -> syn::Result<pm2::T
         let fn_name = Ident::new(func.name, Span::call_site());
 
         // Prepare attributes in an `attrs: ...` field
-        let meta_field = match &input.attributes {
-            Some(attrs) => {
-                let meta = attrs
-                    .iter()
-                    .filter(|map| map.names.contains(&fn_name))
-                    .flat_map(|map| &map.meta);
-                quote! { attrs: [ #( #meta )* ]  }
-            }
-            None => pm2::TokenStream::new(),
-        };
+        let mut meta_fields = Vec::new();
+        if let Some(attrs) = &input.attributes {
+            let meta_iter = attrs
+                .iter()
+                .filter(|map| map.names.contains(&fn_name))
+                .flat_map(|map| &map.meta)
+                .map(|v| v.into_token_stream());
+
+            meta_fields.extend(meta_iter);
+        }
+
+        // Always emit f16 and f128 meta so this doesn't need to be repeated everywhere
+        if func.rust_sig.args.contains(&Ty::F16) || func.rust_sig.returns.contains(&Ty::F16) {
+            let ts = quote! { cfg(f16_enabled) };
+            meta_fields.push(ts);
+        }
+        if func.rust_sig.args.contains(&Ty::F128) || func.rust_sig.returns.contains(&Ty::F128) {
+            let ts = quote! { cfg(f128_enabled) };
+            meta_fields.push(ts);
+        }
+
+        let meta_field = quote! { attrs: [ #( #meta_fields ),* ], };
 
         // Prepare extra in an `extra: ...` field, running the replacer
         let extra_field = match input.extra.clone() {
diff --git a/crates/libm-macros/tests/basic.rs b/crates/libm-macros/tests/basic.rs
index 2eaba04f4..0aa417f13 100644
--- a/crates/libm-macros/tests/basic.rs
+++ b/crates/libm-macros/tests/basic.rs
@@ -1,3 +1,5 @@
+#![feature(f16)]
+#![feature(f128)]
 // `STATUS_DLL_NOT_FOUND` on i686 MinGW, not worth looking into.
 #![cfg(not(all(target_arch = "x86", target_os = "windows", target_env = "gnu")))]
 
@@ -11,11 +13,11 @@ macro_rules! basic {
         RustFn: $RustFn:ty,
         RustArgs: $RustArgs:ty,
         RustRet: $RustRet:ty,
-        attrs: [$($meta:meta)*]
+        attrs: [$($attr:meta),*],
         extra: [$($extra_tt:tt)*],
         fn_extra: $fn_extra:expr,
     ) => {
-        $(#[$meta])*
+        $(#[$attr])*
         mod $fn_name {
             #[allow(unused)]
             type FTy= $FTy;
@@ -60,7 +62,9 @@ mod test_basic {
 macro_rules! basic_no_extra {
     (
         fn_name: $fn_name:ident,
+        attrs: [$($attr:meta),*],
     ) => {
+        $(#[$attr])*
         mod $fn_name {}
     };
 }
@@ -85,7 +89,9 @@ macro_rules! specified_types {
         fn_name: $fn_name:ident,
         RustFn: $RustFn:ty,
         RustArgs: $RustArgs:ty,
+        attrs: [$($attr:meta),*],
     ) => {
+        $(#[$attr])*
         mod $fn_name {
             #[allow(unused)]
             type RustFnTy = $RustFn;
diff --git a/crates/libm-test/benches/random.rs b/crates/libm-test/benches/random.rs
index b9c39334c..06997cd36 100644
--- a/crates/libm-test/benches/random.rs
+++ b/crates/libm-test/benches/random.rs
@@ -18,9 +18,11 @@ struct MuslExtra<F> {
 macro_rules! musl_rand_benches {
     (
         fn_name: $fn_name:ident,
+        attrs: [$($attr:meta),*],
         fn_extra: $skip_on_i586:expr,
     ) => {
         paste::paste! {
+            $(#[$attr])*
             fn [< musl_bench_ $fn_name >](c: &mut Criterion) {
                 type Op = libm_test::op::$fn_name::Routine;
 
@@ -113,9 +115,11 @@ libm_macros::for_each_function! {
 macro_rules! run_callback {
     (
         fn_name: $fn_name:ident,
+        attrs: [$($attr:meta),*],
         extra: [$criterion:ident],
     ) => {
         paste::paste! {
+            $(#[$attr])*
             [< musl_bench_ $fn_name >](&mut $criterion)
         }
     };
diff --git a/crates/libm-test/src/lib.rs b/crates/libm-test/src/lib.rs
index fdba0357f..97907b2a1 100644
--- a/crates/libm-test/src/lib.rs
+++ b/crates/libm-test/src/lib.rs
@@ -1,3 +1,5 @@
+#![cfg_attr(f16_enabled, feature(f16))]
+#![cfg_attr(f128_enabled, feature(f128))]
 #![allow(clippy::unusual_byte_groupings)] // sometimes we group by sign_exp_sig
 
 pub mod domain;
diff --git a/crates/libm-test/src/mpfloat.rs b/crates/libm-test/src/mpfloat.rs
index 28df916bd..edb4cb962 100644
--- a/crates/libm-test/src/mpfloat.rs
+++ b/crates/libm-test/src/mpfloat.rs
@@ -50,9 +50,11 @@ macro_rules! impl_mp_op {
     (
         fn_name: $fn_name:ident,
         RustFn: fn($_fty:ty,) -> $_ret:ty,
+        attrs: [$($attr:meta),*],
         fn_extra: $fn_name_normalized:expr,
     ) => {
         paste::paste! {
+            $(#[$attr])*
             impl MpOp for crate::op::$fn_name::Routine {
                 type MpTy = MpFloat;
 
@@ -72,9 +74,11 @@ macro_rules! impl_mp_op {
     (
         fn_name: $fn_name:ident,
         RustFn: fn($_fty:ty, $_fty2:ty,) -> $_ret:ty,
+        attrs: [$($attr:meta),*],
         fn_extra: $fn_name_normalized:expr,
     ) => {
         paste::paste! {
+            $(#[$attr])*
             impl MpOp for crate::op::$fn_name::Routine {
                 type MpTy = (MpFloat, MpFloat);
 
@@ -95,9 +99,11 @@ macro_rules! impl_mp_op {
     (
         fn_name: $fn_name:ident,
         RustFn: fn($_fty:ty, $_fty2:ty, $_fty3:ty,) -> $_ret:ty,
+        attrs: [$($attr:meta),*],
         fn_extra: $fn_name_normalized:expr,
     ) => {
         paste::paste! {
+            $(#[$attr])*
             impl MpOp for crate::op::$fn_name::Routine {
                 type MpTy = (MpFloat, MpFloat, MpFloat);
 
diff --git a/crates/libm-test/src/op.rs b/crates/libm-test/src/op.rs
index e58c28903..ee61eb0b8 100644
--- a/crates/libm-test/src/op.rs
+++ b/crates/libm-test/src/op.rs
@@ -112,8 +112,11 @@ macro_rules! do_thing {
         RustFn: $RustFn:ty,
         RustArgs: $RustArgs:ty,
         RustRet: $RustRet:ty,
+        attrs: [$($attr:meta),*],
+
     ) => {
         paste::paste! {
+            $(#[$attr])*
             pub mod $fn_name {
                 use super::*;
                 pub struct Routine;
diff --git a/crates/libm-test/tests/check_coverage.rs b/crates/libm-test/tests/check_coverage.rs
index 9f85d6424..c23298686 100644
--- a/crates/libm-test/tests/check_coverage.rs
+++ b/crates/libm-test/tests/check_coverage.rs
@@ -8,6 +8,7 @@ use std::process::Command;
 macro_rules! callback {
     (
         fn_name: $name:ident,
+        attrs: [$($attr:meta),*],
         extra: [$set:ident],
     ) => {
         let name = stringify!($name);
diff --git a/crates/libm-test/tests/compare_built_musl.rs b/crates/libm-test/tests/compare_built_musl.rs
index 0022ee03c..71f080ab1 100644
--- a/crates/libm-test/tests/compare_built_musl.rs
+++ b/crates/libm-test/tests/compare_built_musl.rs
@@ -15,11 +15,11 @@ use libm_test::{CheckBasis, CheckCtx, CheckOutput, GenerateInput, MathOp, TupleC
 macro_rules! musl_rand_tests {
     (
         fn_name: $fn_name:ident,
-        attrs: [$($meta:meta)*]
+        attrs: [$($attr:meta),*],
     ) => {
         paste::paste! {
             #[test]
-            $(#[$meta])*
+            $(#[$attr])*
             fn [< musl_random_ $fn_name >]() {
                 test_one::<libm_test::op::$fn_name::Routine>(musl_math_sys::$fn_name);
             }
diff --git a/crates/libm-test/tests/multiprecision.rs b/crates/libm-test/tests/multiprecision.rs
index 4821f7446..71ff2ff96 100644
--- a/crates/libm-test/tests/multiprecision.rs
+++ b/crates/libm-test/tests/multiprecision.rs
@@ -13,11 +13,11 @@ use libm_test::{
 macro_rules! mp_rand_tests {
     (
         fn_name: $fn_name:ident,
-        attrs: [$($meta:meta)*]
+        attrs: [$($attr:meta),*],
     ) => {
         paste::paste! {
             #[test]
-            $(#[$meta])*
+            $(#[$attr])*
             fn [< mp_random_ $fn_name >]() {
                 test_one_random::<libm_test::op::$fn_name::Routine>();
             }
@@ -76,18 +76,18 @@ libm_macros::for_each_function! {
 macro_rules! mp_domain_tests {
     (
         fn_name: $fn_name:ident,
-        attrs: [$($meta:meta)*]
+        attrs: [$($attr:meta),*],
     ) => {
         paste::paste! {
             #[test]
-            $(#[$meta])*
+            $(#[$attr])*
             fn [< mp_edge_case_ $fn_name >]() {
                 type Op = libm_test::op::$fn_name::Routine;
                 domain_test_runner::<Op, _>(edge_cases::get_test_cases::<Op, _>);
             }
 
             #[test]
-            $(#[$meta])*
+            $(#[$attr])*
             fn [< mp_logspace_ $fn_name >]() {
                 type Op = libm_test::op::$fn_name::Routine;
                 domain_test_runner::<Op, _>(domain_logspace::get_test_cases::<Op>);

From 110c6f75e1a1c21d3b0e002baeb36457c4010bdb Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 2 Jan 2025 10:19:54 +0000
Subject: [PATCH 074/279] Rename generic `abs` to `fabs`

Using the same name as the routines themselves means this will correctly
get picked up by the CI job looking for exhaustive tests.
---
 src/math/fabs.rs                     | 2 +-
 src/math/fabsf.rs                    | 2 +-
 src/math/generic/{abs.rs => fabs.rs} | 2 +-
 src/math/generic/mod.rs              | 4 ++--
 4 files changed, 5 insertions(+), 5 deletions(-)
 rename src/math/generic/{abs.rs => fabs.rs} (63%)

diff --git a/src/math/fabs.rs b/src/math/fabs.rs
index 2163637e7..6687fdcc3 100644
--- a/src/math/fabs.rs
+++ b/src/math/fabs.rs
@@ -9,7 +9,7 @@ pub fn fabs(x: f64) -> f64 {
         args: x,
     }
 
-    super::generic::abs(x)
+    super::generic::fabs(x)
 }
 
 #[cfg(test)]
diff --git a/src/math/fabsf.rs b/src/math/fabsf.rs
index ac77c9201..99bb5b5f1 100644
--- a/src/math/fabsf.rs
+++ b/src/math/fabsf.rs
@@ -9,7 +9,7 @@ pub fn fabsf(x: f32) -> f32 {
         args: x,
     }
 
-    super::generic::abs(x)
+    super::generic::fabs(x)
 }
 
 // PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520
diff --git a/src/math/generic/abs.rs b/src/math/generic/fabs.rs
similarity index 63%
rename from src/math/generic/abs.rs
rename to src/math/generic/fabs.rs
index 2c9a43c12..f2c7f0f46 100644
--- a/src/math/generic/abs.rs
+++ b/src/math/generic/fabs.rs
@@ -1,6 +1,6 @@
 use super::super::Float;
 
 /// Absolute value.
-pub fn abs<F: Float>(x: F) -> F {
+pub fn fabs<F: Float>(x: F) -> F {
     x.abs()
 }
diff --git a/src/math/generic/mod.rs b/src/math/generic/mod.rs
index 1ddd08f0e..08524b685 100644
--- a/src/math/generic/mod.rs
+++ b/src/math/generic/mod.rs
@@ -1,5 +1,5 @@
-mod abs;
 mod copysign;
+mod fabs;
 
-pub use abs::abs;
 pub use copysign::copysign;
+pub use fabs::fabs;

From 6f6bd11810ccfef9ea11821563122c4f10160760 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 3 Jan 2025 12:04:45 +0000
Subject: [PATCH 075/279] Use intrinsics for `abs` and `copysign` when
 available

Currently our implementations for `abs` and `copysign` are defined on
the trait, and these are then called from `generic`. It would be better
to call core's `.abs()` / `.copysign(y)`, but we can't do this in the
generic because calling the standalone function could become recursive
(`fabsf` becomes `intrinsics::fabsf32`, that may lower to a call to
`fabsf`).

Change this so the traits uses the call to `core` if available, falling
back to a call to the standalone generic function.

In practice the recursion isn't likely to be a problem since LLVM
probably always lowers `abs`/`copysign` to assembly, but this pattern
should be more correct for functions that we will add in the future
(e.g. `fma`).

This should eventually be followed by a change to call the trait methods
rather than `fabs`/`copysign` directly.
---
 crates/libm-test/src/f8_impl.rs  |  8 +++++++
 src/math/generic/copysign.rs     |  2 +-
 src/math/generic/fabs.rs         |  3 ++-
 src/math/mod.rs                  |  9 +++++++-
 src/math/support/float_traits.rs | 37 +++++++++++++++++++++-----------
 5 files changed, 43 insertions(+), 16 deletions(-)

diff --git a/crates/libm-test/src/f8_impl.rs b/crates/libm-test/src/f8_impl.rs
index babcc6357..d378863f2 100644
--- a/crates/libm-test/src/f8_impl.rs
+++ b/crates/libm-test/src/f8_impl.rs
@@ -70,6 +70,14 @@ impl Float for f8 {
         Self(a)
     }
 
+    fn abs(self) -> Self {
+        libm::generic::fabs(self)
+    }
+
+    fn copysign(self, other: Self) -> Self {
+        libm::generic::copysign(self, other)
+    }
+
     fn normalize(_significand: Self::Int) -> (i32, Self::Int) {
         unimplemented!()
     }
diff --git a/src/math/generic/copysign.rs b/src/math/generic/copysign.rs
index d6b814891..04864a359 100644
--- a/src/math/generic/copysign.rs
+++ b/src/math/generic/copysign.rs
@@ -5,6 +5,6 @@ pub fn copysign<F: Float>(x: F, y: F) -> F {
     let mut ux = x.to_bits();
     let uy = y.to_bits();
     ux &= !F::SIGN_MASK;
-    ux |= uy & (F::SIGN_MASK);
+    ux |= uy & F::SIGN_MASK;
     F::from_bits(ux)
 }
diff --git a/src/math/generic/fabs.rs b/src/math/generic/fabs.rs
index f2c7f0f46..75b473107 100644
--- a/src/math/generic/fabs.rs
+++ b/src/math/generic/fabs.rs
@@ -2,5 +2,6 @@ use super::super::Float;
 
 /// Absolute value.
 pub fn fabs<F: Float>(x: F) -> F {
-    x.abs()
+    let abs_mask = !F::SIGN_MASK;
+    F::from_bits(x.to_bits() & abs_mask)
 }
diff --git a/src/math/mod.rs b/src/math/mod.rs
index ba1995228..e7b21de67 100644
--- a/src/math/mod.rs
+++ b/src/math/mod.rs
@@ -83,11 +83,18 @@ pub mod support;
 #[cfg(not(feature = "unstable-test-support"))]
 mod support;
 
+cfg_if! {
+    if #[cfg(feature = "unstable-test-support")] {
+        pub mod generic;
+    } else {
+        mod generic;
+    }
+}
+
 // Private modules
 mod arch;
 mod expo2;
 mod fenv;
-mod generic;
 mod k_cos;
 mod k_cosf;
 mod k_expo2;
diff --git a/src/math/support/float_traits.rs b/src/math/support/float_traits.rs
index e64640a0d..3b5be4fa3 100644
--- a/src/math/support/float_traits.rs
+++ b/src/math/support/float_traits.rs
@@ -123,23 +123,14 @@ pub trait Float:
         )
     }
 
-    fn abs(self) -> Self {
-        let abs_mask = !Self::SIGN_MASK;
-        Self::from_bits(self.to_bits() & abs_mask)
-    }
+    fn abs(self) -> Self;
+
+    /// Returns a number composed of the magnitude of self and the sign of sign.
+    fn copysign(self, other: Self) -> Self;
 
     /// Returns (normalized exponent, normalized significand)
     fn normalize(significand: Self::Int) -> (i32, Self::Int);
 
-    /// Returns a number composed of the magnitude of self and the sign of sign.
-    fn copysign(self, other: Self) -> Self {
-        let mut x = self.to_bits();
-        let y = other.to_bits();
-        x &= !Self::SIGN_MASK;
-        x |= y & Self::SIGN_MASK;
-        Self::from_bits(x)
-    }
-
     /// Returns a number that represents the sign of self.
     fn signum(self) -> Self {
         if self.is_nan() { self } else { Self::ONE.copysign(self) }
@@ -206,6 +197,26 @@ macro_rules! float_impl {
             fn from_bits(a: Self::Int) -> Self {
                 Self::from_bits(a)
             }
+            fn abs(self) -> Self {
+                cfg_if! {
+                    // FIXME(msrv): `abs` is available in `core` starting with 1.85.
+                    if #[cfg(feature = "unstable-intrinsics")] {
+                        self.abs()
+                    } else {
+                        super::super::generic::fabs(self)
+                    }
+                }
+            }
+            fn copysign(self, other: Self) -> Self {
+                cfg_if! {
+                    // FIXME(msrv): `copysign` is available in `core` starting with 1.85.
+                    if #[cfg(feature = "unstable-intrinsics")] {
+                        self.copysign(other)
+                    } else {
+                        super::super::generic::copysign(self, other)
+                    }
+                }
+            }
             fn normalize(significand: Self::Int) -> (i32, Self::Int) {
                 let shift = significand.leading_zeros().wrapping_sub(Self::EXP_BITS);
                 (1i32.wrapping_sub(shift as i32), significand << shift as Self::Int)

From 1b8940071483790bfb311f7ffacba0eb4186cc84 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 3 Jan 2025 22:26:39 +0000
Subject: [PATCH 076/279] Change to exhaustive matching for `default_ulp`

Make it more obvious what the expected ULP for a given routine is. This
also narrows ULP to 0 for operations that require exact results.
---
 crates/libm-test/src/precision.rs | 130 +++++++++++++++++++++---------
 1 file changed, 91 insertions(+), 39 deletions(-)

diff --git a/crates/libm-test/src/precision.rs b/crates/libm-test/src/precision.rs
index 6d4561c43..89b66146c 100644
--- a/crates/libm-test/src/precision.rs
+++ b/crates/libm-test/src/precision.rs
@@ -4,57 +4,109 @@
 use core::f32;
 
 use CheckBasis::{Mpfr, Musl};
-use Identifier as Id;
+use {BaseName as Bn, Identifier as Id};
 
 use crate::{BaseName, CheckBasis, CheckCtx, Float, Identifier, Int, TestResult};
 
 /// Type implementing [`IgnoreCase`].
 pub struct SpecialCase;
 
-/// Default ULP allowed to differ from musl (note that musl itself may not be accurate).
-const MUSL_DEFAULT_ULP: u32 = 2;
-
-/// Default ULP allowed to differ from multiprecision (i.e. infinite) results.
-const MP_DEFAULT_ULP: u32 = 1;
-
 /// ULP allowed to differ from the results returned by a test basis.
 ///
 /// Note that these results were obtained using 400M rounds of random inputs, which
 /// is not a value used by default.
 pub fn default_ulp(ctx: &CheckCtx) -> u32 {
-    match (&ctx.basis, ctx.fn_ident) {
-        // Overrides that apply to either basis
-        // FMA is expected to be infinite precision.
-        (_, Id::Fma | Id::Fmaf) => 0,
-        (_, Id::J0 | Id::J0f | Id::J1 | Id::J1f | Id::Y0 | Id::Y0f | Id::Y1 | Id::Y1f) => 800_000,
-        (_, Id::Jn | Id::Jnf | Id::Yn | Id::Ynf) => 1000,
-        (_, Id::Erfc | Id::Erfcf) => 4,
-
-        // Overrides for musl
-        #[cfg(x86_no_sse)]
-        (Musl, Id::Asinh | Id::Asinhf) => 6,
-        #[cfg(not(target_pointer_width = "64"))]
-        (Musl, Id::Exp10 | Id::Exp10f) => 4,
-        (Musl, Id::Lgamma | Id::LgammaR | Id::Lgammaf | Id::LgammafR) => 400,
-        (Musl, Id::Sincosf) => 500,
-        (Musl, Id::Tanh | Id::Tanhf) => 4,
-        (Musl, Id::Tgamma) => 20,
-
-        // Overrides for MPFR
-        (Mpfr, Id::Acosh) => 4,
-        (Mpfr, Id::Acoshf) => 4,
-        (Mpfr, Id::Asinh | Id::Asinhf) => 2,
-        (Mpfr, Id::Atanh | Id::Atanhf) => 2,
-        (Mpfr, Id::Exp10 | Id::Exp10f) => 6,
-        (Mpfr, Id::Lgamma | Id::LgammaR | Id::Lgammaf | Id::LgammafR) => 16,
-        (Mpfr, Id::Sinh | Id::Sinhf) => 2,
-        (Mpfr, Id::Tanh | Id::Tanhf) => 2,
-        (Mpfr, Id::Tgamma) => 20,
-
-        // Defaults
-        (Musl, _) => MUSL_DEFAULT_ULP,
-        (Mpfr, _) => MP_DEFAULT_ULP,
+    // ULP compared to the infinite (MPFR) result.
+    let mut ulp = match ctx.base_name {
+        // Operations that require exact results. This list should correlate with what we
+        // have documented at <https://doc.rust-lang.org/std/primitive.f32.html>.
+        Bn::Ceil
+        | Bn::Copysign
+        | Bn::Fabs
+        | Bn::Fdim
+        | Bn::Floor
+        | Bn::Fma
+        | Bn::Fmax
+        | Bn::Fmin
+        | Bn::Fmod
+        | Bn::Frexp
+        | Bn::Ldexp
+        | Bn::Modf
+        | Bn::Nextafter
+        | Bn::Remainder
+        | Bn::Remquo
+        | Bn::Rint
+        | Bn::Round
+        | Bn::Scalbn
+        | Bn::Sqrt
+        | Bn::Trunc => 0,
+
+        // Operations that aren't required to be exact, but our implementations are.
+        Bn::Cbrt if ctx.fn_ident != Id::Cbrt => 0,
+        Bn::Ilogb => 0,
+        Bn::Tgamma if ctx.fn_ident != Id::Tgamma => 0,
+
+        // Bessel functions have large inaccuracies.
+        Bn::J0 | Bn::J1 | Bn::Y0 | Bn::Y1 => 8_000_000,
+        Bn::Jn | Bn::Yn => 1_000,
+
+        // For all other operations, specify our implementation's worst case precision.
+        Bn::Acos => 1,
+        Bn::Acosh => 4,
+        Bn::Asin => 1,
+        Bn::Asinh => 2,
+        Bn::Atan => 1,
+        Bn::Atan2 => 1,
+        Bn::Atanh => 2,
+        Bn::Cbrt => 1,
+        Bn::Cos => 1,
+        Bn::Cosh => 1,
+        Bn::Erf => 1,
+        Bn::Erfc => 4,
+        Bn::Exp => 1,
+        Bn::Exp10 => 6,
+        Bn::Exp2 => 1,
+        Bn::Expm1 => 1,
+        Bn::Hypot => 1,
+        Bn::Lgamma | Bn::LgammaR => 16,
+        Bn::Log => 1,
+        Bn::Log10 => 1,
+        Bn::Log1p => 1,
+        Bn::Log2 => 1,
+        Bn::Pow => 1,
+        Bn::Sin => 1,
+        Bn::Sincos => 1,
+        Bn::Sinh => 2,
+        Bn::Tan => 1,
+        Bn::Tanh => 2,
+        Bn::Tgamma => 20,
+    };
+
+    // There are some cases where musl's approximation is less accurate than ours. For these
+    // cases, increase the ULP.
+    if ctx.basis == Musl {
+        match ctx.base_name {
+            Bn::Cosh => ulp = 2,
+            Bn::Exp10 if usize::BITS < 64 => ulp = 4,
+            Bn::Lgamma | Bn::LgammaR => ulp = 400,
+            Bn::Tanh => ulp = 4,
+            _ if ctx.fn_ident == Id::Sincosf => ulp = 500,
+            _ if ctx.fn_ident == Id::Tgamma => ulp = 20,
+            _ => (),
+        }
     }
+
+    // In some cases, our implementation is less accurate than musl on i586.
+    if cfg!(x86_no_sse) {
+        match ctx.fn_ident {
+            Id::Log1p | Id::Log1pf => ulp = 2,
+            Id::Round => ulp = 1,
+            Id::Tan => ulp = 2,
+            _ => (),
+        }
+    }
+
+    ulp
 }
 
 /// Don't run further validation on this test case.

From 2d909e1b4d38102d591006636b55859111b6df4e Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sat, 4 Jan 2025 11:14:31 +0000
Subject: [PATCH 077/279] precision: Sort `ilogb` with other precise operations

This is a nonfunctional change.
---
 crates/libm-test/src/precision.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/libm-test/src/precision.rs b/crates/libm-test/src/precision.rs
index 89b66146c..8bedcde44 100644
--- a/crates/libm-test/src/precision.rs
+++ b/crates/libm-test/src/precision.rs
@@ -30,6 +30,7 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 {
         | Bn::Fmin
         | Bn::Fmod
         | Bn::Frexp
+        | Bn::Ilogb
         | Bn::Ldexp
         | Bn::Modf
         | Bn::Nextafter
@@ -43,7 +44,6 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 {
 
         // Operations that aren't required to be exact, but our implementations are.
         Bn::Cbrt if ctx.fn_ident != Id::Cbrt => 0,
-        Bn::Ilogb => 0,
         Bn::Tgamma if ctx.fn_ident != Id::Tgamma => 0,
 
         // Bessel functions have large inaccuracies.

From aa3d242bde84f94858387e888436fc536d9246fb Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sun, 5 Jan 2025 02:06:02 +0000
Subject: [PATCH 078/279] Clean up integers stored in `MpTy`

There isn't any need to cache the integer since it gets provided as an
argument anyway. Simplify this in `jn` and `yn`.
---
 crates/libm-test/src/mpfloat.rs | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/crates/libm-test/src/mpfloat.rs b/crates/libm-test/src/mpfloat.rs
index edb4cb962..5e516ef68 100644
--- a/crates/libm-test/src/mpfloat.rs
+++ b/crates/libm-test/src/mpfloat.rs
@@ -241,17 +241,17 @@ macro_rules! impl_op_for_ty {
             }
 
             impl MpOp for crate::op::[<jn $suffix>]::Routine {
-                type MpTy = (i32, MpFloat);
+                type MpTy = MpFloat;
 
                 fn new_mp() -> Self::MpTy {
-                    (0, new_mpfloat::<Self::FTy>())
+                    new_mpfloat::<Self::FTy>()
                 }
 
                 fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
-                    this.0 = input.0;
-                    this.1.assign(input.1);
-                    let ord = this.1.jn_round(this.0, Nearest);
-                    prep_retval::<Self::FTy>(&mut this.1, ord)
+                    let (n, x) = input;
+                    this.assign(x);
+                    let ord = this.jn_round(n, Nearest);
+                    prep_retval::<Self::FTy>(this, ord)
                 }
             }
 
@@ -274,17 +274,17 @@ macro_rules! impl_op_for_ty {
             }
 
             impl MpOp for crate::op::[<yn $suffix>]::Routine {
-                type MpTy = (i32, MpFloat);
+                type MpTy = MpFloat;
 
                 fn new_mp() -> Self::MpTy {
-                    (0, new_mpfloat::<Self::FTy>())
+                    new_mpfloat::<Self::FTy>()
                 }
 
                 fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
-                    this.0 = input.0;
-                    this.1.assign(input.1);
-                    let ord = this.1.yn_round(this.0, Nearest);
-                    prep_retval::<Self::FTy>(&mut this.1, ord)
+                    let (n, x) = input;
+                    this.assign(x);
+                    let ord = this.yn_round(n, Nearest);
+                    prep_retval::<Self::FTy>(this, ord)
                 }
             }
         }

From cc29de29ae606937e53e7f97b0b7ed197c7f4923 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sat, 4 Jan 2025 09:53:58 +0000
Subject: [PATCH 079/279] Add tests against MPFR for `modf` and `modff`

Rug provides `trunc_fract_round`, which implements `modf`, use it to add
a test.
---
 crates/libm-test/src/mpfloat.rs          | 18 ++++++++++++++++++
 crates/libm-test/tests/multiprecision.rs |  4 ----
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/crates/libm-test/src/mpfloat.rs b/crates/libm-test/src/mpfloat.rs
index 5e516ef68..b9e71d68a 100644
--- a/crates/libm-test/src/mpfloat.rs
+++ b/crates/libm-test/src/mpfloat.rs
@@ -210,6 +210,24 @@ macro_rules! impl_op_for_ty {
                 }
             }
 
+            impl MpOp for crate::op::[<modf $suffix>]::Routine {
+                type MpTy = (MpFloat, MpFloat);
+
+                fn new_mp() -> Self::MpTy {
+                    (new_mpfloat::<Self::FTy>(), new_mpfloat::<Self::FTy>())
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.0.assign(input.0);
+                    this.1.assign(&this.0);
+                    let (ord0, ord1) = this.0.trunc_fract_round(&mut this.1, Nearest);
+                    (
+                        prep_retval::<Self::FTy>(&mut this.1, ord0),
+                        prep_retval::<Self::FTy>(&mut this.0, ord1),
+                    )
+                }
+            }
+
             impl MpOp for crate::op::[<pow $suffix>]::Routine {
                 type MpTy = (MpFloat, MpFloat);
 
diff --git a/crates/libm-test/tests/multiprecision.rs b/crates/libm-test/tests/multiprecision.rs
index 71ff2ff96..54d313059 100644
--- a/crates/libm-test/tests/multiprecision.rs
+++ b/crates/libm-test/tests/multiprecision.rs
@@ -58,8 +58,6 @@ libm_macros::for_each_function! {
         ilogbf,
         ldexp,
         ldexpf,
-        modf,
-        modff,
         remquo,
         remquof,
         scalbn,
@@ -165,7 +163,5 @@ libm_macros::for_each_function! {
         frexpf,
         ilogb,
         ilogbf,
-        modf,
-        modff,
     ],
 }

From 9b15f00cd247667d92cf98196ec655dc13b20857 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sat, 4 Jan 2025 10:39:27 +0000
Subject: [PATCH 080/279] Add tests against MPFR for `frexp` and `frexpf`

This implementation comes from `rug::Float::to_f32_exp` [1].

[1]: https://docs.rs/rug/1.26.1/rug/struct.Float.html#method.to_f32_exp
---
 crates/libm-test/src/mpfloat.rs          | 19 +++++++++++++++++++
 crates/libm-test/tests/multiprecision.rs |  4 ----
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/crates/libm-test/src/mpfloat.rs b/crates/libm-test/src/mpfloat.rs
index b9e71d68a..4d40858f2 100644
--- a/crates/libm-test/src/mpfloat.rs
+++ b/crates/libm-test/src/mpfloat.rs
@@ -258,6 +258,25 @@ macro_rules! impl_op_for_ty {
                 }
             }
 
+            impl MpOp for crate::op::[<frexp $suffix>]::Routine {
+                type MpTy = MpFloat;
+
+                fn new_mp() -> Self::MpTy {
+                    new_mpfloat::<Self::FTy>()
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    // Implementation taken from `rug::Float::to_f32_exp`.
+                    this.assign(input.0);
+                    let exp = this.get_exp().unwrap_or(0);
+                    if exp != 0 {
+                        *this >>= exp;
+                    }
+
+                    (prep_retval::<Self::FTy>(this, Ordering::Equal), exp)
+                }
+            }
+
             impl MpOp for crate::op::[<jn $suffix>]::Routine {
                 type MpTy = MpFloat;
 
diff --git a/crates/libm-test/tests/multiprecision.rs b/crates/libm-test/tests/multiprecision.rs
index 54d313059..ade5a2553 100644
--- a/crates/libm-test/tests/multiprecision.rs
+++ b/crates/libm-test/tests/multiprecision.rs
@@ -52,8 +52,6 @@ libm_macros::for_each_function! {
     ],
     skip: [
         // FIXME: MPFR tests needed
-        frexp,
-        frexpf,
         ilogb,
         ilogbf,
         ldexp,
@@ -159,8 +157,6 @@ libm_macros::for_each_function! {
         ynf,
 
         // FIXME: MPFR tests needed
-        frexp,
-        frexpf,
         ilogb,
         ilogbf,
     ],

From 8d6f6a1787b30055e642e5ee96bd1a9935d862a0 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 3 Jan 2025 05:28:39 +0000
Subject: [PATCH 081/279] Add tests against MPFR for `scalbn{f}` and `ldexp{f}`

---
 crates/libm-test/src/mpfloat.rs          | 28 ++++++++++++++++++++++++
 crates/libm-test/tests/multiprecision.rs |  4 ----
 2 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/crates/libm-test/src/mpfloat.rs b/crates/libm-test/src/mpfloat.rs
index 4d40858f2..8b8298004 100644
--- a/crates/libm-test/src/mpfloat.rs
+++ b/crates/libm-test/src/mpfloat.rs
@@ -292,6 +292,34 @@ macro_rules! impl_op_for_ty {
                 }
             }
 
+            // `ldexp` and `scalbn` are the same for binary floating point, so just forward all
+            // methods.
+            impl MpOp for crate::op::[<ldexp $suffix>]::Routine {
+                type MpTy = <crate::op::[<scalbn $suffix>]::Routine as MpOp>::MpTy;
+
+                fn new_mp() -> Self::MpTy {
+                    <crate::op::[<scalbn $suffix>]::Routine as MpOp>::new_mp()
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    <crate::op::[<scalbn $suffix>]::Routine as MpOp>::run(this, input)
+                }
+            }
+
+            impl MpOp for crate::op::[<scalbn $suffix>]::Routine {
+                type MpTy = MpFloat;
+
+                fn new_mp() -> Self::MpTy {
+                    new_mpfloat::<Self::FTy>()
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.assign(input.0);
+                    *this <<= input.1;
+                    prep_retval::<Self::FTy>(this, Ordering::Equal)
+                }
+            }
+
             impl MpOp for crate::op::[<sincos $suffix>]::Routine {
                 type MpTy = (MpFloat, MpFloat);
 
diff --git a/crates/libm-test/tests/multiprecision.rs b/crates/libm-test/tests/multiprecision.rs
index ade5a2553..4cdba0942 100644
--- a/crates/libm-test/tests/multiprecision.rs
+++ b/crates/libm-test/tests/multiprecision.rs
@@ -54,12 +54,8 @@ libm_macros::for_each_function! {
         // FIXME: MPFR tests needed
         ilogb,
         ilogbf,
-        ldexp,
-        ldexpf,
         remquo,
         remquof,
-        scalbn,
-        scalbnf,
 
         // FIXME: test needed, see
         // https://github.com/rust-lang/libm/pull/311#discussion_r1818273392

From 188e8710a052df2f430e7d753c4d38d03a3aaab5 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sun, 29 Dec 2024 08:45:08 +0000
Subject: [PATCH 082/279] Add a way for tests to log to a file

Occasionally it is useful to see some information from running tests
without making everything noisy from `--nocapture`. Add a function to
log this kind of output to a file, and print the file as part of CI.
---
 .github/workflows/main.yml  |  5 ++++
 configure.rs                | 15 +++++++++++
 crates/libm-test/src/lib.rs | 53 +++++++++++++++++++++++++++++++++++++
 3 files changed, 73 insertions(+)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 0f5becf73..023ec58c0 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -113,6 +113,11 @@ jobs:
         rustup target add x86_64-unknown-linux-musl
         cargo generate-lockfile && ./ci/run-docker.sh ${{ matrix.target }}
 
+    - name: Print test logs if available
+      if: always()
+      run: if [ -f "target/test-log.txt" ]; then cat target/test-log.txt; fi
+      shell: bash
+
   clippy:
     name: Clippy
     runs-on: ubuntu-24.04
diff --git a/configure.rs b/configure.rs
index 389e86c33..a18937c3c 100644
--- a/configure.rs
+++ b/configure.rs
@@ -8,6 +8,7 @@ pub struct Config {
     pub manifest_dir: PathBuf,
     pub out_dir: PathBuf,
     pub opt_level: u8,
+    pub cargo_features: Vec<String>,
     pub target_arch: String,
     pub target_env: String,
     pub target_family: Option<String>,
@@ -22,11 +23,16 @@ impl Config {
         let target_features = env::var("CARGO_CFG_TARGET_FEATURE")
             .map(|feats| feats.split(',').map(ToOwned::to_owned).collect())
             .unwrap_or_default();
+        let cargo_features = env::vars()
+            .filter_map(|(name, _value)| name.strip_prefix("CARGO_FEATURE_").map(ToOwned::to_owned))
+            .map(|s| s.to_lowercase().replace("_", "-"))
+            .collect();
 
         Self {
             manifest_dir: PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()),
             out_dir: PathBuf::from(env::var("OUT_DIR").unwrap()),
             opt_level: env::var("OPT_LEVEL").unwrap().parse().unwrap(),
+            cargo_features,
             target_arch: env::var("CARGO_CFG_TARGET_ARCH").unwrap(),
             target_env: env::var("CARGO_CFG_TARGET_ENV").unwrap(),
             target_family: env::var("CARGO_CFG_TARGET_FAMILY").ok(),
@@ -45,6 +51,7 @@ pub fn emit_libm_config(cfg: &Config) {
     emit_arch_cfg();
     emit_optimization_cfg(cfg);
     emit_cfg_shorthands(cfg);
+    emit_cfg_env(cfg);
     emit_f16_f128_cfg(cfg);
 }
 
@@ -53,6 +60,7 @@ pub fn emit_libm_config(cfg: &Config) {
 pub fn emit_test_config(cfg: &Config) {
     emit_optimization_cfg(cfg);
     emit_cfg_shorthands(cfg);
+    emit_cfg_env(cfg);
     emit_f16_f128_cfg(cfg);
 }
 
@@ -97,6 +105,13 @@ fn emit_cfg_shorthands(cfg: &Config) {
     }
 }
 
+/// Reemit config that we make use of for test logging.
+fn emit_cfg_env(cfg: &Config) {
+    println!("cargo:rustc-env=CFG_CARGO_FEATURES={:?}", cfg.cargo_features);
+    println!("cargo:rustc-env=CFG_OPT_LEVEL={}", cfg.opt_level);
+    println!("cargo:rustc-env=CFG_TARGET_FEATURES={:?}", cfg.target_features);
+}
+
 /// Configure whether or not `f16` and `f128` support should be enabled.
 fn emit_f16_f128_cfg(cfg: &Config) {
     println!("cargo:rustc-check-cfg=cfg(f16_enabled)");
diff --git a/crates/libm-test/src/lib.rs b/crates/libm-test/src/lib.rs
index 97907b2a1..c1aec0230 100644
--- a/crates/libm-test/src/lib.rs
+++ b/crates/libm-test/src/lib.rs
@@ -13,6 +13,13 @@ mod precision;
 mod run_cfg;
 mod test_traits;
 
+use std::env;
+use std::fs::File;
+use std::io::Write;
+use std::path::PathBuf;
+use std::sync::LazyLock;
+use std::time::SystemTime;
+
 pub use f8_impl::f8;
 pub use libm::support::{Float, Int, IntTy, MinInt};
 pub use num::{FloatExt, logspace};
@@ -42,3 +49,49 @@ pub const fn ci() -> bool {
         Some(_) => true,
     }
 }
+
+/// Print to stderr and additionally log it to `target/test-log.txt`. This is useful for saving
+/// output that would otherwise be consumed by the test harness.
+pub fn test_log(s: &str) {
+    // Handle to a file opened in append mode, unless a suitable path can't be determined.
+    static OUTFILE: LazyLock<Option<File>> = LazyLock::new(|| {
+        // If the target directory is overridden, use that environment variable. Otherwise, save
+        // at the default path `{workspace_root}/target`.
+        let target_dir = match env::var("CARGO_TARGET_DIR") {
+            Ok(s) => PathBuf::from(s),
+            Err(_) => {
+                let Ok(x) = env::var("CARGO_MANIFEST_DIR") else {
+                    return None;
+                };
+
+                PathBuf::from(x).parent().unwrap().parent().unwrap().join("target")
+            }
+        };
+        let outfile = target_dir.join("test-log.txt");
+
+        let mut f = File::options()
+            .create(true)
+            .append(true)
+            .open(outfile)
+            .expect("failed to open logfile");
+        let now = SystemTime::now().duration_since(SystemTime::UNIX_EPOCH).unwrap();
+
+        writeln!(f, "\n\nTest run at {}", now.as_secs()).unwrap();
+        writeln!(f, "arch: {}", env::consts::ARCH).unwrap();
+        writeln!(f, "os: {}", env::consts::OS).unwrap();
+        writeln!(f, "bits: {}", usize::BITS).unwrap();
+        writeln!(f, "emulated: {}", emulated()).unwrap();
+        writeln!(f, "ci: {}", ci()).unwrap();
+        writeln!(f, "cargo features: {}", env!("CFG_CARGO_FEATURES")).unwrap();
+        writeln!(f, "opt level: {}", env!("CFG_OPT_LEVEL")).unwrap();
+        writeln!(f, "target features: {}", env!("CFG_TARGET_FEATURES")).unwrap();
+
+        Some(f)
+    });
+
+    eprintln!("{s}");
+
+    if let Some(mut f) = OUTFILE.as_ref() {
+        writeln!(f, "{s}").unwrap();
+    }
+}

From 0d486fe62a55ae409ee96ee58854ae0efaa57914 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 26 Dec 2024 07:44:54 +0000
Subject: [PATCH 083/279] Streamline the way that test iteration count is
 determined

Currently, tests use a handful of constants to determine how many
iterations to perform: `NTESTS`, `AROUND`, and `MAX_CHECK_POINTS`. This
configuration is not very straightforward to adjust and needs to be
repeated everywhere it is used.

Replace this with new functions in the `run_cfg` module that determine
iteration counts in a more reusable and documented way.

This only updates `edge_cases` and `domain_logspace`, `random` is
refactored in a later commit.
---
 crates/libm-test/src/gen/domain_logspace.rs |  31 +---
 crates/libm-test/src/gen/edge_cases.rs      |  54 +++---
 crates/libm-test/src/gen/random.rs          |   1 +
 crates/libm-test/src/lib.rs                 |   2 +-
 crates/libm-test/src/run_cfg.rs             | 177 +++++++++++++++++++-
 5 files changed, 208 insertions(+), 57 deletions(-)

diff --git a/crates/libm-test/src/gen/domain_logspace.rs b/crates/libm-test/src/gen/domain_logspace.rs
index 5e37170fa..3d8a3e7fe 100644
--- a/crates/libm-test/src/gen/domain_logspace.rs
+++ b/crates/libm-test/src/gen/domain_logspace.rs
@@ -6,41 +6,26 @@ use libm::support::{IntTy, MinInt};
 
 use crate::domain::HasDomain;
 use crate::op::OpITy;
+use crate::run_cfg::{GeneratorKind, iteration_count};
 use crate::{CheckCtx, MathOp, logspace};
 
-/// Number of tests to run.
-// FIXME(ntests): replace this with a more logical algorithm
-const NTESTS: usize = {
-    if cfg!(optimizations_enabled) {
-        if crate::emulated()
-            || !cfg!(target_pointer_width = "64")
-            || cfg!(all(target_arch = "x86_64", target_vendor = "apple"))
-        {
-            // Tests are pretty slow on non-64-bit targets, x86 MacOS, and targets that run
-            // in QEMU.
-            100_000
-        } else {
-            5_000_000
-        }
-    } else {
-        // Without optimizations just run a quick check
-        800
-    }
-};
-
 /// Create a range of logarithmically spaced inputs within a function's domain.
 ///
 /// This allows us to get reasonably thorough coverage without wasting time on values that are
 /// NaN or out of range. Random tests will still cover values that are excluded here.
-pub fn get_test_cases<Op>(_ctx: &CheckCtx) -> impl Iterator<Item = (Op::FTy,)>
+pub fn get_test_cases<Op>(ctx: &CheckCtx) -> impl Iterator<Item = (Op::FTy,)>
 where
     Op: MathOp + HasDomain<Op::FTy>,
-    IntTy<Op::FTy>: TryFrom<usize>,
+    IntTy<Op::FTy>: TryFrom<u64>,
     RangeInclusive<IntTy<Op::FTy>>: Iterator,
 {
     let domain = Op::DOMAIN;
+    let ntests = iteration_count(ctx, GeneratorKind::Domain, 0);
+
+    // We generate logspaced inputs within a specific range, excluding values that are out of
+    // range in order to make iterations useful (random tests still cover the full range).
     let start = domain.range_start();
     let end = domain.range_end();
-    let steps = OpITy::<Op>::try_from(NTESTS).unwrap_or(OpITy::<Op>::MAX);
+    let steps = OpITy::<Op>::try_from(ntests).unwrap_or(OpITy::<Op>::MAX);
     logspace(start, end, steps).map(|v| (v,))
 }
diff --git a/crates/libm-test/src/gen/edge_cases.rs b/crates/libm-test/src/gen/edge_cases.rs
index 3387f6c48..1f27c1467 100644
--- a/crates/libm-test/src/gen/edge_cases.rs
+++ b/crates/libm-test/src/gen/edge_cases.rs
@@ -3,18 +3,11 @@
 use libm::support::Float;
 
 use crate::domain::HasDomain;
+use crate::run_cfg::{check_near_count, check_point_count};
 use crate::{CheckCtx, FloatExt, MathOp};
 
-/// Number of values near an interesting point to check.
-// FIXME(ntests): replace this with a more logical algorithm
-const AROUND: usize = 100;
-
-/// Functions have infinite asymptotes, limit how many we check.
-// FIXME(ntests): replace this with a more logical algorithm
-const MAX_CHECK_POINTS: usize = 10;
-
 /// Create a list of values around interesting points (infinities, zeroes, NaNs).
-pub fn get_test_cases<Op, F>(_ctx: &CheckCtx) -> impl Iterator<Item = (F,)>
+pub fn get_test_cases<Op, F>(ctx: &CheckCtx) -> impl Iterator<Item = (F,)>
 where
     Op: MathOp<FTy = F> + HasDomain<F>,
     F: Float,
@@ -25,23 +18,26 @@ where
     let domain_start = domain.range_start();
     let domain_end = domain.range_end();
 
+    let check_points = check_point_count(ctx);
+    let near_points = check_near_count(ctx);
+
     // Check near some notable constants
-    count_up(F::ONE, values);
-    count_up(F::ZERO, values);
-    count_up(F::NEG_ONE, values);
-    count_down(F::ONE, values);
-    count_down(F::ZERO, values);
-    count_down(F::NEG_ONE, values);
+    count_up(F::ONE, near_points, values);
+    count_up(F::ZERO, near_points, values);
+    count_up(F::NEG_ONE, near_points, values);
+    count_down(F::ONE, near_points, values);
+    count_down(F::ZERO, near_points, values);
+    count_down(F::NEG_ONE, near_points, values);
     values.push(F::NEG_ZERO);
 
     // Check values near the extremes
-    count_up(F::NEG_INFINITY, values);
-    count_down(F::INFINITY, values);
-    count_down(domain_end, values);
-    count_up(domain_start, values);
-    count_down(domain_start, values);
-    count_up(domain_end, values);
-    count_down(domain_end, values);
+    count_up(F::NEG_INFINITY, near_points, values);
+    count_down(F::INFINITY, near_points, values);
+    count_down(domain_end, near_points, values);
+    count_up(domain_start, near_points, values);
+    count_down(domain_start, near_points, values);
+    count_up(domain_end, near_points, values);
+    count_down(domain_end, near_points, values);
 
     // Check some special values that aren't included in the above ranges
     values.push(F::NAN);
@@ -50,9 +46,9 @@ where
     // Check around asymptotes
     if let Some(f) = domain.check_points {
         let iter = f();
-        for x in iter.take(MAX_CHECK_POINTS) {
-            count_up(x, values);
-            count_down(x, values);
+        for x in iter.take(check_points) {
+            count_up(x, near_points, values);
+            count_down(x, near_points, values);
         }
     }
 
@@ -65,11 +61,11 @@ where
 
 /// Add `AROUND` values starting at and including `x` and counting up. Uses the smallest possible
 /// increments (1 ULP).
-fn count_up<F: Float>(mut x: F, values: &mut Vec<F>) {
+fn count_up<F: Float>(mut x: F, points: u64, values: &mut Vec<F>) {
     assert!(!x.is_nan());
 
     let mut count = 0;
-    while x < F::INFINITY && count < AROUND {
+    while x < F::INFINITY && count < points {
         values.push(x);
         x = x.next_up();
         count += 1;
@@ -78,11 +74,11 @@ fn count_up<F: Float>(mut x: F, values: &mut Vec<F>) {
 
 /// Add `AROUND` values starting at and including `x` and counting down. Uses the smallest possible
 /// increments (1 ULP).
-fn count_down<F: Float>(mut x: F, values: &mut Vec<F>) {
+fn count_down<F: Float>(mut x: F, points: u64, values: &mut Vec<F>) {
     assert!(!x.is_nan());
 
     let mut count = 0;
-    while x > F::NEG_INFINITY && count < AROUND {
+    while x > F::NEG_INFINITY && count < points {
         values.push(x);
         x = x.next_down();
         count += 1;
diff --git a/crates/libm-test/src/gen/random.rs b/crates/libm-test/src/gen/random.rs
index 4f75da07b..a30a3674e 100644
--- a/crates/libm-test/src/gen/random.rs
+++ b/crates/libm-test/src/gen/random.rs
@@ -12,6 +12,7 @@ use crate::{BaseName, CheckCtx, GenerateInput};
 const SEED: [u8; 32] = *b"3.141592653589793238462643383279";
 
 /// Number of tests to run.
+// FIXME(ntests): clean this up when possible
 const NTESTS: usize = {
     if cfg!(optimizations_enabled) {
         if crate::emulated()
diff --git a/crates/libm-test/src/lib.rs b/crates/libm-test/src/lib.rs
index c1aec0230..80ec23736 100644
--- a/crates/libm-test/src/lib.rs
+++ b/crates/libm-test/src/lib.rs
@@ -25,7 +25,7 @@ pub use libm::support::{Float, Int, IntTy, MinInt};
 pub use num::{FloatExt, logspace};
 pub use op::{BaseName, FloatTy, Identifier, MathOp, OpCFn, OpFTy, OpRustFn, OpRustRet, Ty};
 pub use precision::{MaybeOverride, SpecialCase, default_ulp};
-pub use run_cfg::{CheckBasis, CheckCtx};
+pub use run_cfg::{CheckBasis, CheckCtx, EXTENSIVE_ENV, GeneratorKind};
 pub use test_traits::{CheckOutput, GenerateInput, Hex, TupleCall};
 
 /// Result type for tests is usually from `anyhow`. Most times there is no success value to
diff --git a/crates/libm-test/src/run_cfg.rs b/crates/libm-test/src/run_cfg.rs
index eb7e0e2c1..46a6a1fad 100644
--- a/crates/libm-test/src/run_cfg.rs
+++ b/crates/libm-test/src/run_cfg.rs
@@ -1,13 +1,11 @@
 //! Configuration for how tests get run.
 
-#![allow(unused)]
-
-use std::collections::BTreeMap;
 use std::env;
 use std::sync::LazyLock;
 
-use crate::{BaseName, FloatTy, Identifier, op};
+use crate::{BaseName, FloatTy, Identifier, test_log};
 
+/// The environment variable indicating which extensive tests should be run.
 pub const EXTENSIVE_ENV: &str = "LIBM_EXTENSIVE_TESTS";
 
 /// Context passed to [`CheckOutput`].
@@ -49,3 +47,174 @@ pub enum CheckBasis {
     /// Check against infinite precision (MPFR).
     Mpfr,
 }
+
+/// The different kinds of generators that provide test input.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum GeneratorKind {
+    Domain,
+    Random,
+}
+
+/// A list of all functions that should get extensive tests.
+///
+/// This also supports the special test name `all` to run all tests, as well as `all_f16`,
+/// `all_f32`, `all_f64`, and `all_f128` to run all tests for a specific float type.
+static EXTENSIVE: LazyLock<Vec<Identifier>> = LazyLock::new(|| {
+    let var = env::var(EXTENSIVE_ENV).unwrap_or_default();
+    let list = var.split(",").filter(|s| !s.is_empty()).collect::<Vec<_>>();
+    let mut ret = Vec::new();
+
+    let append_ty_ops = |ret: &mut Vec<_>, fty: FloatTy| {
+        let iter = Identifier::ALL.iter().filter(move |id| id.math_op().float_ty == fty).copied();
+        ret.extend(iter);
+    };
+
+    for item in list {
+        match item {
+            "all" => ret = Identifier::ALL.to_owned(),
+            "all_f16" => append_ty_ops(&mut ret, FloatTy::F16),
+            "all_f32" => append_ty_ops(&mut ret, FloatTy::F32),
+            "all_f64" => append_ty_ops(&mut ret, FloatTy::F64),
+            "all_f128" => append_ty_ops(&mut ret, FloatTy::F128),
+            s => {
+                let id = Identifier::from_str(s)
+                    .unwrap_or_else(|| panic!("unrecognized test name `{s}`"));
+                ret.push(id);
+            }
+        }
+    }
+
+    ret
+});
+
+/// Information about the function to be tested.
+#[derive(Debug)]
+struct TestEnv {
+    /// Tests should be reduced because the platform is slow. E.g. 32-bit or emulated.
+    slow_platform: bool,
+    /// The float cannot be tested exhaustively, `f64` or `f128`.
+    large_float_ty: bool,
+    /// Env indicates that an extensive test should be run.
+    should_run_extensive: bool,
+    /// Multiprecision tests will be run.
+    mp_tests_enabled: bool,
+    /// The number of inputs to the function.
+    input_count: usize,
+}
+
+impl TestEnv {
+    fn from_env(ctx: &CheckCtx) -> Self {
+        let id = ctx.fn_ident;
+        let op = id.math_op();
+
+        let will_run_mp = cfg!(feature = "test-multiprecision");
+
+        // Tests are pretty slow on non-64-bit targets, x86 MacOS, and targets that run in QEMU. Start
+        // with a reduced number on these platforms.
+        let slow_on_ci = crate::emulated()
+            || usize::BITS < 64
+            || cfg!(all(target_arch = "x86_64", target_vendor = "apple"));
+        let slow_platform = slow_on_ci && crate::ci();
+
+        let large_float_ty = match op.float_ty {
+            FloatTy::F16 | FloatTy::F32 => false,
+            FloatTy::F64 | FloatTy::F128 => true,
+        };
+
+        let will_run_extensive = EXTENSIVE.contains(&id);
+
+        let input_count = op.rust_sig.args.len();
+
+        Self {
+            slow_platform,
+            large_float_ty,
+            should_run_extensive: will_run_extensive,
+            mp_tests_enabled: will_run_mp,
+            input_count,
+        }
+    }
+}
+
+/// The number of iterations to run for a given test.
+pub fn iteration_count(ctx: &CheckCtx, gen_kind: GeneratorKind, argnum: usize) -> u64 {
+    let t_env = TestEnv::from_env(ctx);
+
+    // Ideally run 5M tests
+    let mut domain_iter_count: u64 = 4_000_000;
+
+    // Start with a reduced number of tests on slow platforms.
+    if t_env.slow_platform {
+        domain_iter_count = 100_000;
+    }
+
+    // Larger float types get more iterations.
+    if t_env.large_float_ty {
+        domain_iter_count *= 4;
+    }
+
+    // Functions with more arguments get more iterations.
+    let arg_multiplier = 1 << (t_env.input_count - 1);
+    domain_iter_count *= arg_multiplier;
+
+    // If we will be running tests against MPFR, we don't need to test as much against musl.
+    // However, there are some platforms where we have to test against musl since MPFR can't be
+    // built.
+    if t_env.mp_tests_enabled && ctx.basis == CheckBasis::Musl {
+        domain_iter_count /= 100;
+    }
+
+    // Run fewer random tests than domain tests.
+    let random_iter_count = domain_iter_count / 100;
+
+    let mut total_iterations = match gen_kind {
+        GeneratorKind::Domain => domain_iter_count,
+        GeneratorKind::Random => random_iter_count,
+    };
+
+    if cfg!(optimizations_enabled) {
+        // Always run at least 10,000 tests.
+        total_iterations = total_iterations.max(10_000);
+    } else {
+        // Without optimizations, just run a quick check regardless of other parameters.
+        total_iterations = 800;
+    }
+
+    // Adjust for the number of inputs
+    let ntests = match t_env.input_count {
+        1 => total_iterations,
+        2 => (total_iterations as f64).sqrt().ceil() as u64,
+        3 => (total_iterations as f64).cbrt().ceil() as u64,
+        _ => panic!("test has more than three arguments"),
+    };
+    let total = ntests.pow(t_env.input_count.try_into().unwrap());
+
+    test_log(&format!(
+        "{gen_kind:?} {basis:?} {fn_ident} arg {arg}/{args}: {ntests} iterations \
+         ({total} total)",
+        basis = ctx.basis,
+        fn_ident = ctx.fn_ident,
+        arg = argnum + 1,
+        args = t_env.input_count,
+    ));
+
+    ntests
+}
+
+/// For domain tests, limit how many asymptotes or specified check points we test.
+pub fn check_point_count(ctx: &CheckCtx) -> usize {
+    let t_env = TestEnv::from_env(ctx);
+    if t_env.slow_platform || !cfg!(optimizations_enabled) { 4 } else { 10 }
+}
+
+/// When validating points of interest (e.g. asymptotes, inflection points, extremes), also check
+/// this many surrounding values.
+pub fn check_near_count(_ctx: &CheckCtx) -> u64 {
+    if cfg!(optimizations_enabled) { 100 } else { 10 }
+}
+
+/// Check whether extensive actions should be run or skipped.
+#[expect(dead_code, reason = "extensive tests have not yet been added")]
+pub fn skip_extensive_test(ctx: &CheckCtx) -> bool {
+    let t_env = TestEnv::from_env(ctx);
+    !t_env.should_run_extensive
+}

From ae8bf8c29d7f4a55cd1337ef260b512618a8c55b Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 30 Dec 2024 06:12:16 +0000
Subject: [PATCH 084/279] Add an iterator that ensures known size

Introduce the `KnownSize` iterator wrapper, which allows providing the
size at construction time. This provides an `ExactSizeIterator`
implemenation so we can check a generator's value count during testing.
---
 crates/libm-test/src/gen.rs | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/crates/libm-test/src/gen.rs b/crates/libm-test/src/gen.rs
index 2d15915d9..2305d2a23 100644
--- a/crates/libm-test/src/gen.rs
+++ b/crates/libm-test/src/gen.rs
@@ -5,6 +5,43 @@ pub mod domain_logspace;
 pub mod edge_cases;
 pub mod random;
 
+/// A wrapper to turn any iterator into an `ExactSizeIterator`. Asserts the final result to ensure
+/// the provided size was correct.
+#[derive(Debug)]
+pub struct KnownSize<I> {
+    total: u64,
+    current: u64,
+    iter: I,
+}
+
+impl<I> KnownSize<I> {
+    pub fn new(iter: I, total: u64) -> Self {
+        Self { total, current: 0, iter }
+    }
+}
+
+impl<I: Iterator> Iterator for KnownSize<I> {
+    type Item = I::Item;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let next = self.iter.next();
+        if next.is_some() {
+            self.current += 1;
+            return next;
+        }
+
+        assert_eq!(self.current, self.total, "total items did not match expected");
+        None
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        let remaining = usize::try_from(self.total - self.current).unwrap();
+        (remaining, Some(remaining))
+    }
+}
+
+impl<I: Iterator> ExactSizeIterator for KnownSize<I> {}
+
 /// Helper type to turn any reusable input into a generator.
 #[derive(Clone, Debug, Default)]
 pub struct CachedInput {

From addbb18eec710d29382e1e85034190f398274aaa Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sun, 29 Dec 2024 11:23:08 +0000
Subject: [PATCH 085/279] Rewrite the random test generator

Currently, all inputs are generated and then cached. This works
reasonably well but it isn't very configurable or extensible (adding
`f16` and `f128` is awkward).

Replace this with a trait for generating random sequences of tuples.
This also removes possible storage limitations of caching all inputs.
---
 crates/libm-test/benches/random.rs           |   7 +-
 crates/libm-test/src/gen.rs                  |  69 -------
 crates/libm-test/src/gen/random.rs           | 206 +++++++++----------
 crates/libm-test/src/lib.rs                  |   2 +-
 crates/libm-test/src/run_cfg.rs              |  32 ++-
 crates/libm-test/src/test_traits.rs          |   8 +-
 crates/libm-test/tests/compare_built_musl.rs |  11 +-
 crates/libm-test/tests/multiprecision.rs     |   9 +-
 8 files changed, 148 insertions(+), 196 deletions(-)

diff --git a/crates/libm-test/benches/random.rs b/crates/libm-test/benches/random.rs
index 06997cd36..23f429455 100644
--- a/crates/libm-test/benches/random.rs
+++ b/crates/libm-test/benches/random.rs
@@ -2,8 +2,9 @@ use std::hint::black_box;
 use std::time::Duration;
 
 use criterion::{Criterion, criterion_main};
-use libm_test::gen::{CachedInput, random};
-use libm_test::{CheckBasis, CheckCtx, GenerateInput, MathOp, TupleCall};
+use libm_test::gen::random;
+use libm_test::gen::random::RandomInput;
+use libm_test::{CheckBasis, CheckCtx, MathOp, TupleCall};
 
 /// Benchmark with this many items to get a variety
 const BENCH_ITER_ITEMS: usize = if cfg!(feature = "short-benchmarks") { 50 } else { 500 };
@@ -47,7 +48,7 @@ macro_rules! musl_rand_benches {
 fn bench_one<Op>(c: &mut Criterion, musl_extra: MuslExtra<Op::CFn>)
 where
     Op: MathOp,
-    CachedInput: GenerateInput<Op::RustArgs>,
+    Op::RustArgs: RandomInput,
 {
     let name = Op::NAME;
 
diff --git a/crates/libm-test/src/gen.rs b/crates/libm-test/src/gen.rs
index 2305d2a23..83e00f31d 100644
--- a/crates/libm-test/src/gen.rs
+++ b/crates/libm-test/src/gen.rs
@@ -1,6 +1,5 @@
 //! Different generators that can create random or systematic bit patterns.
 
-use crate::GenerateInput;
 pub mod domain_logspace;
 pub mod edge_cases;
 pub mod random;
@@ -41,71 +40,3 @@ impl<I: Iterator> Iterator for KnownSize<I> {
 }
 
 impl<I: Iterator> ExactSizeIterator for KnownSize<I> {}
-
-/// Helper type to turn any reusable input into a generator.
-#[derive(Clone, Debug, Default)]
-pub struct CachedInput {
-    pub inputs_f32: Vec<(f32, f32, f32)>,
-    pub inputs_f64: Vec<(f64, f64, f64)>,
-    pub inputs_i32: Vec<(i32, i32, i32)>,
-}
-
-impl GenerateInput<(f32,)> for CachedInput {
-    fn get_cases(&self) -> impl Iterator<Item = (f32,)> {
-        self.inputs_f32.iter().map(|f| (f.0,))
-    }
-}
-
-impl GenerateInput<(f32, f32)> for CachedInput {
-    fn get_cases(&self) -> impl Iterator<Item = (f32, f32)> {
-        self.inputs_f32.iter().map(|f| (f.0, f.1))
-    }
-}
-
-impl GenerateInput<(i32, f32)> for CachedInput {
-    fn get_cases(&self) -> impl Iterator<Item = (i32, f32)> {
-        self.inputs_i32.iter().zip(self.inputs_f32.iter()).map(|(i, f)| (i.0, f.0))
-    }
-}
-
-impl GenerateInput<(f32, i32)> for CachedInput {
-    fn get_cases(&self) -> impl Iterator<Item = (f32, i32)> {
-        GenerateInput::<(i32, f32)>::get_cases(self).map(|(i, f)| (f, i))
-    }
-}
-
-impl GenerateInput<(f32, f32, f32)> for CachedInput {
-    fn get_cases(&self) -> impl Iterator<Item = (f32, f32, f32)> {
-        self.inputs_f32.iter().copied()
-    }
-}
-
-impl GenerateInput<(f64,)> for CachedInput {
-    fn get_cases(&self) -> impl Iterator<Item = (f64,)> {
-        self.inputs_f64.iter().map(|f| (f.0,))
-    }
-}
-
-impl GenerateInput<(f64, f64)> for CachedInput {
-    fn get_cases(&self) -> impl Iterator<Item = (f64, f64)> {
-        self.inputs_f64.iter().map(|f| (f.0, f.1))
-    }
-}
-
-impl GenerateInput<(i32, f64)> for CachedInput {
-    fn get_cases(&self) -> impl Iterator<Item = (i32, f64)> {
-        self.inputs_i32.iter().zip(self.inputs_f64.iter()).map(|(i, f)| (i.0, f.0))
-    }
-}
-
-impl GenerateInput<(f64, i32)> for CachedInput {
-    fn get_cases(&self) -> impl Iterator<Item = (f64, i32)> {
-        GenerateInput::<(i32, f64)>::get_cases(self).map(|(i, f)| (f, i))
-    }
-}
-
-impl GenerateInput<(f64, f64, f64)> for CachedInput {
-    fn get_cases(&self) -> impl Iterator<Item = (f64, f64, f64)> {
-        self.inputs_f64.iter().copied()
-    }
-}
diff --git a/crates/libm-test/src/gen/random.rs b/crates/libm-test/src/gen/random.rs
index a30a3674e..6df944317 100644
--- a/crates/libm-test/src/gen/random.rs
+++ b/crates/libm-test/src/gen/random.rs
@@ -1,120 +1,118 @@
-//! A simple generator that produces deterministic random input, caching to use the same
-//! inputs for all functions.
-
+use std::env;
+use std::ops::RangeInclusive;
 use std::sync::LazyLock;
 
+use libm::support::Float;
+use rand::distributions::{Alphanumeric, Standard};
+use rand::prelude::Distribution;
 use rand::{Rng, SeedableRng};
 use rand_chacha::ChaCha8Rng;
 
-use super::CachedInput;
-use crate::{BaseName, CheckCtx, GenerateInput};
-
-const SEED: [u8; 32] = *b"3.141592653589793238462643383279";
-
-/// Number of tests to run.
-// FIXME(ntests): clean this up when possible
-const NTESTS: usize = {
-    if cfg!(optimizations_enabled) {
-        if crate::emulated()
-            || !cfg!(target_pointer_width = "64")
-            || cfg!(all(target_arch = "x86_64", target_vendor = "apple"))
-        {
-            // Tests are pretty slow on non-64-bit targets, x86 MacOS, and targets that run
-            // in QEMU.
-            100_000
-        } else {
-            5_000_000
-        }
-    } else {
-        // Without optimizations just run a quick check
-        800
-    }
-};
-
-/// Tested inputs.
-static TEST_CASES: LazyLock<CachedInput> = LazyLock::new(|| make_test_cases(NTESTS));
-
-/// The first argument to `jn` and `jnf` is the number of iterations. Make this a reasonable
-/// value so tests don't run forever.
-static TEST_CASES_JN: LazyLock<CachedInput> = LazyLock::new(|| {
-    // Start with regular test cases
-    let mut cases = (*TEST_CASES).clone();
-
-    // These functions are extremely slow, limit them
-    let ntests_jn = (NTESTS / 1000).max(80);
-    cases.inputs_i32.truncate(ntests_jn);
-    cases.inputs_f32.truncate(ntests_jn);
-    cases.inputs_f64.truncate(ntests_jn);
-
-    // It is easy to overflow the stack with these in debug mode
-    let max_iterations = if cfg!(optimizations_enabled) && cfg!(target_pointer_width = "64") {
-        0xffff
-    } else if cfg!(windows) {
-        0x00ff
-    } else {
-        0x0fff
-    };
+use super::KnownSize;
+use crate::run_cfg::{int_range, iteration_count};
+use crate::{CheckCtx, GeneratorKind};
 
-    let mut rng = ChaCha8Rng::from_seed(SEED);
+pub(crate) const SEED_ENV: &str = "LIBM_SEED";
 
-    for case in cases.inputs_i32.iter_mut() {
-        case.0 = rng.gen_range(3..=max_iterations);
-    }
+pub(crate) static SEED: LazyLock<[u8; 32]> = LazyLock::new(|| {
+    let s = env::var(SEED_ENV).unwrap_or_else(|_| {
+        let mut rng = rand::thread_rng();
+        (0..32).map(|_| rng.sample(Alphanumeric) as char).collect()
+    });
 
-    cases
+    s.as_bytes().try_into().unwrap_or_else(|_| {
+        panic!("Seed must be 32 characters, got `{s}`");
+    })
 });
 
-fn make_test_cases(ntests: usize) -> CachedInput {
-    let mut rng = ChaCha8Rng::from_seed(SEED);
-
-    // make sure we include some basic cases
-    let mut inputs_i32 = vec![(0, 0, 0), (1, 1, 1), (-1, -1, -1)];
-    let mut inputs_f32 = vec![
-        (0.0, 0.0, 0.0),
-        (f32::EPSILON, f32::EPSILON, f32::EPSILON),
-        (f32::INFINITY, f32::INFINITY, f32::INFINITY),
-        (f32::NEG_INFINITY, f32::NEG_INFINITY, f32::NEG_INFINITY),
-        (f32::MAX, f32::MAX, f32::MAX),
-        (f32::MIN, f32::MIN, f32::MIN),
-        (f32::MIN_POSITIVE, f32::MIN_POSITIVE, f32::MIN_POSITIVE),
-        (f32::NAN, f32::NAN, f32::NAN),
-    ];
-    let mut inputs_f64 = vec![
-        (0.0, 0.0, 0.0),
-        (f64::EPSILON, f64::EPSILON, f64::EPSILON),
-        (f64::INFINITY, f64::INFINITY, f64::INFINITY),
-        (f64::NEG_INFINITY, f64::NEG_INFINITY, f64::NEG_INFINITY),
-        (f64::MAX, f64::MAX, f64::MAX),
-        (f64::MIN, f64::MIN, f64::MIN),
-        (f64::MIN_POSITIVE, f64::MIN_POSITIVE, f64::MIN_POSITIVE),
-        (f64::NAN, f64::NAN, f64::NAN),
-    ];
-
-    inputs_i32.extend((0..(ntests - inputs_i32.len())).map(|_| rng.gen::<(i32, i32, i32)>()));
-
-    // Generate integers to get a full range of bitpatterns, then convert back to
-    // floats.
-    inputs_f32.extend((0..(ntests - inputs_f32.len())).map(|_| {
-        let ints = rng.gen::<(u32, u32, u32)>();
-        (f32::from_bits(ints.0), f32::from_bits(ints.1), f32::from_bits(ints.2))
-    }));
-    inputs_f64.extend((0..(ntests - inputs_f64.len())).map(|_| {
-        let ints = rng.gen::<(u64, u64, u64)>();
-        (f64::from_bits(ints.0), f64::from_bits(ints.1), f64::from_bits(ints.2))
-    }));
-
-    CachedInput { inputs_f32, inputs_f64, inputs_i32 }
+/// Generate a sequence of random values of this type.
+pub trait RandomInput {
+    fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self>;
 }
 
-/// Create a test case iterator.
-pub fn get_test_cases<RustArgs>(ctx: &CheckCtx) -> impl Iterator<Item = RustArgs>
+/// Generate a sequence of deterministically random floats.
+fn random_floats<F: Float>(count: u64) -> impl Iterator<Item = F>
 where
-    CachedInput: GenerateInput<RustArgs>,
+    Standard: Distribution<F::Int>,
 {
-    let inputs = if ctx.base_name == BaseName::Jn || ctx.base_name == BaseName::Yn {
-        &TEST_CASES_JN
-    } else {
-        &TEST_CASES
+    let mut rng = ChaCha8Rng::from_seed(*SEED);
+
+    // Generate integers to get a full range of bitpatterns (including NaNs), then convert back
+    // to the float type.
+    (0..count).map(move |_| F::from_bits(rng.gen::<F::Int>()))
+}
+
+/// Generate a sequence of deterministically random `i32`s within a specified range.
+fn random_ints(count: u64, range: RangeInclusive<i32>) -> impl Iterator<Item = i32> {
+    let mut rng = ChaCha8Rng::from_seed(*SEED);
+    (0..count).map(move |_| rng.gen_range::<i32, _>(range.clone()))
+}
+
+macro_rules! impl_random_input {
+    ($fty:ty) => {
+        impl RandomInput for ($fty,) {
+            fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
+                let count = iteration_count(ctx, GeneratorKind::Random, 0);
+                let iter = random_floats(count).map(|f: $fty| (f,));
+                KnownSize::new(iter, count)
+            }
+        }
+
+        impl RandomInput for ($fty, $fty) {
+            fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
+                let count0 = iteration_count(ctx, GeneratorKind::Random, 0);
+                let count1 = iteration_count(ctx, GeneratorKind::Random, 1);
+                let iter = random_floats(count0)
+                    .flat_map(move |f1: $fty| random_floats(count1).map(move |f2: $fty| (f1, f2)));
+                KnownSize::new(iter, count0 * count1)
+            }
+        }
+
+        impl RandomInput for ($fty, $fty, $fty) {
+            fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
+                let count0 = iteration_count(ctx, GeneratorKind::Random, 0);
+                let count1 = iteration_count(ctx, GeneratorKind::Random, 1);
+                let count2 = iteration_count(ctx, GeneratorKind::Random, 2);
+                let iter = random_floats(count0).flat_map(move |f1: $fty| {
+                    random_floats(count1).flat_map(move |f2: $fty| {
+                        random_floats(count2).map(move |f3: $fty| (f1, f2, f3))
+                    })
+                });
+                KnownSize::new(iter, count0 * count1 * count2)
+            }
+        }
+
+        impl RandomInput for (i32, $fty) {
+            fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
+                let count0 = iteration_count(ctx, GeneratorKind::Random, 0);
+                let count1 = iteration_count(ctx, GeneratorKind::Random, 1);
+                let range0 = int_range(ctx, 0);
+                let iter = random_ints(count0, range0)
+                    .flat_map(move |f1: i32| random_floats(count1).map(move |f2: $fty| (f1, f2)));
+                KnownSize::new(iter, count0 * count1)
+            }
+        }
+
+        impl RandomInput for ($fty, i32) {
+            fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
+                let count0 = iteration_count(ctx, GeneratorKind::Random, 0);
+                let count1 = iteration_count(ctx, GeneratorKind::Random, 1);
+                let range1 = int_range(ctx, 1);
+                let iter = random_floats(count0).flat_map(move |f1: $fty| {
+                    random_ints(count1, range1.clone()).map(move |f2: i32| (f1, f2))
+                });
+                KnownSize::new(iter, count0 * count1)
+            }
+        }
     };
-    inputs.get_cases()
+}
+
+impl_random_input!(f32);
+impl_random_input!(f64);
+
+/// Create a test case iterator.
+pub fn get_test_cases<RustArgs: RandomInput>(
+    ctx: &CheckCtx,
+) -> impl Iterator<Item = RustArgs> + use<'_, RustArgs> {
+    RustArgs::get_cases(ctx)
 }
diff --git a/crates/libm-test/src/lib.rs b/crates/libm-test/src/lib.rs
index 80ec23736..8a4e782df 100644
--- a/crates/libm-test/src/lib.rs
+++ b/crates/libm-test/src/lib.rs
@@ -26,7 +26,7 @@ pub use num::{FloatExt, logspace};
 pub use op::{BaseName, FloatTy, Identifier, MathOp, OpCFn, OpFTy, OpRustFn, OpRustRet, Ty};
 pub use precision::{MaybeOverride, SpecialCase, default_ulp};
 pub use run_cfg::{CheckBasis, CheckCtx, EXTENSIVE_ENV, GeneratorKind};
-pub use test_traits::{CheckOutput, GenerateInput, Hex, TupleCall};
+pub use test_traits::{CheckOutput, Hex, TupleCall};
 
 /// Result type for tests is usually from `anyhow`. Most times there is no success value to
 /// propagate.
diff --git a/crates/libm-test/src/run_cfg.rs b/crates/libm-test/src/run_cfg.rs
index 46a6a1fad..9cede0cc7 100644
--- a/crates/libm-test/src/run_cfg.rs
+++ b/crates/libm-test/src/run_cfg.rs
@@ -1,8 +1,10 @@
 //! Configuration for how tests get run.
 
-use std::env;
+use std::ops::RangeInclusive;
 use std::sync::LazyLock;
+use std::{env, str};
 
+use crate::gen::random::{SEED, SEED_ENV};
 use crate::{BaseName, FloatTy, Identifier, test_log};
 
 /// The environment variable indicating which extensive tests should be run.
@@ -188,9 +190,16 @@ pub fn iteration_count(ctx: &CheckCtx, gen_kind: GeneratorKind, argnum: usize) -
     };
     let total = ntests.pow(t_env.input_count.try_into().unwrap());
 
+    let seed_msg = match gen_kind {
+        GeneratorKind::Domain => String::new(),
+        GeneratorKind::Random => {
+            format!(" using `{SEED_ENV}={}`", str::from_utf8(SEED.as_slice()).unwrap())
+        }
+    };
+
     test_log(&format!(
         "{gen_kind:?} {basis:?} {fn_ident} arg {arg}/{args}: {ntests} iterations \
-         ({total} total)",
+         ({total} total){seed_msg}",
         basis = ctx.basis,
         fn_ident = ctx.fn_ident,
         arg = argnum + 1,
@@ -200,6 +209,25 @@ pub fn iteration_count(ctx: &CheckCtx, gen_kind: GeneratorKind, argnum: usize) -
     ntests
 }
 
+/// Some tests require that an integer be kept within reasonable limits; generate that here.
+pub fn int_range(ctx: &CheckCtx, argnum: usize) -> RangeInclusive<i32> {
+    let t_env = TestEnv::from_env(ctx);
+
+    if !matches!(ctx.base_name, BaseName::Jn | BaseName::Yn) {
+        return i32::MIN..=i32::MAX;
+    }
+
+    assert_eq!(argnum, 0, "For `jn`/`yn`, only the first argument takes an integer");
+
+    // The integer argument to `jn` is an iteration count. Limit this to ensure tests can be
+    // completed in a reasonable amount of time.
+    if t_env.slow_platform || !cfg!(optimizations_enabled) {
+        (-0xf)..=0xff
+    } else {
+        (-0xff)..=0xffff
+    }
+}
+
 /// For domain tests, limit how many asymptotes or specified check points we test.
 pub fn check_point_count(ctx: &CheckCtx) -> usize {
     let t_env = TestEnv::from_env(ctx);
diff --git a/crates/libm-test/src/test_traits.rs b/crates/libm-test/src/test_traits.rs
index 6b833dfb5..261d1f254 100644
--- a/crates/libm-test/src/test_traits.rs
+++ b/crates/libm-test/src/test_traits.rs
@@ -1,8 +1,7 @@
 //! Traits related to testing.
 //!
-//! There are three main traits in this module:
+//! There are two main traits in this module:
 //!
-//! - `GenerateInput`: implemented on any types that create test cases.
 //! - `TupleCall`: implemented on tuples to allow calling them as function arguments.
 //! - `CheckOutput`: implemented on anything that is an output type for validation against an
 //!   expected value.
@@ -13,11 +12,6 @@ use anyhow::{Context, bail, ensure};
 
 use crate::{CheckCtx, Float, Int, MaybeOverride, SpecialCase, TestResult};
 
-/// Implement this on types that can generate a sequence of tuples for test input.
-pub trait GenerateInput<TupleArgs> {
-    fn get_cases(&self) -> impl Iterator<Item = TupleArgs>;
-}
-
 /// Trait for calling a function with a tuple as arguments.
 ///
 /// Implemented on the tuple with the function signature as the generic (so we can use the same
diff --git a/crates/libm-test/tests/compare_built_musl.rs b/crates/libm-test/tests/compare_built_musl.rs
index 71f080ab1..ecd379a0a 100644
--- a/crates/libm-test/tests/compare_built_musl.rs
+++ b/crates/libm-test/tests/compare_built_musl.rs
@@ -9,8 +9,9 @@
 // There are some targets we can't build musl for
 #![cfg(feature = "build-musl")]
 
-use libm_test::gen::{CachedInput, random};
-use libm_test::{CheckBasis, CheckCtx, CheckOutput, GenerateInput, MathOp, TupleCall};
+use libm_test::gen::random;
+use libm_test::gen::random::RandomInput;
+use libm_test::{CheckBasis, CheckCtx, CheckOutput, MathOp, TupleCall};
 
 macro_rules! musl_rand_tests {
     (
@@ -21,16 +22,16 @@ macro_rules! musl_rand_tests {
             #[test]
             $(#[$attr])*
             fn [< musl_random_ $fn_name >]() {
-                test_one::<libm_test::op::$fn_name::Routine>(musl_math_sys::$fn_name);
+                test_one_random::<libm_test::op::$fn_name::Routine>(musl_math_sys::$fn_name);
             }
         }
     };
 }
 
-fn test_one<Op>(musl_fn: Op::CFn)
+fn test_one_random<Op>(musl_fn: Op::CFn)
 where
     Op: MathOp,
-    CachedInput: GenerateInput<Op::RustArgs>,
+    Op::RustArgs: RandomInput,
 {
     let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Musl);
     let cases = random::get_test_cases::<Op::RustArgs>(&ctx);
diff --git a/crates/libm-test/tests/multiprecision.rs b/crates/libm-test/tests/multiprecision.rs
index 4cdba0942..960c370d4 100644
--- a/crates/libm-test/tests/multiprecision.rs
+++ b/crates/libm-test/tests/multiprecision.rs
@@ -3,11 +3,10 @@
 #![cfg(feature = "test-multiprecision")]
 
 use libm_test::domain::HasDomain;
-use libm_test::gen::{CachedInput, domain_logspace, edge_cases, random};
+use libm_test::gen::random::RandomInput;
+use libm_test::gen::{domain_logspace, edge_cases, random};
 use libm_test::mpfloat::MpOp;
-use libm_test::{
-    CheckBasis, CheckCtx, CheckOutput, GenerateInput, MathOp, OpFTy, OpRustFn, OpRustRet, TupleCall,
-};
+use libm_test::{CheckBasis, CheckCtx, CheckOutput, MathOp, OpFTy, OpRustFn, OpRustRet, TupleCall};
 
 /// Test against MPFR with random inputs.
 macro_rules! mp_rand_tests {
@@ -29,7 +28,7 @@ macro_rules! mp_rand_tests {
 fn test_one_random<Op>()
 where
     Op: MathOp + MpOp,
-    CachedInput: GenerateInput<Op::RustArgs>,
+    Op::RustArgs: RandomInput,
 {
     let mut mp_vals = Op::new_mp();
     let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Mpfr);

From ac3ff8cfdcab83a523b40c55330d36b470e9205b Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 6 Jan 2025 00:36:18 +0000
Subject: [PATCH 086/279] Update precision based on new test results

---
 crates/libm-test/src/precision.rs | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/crates/libm-test/src/precision.rs b/crates/libm-test/src/precision.rs
index 8bedcde44..a8efe1015 100644
--- a/crates/libm-test/src/precision.rs
+++ b/crates/libm-test/src/precision.rs
@@ -90,8 +90,15 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 {
             Bn::Exp10 if usize::BITS < 64 => ulp = 4,
             Bn::Lgamma | Bn::LgammaR => ulp = 400,
             Bn::Tanh => ulp = 4,
-            _ if ctx.fn_ident == Id::Sincosf => ulp = 500,
-            _ if ctx.fn_ident == Id::Tgamma => ulp = 20,
+            _ => (),
+        }
+
+        match ctx.fn_ident {
+            // FIXME(#401): musl has an incorrect result here.
+            Id::Fdim => ulp = 2,
+            Id::Jnf | Id::Ynf => ulp = 4000,
+            Id::Sincosf => ulp = 500,
+            Id::Tgamma => ulp = 20,
             _ => (),
         }
     }
@@ -99,6 +106,8 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 {
     // In some cases, our implementation is less accurate than musl on i586.
     if cfg!(x86_no_sse) {
         match ctx.fn_ident {
+            Id::Asinh => ulp = 3,
+            Id::Asinhf => ulp = 3,
             Id::Log1p | Id::Log1pf => ulp = 2,
             Id::Round => ulp = 1,
             Id::Tan => ulp = 2,

From e69097155b268ceb2f9478b9397079ca3e923783 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 6 Jan 2025 02:52:24 +0000
Subject: [PATCH 087/279] Rename `unstable-test-support` to
 `unstable-public-internals`

The `support` module that this feature makes public will be useful for
implementations in `compiler-builtins`, not only for testing. Give this
feature a more accurate name.
---
 Cargo.toml                                     |  2 +-
 crates/compiler-builtins-smoke-test/Cargo.toml | 13 +++++--------
 crates/libm-test/Cargo.toml                    |  2 +-
 src/math/mod.rs                                |  6 +++---
 4 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index bfc11509e..dc362779e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -27,7 +27,7 @@ unstable = ["unstable-intrinsics", "unstable-float"]
 unstable-intrinsics = []
 
 # Make some internal things public for testing.
-unstable-test-support = []
+unstable-public-internals = []
 
 # Enable the nightly-only `f16` and `f128`.
 unstable-float = []
diff --git a/crates/compiler-builtins-smoke-test/Cargo.toml b/crates/compiler-builtins-smoke-test/Cargo.toml
index 82cfeecb9..1f09ce99c 100644
--- a/crates/compiler-builtins-smoke-test/Cargo.toml
+++ b/crates/compiler-builtins-smoke-test/Cargo.toml
@@ -9,14 +9,6 @@ publish = false
 test = false
 bench = false
 
-[features]
-# Duplicated from libm's Cargo.toml
-unstable = []
-unstable-intrinsics = []
-unstable-test-support = []
-checked = []
-force-soft-floats = []
-
 [lints.rust]
 unexpected_cfgs = { level = "warn", check-cfg = [
   "cfg(arch_enabled)",
@@ -24,4 +16,9 @@ unexpected_cfgs = { level = "warn", check-cfg = [
   "cfg(f128_enabled)",
   "cfg(f16_enabled)",
   "cfg(intrinsics_enabled)",
+  'cfg(feature, values("checked"))',
+  'cfg(feature, values("force-soft-floats"))',
+  'cfg(feature, values("unstable"))',
+  'cfg(feature, values("unstable-intrinsics"))',
+  'cfg(feature, values("unstable-public-internals"))',
 ] }
diff --git a/crates/libm-test/Cargo.toml b/crates/libm-test/Cargo.toml
index f2dd88fa1..9b3ab5c53 100644
--- a/crates/libm-test/Cargo.toml
+++ b/crates/libm-test/Cargo.toml
@@ -26,7 +26,7 @@ short-benchmarks = []
 [dependencies]
 anyhow = "1.0.90"
 az = { version = "1.2.1", optional = true }
-libm = { path = "../..", features = ["unstable-test-support"] }
+libm = { path = "../..", features = ["unstable-public-internals"] }
 libm-macros = { path = "../libm-macros" }
 musl-math-sys = { path = "../musl-math-sys", optional = true }
 paste = "1.0.15"
diff --git a/src/math/mod.rs b/src/math/mod.rs
index e7b21de67..9003a8342 100644
--- a/src/math/mod.rs
+++ b/src/math/mod.rs
@@ -76,15 +76,15 @@ macro_rules! div {
 
 // `support` may be public for testing
 #[macro_use]
-#[cfg(feature = "unstable-test-support")]
+#[cfg(feature = "unstable-public-internals")]
 pub mod support;
 
 #[macro_use]
-#[cfg(not(feature = "unstable-test-support"))]
+#[cfg(not(feature = "unstable-public-internals"))]
 mod support;
 
 cfg_if! {
-    if #[cfg(feature = "unstable-test-support")] {
+    if #[cfg(feature = "unstable-public-internals")] {
         pub mod generic;
     } else {
         mod generic;

From ff15e465d6d4beb81e686fdfdeb40388b474bb3e Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 6 Jan 2025 03:00:54 +0000
Subject: [PATCH 088/279] Increase the precision for `jn` and `jnf`

New random seeds seem to indicate that this test does have some more
failures, this is a recent failure on i586:

    ---- musl_random_jnf stdout ----
    Random Musl jnf arg 1/2: 100 iterations (10000 total) using `LIBM_SEED=nLfzQ3U1OBVvqWaMBcto84UTMsC5FIaC`
    Random Musl jnf arg 2/2: 100 iterations (10000 total) using `LIBM_SEED=nLfzQ3U1OBVvqWaMBcto84UTMsC5FIaC`

    thread 'musl_random_jnf' panicked at crates/libm-test/tests/compare_built_musl.rs:43:51:
    called `Result::unwrap()` on an `Err` value:
        input:    (205, 5497.891) (0x000000cd, 0x45abcf21)
        expected: 7.3291517e-6           0x36f5ecef
        actual:   7.331668e-6            0x36f6028c

    Caused by:
        ulp 5533 > 4000

It seems unlikely that `jn` would somehow have better precision than
`j0`/`j1`, so just use the same precision.
---
 crates/libm-test/src/precision.rs | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/crates/libm-test/src/precision.rs b/crates/libm-test/src/precision.rs
index a8efe1015..4a6ca8af7 100644
--- a/crates/libm-test/src/precision.rs
+++ b/crates/libm-test/src/precision.rs
@@ -47,8 +47,7 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 {
         Bn::Tgamma if ctx.fn_ident != Id::Tgamma => 0,
 
         // Bessel functions have large inaccuracies.
-        Bn::J0 | Bn::J1 | Bn::Y0 | Bn::Y1 => 8_000_000,
-        Bn::Jn | Bn::Yn => 1_000,
+        Bn::J0 | Bn::J1 | Bn::Y0 | Bn::Y1 | Bn::Jn | Bn::Yn => 8_000_000,
 
         // For all other operations, specify our implementation's worst case precision.
         Bn::Acos => 1,
@@ -96,7 +95,6 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 {
         match ctx.fn_ident {
             // FIXME(#401): musl has an incorrect result here.
             Id::Fdim => ulp = 2,
-            Id::Jnf | Id::Ynf => ulp = 4000,
             Id::Sincosf => ulp = 500,
             Id::Tgamma => ulp = 20,
             _ => (),

From ba4bc97c0e3b55908a6a86d7496db33eb6a8b033 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sun, 5 Jan 2025 01:58:40 +0000
Subject: [PATCH 089/279] Add tests against MPFR for `ilogb` and `ilogbf`

---
 crates/libm-test/src/mpfloat.rs          | 23 +++++++++++++++++++++++
 crates/libm-test/tests/multiprecision.rs |  6 ------
 2 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/crates/libm-test/src/mpfloat.rs b/crates/libm-test/src/mpfloat.rs
index 8b8298004..ad98fafc8 100644
--- a/crates/libm-test/src/mpfloat.rs
+++ b/crates/libm-test/src/mpfloat.rs
@@ -277,6 +277,29 @@ macro_rules! impl_op_for_ty {
                 }
             }
 
+            impl MpOp for crate::op::[<ilogb $suffix>]::Routine {
+                type MpTy = MpFloat;
+
+                fn new_mp() -> Self::MpTy {
+                    new_mpfloat::<Self::FTy>()
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.assign(input.0);
+
+                    // `get_exp` follows `frexp` for `0.5 <= |m| < 1.0`. Adjust the exponent by
+                    // one to scale the significand to `1.0 <= |m| < 2.0`.
+                    this.get_exp().map(|v| v - 1).unwrap_or_else(|| {
+                        if this.is_infinite() {
+                            i32::MAX
+                        } else {
+                            // Zero or NaN
+                            i32::MIN
+                        }
+                    })
+                }
+            }
+
             impl MpOp for crate::op::[<jn $suffix>]::Routine {
                 type MpTy = MpFloat;
 
diff --git a/crates/libm-test/tests/multiprecision.rs b/crates/libm-test/tests/multiprecision.rs
index 960c370d4..42ec965c1 100644
--- a/crates/libm-test/tests/multiprecision.rs
+++ b/crates/libm-test/tests/multiprecision.rs
@@ -51,8 +51,6 @@ libm_macros::for_each_function! {
     ],
     skip: [
         // FIXME: MPFR tests needed
-        ilogb,
-        ilogbf,
         remquo,
         remquof,
 
@@ -150,9 +148,5 @@ libm_macros::for_each_function! {
         scalbnf,
         yn,
         ynf,
-
-        // FIXME: MPFR tests needed
-        ilogb,
-        ilogbf,
     ],
 }

From 9080785a20d7bc798403e5974cd797393aef8adb Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 2 Jan 2025 10:21:35 +0000
Subject: [PATCH 090/279] Add more detailed definition output for
 `update-api-list.py`

Update the script to produce, in addition to the simple text list, a
JSON file listing routine names, the types they work with, and the
source files that contain a function with the routine name. This gets
consumed by another script and will be used to determine which extensive
CI jobs to run.
---
 etc/function-definitions.json | 764 ++++++++++++++++++++++++++++++++++
 etc/update-api-list.py        | 246 ++++++++---
 2 files changed, 945 insertions(+), 65 deletions(-)
 create mode 100644 etc/function-definitions.json

diff --git a/etc/function-definitions.json b/etc/function-definitions.json
new file mode 100644
index 000000000..4b10812c3
--- /dev/null
+++ b/etc/function-definitions.json
@@ -0,0 +1,764 @@
+{
+    "__comment": "Autogenerated by update-api-list.py. List of files that define a function with a given name. This file is checked in to make it obvious if refactoring breaks things",
+    "acos": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/acos.rs"
+        ],
+        "type": "f64"
+    },
+    "acosf": {
+        "sources": [
+            "src/math/acosf.rs"
+        ],
+        "type": "f32"
+    },
+    "acosh": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/acosh.rs"
+        ],
+        "type": "f64"
+    },
+    "acoshf": {
+        "sources": [
+            "src/math/acoshf.rs"
+        ],
+        "type": "f32"
+    },
+    "asin": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/asin.rs"
+        ],
+        "type": "f64"
+    },
+    "asinf": {
+        "sources": [
+            "src/math/asinf.rs"
+        ],
+        "type": "f32"
+    },
+    "asinh": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/asinh.rs"
+        ],
+        "type": "f64"
+    },
+    "asinhf": {
+        "sources": [
+            "src/math/asinhf.rs"
+        ],
+        "type": "f32"
+    },
+    "atan": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/atan.rs"
+        ],
+        "type": "f64"
+    },
+    "atan2": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/atan2.rs"
+        ],
+        "type": "f64"
+    },
+    "atan2f": {
+        "sources": [
+            "src/math/atan2f.rs"
+        ],
+        "type": "f32"
+    },
+    "atanf": {
+        "sources": [
+            "src/math/atanf.rs"
+        ],
+        "type": "f32"
+    },
+    "atanh": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/atanh.rs"
+        ],
+        "type": "f64"
+    },
+    "atanhf": {
+        "sources": [
+            "src/math/atanhf.rs"
+        ],
+        "type": "f32"
+    },
+    "cbrt": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/cbrt.rs"
+        ],
+        "type": "f64"
+    },
+    "cbrtf": {
+        "sources": [
+            "src/math/cbrtf.rs"
+        ],
+        "type": "f32"
+    },
+    "ceil": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/arch/i586.rs",
+            "src/math/arch/intrinsics.rs",
+            "src/math/ceil.rs"
+        ],
+        "type": "f64"
+    },
+    "ceilf": {
+        "sources": [
+            "src/math/arch/intrinsics.rs",
+            "src/math/ceilf.rs"
+        ],
+        "type": "f32"
+    },
+    "copysign": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/copysign.rs",
+            "src/math/generic/copysign.rs",
+            "src/math/support/float_traits.rs"
+        ],
+        "type": "f64"
+    },
+    "copysignf": {
+        "sources": [
+            "src/math/copysignf.rs",
+            "src/math/generic/copysign.rs"
+        ],
+        "type": "f32"
+    },
+    "cos": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/cos.rs"
+        ],
+        "type": "f64"
+    },
+    "cosf": {
+        "sources": [
+            "src/math/cosf.rs"
+        ],
+        "type": "f32"
+    },
+    "cosh": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/cosh.rs"
+        ],
+        "type": "f64"
+    },
+    "coshf": {
+        "sources": [
+            "src/math/coshf.rs"
+        ],
+        "type": "f32"
+    },
+    "erf": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/erf.rs"
+        ],
+        "type": "f64"
+    },
+    "erfc": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/erf.rs"
+        ],
+        "type": "f64"
+    },
+    "erfcf": {
+        "sources": [
+            "src/math/erff.rs"
+        ],
+        "type": "f32"
+    },
+    "erff": {
+        "sources": [
+            "src/math/erff.rs"
+        ],
+        "type": "f32"
+    },
+    "exp": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/exp.rs",
+            "src/math/support/float_traits.rs"
+        ],
+        "type": "f64"
+    },
+    "exp10": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/exp10.rs"
+        ],
+        "type": "f64"
+    },
+    "exp10f": {
+        "sources": [
+            "src/math/exp10f.rs"
+        ],
+        "type": "f32"
+    },
+    "exp2": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/exp2.rs"
+        ],
+        "type": "f64"
+    },
+    "exp2f": {
+        "sources": [
+            "src/math/exp2f.rs"
+        ],
+        "type": "f32"
+    },
+    "expf": {
+        "sources": [
+            "src/math/expf.rs"
+        ],
+        "type": "f32"
+    },
+    "expm1": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/expm1.rs"
+        ],
+        "type": "f64"
+    },
+    "expm1f": {
+        "sources": [
+            "src/math/expm1f.rs"
+        ],
+        "type": "f32"
+    },
+    "fabs": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/arch/intrinsics.rs",
+            "src/math/fabs.rs",
+            "src/math/generic/fabs.rs"
+        ],
+        "type": "f64"
+    },
+    "fabsf": {
+        "sources": [
+            "src/math/arch/intrinsics.rs",
+            "src/math/fabsf.rs",
+            "src/math/generic/fabs.rs"
+        ],
+        "type": "f32"
+    },
+    "fdim": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/fdim.rs"
+        ],
+        "type": "f64"
+    },
+    "fdimf": {
+        "sources": [
+            "src/math/fdimf.rs"
+        ],
+        "type": "f32"
+    },
+    "floor": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/arch/i586.rs",
+            "src/math/arch/intrinsics.rs",
+            "src/math/floor.rs"
+        ],
+        "type": "f64"
+    },
+    "floorf": {
+        "sources": [
+            "src/math/arch/intrinsics.rs",
+            "src/math/floorf.rs"
+        ],
+        "type": "f32"
+    },
+    "fma": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/fma.rs"
+        ],
+        "type": "f64"
+    },
+    "fmaf": {
+        "sources": [
+            "src/math/fmaf.rs"
+        ],
+        "type": "f32"
+    },
+    "fmax": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/fmax.rs"
+        ],
+        "type": "f64"
+    },
+    "fmaxf": {
+        "sources": [
+            "src/math/fmaxf.rs"
+        ],
+        "type": "f32"
+    },
+    "fmin": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/fmin.rs"
+        ],
+        "type": "f64"
+    },
+    "fminf": {
+        "sources": [
+            "src/math/fminf.rs"
+        ],
+        "type": "f32"
+    },
+    "fmod": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/fmod.rs"
+        ],
+        "type": "f64"
+    },
+    "fmodf": {
+        "sources": [
+            "src/math/fmodf.rs"
+        ],
+        "type": "f32"
+    },
+    "frexp": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/frexp.rs"
+        ],
+        "type": "f64"
+    },
+    "frexpf": {
+        "sources": [
+            "src/math/frexpf.rs"
+        ],
+        "type": "f32"
+    },
+    "hypot": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/hypot.rs"
+        ],
+        "type": "f64"
+    },
+    "hypotf": {
+        "sources": [
+            "src/math/hypotf.rs"
+        ],
+        "type": "f32"
+    },
+    "ilogb": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/ilogb.rs"
+        ],
+        "type": "f64"
+    },
+    "ilogbf": {
+        "sources": [
+            "src/math/ilogbf.rs"
+        ],
+        "type": "f32"
+    },
+    "j0": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/j0.rs"
+        ],
+        "type": "f64"
+    },
+    "j0f": {
+        "sources": [
+            "src/math/j0f.rs"
+        ],
+        "type": "f32"
+    },
+    "j1": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/j1.rs"
+        ],
+        "type": "f64"
+    },
+    "j1f": {
+        "sources": [
+            "src/math/j1f.rs"
+        ],
+        "type": "f32"
+    },
+    "jn": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/jn.rs"
+        ],
+        "type": "f64"
+    },
+    "jnf": {
+        "sources": [
+            "src/math/jnf.rs"
+        ],
+        "type": "f32"
+    },
+    "ldexp": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/ldexp.rs"
+        ],
+        "type": "f64"
+    },
+    "ldexpf": {
+        "sources": [
+            "src/math/ldexpf.rs"
+        ],
+        "type": "f32"
+    },
+    "lgamma": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/lgamma.rs"
+        ],
+        "type": "f64"
+    },
+    "lgamma_r": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/lgamma_r.rs"
+        ],
+        "type": "f64"
+    },
+    "lgammaf": {
+        "sources": [
+            "src/math/lgammaf.rs"
+        ],
+        "type": "f32"
+    },
+    "lgammaf_r": {
+        "sources": [
+            "src/math/lgammaf_r.rs"
+        ],
+        "type": "f32"
+    },
+    "log": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/log.rs"
+        ],
+        "type": "f64"
+    },
+    "log10": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/log10.rs"
+        ],
+        "type": "f64"
+    },
+    "log10f": {
+        "sources": [
+            "src/math/log10f.rs"
+        ],
+        "type": "f32"
+    },
+    "log1p": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/log1p.rs"
+        ],
+        "type": "f64"
+    },
+    "log1pf": {
+        "sources": [
+            "src/math/log1pf.rs"
+        ],
+        "type": "f32"
+    },
+    "log2": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/log2.rs"
+        ],
+        "type": "f64"
+    },
+    "log2f": {
+        "sources": [
+            "src/math/log2f.rs"
+        ],
+        "type": "f32"
+    },
+    "logf": {
+        "sources": [
+            "src/math/logf.rs"
+        ],
+        "type": "f32"
+    },
+    "modf": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/modf.rs"
+        ],
+        "type": "f64"
+    },
+    "modff": {
+        "sources": [
+            "src/math/modff.rs"
+        ],
+        "type": "f32"
+    },
+    "nextafter": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/nextafter.rs"
+        ],
+        "type": "f64"
+    },
+    "nextafterf": {
+        "sources": [
+            "src/math/nextafterf.rs"
+        ],
+        "type": "f32"
+    },
+    "pow": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/pow.rs"
+        ],
+        "type": "f64"
+    },
+    "powf": {
+        "sources": [
+            "src/math/powf.rs"
+        ],
+        "type": "f32"
+    },
+    "remainder": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/remainder.rs"
+        ],
+        "type": "f64"
+    },
+    "remainderf": {
+        "sources": [
+            "src/math/remainderf.rs"
+        ],
+        "type": "f32"
+    },
+    "remquo": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/remquo.rs"
+        ],
+        "type": "f64"
+    },
+    "remquof": {
+        "sources": [
+            "src/math/remquof.rs"
+        ],
+        "type": "f32"
+    },
+    "rint": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/rint.rs"
+        ],
+        "type": "f64"
+    },
+    "rintf": {
+        "sources": [
+            "src/math/rintf.rs"
+        ],
+        "type": "f32"
+    },
+    "round": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/round.rs"
+        ],
+        "type": "f64"
+    },
+    "roundf": {
+        "sources": [
+            "src/math/roundf.rs"
+        ],
+        "type": "f32"
+    },
+    "scalbn": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/scalbn.rs"
+        ],
+        "type": "f64"
+    },
+    "scalbnf": {
+        "sources": [
+            "src/math/scalbnf.rs"
+        ],
+        "type": "f32"
+    },
+    "sin": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/sin.rs"
+        ],
+        "type": "f64"
+    },
+    "sincos": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/sincos.rs"
+        ],
+        "type": "f64"
+    },
+    "sincosf": {
+        "sources": [
+            "src/math/sincosf.rs"
+        ],
+        "type": "f32"
+    },
+    "sinf": {
+        "sources": [
+            "src/math/sinf.rs"
+        ],
+        "type": "f32"
+    },
+    "sinh": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/sinh.rs"
+        ],
+        "type": "f64"
+    },
+    "sinhf": {
+        "sources": [
+            "src/math/sinhf.rs"
+        ],
+        "type": "f32"
+    },
+    "sqrt": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/arch/i686.rs",
+            "src/math/arch/intrinsics.rs",
+            "src/math/sqrt.rs"
+        ],
+        "type": "f64"
+    },
+    "sqrtf": {
+        "sources": [
+            "src/math/arch/i686.rs",
+            "src/math/arch/intrinsics.rs",
+            "src/math/sqrtf.rs"
+        ],
+        "type": "f32"
+    },
+    "tan": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/tan.rs"
+        ],
+        "type": "f64"
+    },
+    "tanf": {
+        "sources": [
+            "src/math/tanf.rs"
+        ],
+        "type": "f32"
+    },
+    "tanh": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/tanh.rs"
+        ],
+        "type": "f64"
+    },
+    "tanhf": {
+        "sources": [
+            "src/math/tanhf.rs"
+        ],
+        "type": "f32"
+    },
+    "tgamma": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/tgamma.rs"
+        ],
+        "type": "f64"
+    },
+    "tgammaf": {
+        "sources": [
+            "src/math/tgammaf.rs"
+        ],
+        "type": "f32"
+    },
+    "trunc": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/arch/intrinsics.rs",
+            "src/math/trunc.rs"
+        ],
+        "type": "f64"
+    },
+    "truncf": {
+        "sources": [
+            "src/math/arch/intrinsics.rs",
+            "src/math/truncf.rs"
+        ],
+        "type": "f32"
+    },
+    "y0": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/j0.rs"
+        ],
+        "type": "f64"
+    },
+    "y0f": {
+        "sources": [
+            "src/math/j0f.rs"
+        ],
+        "type": "f32"
+    },
+    "y1": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/j1.rs"
+        ],
+        "type": "f64"
+    },
+    "y1f": {
+        "sources": [
+            "src/math/j1f.rs"
+        ],
+        "type": "f32"
+    },
+    "yn": {
+        "sources": [
+            "src/libm_helper.rs",
+            "src/math/jn.rs"
+        ],
+        "type": "f64"
+    },
+    "ynf": {
+        "sources": [
+            "src/math/jnf.rs"
+        ],
+        "type": "f32"
+    }
+}
diff --git a/etc/update-api-list.py b/etc/update-api-list.py
index 7284a628c..a4587aa81 100755
--- a/etc/update-api-list.py
+++ b/etc/update-api-list.py
@@ -3,68 +3,166 @@
 functions are covered by our macros.
 """
 
+import difflib
 import json
 import subprocess as sp
 import sys
-import difflib
+from dataclasses import dataclass
+from glob import glob
 from pathlib import Path
-from typing import Any
+from typing import Any, TypeAlias
 
 ETC_DIR = Path(__file__).parent
 
+IndexTy: TypeAlias = dict[str, dict[str, Any]]
+"""Type of the `index` item in rustdoc's JSON output"""
 
-def get_rustdoc_json() -> dict[Any, Any]:
-    """Get rustdoc's JSON output for the `libm` crate."""
-
-    librs_path = ETC_DIR.joinpath("../src/lib.rs")
-    j = sp.check_output(
-        [
-            "rustdoc",
-            librs_path,
-            "--edition=2021",
-            "--output-format=json",
-            "-Zunstable-options",
-            "-o-",
-        ],
-        text=True,
-    )
-    j = json.loads(j)
-    return j
 
-
-def list_public_functions() -> list[str]:
-    """Get a list of public functions from rustdoc JSON output.
-
-    Note that this only finds functions that are reexported in `lib.rs`, this will
-    need to be adjusted if we need to account for functions that are defined there.
+@dataclass
+class Crate:
+    """Representation of public interfaces and function defintion locations in
+    `libm`.
     """
-    names = []
-    index: dict[str, dict[str, Any]] = get_rustdoc_json()["index"]
-    for item in index.values():
-        # Find public items
-        if item["visibility"] != "public":
-            continue
-
-        # Find only reexports
-        if "use" not in item["inner"].keys():
-            continue
 
-        # Locate the item that is reexported
-        id = item["inner"]["use"]["id"]
-        srcitem = index.get(str(id))
-
-        # External crate
-        if srcitem is None:
-            continue
-
-        # Skip if not a function
-        if "function" not in srcitem["inner"].keys():
-            continue
-
-        names.append(srcitem["name"])
-
-    names.sort()
-    return names
+    public_functions: list[str]
+    """List of all public functions."""
+    defs: dict[str, list[str]]
+    """Map from `name->[source files]` to find all places that define a public
+    function. We track this to know which tests need to be rerun when specific files
+    get updated.
+    """
+    types: dict[str, str]
+    """Map from `name->type`."""
+
+    def __init__(self) -> None:
+        self.public_functions = []
+        self.defs = {}
+        self.types = {}
+
+        j = self.get_rustdoc_json()
+        index: IndexTy = j["index"]
+        self._init_function_list(index)
+        self._init_defs(index)
+        self._init_types()
+
+    @staticmethod
+    def get_rustdoc_json() -> dict[Any, Any]:
+        """Get rustdoc's JSON output for the `libm` crate."""
+
+        j = sp.check_output(
+            [
+                "rustdoc",
+                "src/lib.rs",
+                "--edition=2021",
+                "--document-private-items",
+                "--output-format=json",
+                "-Zunstable-options",
+                "-o-",
+            ],
+            cwd=ETC_DIR.parent,
+            text=True,
+        )
+        j = json.loads(j)
+        return j
+
+    def _init_function_list(self, index: IndexTy) -> None:
+        """Get a list of public functions from rustdoc JSON output.
+
+        Note that this only finds functions that are reexported in `lib.rs`, this will
+        need to be adjusted if we need to account for functions that are defined there, or
+        glob reexports in other locations.
+        """
+        # Filter out items that are not public
+        public = [i for i in index.values() if i["visibility"] == "public"]
+
+        # Collect a list of source IDs for reexported items in `lib.rs` or `mod math`.
+        use = (i for i in public if "use" in i["inner"])
+        use = (
+            i for i in use if i["span"]["filename"] in ["src/math/mod.rs", "src/lib.rs"]
+        )
+        reexported_ids = [item["inner"]["use"]["id"] for item in use]
+
+        # Collect a list of reexported items that are functions
+        for id in reexported_ids:
+            srcitem = index.get(str(id))
+            # External crate
+            if srcitem is None:
+                continue
+
+            # Skip if not a function
+            if "function" not in srcitem["inner"]:
+                continue
+
+            self.public_functions.append(srcitem["name"])
+        self.public_functions.sort()
+
+    def _init_defs(self, index: IndexTy) -> None:
+        defs = {name: set() for name in self.public_functions}
+        funcs = (i for i in index.values() if "function" in i["inner"])
+        funcs = (f for f in funcs if f["name"] in self.public_functions)
+        for func in funcs:
+            defs[func["name"]].add(func["span"]["filename"])
+
+        # A lot of the `arch` module is often configured out so doesn't show up in docs. Use
+        # string matching as a fallback.
+        for fname in glob("src/math/arch/**.rs", root_dir=ETC_DIR.parent):
+            contents = Path(fname).read_text()
+
+            for name in self.public_functions:
+                if f"fn {name}" in contents:
+                    defs[name].add(fname)
+
+        for name, sources in defs.items():
+            base_sources = defs[base_name(name)[0]]
+            for src in (s for s in base_sources if "generic" in s):
+                sources.add(src)
+
+        # Sort the set
+        self.defs = {k: sorted(v) for (k, v) in defs.items()}
+
+    def _init_types(self) -> None:
+        self.types = {name: base_name(name)[1] for name in self.public_functions}
+
+    def write_function_list(self, check: bool) -> None:
+        """Collect the list of public functions to a simple text file."""
+        output = "# autogenerated by update-api-list.py\n"
+        for name in self.public_functions:
+            output += f"{name}\n"
+
+        out_file = ETC_DIR.joinpath("function-list.txt")
+
+        if check:
+            with open(out_file, "r") as f:
+                current = f.read()
+            diff_and_exit(current, output)
+        else:
+            with open(out_file, "w") as f:
+                f.write(output)
+
+    def write_function_defs(self, check: bool) -> None:
+        """Collect the list of information about public functions to a JSON file ."""
+        comment = (
+            "Autogenerated by update-api-list.py. "
+            "List of files that define a function with a given name. "
+            "This file is checked in to make it obvious if refactoring breaks things"
+        )
+
+        d = {"__comment": comment}
+        d |= {
+            name: {"sources": self.defs[name], "type": self.types[name]}
+            for name in self.public_functions
+        }
+
+        out_file = ETC_DIR.joinpath("function-definitions.json")
+        output = json.dumps(d, indent=4) + "\n"
+
+        if check:
+            with open(out_file, "r") as f:
+                current = f.read()
+            diff_and_exit(current, output)
+        else:
+            with open(out_file, "w") as f:
+                f.write(output)
 
 
 def diff_and_exit(actual: str, expected: str):
@@ -84,6 +182,35 @@ def diff_and_exit(actual: str, expected: str):
     exit(1)
 
 
+def base_name(name: str) -> tuple[str, str]:
+    """Return the basename and type from a full function name. Keep in sync with Rust's
+    `fn base_name`.
+    """
+    known_mappings = [
+        ("erff", ("erf", "f32")),
+        ("erf", ("erf", "f64")),
+        ("modff", ("modf", "f32")),
+        ("modf", ("modf", "f64")),
+        ("lgammaf_r", ("lgamma_r", "f32")),
+        ("lgamma_r", ("lgamma_r", "f64")),
+    ]
+
+    found = next((base for (full, base) in known_mappings if full == name), None)
+    if found is not None:
+        return found
+
+    if name.endswith("f"):
+        return (name.rstrip("f"), "f32")
+
+    if name.endswith("f16"):
+        return (name.rstrip("f16"), "f16")
+
+    if name.endswith("f128"):
+        return (name.rstrip("f128"), "f128")
+
+    return (name, "f64")
+
+
 def main():
     """By default overwrite the file. If `--check` is passed, print a diff instead and
     error if the files are different.
@@ -97,20 +224,9 @@ def main():
             print("unrecognized arguments")
             exit(1)
 
-    names = list_public_functions()
-    output = "# autogenerated by update-api-list.py\n"
-    for name in names:
-        output += f"{name}\n"
-
-    out_file = ETC_DIR.joinpath("function-list.txt")
-
-    if check:
-        with open(out_file, "r") as f:
-            current = f.read()
-        diff_and_exit(current, output)
-    else:
-        with open(out_file, "w") as f:
-            f.write(output)
+    crate = Crate()
+    crate.write_function_list(check)
+    crate.write_function_defs(check)
 
 
 if __name__ == "__main__":

From 6362d4e13ca18535adcafd0bf66e5d56a77c478d Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sun, 22 Dec 2024 11:47:44 +0000
Subject: [PATCH 091/279] Add extensive and exhaustive tests

Add a generator that will test all inputs for input spaces `u32::MAX` or
smaller (e.g. single-argument `f32` routines).  For anything larger,
still run approximately `u32::MAX` tests, but distribute inputs evenly
across the function domain.

Since we often only want to run one of these tests at a time, this
implementation parallelizes within each test using `rayon`. A custom
test runner is used so a progress bar is possible.

Specific tests must be enabled by setting the `LIBM_EXTENSIVE_TESTS`
environment variable, e.g.

    LIBM_EXTENSIVE_TESTS=all_f16,cos,cosf cargo run ...

Testing on a recent machine, most tests take about two minutes or less.
The Bessel functions are quite slow and take closer to 10 minutes, and
FMA is increased to run for about the same.
---
 crates/libm-test/Cargo.toml                |   9 +
 crates/libm-test/src/gen.rs                |   1 +
 crates/libm-test/src/gen/extensive.rs      | 153 ++++++++++++++
 crates/libm-test/src/gen/random.rs         |   4 +-
 crates/libm-test/src/lib.rs                |   4 +-
 crates/libm-test/src/num.rs                |   2 +-
 crates/libm-test/src/run_cfg.rs            |  37 +++-
 crates/libm-test/tests/z_extensive/main.rs |  14 ++
 crates/libm-test/tests/z_extensive/run.rs  | 234 +++++++++++++++++++++
 9 files changed, 450 insertions(+), 8 deletions(-)
 create mode 100644 crates/libm-test/src/gen/extensive.rs
 create mode 100644 crates/libm-test/tests/z_extensive/main.rs
 create mode 100644 crates/libm-test/tests/z_extensive/run.rs

diff --git a/crates/libm-test/Cargo.toml b/crates/libm-test/Cargo.toml
index 9b3ab5c53..69e96034e 100644
--- a/crates/libm-test/Cargo.toml
+++ b/crates/libm-test/Cargo.toml
@@ -26,12 +26,14 @@ short-benchmarks = []
 [dependencies]
 anyhow = "1.0.90"
 az = { version = "1.2.1", optional = true }
+indicatif = { version = "0.17.9", default-features = false }
 libm = { path = "../..", features = ["unstable-public-internals"] }
 libm-macros = { path = "../libm-macros" }
 musl-math-sys = { path = "../musl-math-sys", optional = true }
 paste = "1.0.15"
 rand = "0.8.5"
 rand_chacha = "0.3.1"
+rayon = "1.10.0"
 rug = { version = "1.26.1", optional = true, default-features = false, features = ["float", "std"] }
 
 [target.'cfg(target_family = "wasm")'.dependencies]
@@ -43,11 +45,18 @@ rand = { version = "0.8.5", optional = true }
 
 [dev-dependencies]
 criterion = { version = "0.5.1", default-features = false, features = ["cargo_bench_support"] }
+libtest-mimic = "0.8.1"
 
 [[bench]]
 name = "random"
 harness = false
 
+[[test]]
+# No harness so that we can skip tests at runtime based on env. Prefixed with
+# `z` so these tests get run last.
+name = "z_extensive"
+harness = false
+
 [lints.rust]
 # Values from the chared config.rs used by `libm` but not the test crate
 unexpected_cfgs = { level = "warn", check-cfg = [
diff --git a/crates/libm-test/src/gen.rs b/crates/libm-test/src/gen.rs
index 83e00f31d..e2bfcdf34 100644
--- a/crates/libm-test/src/gen.rs
+++ b/crates/libm-test/src/gen.rs
@@ -2,6 +2,7 @@
 
 pub mod domain_logspace;
 pub mod edge_cases;
+pub mod extensive;
 pub mod random;
 
 /// A wrapper to turn any iterator into an `ExactSizeIterator`. Asserts the final result to ensure
diff --git a/crates/libm-test/src/gen/extensive.rs b/crates/libm-test/src/gen/extensive.rs
new file mode 100644
index 000000000..d8b991b2a
--- /dev/null
+++ b/crates/libm-test/src/gen/extensive.rs
@@ -0,0 +1,153 @@
+use std::fmt;
+use std::ops::RangeInclusive;
+
+use libm::support::MinInt;
+
+use crate::domain::HasDomain;
+use crate::gen::KnownSize;
+use crate::op::OpITy;
+use crate::run_cfg::{int_range, iteration_count};
+use crate::{CheckCtx, GeneratorKind, MathOp, logspace};
+
+/// Generate a sequence of inputs that either cover the domain in completeness (for smaller float
+/// types and single argument functions) or provide evenly spaced inputs across the domain with
+/// approximately `u32::MAX` total iterations.
+pub trait ExtensiveInput<Op> {
+    fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> + Send;
+}
+
+/// Construct an iterator from `logspace` and also calculate the total number of steps expected
+/// for that iterator.
+fn logspace_steps<Op>(
+    start: Op::FTy,
+    end: Op::FTy,
+    ctx: &CheckCtx,
+    argnum: usize,
+) -> (impl Iterator<Item = Op::FTy> + Clone, u64)
+where
+    Op: MathOp,
+    OpITy<Op>: TryFrom<u64, Error: fmt::Debug>,
+    RangeInclusive<OpITy<Op>>: Iterator,
+{
+    let max_steps = iteration_count(ctx, GeneratorKind::Extensive, argnum);
+    let max_steps = OpITy::<Op>::try_from(max_steps).unwrap_or(OpITy::<Op>::MAX);
+    let iter = logspace(start, end, max_steps);
+
+    // `logspace` can't implement `ExactSizeIterator` because of the range, but its size hint
+    // should be accurate (assuming <= usize::MAX iterations).
+    let size_hint = iter.size_hint();
+    assert_eq!(size_hint.0, size_hint.1.unwrap());
+
+    (iter, size_hint.0.try_into().unwrap())
+}
+
+macro_rules! impl_extensive_input {
+    ($fty:ty) => {
+        impl<Op> ExtensiveInput<Op> for ($fty,)
+        where
+            Op: MathOp<RustArgs = Self, FTy = $fty>,
+            Op: HasDomain<Op::FTy>,
+        {
+            fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
+                let start = Op::DOMAIN.range_start();
+                let end = Op::DOMAIN.range_end();
+                let (iter0, steps0) = logspace_steps::<Op>(start, end, ctx, 0);
+                let iter0 = iter0.map(|v| (v,));
+                KnownSize::new(iter0, steps0)
+            }
+        }
+
+        impl<Op> ExtensiveInput<Op> for ($fty, $fty)
+        where
+            Op: MathOp<RustArgs = Self, FTy = $fty>,
+        {
+            fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
+                let start = <$fty>::NEG_INFINITY;
+                let end = <$fty>::INFINITY;
+                let (iter0, steps0) = logspace_steps::<Op>(start, end, ctx, 0);
+                let (iter1, steps1) = logspace_steps::<Op>(start, end, ctx, 1);
+                let iter =
+                    iter0.flat_map(move |first| iter1.clone().map(move |second| (first, second)));
+                let count = steps0.checked_mul(steps1).unwrap();
+                KnownSize::new(iter, count)
+            }
+        }
+
+        impl<Op> ExtensiveInput<Op> for ($fty, $fty, $fty)
+        where
+            Op: MathOp<RustArgs = Self, FTy = $fty>,
+        {
+            fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
+                let start = <$fty>::NEG_INFINITY;
+                let end = <$fty>::INFINITY;
+
+                let (iter0, steps0) = logspace_steps::<Op>(start, end, ctx, 0);
+                let (iter1, steps1) = logspace_steps::<Op>(start, end, ctx, 1);
+                let (iter2, steps2) = logspace_steps::<Op>(start, end, ctx, 2);
+
+                let iter = iter0
+                    .flat_map(move |first| iter1.clone().map(move |second| (first, second)))
+                    .flat_map(move |(first, second)| {
+                        iter2.clone().map(move |third| (first, second, third))
+                    });
+                let count = steps0.checked_mul(steps1).unwrap().checked_mul(steps2).unwrap();
+
+                KnownSize::new(iter, count)
+            }
+        }
+
+        impl<Op> ExtensiveInput<Op> for (i32, $fty)
+        where
+            Op: MathOp<RustArgs = Self, FTy = $fty>,
+        {
+            fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
+                let start = <$fty>::NEG_INFINITY;
+                let end = <$fty>::INFINITY;
+
+                let iter0 = int_range(ctx, GeneratorKind::Extensive, 0);
+                let steps0 = iteration_count(ctx, GeneratorKind::Extensive, 0);
+                let (iter1, steps1) = logspace_steps::<Op>(start, end, ctx, 1);
+
+                let iter =
+                    iter0.flat_map(move |first| iter1.clone().map(move |second| (first, second)));
+                let count = steps0.checked_mul(steps1).unwrap();
+
+                KnownSize::new(iter, count)
+            }
+        }
+
+        impl<Op> ExtensiveInput<Op> for ($fty, i32)
+        where
+            Op: MathOp<RustArgs = Self, FTy = $fty>,
+        {
+            fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
+                let start = <$fty>::NEG_INFINITY;
+                let end = <$fty>::INFINITY;
+
+                let (iter0, steps0) = logspace_steps::<Op>(start, end, ctx, 0);
+                let iter1 = int_range(ctx, GeneratorKind::Extensive, 0);
+                let steps1 = iteration_count(ctx, GeneratorKind::Extensive, 0);
+
+                let iter =
+                    iter0.flat_map(move |first| iter1.clone().map(move |second| (first, second)));
+                let count = steps0.checked_mul(steps1).unwrap();
+
+                KnownSize::new(iter, count)
+            }
+        }
+    };
+}
+
+impl_extensive_input!(f32);
+impl_extensive_input!(f64);
+
+/// Create a test case iterator for extensive inputs.
+pub fn get_test_cases<Op>(
+    ctx: &CheckCtx,
+) -> impl ExactSizeIterator<Item = Op::RustArgs> + Send + use<'_, Op>
+where
+    Op: MathOp,
+    Op::RustArgs: ExtensiveInput<Op>,
+{
+    Op::RustArgs::get_cases(ctx)
+}
diff --git a/crates/libm-test/src/gen/random.rs b/crates/libm-test/src/gen/random.rs
index 6df944317..29a9dcd2b 100644
--- a/crates/libm-test/src/gen/random.rs
+++ b/crates/libm-test/src/gen/random.rs
@@ -86,7 +86,7 @@ macro_rules! impl_random_input {
             fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
                 let count0 = iteration_count(ctx, GeneratorKind::Random, 0);
                 let count1 = iteration_count(ctx, GeneratorKind::Random, 1);
-                let range0 = int_range(ctx, 0);
+                let range0 = int_range(ctx, GeneratorKind::Random, 0);
                 let iter = random_ints(count0, range0)
                     .flat_map(move |f1: i32| random_floats(count1).map(move |f2: $fty| (f1, f2)));
                 KnownSize::new(iter, count0 * count1)
@@ -97,7 +97,7 @@ macro_rules! impl_random_input {
             fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
                 let count0 = iteration_count(ctx, GeneratorKind::Random, 0);
                 let count1 = iteration_count(ctx, GeneratorKind::Random, 1);
-                let range1 = int_range(ctx, 1);
+                let range1 = int_range(ctx, GeneratorKind::Random, 1);
                 let iter = random_floats(count0).flat_map(move |f1: $fty| {
                     random_ints(count1, range1.clone()).map(move |f2: i32| (f1, f2))
                 });
diff --git a/crates/libm-test/src/lib.rs b/crates/libm-test/src/lib.rs
index 8a4e782df..a940db1d2 100644
--- a/crates/libm-test/src/lib.rs
+++ b/crates/libm-test/src/lib.rs
@@ -25,7 +25,8 @@ pub use libm::support::{Float, Int, IntTy, MinInt};
 pub use num::{FloatExt, logspace};
 pub use op::{BaseName, FloatTy, Identifier, MathOp, OpCFn, OpFTy, OpRustFn, OpRustRet, Ty};
 pub use precision::{MaybeOverride, SpecialCase, default_ulp};
-pub use run_cfg::{CheckBasis, CheckCtx, EXTENSIVE_ENV, GeneratorKind};
+use run_cfg::EXTENSIVE_MAX_ITERATIONS;
+pub use run_cfg::{CheckBasis, CheckCtx, EXTENSIVE_ENV, GeneratorKind, skip_extensive_test};
 pub use test_traits::{CheckOutput, Hex, TupleCall};
 
 /// Result type for tests is usually from `anyhow`. Most times there is no success value to
@@ -85,6 +86,7 @@ pub fn test_log(s: &str) {
         writeln!(f, "cargo features: {}", env!("CFG_CARGO_FEATURES")).unwrap();
         writeln!(f, "opt level: {}", env!("CFG_OPT_LEVEL")).unwrap();
         writeln!(f, "target features: {}", env!("CFG_TARGET_FEATURES")).unwrap();
+        writeln!(f, "extensive iterations {}", *EXTENSIVE_MAX_ITERATIONS).unwrap();
 
         Some(f)
     });
diff --git a/crates/libm-test/src/num.rs b/crates/libm-test/src/num.rs
index eff2fbc1f..f693ef02f 100644
--- a/crates/libm-test/src/num.rs
+++ b/crates/libm-test/src/num.rs
@@ -215,7 +215,7 @@ fn as_ulp_steps<F: Float>(x: F) -> Option<F::SignedInt> {
 /// to logarithmic spacing of their values.
 ///
 /// Note that this tends to skip negative zero, so that needs to be checked explicitly.
-pub fn logspace<F: FloatExt>(start: F, end: F, steps: F::Int) -> impl Iterator<Item = F>
+pub fn logspace<F: FloatExt>(start: F, end: F, steps: F::Int) -> impl Iterator<Item = F> + Clone
 where
     RangeInclusive<F::Int>: Iterator,
 {
diff --git a/crates/libm-test/src/run_cfg.rs b/crates/libm-test/src/run_cfg.rs
index 9cede0cc7..48a654caa 100644
--- a/crates/libm-test/src/run_cfg.rs
+++ b/crates/libm-test/src/run_cfg.rs
@@ -10,6 +10,22 @@ use crate::{BaseName, FloatTy, Identifier, test_log};
 /// The environment variable indicating which extensive tests should be run.
 pub const EXTENSIVE_ENV: &str = "LIBM_EXTENSIVE_TESTS";
 
+/// Specify the number of iterations via this environment variable, rather than using the default.
+pub const EXTENSIVE_ITER_ENV: &str = "LIBM_EXTENSIVE_ITERATIONS";
+
+/// Maximum number of iterations to run for a single routine.
+///
+/// The default value of one greater than `u32::MAX` allows testing single-argument `f32` routines
+/// and single- or double-argument `f16` routines exhaustively. `f64` and `f128` can't feasibly
+/// be tested exhaustively; however, [`EXTENSIVE_ITER_ENV`] can be set to run tests for multiple
+/// hours.
+pub static EXTENSIVE_MAX_ITERATIONS: LazyLock<u64> = LazyLock::new(|| {
+    let default = 1 << 32;
+    env::var(EXTENSIVE_ITER_ENV)
+        .map(|v| v.parse().expect("failed to parse iteration count"))
+        .unwrap_or(default)
+});
+
 /// Context passed to [`CheckOutput`].
 #[derive(Clone, Debug, PartialEq, Eq)]
 pub struct CheckCtx {
@@ -54,6 +70,7 @@ pub enum CheckBasis {
 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
 pub enum GeneratorKind {
     Domain,
+    Extensive,
     Random,
 }
 
@@ -171,8 +188,14 @@ pub fn iteration_count(ctx: &CheckCtx, gen_kind: GeneratorKind, argnum: usize) -
     let mut total_iterations = match gen_kind {
         GeneratorKind::Domain => domain_iter_count,
         GeneratorKind::Random => random_iter_count,
+        GeneratorKind::Extensive => *EXTENSIVE_MAX_ITERATIONS,
     };
 
+    // FMA has a huge domain but is reasonably fast to run, so increase iterations.
+    if ctx.base_name == BaseName::Fma {
+        total_iterations *= 4;
+    }
+
     if cfg!(optimizations_enabled) {
         // Always run at least 10,000 tests.
         total_iterations = total_iterations.max(10_000);
@@ -191,7 +214,7 @@ pub fn iteration_count(ctx: &CheckCtx, gen_kind: GeneratorKind, argnum: usize) -
     let total = ntests.pow(t_env.input_count.try_into().unwrap());
 
     let seed_msg = match gen_kind {
-        GeneratorKind::Domain => String::new(),
+        GeneratorKind::Domain | GeneratorKind::Extensive => String::new(),
         GeneratorKind::Random => {
             format!(" using `{SEED_ENV}={}`", str::from_utf8(SEED.as_slice()).unwrap())
         }
@@ -210,7 +233,7 @@ pub fn iteration_count(ctx: &CheckCtx, gen_kind: GeneratorKind, argnum: usize) -
 }
 
 /// Some tests require that an integer be kept within reasonable limits; generate that here.
-pub fn int_range(ctx: &CheckCtx, argnum: usize) -> RangeInclusive<i32> {
+pub fn int_range(ctx: &CheckCtx, gen_kind: GeneratorKind, argnum: usize) -> RangeInclusive<i32> {
     let t_env = TestEnv::from_env(ctx);
 
     if !matches!(ctx.base_name, BaseName::Jn | BaseName::Yn) {
@@ -221,10 +244,17 @@ pub fn int_range(ctx: &CheckCtx, argnum: usize) -> RangeInclusive<i32> {
 
     // The integer argument to `jn` is an iteration count. Limit this to ensure tests can be
     // completed in a reasonable amount of time.
-    if t_env.slow_platform || !cfg!(optimizations_enabled) {
+    let non_extensive_range = if t_env.slow_platform || !cfg!(optimizations_enabled) {
         (-0xf)..=0xff
     } else {
         (-0xff)..=0xffff
+    };
+
+    let extensive_range = (-0xfff)..=0xfffff;
+
+    match gen_kind {
+        GeneratorKind::Extensive => extensive_range,
+        GeneratorKind::Domain | GeneratorKind::Random => non_extensive_range,
     }
 }
 
@@ -241,7 +271,6 @@ pub fn check_near_count(_ctx: &CheckCtx) -> u64 {
 }
 
 /// Check whether extensive actions should be run or skipped.
-#[expect(dead_code, reason = "extensive tests have not yet been added")]
 pub fn skip_extensive_test(ctx: &CheckCtx) -> bool {
     let t_env = TestEnv::from_env(ctx);
     !t_env.should_run_extensive
diff --git a/crates/libm-test/tests/z_extensive/main.rs b/crates/libm-test/tests/z_extensive/main.rs
new file mode 100644
index 000000000..3a2af88bd
--- /dev/null
+++ b/crates/libm-test/tests/z_extensive/main.rs
@@ -0,0 +1,14 @@
+//! `main` is just a wrapper to handle configuration.
+
+#[cfg(not(feature = "test-multiprecision"))]
+fn main() {
+    eprintln!("multiprecision not enabled; skipping extensive tests");
+}
+
+#[cfg(feature = "test-multiprecision")]
+mod run;
+
+#[cfg(feature = "test-multiprecision")]
+fn main() {
+    run::run();
+}
diff --git a/crates/libm-test/tests/z_extensive/run.rs b/crates/libm-test/tests/z_extensive/run.rs
new file mode 100644
index 000000000..7acff5324
--- /dev/null
+++ b/crates/libm-test/tests/z_extensive/run.rs
@@ -0,0 +1,234 @@
+//! Exhaustive tests for `f16` and `f32`, high-iteration for `f64` and `f128`.
+
+use std::fmt;
+use std::io::{self, IsTerminal};
+use std::sync::atomic::{AtomicU64, Ordering};
+use std::time::Duration;
+
+use indicatif::{ProgressBar, ProgressStyle};
+use libm_test::gen::extensive::{self, ExtensiveInput};
+use libm_test::mpfloat::MpOp;
+use libm_test::{
+    CheckBasis, CheckCtx, CheckOutput, MathOp, TestResult, TupleCall, skip_extensive_test,
+};
+use libtest_mimic::{Arguments, Trial};
+use rayon::prelude::*;
+
+/// Run the extensive test suite.
+pub fn run() {
+    let mut args = Arguments::from_args();
+    // Prevent multiple tests from running in parallel, each test gets parallized internally.
+    args.test_threads = Some(1);
+    let tests = register_all_tests();
+
+    // With default parallelism, the CPU doesn't saturate. We don't need to be nice to
+    // other processes, so do 1.5x to make sure we use all available resources.
+    let threads = std::thread::available_parallelism().map(Into::into).unwrap_or(0) * 3 / 2;
+    rayon::ThreadPoolBuilder::new().num_threads(threads).build_global().unwrap();
+
+    libtest_mimic::run(&args, tests).exit();
+}
+
+macro_rules! mp_extensive_tests {
+    (
+        fn_name: $fn_name:ident,
+        attrs: [$($attr:meta),*],
+        extra: [$push_to:ident],
+    ) => {
+        $(#[$attr])*
+        register_single_test::<libm_test::op::$fn_name::Routine>(&mut $push_to);
+    };
+}
+
+/// Create a list of tests for consumption by `libtest_mimic`.
+fn register_all_tests() -> Vec<Trial> {
+    let mut all_tests = Vec::new();
+
+    libm_macros::for_each_function! {
+        callback: mp_extensive_tests,
+        extra: [all_tests],
+        skip: [
+            // FIXME: MPFR tests needed
+            remquo,
+            remquof,
+
+            // FIXME: test needed, see
+            // https://github.com/rust-lang/libm/pull/311#discussion_r1818273392
+            nextafter,
+            nextafterf,
+        ],
+    }
+
+    all_tests
+}
+
+/// Add a single test to the list.
+fn register_single_test<Op>(all: &mut Vec<Trial>)
+where
+    Op: MathOp + MpOp,
+    Op::RustArgs: ExtensiveInput<Op> + Send,
+{
+    let test_name = format!("mp_extensive_{}", Op::NAME);
+    let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Mpfr);
+    let skip = skip_extensive_test(&ctx);
+
+    let runner = move || {
+        if !cfg!(optimizations_enabled) {
+            panic!("extensive tests should be run with --release");
+        }
+
+        let res = run_single_test::<Op>();
+        let e = match res {
+            Ok(()) => return Ok(()),
+            Err(e) => e,
+        };
+
+        // Format with the `Debug` implementation so we get the error cause chain, and print it
+        // here so we see the result immediately (rather than waiting for all tests to conclude).
+        let e = format!("{e:?}");
+        eprintln!("failure testing {}:{e}\n", Op::IDENTIFIER);
+
+        Err(e.into())
+    };
+
+    all.push(Trial::test(test_name, runner).with_ignored_flag(skip));
+}
+
+/// Test runner for a signle routine.
+fn run_single_test<Op>() -> TestResult
+where
+    Op: MathOp + MpOp,
+    Op::RustArgs: ExtensiveInput<Op> + Send,
+{
+    // Small delay before printing anything so other output from the runner has a chance to flush.
+    std::thread::sleep(Duration::from_millis(500));
+    eprintln!();
+
+    let completed = AtomicU64::new(0);
+    let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Mpfr);
+    let cases = &mut extensive::get_test_cases::<Op>(&ctx);
+    let total: u64 = cases.len().try_into().unwrap();
+    let pb = Progress::new(Op::NAME, total);
+
+    let test_single_chunk = |mp_vals: &mut Op::MpTy, input_vec: Vec<Op::RustArgs>| -> TestResult {
+        for input in input_vec {
+            // Test the input.
+            let mp_res = Op::run(mp_vals, input);
+            let crate_res = input.call(Op::ROUTINE);
+            crate_res.validate(mp_res, input, &ctx)?;
+
+            let completed = completed.fetch_add(1, Ordering::Relaxed) + 1;
+            pb.update(completed, input);
+        }
+
+        Ok(())
+    };
+
+    // Chunk the cases so Rayon doesn't switch threads between each iterator item. 50k seems near
+    // a performance sweet spot. Ideally we would reuse these allocations rather than discarding,
+    // but that is difficult with Rayon's API.
+    let chunk_size = 50_000;
+    let chunks = std::iter::from_fn(move || {
+        let mut v = Vec::with_capacity(chunk_size);
+        v.extend(cases.take(chunk_size));
+        (!v.is_empty()).then_some(v)
+    });
+
+    // Run the actual tests
+    let res = chunks.par_bridge().try_for_each_init(Op::new_mp, test_single_chunk);
+
+    let real_total = completed.load(Ordering::Relaxed);
+    pb.complete(real_total);
+
+    if res.is_ok() && real_total != total {
+        // Provide a warning if our estimate needs to be updated.
+        panic!("total run {real_total} does not match expected {total}");
+    }
+
+    res
+}
+
+/// Wrapper around a `ProgressBar` that handles styles and non-TTY messages.
+struct Progress {
+    pb: ProgressBar,
+    name_padded: String,
+    final_style: ProgressStyle,
+    is_tty: bool,
+}
+
+impl Progress {
+    const PB_TEMPLATE: &str = "[{elapsed:3} {percent:3}%] {bar:20.cyan/blue} NAME \
+        {human_pos:>13}/{human_len:13} {per_sec:18} eta {eta:8} {msg}";
+    const PB_TEMPLATE_FINAL: &str = "[{elapsed:3} {percent:3}%] {bar:20.cyan/blue} NAME \
+        {human_pos:>13}/{human_len:13} {per_sec:18} done in {elapsed_precise}";
+
+    fn new(name: &str, total: u64) -> Self {
+        eprintln!("starting extensive tests for `{name}`");
+        let name_padded = format!("{name:9}");
+        let is_tty = io::stderr().is_terminal();
+
+        let initial_style =
+            ProgressStyle::with_template(&Self::PB_TEMPLATE.replace("NAME", &name_padded))
+                .unwrap()
+                .progress_chars("##-");
+
+        let final_style =
+            ProgressStyle::with_template(&Self::PB_TEMPLATE_FINAL.replace("NAME", &name_padded))
+                .unwrap()
+                .progress_chars("##-");
+
+        let pb = ProgressBar::new(total);
+        pb.set_style(initial_style);
+
+        Self { pb, final_style, name_padded, is_tty }
+    }
+
+    fn update(&self, completed: u64, input: impl fmt::Debug) {
+        // Infrequently update the progress bar.
+        if completed % 20_000 == 0 {
+            self.pb.set_position(completed);
+        }
+
+        if completed % 500_000 == 0 {
+            self.pb.set_message(format!("input: {input:<24?}"));
+        }
+
+        if !self.is_tty && completed % 5_000_000 == 0 {
+            let len = self.pb.length().unwrap_or_default();
+            eprintln!(
+                "[{elapsed:3?}s {percent:3.0}%] {name} \
+                {human_pos:>10}/{human_len:<10} {per_sec:14.2}/s eta {eta:4}s {input:<24?}",
+                elapsed = self.pb.elapsed().as_secs(),
+                percent = completed as f32 * 100.0 / len as f32,
+                name = self.name_padded,
+                human_pos = completed,
+                human_len = len,
+                per_sec = self.pb.per_sec(),
+                eta = self.pb.eta().as_secs()
+            );
+        }
+    }
+
+    fn complete(self, real_total: u64) {
+        self.pb.set_style(self.final_style);
+        self.pb.set_position(real_total);
+        self.pb.abandon();
+
+        if !self.is_tty {
+            let len = self.pb.length().unwrap_or_default();
+            eprintln!(
+                "[{elapsed:3}s {percent:3.0}%] {name} \
+                {human_pos:>10}/{human_len:<10} {per_sec:14.2}/s done in {elapsed_precise}",
+                elapsed = self.pb.elapsed().as_secs(),
+                percent = real_total as f32 * 100.0 / len as f32,
+                name = self.name_padded,
+                human_pos = real_total,
+                human_len = len,
+                per_sec = self.pb.per_sec(),
+                elapsed_precise = self.pb.elapsed().as_secs(),
+            );
+        }
+
+        eprintln!();
+    }
+}

From f2397e5e9b2e8e7590e126ccdb800374e730dba1 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sun, 22 Dec 2024 11:47:53 +0000
Subject: [PATCH 092/279] Update precision based on failures from extensive
 tests

---
 crates/libm-test/src/precision.rs | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/crates/libm-test/src/precision.rs b/crates/libm-test/src/precision.rs
index 4a6ca8af7..696bb3735 100644
--- a/crates/libm-test/src/precision.rs
+++ b/crates/libm-test/src/precision.rs
@@ -55,7 +55,7 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 {
         Bn::Asin => 1,
         Bn::Asinh => 2,
         Bn::Atan => 1,
-        Bn::Atan2 => 1,
+        Bn::Atan2 => 2,
         Bn::Atanh => 2,
         Bn::Cbrt => 1,
         Bn::Cos => 1,
@@ -187,6 +187,20 @@ impl MaybeOverride<(f32,)> for SpecialCase {
             return XFAIL;
         }
 
+        if (ctx.base_name == BaseName::Lgamma || ctx.base_name == BaseName::LgammaR)
+            && input.0 > 4e36
+            && expected.is_infinite()
+            && !actual.is_infinite()
+        {
+            // This result should saturate but we return a finite value.
+            return XFAIL;
+        }
+
+        if ctx.base_name == BaseName::J0 && input.0 < -1e34 {
+            // Errors get huge close to -inf
+            return XFAIL;
+        }
+
         maybe_check_nan_bits(actual, expected, ctx)
     }
 
@@ -248,6 +262,11 @@ impl MaybeOverride<(f64,)> for SpecialCase {
             return XFAIL;
         }
 
+        if ctx.base_name == BaseName::J0 && input.0 < -1e300 {
+            // Errors get huge close to -inf
+            return XFAIL;
+        }
+
         maybe_check_nan_bits(actual, expected, ctx)
     }
 
@@ -364,6 +383,7 @@ impl MaybeOverride<(i32, f32)> for SpecialCase {
         }
     }
 }
+
 impl MaybeOverride<(i32, f64)> for SpecialCase {
     fn check_float<F: Float>(
         input: (i32, f64),

From 9b08ee52b09c8717deb7c3c1e9f3c8fff0039c77 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 2 Jan 2025 10:25:27 +0000
Subject: [PATCH 093/279] Run extensive tests in CI when relevant files change

Add a CI job with a dynamically calculated matrix that runs extensive
jobs on changed files. This makes use of the new
`function-definitions.json` file to determine which changed files
require full tests for a routine to run.
---
 .github/workflows/main.yml        |  59 +++++++++++-
 ci/calculate-exhaustive-matrix.py | 148 ++++++++++++++++++++++++++++++
 2 files changed, 206 insertions(+), 1 deletion(-)
 create mode 100755 ci/calculate-exhaustive-matrix.py

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 023ec58c0..1b2fd12ba 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -14,7 +14,7 @@ env:
 jobs:
   test:
     name: Build and test
-    timeout-minutes: 20
+    timeout-minutes: 25
     strategy:
       fail-fast: false
       matrix:
@@ -186,6 +186,62 @@ jobs:
         rustup component add rustfmt
     - run: cargo fmt -- --check
 
+  # Determine which extensive tests should be run based on changed files.
+  calculate_extensive_matrix:
+    name: Calculate job matrix
+    runs-on: ubuntu-24.04
+    outputs:
+      matrix: ${{ steps.script.outputs.matrix }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 100
+      - name: Fetch pull request ref
+        run: git fetch origin "$GITHUB_REF:$GITHUB_REF"
+      - run: python3 ci/calculate-exhaustive-matrix.py >> "$GITHUB_OUTPUT"
+        id: script
+
+  extensive:
+    name: Extensive tests for ${{ matrix.ty }}
+    needs:
+      # Wait on `clippy` so we have some confidence that the crate will build
+      - clippy
+      - calculate_extensive_matrix
+    runs-on: ubuntu-24.04
+    timeout-minutes: 80
+    strategy:
+      matrix:
+        # Use the output from `calculate_extensive_matrix` to calculate the matrix
+        # FIXME: it would be better to run all jobs (i.e. all types) but mark those that
+        # didn't change as skipped, rather than completely excluding the job. However,
+        # this is not currently possible https://github.com/actions/runner/issues/1985.
+        include: ${{ fromJSON(needs.calculate_extensive_matrix.outputs.matrix).matrix }}
+    env:
+      CHANGED: ${{ matrix.changed }}
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install Rust
+        run: |
+          rustup update nightly --no-self-update
+          rustup default nightly
+      - uses: Swatinem/rust-cache@v2
+      - name: Download musl source
+        run: ./ci/download-musl.sh
+      - name: Run extensive tests
+        run: |
+          echo "Changed: '$CHANGED'"
+          if [ -z "$CHANGED" ]; then
+            echo "No tests to run, exiting."
+            exit
+          fi
+
+          LIBM_EXTENSIVE_TESTS="$CHANGED" cargo t \
+            --features test-multiprecision,unstable \
+            --release -- extensive
+      - name: Print test logs if available
+        run: if [ -f "target/test-log.txt" ]; then cat target/test-log.txt; fi
+        shell: bash
+
   success:
     needs:
       - test
@@ -193,6 +249,7 @@ jobs:
       - benchmarks
       - msrv
       - rustfmt
+      - extensive
     runs-on: ubuntu-24.04
     # GitHub branch protection is exceedingly silly and treats "jobs skipped because a dependency
     # failed" as success. So we have to do some contortions to ensure the job fails if any of its
diff --git a/ci/calculate-exhaustive-matrix.py b/ci/calculate-exhaustive-matrix.py
new file mode 100755
index 000000000..8b42f9389
--- /dev/null
+++ b/ci/calculate-exhaustive-matrix.py
@@ -0,0 +1,148 @@
+#!/usr/bin/env python3
+"""Calculate which exhaustive tests should be run as part of CI.
+
+This dynamically prepares a list of routines that had a source file change based on
+git history.
+"""
+
+import subprocess as sp
+import sys
+import json
+from dataclasses import dataclass
+from os import getenv
+from pathlib import Path
+from typing import TypedDict
+
+
+REPO_ROOT = Path(__file__).parent.parent
+GIT = ["git", "-C", REPO_ROOT]
+
+# Don't run exhaustive tests if these files change, even if they contaiin a function
+# definition.
+IGNORE_FILES = [
+    "src/math/support/",
+    "src/libm_helper.rs",
+    "src/math/arch/intrinsics.rs",
+]
+
+TYPES = ["f16", "f32", "f64", "f128"]
+
+
+class FunctionDef(TypedDict):
+    """Type for an entry in `function-definitions.json`"""
+
+    sources: list[str]
+    type: str
+
+
+@dataclass
+class Context:
+    gh_ref: str | None
+    changed: list[Path]
+    defs: dict[str, FunctionDef]
+
+    def __init__(self) -> None:
+        self.gh_ref = getenv("GITHUB_REF")
+        self.changed = []
+        self._init_change_list()
+
+        with open(REPO_ROOT.joinpath("etc/function-definitions.json")) as f:
+            defs = json.load(f)
+
+        defs.pop("__comment", None)
+        self.defs = defs
+
+    def _init_change_list(self):
+        """Create a list of files that have been changed. This uses GITHUB_REF if
+        available, otherwise a diff between `HEAD` and `master`.
+        """
+
+        # For pull requests, GitHub creates a ref `refs/pull/1234/merge` (1234 being
+        # the PR number), and sets this as `GITHUB_REF`.
+        ref = self.gh_ref
+        eprint(f"using ref `{ref}`")
+        if ref is None or "merge" not in ref:
+            # If the ref is not for `merge` then we are not in PR CI
+            eprint("No diff available for ref")
+            return
+
+        # The ref is for a dummy merge commit. We can extract the merge base by
+        # inspecting all parents (`^@`).
+        merge_sha = sp.check_output(
+            GIT + ["show-ref", "--hash", ref], text=True
+        ).strip()
+        merge_log = sp.check_output(GIT + ["log", "-1", merge_sha], text=True)
+        eprint(f"Merge:\n{merge_log}\n")
+
+        parents = (
+            sp.check_output(GIT + ["rev-parse", f"{merge_sha}^@"], text=True)
+            .strip()
+            .splitlines()
+        )
+        assert len(parents) == 2, f"expected two-parent merge but got:\n{parents}"
+        base = parents[0].strip()
+        incoming = parents[1].strip()
+
+        eprint(f"base: {base}, incoming: {incoming}")
+        textlist = sp.check_output(
+            GIT + ["diff", base, incoming, "--name-only"], text=True
+        )
+        self.changed = [Path(p) for p in textlist.splitlines()]
+
+    @staticmethod
+    def _ignore_file(fname: str) -> bool:
+        return any(fname.startswith(pfx) for pfx in IGNORE_FILES)
+
+    def changed_routines(self) -> dict[str, list[str]]:
+        """Create a list of routines for which one or more files have been updated,
+        separated by type.
+        """
+        routines = set()
+        for name, meta in self.defs.items():
+            # Don't update if changes to the file should be ignored
+            sources = (f for f in meta["sources"] if not self._ignore_file(f))
+
+            # Select changed files
+            changed = [f for f in sources if Path(f) in self.changed]
+
+            if len(changed) > 0:
+                eprint(f"changed files for {name}: {changed}")
+                routines.add(name)
+
+        ret = {}
+        for r in sorted(routines):
+            ret.setdefault(self.defs[r]["type"], []).append(r)
+
+        return ret
+
+    def make_workflow_output(self) -> str:
+        """Create a JSON object a list items for each type's changed files, if any
+        did change, and the routines that were affected by the change.
+        """
+        changed = self.changed_routines()
+        ret = []
+        for ty in TYPES:
+            ty_changed = changed.get(ty, [])
+            item = {
+                "ty": ty,
+                "changed": ",".join(ty_changed),
+            }
+            ret.append(item)
+        output = json.dumps({"matrix": ret}, separators=(",", ":"))
+        eprint(f"output: {output}")
+        return output
+
+
+def eprint(*args, **kwargs):
+    """Print to stderr."""
+    print(*args, file=sys.stderr, **kwargs)
+
+
+def main():
+    ctx = Context()
+    output = ctx.make_workflow_output()
+    print(f"matrix={output}")
+
+
+if __name__ == "__main__":
+    main()

From 46aeb607155921f7f3ea6b973cf99cec5513c9d8 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 2 Jan 2025 21:41:58 +0000
Subject: [PATCH 094/279] Enable `f16` and `f128` when creating the API change
 list

Additionally, read glob output as absoulte paths. This enables the
script to work properly even when invoked from a different directory.
---
 etc/update-api-list.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/etc/update-api-list.py b/etc/update-api-list.py
index a4587aa81..67f73e59c 100755
--- a/etc/update-api-list.py
+++ b/etc/update-api-list.py
@@ -13,6 +13,7 @@
 from typing import Any, TypeAlias
 
 ETC_DIR = Path(__file__).parent
+ROOT_DIR = ETC_DIR.parent
 
 IndexTy: TypeAlias = dict[str, dict[str, Any]]
 """Type of the `index` item in rustdoc's JSON output"""
@@ -56,10 +57,12 @@ def get_rustdoc_json() -> dict[Any, Any]:
                 "--edition=2021",
                 "--document-private-items",
                 "--output-format=json",
+                "--cfg=f16_enabled",
+                "--cfg=f128_enabled",
                 "-Zunstable-options",
                 "-o-",
             ],
-            cwd=ETC_DIR.parent,
+            cwd=ROOT_DIR,
             text=True,
         )
         j = json.loads(j)
@@ -105,8 +108,8 @@ def _init_defs(self, index: IndexTy) -> None:
 
         # A lot of the `arch` module is often configured out so doesn't show up in docs. Use
         # string matching as a fallback.
-        for fname in glob("src/math/arch/**.rs", root_dir=ETC_DIR.parent):
-            contents = Path(fname).read_text()
+        for fname in glob("src/math/arch/**.rs", root_dir=ROOT_DIR):
+            contents = (ROOT_DIR.joinpath(fname)).read_text()
 
             for name in self.public_functions:
                 if f"fn {name}" in contents:

From 887ec87ebda048f04b3a6932abca84d872bde65e Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 2 Jan 2025 21:11:50 +0000
Subject: [PATCH 095/279] Add `fabsf16`, `fabsf128`, `copysignf16`, and
 `copysignf128`

Use the generic implementations to provide these simple methods.
---
 src/libm_helper.rs       | 24 +++++++++++++++++++++---
 src/math/copysignf128.rs |  8 ++++++++
 src/math/copysignf16.rs  |  8 ++++++++
 src/math/fabs.rs         |  1 +
 src/math/fabsf.rs        |  1 +
 src/math/fabsf128.rs     | 37 +++++++++++++++++++++++++++++++++++++
 src/math/fabsf16.rs      | 37 +++++++++++++++++++++++++++++++++++++
 src/math/mod.rs          | 20 ++++++++++++++++++++
 8 files changed, 133 insertions(+), 3 deletions(-)
 create mode 100644 src/math/copysignf128.rs
 create mode 100644 src/math/copysignf16.rs
 create mode 100644 src/math/fabsf128.rs
 create mode 100644 src/math/fabsf16.rs

diff --git a/src/libm_helper.rs b/src/libm_helper.rs
index 52d0c4c2a..f087267e4 100644
--- a/src/libm_helper.rs
+++ b/src/libm_helper.rs
@@ -30,7 +30,7 @@ macro_rules! libm_helper {
         }
     };
 
-    ({$($func:tt);*}) => {
+    ({$($func:tt;)*}) => {
         $(
             libm_helper! { $func }
         )*
@@ -103,7 +103,7 @@ libm_helper! {
         (fn trunc(x: f32) -> (f32);                 => truncf);
         (fn y0(x: f32) -> (f32);                    => y0f);
         (fn y1(x: f32) -> (f32);                    => y1f);
-        (fn yn(n: i32, x: f32) -> (f32);            => ynf)
+        (fn yn(n: i32, x: f32) -> (f32);            => ynf);
     }
 }
 
@@ -166,6 +166,24 @@ libm_helper! {
         (fn trunc(x: f64) -> (f64);                 => trunc);
         (fn y0(x: f64) -> (f64);                    => y0);
         (fn y1(x: f64) -> (f64);                    => y1);
-        (fn yn(n: i32, x: f64) -> (f64);            => yn)
+        (fn yn(n: i32, x: f64) -> (f64);            => yn);
+    }
+}
+
+#[cfg(f16_enabled)]
+libm_helper! {
+    f16,
+    funcs: {
+        (fn copysign(x: f16, y: f16) -> (f16);      => copysignf16);
+        (fn fabs(x: f16) -> (f16);                  => fabsf16);
+    }
+}
+
+#[cfg(f128_enabled)]
+libm_helper! {
+    f128,
+    funcs: {
+        (fn copysign(x: f128, y: f128) -> (f128);   => copysignf128);
+        (fn fabs(x: f128) -> (f128);                => fabsf128);
     }
 }
diff --git a/src/math/copysignf128.rs b/src/math/copysignf128.rs
new file mode 100644
index 000000000..7bd81d42b
--- /dev/null
+++ b/src/math/copysignf128.rs
@@ -0,0 +1,8 @@
+/// Sign of Y, magnitude of X (f128)
+///
+/// Constructs a number with the magnitude (absolute value) of its
+/// first argument, `x`, and the sign of its second argument, `y`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn copysignf128(x: f128, y: f128) -> f128 {
+    super::generic::copysign(x, y)
+}
diff --git a/src/math/copysignf16.rs b/src/math/copysignf16.rs
new file mode 100644
index 000000000..820658686
--- /dev/null
+++ b/src/math/copysignf16.rs
@@ -0,0 +1,8 @@
+/// Sign of Y, magnitude of X (f16)
+///
+/// Constructs a number with the magnitude (absolute value) of its
+/// first argument, `x`, and the sign of its second argument, `y`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn copysignf16(x: f16, y: f16) -> f16 {
+    super::generic::copysign(x, y)
+}
diff --git a/src/math/fabs.rs b/src/math/fabs.rs
index 6687fdcc3..46c0d88a5 100644
--- a/src/math/fabs.rs
+++ b/src/math/fabs.rs
@@ -1,4 +1,5 @@
 /// Absolute value (magnitude) (f64)
+///
 /// Calculates the absolute value (magnitude) of the argument `x`,
 /// by direct manipulation of the bit representation of `x`.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
diff --git a/src/math/fabsf.rs b/src/math/fabsf.rs
index 99bb5b5f1..d5775b600 100644
--- a/src/math/fabsf.rs
+++ b/src/math/fabsf.rs
@@ -1,4 +1,5 @@
 /// Absolute value (magnitude) (f32)
+///
 /// Calculates the absolute value (magnitude) of the argument `x`,
 /// by direct manipulation of the bit representation of `x`.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
diff --git a/src/math/fabsf128.rs b/src/math/fabsf128.rs
new file mode 100644
index 000000000..ef531bd91
--- /dev/null
+++ b/src/math/fabsf128.rs
@@ -0,0 +1,37 @@
+/// Absolute value (magnitude) (f128)
+///
+/// Calculates the absolute value (magnitude) of the argument `x`,
+/// by direct manipulation of the bit representation of `x`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fabsf128(x: f128) -> f128 {
+    select_implementation! {
+        name: fabsf,
+        use_intrinsic: target_arch = "wasm32",
+        args: x,
+    }
+
+    super::generic::fabs(x)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn sanity_check() {
+        assert_eq!(fabsf128(-1.0), 1.0);
+        assert_eq!(fabsf128(2.8), 2.8);
+    }
+
+    /// The spec: https://en.cppreference.com/w/cpp/numeric/math/fabs
+    #[test]
+    fn spec_tests() {
+        assert!(fabsf128(f128::NAN).is_nan());
+        for f in [0.0, -0.0].iter().copied() {
+            assert_eq!(fabsf128(f), 0.0);
+        }
+        for f in [f128::INFINITY, f128::NEG_INFINITY].iter().copied() {
+            assert_eq!(fabsf128(f), f128::INFINITY);
+        }
+    }
+}
diff --git a/src/math/fabsf16.rs b/src/math/fabsf16.rs
new file mode 100644
index 000000000..eb41f7391
--- /dev/null
+++ b/src/math/fabsf16.rs
@@ -0,0 +1,37 @@
+/// Absolute value (magnitude) (f16)
+///
+/// Calculates the absolute value (magnitude) of the argument `x`,
+/// by direct manipulation of the bit representation of `x`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fabsf16(x: f16) -> f16 {
+    select_implementation! {
+        name: fabsf,
+        use_intrinsic: target_arch = "wasm32",
+        args: x,
+    }
+
+    super::generic::fabs(x)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn sanity_check() {
+        assert_eq!(fabsf16(-1.0), 1.0);
+        assert_eq!(fabsf16(2.8), 2.8);
+    }
+
+    /// The spec: https://en.cppreference.com/w/cpp/numeric/math/fabs
+    #[test]
+    fn spec_tests() {
+        assert!(fabsf16(f16::NAN).is_nan());
+        for f in [0.0, -0.0].iter().copied() {
+            assert_eq!(fabsf16(f), 0.0);
+        }
+        for f in [f16::INFINITY, f16::NEG_INFINITY].iter().copied() {
+            assert_eq!(fabsf16(f), f16::INFINITY);
+        }
+    }
+}
diff --git a/src/math/mod.rs b/src/math/mod.rs
index 9003a8342..5baf35e42 100644
--- a/src/math/mod.rs
+++ b/src/math/mod.rs
@@ -339,6 +339,26 @@ pub use self::tgammaf::tgammaf;
 pub use self::trunc::trunc;
 pub use self::truncf::truncf;
 
+cfg_if! {
+    if #[cfg(f16_enabled)] {
+        mod copysignf16;
+        mod fabsf16;
+
+        pub use self::copysignf16::copysignf16;
+        pub use self::fabsf16::fabsf16;
+    }
+}
+
+cfg_if! {
+    if #[cfg(f128_enabled)] {
+        mod copysignf128;
+        mod fabsf128;
+
+        pub use self::copysignf128::copysignf128;
+        pub use self::fabsf128::fabsf128;
+    }
+}
+
 #[inline]
 fn get_high_word(x: f64) -> u32 {
     (x.to_bits() >> 32) as u32

From 336a6b6643693cb6fcfaad9643349e06256ab442 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 3 Jan 2025 00:12:53 +0000
Subject: [PATCH 096/279] Add test infrastructure for `f16` and `f128`

Update test traits to support `f16` and `f128`, as applicable. Add the
new routines (`fabs` and `copysign` for `f16` and `f128`) to the list of
all operations.
---
 crates/libm-macros/src/shared.rs             | 28 +++++++
 crates/libm-test/Cargo.toml                  |  2 +-
 crates/libm-test/benches/random.rs           | 47 +++++++-----
 crates/libm-test/src/domain.rs               | 12 +++
 crates/libm-test/src/gen/extensive.rs        |  4 +
 crates/libm-test/src/gen/random.rs           |  4 +
 crates/libm-test/src/mpfloat.rs              | 77 ++++++++++++++------
 crates/libm-test/src/precision.rs            | 32 ++++++++
 crates/libm-test/src/test_traits.rs          |  6 ++
 crates/libm-test/tests/compare_built_musl.rs |  2 +
 crates/libm-test/tests/multiprecision.rs     |  2 +
 etc/function-definitions.json                | 28 +++++++
 etc/function-list.txt                        |  4 +
 13 files changed, 205 insertions(+), 43 deletions(-)

diff --git a/crates/libm-macros/src/shared.rs b/crates/libm-macros/src/shared.rs
index ef0f18801..16547404f 100644
--- a/crates/libm-macros/src/shared.rs
+++ b/crates/libm-macros/src/shared.rs
@@ -4,6 +4,13 @@ use std::fmt;
 use std::sync::LazyLock;
 
 const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])] = &[
+    (
+        // `fn(f16) -> f16`
+        FloatTy::F16,
+        Signature { args: &[Ty::F16], returns: &[Ty::F16] },
+        None,
+        &["fabsf16"],
+    ),
     (
         // `fn(f32) -> f32`
         FloatTy::F32,
@@ -28,6 +35,20 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
             "tgamma", "trunc", "y0", "y1",
         ],
     ),
+    (
+        // `fn(f128) -> f128`
+        FloatTy::F128,
+        Signature { args: &[Ty::F128], returns: &[Ty::F128] },
+        None,
+        &["fabsf128"],
+    ),
+    (
+        // `(f16, f16) -> f16`
+        FloatTy::F16,
+        Signature { args: &[Ty::F16, Ty::F16], returns: &[Ty::F16] },
+        None,
+        &["copysignf16"],
+    ),
     (
         // `(f32, f32) -> f32`
         FloatTy::F32,
@@ -64,6 +85,13 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
             "remainder",
         ],
     ),
+    (
+        // `(f128, f128) -> f128`
+        FloatTy::F128,
+        Signature { args: &[Ty::F128, Ty::F128], returns: &[Ty::F128] },
+        None,
+        &["copysignf128"],
+    ),
     (
         // `(f32, f32, f32) -> f32`
         FloatTy::F32,
diff --git a/crates/libm-test/Cargo.toml b/crates/libm-test/Cargo.toml
index 69e96034e..2761d3d52 100644
--- a/crates/libm-test/Cargo.toml
+++ b/crates/libm-test/Cargo.toml
@@ -8,7 +8,7 @@ publish = false
 default = ["unstable-float"]
 
 # Propagated from libm because this affects which functions we test.
-unstable-float = ["libm/unstable-float"]
+unstable-float = ["libm/unstable-float", "rug?/nightly-float"]
 
 # Generate tests which are random inputs and the outputs are calculated with
 # musl libc.
diff --git a/crates/libm-test/benches/random.rs b/crates/libm-test/benches/random.rs
index 23f429455..cd1e2d2cc 100644
--- a/crates/libm-test/benches/random.rs
+++ b/crates/libm-test/benches/random.rs
@@ -20,7 +20,7 @@ macro_rules! musl_rand_benches {
     (
         fn_name: $fn_name:ident,
         attrs: [$($attr:meta),*],
-        fn_extra: $skip_on_i586:expr,
+        fn_extra: ($skip_on_i586:expr, $musl_fn:expr),
     ) => {
         paste::paste! {
             $(#[$attr])*
@@ -28,15 +28,15 @@ macro_rules! musl_rand_benches {
                 type Op = libm_test::op::$fn_name::Routine;
 
                 #[cfg(feature = "build-musl")]
-                let musl_extra = MuslExtra {
-                    musl_fn: Some(musl_math_sys::$fn_name as libm_test::OpCFn<Op>),
-                    skip_on_i586: $skip_on_i586
+                let musl_extra = MuslExtra::<libm_test::OpCFn<Op>> {
+                    musl_fn: $musl_fn,
+                    skip_on_i586: $skip_on_i586,
                 };
 
                 #[cfg(not(feature = "build-musl"))]
                 let musl_extra = MuslExtra {
                     musl_fn: None,
-                    skip_on_i586: $skip_on_i586
+                    skip_on_i586: $skip_on_i586,
                 };
 
                 bench_one::<Op>(c, musl_extra);
@@ -67,7 +67,10 @@ where
             break;
         }
 
-        let musl_res = input.call(musl_extra.musl_fn.unwrap());
+        let Some(musl_fn) = musl_extra.musl_fn else {
+            continue;
+        };
+        let musl_res = input.call(musl_fn);
         let crate_res = input.call(Op::ROUTINE);
 
         crate_res.validate(musl_res, input, &ctx).context(name).unwrap();
@@ -91,15 +94,16 @@ where
     // Don't test against musl if it is not available
     #[cfg(feature = "build-musl")]
     {
-        let musl_fn = musl_extra.musl_fn.unwrap();
-        group.bench_function("musl", |b| {
-            b.iter(|| {
-                let f = black_box(musl_fn);
-                for input in benchvec.iter().copied() {
-                    input.call(f);
-                }
-            })
-        });
+        if let Some(musl_fn) = musl_extra.musl_fn {
+            group.bench_function("musl", |b| {
+                b.iter(|| {
+                    let f = black_box(musl_fn);
+                    for input in benchvec.iter().copied() {
+                        input.call(f);
+                    }
+                })
+            });
+        }
     }
 }
 
@@ -107,9 +111,16 @@ libm_macros::for_each_function! {
     callback: musl_rand_benches,
     skip: [],
     fn_extra: match MACRO_FN_NAME {
-        // FIXME(correctness): wrong result on i586
-        exp10 | exp10f | exp2 | exp2f => true,
-        _ => false
+        // We pass a tuple of `(skip_on_i586, musl_fn)`
+
+        // FIXME(correctness): exp functions have the wrong result on i586
+        exp10 | exp10f | exp2 | exp2f => (true, Some(musl_math_sys::MACRO_FN_NAME)),
+
+        // Musl does not provide `f16` and `f128` functions
+        copysignf16 | copysignf128 | fabsf16 | fabsf128 => (false, None),
+
+        // By default we never skip (false) and always have a musl function available
+        _ => (false, Some(musl_math_sys::MACRO_FN_NAME))
     }
 }
 
diff --git a/crates/libm-test/src/domain.rs b/crates/libm-test/src/domain.rs
index 7b5a01b96..52393d402 100644
--- a/crates/libm-test/src/domain.rs
+++ b/crates/libm-test/src/domain.rs
@@ -187,3 +187,15 @@ impl HasDomain<f32> for crate::op::lgammaf_r::Routine {
 impl HasDomain<f64> for crate::op::lgamma_r::Routine {
     const DOMAIN: Domain<f64> = Domain::<f64>::LGAMMA;
 }
+
+/* Not all `f16` and `f128` functions exist yet so we can't easily use the macros. */
+
+#[cfg(f16_enabled)]
+impl HasDomain<f16> for crate::op::fabsf16::Routine {
+    const DOMAIN: Domain<f16> = Domain::<f16>::UNBOUNDED;
+}
+
+#[cfg(f128_enabled)]
+impl HasDomain<f128> for crate::op::fabsf128::Routine {
+    const DOMAIN: Domain<f128> = Domain::<f128>::UNBOUNDED;
+}
diff --git a/crates/libm-test/src/gen/extensive.rs b/crates/libm-test/src/gen/extensive.rs
index d8b991b2a..d724226e9 100644
--- a/crates/libm-test/src/gen/extensive.rs
+++ b/crates/libm-test/src/gen/extensive.rs
@@ -138,8 +138,12 @@ macro_rules! impl_extensive_input {
     };
 }
 
+#[cfg(f16_enabled)]
+impl_extensive_input!(f16);
 impl_extensive_input!(f32);
 impl_extensive_input!(f64);
+#[cfg(f128_enabled)]
+impl_extensive_input!(f128);
 
 /// Create a test case iterator for extensive inputs.
 pub fn get_test_cases<Op>(
diff --git a/crates/libm-test/src/gen/random.rs b/crates/libm-test/src/gen/random.rs
index 29a9dcd2b..6b08e560d 100644
--- a/crates/libm-test/src/gen/random.rs
+++ b/crates/libm-test/src/gen/random.rs
@@ -107,8 +107,12 @@ macro_rules! impl_random_input {
     };
 }
 
+#[cfg(f16_enabled)]
+impl_random_input!(f16);
 impl_random_input!(f32);
 impl_random_input!(f64);
+#[cfg(f128_enabled)]
+impl_random_input!(f128);
 
 /// Create a test case iterator.
 pub fn get_test_cases<RustArgs: RandomInput>(
diff --git a/crates/libm-test/src/mpfloat.rs b/crates/libm-test/src/mpfloat.rs
index ad98fafc8..f2b7b2f25 100644
--- a/crates/libm-test/src/mpfloat.rs
+++ b/crates/libm-test/src/mpfloat.rs
@@ -137,6 +137,7 @@ libm_macros::for_each_function! {
         fmod, fmodf, frexp, frexpf, ilogb, ilogbf, jn, jnf, ldexp, ldexpf,
         lgamma_r, lgammaf_r, modf, modff, nextafter, nextafterf, pow,powf,
         remquo, remquof, scalbn, scalbnf, sincos, sincosf, yn, ynf,
+        copysignf16, copysignf128, fabsf16, fabsf128,
     ],
     fn_extra: match MACRO_FN_NAME {
         // Remap function names that are different between mpfr and libm
@@ -157,10 +158,8 @@ libm_macros::for_each_function! {
 /// Implement unary functions that don't have a `_round` version
 macro_rules! impl_no_round {
     // Unary matcher
-    ($($fn_name:ident, $rug_name:ident;)*) => {
+    ($($fn_name:ident => $rug_name:ident;)*) => {
         paste::paste! {
-            // Implement for both f32 and f64
-            $( impl_no_round!{ @inner_unary [< $fn_name f >], $rug_name } )*
             $( impl_no_round!{ @inner_unary $fn_name, $rug_name } )*
         }
     };
@@ -183,33 +182,34 @@ macro_rules! impl_no_round {
 }
 
 impl_no_round! {
-    fabs, abs_mut;
-    ceil, ceil_mut;
-    floor, floor_mut;
-    rint, round_even_mut; // FIXME: respect rounding mode
-    round, round_mut;
-    trunc, trunc_mut;
+    ceil => ceil_mut;
+    ceilf => ceil_mut;
+    fabs => abs_mut;
+    fabsf => abs_mut;
+    floor => floor_mut;
+    floorf => floor_mut;
+    rint => round_even_mut; // FIXME: respect rounding mode
+    rintf => round_even_mut; // FIXME: respect rounding mode
+    round => round_mut;
+    roundf => round_mut;
+    trunc => trunc_mut;
+    truncf => trunc_mut;
+}
+
+#[cfg(f16_enabled)]
+impl_no_round! {
+    fabsf16 => abs_mut;
+}
+
+#[cfg(f128_enabled)]
+impl_no_round! {
+    fabsf128 => abs_mut;
 }
 
 /// Some functions are difficult to do in a generic way. Implement them here.
 macro_rules! impl_op_for_ty {
     ($fty:ty, $suffix:literal) => {
         paste::paste! {
-            impl MpOp for crate::op::[<copysign $suffix>]::Routine {
-                type MpTy = (MpFloat, MpFloat);
-
-                fn new_mp() -> Self::MpTy {
-                    (new_mpfloat::<Self::FTy>(), new_mpfloat::<Self::FTy>())
-                }
-
-                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
-                    this.0.assign(input.0);
-                    this.1.assign(input.1);
-                    this.0.copysign_mut(&this.1);
-                    prep_retval::<Self::RustRet>(&mut this.0, Ordering::Equal)
-                }
-            }
-
             impl MpOp for crate::op::[<modf $suffix>]::Routine {
                 type MpTy = (MpFloat, MpFloat);
 
@@ -379,9 +379,38 @@ macro_rules! impl_op_for_ty {
     };
 }
 
+/// Version of `impl_op_for_ty` with only functions that have `f16` and `f128` implementations.
+macro_rules! impl_op_for_ty_all {
+    ($fty:ty, $suffix:literal) => {
+        paste::paste! {
+            impl MpOp for crate::op::[<copysign $suffix>]::Routine {
+                type MpTy = (MpFloat, MpFloat);
+
+                fn new_mp() -> Self::MpTy {
+                    (new_mpfloat::<Self::FTy>(), new_mpfloat::<Self::FTy>())
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.0.assign(input.0);
+                    this.1.assign(input.1);
+                    this.0.copysign_mut(&this.1);
+                    prep_retval::<Self::RustRet>(&mut this.0, Ordering::Equal)
+                }
+            }
+        }
+    };
+}
+
 impl_op_for_ty!(f32, "f");
 impl_op_for_ty!(f64, "");
 
+#[cfg(f16_enabled)]
+impl_op_for_ty_all!(f16, "f16");
+impl_op_for_ty_all!(f32, "f");
+impl_op_for_ty_all!(f64, "");
+#[cfg(f128_enabled)]
+impl_op_for_ty_all!(f128, "f128");
+
 // `lgamma_r` is not a simple suffix so we can't use the above macro.
 impl MpOp for crate::op::lgamma_r::Routine {
     type MpTy = MpFloat;
diff --git a/crates/libm-test/src/precision.rs b/crates/libm-test/src/precision.rs
index 696bb3735..f8c3a7b8f 100644
--- a/crates/libm-test/src/precision.rs
+++ b/crates/libm-test/src/precision.rs
@@ -157,6 +157,9 @@ pub trait MaybeOverride<Input> {
     }
 }
 
+#[cfg(f16_enabled)]
+impl MaybeOverride<(f16,)> for SpecialCase {}
+
 impl MaybeOverride<(f32,)> for SpecialCase {
     fn check_float<F: Float>(
         input: (f32,),
@@ -290,6 +293,9 @@ impl MaybeOverride<(f64,)> for SpecialCase {
     }
 }
 
+#[cfg(f128_enabled)]
+impl MaybeOverride<(f128,)> for SpecialCase {}
+
 /// Check NaN bits if the function requires it
 fn maybe_check_nan_bits<F: Float>(actual: F, expected: F, ctx: &CheckCtx) -> Option<TestResult> {
     if !(ctx.base_name == BaseName::Fabs || ctx.base_name == BaseName::Copysign) {
@@ -317,6 +323,19 @@ fn maybe_check_nan_bits<F: Float>(actual: F, expected: F, ctx: &CheckCtx) -> Opt
     }
 }
 
+#[cfg(f16_enabled)]
+impl MaybeOverride<(f16, f16)> for SpecialCase {
+    fn check_float<F: Float>(
+        input: (f16, f16),
+        _actual: F,
+        expected: F,
+        _ulp: &mut u32,
+        ctx: &CheckCtx,
+    ) -> Option<TestResult> {
+        maybe_skip_binop_nan(input, expected, ctx)
+    }
+}
+
 impl MaybeOverride<(f32, f32)> for SpecialCase {
     fn check_float<F: Float>(
         input: (f32, f32),
@@ -341,6 +360,19 @@ impl MaybeOverride<(f64, f64)> for SpecialCase {
     }
 }
 
+#[cfg(f128_enabled)]
+impl MaybeOverride<(f128, f128)> for SpecialCase {
+    fn check_float<F: Float>(
+        input: (f128, f128),
+        _actual: F,
+        expected: F,
+        _ulp: &mut u32,
+        ctx: &CheckCtx,
+    ) -> Option<TestResult> {
+        maybe_skip_binop_nan(input, expected, ctx)
+    }
+}
+
 /// Musl propagates NaNs if one is provided as the input, but we return the other input.
 // F1 and F2 are always the same type, this is just to please generics
 fn maybe_skip_binop_nan<F1: Float, F2: Float>(
diff --git a/crates/libm-test/src/test_traits.rs b/crates/libm-test/src/test_traits.rs
index 261d1f254..0a4baa2e3 100644
--- a/crates/libm-test/src/test_traits.rs
+++ b/crates/libm-test/src/test_traits.rs
@@ -303,6 +303,12 @@ where
 
 impl_float!(f32, f64);
 
+#[cfg(f16_enabled)]
+impl_float!(f16);
+
+#[cfg(f128_enabled)]
+impl_float!(f128);
+
 /* trait implementations for compound types */
 
 /// Implement `CheckOutput` for combinations of types.
diff --git a/crates/libm-test/tests/compare_built_musl.rs b/crates/libm-test/tests/compare_built_musl.rs
index ecd379a0a..3e11d322a 100644
--- a/crates/libm-test/tests/compare_built_musl.rs
+++ b/crates/libm-test/tests/compare_built_musl.rs
@@ -46,6 +46,8 @@ where
 
 libm_macros::for_each_function! {
     callback: musl_rand_tests,
+    // Musl does not support `f16` and `f128` on all platforms.
+    skip: [copysignf16, copysignf128, fabsf16, fabsf128],
     attributes: [
         #[cfg_attr(x86_no_sse, ignore)] // FIXME(correctness): wrong result on i586
         [exp10, exp10f, exp2, exp2f, rint]
diff --git a/crates/libm-test/tests/multiprecision.rs b/crates/libm-test/tests/multiprecision.rs
index 42ec965c1..7961b0802 100644
--- a/crates/libm-test/tests/multiprecision.rs
+++ b/crates/libm-test/tests/multiprecision.rs
@@ -120,6 +120,8 @@ libm_macros::for_each_function! {
         atan2f,
         copysign,
         copysignf,
+        copysignf16,
+        copysignf128,
         fdim,
         fdimf,
         fma,
diff --git a/etc/function-definitions.json b/etc/function-definitions.json
index 4b10812c3..0b2d6214f 100644
--- a/etc/function-definitions.json
+++ b/etc/function-definitions.json
@@ -136,6 +136,20 @@
         ],
         "type": "f32"
     },
+    "copysignf128": {
+        "sources": [
+            "src/math/copysignf128.rs",
+            "src/math/generic/copysign.rs"
+        ],
+        "type": "f128"
+    },
+    "copysignf16": {
+        "sources": [
+            "src/math/copysignf16.rs",
+            "src/math/generic/copysign.rs"
+        ],
+        "type": "f16"
+    },
     "cos": {
         "sources": [
             "src/libm_helper.rs",
@@ -258,6 +272,20 @@
         ],
         "type": "f32"
     },
+    "fabsf128": {
+        "sources": [
+            "src/math/fabsf128.rs",
+            "src/math/generic/fabs.rs"
+        ],
+        "type": "f128"
+    },
+    "fabsf16": {
+        "sources": [
+            "src/math/fabsf16.rs",
+            "src/math/generic/fabs.rs"
+        ],
+        "type": "f16"
+    },
     "fdim": {
         "sources": [
             "src/libm_helper.rs",
diff --git a/etc/function-list.txt b/etc/function-list.txt
index 51f5b221c..0a1bbab24 100644
--- a/etc/function-list.txt
+++ b/etc/function-list.txt
@@ -19,6 +19,8 @@ ceil
 ceilf
 copysign
 copysignf
+copysignf128
+copysignf16
 cos
 cosf
 cosh
@@ -37,6 +39,8 @@ expm1
 expm1f
 fabs
 fabsf
+fabsf128
+fabsf16
 fdim
 fdimf
 floor

From 9b68478c4d4a525617918f6ae18a0f623b90fd11 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sat, 28 Dec 2024 09:52:19 +0000
Subject: [PATCH 097/279] Add domain and edge case tests to musl

This provides an increase in test coverage on platforms that cannot test
against MPFR.
---
 crates/libm-test/src/lib.rs                  |  4 +-
 crates/libm-test/src/op.rs                   |  2 +
 crates/libm-test/tests/compare_built_musl.rs | 97 +++++++++++++++++++-
 3 files changed, 101 insertions(+), 2 deletions(-)

diff --git a/crates/libm-test/src/lib.rs b/crates/libm-test/src/lib.rs
index a940db1d2..251114a0d 100644
--- a/crates/libm-test/src/lib.rs
+++ b/crates/libm-test/src/lib.rs
@@ -23,7 +23,9 @@ use std::time::SystemTime;
 pub use f8_impl::f8;
 pub use libm::support::{Float, Int, IntTy, MinInt};
 pub use num::{FloatExt, logspace};
-pub use op::{BaseName, FloatTy, Identifier, MathOp, OpCFn, OpFTy, OpRustFn, OpRustRet, Ty};
+pub use op::{
+    BaseName, FloatTy, Identifier, MathOp, OpCFn, OpCRet, OpFTy, OpRustFn, OpRustRet, Ty,
+};
 pub use precision::{MaybeOverride, SpecialCase, default_ulp};
 use run_cfg::EXTENSIVE_MAX_ITERATIONS;
 pub use run_cfg::{CheckBasis, CheckCtx, EXTENSIVE_ENV, GeneratorKind, skip_extensive_test};
diff --git a/crates/libm-test/src/op.rs b/crates/libm-test/src/op.rs
index ee61eb0b8..8329d3424 100644
--- a/crates/libm-test/src/op.rs
+++ b/crates/libm-test/src/op.rs
@@ -96,6 +96,8 @@ pub type OpFTy<Op> = <Op as MathOp>::FTy;
 pub type OpITy<Op> = <<Op as MathOp>::FTy as Float>::Int;
 /// Access the associated `CFn` type from an op (helper to avoid ambiguous associated types).
 pub type OpCFn<Op> = <Op as MathOp>::CFn;
+/// Access the associated `CRet` type from an op (helper to avoid ambiguous associated types).
+pub type OpCRet<Op> = <Op as MathOp>::CRet;
 /// Access the associated `RustFn` type from an op (helper to avoid ambiguous associated types).
 pub type OpRustFn<Op> = <Op as MathOp>::RustFn;
 /// Access the associated `RustRet` type from an op (helper to avoid ambiguous associated types).
diff --git a/crates/libm-test/tests/compare_built_musl.rs b/crates/libm-test/tests/compare_built_musl.rs
index 3e11d322a..b91d7f9f5 100644
--- a/crates/libm-test/tests/compare_built_musl.rs
+++ b/crates/libm-test/tests/compare_built_musl.rs
@@ -9,8 +9,9 @@
 // There are some targets we can't build musl for
 #![cfg(feature = "build-musl")]
 
-use libm_test::gen::random;
+use libm_test::domain::HasDomain;
 use libm_test::gen::random::RandomInput;
+use libm_test::gen::{domain_logspace, edge_cases, random};
 use libm_test::{CheckBasis, CheckCtx, CheckOutput, MathOp, TupleCall};
 
 macro_rules! musl_rand_tests {
@@ -53,3 +54,97 @@ libm_macros::for_each_function! {
         [exp10, exp10f, exp2, exp2f, rint]
     ],
 }
+
+/// Test against musl with generators from a domain.
+macro_rules! musl_domain_tests {
+    (
+        fn_name: $fn_name:ident,
+        attrs: [$($attr:meta),*],
+    ) => {
+        paste::paste! {
+            #[test]
+            $(#[$attr])*
+            fn [< musl_edge_case_ $fn_name >]() {
+                type Op = libm_test::op::$fn_name::Routine;
+                domain_test_runner::<Op, _>(
+                    edge_cases::get_test_cases::<Op, _>,
+                    musl_math_sys::$fn_name,
+                );
+            }
+
+            #[test]
+            $(#[$attr])*
+            fn [< musl_logspace_ $fn_name >]() {
+                type Op = libm_test::op::$fn_name::Routine;
+                domain_test_runner::<Op, _>(
+                    domain_logspace::get_test_cases::<Op>,
+                    musl_math_sys::$fn_name,
+                );
+            }
+        }
+    };
+}
+
+/// Test a single routine against domaine-aware inputs.
+fn domain_test_runner<Op, I>(gen: impl FnOnce(&CheckCtx) -> I, musl_fn: Op::CFn)
+where
+    Op: MathOp,
+    Op: HasDomain<Op::FTy>,
+    I: Iterator<Item = Op::RustArgs>,
+{
+    let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Musl);
+    let cases = gen(&ctx);
+
+    for input in cases {
+        let musl_res = input.call(musl_fn);
+        let crate_res = input.call(Op::ROUTINE);
+
+        crate_res.validate(musl_res, input, &ctx).unwrap();
+    }
+}
+
+libm_macros::for_each_function! {
+    callback: musl_domain_tests,
+    attributes: [],
+    skip: [
+        // Functions with multiple inputs
+        atan2,
+        atan2f,
+        copysign,
+        copysignf,
+        copysignf16,
+        copysignf128,
+        fdim,
+        fdimf,
+        fma,
+        fmaf,
+        fmax,
+        fmaxf,
+        fmin,
+        fminf,
+        fmod,
+        fmodf,
+        hypot,
+        hypotf,
+        jn,
+        jnf,
+        ldexp,
+        ldexpf,
+        nextafter,
+        nextafterf,
+        pow,
+        powf,
+        remainder,
+        remainderf,
+        remquo,
+        remquof,
+        scalbn,
+        scalbnf,
+        yn,
+        ynf,
+
+        // Not provided by musl
+        fabsf16,
+        fabsf128,
+    ],
+}

From 5e0c8403ae508465cfacce8ba96458e00f28a753 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 6 Jan 2025 09:26:27 +0000
Subject: [PATCH 098/279] Add an override for failing ceil/floor tests on i586

---
 crates/libm-test/src/precision.rs | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/crates/libm-test/src/precision.rs b/crates/libm-test/src/precision.rs
index f8c3a7b8f..817ea0fae 100644
--- a/crates/libm-test/src/precision.rs
+++ b/crates/libm-test/src/precision.rs
@@ -270,6 +270,16 @@ impl MaybeOverride<(f64,)> for SpecialCase {
             return XFAIL;
         }
 
+        if (ctx.fn_ident == Identifier::Ceil || ctx.fn_ident == Identifier::Floor)
+            && expected.eq_repr(F::NEG_ZERO)
+            && actual.eq_repr(F::ZERO)
+            && cfg!(x86_no_sse)
+        {
+            // FIXME: the x87 implementations do not keep the distinction between -0.0 and 0.0.
+            // See https://github.com/rust-lang/libm/pull/404#issuecomment-2572399955
+            return XFAIL;
+        }
+
         maybe_check_nan_bits(actual, expected, ctx)
     }
 

From 0164e9326c974b3b64f77c8f8bc803da3c598b79 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 6 Jan 2025 09:46:16 +0000
Subject: [PATCH 099/279] Loosen precision on i586 based on new tests

---
 crates/libm-test/src/precision.rs | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/crates/libm-test/src/precision.rs b/crates/libm-test/src/precision.rs
index 817ea0fae..1a66a430c 100644
--- a/crates/libm-test/src/precision.rs
+++ b/crates/libm-test/src/precision.rs
@@ -106,7 +106,11 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 {
         match ctx.fn_ident {
             Id::Asinh => ulp = 3,
             Id::Asinhf => ulp = 3,
+            Id::Exp10 | Id::Exp10f => ulp = 1_000_000,
+            Id::Exp2 | Id::Exp2f => ulp = 10_000_000,
+            Id::Fmaf => ulp = 1,
             Id::Log1p | Id::Log1pf => ulp = 2,
+            Id::Rint => ulp = 100_000,
             Id::Round => ulp = 1,
             Id::Tan => ulp = 2,
             _ => (),
@@ -271,15 +275,23 @@ impl MaybeOverride<(f64,)> for SpecialCase {
         }
 
         if (ctx.fn_ident == Identifier::Ceil || ctx.fn_ident == Identifier::Floor)
+            && cfg!(x86_no_sse)
             && expected.eq_repr(F::NEG_ZERO)
             && actual.eq_repr(F::ZERO)
-            && cfg!(x86_no_sse)
         {
             // FIXME: the x87 implementations do not keep the distinction between -0.0 and 0.0.
             // See https://github.com/rust-lang/libm/pull/404#issuecomment-2572399955
             return XFAIL;
         }
 
+        if (ctx.fn_ident == Identifier::Exp10 || ctx.fn_ident == Identifier::Exp2)
+            && cfg!(x86_no_sse)
+        {
+            // FIXME: i586 has very imprecise results with ULP > u32::MAX for these
+            // operations so we can't reasonably provide a limit.
+            return XFAIL;
+        }
+
         maybe_check_nan_bits(actual, expected, ctx)
     }
 

From 45862e9eec9b57395afa72e4df6e259055166a8b Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 6 Jan 2025 20:17:38 +0000
Subject: [PATCH 100/279] ci: Only update the github ref for pull requests

On master, this fetch fails with:

    fatal: refusing to fetch into branch 'refs/heads/master' checked out at '/home/runner/work/libm/libm'

Just skip the command when this shouldn't be needed.
---
 .github/workflows/main.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 1b2fd12ba..320800f2e 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -198,6 +198,7 @@ jobs:
           fetch-depth: 100
       - name: Fetch pull request ref
         run: git fetch origin "$GITHUB_REF:$GITHUB_REF"
+        if: github.event_name == 'pull_request'
       - run: python3 ci/calculate-exhaustive-matrix.py >> "$GITHUB_OUTPUT"
         id: script
 

From ce0560c82b54cab222b5c3866154c114bf34b0df Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 6 Jan 2025 05:15:40 +0000
Subject: [PATCH 101/279] Enable MPFR tests on i586

MPFR does build and run correctly without SSE, but requires
`force-cross` be enabled.
---
 ci/docker/i586-unknown-linux-gnu/Dockerfile | 2 +-
 ci/run.sh                                   | 2 +-
 crates/libm-test/Cargo.toml                 | 3 ++-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/ci/docker/i586-unknown-linux-gnu/Dockerfile b/ci/docker/i586-unknown-linux-gnu/Dockerfile
index 3b0bfc0d3..37e206a84 100644
--- a/ci/docker/i586-unknown-linux-gnu/Dockerfile
+++ b/ci/docker/i586-unknown-linux-gnu/Dockerfile
@@ -2,4 +2,4 @@ FROM ubuntu:24.04
 
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
-    gcc-multilib libc6-dev ca-certificates
+    gcc-multilib m4 make libc6-dev ca-certificates
diff --git a/ci/run.sh b/ci/run.sh
index 7e514a1cd..70fc271f1 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -45,8 +45,8 @@ case "$target" in
     # FIXME(ci): we should be able to enable aarch64 Linux here once GHA
     # support rolls out.
     x86_64*) extra_flags="$extra_flags --features libm-test/test-multiprecision" ;;
-    # i686 works fine, i586 does not
     i686*) extra_flags="$extra_flags --features libm-test/test-multiprecision" ;;
+    i586*) extra_flags="$extra_flags --features libm-test/test-multiprecision --features gmp-mpfr-sys/force-cross" ;;
     # Apple aarch64 is native
     aarch64*apple*) extra_flags="$extra_flags --features libm-test/test-multiprecision" ;;
 esac
diff --git a/crates/libm-test/Cargo.toml b/crates/libm-test/Cargo.toml
index 2761d3d52..371beb19a 100644
--- a/crates/libm-test/Cargo.toml
+++ b/crates/libm-test/Cargo.toml
@@ -12,7 +12,7 @@ unstable-float = ["libm/unstable-float", "rug?/nightly-float"]
 
 # Generate tests which are random inputs and the outputs are calculated with
 # musl libc.
-test-multiprecision = ["dep:az", "dep:rug"]
+test-multiprecision = ["dep:az", "dep:rug", "dep:gmp-mpfr-sys"]
 
 # Build our own musl for testing and benchmarks
 build-musl = ["dep:musl-math-sys"]
@@ -26,6 +26,7 @@ short-benchmarks = []
 [dependencies]
 anyhow = "1.0.90"
 az = { version = "1.2.1", optional = true }
+gmp-mpfr-sys = { version = "1.6.4", optional = true, default-features = false, features = ["mpfr"] }
 indicatif = { version = "0.17.9", default-features = false }
 libm = { path = "../..", features = ["unstable-public-internals"] }
 libm-macros = { path = "../libm-macros" }

From 6328b101b10c688bbdabd3f8e215e0598b951e95 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 6 Jan 2025 20:27:48 +0000
Subject: [PATCH 102/279] Increase the allowed precision for failing tests on
 i586

These will need to be fixed, for now just xfail them so this doesn't
block better test coverage.
---
 crates/libm-test/src/precision.rs | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/crates/libm-test/src/precision.rs b/crates/libm-test/src/precision.rs
index 1a66a430c..03bf7cecc 100644
--- a/crates/libm-test/src/precision.rs
+++ b/crates/libm-test/src/precision.rs
@@ -104,11 +104,14 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 {
     // In some cases, our implementation is less accurate than musl on i586.
     if cfg!(x86_no_sse) {
         match ctx.fn_ident {
+            // FIXME(#401): these need to be correctly rounded but are not.
+            Id::Fmaf => ulp = 1,
+            Id::Fdim => ulp = 1,
+
             Id::Asinh => ulp = 3,
             Id::Asinhf => ulp = 3,
             Id::Exp10 | Id::Exp10f => ulp = 1_000_000,
             Id::Exp2 | Id::Exp2f => ulp = 10_000_000,
-            Id::Fmaf => ulp = 1,
             Id::Log1p | Id::Log1pf => ulp = 2,
             Id::Rint => ulp = 100_000,
             Id::Round => ulp = 1,

From d4ea0304bb2426183b897cf4895ec87255dfe288 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 6 Jan 2025 21:57:29 +0000
Subject: [PATCH 103/279] Switch from using `unstable-intrinsics` to
 `intrinsics_enabled`

Unlike `unstable-intrinsics`, `intrinsics_enabled` gets disabled with
`force-soft-floats` which is what we want here.
---
 src/math/arch/intrinsics.rs      | 6 ++----
 src/math/support/float_traits.rs | 4 ++--
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/math/arch/intrinsics.rs b/src/math/arch/intrinsics.rs
index 1cf9291f4..733af0f1a 100644
--- a/src/math/arch/intrinsics.rs
+++ b/src/math/arch/intrinsics.rs
@@ -12,13 +12,11 @@ pub fn ceilf(x: f32) -> f32 {
 }
 
 pub fn fabs(x: f64) -> f64 {
-    // SAFETY: safe intrinsic with no preconditions
-    unsafe { core::intrinsics::fabsf64(x) }
+    x.abs()
 }
 
 pub fn fabsf(x: f32) -> f32 {
-    // SAFETY: safe intrinsic with no preconditions
-    unsafe { core::intrinsics::fabsf32(x) }
+    x.abs()
 }
 
 pub fn floor(x: f64) -> f64 {
diff --git a/src/math/support/float_traits.rs b/src/math/support/float_traits.rs
index 3b5be4fa3..697050966 100644
--- a/src/math/support/float_traits.rs
+++ b/src/math/support/float_traits.rs
@@ -200,7 +200,7 @@ macro_rules! float_impl {
             fn abs(self) -> Self {
                 cfg_if! {
                     // FIXME(msrv): `abs` is available in `core` starting with 1.85.
-                    if #[cfg(feature = "unstable-intrinsics")] {
+                    if #[cfg(intrinsics_enabled)] {
                         self.abs()
                     } else {
                         super::super::generic::fabs(self)
@@ -210,7 +210,7 @@ macro_rules! float_impl {
             fn copysign(self, other: Self) -> Self {
                 cfg_if! {
                     // FIXME(msrv): `copysign` is available in `core` starting with 1.85.
-                    if #[cfg(feature = "unstable-intrinsics")] {
+                    if #[cfg(intrinsics_enabled)] {
                         self.copysign(other)
                     } else {
                         super::super::generic::copysign(self, other)

From 32f96f132c76b32654bdfcaef241f9f9d21a025f Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 6 Jan 2025 22:52:01 +0000
Subject: [PATCH 104/279] Remove an unused `feature = "force-soft-floats"` gate

---
 src/math/arch/i686.rs | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/math/arch/i686.rs b/src/math/arch/i686.rs
index 80f7face1..ad54d8b61 100644
--- a/src/math/arch/i686.rs
+++ b/src/math/arch/i686.rs
@@ -1,7 +1,5 @@
 //! Architecture-specific support for x86-32 and x86-64 with SSE2
 
-#![cfg(not(feature = "force-soft-floats"))]
-
 #[cfg(target_arch = "x86")]
 use core::arch::x86::*;
 #[cfg(target_arch = "x86_64")]

From 424c3ece1a7546de8530fa9d0fbf90d3b182cd18 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 6 Jan 2025 23:17:38 +0000
Subject: [PATCH 105/279] Don't use intrinsics abs for `f16` and `f128` on
 wasm32

This configuration was duplicated from `fabs` and `fabsf`, but wasm is
unlikely to have an intrinsic lowering for these float types. So, just
always use the generic.
---
 src/math/fabsf128.rs | 6 ------
 src/math/fabsf16.rs  | 6 ------
 2 files changed, 12 deletions(-)

diff --git a/src/math/fabsf128.rs b/src/math/fabsf128.rs
index ef531bd91..46429ca49 100644
--- a/src/math/fabsf128.rs
+++ b/src/math/fabsf128.rs
@@ -4,12 +4,6 @@
 /// by direct manipulation of the bit representation of `x`.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn fabsf128(x: f128) -> f128 {
-    select_implementation! {
-        name: fabsf,
-        use_intrinsic: target_arch = "wasm32",
-        args: x,
-    }
-
     super::generic::fabs(x)
 }
 
diff --git a/src/math/fabsf16.rs b/src/math/fabsf16.rs
index eb41f7391..eee42ac6a 100644
--- a/src/math/fabsf16.rs
+++ b/src/math/fabsf16.rs
@@ -4,12 +4,6 @@
 /// by direct manipulation of the bit representation of `x`.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn fabsf16(x: f16) -> f16 {
-    select_implementation! {
-        name: fabsf,
-        use_intrinsic: target_arch = "wasm32",
-        args: x,
-    }
-
     super::generic::fabs(x)
 }
 

From 44770b96920557baf38990d2ee4142e166be579d Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Tue, 7 Jan 2025 00:19:26 +0000
Subject: [PATCH 106/279] Replace "intrinsic" config with "arch" config

WASM is the only architecture we use `intrinsics::` for. We probably
don't want to do this for any other architectures since it is better to
use assembly, or work toward getting the functions available in `core`.

To more accurately reflect the relationship between arch and intrinsics,
make wasm32 an `arch` module and call the intrinsics from there.
---
 etc/function-definitions.json              | 20 ++++++++---------
 src/math/arch/mod.rs                       |  8 +++----
 src/math/arch/{intrinsics.rs => wasm32.rs} |  6 ++++--
 src/math/ceil.rs                           |  2 +-
 src/math/ceilf.rs                          |  2 +-
 src/math/fabs.rs                           |  2 +-
 src/math/fabsf.rs                          |  2 +-
 src/math/floor.rs                          |  2 +-
 src/math/floorf.rs                         |  2 +-
 src/math/sqrt.rs                           |  6 ++++--
 src/math/sqrtf.rs                          |  6 ++++--
 src/math/support/macros.rs                 | 25 +++-------------------
 src/math/trunc.rs                          |  2 +-
 src/math/truncf.rs                         |  2 +-
 14 files changed, 37 insertions(+), 50 deletions(-)
 rename src/math/arch/{intrinsics.rs => wasm32.rs} (82%)

diff --git a/etc/function-definitions.json b/etc/function-definitions.json
index 0b2d6214f..3cf7e0fed 100644
--- a/etc/function-definitions.json
+++ b/etc/function-definitions.json
@@ -108,14 +108,14 @@
         "sources": [
             "src/libm_helper.rs",
             "src/math/arch/i586.rs",
-            "src/math/arch/intrinsics.rs",
+            "src/math/arch/wasm32.rs",
             "src/math/ceil.rs"
         ],
         "type": "f64"
     },
     "ceilf": {
         "sources": [
-            "src/math/arch/intrinsics.rs",
+            "src/math/arch/wasm32.rs",
             "src/math/ceilf.rs"
         ],
         "type": "f32"
@@ -258,7 +258,7 @@
     "fabs": {
         "sources": [
             "src/libm_helper.rs",
-            "src/math/arch/intrinsics.rs",
+            "src/math/arch/wasm32.rs",
             "src/math/fabs.rs",
             "src/math/generic/fabs.rs"
         ],
@@ -266,7 +266,7 @@
     },
     "fabsf": {
         "sources": [
-            "src/math/arch/intrinsics.rs",
+            "src/math/arch/wasm32.rs",
             "src/math/fabsf.rs",
             "src/math/generic/fabs.rs"
         ],
@@ -303,14 +303,14 @@
         "sources": [
             "src/libm_helper.rs",
             "src/math/arch/i586.rs",
-            "src/math/arch/intrinsics.rs",
+            "src/math/arch/wasm32.rs",
             "src/math/floor.rs"
         ],
         "type": "f64"
     },
     "floorf": {
         "sources": [
-            "src/math/arch/intrinsics.rs",
+            "src/math/arch/wasm32.rs",
             "src/math/floorf.rs"
         ],
         "type": "f32"
@@ -683,7 +683,7 @@
         "sources": [
             "src/libm_helper.rs",
             "src/math/arch/i686.rs",
-            "src/math/arch/intrinsics.rs",
+            "src/math/arch/wasm32.rs",
             "src/math/sqrt.rs"
         ],
         "type": "f64"
@@ -691,7 +691,7 @@
     "sqrtf": {
         "sources": [
             "src/math/arch/i686.rs",
-            "src/math/arch/intrinsics.rs",
+            "src/math/arch/wasm32.rs",
             "src/math/sqrtf.rs"
         ],
         "type": "f32"
@@ -738,14 +738,14 @@
     "trunc": {
         "sources": [
             "src/libm_helper.rs",
-            "src/math/arch/intrinsics.rs",
+            "src/math/arch/wasm32.rs",
             "src/math/trunc.rs"
         ],
         "type": "f64"
     },
     "truncf": {
         "sources": [
-            "src/math/arch/intrinsics.rs",
+            "src/math/arch/wasm32.rs",
             "src/math/truncf.rs"
         ],
         "type": "f32"
diff --git a/src/math/arch/mod.rs b/src/math/arch/mod.rs
index cf9547117..bd79ae1c6 100644
--- a/src/math/arch/mod.rs
+++ b/src/math/arch/mod.rs
@@ -5,14 +5,14 @@
 //! is used when calling the function directly. This helps anyone who uses `libm` directly, as
 //! well as improving things when these routines are called as part of other implementations.
 
-#[cfg(intrinsics_enabled)]
-pub mod intrinsics;
-
 // Most implementations should be defined here, to ensure they are not made available when
 // soft floats are required.
 #[cfg(arch_enabled)]
 cfg_if! {
-    if #[cfg(target_feature = "sse2")] {
+    if #[cfg(all(target_arch = "wasm32", intrinsics_enabled))] {
+        mod wasm32;
+        pub use wasm32::{ceil, ceilf, fabs, fabsf, floor, floorf, sqrt, sqrtf, trunc, truncf};
+    } else if #[cfg(target_feature = "sse2")] {
         mod i686;
         pub use i686::{sqrt, sqrtf};
     }
diff --git a/src/math/arch/intrinsics.rs b/src/math/arch/wasm32.rs
similarity index 82%
rename from src/math/arch/intrinsics.rs
rename to src/math/arch/wasm32.rs
index 733af0f1a..09df8624e 100644
--- a/src/math/arch/intrinsics.rs
+++ b/src/math/arch/wasm32.rs
@@ -1,5 +1,7 @@
-// Config is needed for times when this module is available but we don't call everything
-#![allow(dead_code)]
+//! Wasm asm is not stable; just use intrinsics for operations that have asm routine equivalents.
+//!
+//! Note that we need to be absolutely certain that everything here lowers to assembly operations,
+//! otherwise libcalls will be recursive.
 
 pub fn ceil(x: f64) -> f64 {
     // SAFETY: safe intrinsic with no preconditions
diff --git a/src/math/ceil.rs b/src/math/ceil.rs
index b0576f3dc..398bfee47 100644
--- a/src/math/ceil.rs
+++ b/src/math/ceil.rs
@@ -10,8 +10,8 @@ const TOINT: f64 = 1. / f64::EPSILON;
 pub fn ceil(x: f64) -> f64 {
     select_implementation! {
         name: ceil,
+        use_arch: all(target_arch = "wasm32", intrinsics_enabled),
         use_arch_required: all(target_arch = "x86", not(target_feature = "sse2")),
-        use_intrinsic: target_arch = "wasm32",
         args: x,
     }
 
diff --git a/src/math/ceilf.rs b/src/math/ceilf.rs
index 9eb2ec07a..9e8e78e3e 100644
--- a/src/math/ceilf.rs
+++ b/src/math/ceilf.rs
@@ -7,7 +7,7 @@ use core::f32;
 pub fn ceilf(x: f32) -> f32 {
     select_implementation! {
         name: ceilf,
-        use_intrinsic: target_arch = "wasm32",
+        use_arch: all(target_arch = "wasm32", intrinsics_enabled),
         args: x,
     }
 
diff --git a/src/math/fabs.rs b/src/math/fabs.rs
index 46c0d88a5..22867fab0 100644
--- a/src/math/fabs.rs
+++ b/src/math/fabs.rs
@@ -6,7 +6,7 @@
 pub fn fabs(x: f64) -> f64 {
     select_implementation! {
         name: fabs,
-        use_intrinsic: target_arch = "wasm32",
+        use_arch: all(target_arch = "wasm32", intrinsics_enabled),
         args: x,
     }
 
diff --git a/src/math/fabsf.rs b/src/math/fabsf.rs
index d5775b600..e5820a26c 100644
--- a/src/math/fabsf.rs
+++ b/src/math/fabsf.rs
@@ -6,7 +6,7 @@
 pub fn fabsf(x: f32) -> f32 {
     select_implementation! {
         name: fabsf,
-        use_intrinsic: target_arch = "wasm32",
+        use_arch: all(target_arch = "wasm32", intrinsics_enabled),
         args: x,
     }
 
diff --git a/src/math/floor.rs b/src/math/floor.rs
index e478f6d54..2823bf44d 100644
--- a/src/math/floor.rs
+++ b/src/math/floor.rs
@@ -10,8 +10,8 @@ const TOINT: f64 = 1. / f64::EPSILON;
 pub fn floor(x: f64) -> f64 {
     select_implementation! {
         name: floor,
+        use_arch: all(target_arch = "wasm32", intrinsics_enabled),
         use_arch_required: all(target_arch = "x86", not(target_feature = "sse2")),
-        use_intrinsic: target_arch = "wasm32",
         args: x,
     }
 
diff --git a/src/math/floorf.rs b/src/math/floorf.rs
index bd1570c86..23a18c0f7 100644
--- a/src/math/floorf.rs
+++ b/src/math/floorf.rs
@@ -7,7 +7,7 @@ use core::f32;
 pub fn floorf(x: f32) -> f32 {
     select_implementation! {
         name: floorf,
-        use_intrinsic: target_arch = "wasm32",
+        use_arch: all(target_arch = "wasm32", intrinsics_enabled),
         args: x,
     }
 
diff --git a/src/math/sqrt.rs b/src/math/sqrt.rs
index 3f1a10fdd..2fd7070b1 100644
--- a/src/math/sqrt.rs
+++ b/src/math/sqrt.rs
@@ -83,8 +83,10 @@ use core::f64;
 pub fn sqrt(x: f64) -> f64 {
     select_implementation! {
         name: sqrt,
-        use_arch: target_feature = "sse2",
-        use_intrinsic: target_arch = "wasm32",
+        use_arch: any(
+            all(target_arch = "wasm32", intrinsics_enabled),
+            target_feature = "sse2"
+        ),
         args: x,
     }
 
diff --git a/src/math/sqrtf.rs b/src/math/sqrtf.rs
index 23f9a8443..319335163 100644
--- a/src/math/sqrtf.rs
+++ b/src/math/sqrtf.rs
@@ -18,8 +18,10 @@
 pub fn sqrtf(x: f32) -> f32 {
     select_implementation! {
         name: sqrtf,
-        use_arch: target_feature = "sse2",
-        use_intrinsic: target_arch = "wasm32",
+        use_arch: any(
+            all(target_arch = "wasm32", intrinsics_enabled),
+            target_feature = "sse2"
+        ),
         args: x,
     }
 
diff --git a/src/math/support/macros.rs b/src/math/support/macros.rs
index 9441eace5..f5094b9da 100644
--- a/src/math/support/macros.rs
+++ b/src/math/support/macros.rs
@@ -39,13 +39,8 @@ macro_rules! cfg_if {
     (@__identity $($tokens:tt)*) => { $($tokens)* };
 }
 
-/// Choose among using an intrinsic, an arch-specific implementation, and the function body.
-/// Returns directly if the intrinsic or arch is used, otherwise continue with the rest of the
-/// function.
-///
-/// Specify a `use_intrinsic` meta field if the intrinsic is (1) available on the platforms (i.e.
-/// LLVM lowers it without libcalls that may recurse), (2) it is likely to be more performant.
-/// Intrinsics require wrappers in the `math::arch::intrinsics` module.
+/// Choose between using an arch-specific implementation and the function body. Returns directly
+/// if the arch implementation is used, otherwise continue with the rest of the function.
 ///
 /// Specify a `use_arch` meta field if an architecture-specific implementation is provided.
 /// These live in the `math::arch::some_target_arch` module.
@@ -53,8 +48,7 @@ macro_rules! cfg_if {
 /// Specify a `use_arch_required` meta field if something architecture-specific must be used
 /// regardless of feature configuration (`force-soft-floats`).
 ///
-/// The passed meta options do not need to account for relevant Cargo features
-/// (`unstable-intrinsics`, `arch`, `force-soft-floats`), this macro handles that part.
+/// The passed meta options do not need to account for the `arch` target feature.
 macro_rules! select_implementation {
     (
         name: $fn_name:ident,
@@ -64,15 +58,12 @@ macro_rules! select_implementation {
         // Configuration meta for when to use the arch module regardless of whether softfloats
         // have been requested.
         $( use_arch_required: $use_arch_required:meta, )?
-        // Configuration meta for when to call intrinsics and let LLVM figure it out
-        $( use_intrinsic: $use_intrinsic:meta, )?
         args: $($arg:ident),+ ,
     ) => {
         // FIXME: these use paths that are a pretty fragile (`super`). We should figure out
         // something better w.r.t. how this is vendored into compiler-builtins.
 
         // However, we do need a few things from `arch` that are used even with soft floats.
-        //
         select_implementation! {
             @cfg $($use_arch_required)?;
             if true {
@@ -89,16 +80,6 @@ macro_rules! select_implementation {
                 return  super::arch::$fn_name( $($arg),+ );
             }
         }
-
-        // Never use intrinsics if we are forcing soft floats, and only enable with the
-        // `unstable-intrinsics` feature.
-        #[cfg(intrinsics_enabled)]
-        select_implementation! {
-            @cfg $( $use_intrinsic )?;
-            if true {
-                return  super::arch::intrinsics::$fn_name( $($arg),+ );
-            }
-        }
     };
 
     // Coalesce helper to construct an expression only if a config is provided
diff --git a/src/math/trunc.rs b/src/math/trunc.rs
index d85bffb40..7e5c4f2c2 100644
--- a/src/math/trunc.rs
+++ b/src/math/trunc.rs
@@ -7,7 +7,7 @@ use core::f64;
 pub fn trunc(x: f64) -> f64 {
     select_implementation! {
         name: trunc,
-        use_intrinsic: target_arch = "wasm32",
+        use_arch: all(target_arch = "wasm32", intrinsics_enabled),
         args: x,
     }
 
diff --git a/src/math/truncf.rs b/src/math/truncf.rs
index 82017b87b..b491747d9 100644
--- a/src/math/truncf.rs
+++ b/src/math/truncf.rs
@@ -7,7 +7,7 @@ use core::f32;
 pub fn truncf(x: f32) -> f32 {
     select_implementation! {
         name: truncf,
-        use_intrinsic: target_arch = "wasm32",
+        use_arch: all(target_arch = "wasm32", intrinsics_enabled),
         args: x,
     }
 

From 5712ab12f78edb4ea109905a9b08aeea49280422 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Tue, 7 Jan 2025 02:54:37 +0000
Subject: [PATCH 107/279] Increase the allowed ULP for `tgammaf`

Extensive tests report that the precision isn't actually 0:

    ---- mp_extensive_tgammaf ----

        input:    (-0.00063536887,) (0xba268ee2,)
        expected: -1574.4668             0xc4c4cef0
        actual:   -1574.4667             0xc4c4ceef

    Caused by:
        ulp 1 > 0

Update ULP to reflect this. After this change, `tgammaf` extensive tests
pass.
---
 crates/libm-test/src/precision.rs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/crates/libm-test/src/precision.rs b/crates/libm-test/src/precision.rs
index 03bf7cecc..cbe4bdf88 100644
--- a/crates/libm-test/src/precision.rs
+++ b/crates/libm-test/src/precision.rs
@@ -44,7 +44,6 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 {
 
         // Operations that aren't required to be exact, but our implementations are.
         Bn::Cbrt if ctx.fn_ident != Id::Cbrt => 0,
-        Bn::Tgamma if ctx.fn_ident != Id::Tgamma => 0,
 
         // Bessel functions have large inaccuracies.
         Bn::J0 | Bn::J1 | Bn::Y0 | Bn::Y1 | Bn::Jn | Bn::Yn => 8_000_000,
@@ -78,6 +77,8 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 {
         Bn::Sinh => 2,
         Bn::Tan => 1,
         Bn::Tanh => 2,
+        // tgammaf has higher accuracy than tgamma.
+        Bn::Tgamma if ctx.fn_ident != Id::Tgamma => 1,
         Bn::Tgamma => 20,
     };
 

From 43d21a575e8581acb8488585cbc5491f040d533e Mon Sep 17 00:00:00 2001
From: beetrees <b@beetr.ee>
Date: Tue, 7 Jan 2025 13:51:15 +0000
Subject: [PATCH 108/279] Make extensive tests exhaustive if there are enough
 iterations available

---
 crates/libm-test/src/gen/domain_logspace.rs |   2 +-
 crates/libm-test/src/gen/extensive.rs       | 237 ++++++++++++++------
 crates/libm-test/src/lib.rs                 |   2 +-
 crates/libm-test/src/num.rs                 |  89 +++++++-
 crates/libm-test/tests/z_extensive/run.rs   |   3 +-
 5 files changed, 252 insertions(+), 81 deletions(-)

diff --git a/crates/libm-test/src/gen/domain_logspace.rs b/crates/libm-test/src/gen/domain_logspace.rs
index 3d8a3e7fe..c6963ad43 100644
--- a/crates/libm-test/src/gen/domain_logspace.rs
+++ b/crates/libm-test/src/gen/domain_logspace.rs
@@ -27,5 +27,5 @@ where
     let start = domain.range_start();
     let end = domain.range_end();
     let steps = OpITy::<Op>::try_from(ntests).unwrap_or(OpITy::<Op>::MAX);
-    logspace(start, end, steps).map(|v| (v,))
+    logspace(start, end, steps).0.map(|v| (v,))
 }
diff --git a/crates/libm-test/src/gen/extensive.rs b/crates/libm-test/src/gen/extensive.rs
index d724226e9..fb709e546 100644
--- a/crates/libm-test/src/gen/extensive.rs
+++ b/crates/libm-test/src/gen/extensive.rs
@@ -1,19 +1,18 @@
 use std::fmt;
 use std::ops::RangeInclusive;
 
-use libm::support::MinInt;
+use libm::support::{Float, MinInt};
 
 use crate::domain::HasDomain;
-use crate::gen::KnownSize;
 use crate::op::OpITy;
 use crate::run_cfg::{int_range, iteration_count};
-use crate::{CheckCtx, GeneratorKind, MathOp, logspace};
+use crate::{CheckCtx, GeneratorKind, MathOp, linear_ints, logspace};
 
 /// Generate a sequence of inputs that either cover the domain in completeness (for smaller float
 /// types and single argument functions) or provide evenly spaced inputs across the domain with
 /// approximately `u32::MAX` total iterations.
 pub trait ExtensiveInput<Op> {
-    fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> + Send;
+    fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self> + Send, u64);
 }
 
 /// Construct an iterator from `logspace` and also calculate the total number of steps expected
@@ -21,24 +20,60 @@ pub trait ExtensiveInput<Op> {
 fn logspace_steps<Op>(
     start: Op::FTy,
     end: Op::FTy,
-    ctx: &CheckCtx,
-    argnum: usize,
+    max_steps: u64,
 ) -> (impl Iterator<Item = Op::FTy> + Clone, u64)
 where
     Op: MathOp,
     OpITy<Op>: TryFrom<u64, Error: fmt::Debug>,
+    u64: TryFrom<OpITy<Op>, Error: fmt::Debug>,
     RangeInclusive<OpITy<Op>>: Iterator,
 {
-    let max_steps = iteration_count(ctx, GeneratorKind::Extensive, argnum);
     let max_steps = OpITy::<Op>::try_from(max_steps).unwrap_or(OpITy::<Op>::MAX);
-    let iter = logspace(start, end, max_steps);
+    let (iter, steps) = logspace(start, end, max_steps);
+
+    // `steps` will be <= the original `max_steps`, which is a `u64`.
+    (iter, steps.try_into().unwrap())
+}
+
+/// Represents the iterator in either `Left` or `Right`.
+enum EitherIter<A, B> {
+    A(A),
+    B(B),
+}
 
-    // `logspace` can't implement `ExactSizeIterator` because of the range, but its size hint
-    // should be accurate (assuming <= usize::MAX iterations).
-    let size_hint = iter.size_hint();
-    assert_eq!(size_hint.0, size_hint.1.unwrap());
+impl<T, A: Iterator<Item = T>, B: Iterator<Item = T>> Iterator for EitherIter<A, B> {
+    type Item = T;
 
-    (iter, size_hint.0.try_into().unwrap())
+    fn next(&mut self) -> Option<Self::Item> {
+        match self {
+            Self::A(iter) => iter.next(),
+            Self::B(iter) => iter.next(),
+        }
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        match self {
+            Self::A(iter) => iter.size_hint(),
+            Self::B(iter) => iter.size_hint(),
+        }
+    }
+}
+
+/// Gets the total number of possible values, returning `None` if that number doesn't fit in a
+/// `u64`.
+fn value_count<F: Float>() -> Option<u64>
+where
+    u64: TryFrom<F::Int>,
+{
+    u64::try_from(F::Int::MAX).ok().and_then(|max| max.checked_add(1))
+}
+
+/// Returns an iterator of every possible value of type `F`.
+fn all_values<F: Float>() -> impl Iterator<Item = F>
+where
+    RangeInclusive<F::Int>: Iterator<Item = F::Int>,
+{
+    (F::Int::MIN..=F::Int::MAX).map(|bits| F::from_bits(bits))
 }
 
 macro_rules! impl_extensive_input {
@@ -48,12 +83,23 @@ macro_rules! impl_extensive_input {
             Op: MathOp<RustArgs = Self, FTy = $fty>,
             Op: HasDomain<Op::FTy>,
         {
-            fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
-                let start = Op::DOMAIN.range_start();
-                let end = Op::DOMAIN.range_end();
-                let (iter0, steps0) = logspace_steps::<Op>(start, end, ctx, 0);
-                let iter0 = iter0.map(|v| (v,));
-                KnownSize::new(iter0, steps0)
+            fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
+                let max_steps0 = iteration_count(ctx, GeneratorKind::Extensive, 0);
+                // `f16` and `f32` can have exhaustive tests.
+                match value_count::<Op::FTy>() {
+                    Some(steps0) if steps0 <= max_steps0 => {
+                        let iter0 = all_values();
+                        let iter0 = iter0.map(|v| (v,));
+                        (EitherIter::A(iter0), steps0)
+                    }
+                    _ => {
+                        let start = Op::DOMAIN.range_start();
+                        let end = Op::DOMAIN.range_end();
+                        let (iter0, steps0) = logspace_steps::<Op>(start, end, max_steps0);
+                        let iter0 = iter0.map(|v| (v,));
+                        (EitherIter::B(iter0), steps0)
+                    }
+                }
             }
         }
 
@@ -61,15 +107,28 @@ macro_rules! impl_extensive_input {
         where
             Op: MathOp<RustArgs = Self, FTy = $fty>,
         {
-            fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
-                let start = <$fty>::NEG_INFINITY;
-                let end = <$fty>::INFINITY;
-                let (iter0, steps0) = logspace_steps::<Op>(start, end, ctx, 0);
-                let (iter1, steps1) = logspace_steps::<Op>(start, end, ctx, 1);
-                let iter =
-                    iter0.flat_map(move |first| iter1.clone().map(move |second| (first, second)));
-                let count = steps0.checked_mul(steps1).unwrap();
-                KnownSize::new(iter, count)
+            fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
+                let max_steps0 = iteration_count(ctx, GeneratorKind::Extensive, 0);
+                let max_steps1 = iteration_count(ctx, GeneratorKind::Extensive, 1);
+                // `f16` can have exhaustive tests.
+                match value_count::<Op::FTy>() {
+                    Some(count) if count <= max_steps0 && count <= max_steps1 => {
+                        let iter = all_values()
+                            .flat_map(|first| all_values().map(move |second| (first, second)));
+                        (EitherIter::A(iter), count.checked_mul(count).unwrap())
+                    }
+                    _ => {
+                        let start = <$fty>::NEG_INFINITY;
+                        let end = <$fty>::INFINITY;
+                        let (iter0, steps0) = logspace_steps::<Op>(start, end, max_steps0);
+                        let (iter1, steps1) = logspace_steps::<Op>(start, end, max_steps1);
+                        let iter = iter0.flat_map(move |first| {
+                            iter1.clone().map(move |second| (first, second))
+                        });
+                        let count = steps0.checked_mul(steps1).unwrap();
+                        (EitherIter::B(iter), count)
+                    }
+                }
             }
         }
 
@@ -77,22 +136,41 @@ macro_rules! impl_extensive_input {
         where
             Op: MathOp<RustArgs = Self, FTy = $fty>,
         {
-            fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
-                let start = <$fty>::NEG_INFINITY;
-                let end = <$fty>::INFINITY;
-
-                let (iter0, steps0) = logspace_steps::<Op>(start, end, ctx, 0);
-                let (iter1, steps1) = logspace_steps::<Op>(start, end, ctx, 1);
-                let (iter2, steps2) = logspace_steps::<Op>(start, end, ctx, 2);
-
-                let iter = iter0
-                    .flat_map(move |first| iter1.clone().map(move |second| (first, second)))
-                    .flat_map(move |(first, second)| {
-                        iter2.clone().map(move |third| (first, second, third))
-                    });
-                let count = steps0.checked_mul(steps1).unwrap().checked_mul(steps2).unwrap();
-
-                KnownSize::new(iter, count)
+            fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
+                let max_steps0 = iteration_count(ctx, GeneratorKind::Extensive, 0);
+                let max_steps1 = iteration_count(ctx, GeneratorKind::Extensive, 1);
+                let max_steps2 = iteration_count(ctx, GeneratorKind::Extensive, 2);
+                // `f16` can be exhaustive tested if `LIBM_EXTENSIVE_TESTS` is incresed.
+                match value_count::<Op::FTy>() {
+                    Some(count)
+                        if count <= max_steps0 && count <= max_steps1 && count <= max_steps2 =>
+                    {
+                        let iter = all_values().flat_map(|first| {
+                            all_values().flat_map(move |second| {
+                                all_values().map(move |third| (first, second, third))
+                            })
+                        });
+                        (EitherIter::A(iter), count.checked_pow(3).unwrap())
+                    }
+                    _ => {
+                        let start = <$fty>::NEG_INFINITY;
+                        let end = <$fty>::INFINITY;
+
+                        let (iter0, steps0) = logspace_steps::<Op>(start, end, max_steps0);
+                        let (iter1, steps1) = logspace_steps::<Op>(start, end, max_steps1);
+                        let (iter2, steps2) = logspace_steps::<Op>(start, end, max_steps2);
+
+                        let iter = iter0
+                            .flat_map(move |first| iter1.clone().map(move |second| (first, second)))
+                            .flat_map(move |(first, second)| {
+                                iter2.clone().map(move |third| (first, second, third))
+                            });
+                        let count =
+                            steps0.checked_mul(steps1).unwrap().checked_mul(steps2).unwrap();
+
+                        (EitherIter::B(iter), count)
+                    }
+                }
             }
         }
 
@@ -100,19 +178,32 @@ macro_rules! impl_extensive_input {
         where
             Op: MathOp<RustArgs = Self, FTy = $fty>,
         {
-            fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
-                let start = <$fty>::NEG_INFINITY;
-                let end = <$fty>::INFINITY;
+            fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
+                let range0 = int_range(ctx, GeneratorKind::Extensive, 0);
+                let max_steps0 = iteration_count(ctx, GeneratorKind::Extensive, 0);
+                let max_steps1 = iteration_count(ctx, GeneratorKind::Extensive, 1);
+                match value_count::<Op::FTy>() {
+                    Some(count1) if count1 <= max_steps1 => {
+                        let (iter0, steps0) = linear_ints(range0, max_steps0);
+                        let iter = iter0
+                            .flat_map(move |first| all_values().map(move |second| (first, second)));
+                        (EitherIter::A(iter), steps0.checked_mul(count1).unwrap())
+                    }
+                    _ => {
+                        let start = <$fty>::NEG_INFINITY;
+                        let end = <$fty>::INFINITY;
 
-                let iter0 = int_range(ctx, GeneratorKind::Extensive, 0);
-                let steps0 = iteration_count(ctx, GeneratorKind::Extensive, 0);
-                let (iter1, steps1) = logspace_steps::<Op>(start, end, ctx, 1);
+                        let (iter0, steps0) = linear_ints(range0, max_steps0);
+                        let (iter1, steps1) = logspace_steps::<Op>(start, end, max_steps1);
 
-                let iter =
-                    iter0.flat_map(move |first| iter1.clone().map(move |second| (first, second)));
-                let count = steps0.checked_mul(steps1).unwrap();
+                        let iter = iter0.flat_map(move |first| {
+                            iter1.clone().map(move |second| (first, second))
+                        });
+                        let count = steps0.checked_mul(steps1).unwrap();
 
-                KnownSize::new(iter, count)
+                        (EitherIter::B(iter), count)
+                    }
+                }
             }
         }
 
@@ -120,19 +211,33 @@ macro_rules! impl_extensive_input {
         where
             Op: MathOp<RustArgs = Self, FTy = $fty>,
         {
-            fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
-                let start = <$fty>::NEG_INFINITY;
-                let end = <$fty>::INFINITY;
+            fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
+                let max_steps0 = iteration_count(ctx, GeneratorKind::Extensive, 0);
+                let range1 = int_range(ctx, GeneratorKind::Extensive, 1);
+                let max_steps1 = iteration_count(ctx, GeneratorKind::Extensive, 1);
+                match value_count::<Op::FTy>() {
+                    Some(count0) if count0 <= max_steps0 => {
+                        let (iter1, steps1) = linear_ints(range1, max_steps1);
+                        let iter = all_values().flat_map(move |first| {
+                            iter1.clone().map(move |second| (first, second))
+                        });
+                        (EitherIter::A(iter), count0.checked_mul(steps1).unwrap())
+                    }
+                    _ => {
+                        let start = <$fty>::NEG_INFINITY;
+                        let end = <$fty>::INFINITY;
 
-                let (iter0, steps0) = logspace_steps::<Op>(start, end, ctx, 0);
-                let iter1 = int_range(ctx, GeneratorKind::Extensive, 0);
-                let steps1 = iteration_count(ctx, GeneratorKind::Extensive, 0);
+                        let (iter0, steps0) = logspace_steps::<Op>(start, end, max_steps0);
+                        let (iter1, steps1) = linear_ints(range1, max_steps1);
 
-                let iter =
-                    iter0.flat_map(move |first| iter1.clone().map(move |second| (first, second)));
-                let count = steps0.checked_mul(steps1).unwrap();
+                        let iter = iter0.flat_map(move |first| {
+                            iter1.clone().map(move |second| (first, second))
+                        });
+                        let count = steps0.checked_mul(steps1).unwrap();
 
-                KnownSize::new(iter, count)
+                        (EitherIter::B(iter), count)
+                    }
+                }
             }
         }
     };
@@ -145,10 +250,10 @@ impl_extensive_input!(f64);
 #[cfg(f128_enabled)]
 impl_extensive_input!(f128);
 
-/// Create a test case iterator for extensive inputs.
+/// Create a test case iterator for extensive inputs. Also returns the total test case count.
 pub fn get_test_cases<Op>(
     ctx: &CheckCtx,
-) -> impl ExactSizeIterator<Item = Op::RustArgs> + Send + use<'_, Op>
+) -> (impl Iterator<Item = Op::RustArgs> + Send + use<'_, Op>, u64)
 where
     Op: MathOp,
     Op::RustArgs: ExtensiveInput<Op>,
diff --git a/crates/libm-test/src/lib.rs b/crates/libm-test/src/lib.rs
index 251114a0d..6e7017f09 100644
--- a/crates/libm-test/src/lib.rs
+++ b/crates/libm-test/src/lib.rs
@@ -22,7 +22,7 @@ use std::time::SystemTime;
 
 pub use f8_impl::f8;
 pub use libm::support::{Float, Int, IntTy, MinInt};
-pub use num::{FloatExt, logspace};
+pub use num::{FloatExt, linear_ints, logspace};
 pub use op::{
     BaseName, FloatTy, Identifier, MathOp, OpCFn, OpCRet, OpFTy, OpRustFn, OpRustRet, Ty,
 };
diff --git a/crates/libm-test/src/num.rs b/crates/libm-test/src/num.rs
index f693ef02f..eed941423 100644
--- a/crates/libm-test/src/num.rs
+++ b/crates/libm-test/src/num.rs
@@ -215,7 +215,13 @@ fn as_ulp_steps<F: Float>(x: F) -> Option<F::SignedInt> {
 /// to logarithmic spacing of their values.
 ///
 /// Note that this tends to skip negative zero, so that needs to be checked explicitly.
-pub fn logspace<F: FloatExt>(start: F, end: F, steps: F::Int) -> impl Iterator<Item = F> + Clone
+///
+/// Returns `(iterator, iterator_length)`.
+pub fn logspace<F: FloatExt>(
+    start: F,
+    end: F,
+    steps: F::Int,
+) -> (impl Iterator<Item = F> + Clone, F::Int)
 where
     RangeInclusive<F::Int>: Iterator,
 {
@@ -223,17 +229,42 @@ where
     assert!(!end.is_nan());
     assert!(end >= start);
 
-    let mut steps = steps.checked_sub(F::Int::ONE).expect("`steps` must be at least 2");
+    let steps = steps.checked_sub(F::Int::ONE).expect("`steps` must be at least 2");
     let between = ulp_between(start, end).expect("`start` or `end` is NaN");
     let spacing = (between / steps).max(F::Int::ONE);
-    steps = steps.min(between); // At maximum, one step per ULP
+    let steps = steps.min(between); // At maximum, one step per ULP
 
     let mut x = start;
-    (F::Int::ZERO..=steps).map(move |_| {
-        let ret = x;
-        x = x.n_up(spacing);
-        ret
-    })
+    (
+        (F::Int::ZERO..=steps).map(move |_| {
+            let ret = x;
+            x = x.n_up(spacing);
+            ret
+        }),
+        steps + F::Int::ONE,
+    )
+}
+
+/// Returns an iterator of up to `steps` integers evenly distributed.
+pub fn linear_ints(
+    range: RangeInclusive<i32>,
+    steps: u64,
+) -> (impl Iterator<Item = i32> + Clone, u64) {
+    let steps = steps.checked_sub(1).unwrap();
+    let between = u64::from(range.start().abs_diff(*range.end()));
+    let spacing = i32::try_from((between / steps).max(1)).unwrap();
+    let steps = steps.min(between);
+    let mut x: i32 = *range.start();
+    (
+        (0..=steps).map(move |_| {
+            let res = x;
+            // Wrapping add to avoid panic on last item (where `x` could overflow past i32::MAX as
+            // there is no next item).
+            x = x.wrapping_add(spacing);
+            res
+        }),
+        steps + 1,
+    )
 }
 
 #[cfg(test)]
@@ -422,19 +453,55 @@ mod tests {
 
     #[test]
     fn test_logspace() {
-        let ls: Vec<_> = logspace(f8::from_bits(0x0), f8::from_bits(0x4), 2).collect();
+        let (ls, count) = logspace(f8::from_bits(0x0), f8::from_bits(0x4), 2);
+        let ls: Vec<_> = ls.collect();
         let exp = [f8::from_bits(0x0), f8::from_bits(0x4)];
         assert_eq!(ls, exp);
+        assert_eq!(ls.len(), usize::from(count));
 
-        let ls: Vec<_> = logspace(f8::from_bits(0x0), f8::from_bits(0x4), 3).collect();
+        let (ls, count) = logspace(f8::from_bits(0x0), f8::from_bits(0x4), 3);
+        let ls: Vec<_> = ls.collect();
         let exp = [f8::from_bits(0x0), f8::from_bits(0x2), f8::from_bits(0x4)];
         assert_eq!(ls, exp);
+        assert_eq!(ls.len(), usize::from(count));
 
         // Check that we include all values with no repeats if `steps` exceeds the maximum number
         // of steps.
-        let ls: Vec<_> = logspace(f8::from_bits(0x0), f8::from_bits(0x3), 10).collect();
+        let (ls, count) = logspace(f8::from_bits(0x0), f8::from_bits(0x3), 10);
+        let ls: Vec<_> = ls.collect();
         let exp = [f8::from_bits(0x0), f8::from_bits(0x1), f8::from_bits(0x2), f8::from_bits(0x3)];
         assert_eq!(ls, exp);
+        assert_eq!(ls.len(), usize::from(count));
+    }
+
+    #[test]
+    fn test_linear_ints() {
+        let (ints, count) = linear_ints(0..=4, 2);
+        let ints: Vec<_> = ints.collect();
+        let exp = [0, 4];
+        assert_eq!(ints, exp);
+        assert_eq!(ints.len(), usize::try_from(count).unwrap());
+
+        let (ints, count) = linear_ints(0..=4, 3);
+        let ints: Vec<_> = ints.collect();
+        let exp = [0, 2, 4];
+        assert_eq!(ints, exp);
+        assert_eq!(ints.len(), usize::try_from(count).unwrap());
+
+        // Check that we include all values with no repeats if `steps` exceeds the maximum number
+        // of steps.
+        let (ints, count) = linear_ints(0x0..=0x3, 10);
+        let ints: Vec<_> = ints.collect();
+        let exp = [0, 1, 2, 3];
+        assert_eq!(ints, exp);
+        assert_eq!(ints.len(), usize::try_from(count).unwrap());
+
+        // Check that there are no panics around `i32::MAX`.
+        let (ints, count) = linear_ints(i32::MAX - 1..=i32::MAX, 5);
+        let ints: Vec<_> = ints.collect();
+        let exp = [i32::MAX - 1, i32::MAX];
+        assert_eq!(ints, exp);
+        assert_eq!(ints.len(), usize::try_from(count).unwrap());
     }
 
     #[test]
diff --git a/crates/libm-test/tests/z_extensive/run.rs b/crates/libm-test/tests/z_extensive/run.rs
index 7acff5324..07f4d5370 100644
--- a/crates/libm-test/tests/z_extensive/run.rs
+++ b/crates/libm-test/tests/z_extensive/run.rs
@@ -106,8 +106,7 @@ where
 
     let completed = AtomicU64::new(0);
     let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Mpfr);
-    let cases = &mut extensive::get_test_cases::<Op>(&ctx);
-    let total: u64 = cases.len().try_into().unwrap();
+    let (ref mut cases, total) = extensive::get_test_cases::<Op>(&ctx);
     let pb = Progress::new(Op::NAME, total);
 
     let test_single_chunk = |mp_vals: &mut Op::MpTy, input_vec: Vec<Op::RustArgs>| -> TestResult {

From 8e82616f154b06cf4ee9cdb82a4f56474a403d04 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tgross@intrepidcs.com>
Date: Tue, 7 Jan 2025 17:30:05 -0500
Subject: [PATCH 109/279] Account for optimization levels other than numbers

The build script currently panics with `opt-level=z` or `opt-level=s`.
Account for this here.
---
 configure.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/configure.rs b/configure.rs
index a18937c3c..8b8ba9815 100644
--- a/configure.rs
+++ b/configure.rs
@@ -7,7 +7,7 @@ use std::path::PathBuf;
 pub struct Config {
     pub manifest_dir: PathBuf,
     pub out_dir: PathBuf,
-    pub opt_level: u8,
+    pub opt_level: String,
     pub cargo_features: Vec<String>,
     pub target_arch: String,
     pub target_env: String,
@@ -31,7 +31,7 @@ impl Config {
         Self {
             manifest_dir: PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()),
             out_dir: PathBuf::from(env::var("OUT_DIR").unwrap()),
-            opt_level: env::var("OPT_LEVEL").unwrap().parse().unwrap(),
+            opt_level: env::var("OPT_LEVEL").unwrap(),
             cargo_features,
             target_arch: env::var("CARGO_CFG_TARGET_ARCH").unwrap(),
             target_env: env::var("CARGO_CFG_TARGET_ENV").unwrap(),
@@ -91,7 +91,7 @@ fn emit_arch_cfg() {
 fn emit_optimization_cfg(cfg: &Config) {
     println!("cargo:rustc-check-cfg=cfg(optimizations_enabled)");
 
-    if cfg.opt_level >= 2 {
+    if !matches!(cfg.opt_level.as_str(), "0" | "1") {
         println!("cargo:rustc-cfg=optimizations_enabled");
     }
 }

From 0f6b1bb457a6013152560c39f849dc79cdfdcdc5 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sun, 5 Jan 2025 11:21:00 +0000
Subject: [PATCH 110/279] Add tests against MPFR for `remquo` and `remquof`

Rug does not yet expose this function, but it is possible to use the
MPFR bindings directly.
---
 crates/libm-test/Cargo.toml               |  2 +-
 crates/libm-test/src/mpfloat.rs           | 50 +++++++++++++++++++++++
 crates/libm-test/tests/multiprecision.rs  |  4 --
 crates/libm-test/tests/z_extensive/run.rs |  4 --
 4 files changed, 51 insertions(+), 9 deletions(-)

diff --git a/crates/libm-test/Cargo.toml b/crates/libm-test/Cargo.toml
index 371beb19a..621e587c5 100644
--- a/crates/libm-test/Cargo.toml
+++ b/crates/libm-test/Cargo.toml
@@ -35,7 +35,7 @@ paste = "1.0.15"
 rand = "0.8.5"
 rand_chacha = "0.3.1"
 rayon = "1.10.0"
-rug = { version = "1.26.1", optional = true, default-features = false, features = ["float", "std"] }
+rug = { version = "1.26.1", optional = true, default-features = false, features = ["float", "integer", "std"] }
 
 [target.'cfg(target_family = "wasm")'.dependencies]
 # Enable randomness on WASM
diff --git a/crates/libm-test/src/mpfloat.rs b/crates/libm-test/src/mpfloat.rs
index f2b7b2f25..f71e72cd5 100644
--- a/crates/libm-test/src/mpfloat.rs
+++ b/crates/libm-test/src/mpfloat.rs
@@ -4,10 +4,13 @@
 //! a struct named `Operation` that implements [`MpOp`].
 
 use std::cmp::Ordering;
+use std::ffi::{c_int, c_long};
 
 use az::Az;
+use gmp_mpfr_sys::mpfr::rnd_t;
 use rug::Assign;
 pub use rug::Float as MpFloat;
+use rug::float::Round;
 use rug::float::Round::Nearest;
 use rug::ops::{PowAssignRound, RemAssignRound};
 
@@ -361,6 +364,32 @@ macro_rules! impl_op_for_ty {
                 }
             }
 
+            impl MpOp for crate::op::[<remquo $suffix>]::Routine {
+                type MpTy = (MpFloat, MpFloat, MpFloat);
+
+                fn new_mp() -> Self::MpTy {
+                    (
+                        new_mpfloat::<Self::FTy>(),
+                        new_mpfloat::<Self::FTy>(),
+                        new_mpfloat::<Self::FTy>()
+                    )
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.0.assign(input.0);
+                    this.1.assign(input.1);
+                    let (ord, ql) = mpfr_remquo(&mut this.2, &this.0, &this.1, Nearest);
+
+                    // `remquo` integer results are sign-magnitude representation. Transfer the
+                    // sign bit from the long result to the int result.
+                    let clear = !(1 << (c_int::BITS - 1));
+                    let sign = ((ql >> (c_long::BITS - 1)) as i32) << (c_int::BITS - 1);
+                    let q = (ql as i32) & clear | sign;
+
+                    (prep_retval::<Self::FTy>(&mut this.2, ord), q)
+                }
+            }
+
             impl MpOp for crate::op::[<yn $suffix>]::Routine {
                 type MpTy = MpFloat;
 
@@ -441,3 +470,24 @@ impl MpOp for crate::op::lgammaf_r::Routine {
         (ret, sign as i32)
     }
 }
+
+/// `rug` does not provide `remquo` so this exposes `mpfr_remquo`. See rug#76.
+fn mpfr_remquo(r: &mut MpFloat, x: &MpFloat, y: &MpFloat, round: Round) -> (Ordering, c_long) {
+    let r = r.as_raw_mut();
+    let x = x.as_raw();
+    let y = y.as_raw();
+    let mut q: c_long = 0;
+
+    let round = match round {
+        Round::Nearest => rnd_t::RNDN,
+        Round::Zero => rnd_t::RNDZ,
+        Round::Up => rnd_t::RNDU,
+        Round::Down => rnd_t::RNDD,
+        Round::AwayZero => rnd_t::RNDA,
+        _ => unreachable!(),
+    };
+
+    // SAFETY: mutable and const pointers are valid and do not alias, by Rust's rules.
+    let ord = unsafe { gmp_mpfr_sys::mpfr::remquo(r, &mut q, x, y, round) };
+    (ord.cmp(&0), q)
+}
diff --git a/crates/libm-test/tests/multiprecision.rs b/crates/libm-test/tests/multiprecision.rs
index 7961b0802..e2766cfda 100644
--- a/crates/libm-test/tests/multiprecision.rs
+++ b/crates/libm-test/tests/multiprecision.rs
@@ -50,10 +50,6 @@ libm_macros::for_each_function! {
         [jn, jnf, yn, ynf],
     ],
     skip: [
-        // FIXME: MPFR tests needed
-        remquo,
-        remquof,
-
         // FIXME: test needed, see
         // https://github.com/rust-lang/libm/pull/311#discussion_r1818273392
         nextafter,
diff --git a/crates/libm-test/tests/z_extensive/run.rs b/crates/libm-test/tests/z_extensive/run.rs
index 07f4d5370..7ee967851 100644
--- a/crates/libm-test/tests/z_extensive/run.rs
+++ b/crates/libm-test/tests/z_extensive/run.rs
@@ -48,10 +48,6 @@ fn register_all_tests() -> Vec<Trial> {
         callback: mp_extensive_tests,
         extra: [all_tests],
         skip: [
-            // FIXME: MPFR tests needed
-            remquo,
-            remquof,
-
             // FIXME: test needed, see
             // https://github.com/rust-lang/libm/pull/311#discussion_r1818273392
             nextafter,

From edc59589886e8d7b9bc1b6a5fd47e2802d2024f1 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sat, 11 Jan 2025 21:24:06 +0000
Subject: [PATCH 111/279] Use `core::arch::wasm` functions rather than
 intrinsics

These wasm functions are available in `core::arch::wasm32` since [1], so
we can use them while avoiding the possibly-recursive `intrinsics::*`
calls (in practice none of those should always lower to libcalls on
wasm, but that is up to LLVM).

Since these require an unstable feature, they are still gated under
`unstable-intrinsics`.

[1]: https://github.com/rust-lang/stdarch/pull/1677
---
 src/lib.rs              |  1 +
 src/math/arch/wasm32.rs | 30 ++++++++++--------------------
 2 files changed, 11 insertions(+), 20 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index 327e3d6e6..b0e431211 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -2,6 +2,7 @@
 #![no_std]
 #![cfg_attr(intrinsics_enabled, allow(internal_features))]
 #![cfg_attr(intrinsics_enabled, feature(core_intrinsics))]
+#![cfg_attr(all(intrinsics_enabled, target_family = "wasm"), feature(wasm_numeric_instr))]
 #![cfg_attr(f128_enabled, feature(f128))]
 #![cfg_attr(f16_enabled, feature(f16))]
 #![allow(clippy::assign_op_pattern)]
diff --git a/src/math/arch/wasm32.rs b/src/math/arch/wasm32.rs
index 09df8624e..384445f12 100644
--- a/src/math/arch/wasm32.rs
+++ b/src/math/arch/wasm32.rs
@@ -1,16 +1,12 @@
-//! Wasm asm is not stable; just use intrinsics for operations that have asm routine equivalents.
-//!
-//! Note that we need to be absolutely certain that everything here lowers to assembly operations,
-//! otherwise libcalls will be recursive.
+//! Wasm has builtins for simple float operations. Use the unstable `core::arch` intrinsics which
+//! are significantly faster than soft float operations.
 
 pub fn ceil(x: f64) -> f64 {
-    // SAFETY: safe intrinsic with no preconditions
-    unsafe { core::intrinsics::ceilf64(x) }
+    core::arch::wasm32::f64_ceil(x)
 }
 
 pub fn ceilf(x: f32) -> f32 {
-    // SAFETY: safe intrinsic with no preconditions
-    unsafe { core::intrinsics::ceilf32(x) }
+    core::arch::wasm32::f32_ceil(x)
 }
 
 pub fn fabs(x: f64) -> f64 {
@@ -22,31 +18,25 @@ pub fn fabsf(x: f32) -> f32 {
 }
 
 pub fn floor(x: f64) -> f64 {
-    // SAFETY: safe intrinsic with no preconditions
-    unsafe { core::intrinsics::floorf64(x) }
+    core::arch::wasm32::f64_floor(x)
 }
 
 pub fn floorf(x: f32) -> f32 {
-    // SAFETY: safe intrinsic with no preconditions
-    unsafe { core::intrinsics::floorf32(x) }
+    core::arch::wasm32::f32_floor(x)
 }
 
 pub fn sqrt(x: f64) -> f64 {
-    // SAFETY: safe intrinsic with no preconditions
-    unsafe { core::intrinsics::sqrtf64(x) }
+    core::arch::wasm32::f64_sqrt(x)
 }
 
 pub fn sqrtf(x: f32) -> f32 {
-    // SAFETY: safe intrinsic with no preconditions
-    unsafe { core::intrinsics::sqrtf32(x) }
+    core::arch::wasm32::f32_sqrt(x)
 }
 
 pub fn trunc(x: f64) -> f64 {
-    // SAFETY: safe intrinsic with no preconditions
-    unsafe { core::intrinsics::truncf64(x) }
+    core::arch::wasm32::f64_trunc(x)
 }
 
 pub fn truncf(x: f32) -> f32 {
-    // SAFETY: safe intrinsic with no preconditions
-    unsafe { core::intrinsics::truncf32(x) }
+    core::arch::wasm32::f32_trunc(x)
 }

From 5dea7fcb4c56027c83639a51e622c5dffdfe8a92 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sat, 11 Jan 2025 22:04:32 +0000
Subject: [PATCH 112/279] Split `cast` into `cast` and `cast_lossy`

There is a difference in intent between wishing to cast and truncate the
value, and expecting the input to be within range. To make this clear,
add separate `cast_lossy` and `cast_from_lossy` to indicate what that
truncation is intended, leaving `cast` and `cast_from` to only be casts
that expected not to truncate.

Actually enforcing this at runtime is likely to have a cost, so just
`debug_assert!` that `cast` doesn't truncate.
---
 src/math/support/int_traits.rs | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/src/math/support/int_traits.rs b/src/math/support/int_traits.rs
index 380313c1e..0f2d72d9b 100644
--- a/src/math/support/int_traits.rs
+++ b/src/math/support/int_traits.rs
@@ -343,18 +343,30 @@ impl_h_int!(
 /// Trait to express (possibly lossy) casting of integers
 #[allow(unused)]
 pub trait CastInto<T: Copy>: Copy {
+    /// By default, casts should be exact.
     fn cast(self) -> T;
+
+    /// Call for casts that are expected to truncate.
+    fn cast_lossy(self) -> T;
 }
 
 #[allow(unused)]
 pub trait CastFrom<T: Copy>: Copy {
+    /// By default, casts should be exact.
     fn cast_from(value: T) -> Self;
+
+    /// Call for casts that are expected to truncate.
+    fn cast_from_lossy(value: T) -> Self;
 }
 
 impl<T: Copy, U: CastInto<T> + Copy> CastFrom<U> for T {
     fn cast_from(value: U) -> Self {
         value.cast()
     }
+
+    fn cast_from_lossy(value: U) -> Self {
+        value.cast_lossy()
+    }
 }
 
 macro_rules! cast_into {
@@ -364,6 +376,13 @@ macro_rules! cast_into {
     ($ty:ty; $($into:ty),*) => {$(
         impl CastInto<$into> for $ty {
             fn cast(self) -> $into {
+                // All we can really do to enforce casting rules is check the rules when in
+                // debug mode.
+                debug_assert!(<$into>::try_from(self).is_ok(), "failed cast from {self}");
+                self as $into
+            }
+
+            fn cast_lossy(self) -> $into {
                 self as $into
             }
         }

From 5fb644fd6ef135d8ba0cdc2074332bad8b473c95 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sat, 11 Jan 2025 23:22:12 +0000
Subject: [PATCH 113/279] Reorder tests in `run.sh`

I do not believe Cargo separately caches crates with different sets of
features enabled. So, ensuring that tests run with `unstable-intrinsics`
are always grouped should slightly reduce runtime.

As an added benefit, all the debug mode tests run first so initial
feedback is available faster.
---
 ci/run.sh | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/ci/run.sh b/ci/run.sh
index 70fc271f1..89c9c8631 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -75,16 +75,14 @@ if [ "${BUILD_ONLY:-}" = "1" ]; then
 else
     cmd="cargo test --all --target $target $extra_flags"
 
-    # Test without intrinsics
+    # Test once without intrinsics, once with intrinsics enabled
     $cmd
-    $cmd --release
-
-    # Test with intrinsic use
     $cmd --features unstable-intrinsics
+    $cmd --features unstable-intrinsics --benches
+    
+    # Test the same in release mode, which also increases coverage.
+    $cmd --release
     $cmd --release --features unstable-intrinsics
-
-    # Make sure benchmarks have correct results
-    $cmd --benches
-    $cmd --benches --release
+    $cmd --release --features unstable-intrinsics --benches
 fi
 

From 8940fbf0e28ba1b2d1fc791b5e72954f36d27ae4 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sat, 11 Jan 2025 23:40:19 +0000
Subject: [PATCH 114/279] Remove `ExpInt` from `Float`, always use `i32`
 instead

`ExpInt` is likely to only have performance benefits on 16-bit
platforms, but makes working with the exponent more difficult. It seems
like a worthwhile tradeoff to instead just use `i32`, so do that here.
---
 crates/libm-test/src/f8_impl.rs  |  5 -----
 src/math/support/float_traits.rs | 22 ++++++++--------------
 src/math/support/int_traits.rs   |  2 ++
 3 files changed, 10 insertions(+), 19 deletions(-)

diff --git a/crates/libm-test/src/f8_impl.rs b/crates/libm-test/src/f8_impl.rs
index d378863f2..299553d20 100644
--- a/crates/libm-test/src/f8_impl.rs
+++ b/crates/libm-test/src/f8_impl.rs
@@ -20,7 +20,6 @@ pub struct f8(u8);
 impl Float for f8 {
     type Int = u8;
     type SignedInt = i8;
-    type ExpInt = i8;
 
     const ZERO: Self = Self(0b0_0000_000);
     const NEG_ZERO: Self = Self(0b1_0000_000);
@@ -62,10 +61,6 @@ impl Float for f8 {
         self.0 & Self::SIGN_MASK != 0
     }
 
-    fn exp(self) -> Self::ExpInt {
-        unimplemented!()
-    }
-
     fn from_bits(a: Self::Int) -> Self {
         Self(a)
     }
diff --git a/src/math/support/float_traits.rs b/src/math/support/float_traits.rs
index 697050966..f795527db 100644
--- a/src/math/support/float_traits.rs
+++ b/src/math/support/float_traits.rs
@@ -1,6 +1,6 @@
 use core::{fmt, mem, ops};
 
-use super::int_traits::{Int, MinInt};
+use super::int_traits::{CastInto, Int, MinInt};
 
 /// Trait for some basic operations on floats
 #[allow(dead_code)]
@@ -25,9 +25,6 @@ pub trait Float:
     /// A int of the same width as the float
     type SignedInt: Int + MinInt<OtherSign = Self::Int, Unsigned = Self::Int>;
 
-    /// An int capable of containing the exponent bits plus a sign bit. This is signed.
-    type ExpInt: Int;
-
     const ZERO: Self;
     const NEG_ZERO: Self;
     const ONE: Self;
@@ -98,7 +95,9 @@ pub trait Float:
     }
 
     /// Returns the exponent, not adjusting for bias.
-    fn exp(self) -> Self::ExpInt;
+    fn exp(self) -> i32 {
+        ((self.to_bits() & Self::EXP_MASK) >> Self::SIG_BITS).cast()
+    }
 
     /// Returns the significand with no implicit bit (or the "fractional" part)
     fn frac(self) -> Self::Int {
@@ -146,7 +145,6 @@ macro_rules! float_impl {
         $ty:ident,
         $ity:ident,
         $sity:ident,
-        $expty:ident,
         $bits:expr,
         $significand_bits:expr,
         $from_bits:path
@@ -154,7 +152,6 @@ macro_rules! float_impl {
         impl Float for $ty {
             type Int = $ity;
             type SignedInt = $sity;
-            type ExpInt = $expty;
 
             const ZERO: Self = 0.0;
             const NEG_ZERO: Self = -0.0;
@@ -191,9 +188,6 @@ macro_rules! float_impl {
             fn is_sign_negative(self) -> bool {
                 self.is_sign_negative()
             }
-            fn exp(self) -> Self::ExpInt {
-                ((self.to_bits() & Self::EXP_MASK) >> Self::SIG_BITS) as Self::ExpInt
-            }
             fn from_bits(a: Self::Int) -> Self {
                 Self::from_bits(a)
             }
@@ -226,11 +220,11 @@ macro_rules! float_impl {
 }
 
 #[cfg(f16_enabled)]
-float_impl!(f16, u16, i16, i8, 16, 10, f16::from_bits);
-float_impl!(f32, u32, i32, i16, 32, 23, f32_from_bits);
-float_impl!(f64, u64, i64, i16, 64, 52, f64_from_bits);
+float_impl!(f16, u16, i16, 16, 10, f16::from_bits);
+float_impl!(f32, u32, i32, 32, 23, f32_from_bits);
+float_impl!(f64, u64, i64, 64, 52, f64_from_bits);
 #[cfg(f128_enabled)]
-float_impl!(f128, u128, i128, i16, 128, 112, f128::from_bits);
+float_impl!(f128, u128, i128, 128, 112, f128::from_bits);
 
 /* FIXME(msrv): vendor some things that are not const stable at our MSRV */
 
diff --git a/src/math/support/int_traits.rs b/src/math/support/int_traits.rs
index 0f2d72d9b..459f0a58b 100644
--- a/src/math/support/int_traits.rs
+++ b/src/math/support/int_traits.rs
@@ -55,6 +55,8 @@ pub trait Int:
     + ops::BitAnd<Output = Self>
     + cmp::Ord
     + CastInto<usize>
+    + CastInto<i32>
+    + CastFrom<i32>
     + CastFrom<u8>
 {
     fn signed(self) -> OtherSign<Self::Unsigned>;

From d5ebe5d38d5944a385843fadf6af918611383028 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sat, 11 Jan 2025 22:30:30 +0000
Subject: [PATCH 115/279] Add a `release-checked` profile with debug and
 overflow assertions

A failing debug assertion or overflow without correctly wrapping or
saturating is a bug, but the `debug` profile that has these enabled does
not run enough test cases to hit edge cases that may trigger these. Add
a new `release-checked` profile that enables debug assertions and
overflow checks. This seems to only extend per-function test time by a
few seconds (or around a minute on longer extensive tests), so enable
this as the default on CI.

In order to ensure `no_panic` still gets checked, add a build-only step
to CI.
---
 .github/workflows/main.yml | 3 ++-
 Cargo.toml                 | 7 +++++++
 build.rs                   | 6 +++++-
 ci/run.sh                  | 8 +++++---
 4 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 320800f2e..98505ea35 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -238,7 +238,8 @@ jobs:
 
           LIBM_EXTENSIVE_TESTS="$CHANGED" cargo t \
             --features test-multiprecision,unstable \
-            --release -- extensive
+            --profile release-checked \
+            -- extensive
       - name: Print test logs if available
         run: if [ -f "target/test-log.txt" ]; then cat target/test-log.txt; fi
         shell: bash
diff --git a/Cargo.toml b/Cargo.toml
index dc362779e..0e444b583 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -61,3 +61,10 @@ no-panic = "0.1.30"
 # This is needed for no-panic to correctly detect the lack of panics
 [profile.release]
 lto = "fat"
+
+# Release mode with debug assertions
+[profile.release-checked]
+inherits = "release"
+debug-assertions = true
+lto = "fat"
+overflow-checks = true
diff --git a/build.rs b/build.rs
index 9c9e0e723..ca4a639a1 100644
--- a/build.rs
+++ b/build.rs
@@ -13,8 +13,12 @@ fn main() {
     #[allow(unexpected_cfgs)]
     if !cfg!(feature = "checked") {
         let lvl = env::var("OPT_LEVEL").unwrap();
-        if lvl != "0" {
+        if lvl != "0" && !cfg!(debug_assertions) {
             println!("cargo:rustc-cfg=assert_no_panic");
+        } else if env::var("ENSURE_NO_PANIC").is_ok() {
+            // Give us a defensive way of ensureing that no-panic is checked  when we
+            // expect it to be.
+            panic!("`assert_no_panic `was not enabled");
         }
     }
 
diff --git a/ci/run.sh b/ci/run.sh
index 89c9c8631..244a22a07 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -81,8 +81,10 @@ else
     $cmd --features unstable-intrinsics --benches
     
     # Test the same in release mode, which also increases coverage.
-    $cmd --release
-    $cmd --release --features unstable-intrinsics
-    $cmd --release --features unstable-intrinsics --benches
+    $cmd --profile release-checked 
+    $cmd --profile release-checked --features unstable-intrinsics
+    $cmd --profile release-checked --features unstable-intrinsics --benches
+
+    ENSURE_NO_PANIC=1 cargo build --target "$target" --release
 fi
 

From 430fe28ef397bf2537e28e9a6764d7e74cb34ce3 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sun, 12 Jan 2025 03:25:25 +0000
Subject: [PATCH 116/279] Add `biteq` and `exp_unbiased` to `Float`

These are two convenience methods. Additionally, add tests for the trait
methods, and an `assert_biteq!` macro to check and print the output.
---
 src/math/support/float_traits.rs | 125 +++++++++++++++++++++++++++++--
 src/math/support/int_traits.rs   |   8 +-
 src/math/support/macros.rs       |  20 +++++
 3 files changed, 143 insertions(+), 10 deletions(-)

diff --git a/src/math/support/float_traits.rs b/src/math/support/float_traits.rs
index f795527db..3aa0d844a 100644
--- a/src/math/support/float_traits.rs
+++ b/src/math/support/float_traits.rs
@@ -1,6 +1,6 @@
 use core::{fmt, mem, ops};
 
-use super::int_traits::{CastInto, Int, MinInt};
+use super::int_traits::{CastFrom, CastInto, Int, MinInt};
 
 /// Trait for some basic operations on floats
 #[allow(dead_code)]
@@ -73,11 +73,18 @@ pub trait Float:
         self.to_bits().signed()
     }
 
+    /// Check bitwise equality.
+    fn biteq(self, rhs: Self) -> bool {
+        self.to_bits() == rhs.to_bits()
+    }
+
     /// Checks if two floats have the same bit representation. *Except* for NaNs! NaN can be
-    /// represented in multiple different ways. This method returns `true` if two NaNs are
-    /// compared.
+    /// represented in multiple different ways.
+    ///
+    /// This method returns `true` if two NaNs are compared. Use [`biteq`](Self::biteq) instead
+    /// if `NaN` should not be treated separately.
     fn eq_repr(self, rhs: Self) -> bool {
-        if self.is_nan() && rhs.is_nan() { true } else { self.to_bits() == rhs.to_bits() }
+        if self.is_nan() && rhs.is_nan() { true } else { self.biteq(rhs) }
     }
 
     /// Returns true if the value is NaN.
@@ -94,17 +101,22 @@ pub trait Float:
         (self.to_bits() & Self::EXP_MASK) == Self::Int::ZERO
     }
 
-    /// Returns the exponent, not adjusting for bias.
+    /// Returns the exponent, not adjusting for bias, not accounting for subnormals or zero.
     fn exp(self) -> i32 {
         ((self.to_bits() & Self::EXP_MASK) >> Self::SIG_BITS).cast()
     }
 
+    /// Extract the exponent and adjust it for bias, not accounting for subnormals or zero.
+    fn exp_unbiased(self) -> i32 {
+        self.exp() - (Self::EXP_BIAS as i32)
+    }
+
     /// Returns the significand with no implicit bit (or the "fractional" part)
     fn frac(self) -> Self::Int {
         self.to_bits() & Self::SIG_MASK
     }
 
-    /// Returns the significand with implicit bit
+    /// Returns the significand with implicit bit.
     fn imp_frac(self) -> Self::Int {
         self.frac() | Self::IMPLICIT_BIT
     }
@@ -113,11 +125,11 @@ pub trait Float:
     fn from_bits(a: Self::Int) -> Self;
 
     /// Constructs a `Self` from its parts. Inputs are treated as bits and shifted into position.
-    fn from_parts(negative: bool, exponent: Self::Int, significand: Self::Int) -> Self {
+    fn from_parts(negative: bool, exponent: i32, significand: Self::Int) -> Self {
         let sign = if negative { Self::Int::ONE } else { Self::Int::ZERO };
         Self::from_bits(
             (sign << (Self::BITS - 1))
-                | ((exponent << Self::SIG_BITS) & Self::EXP_MASK)
+                | (Self::Int::cast_from(exponent as u32 & Self::EXP_MAX) << Self::SIG_BITS)
                 | (significand & Self::SIG_MASK),
         )
     }
@@ -239,3 +251,100 @@ pub const fn f64_from_bits(bits: u64) -> f64 {
     // SAFETY: POD cast with no preconditions
     unsafe { mem::transmute::<u64, f64>(bits) }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn check_f16() {
+        // Constants
+        assert_eq!(f16::EXP_MAX, 0b11111);
+        assert_eq!(f16::EXP_BIAS, 15);
+
+        // `exp_unbiased`
+        assert_eq!(f16::FRAC_PI_2.exp_unbiased(), 0);
+        assert_eq!((1.0f16 / 2.0).exp_unbiased(), -1);
+        assert_eq!(f16::MAX.exp_unbiased(), 15);
+        assert_eq!(f16::MIN.exp_unbiased(), 15);
+        assert_eq!(f16::MIN_POSITIVE.exp_unbiased(), -14);
+        // This is a convenience method and not ldexp, `exp_unbiased` does not return correct
+        // results for zero and subnormals.
+        assert_eq!(f16::ZERO.exp_unbiased(), -15);
+        assert_eq!(f16::from_bits(0x1).exp_unbiased(), -15);
+
+        // `from_parts`
+        assert_biteq!(f16::from_parts(true, f16::EXP_BIAS as i32, 0), -1.0f16);
+        assert_biteq!(f16::from_parts(false, 0, 1), f16::from_bits(0x1));
+    }
+
+    #[test]
+    fn check_f32() {
+        // Constants
+        assert_eq!(f32::EXP_MAX, 0b11111111);
+        assert_eq!(f32::EXP_BIAS, 127);
+
+        // `exp_unbiased`
+        assert_eq!(f32::FRAC_PI_2.exp_unbiased(), 0);
+        assert_eq!((1.0f32 / 2.0).exp_unbiased(), -1);
+        assert_eq!(f32::MAX.exp_unbiased(), 127);
+        assert_eq!(f32::MIN.exp_unbiased(), 127);
+        assert_eq!(f32::MIN_POSITIVE.exp_unbiased(), -126);
+        // This is a convenience method and not ldexp, `exp_unbiased` does not return correct
+        // results for zero and subnormals.
+        assert_eq!(f32::ZERO.exp_unbiased(), -127);
+        assert_eq!(f32::from_bits(0x1).exp_unbiased(), -127);
+
+        // `from_parts`
+        assert_biteq!(f32::from_parts(true, f32::EXP_BIAS as i32, 0), -1.0f32);
+        assert_biteq!(f32::from_parts(false, 10 + f32::EXP_BIAS as i32, 0), hf32!("0x1p10"));
+        assert_biteq!(f32::from_parts(false, 0, 1), f32::from_bits(0x1));
+    }
+
+    #[test]
+    fn check_f64() {
+        // Constants
+        assert_eq!(f64::EXP_MAX, 0b11111111111);
+        assert_eq!(f64::EXP_BIAS, 1023);
+
+        // `exp_unbiased`
+        assert_eq!(f64::FRAC_PI_2.exp_unbiased(), 0);
+        assert_eq!((1.0f64 / 2.0).exp_unbiased(), -1);
+        assert_eq!(f64::MAX.exp_unbiased(), 1023);
+        assert_eq!(f64::MIN.exp_unbiased(), 1023);
+        assert_eq!(f64::MIN_POSITIVE.exp_unbiased(), -1022);
+        // This is a convenience method and not ldexp, `exp_unbiased` does not return correct
+        // results for zero and subnormals.
+        assert_eq!(f64::ZERO.exp_unbiased(), -1023);
+        assert_eq!(f64::from_bits(0x1).exp_unbiased(), -1023);
+
+        // `from_parts`
+        assert_biteq!(f64::from_parts(true, f64::EXP_BIAS as i32, 0), -1.0f64);
+        assert_biteq!(f64::from_parts(false, 10 + f64::EXP_BIAS as i32, 0), hf64!("0x1p10"));
+        assert_biteq!(f64::from_parts(false, 0, 1), f64::from_bits(0x1));
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn check_f128() {
+        // Constants
+        assert_eq!(f128::EXP_MAX, 0b111111111111111);
+        assert_eq!(f128::EXP_BIAS, 16383);
+
+        // `exp_unbiased`
+        assert_eq!(f128::FRAC_PI_2.exp_unbiased(), 0);
+        assert_eq!((1.0f128 / 2.0).exp_unbiased(), -1);
+        assert_eq!(f128::MAX.exp_unbiased(), 16383);
+        assert_eq!(f128::MIN.exp_unbiased(), 16383);
+        assert_eq!(f128::MIN_POSITIVE.exp_unbiased(), -16382);
+        // This is a convenience method and not ldexp, `exp_unbiased` does not return correct
+        // results for zero and subnormals.
+        assert_eq!(f128::ZERO.exp_unbiased(), -16383);
+        assert_eq!(f128::from_bits(0x1).exp_unbiased(), -16383);
+
+        // `from_parts`
+        assert_biteq!(f128::from_parts(true, f128::EXP_BIAS as i32, 0), -1.0f128);
+        assert_biteq!(f128::from_parts(false, 0, 1), f128::from_bits(0x1));
+    }
+}
diff --git a/src/math/support/int_traits.rs b/src/math/support/int_traits.rs
index 459f0a58b..db799c030 100644
--- a/src/math/support/int_traits.rs
+++ b/src/math/support/int_traits.rs
@@ -54,10 +54,14 @@ pub trait Int:
     + ops::BitXor<Output = Self>
     + ops::BitAnd<Output = Self>
     + cmp::Ord
-    + CastInto<usize>
-    + CastInto<i32>
     + CastFrom<i32>
+    + CastFrom<u32>
     + CastFrom<u8>
+    + CastFrom<usize>
+    + CastInto<i32>
+    + CastInto<u32>
+    + CastInto<u8>
+    + CastInto<usize>
 {
     fn signed(self) -> OtherSign<Self::Unsigned>;
     fn unsigned(self) -> Self::Unsigned;
diff --git a/src/math/support/macros.rs b/src/math/support/macros.rs
index f5094b9da..076fdf1f7 100644
--- a/src/math/support/macros.rs
+++ b/src/math/support/macros.rs
@@ -106,3 +106,23 @@ macro_rules! hf64 {
         X
     }};
 }
+
+/// Assert `F::biteq` with better messages.
+#[cfg(test)]
+macro_rules! assert_biteq {
+    ($left:expr, $right:expr, $($arg:tt)*) => {{
+        let bits = ($left.to_bits() * 0).leading_zeros(); // hack to get the width from the value
+        assert!(
+            $left.biteq($right),
+            "\nl: {l:?} ({lb:#0width$x})\nr: {r:?} ({rb:#0width$x})",
+            l = $left,
+            lb = $left.to_bits(),
+            r = $right,
+            rb = $right.to_bits(),
+            width = ((bits / 4) + 2) as usize
+        );
+    }};
+    ($left:expr, $right:expr $(,)?) => {
+        assert_biteq!($left, $right,)
+    };
+}

From 233700c98410ff3252595b7f1256ec27f0e061be Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sun, 12 Jan 2025 03:55:46 +0000
Subject: [PATCH 117/279] Always use the same seed for benchmarking

It would be preferable to switch to a different generator, or at least
set the seed within the benchmark, but this is the most straightforward
way to make things simple.
---
 .github/workflows/main.yml | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 98505ea35..b14ab40ec 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -156,7 +156,11 @@ jobs:
     - uses: Swatinem/rust-cache@v2
     - name: Download musl source
       run: ./ci/download-musl.sh
-    - run: cargo bench --all --features libm-test/short-benchmarks,libm-test/build-musl
+    - run: |
+        # Always use the same seed for benchmarks. Ideally we should switch to a
+        # non-random generator.
+        export LIBM_SEED=benchesbenchesbenchesbencheswoo!
+        cargo bench --all --features libm-test/short-benchmarks,libm-test/build-musl
 
   msrv:
     name: Check MSRV

From 5eda282fa02ffda03fbda50b0f406c022230f3f5 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sun, 12 Jan 2025 07:43:24 +0000
Subject: [PATCH 118/279] Expose C versions of `libm` functions in the `cb`
 crate

`compiler_builtins` exposes an `extern "C"` version of `libm` routines,
so add the same here. There really isn't much to test here (unless we
later add tests against C `libm` suites), but one nice benefit is this
gives us a library with unmangled names that is easy to `objdump`. In
accordance with that, also update `cb` to be a `staticlib`.

Unfortunately this also means we have to remove it from the workspace,
since Cargo doesn't allow setting `panic = "abort"` for a single crate.
---
 .github/workflows/main.yml                    |   2 +-
 .gitignore                                    |   3 +-
 Cargo.toml                                    |   5 +-
 .../compiler-builtins-smoke-test/Cargo.toml   |  18 +-
 crates/compiler-builtins-smoke-test/build.rs  |   7 +
 .../compiler-builtins-smoke-test/src/lib.rs   | 171 ++++++++++++++++++
 6 files changed, 200 insertions(+), 6 deletions(-)
 create mode 100644 crates/compiler-builtins-smoke-test/build.rs

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index b14ab40ec..7e371d613 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -144,7 +144,7 @@ jobs:
     - name: Install Rust
       run: rustup update nightly --no-self-update && rustup default nightly
     - uses: Swatinem/rust-cache@v2
-    - run: cargo build -p cb
+    - run: cargo test --manifest-path crates/compiler-builtins-smoke-test/Cargo.toml
 
   benchmarks:
     name: Benchmarks
diff --git a/.gitignore b/.gitignore
index b6a532751..4e9c9c03d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,8 +2,7 @@
 .#*
 /bin
 /math/src
-/math/target
-/target
+target
 Cargo.lock
 musl/
 **.tar.gz
diff --git a/Cargo.toml b/Cargo.toml
index 0e444b583..fc881b77e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -43,7 +43,6 @@ force-soft-floats = []
 [workspace]
 resolver = "2"
 members = [
-  "crates/compiler-builtins-smoke-test",
   "crates/libm-macros",
   "crates/libm-test",
   "crates/musl-math-sys",
@@ -53,6 +52,10 @@ default-members = [
   "crates/libm-macros",
   "crates/libm-test",
 ]
+exclude = [
+  # Requires `panic = abort` so can't be a member of the workspace
+  "crates/compiler-builtins-smoke-test",
+]
 
 [dev-dependencies]
 no-panic = "0.1.30"
diff --git a/crates/compiler-builtins-smoke-test/Cargo.toml b/crates/compiler-builtins-smoke-test/Cargo.toml
index 1f09ce99c..d578b0dcd 100644
--- a/crates/compiler-builtins-smoke-test/Cargo.toml
+++ b/crates/compiler-builtins-smoke-test/Cargo.toml
@@ -6,15 +6,21 @@ edition = "2021"
 publish = false
 
 [lib]
+crate-type = ["staticlib"]
 test = false
 bench = false
 
+[features]
+default = ["arch", "unstable-float"]
+
+# Copied from `libm`'s root `Cargo.toml`'
+unstable-float = []
+arch = []
+
 [lints.rust]
 unexpected_cfgs = { level = "warn", check-cfg = [
   "cfg(arch_enabled)",
   "cfg(assert_no_panic)",
-  "cfg(f128_enabled)",
-  "cfg(f16_enabled)",
   "cfg(intrinsics_enabled)",
   'cfg(feature, values("checked"))',
   'cfg(feature, values("force-soft-floats"))',
@@ -22,3 +28,11 @@ unexpected_cfgs = { level = "warn", check-cfg = [
   'cfg(feature, values("unstable-intrinsics"))',
   'cfg(feature, values("unstable-public-internals"))',
 ] }
+
+[profile.dev]
+panic = "abort"
+
+[profile.release]
+panic = "abort"
+codegen-units = 1
+lto = "fat"
diff --git a/crates/compiler-builtins-smoke-test/build.rs b/crates/compiler-builtins-smoke-test/build.rs
new file mode 100644
index 000000000..401b7e1eb
--- /dev/null
+++ b/crates/compiler-builtins-smoke-test/build.rs
@@ -0,0 +1,7 @@
+#[path = "../../configure.rs"]
+mod configure;
+
+fn main() {
+    let cfg = configure::Config::from_env();
+    configure::emit_libm_config(&cfg);
+}
diff --git a/crates/compiler-builtins-smoke-test/src/lib.rs b/crates/compiler-builtins-smoke-test/src/lib.rs
index e3a51a575..95ecb840d 100644
--- a/crates/compiler-builtins-smoke-test/src/lib.rs
+++ b/crates/compiler-builtins-smoke-test/src/lib.rs
@@ -1,8 +1,12 @@
 //! Fake compiler-builtins crate
 //!
 //! This is used to test that we can source import `libm` into the compiler-builtins crate.
+//! Additionally, it provides a `#[no_mangle]` C API that can be easier to inspect than the
+//! default `.rlib`.
 
 #![feature(core_intrinsics)]
+#![feature(f16)]
+#![feature(f128)]
 #![allow(internal_features)]
 #![no_std]
 
@@ -10,3 +14,170 @@
 #[allow(clippy::all)] // We don't get `libm`'s list of `allow`s, so just ignore Clippy.
 #[path = "../../../src/math/mod.rs"]
 pub mod libm;
+
+use core::ffi::c_int;
+
+/// Mark functions `#[no_mangle]` and with the C ABI.
+macro_rules! no_mangle {
+    ($( $name:ident( $($tt:tt)+ ) -> $ret:ty; )+) => {
+        $( no_mangle!(@inner $name( $($tt)+ ) -> $ret); )+
+    };
+
+    // Handle simple functions with single return types
+    (@inner $name:ident( $($arg:ident: $aty:ty),+ ) -> $ret:ty) => {
+        #[no_mangle]
+        extern "C" fn $name($($arg: $aty),+) -> $ret {
+            libm::$name($($arg),+)
+        }
+    };
+
+
+    // Functions with `&mut` return values need to be handled differently, use `|` to
+    // separate inputs vs. outputs.
+    (
+        @inner $name:ident( $($arg:ident: $aty:ty),+ | $($rarg:ident: $rty:ty),+) -> $ret:ty
+    ) => {
+        #[no_mangle]
+        extern "C" fn $name($($arg: $aty,)+ $($rarg: $rty),+) -> $ret {
+            let ret;
+            (ret, $(*$rarg),+) = libm::$name($($arg),+);
+            ret
+        }
+    };
+}
+
+no_mangle! {
+    frexp(x: f64 | y: &mut c_int) -> f64;
+    frexpf(x: f32 | y: &mut c_int) -> f32;
+    acos(x: f64) -> f64;
+    acosf(x: f32) -> f32;
+    acosh(x: f64) -> f64;
+    acoshf(x: f32) -> f32;
+    asin(x: f64) -> f64;
+    asinf(x: f32) -> f32;
+    asinh(x: f64) -> f64;
+    asinhf(x: f32) -> f32;
+    atan(x: f64) -> f64;
+    atan2(x: f64, y: f64) -> f64;
+    atan2f(x: f32, y: f32) -> f32;
+    atanf(x: f32) -> f32;
+    atanh(x: f64) -> f64;
+    atanhf(x: f32) -> f32;
+    cbrt(x: f64) -> f64;
+    cbrtf(x: f32) -> f32;
+    ceil(x: f64) -> f64;
+    ceilf(x: f32) -> f32;
+    copysign(x: f64, y: f64) -> f64;
+    copysignf(x: f32, y: f32) -> f32;
+    copysignf128(x: f128, y: f128) -> f128;
+    copysignf16(x: f16, y: f16) -> f16;
+    cos(x: f64) -> f64;
+    cosf(x: f32) -> f32;
+    cosh(x: f64) -> f64;
+    coshf(x: f32) -> f32;
+    erf(x: f64) -> f64;
+    erfc(x: f64) -> f64;
+    erfcf(x: f32) -> f32;
+    erff(x: f32) -> f32;
+    exp(x: f64) -> f64;
+    exp10(x: f64) -> f64;
+    exp10f(x: f32) -> f32;
+    exp2(x: f64) -> f64;
+    exp2f(x: f32) -> f32;
+    expf(x: f32) -> f32;
+    expm1(x: f64) -> f64;
+    expm1f(x: f32) -> f32;
+    fabs(x: f64) -> f64;
+    fabsf(x: f32) -> f32;
+    fabsf128(x: f128) -> f128;
+    fabsf16(x: f16) -> f16;
+    fdim(x: f64, y: f64) -> f64;
+    fdimf(x: f32, y: f32) -> f32;
+    floor(x: f64) -> f64;
+    floorf(x: f32) -> f32;
+    fma(x: f64, y: f64, z: f64) -> f64;
+    fmaf(x: f32, y: f32, z: f32) -> f32;
+    fmax(x: f64, y: f64) -> f64;
+    fmaxf(x: f32, y: f32) -> f32;
+    fmin(x: f64, y: f64) -> f64;
+    fminf(x: f32, y: f32) -> f32;
+    fmod(x: f64, y: f64) -> f64;
+    fmodf(x: f32, y: f32) -> f32;
+    hypot(x: f64, y: f64) -> f64;
+    hypotf(x: f32, y: f32) -> f32;
+    ilogb(x: f64) -> c_int;
+    ilogbf(x: f32) -> c_int;
+    j0(x: f64) -> f64;
+    j0f(x: f32) -> f32;
+    j1(x: f64) -> f64;
+    j1f(x: f32) -> f32;
+    jn(x: c_int, y: f64) -> f64;
+    jnf(x: c_int, y: f32) -> f32;
+    ldexp(x: f64, y: c_int) -> f64;
+    ldexpf(x: f32, y: c_int) -> f32;
+    lgamma(x: f64) -> f64;
+    lgamma_r(x: f64 | r: &mut c_int) -> f64;
+    lgammaf(x: f32) -> f32;
+    lgammaf_r(x: f32 | r: &mut c_int) -> f32;
+    log(x: f64) -> f64;
+    log10(x: f64) -> f64;
+    log10f(x: f32) -> f32;
+    log1p(x: f64) -> f64;
+    log1pf(x: f32) -> f32;
+    log2(x: f64) -> f64;
+    log2f(x: f32) -> f32;
+    logf(x: f32) -> f32;
+    modf(x: f64 | r: &mut f64) -> f64;
+    modff(x: f32 | r: &mut f32) -> f32;
+    nextafter(x: f64, y: f64) -> f64;
+    nextafterf(x: f32, y: f32) -> f32;
+    pow(x: f64, y: f64) -> f64;
+    powf(x: f32, y: f32) -> f32;
+    remainder(x: f64, y: f64) -> f64;
+    remainderf(x: f32, y: f32) -> f32;
+    remquo(x: f64, y: f64 | q: &mut c_int) -> f64;
+    remquof(x: f32, y: f32 | q: &mut c_int) -> f32;
+    rint(x: f64) -> f64;
+    rintf(x: f32) -> f32;
+    round(x: f64) -> f64;
+    roundf(x: f32) -> f32;
+    scalbn(x: f64, y: c_int) -> f64;
+    scalbnf(x: f32, y: c_int) -> f32;
+    sin(x: f64) -> f64;
+    sinf(x: f32) -> f32;
+    sinh(x: f64) -> f64;
+    sinhf(x: f32) -> f32;
+    sqrt(x: f64) -> f64;
+    sqrtf(x: f32) -> f32;
+    tan(x: f64) -> f64;
+    tanf(x: f32) -> f32;
+    tanh(x: f64) -> f64;
+    tanhf(x: f32) -> f32;
+    tgamma(x: f64) -> f64;
+    tgammaf(x: f32) -> f32;
+    trunc(x: f64) -> f64;
+    truncf(x: f32) -> f32;
+    y0(x: f64) -> f64;
+    y0f(x: f32) -> f32;
+    y1(x: f64) -> f64;
+    y1f(x: f32) -> f32;
+    yn(x: c_int, y: f64) -> f64;
+    ynf(x: c_int, y: f32) -> f32;
+}
+
+/* sincos has no direct return type, not worth handling in the macro */
+
+#[no_mangle]
+extern "C" fn sincos(x: f64, s: &mut f64, c: &mut f64) {
+    (*s, *c) = libm::sincos(x);
+}
+
+#[no_mangle]
+extern "C" fn sincosf(x: f32, s: &mut f32, c: &mut f32) {
+    (*s, *c) = libm::sincosf(x);
+}
+
+#[panic_handler]
+fn panic(_info: &core::panic::PanicInfo) -> ! {
+    loop {}
+}

From d56c3aa4dbfb59eea0d993b48a34ec98a2107d55 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sun, 12 Jan 2025 04:24:03 +0000
Subject: [PATCH 119/279] Add a new precision adjustment for i586 `rint`
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`rint` had a couple recent failures from the random tests:

    ---- mp_random_rint stdout ----
    Random Mpfr rint arg 1/1: 10000 iterations (10000 total) using `LIBM_SEED=Fl1f69DaJnwkHN2FeuCXaBFRvJYsPvEY`

    thread 'mp_random_rint' panicked at crates/libm-test/tests/multiprecision.rs:41:49:
    called `Result::unwrap()` on an `Err` value:
        input:    (-849751480.5001163,) (0xc1c95316dc4003d0,)
        expected: -849751481.0           0xc1c95316dc800000
        actual:   -849751480.0           0xc1c95316dc000000

    Caused by:
        ulp 8388608 > 100000

And:

    ---- mp_random_rint stdout ----
    Random Mpfr rint arg 1/1: 10000 iterations (10000 total) using `LIBM_SEED=XN7VCGhX3Wu6Mzn8COvJPITyZlGP7gN7`

    thread 'mp_random_rint' panicked at crates/libm-test/tests/multiprecision.rs:41:49:
    called `Result::unwrap()` on an `Err` value:
        input:    (-12493089.499809155,) (0xc167d4242ffe6fc5,)
        expected: -12493089.0            0xc167d42420000000
        actual:   -12493090.0            0xc167d42440000000

    Caused by:
        ulp 536870912 > 100000

It seems we just implement an incorrect rounding mode. Replace the
existing `rint` override with an xfail if the difference is 0.0 <= ε <=
1.0.
---
 crates/libm-test/src/precision.rs | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/crates/libm-test/src/precision.rs b/crates/libm-test/src/precision.rs
index cbe4bdf88..75b99c652 100644
--- a/crates/libm-test/src/precision.rs
+++ b/crates/libm-test/src/precision.rs
@@ -114,7 +114,6 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 {
             Id::Exp10 | Id::Exp10f => ulp = 1_000_000,
             Id::Exp2 | Id::Exp2f => ulp = 10_000_000,
             Id::Log1p | Id::Log1pf => ulp = 2,
-            Id::Rint => ulp = 100_000,
             Id::Round => ulp = 1,
             Id::Tan => ulp = 2,
             _ => (),
@@ -261,6 +260,15 @@ impl MaybeOverride<(f64,)> for SpecialCase {
             }
         }
 
+        if cfg!(x86_no_sse)
+            && ctx.base_name == BaseName::Rint
+            && (expected - actual).abs() <= F::ONE
+            && (expected - actual).abs() > F::ZERO
+        {
+            // Our rounding mode is incorrect.
+            return XFAIL;
+        }
+
         if ctx.base_name == BaseName::Acosh && input.0 < 1.0 {
             // The function is undefined for the inputs, musl and our libm both return
             // random results.

From 67122044027d528781bd1111f3cc9262c7866731 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sun, 12 Jan 2025 08:02:44 +0000
Subject: [PATCH 120/279] Add a new precision adjustment for i586 `exp2f`

There was a recent failure from the random tests:

     ---- mp_random_exp2f stdout ----
    Random Mpfr exp2f arg 1/1: 10000 iterations (10000 total) using `LIBM_SEED=fqgMuzs6eqH1VZSEmQpLnThnaIyRUOWe`

    thread 'mp_random_exp2f' panicked at crates/libm-test/tests/multiprecision.rs:41:49:
    called `Result::unwrap()` on an `Err` value:
        input:    (127.97238,) (0x42fff1dc,)
        expected: 3.3383009e38           0x7f7b2556
        actual:   inf                    0x7f800000

    Caused by:
        mismatched infinities

Add an xfail for mismatched infinities on i586.
---
 crates/libm-test/src/precision.rs | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/crates/libm-test/src/precision.rs b/crates/libm-test/src/precision.rs
index 75b99c652..15913fe6d 100644
--- a/crates/libm-test/src/precision.rs
+++ b/crates/libm-test/src/precision.rs
@@ -211,6 +211,15 @@ impl MaybeOverride<(f32,)> for SpecialCase {
             return XFAIL;
         }
 
+        if cfg!(x86_no_sse)
+            && ctx.base_name == BaseName::Exp2
+            && !expected.is_infinite()
+            && actual.is_infinite()
+        {
+            // We return infinity when there is a representable value. Test input: 127.97238
+            return XFAIL;
+        }
+
         maybe_check_nan_bits(actual, expected, ctx)
     }
 

From def4a0ec9d11d0e4d7dab58bb6efd63788362e4d Mon Sep 17 00:00:00 2001
From: Hanna Kruppe <hanna.kruppe@gmail.com>
Date: Sun, 12 Jan 2025 10:56:30 +0100
Subject: [PATCH 121/279] Use wasm32 arch intrinsics for rint{,f}

---
 etc/function-definitions.json | 2 ++
 src/math/arch/mod.rs          | 4 +++-
 src/math/arch/wasm32.rs       | 8 ++++++++
 src/math/rint.rs              | 6 ++++++
 src/math/rintf.rs             | 6 ++++++
 5 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/etc/function-definitions.json b/etc/function-definitions.json
index 3cf7e0fed..f60a7e567 100644
--- a/etc/function-definitions.json
+++ b/etc/function-definitions.json
@@ -604,12 +604,14 @@
     "rint": {
         "sources": [
             "src/libm_helper.rs",
+            "src/math/arch/wasm32.rs",
             "src/math/rint.rs"
         ],
         "type": "f64"
     },
     "rintf": {
         "sources": [
+            "src/math/arch/wasm32.rs",
             "src/math/rintf.rs"
         ],
         "type": "f32"
diff --git a/src/math/arch/mod.rs b/src/math/arch/mod.rs
index bd79ae1c6..3992419cb 100644
--- a/src/math/arch/mod.rs
+++ b/src/math/arch/mod.rs
@@ -11,7 +11,9 @@
 cfg_if! {
     if #[cfg(all(target_arch = "wasm32", intrinsics_enabled))] {
         mod wasm32;
-        pub use wasm32::{ceil, ceilf, fabs, fabsf, floor, floorf, sqrt, sqrtf, trunc, truncf};
+        pub use wasm32::{
+            ceil, ceilf, fabs, fabsf, floor, floorf, rint, rintf, sqrt, sqrtf, trunc, truncf,
+        };
     } else if #[cfg(target_feature = "sse2")] {
         mod i686;
         pub use i686::{sqrt, sqrtf};
diff --git a/src/math/arch/wasm32.rs b/src/math/arch/wasm32.rs
index 384445f12..de80c8a58 100644
--- a/src/math/arch/wasm32.rs
+++ b/src/math/arch/wasm32.rs
@@ -25,6 +25,14 @@ pub fn floorf(x: f32) -> f32 {
     core::arch::wasm32::f32_floor(x)
 }
 
+pub fn rint(x: f64) -> f64 {
+    core::arch::wasm32::f64_nearest(x)
+}
+
+pub fn rintf(x: f32) -> f32 {
+    core::arch::wasm32::f32_nearest(x)
+}
+
 pub fn sqrt(x: f64) -> f64 {
     core::arch::wasm32::f64_sqrt(x)
 }
diff --git a/src/math/rint.rs b/src/math/rint.rs
index cbdc3c2b9..50192ffdf 100644
--- a/src/math/rint.rs
+++ b/src/math/rint.rs
@@ -1,5 +1,11 @@
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn rint(x: f64) -> f64 {
+    select_implementation! {
+        name: rint,
+        use_arch: all(target_arch = "wasm32", intrinsics_enabled),
+        args: x,
+    }
+
     let one_over_e = 1.0 / f64::EPSILON;
     let as_u64: u64 = x.to_bits();
     let exponent: u64 = (as_u64 >> 52) & 0x7ff;
diff --git a/src/math/rintf.rs b/src/math/rintf.rs
index 2d22c9393..64968b6be 100644
--- a/src/math/rintf.rs
+++ b/src/math/rintf.rs
@@ -1,5 +1,11 @@
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn rintf(x: f32) -> f32 {
+    select_implementation! {
+        name: rintf,
+        use_arch: all(target_arch = "wasm32", intrinsics_enabled),
+        args: x,
+    }
+
     let one_over_e = 1.0 / f32::EPSILON;
     let as_u32: u32 = x.to_bits();
     let exponent: u32 = (as_u32 >> 23) & 0xff;

From 45438834507788dfc3ef0bd21187711baa1d79f2 Mon Sep 17 00:00:00 2001
From: Hanna Kruppe <hanna.kruppe@gmail.com>
Date: Sun, 12 Jan 2025 11:16:40 +0100
Subject: [PATCH 122/279] Introduce arch::aarch64 and use it for rint{,f}

---
 etc/function-definitions.json |  2 ++
 src/math/arch/aarch64.rs      | 33 +++++++++++++++++++++++++++++++++
 src/math/arch/mod.rs          |  7 +++++++
 src/math/rint.rs              |  5 ++++-
 src/math/rintf.rs             |  5 ++++-
 5 files changed, 50 insertions(+), 2 deletions(-)
 create mode 100644 src/math/arch/aarch64.rs

diff --git a/etc/function-definitions.json b/etc/function-definitions.json
index f60a7e567..39b6c9702 100644
--- a/etc/function-definitions.json
+++ b/etc/function-definitions.json
@@ -604,6 +604,7 @@
     "rint": {
         "sources": [
             "src/libm_helper.rs",
+            "src/math/arch/aarch64.rs",
             "src/math/arch/wasm32.rs",
             "src/math/rint.rs"
         ],
@@ -611,6 +612,7 @@
     },
     "rintf": {
         "sources": [
+            "src/math/arch/aarch64.rs",
             "src/math/arch/wasm32.rs",
             "src/math/rintf.rs"
         ],
diff --git a/src/math/arch/aarch64.rs b/src/math/arch/aarch64.rs
new file mode 100644
index 000000000..374ec11bf
--- /dev/null
+++ b/src/math/arch/aarch64.rs
@@ -0,0 +1,33 @@
+use core::arch::aarch64::{
+    float32x2_t, float64x1_t, vdup_n_f32, vdup_n_f64, vget_lane_f32, vget_lane_f64, vrndn_f32,
+    vrndn_f64,
+};
+
+pub fn rint(x: f64) -> f64 {
+    // SAFETY: only requires target_feature=neon, ensured by `cfg_if` in parent module.
+    let x_vec: float64x1_t = unsafe { vdup_n_f64(x) };
+
+    // SAFETY: only requires target_feature=neon, ensured by `cfg_if` in parent module.
+    let result_vec: float64x1_t = unsafe { vrndn_f64(x_vec) };
+
+    // SAFETY: only requires target_feature=neon, ensured by `cfg_if` in parent module.
+    let result: f64 = unsafe { vget_lane_f64::<0>(result_vec) };
+
+    result
+}
+
+pub fn rintf(x: f32) -> f32 {
+    // There's a scalar form of this instruction (FRINTN) but core::arch doesn't expose it, so we
+    // have to use the vector form and drop the other lanes afterwards.
+
+    // SAFETY: only requires target_feature=neon, ensured by `cfg_if` in parent module.
+    let x_vec: float32x2_t = unsafe { vdup_n_f32(x) };
+
+    // SAFETY: only requires target_feature=neon, ensured by `cfg_if` in parent module.
+    let result_vec: float32x2_t = unsafe { vrndn_f32(x_vec) };
+
+    // SAFETY: only requires target_feature=neon, ensured by `cfg_if` in parent module.
+    let result: f32 = unsafe { vget_lane_f32::<0>(result_vec) };
+
+    result
+}
diff --git a/src/math/arch/mod.rs b/src/math/arch/mod.rs
index 3992419cb..091d7650a 100644
--- a/src/math/arch/mod.rs
+++ b/src/math/arch/mod.rs
@@ -17,6 +17,13 @@ cfg_if! {
     } else if #[cfg(target_feature = "sse2")] {
         mod i686;
         pub use i686::{sqrt, sqrtf};
+    } else if #[cfg(all(
+        target_arch = "aarch64", // TODO: also arm64ec?
+        target_feature = "neon",
+        target_endian = "little", // see https://github.com/rust-lang/stdarch/issues/1484
+    ))] {
+        mod aarch64;
+        pub use aarch64::{rint, rintf};
     }
 }
 
diff --git a/src/math/rint.rs b/src/math/rint.rs
index 50192ffdf..c9ea6402e 100644
--- a/src/math/rint.rs
+++ b/src/math/rint.rs
@@ -2,7 +2,10 @@
 pub fn rint(x: f64) -> f64 {
     select_implementation! {
         name: rint,
-        use_arch: all(target_arch = "wasm32", intrinsics_enabled),
+        use_arch: any(
+            all(target_arch = "wasm32", intrinsics_enabled),
+            all(target_arch = "aarch64", target_feature = "neon", target_endian = "little"),
+        ),
         args: x,
     }
 
diff --git a/src/math/rintf.rs b/src/math/rintf.rs
index 64968b6be..33b5b3dde 100644
--- a/src/math/rintf.rs
+++ b/src/math/rintf.rs
@@ -2,7 +2,10 @@
 pub fn rintf(x: f32) -> f32 {
     select_implementation! {
         name: rintf,
-        use_arch: all(target_arch = "wasm32", intrinsics_enabled),
+        use_arch: any(
+            all(target_arch = "wasm32", intrinsics_enabled),
+            all(target_arch = "aarch64", target_feature = "neon", target_endian = "little"),
+        ),
         args: x,
     }
 

From f711c0e4324a7dd860eca3a97bfd687e4f3f3243 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 13 Jan 2025 03:14:43 +0000
Subject: [PATCH 123/279] Rename the `test-multiprecision` feature to
 `build-mpfr`

Currently the features that control what we test against are
`build-musl` and `test-multiprecision`. I didn't name them very
consistently and there isn't really any reason for that.

Rename `test-multiprecision` to `build-mpfr` to better reflect what it
actually does and to be more consistent with `build-musl`.
---
 .github/workflows/main.yml                 | 4 ++--
 CONTRIBUTING.md                            | 2 +-
 ci/run.sh                                  | 8 ++++----
 crates/libm-test/Cargo.toml                | 2 +-
 crates/libm-test/src/lib.rs                | 2 +-
 crates/libm-test/src/run_cfg.rs            | 2 +-
 crates/libm-test/tests/multiprecision.rs   | 2 +-
 crates/libm-test/tests/z_extensive/main.rs | 6 +++---
 8 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 7e371d613..d54288574 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -133,7 +133,7 @@ jobs:
       run: ./ci/download-musl.sh
     - run: |
         cargo clippy --all \
-          --features libm-test/build-musl,libm-test/test-multiprecision \
+          --features libm-test/build-musl,libm-test/build-mpfr \
           --all-targets
 
   builtins:
@@ -241,7 +241,7 @@ jobs:
           fi
 
           LIBM_EXTENSIVE_TESTS="$CHANGED" cargo t \
-            --features test-multiprecision,unstable \
+            --features build-mpfr,unstable \
             --profile release-checked \
             -- extensive
       - name: Print test logs if available
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index aadcdf036..f7560878d 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -69,7 +69,7 @@ If you'd like to run tests with randomized inputs that get compared against
 infinite-precision results, run:
 
 ```sh
-cargo test --features libm-test/test-multiprecision,libm-test/build-musl --release
+cargo test --features libm-test/build-mpfr,libm-test/build-musl --release
 ```
 
 The multiprecision tests use the [`rug`] crate for bindings to MPFR. MPFR can
diff --git a/ci/run.sh b/ci/run.sh
index 244a22a07..63678620c 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -44,11 +44,11 @@ case "$target" in
     # Targets that aren't cross compiled work fine
     # FIXME(ci): we should be able to enable aarch64 Linux here once GHA
     # support rolls out.
-    x86_64*) extra_flags="$extra_flags --features libm-test/test-multiprecision" ;;
-    i686*) extra_flags="$extra_flags --features libm-test/test-multiprecision" ;;
-    i586*) extra_flags="$extra_flags --features libm-test/test-multiprecision --features gmp-mpfr-sys/force-cross" ;;
+    x86_64*) extra_flags="$extra_flags --features libm-test/build-mpfr" ;;
+    i686*) extra_flags="$extra_flags --features libm-test/build-mpfr" ;;
+    i586*) extra_flags="$extra_flags --features libm-test/build-mpfr --features gmp-mpfr-sys/force-cross" ;;
     # Apple aarch64 is native
-    aarch64*apple*) extra_flags="$extra_flags --features libm-test/test-multiprecision" ;;
+    aarch64*apple*) extra_flags="$extra_flags --features libm-test/build-mpfr" ;;
 esac
 
 # FIXME: `STATUS_DLL_NOT_FOUND` testing macros on CI.
diff --git a/crates/libm-test/Cargo.toml b/crates/libm-test/Cargo.toml
index 621e587c5..3cf4a08e1 100644
--- a/crates/libm-test/Cargo.toml
+++ b/crates/libm-test/Cargo.toml
@@ -12,7 +12,7 @@ unstable-float = ["libm/unstable-float", "rug?/nightly-float"]
 
 # Generate tests which are random inputs and the outputs are calculated with
 # musl libc.
-test-multiprecision = ["dep:az", "dep:rug", "dep:gmp-mpfr-sys"]
+build-mpfr = ["dep:az", "dep:rug", "dep:gmp-mpfr-sys"]
 
 # Build our own musl for testing and benchmarks
 build-musl = ["dep:musl-math-sys"]
diff --git a/crates/libm-test/src/lib.rs b/crates/libm-test/src/lib.rs
index 6e7017f09..cb89f1c8b 100644
--- a/crates/libm-test/src/lib.rs
+++ b/crates/libm-test/src/lib.rs
@@ -5,7 +5,7 @@
 pub mod domain;
 mod f8_impl;
 pub mod gen;
-#[cfg(feature = "test-multiprecision")]
+#[cfg(feature = "build-mpfr")]
 pub mod mpfloat;
 mod num;
 pub mod op;
diff --git a/crates/libm-test/src/run_cfg.rs b/crates/libm-test/src/run_cfg.rs
index 48a654caa..4a52091fe 100644
--- a/crates/libm-test/src/run_cfg.rs
+++ b/crates/libm-test/src/run_cfg.rs
@@ -126,7 +126,7 @@ impl TestEnv {
         let id = ctx.fn_ident;
         let op = id.math_op();
 
-        let will_run_mp = cfg!(feature = "test-multiprecision");
+        let will_run_mp = cfg!(feature = "build-mpfr");
 
         // Tests are pretty slow on non-64-bit targets, x86 MacOS, and targets that run in QEMU. Start
         // with a reduced number on these platforms.
diff --git a/crates/libm-test/tests/multiprecision.rs b/crates/libm-test/tests/multiprecision.rs
index e2766cfda..2d8856e16 100644
--- a/crates/libm-test/tests/multiprecision.rs
+++ b/crates/libm-test/tests/multiprecision.rs
@@ -1,6 +1,6 @@
 //! Test with "infinite precision"
 
-#![cfg(feature = "test-multiprecision")]
+#![cfg(feature = "build-mpfr")]
 
 use libm_test::domain::HasDomain;
 use libm_test::gen::random::RandomInput;
diff --git a/crates/libm-test/tests/z_extensive/main.rs b/crates/libm-test/tests/z_extensive/main.rs
index 3a2af88bd..5448cb6ea 100644
--- a/crates/libm-test/tests/z_extensive/main.rs
+++ b/crates/libm-test/tests/z_extensive/main.rs
@@ -1,14 +1,14 @@
 //! `main` is just a wrapper to handle configuration.
 
-#[cfg(not(feature = "test-multiprecision"))]
+#[cfg(not(feature = "build-mpfr"))]
 fn main() {
     eprintln!("multiprecision not enabled; skipping extensive tests");
 }
 
-#[cfg(feature = "test-multiprecision")]
+#[cfg(feature = "build-mpfr")]
 mod run;
 
-#[cfg(feature = "test-multiprecision")]
+#[cfg(feature = "build-mpfr")]
 fn main() {
     run::run();
 }

From 357e9aac78d9b53eb2c030de299ed3c31176046d Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 13 Jan 2025 03:17:54 +0000
Subject: [PATCH 124/279] Enable `build-mpfr` and `build-musl` by default

Most users who are developing this crate are likely running on a Unix
system, since there isn't much to test against otherwise. For
convenience, enable the features required to run these tests by default.
---
 .github/workflows/main.yml  |  5 +----
 CONTRIBUTING.md             | 11 ++++-------
 ci/run.sh                   | 27 ++++++++++++++++-----------
 crates/libm-test/Cargo.toml |  2 +-
 4 files changed, 22 insertions(+), 23 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index d54288574..35b307f77 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -131,10 +131,7 @@ jobs:
     - uses: Swatinem/rust-cache@v2
     - name: Download musl source
       run: ./ci/download-musl.sh
-    - run: |
-        cargo clippy --all \
-          --features libm-test/build-musl,libm-test/build-mpfr \
-          --all-targets
+    - run: cargo clippy --all --all-features --all-targets
 
   builtins:
     name: Check use with compiler-builtins
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index f7560878d..ba7f78ca0 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -62,15 +62,12 @@ Check [PR #65] for an example.
 Normal tests can be executed with:
 
 ```sh
-cargo test
+# `--release` ables more test cases
+cargo test --release
 ```
 
-If you'd like to run tests with randomized inputs that get compared against
-infinite-precision results, run:
-
-```sh
-cargo test --features libm-test/build-mpfr,libm-test/build-musl --release
-```
+If you are on a system that cannot build musl or MPFR, passing
+`--no-default-features` will run some limited tests.
 
 The multiprecision tests use the [`rug`] crate for bindings to MPFR. MPFR can
 be difficult to build on non-Unix systems, refer to [`gmp_mpfr_sys`] for help.
diff --git a/ci/run.sh b/ci/run.sh
index 63678620c..35b84809f 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -14,7 +14,15 @@ if [ -z "$target" ]; then
     target="$host_target"
 fi
 
-extra_flags=""
+# We enumerate features manually.
+extra_flags="--no-default-features"
+
+# Enable arch-specific routines when available.
+extra_flags="$extra_flags --features arch"
+
+# Always enable `unstable-float` since it expands available API but does not
+# change any implementations.
+extra_flags="$extra_flags --features unstable-float"
 
 # We need to specifically skip tests for musl-math-sys on systems that can't
 # build musl since otherwise `--all` will activate it.
@@ -57,14 +65,8 @@ case "$target" in
     *windows-gnu) extra_flags="$extra_flags --exclude libm-macros" ;;
 esac
 
-# Make sure we can build with overriding features. We test the indibidual
-# features it controls separately.
-cargo check --no-default-features
-cargo check --features "force-soft-floats"
-
-# Always enable `unstable-float` since it expands available API but does not
-# change any implementations.
-extra_flags="$extra_flags --features unstable-float"
+# Make sure we can build with overriding features.
+cargo check -p libm --no-default-features
 
 if [ "${BUILD_ONLY:-}" = "1" ]; then
     cmd="cargo build --target $target --package libm"
@@ -80,11 +82,14 @@ else
     $cmd --features unstable-intrinsics
     $cmd --features unstable-intrinsics --benches
     
-    # Test the same in release mode, which also increases coverage.
+    # Test the same in release mode, which also increases coverage. Also ensure
+    # the soft float routines are checked.
     $cmd --profile release-checked 
+    $cmd --profile release-checked --features force-soft-floats
     $cmd --profile release-checked --features unstable-intrinsics
     $cmd --profile release-checked --features unstable-intrinsics --benches
 
-    ENSURE_NO_PANIC=1 cargo build --target "$target" --release
+    # Ensure that the routines do not panic.
+    ENSURE_NO_PANIC=1 cargo build -p libm --target "$target" --no-default-features --release
 fi
 
diff --git a/crates/libm-test/Cargo.toml b/crates/libm-test/Cargo.toml
index 3cf4a08e1..d3f18ab3e 100644
--- a/crates/libm-test/Cargo.toml
+++ b/crates/libm-test/Cargo.toml
@@ -5,7 +5,7 @@ edition = "2021"
 publish = false
 
 [features]
-default = ["unstable-float"]
+default = ["build-mpfr", "build-musl", "unstable-float"]
 
 # Propagated from libm because this affects which functions we test.
 unstable-float = ["libm/unstable-float", "rug?/nightly-float"]

From 222d3cd888e3391dc6e54d6c5a216543a6cd9325 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 13 Jan 2025 03:12:12 +0000
Subject: [PATCH 125/279] Add a utility crate for quick evaluation

Introduce a simple binary that can run arbitrary input against any of
the available implementations (musl, MPFR, our libm). This provides an
easy way to check results, or run specific cases against a debugger.

Examples:

    $ cargo run -p util -- eval libm pow 1.6 2.4
    3.089498284311124
    $ cargo run -p util -- eval mpfr pow 1.6 2.4
    3.089498284311124
    $ cargo run -p util -- eval musl tgamma 1.2344597839132
    0.9097442657960874
    $ cargo run -p util -- eval mpfr tgamma 1.2344597839132
    0.9097442657960874
    $ cargo run -p util -- eval libm tgamma 1.2344597839132
    0.9097442657960871
    $ cargo run -p util -- eval musl sincos 3.1415926535
    (8.979318433952318e-11, -1.0)
---
 Cargo.toml                      |   1 +
 crates/libm-test/src/mpfloat.rs |  26 ++++
 crates/util/Cargo.toml          |  19 +++
 crates/util/build.rs            |   9 ++
 crates/util/src/main.rs         | 243 ++++++++++++++++++++++++++++++++
 5 files changed, 298 insertions(+)
 create mode 100644 crates/util/Cargo.toml
 create mode 100644 crates/util/build.rs
 create mode 100644 crates/util/src/main.rs

diff --git a/Cargo.toml b/Cargo.toml
index fc881b77e..820c01347 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -46,6 +46,7 @@ members = [
   "crates/libm-macros",
   "crates/libm-test",
   "crates/musl-math-sys",
+  "crates/util",
 ]
 default-members = [
   ".",
diff --git a/crates/libm-test/src/mpfloat.rs b/crates/libm-test/src/mpfloat.rs
index f71e72cd5..092f5f1d2 100644
--- a/crates/libm-test/src/mpfloat.rs
+++ b/crates/libm-test/src/mpfloat.rs
@@ -471,6 +471,32 @@ impl MpOp for crate::op::lgammaf_r::Routine {
     }
 }
 
+/* stub implementations so we don't need to special case them */
+
+impl MpOp for crate::op::nextafter::Routine {
+    type MpTy = MpFloat;
+
+    fn new_mp() -> Self::MpTy {
+        unimplemented!("nextafter does not yet have a MPFR operation");
+    }
+
+    fn run(_this: &mut Self::MpTy, _input: Self::RustArgs) -> Self::RustRet {
+        unimplemented!("nextafter does not yet have a MPFR operation");
+    }
+}
+
+impl MpOp for crate::op::nextafterf::Routine {
+    type MpTy = MpFloat;
+
+    fn new_mp() -> Self::MpTy {
+        unimplemented!("nextafter does not yet have a MPFR operation");
+    }
+
+    fn run(_this: &mut Self::MpTy, _input: Self::RustArgs) -> Self::RustRet {
+        unimplemented!("nextafter does not yet have a MPFR operation");
+    }
+}
+
 /// `rug` does not provide `remquo` so this exposes `mpfr_remquo`. See rug#76.
 fn mpfr_remquo(r: &mut MpFloat, x: &MpFloat, y: &MpFloat, round: Round) -> (Ordering, c_long) {
     let r = r.as_raw_mut();
diff --git a/crates/util/Cargo.toml b/crates/util/Cargo.toml
new file mode 100644
index 000000000..acf5db704
--- /dev/null
+++ b/crates/util/Cargo.toml
@@ -0,0 +1,19 @@
+[package]
+name = "util"
+version = "0.1.0"
+edition = "2021"
+publish = false
+
+[features]
+default = ["build-musl", "build-mpfr", "unstable-float"]
+build-musl = ["libm-test/build-musl", "dep:musl-math-sys"]
+build-mpfr = ["libm-test/build-mpfr", "dep:az", "dep:rug"]
+unstable-float = ["libm/unstable-float", "libm-test/unstable-float", "rug?/nightly-float"]
+
+[dependencies]
+az = { version = "1.2.1", optional = true }
+libm = { path = "../..", default-features = false }
+libm-macros = { path = "../libm-macros" }
+libm-test = { path = "../libm-test", default-features = false }
+musl-math-sys = { path = "../musl-math-sys", optional = true }
+rug = { version = "1.26.1", optional = true, default-features = false, features = ["float", "std"] }
diff --git a/crates/util/build.rs b/crates/util/build.rs
new file mode 100644
index 000000000..0745ef3dd
--- /dev/null
+++ b/crates/util/build.rs
@@ -0,0 +1,9 @@
+#![allow(unexpected_cfgs)]
+
+#[path = "../../configure.rs"]
+mod configure;
+
+fn main() {
+    let cfg = configure::Config::from_env();
+    configure::emit_libm_config(&cfg);
+}
diff --git a/crates/util/src/main.rs b/crates/util/src/main.rs
new file mode 100644
index 000000000..f7bd31bb6
--- /dev/null
+++ b/crates/util/src/main.rs
@@ -0,0 +1,243 @@
+//! Helper CLI utility for common tasks.
+
+#![cfg_attr(f16_enabled, feature(f16))]
+#![cfg_attr(f128_enabled, feature(f128))]
+
+use std::any::type_name;
+use std::env;
+use std::str::FromStr;
+
+#[cfg(feature = "build-mpfr")]
+use az::Az;
+#[cfg(feature = "build-mpfr")]
+use libm_test::mpfloat::MpOp;
+use libm_test::{MathOp, TupleCall};
+
+const USAGE: &str = "\
+usage:
+
+cargo run -p util -- <SUBCOMMAND>
+
+SUBCOMMAND:
+    eval <BASIS> <OP> inputs...
+        Evaulate the expression with a given basis. This can be useful for
+        running routines with a debugger, or quickly checking input. Examples:
+        * eval musl sinf 1.234 # print the results of musl sinf(1.234f32)
+        * eval mpfr pow 1.234 2.432 # print the results of mpfr pow(1.234, 2.432)
+";
+
+fn main() {
+    let args = env::args().collect::<Vec<_>>();
+    let str_args = args.iter().map(|s| s.as_str()).collect::<Vec<_>>();
+
+    match &str_args.as_slice()[1..] {
+        ["eval", basis, op, inputs @ ..] => do_eval(basis, op, inputs),
+        _ => {
+            println!("{USAGE}\nunrecognized input `{str_args:?}`");
+            std::process::exit(1);
+        }
+    }
+}
+
+macro_rules! handle_call {
+    (
+        fn_name: $fn_name:ident,
+        CFn: $CFn:ty,
+        RustFn: $RustFn:ty,
+        RustArgs: $RustArgs:ty,
+        attrs: [$($attr:meta),*],
+        extra: ($basis:ident, $op:ident, $inputs:ident),
+        fn_extra: $musl_fn:expr,
+    ) => {
+        $(#[$attr])*
+        if $op == stringify!($fn_name) {
+            type Op = libm_test::op::$fn_name::Routine;
+
+            let input = <$RustArgs>::parse($inputs);
+            let libm_fn: <Op as MathOp>::RustFn = libm::$fn_name;
+
+            let output = match $basis {
+                "libm" => input.call(libm_fn),
+                #[cfg(feature = "build-musl")]
+                "musl" => {
+                    let musl_fn: <Op as MathOp>::CFn =
+                        $musl_fn.unwrap_or_else(|| panic!("no musl function for {}", $op));
+                    input.call(musl_fn)
+                }
+                #[cfg(feature = "build-mpfr")]
+                "mpfr" => {
+                    let mut mp = <Op as MpOp>::new_mp();
+                    Op::run(&mut mp, input)
+                }
+                _ => panic!("unrecognized or disabled basis '{}'", $basis),
+            };
+            println!("{output:?}");
+            return;
+        }
+    };
+}
+
+/// Evaluate the specified operation with a given basis.
+fn do_eval(basis: &str, op: &str, inputs: &[&str]) {
+    libm_macros::for_each_function! {
+        callback: handle_call,
+        emit_types: [CFn, RustFn, RustArgs],
+        extra: (basis, op, inputs),
+        fn_extra: match MACRO_FN_NAME {
+            copysignf16 | copysignf128 | fabsf16 | fabsf128 => None,
+            _ => Some(musl_math_sys::MACRO_FN_NAME)
+        }
+    }
+
+    panic!("no operation matching {op}");
+}
+
+/// Parse a tuple from a space-delimited string.
+trait ParseTuple {
+    fn parse(input: &[&str]) -> Self;
+}
+
+macro_rules! impl_parse_tuple {
+    ($ty:ty) => {
+        impl ParseTuple for ($ty,) {
+            fn parse(input: &[&str]) -> Self {
+                assert_eq!(input.len(), 1, "expected a single argument, got {input:?}");
+                (parse(input, 0),)
+            }
+        }
+
+        impl ParseTuple for ($ty, $ty) {
+            fn parse(input: &[&str]) -> Self {
+                assert_eq!(input.len(), 2, "expected two arguments, got {input:?}");
+                (parse(input, 0), parse(input, 1))
+            }
+        }
+
+        impl ParseTuple for ($ty, i32) {
+            fn parse(input: &[&str]) -> Self {
+                assert_eq!(input.len(), 2, "expected two arguments, got {input:?}");
+                (parse(input, 0), parse(input, 1))
+            }
+        }
+
+        impl ParseTuple for (i32, $ty) {
+            fn parse(input: &[&str]) -> Self {
+                assert_eq!(input.len(), 2, "expected two arguments, got {input:?}");
+                (parse(input, 0), parse(input, 1))
+            }
+        }
+
+        impl ParseTuple for ($ty, $ty, $ty) {
+            fn parse(input: &[&str]) -> Self {
+                assert_eq!(input.len(), 2, "expected three arguments, got {input:?}");
+                (parse(input, 0), parse(input, 1), parse(input, 3))
+            }
+        }
+    };
+}
+
+#[allow(unused_macros)]
+#[cfg(feature = "build-mpfr")]
+macro_rules! impl_parse_tuple_via_rug {
+    ($ty:ty) => {
+        impl ParseTuple for ($ty,) {
+            fn parse(input: &[&str]) -> Self {
+                assert_eq!(input.len(), 1, "expected a single argument, got {input:?}");
+                (parse_rug(input, 0),)
+            }
+        }
+
+        impl ParseTuple for ($ty, $ty) {
+            fn parse(input: &[&str]) -> Self {
+                assert_eq!(input.len(), 2, "expected two arguments, got {input:?}");
+                (parse_rug(input, 0), parse_rug(input, 1))
+            }
+        }
+
+        impl ParseTuple for ($ty, i32) {
+            fn parse(input: &[&str]) -> Self {
+                assert_eq!(input.len(), 2, "expected two arguments, got {input:?}");
+                (parse_rug(input, 0), parse(input, 1))
+            }
+        }
+
+        impl ParseTuple for (i32, $ty) {
+            fn parse(input: &[&str]) -> Self {
+                assert_eq!(input.len(), 2, "expected two arguments, got {input:?}");
+                (parse(input, 0), parse_rug(input, 1))
+            }
+        }
+
+        impl ParseTuple for ($ty, $ty, $ty) {
+            fn parse(input: &[&str]) -> Self {
+                assert_eq!(input.len(), 2, "expected three arguments, got {input:?}");
+                (parse_rug(input, 0), parse_rug(input, 1), parse_rug(input, 3))
+            }
+        }
+    };
+}
+
+// Fallback for when Rug is not built.
+#[allow(unused_macros)]
+#[cfg(not(feature = "build-mpfr"))]
+macro_rules! impl_parse_tuple_via_rug {
+    ($ty:ty) => {
+        impl ParseTuple for ($ty,) {
+            fn parse(_input: &[&str]) -> Self {
+                panic!("parsing this type requires the `build-mpfr` feature")
+            }
+        }
+
+        impl ParseTuple for ($ty, $ty) {
+            fn parse(_input: &[&str]) -> Self {
+                panic!("parsing this type requires the `build-mpfr` feature")
+            }
+        }
+
+        impl ParseTuple for ($ty, i32) {
+            fn parse(_input: &[&str]) -> Self {
+                panic!("parsing this type requires the `build-mpfr` feature")
+            }
+        }
+
+        impl ParseTuple for (i32, $ty) {
+            fn parse(_input: &[&str]) -> Self {
+                panic!("parsing this type requires the `build-mpfr` feature")
+            }
+        }
+
+        impl ParseTuple for ($ty, $ty, $ty) {
+            fn parse(_input: &[&str]) -> Self {
+                panic!("parsing this type requires the `build-mpfr` feature")
+            }
+        }
+    };
+}
+
+impl_parse_tuple!(f32);
+impl_parse_tuple!(f64);
+
+#[cfg(f16_enabled)]
+impl_parse_tuple_via_rug!(f16);
+#[cfg(f128_enabled)]
+impl_parse_tuple_via_rug!(f128);
+
+/// Try to parse the number, printing a nice message on failure.
+fn parse<F: FromStr>(input: &[&str], idx: usize) -> F {
+    let s = input[idx];
+    s.parse().unwrap_or_else(|_| panic!("invalid {} input '{s}'", type_name::<F>()))
+}
+
+/// Try to parse the float type going via `rug`, for `f16` and `f128` which don't yet implement
+/// `FromStr`.
+#[cfg(feature = "build-mpfr")]
+fn parse_rug<F: libm_test::Float>(input: &[&str], idx: usize) -> F
+where
+    rug::Float: az::Cast<F>,
+{
+    let s = input[idx];
+    let x =
+        rug::Float::parse(s).unwrap_or_else(|_| panic!("invalid {} input '{s}'", type_name::<F>()));
+    let x = rug::Float::with_val(F::BITS, x);
+    x.az()
+}

From ef493a8269aa531602dc1d4d96240049da459211 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sat, 11 Jan 2025 23:38:19 +0000
Subject: [PATCH 126/279] Add a generic version of `trunc`

The algorithm is identical for both types, so this is a straightforward
routine to port.
---
 src/math/generic/mod.rs   |  2 ++
 src/math/generic/trunc.rs | 54 +++++++++++++++++++++++++++++++++++++++
 src/math/mod.rs           |  2 +-
 src/math/trunc.rs         | 30 +---------------------
 src/math/truncf.rs        | 22 +---------------
 5 files changed, 59 insertions(+), 51 deletions(-)
 create mode 100644 src/math/generic/trunc.rs

diff --git a/src/math/generic/mod.rs b/src/math/generic/mod.rs
index 08524b685..e5166ca10 100644
--- a/src/math/generic/mod.rs
+++ b/src/math/generic/mod.rs
@@ -1,5 +1,7 @@
 mod copysign;
 mod fabs;
+mod trunc;
 
 pub use copysign::copysign;
 pub use fabs::fabs;
+pub use trunc::trunc;
diff --git a/src/math/generic/trunc.rs b/src/math/generic/trunc.rs
new file mode 100644
index 000000000..5d0ba6109
--- /dev/null
+++ b/src/math/generic/trunc.rs
@@ -0,0 +1,54 @@
+use super::super::{Float, Int, IntTy, MinInt};
+
+pub fn trunc<F: Float>(x: F) -> F {
+    let mut xi: F::Int = x.to_bits();
+    let e: i32 = x.exp_unbiased();
+
+    // C1: The represented value has no fractional part, so no truncation is needed
+    if e >= F::SIG_BITS as i32 {
+        return x;
+    }
+
+    let mask = if e < 0 {
+        // C2: If the exponent is negative, the result will be zero so we mask out everything
+        // except the sign.
+        F::SIGN_MASK
+    } else {
+        // C3: Otherwise, we mask out the last `e` bits of the significand.
+        !(F::SIG_MASK >> e.unsigned())
+    };
+
+    // C4: If the to-be-masked-out portion is already zero, we have an exact result
+    if (xi & !mask) == IntTy::<F>::ZERO {
+        return x;
+    }
+
+    // C5: Otherwise the result is inexact and we will truncate. Raise `FE_INEXACT`, mask the
+    // result, and return.
+    force_eval!(x + F::MAX);
+    xi &= mask;
+    F::from_bits(xi)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn sanity_check() {
+        assert_biteq!(trunc(1.1f32), 1.0);
+        assert_biteq!(trunc(1.1f64), 1.0);
+
+        // C1
+        assert_biteq!(trunc(hf32!("0x1p23")), hf32!("0x1p23"));
+        assert_biteq!(trunc(hf64!("0x1p52")), hf64!("0x1p52"));
+        assert_biteq!(trunc(hf32!("-0x1p23")), hf32!("-0x1p23"));
+        assert_biteq!(trunc(hf64!("-0x1p52")), hf64!("-0x1p52"));
+
+        // C2
+        assert_biteq!(trunc(hf32!("0x1p-1")), 0.0);
+        assert_biteq!(trunc(hf64!("0x1p-1")), 0.0);
+        assert_biteq!(trunc(hf32!("-0x1p-1")), -0.0);
+        assert_biteq!(trunc(hf64!("-0x1p-1")), -0.0);
+    }
+}
diff --git a/src/math/mod.rs b/src/math/mod.rs
index 5baf35e42..c0d038a0c 100644
--- a/src/math/mod.rs
+++ b/src/math/mod.rs
@@ -121,7 +121,7 @@ use self::rem_pio2::rem_pio2;
 use self::rem_pio2_large::rem_pio2_large;
 use self::rem_pio2f::rem_pio2f;
 #[allow(unused_imports)]
-use self::support::{CastFrom, CastInto, DInt, Float, HInt, Int, MinInt};
+use self::support::{CastFrom, CastInto, DInt, Float, HInt, Int, IntTy, MinInt};
 
 // Public modules
 mod acos;
diff --git a/src/math/trunc.rs b/src/math/trunc.rs
index 7e5c4f2c2..2cc8aaa7e 100644
--- a/src/math/trunc.rs
+++ b/src/math/trunc.rs
@@ -1,5 +1,3 @@
-use core::f64;
-
 /// Rounds the number toward 0 to the closest integral value (f64).
 ///
 /// This effectively removes the decimal part of the number, leaving the integral part.
@@ -11,31 +9,5 @@ pub fn trunc(x: f64) -> f64 {
         args: x,
     }
 
-    let x1p120 = f64::from_bits(0x4770000000000000); // 0x1p120f === 2 ^ 120
-
-    let mut i: u64 = x.to_bits();
-    let mut e: i64 = ((i >> 52) & 0x7ff) as i64 - 0x3ff + 12;
-    let m: u64;
-
-    if e >= 52 + 12 {
-        return x;
-    }
-    if e < 12 {
-        e = 1;
-    }
-    m = -1i64 as u64 >> e;
-    if (i & m) == 0 {
-        return x;
-    }
-    force_eval!(x + x1p120);
-    i &= !m;
-    f64::from_bits(i)
-}
-
-#[cfg(test)]
-mod tests {
-    #[test]
-    fn sanity_check() {
-        assert_eq!(super::trunc(1.1), 1.0);
-    }
+    super::generic::trunc(x)
 }
diff --git a/src/math/truncf.rs b/src/math/truncf.rs
index b491747d9..14533a267 100644
--- a/src/math/truncf.rs
+++ b/src/math/truncf.rs
@@ -1,5 +1,3 @@
-use core::f32;
-
 /// Rounds the number toward 0 to the closest integral value (f32).
 ///
 /// This effectively removes the decimal part of the number, leaving the integral part.
@@ -11,25 +9,7 @@ pub fn truncf(x: f32) -> f32 {
         args: x,
     }
 
-    let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120
-
-    let mut i: u32 = x.to_bits();
-    let mut e: i32 = ((i >> 23) & 0xff) as i32 - 0x7f + 9;
-    let m: u32;
-
-    if e >= 23 + 9 {
-        return x;
-    }
-    if e < 9 {
-        e = 1;
-    }
-    m = -1i32 as u32 >> e;
-    if (i & m) == 0 {
-        return x;
-    }
-    force_eval!(x + x1p120);
-    i &= !m;
-    f32::from_bits(i)
+    super::generic::trunc(x)
 }
 
 // PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520

From aa3f28bce61491718d779119c4678c6abb368aa7 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sun, 12 Jan 2025 04:12:56 +0000
Subject: [PATCH 127/279] Add `truncf16` and `truncf128`

Use the generic algorithms to provide implementations for these
routines.
---
 crates/compiler-builtins-smoke-test/src/lib.rs |  2 ++
 crates/libm-macros/src/shared.rs               |  4 ++--
 crates/libm-test/benches/random.rs             |  2 +-
 crates/libm-test/src/domain.rs                 | 10 ++++++++++
 crates/libm-test/src/mpfloat.rs                |  3 +++
 crates/libm-test/tests/compare_built_musl.rs   |  4 +++-
 crates/util/src/main.rs                        |  2 +-
 etc/function-definitions.json                  | 16 ++++++++++++++++
 etc/function-list.txt                          |  2 ++
 src/math/generic/trunc.rs                      |  3 +++
 src/math/mod.rs                                |  4 ++++
 src/math/truncf128.rs                          |  7 +++++++
 src/math/truncf16.rs                           |  7 +++++++
 13 files changed, 61 insertions(+), 5 deletions(-)
 create mode 100644 src/math/truncf128.rs
 create mode 100644 src/math/truncf16.rs

diff --git a/crates/compiler-builtins-smoke-test/src/lib.rs b/crates/compiler-builtins-smoke-test/src/lib.rs
index 95ecb840d..3416a2229 100644
--- a/crates/compiler-builtins-smoke-test/src/lib.rs
+++ b/crates/compiler-builtins-smoke-test/src/lib.rs
@@ -157,6 +157,8 @@ no_mangle! {
     tgammaf(x: f32) -> f32;
     trunc(x: f64) -> f64;
     truncf(x: f32) -> f32;
+    truncf128(x: f128) -> f128;
+    truncf16(x: f16) -> f16;
     y0(x: f64) -> f64;
     y0f(x: f32) -> f32;
     y1(x: f64) -> f64;
diff --git a/crates/libm-macros/src/shared.rs b/crates/libm-macros/src/shared.rs
index 16547404f..24fccd6f2 100644
--- a/crates/libm-macros/src/shared.rs
+++ b/crates/libm-macros/src/shared.rs
@@ -9,7 +9,7 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
         FloatTy::F16,
         Signature { args: &[Ty::F16], returns: &[Ty::F16] },
         None,
-        &["fabsf16"],
+        &["fabsf16", "truncf16"],
     ),
     (
         // `fn(f32) -> f32`
@@ -40,7 +40,7 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
         FloatTy::F128,
         Signature { args: &[Ty::F128], returns: &[Ty::F128] },
         None,
-        &["fabsf128"],
+        &["fabsf128", "truncf128"],
     ),
     (
         // `(f16, f16) -> f16`
diff --git a/crates/libm-test/benches/random.rs b/crates/libm-test/benches/random.rs
index cd1e2d2cc..8c6afff25 100644
--- a/crates/libm-test/benches/random.rs
+++ b/crates/libm-test/benches/random.rs
@@ -117,7 +117,7 @@ libm_macros::for_each_function! {
         exp10 | exp10f | exp2 | exp2f => (true, Some(musl_math_sys::MACRO_FN_NAME)),
 
         // Musl does not provide `f16` and `f128` functions
-        copysignf16 | copysignf128 | fabsf16 | fabsf128 => (false, None),
+        copysignf16 | copysignf128 | fabsf16 | fabsf128 | truncf16 | truncf128  => (false, None),
 
         // By default we never skip (false) and always have a musl function available
         _ => (false, Some(musl_math_sys::MACRO_FN_NAME))
diff --git a/crates/libm-test/src/domain.rs b/crates/libm-test/src/domain.rs
index 52393d402..adafb9faa 100644
--- a/crates/libm-test/src/domain.rs
+++ b/crates/libm-test/src/domain.rs
@@ -199,3 +199,13 @@ impl HasDomain<f16> for crate::op::fabsf16::Routine {
 impl HasDomain<f128> for crate::op::fabsf128::Routine {
     const DOMAIN: Domain<f128> = Domain::<f128>::UNBOUNDED;
 }
+
+#[cfg(f16_enabled)]
+impl HasDomain<f16> for crate::op::truncf16::Routine {
+    const DOMAIN: Domain<f16> = Domain::<f16>::UNBOUNDED;
+}
+
+#[cfg(f128_enabled)]
+impl HasDomain<f128> for crate::op::truncf128::Routine {
+    const DOMAIN: Domain<f128> = Domain::<f128>::UNBOUNDED;
+}
diff --git a/crates/libm-test/src/mpfloat.rs b/crates/libm-test/src/mpfloat.rs
index 092f5f1d2..2a740ed47 100644
--- a/crates/libm-test/src/mpfloat.rs
+++ b/crates/libm-test/src/mpfloat.rs
@@ -141,6 +141,7 @@ libm_macros::for_each_function! {
         lgamma_r, lgammaf_r, modf, modff, nextafter, nextafterf, pow,powf,
         remquo, remquof, scalbn, scalbnf, sincos, sincosf, yn, ynf,
         copysignf16, copysignf128, fabsf16, fabsf128,
+        truncf16, truncf128,
     ],
     fn_extra: match MACRO_FN_NAME {
         // Remap function names that are different between mpfr and libm
@@ -202,11 +203,13 @@ impl_no_round! {
 #[cfg(f16_enabled)]
 impl_no_round! {
     fabsf16 => abs_mut;
+    truncf16 => trunc_mut;
 }
 
 #[cfg(f128_enabled)]
 impl_no_round! {
     fabsf128 => abs_mut;
+    truncf128 => trunc_mut;
 }
 
 /// Some functions are difficult to do in a generic way. Implement them here.
diff --git a/crates/libm-test/tests/compare_built_musl.rs b/crates/libm-test/tests/compare_built_musl.rs
index b91d7f9f5..a395c6c5d 100644
--- a/crates/libm-test/tests/compare_built_musl.rs
+++ b/crates/libm-test/tests/compare_built_musl.rs
@@ -48,7 +48,7 @@ where
 libm_macros::for_each_function! {
     callback: musl_rand_tests,
     // Musl does not support `f16` and `f128` on all platforms.
-    skip: [copysignf16, copysignf128, fabsf16, fabsf128],
+    skip: [copysignf16, copysignf128, fabsf16, fabsf128, truncf16, truncf128],
     attributes: [
         #[cfg_attr(x86_no_sse, ignore)] // FIXME(correctness): wrong result on i586
         [exp10, exp10f, exp2, exp2f, rint]
@@ -146,5 +146,7 @@ libm_macros::for_each_function! {
         // Not provided by musl
         fabsf16,
         fabsf128,
+        truncf16,
+        truncf128,
     ],
 }
diff --git a/crates/util/src/main.rs b/crates/util/src/main.rs
index f7bd31bb6..c8a03068a 100644
--- a/crates/util/src/main.rs
+++ b/crates/util/src/main.rs
@@ -84,7 +84,7 @@ fn do_eval(basis: &str, op: &str, inputs: &[&str]) {
         emit_types: [CFn, RustFn, RustArgs],
         extra: (basis, op, inputs),
         fn_extra: match MACRO_FN_NAME {
-            copysignf16 | copysignf128 | fabsf16 | fabsf128 => None,
+            copysignf16 | copysignf128 | fabsf16 | fabsf128 | truncf16 | truncf128  => None,
             _ => Some(musl_math_sys::MACRO_FN_NAME)
         }
     }
diff --git a/etc/function-definitions.json b/etc/function-definitions.json
index 39b6c9702..86fa02101 100644
--- a/etc/function-definitions.json
+++ b/etc/function-definitions.json
@@ -743,6 +743,7 @@
         "sources": [
             "src/libm_helper.rs",
             "src/math/arch/wasm32.rs",
+            "src/math/generic/trunc.rs",
             "src/math/trunc.rs"
         ],
         "type": "f64"
@@ -750,10 +751,25 @@
     "truncf": {
         "sources": [
             "src/math/arch/wasm32.rs",
+            "src/math/generic/trunc.rs",
             "src/math/truncf.rs"
         ],
         "type": "f32"
     },
+    "truncf128": {
+        "sources": [
+            "src/math/generic/trunc.rs",
+            "src/math/truncf128.rs"
+        ],
+        "type": "f128"
+    },
+    "truncf16": {
+        "sources": [
+            "src/math/generic/trunc.rs",
+            "src/math/truncf16.rs"
+        ],
+        "type": "f16"
+    },
     "y0": {
         "sources": [
             "src/libm_helper.rs",
diff --git a/etc/function-list.txt b/etc/function-list.txt
index 0a1bbab24..8aa901762 100644
--- a/etc/function-list.txt
+++ b/etc/function-list.txt
@@ -111,6 +111,8 @@ tgamma
 tgammaf
 trunc
 truncf
+truncf128
+truncf16
 y0
 y0f
 y1
diff --git a/src/math/generic/trunc.rs b/src/math/generic/trunc.rs
index 5d0ba6109..ca5f1bdd6 100644
--- a/src/math/generic/trunc.rs
+++ b/src/math/generic/trunc.rs
@@ -1,3 +1,6 @@
+/* SPDX-License-Identifier: MIT
+ * origin: musl src/math/trunc.c */
+
 use super::super::{Float, Int, IntTy, MinInt};
 
 pub fn trunc<F: Float>(x: F) -> F {
diff --git a/src/math/mod.rs b/src/math/mod.rs
index c0d038a0c..723be0e1d 100644
--- a/src/math/mod.rs
+++ b/src/math/mod.rs
@@ -343,9 +343,11 @@ cfg_if! {
     if #[cfg(f16_enabled)] {
         mod copysignf16;
         mod fabsf16;
+        mod truncf16;
 
         pub use self::copysignf16::copysignf16;
         pub use self::fabsf16::fabsf16;
+        pub use self::truncf16::truncf16;
     }
 }
 
@@ -353,9 +355,11 @@ cfg_if! {
     if #[cfg(f128_enabled)] {
         mod copysignf128;
         mod fabsf128;
+        mod truncf128;
 
         pub use self::copysignf128::copysignf128;
         pub use self::fabsf128::fabsf128;
+        pub use self::truncf128::truncf128;
     }
 }
 
diff --git a/src/math/truncf128.rs b/src/math/truncf128.rs
new file mode 100644
index 000000000..9dccc0d0e
--- /dev/null
+++ b/src/math/truncf128.rs
@@ -0,0 +1,7 @@
+/// Rounds the number toward 0 to the closest integral value (f128).
+///
+/// This effectively removes the decimal part of the number, leaving the integral part.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn truncf128(x: f128) -> f128 {
+    super::generic::trunc(x)
+}
diff --git a/src/math/truncf16.rs b/src/math/truncf16.rs
new file mode 100644
index 000000000..d7c3d225c
--- /dev/null
+++ b/src/math/truncf16.rs
@@ -0,0 +1,7 @@
+/// Rounds the number toward 0 to the closest integral value (f16).
+///
+/// This effectively removes the decimal part of the number, leaving the integral part.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn truncf16(x: f16) -> f16 {
+    super::generic::trunc(x)
+}

From 0072e74541b9c64cb5763f0f829f811675fcf407 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 13 Jan 2025 10:17:51 +0000
Subject: [PATCH 128/279] Disable `util` and `libm-macros` for optimized tests

These crates take time building in CI, especially with the release
profile having LTO enabled, but there isn't really any reason to test
them with different features or in release mode. Disable this to save
some CI runtime.
---
 ci/run.sh | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/ci/run.sh b/ci/run.sh
index 35b84809f..94ff54cb7 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -77,8 +77,14 @@ if [ "${BUILD_ONLY:-}" = "1" ]; then
 else
     cmd="cargo test --all --target $target $extra_flags"
 
-    # Test once without intrinsics, once with intrinsics enabled
+    # Test once without intrinsics
     $cmd
+
+    # Exclude the macros and utile crates from the rest of the tests to save CI
+    # runtime, they shouldn't have anything feature- or opt-level-dependent.
+    cmd="$cmd --exclude util --exclude libm-macros"
+
+    # Test once with intrinsics enabled
     $cmd --features unstable-intrinsics
     $cmd --features unstable-intrinsics --benches
     

From 02e78f87ca50d5a9c01dd224a3dce1d0fec25f6a Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 13 Jan 2025 10:29:56 +0000
Subject: [PATCH 129/279] Format the MPFR manual implementation list

---
 crates/libm-test/src/mpfloat.rs | 48 ++++++++++++++++++++++++++++-----
 1 file changed, 41 insertions(+), 7 deletions(-)

diff --git a/crates/libm-test/src/mpfloat.rs b/crates/libm-test/src/mpfloat.rs
index 2a740ed47..a4aad81f7 100644
--- a/crates/libm-test/src/mpfloat.rs
+++ b/crates/libm-test/src/mpfloat.rs
@@ -135,13 +135,47 @@ libm_macros::for_each_function! {
     emit_types: [RustFn],
     skip: [
         // Most of these need a manual implementation
-        fabs, ceil, copysign, floor, rint, round, trunc,
-        fabsf, ceilf, copysignf, floorf, rintf, roundf, truncf,
-        fmod, fmodf, frexp, frexpf, ilogb, ilogbf, jn, jnf, ldexp, ldexpf,
-        lgamma_r, lgammaf_r, modf, modff, nextafter, nextafterf, pow,powf,
-        remquo, remquof, scalbn, scalbnf, sincos, sincosf, yn, ynf,
-        copysignf16, copysignf128, fabsf16, fabsf128,
-        truncf16, truncf128,
+        ceil,
+        ceilf,
+        copysign,
+        copysignf,
+        copysignf128,
+        copysignf16,
+        fabs,
+        fabsf,
+        fabsf128,
+        fabsf16,floor,
+        floorf,
+        fmod,
+        fmodf,
+        frexp,
+        frexpf,
+        ilogb,
+        ilogbf,
+        jn,
+        jnf,
+        ldexp,ldexpf,
+        lgamma_r,
+        lgammaf_r,
+        modf,
+        modff,
+        nextafter,
+        nextafterf,
+        pow,
+        powf,remquo,
+        remquof,
+        rint,
+        rintf,
+        round,
+        roundf,
+        scalbn,
+        scalbnf,
+        sincos,sincosf,
+        trunc,
+        truncf,
+        truncf128,
+        truncf16,yn,
+        ynf,
     ],
     fn_extra: match MACRO_FN_NAME {
         // Remap function names that are different between mpfr and libm

From b97ef0b5ea6f7e76e4352d66b9cde285791211e4 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 13 Jan 2025 13:49:43 +0000
Subject: [PATCH 130/279] Add a generic version of `fdim`

---
 etc/function-definitions.json |  6 ++++--
 src/math/fdim.rs              | 12 +-----------
 src/math/fdimf.rs             | 12 +-----------
 src/math/generic/fdim.rs      | 13 +++++++++++++
 src/math/generic/mod.rs       |  2 ++
 5 files changed, 21 insertions(+), 24 deletions(-)
 create mode 100644 src/math/generic/fdim.rs

diff --git a/etc/function-definitions.json b/etc/function-definitions.json
index 86fa02101..1e6d260fe 100644
--- a/etc/function-definitions.json
+++ b/etc/function-definitions.json
@@ -289,13 +289,15 @@
     "fdim": {
         "sources": [
             "src/libm_helper.rs",
-            "src/math/fdim.rs"
+            "src/math/fdim.rs",
+            "src/math/generic/fdim.rs"
         ],
         "type": "f64"
     },
     "fdimf": {
         "sources": [
-            "src/math/fdimf.rs"
+            "src/math/fdimf.rs",
+            "src/math/generic/fdim.rs"
         ],
         "type": "f32"
     },
diff --git a/src/math/fdim.rs b/src/math/fdim.rs
index 7c58cb5a9..10ffa2881 100644
--- a/src/math/fdim.rs
+++ b/src/math/fdim.rs
@@ -1,5 +1,3 @@
-use core::f64;
-
 /// Positive difference (f64)
 ///
 /// Determines the positive difference between arguments, returning:
@@ -10,13 +8,5 @@ use core::f64;
 /// A range error may occur.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn fdim(x: f64, y: f64) -> f64 {
-    if x.is_nan() {
-        x
-    } else if y.is_nan() {
-        y
-    } else if x > y {
-        x - y
-    } else {
-        0.0
-    }
+    super::generic::fdim(x, y)
 }
diff --git a/src/math/fdimf.rs b/src/math/fdimf.rs
index 2abd49a64..367ef517c 100644
--- a/src/math/fdimf.rs
+++ b/src/math/fdimf.rs
@@ -1,5 +1,3 @@
-use core::f32;
-
 /// Positive difference (f32)
 ///
 /// Determines the positive difference between arguments, returning:
@@ -10,13 +8,5 @@ use core::f32;
 /// A range error may occur.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn fdimf(x: f32, y: f32) -> f32 {
-    if x.is_nan() {
-        x
-    } else if y.is_nan() {
-        y
-    } else if x > y {
-        x - y
-    } else {
-        0.0
-    }
+    super::generic::fdim(x, y)
 }
diff --git a/src/math/generic/fdim.rs b/src/math/generic/fdim.rs
new file mode 100644
index 000000000..2e54a41de
--- /dev/null
+++ b/src/math/generic/fdim.rs
@@ -0,0 +1,13 @@
+use super::super::Float;
+
+pub fn fdim<F: Float>(x: F, y: F) -> F {
+    if x.is_nan() {
+        x
+    } else if y.is_nan() {
+        y
+    } else if x > y {
+        x - y
+    } else {
+        F::ZERO
+    }
+}
diff --git a/src/math/generic/mod.rs b/src/math/generic/mod.rs
index e5166ca10..2b068d6c5 100644
--- a/src/math/generic/mod.rs
+++ b/src/math/generic/mod.rs
@@ -1,7 +1,9 @@
 mod copysign;
 mod fabs;
+mod fdim;
 mod trunc;
 
 pub use copysign::copysign;
 pub use fabs::fabs;
+pub use fdim::fdim;
 pub use trunc::trunc;

From 31f0e1f1bbe9efc97a42bf81a7866e3074ca7223 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 13 Jan 2025 13:58:40 +0000
Subject: [PATCH 131/279] Add `fdimf16` and `fdimf128`

Use the generic algorithms to provide implementations for these
routines.
---
 crates/compiler-builtins-smoke-test/src/lib.rs |  2 ++
 crates/libm-macros/src/shared.rs               |  4 ++--
 crates/libm-test/benches/random.rs             |  9 ++++++++-
 crates/libm-test/src/domain.rs                 | 10 ++++++++++
 crates/libm-test/src/mpfloat.rs                |  2 +-
 crates/libm-test/tests/compare_built_musl.rs   | 17 ++++++++++++++---
 crates/libm-test/tests/multiprecision.rs       |  2 ++
 crates/util/src/main.rs                        |  9 ++++++++-
 etc/function-definitions.json                  | 14 ++++++++++++++
 etc/function-list.txt                          |  2 ++
 src/libm_helper.rs                             |  2 ++
 src/math/fdimf128.rs                           | 12 ++++++++++++
 src/math/fdimf16.rs                            | 12 ++++++++++++
 src/math/mod.rs                                |  4 ++++
 14 files changed, 93 insertions(+), 8 deletions(-)
 create mode 100644 src/math/fdimf128.rs
 create mode 100644 src/math/fdimf16.rs

diff --git a/crates/compiler-builtins-smoke-test/src/lib.rs b/crates/compiler-builtins-smoke-test/src/lib.rs
index 3416a2229..b9521eb07 100644
--- a/crates/compiler-builtins-smoke-test/src/lib.rs
+++ b/crates/compiler-builtins-smoke-test/src/lib.rs
@@ -93,6 +93,8 @@ no_mangle! {
     fabsf16(x: f16) -> f16;
     fdim(x: f64, y: f64) -> f64;
     fdimf(x: f32, y: f32) -> f32;
+    fdimf128(x: f128, y: f128) -> f128;
+    fdimf16(x: f16, y: f16) -> f16;
     floor(x: f64) -> f64;
     floorf(x: f32) -> f32;
     fma(x: f64, y: f64, z: f64) -> f64;
diff --git a/crates/libm-macros/src/shared.rs b/crates/libm-macros/src/shared.rs
index 24fccd6f2..608381962 100644
--- a/crates/libm-macros/src/shared.rs
+++ b/crates/libm-macros/src/shared.rs
@@ -47,7 +47,7 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
         FloatTy::F16,
         Signature { args: &[Ty::F16, Ty::F16], returns: &[Ty::F16] },
         None,
-        &["copysignf16"],
+        &["copysignf16", "fdimf16"],
     ),
     (
         // `(f32, f32) -> f32`
@@ -90,7 +90,7 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
         FloatTy::F128,
         Signature { args: &[Ty::F128, Ty::F128], returns: &[Ty::F128] },
         None,
-        &["copysignf128"],
+        &["copysignf128", "fdimf128"],
     ),
     (
         // `(f32, f32, f32) -> f32`
diff --git a/crates/libm-test/benches/random.rs b/crates/libm-test/benches/random.rs
index 8c6afff25..e79002277 100644
--- a/crates/libm-test/benches/random.rs
+++ b/crates/libm-test/benches/random.rs
@@ -117,7 +117,14 @@ libm_macros::for_each_function! {
         exp10 | exp10f | exp2 | exp2f => (true, Some(musl_math_sys::MACRO_FN_NAME)),
 
         // Musl does not provide `f16` and `f128` functions
-        copysignf16 | copysignf128 | fabsf16 | fabsf128 | truncf16 | truncf128  => (false, None),
+        copysignf128
+        | copysignf16
+        | fabsf128
+        | fabsf16
+        | fdimf128
+        | fdimf16
+        | truncf128
+        | truncf16 => (false, None),
 
         // By default we never skip (false) and always have a musl function available
         _ => (false, Some(musl_math_sys::MACRO_FN_NAME))
diff --git a/crates/libm-test/src/domain.rs b/crates/libm-test/src/domain.rs
index adafb9faa..68b91bf02 100644
--- a/crates/libm-test/src/domain.rs
+++ b/crates/libm-test/src/domain.rs
@@ -200,6 +200,16 @@ impl HasDomain<f128> for crate::op::fabsf128::Routine {
     const DOMAIN: Domain<f128> = Domain::<f128>::UNBOUNDED;
 }
 
+#[cfg(f16_enabled)]
+impl HasDomain<f16> for crate::op::fdimf16::Routine {
+    const DOMAIN: Domain<f16> = Domain::<f16>::UNBOUNDED;
+}
+
+#[cfg(f128_enabled)]
+impl HasDomain<f128> for crate::op::fdimf128::Routine {
+    const DOMAIN: Domain<f128> = Domain::<f128>::UNBOUNDED;
+}
+
 #[cfg(f16_enabled)]
 impl HasDomain<f16> for crate::op::truncf16::Routine {
     const DOMAIN: Domain<f16> = Domain::<f16>::UNBOUNDED;
diff --git a/crates/libm-test/src/mpfloat.rs b/crates/libm-test/src/mpfloat.rs
index a4aad81f7..9d95356d3 100644
--- a/crates/libm-test/src/mpfloat.rs
+++ b/crates/libm-test/src/mpfloat.rs
@@ -181,7 +181,7 @@ libm_macros::for_each_function! {
         // Remap function names that are different between mpfr and libm
         expm1 | expm1f => exp_m1,
         fabs | fabsf => abs,
-        fdim | fdimf => positive_diff,
+        fdim | fdimf | fdimf16 | fdimf128  => positive_diff,
         fma | fmaf => mul_add,
         fmax | fmaxf => max,
         fmin | fminf => min,
diff --git a/crates/libm-test/tests/compare_built_musl.rs b/crates/libm-test/tests/compare_built_musl.rs
index a395c6c5d..836c425a5 100644
--- a/crates/libm-test/tests/compare_built_musl.rs
+++ b/crates/libm-test/tests/compare_built_musl.rs
@@ -48,7 +48,16 @@ where
 libm_macros::for_each_function! {
     callback: musl_rand_tests,
     // Musl does not support `f16` and `f128` on all platforms.
-    skip: [copysignf16, copysignf128, fabsf16, fabsf128, truncf16, truncf128],
+    skip: [
+        copysignf128,
+        copysignf16,
+        fabsf128,
+        fabsf16,
+        fdimf128,
+        fdimf16,
+        truncf128,
+        truncf16,
+    ],
     attributes: [
         #[cfg_attr(x86_no_sse, ignore)] // FIXME(correctness): wrong result on i586
         [exp10, exp10f, exp2, exp2f, rint]
@@ -144,9 +153,11 @@ libm_macros::for_each_function! {
         ynf,
 
         // Not provided by musl
-        fabsf16,
         fabsf128,
-        truncf16,
+        fabsf16,
+        fdimf128,
+        fdimf16,
         truncf128,
+        truncf16,
     ],
 }
diff --git a/crates/libm-test/tests/multiprecision.rs b/crates/libm-test/tests/multiprecision.rs
index 2d8856e16..123abfdaf 100644
--- a/crates/libm-test/tests/multiprecision.rs
+++ b/crates/libm-test/tests/multiprecision.rs
@@ -120,6 +120,8 @@ libm_macros::for_each_function! {
         copysignf128,
         fdim,
         fdimf,
+        fdimf16,
+        fdimf128,
         fma,
         fmaf,
         fmax,
diff --git a/crates/util/src/main.rs b/crates/util/src/main.rs
index c8a03068a..b979c60ad 100644
--- a/crates/util/src/main.rs
+++ b/crates/util/src/main.rs
@@ -84,7 +84,14 @@ fn do_eval(basis: &str, op: &str, inputs: &[&str]) {
         emit_types: [CFn, RustFn, RustArgs],
         extra: (basis, op, inputs),
         fn_extra: match MACRO_FN_NAME {
-            copysignf16 | copysignf128 | fabsf16 | fabsf128 | truncf16 | truncf128  => None,
+            copysignf128
+            | copysignf16
+            | fabsf128
+            | fabsf16
+            | fdimf128
+            | fdimf16
+            | truncf128
+            | truncf16  => None,
             _ => Some(musl_math_sys::MACRO_FN_NAME)
         }
     }
diff --git a/etc/function-definitions.json b/etc/function-definitions.json
index 1e6d260fe..dbaac931c 100644
--- a/etc/function-definitions.json
+++ b/etc/function-definitions.json
@@ -301,6 +301,20 @@
         ],
         "type": "f32"
     },
+    "fdimf128": {
+        "sources": [
+            "src/math/fdimf128.rs",
+            "src/math/generic/fdim.rs"
+        ],
+        "type": "f128"
+    },
+    "fdimf16": {
+        "sources": [
+            "src/math/fdimf16.rs",
+            "src/math/generic/fdim.rs"
+        ],
+        "type": "f16"
+    },
     "floor": {
         "sources": [
             "src/libm_helper.rs",
diff --git a/etc/function-list.txt b/etc/function-list.txt
index 8aa901762..7f96a4362 100644
--- a/etc/function-list.txt
+++ b/etc/function-list.txt
@@ -43,6 +43,8 @@ fabsf128
 fabsf16
 fdim
 fdimf
+fdimf128
+fdimf16
 floor
 floorf
 fma
diff --git a/src/libm_helper.rs b/src/libm_helper.rs
index f087267e4..73bae4567 100644
--- a/src/libm_helper.rs
+++ b/src/libm_helper.rs
@@ -176,6 +176,7 @@ libm_helper! {
     funcs: {
         (fn copysign(x: f16, y: f16) -> (f16);      => copysignf16);
         (fn fabs(x: f16) -> (f16);                  => fabsf16);
+        (fn fdim(x: f16, y: f16) -> (f16);          => fdimf16);
     }
 }
 
@@ -185,5 +186,6 @@ libm_helper! {
     funcs: {
         (fn copysign(x: f128, y: f128) -> (f128);   => copysignf128);
         (fn fabs(x: f128) -> (f128);                => fabsf128);
+        (fn fdim(x: f128, y: f128) -> (f128);       => fdimf128);
     }
 }
diff --git a/src/math/fdimf128.rs b/src/math/fdimf128.rs
new file mode 100644
index 000000000..6f3d1d0ff
--- /dev/null
+++ b/src/math/fdimf128.rs
@@ -0,0 +1,12 @@
+/// Positive difference (f128)
+///
+/// Determines the positive difference between arguments, returning:
+/// * x - y if x > y, or
+/// * +0    if x <= y, or
+/// * NAN   if either argument is NAN.
+///
+/// A range error may occur.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fdimf128(x: f128, y: f128) -> f128 {
+    super::generic::fdim(x, y)
+}
diff --git a/src/math/fdimf16.rs b/src/math/fdimf16.rs
new file mode 100644
index 000000000..37bd68858
--- /dev/null
+++ b/src/math/fdimf16.rs
@@ -0,0 +1,12 @@
+/// Positive difference (f16)
+///
+/// Determines the positive difference between arguments, returning:
+/// * x - y if x > y, or
+/// * +0    if x <= y, or
+/// * NAN   if either argument is NAN.
+///
+/// A range error may occur.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fdimf16(x: f16, y: f16) -> f16 {
+    super::generic::fdim(x, y)
+}
diff --git a/src/math/mod.rs b/src/math/mod.rs
index 723be0e1d..03adb6be1 100644
--- a/src/math/mod.rs
+++ b/src/math/mod.rs
@@ -343,10 +343,12 @@ cfg_if! {
     if #[cfg(f16_enabled)] {
         mod copysignf16;
         mod fabsf16;
+        mod fdimf16;
         mod truncf16;
 
         pub use self::copysignf16::copysignf16;
         pub use self::fabsf16::fabsf16;
+        pub use self::fdimf16::fdimf16;
         pub use self::truncf16::truncf16;
     }
 }
@@ -355,10 +357,12 @@ cfg_if! {
     if #[cfg(f128_enabled)] {
         mod copysignf128;
         mod fabsf128;
+        mod fdimf128;
         mod truncf128;
 
         pub use self::copysignf128::copysignf128;
         pub use self::fabsf128::fabsf128;
+        pub use self::fdimf128::fdimf128;
         pub use self::truncf128::truncf128;
     }
 }

From c95ec4c6f9f3d0f9ed3c001b75d397a5ae9ec093 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 13 Jan 2025 22:57:38 +0000
Subject: [PATCH 132/279] Don't set `codegen-units=1` by default in CI

We can set this only for the release profile, there isn't any reason to
have it set for debug tests.
---
 Cargo.toml | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 820c01347..f84f3eac6 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -61,14 +61,15 @@ exclude = [
 [dev-dependencies]
 no-panic = "0.1.30"
 
-
-# This is needed for no-panic to correctly detect the lack of panics
 [profile.release]
+# Options for no-panic to correctly detect the lack of panics
+codegen-units = 1
 lto = "fat"
 
 # Release mode with debug assertions
 [profile.release-checked]
-inherits = "release"
+codegen-units = 1
 debug-assertions = true
+inherits = "release"
 lto = "fat"
 overflow-checks = true

From c9948639559a6050911fdac211507c15bcc3a16e Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 13 Jan 2025 23:00:39 +0000
Subject: [PATCH 133/279] Reduce indentation in `run.sh` using early return

---
 ci/run.sh | 47 ++++++++++++++++++++++++-----------------------
 1 file changed, 24 insertions(+), 23 deletions(-)

diff --git a/ci/run.sh b/ci/run.sh
index 94ff54cb7..008f32d5b 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -3,8 +3,6 @@
 set -eux
 
 export RUST_BACKTRACE="${RUST_BACKTRACE:-full}"
-# Needed for no-panic to correct detect a lack of panics
-export RUSTFLAGS="${RUSTFLAGS:-} -Ccodegen-units=1"
 
 target="${1:-}"
 
@@ -69,33 +67,36 @@ esac
 cargo check -p libm --no-default-features
 
 if [ "${BUILD_ONLY:-}" = "1" ]; then
+    # If we are on targets that can't run tests, verify that we can build.
     cmd="cargo build --target $target --package libm"
     $cmd
     $cmd --features unstable-intrinsics
 
     echo "can't run tests on $target; skipping"
-else
-    cmd="cargo test --all --target $target $extra_flags"
+    exit
+fi
 
-    # Test once without intrinsics
-    $cmd
+# Otherwise, run the test suite.
 
-    # Exclude the macros and utile crates from the rest of the tests to save CI
-    # runtime, they shouldn't have anything feature- or opt-level-dependent.
-    cmd="$cmd --exclude util --exclude libm-macros"
+cmd="cargo test --all --target $target $extra_flags"
 
-    # Test once with intrinsics enabled
-    $cmd --features unstable-intrinsics
-    $cmd --features unstable-intrinsics --benches
-    
-    # Test the same in release mode, which also increases coverage. Also ensure
-    # the soft float routines are checked.
-    $cmd --profile release-checked 
-    $cmd --profile release-checked --features force-soft-floats
-    $cmd --profile release-checked --features unstable-intrinsics
-    $cmd --profile release-checked --features unstable-intrinsics --benches
-
-    # Ensure that the routines do not panic.
-    ENSURE_NO_PANIC=1 cargo build -p libm --target "$target" --no-default-features --release
-fi
+# Test once without intrinsics
+$cmd
+
+# Exclude the macros and utile crates from the rest of the tests to save CI
+# runtime, they shouldn't have anything feature- or opt-level-dependent.
+cmd="$cmd --exclude util --exclude libm-macros"
+
+# Test once with intrinsics enabled
+$cmd --features unstable-intrinsics
+$cmd --features unstable-intrinsics --benches
+
+# Test the same in release mode, which also increases coverage. Also ensure
+# the soft float routines are checked.
+$cmd --profile release-checked 
+$cmd --profile release-checked --features force-soft-floats
+$cmd --profile release-checked --features unstable-intrinsics
+$cmd --profile release-checked --features unstable-intrinsics --benches
 
+# Ensure that the routines do not panic.
+ENSURE_NO_PANIC=1 cargo build -p libm --target "$target" --no-default-features --release

From 2b8045ab6d1ce15f9fbc0de63acadbe21f0381dd Mon Sep 17 00:00:00 2001
From: quaternic <57393910+quaternic@users.noreply.github.com>
Date: Tue, 14 Jan 2025 03:55:26 +0200
Subject: [PATCH 134/279] Simplify and optimize `fdim` (#442)

The cases with NaN arguments can be handled by the same x - y
expression, and this generates much better code: https://godbolt.org/z/f3rnT8jx4.
---
 src/math/generic/fdim.rs | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/src/math/generic/fdim.rs b/src/math/generic/fdim.rs
index 2e54a41de..bf971cd7d 100644
--- a/src/math/generic/fdim.rs
+++ b/src/math/generic/fdim.rs
@@ -1,13 +1,5 @@
 use super::super::Float;
 
 pub fn fdim<F: Float>(x: F, y: F) -> F {
-    if x.is_nan() {
-        x
-    } else if y.is_nan() {
-        y
-    } else if x > y {
-        x - y
-    } else {
-        F::ZERO
-    }
+    if x <= y { F::ZERO } else { x - y }
 }

From abf0a2813e232c5a903f715a8ecbbfa6f74b6e94 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 13 Jan 2025 23:40:42 +0000
Subject: [PATCH 135/279] Use cargo-nextest for running tests in CI

The test suite for this repo has quite a lot of tests, and it is
difficult to tell which contribute the most to the long CI runtime.
libtest does have an unstable flag to report test times, but that is
inconvenient to use because it needs to be passed only to libtest
binaries.

Switch to cargo-nextest [1] which provides time reporting and, overall,
a better test UI. It may also improve test runtime, though this seems
unlikely since we have larger test binaries with many small tests
(nextest benefits the most when there are larger binaries that can be
run in parallel).

For anyone running locally without, `run.sh` should still fall back to
`cargo test` if `cargo-nextest` is not available.

This diff includes some cleanup and consistency changes to other
CI-related files.

[1]: https://nexte.st
---
 .github/workflows/main.yml    |  2 ++
 .github/workflows/publish.yml |  2 +-
 ci/download-musl.sh           |  2 +-
 ci/run-docker.sh              |  8 +++--
 ci/run.sh                     | 58 +++++++++++++++++++++++------------
 5 files changed, 48 insertions(+), 24 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 35b307f77..30976d472 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -6,6 +6,7 @@ on:
   pull_request:
 
 env:
+  CARGO_TERM_COLOR: always
   CARGO_TERM_VERBOSE: true
   RUSTDOCFLAGS: -Dwarnings
   RUSTFLAGS: -Dwarnings
@@ -88,6 +89,7 @@ jobs:
         rustup default "$channel"
         rustup target add "${{ matrix.target }}"
         rustup component add clippy llvm-tools-preview
+    - uses: taiki-e/install-action@nextest
     - uses: Swatinem/rust-cache@v2
       with:
         key: ${{ matrix.target }}
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index e715c6187..15904079d 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -12,7 +12,7 @@ on:
 jobs:
   release-plz:
     name: Release-plz
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-24.04
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4
diff --git a/ci/download-musl.sh b/ci/download-musl.sh
index d0d8b310e..039e96157 100755
--- a/ci/download-musl.sh
+++ b/ci/download-musl.sh
@@ -7,7 +7,7 @@ fname=musl-1.2.5.tar.gz
 sha=a9a118bbe84d8764da0ea0d28b3ab3fae8477fc7e4085d90102b8596fc7c75e4
 
 mkdir musl
-curl "https://musl.libc.org/releases/$fname" -O
+curl -L "https://musl.libc.org/releases/$fname" -O
 
 case "$(uname -s)" in
     MINGW*)
diff --git a/ci/run-docker.sh b/ci/run-docker.sh
index d9f29656d..6626e7226 100755
--- a/ci/run-docker.sh
+++ b/ci/run-docker.sh
@@ -24,12 +24,14 @@ run() {
     # will be owned by root
     mkdir -p target
 
-    docker build -t "$target" "ci/docker/$target"
+    set_env="HOME=/tmp PATH=\$PATH:/rust/bin:/cargo/bin"
+    docker build -t "libm-$target" "ci/docker/$target"
     docker run \
         --rm \
         --user "$(id -u):$(id -g)" \
         -e CI \
         -e RUSTFLAGS \
+        -e CARGO_TERM_COLOR \
         -e CARGO_HOME=/cargo \
         -e CARGO_TARGET_DIR=/target \
         -e "EMULATED=$emulated" \
@@ -39,8 +41,8 @@ run() {
         -v "$(rustc --print sysroot):/rust:ro" \
         --init \
         -w /checkout \
-        "$target" \
-        sh -c "HOME=/tmp PATH=\$PATH:/rust/bin exec ci/run.sh $target"
+        "libm-$target" \
+        sh -c "$set_env exec ci/run.sh $target"
 }
 
 if [ -z "$1" ]; then
diff --git a/ci/run.sh b/ci/run.sh
index 008f32d5b..08ffaa81c 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -3,8 +3,10 @@
 set -eux
 
 export RUST_BACKTRACE="${RUST_BACKTRACE:-full}"
+export NEXTEST_STATUS_LEVEL=all
 
 target="${1:-}"
+flags=""
 
 if [ -z "$target" ]; then
     host_target=$(rustc -vV | awk '/^host/ { print $2 }')
@@ -13,22 +15,22 @@ if [ -z "$target" ]; then
 fi
 
 # We enumerate features manually.
-extra_flags="--no-default-features"
+flags="$flags --no-default-features"
 
 # Enable arch-specific routines when available.
-extra_flags="$extra_flags --features arch"
+flags="$flags --features arch"
 
 # Always enable `unstable-float` since it expands available API but does not
 # change any implementations.
-extra_flags="$extra_flags --features unstable-float"
+flags="$flags --features unstable-float"
 
 # We need to specifically skip tests for musl-math-sys on systems that can't
 # build musl since otherwise `--all` will activate it.
 case "$target" in
     # Can't build at all on MSVC, WASM, or thumb
-    *windows-msvc*) extra_flags="$extra_flags --exclude musl-math-sys" ;;
-    *wasm*) extra_flags="$extra_flags --exclude musl-math-sys" ;;
-    *thumb*) extra_flags="$extra_flags --exclude musl-math-sys" ;;
+    *windows-msvc*) flags="$flags --exclude musl-math-sys" ;;
+    *wasm*) flags="$flags --exclude musl-math-sys" ;;
+    *thumb*) flags="$flags --exclude musl-math-sys" ;;
 
     # We can build musl on MinGW but running tests gets a stack overflow
     *windows-gnu*) ;;
@@ -38,7 +40,7 @@ case "$target" in
     *powerpc64le*) ;;
 
     # Everything else gets musl enabled
-    *) extra_flags="$extra_flags --features libm-test/build-musl" ;;
+    *) flags="$flags --features libm-test/build-musl" ;;
 esac
 
 # Configure which targets test against MPFR
@@ -50,17 +52,17 @@ case "$target" in
     # Targets that aren't cross compiled work fine
     # FIXME(ci): we should be able to enable aarch64 Linux here once GHA
     # support rolls out.
-    x86_64*) extra_flags="$extra_flags --features libm-test/build-mpfr" ;;
-    i686*) extra_flags="$extra_flags --features libm-test/build-mpfr" ;;
-    i586*) extra_flags="$extra_flags --features libm-test/build-mpfr --features gmp-mpfr-sys/force-cross" ;;
+    x86_64*) flags="$flags --features libm-test/build-mpfr" ;;
+    i686*) flags="$flags --features libm-test/build-mpfr" ;;
+    i586*) flags="$flags --features libm-test/build-mpfr --features gmp-mpfr-sys/force-cross" ;;
     # Apple aarch64 is native
-    aarch64*apple*) extra_flags="$extra_flags --features libm-test/build-mpfr" ;;
+    aarch64*apple*) flags="$flags --features libm-test/build-mpfr" ;;
 esac
 
 # FIXME: `STATUS_DLL_NOT_FOUND` testing macros on CI.
 # <https://github.com/rust-lang/rust/issues/128944>
 case "$target" in
-    *windows-gnu) extra_flags="$extra_flags --exclude libm-macros" ;;
+    *windows-gnu) flags="$flags --exclude libm-macros" ;;
 esac
 
 # Make sure we can build with overriding features.
@@ -76,13 +78,31 @@ if [ "${BUILD_ONLY:-}" = "1" ]; then
     exit
 fi
 
-# Otherwise, run the test suite.
-
-cmd="cargo test --all --target $target $extra_flags"
+flags="$flags --all --target $target"
+cmd="cargo test $flags"
+profile="--profile"
+
+# If nextest is available, use that
+command -v cargo-nextest && nextest=1 || nextest=0
+if [ "$nextest" = "1" ]; then
+    # Workaround for https://github.com/nextest-rs/nextest/issues/2066
+    if [ -f /.dockerenv ]; then
+        cfg_file="/tmp/nextest-config.toml"
+        echo "[store]" >> "$cfg_file"
+        echo "dir = \"$CARGO_TARGET_DIR/nextest\"" >> "$cfg_file"
+        cfg_flag="--config-file $cfg_file"
+    fi
+    
+    cmd="cargo nextest run ${cfg_flag:-} $flags"
+    profile="--cargo-profile"
+fi
 
 # Test once without intrinsics
 $cmd
 
+# Run doctests if they were excluded by nextest
+[ "$nextest" = "1" ] && cargo test --doc $flags
+
 # Exclude the macros and utile crates from the rest of the tests to save CI
 # runtime, they shouldn't have anything feature- or opt-level-dependent.
 cmd="$cmd --exclude util --exclude libm-macros"
@@ -93,10 +113,10 @@ $cmd --features unstable-intrinsics --benches
 
 # Test the same in release mode, which also increases coverage. Also ensure
 # the soft float routines are checked.
-$cmd --profile release-checked 
-$cmd --profile release-checked --features force-soft-floats
-$cmd --profile release-checked --features unstable-intrinsics
-$cmd --profile release-checked --features unstable-intrinsics --benches
+$cmd "$profile" release-checked 
+$cmd "$profile" release-checked --features force-soft-floats
+$cmd "$profile" release-checked --features unstable-intrinsics
+$cmd "$profile" release-checked --features unstable-intrinsics --benches
 
 # Ensure that the routines do not panic.
 ENSURE_NO_PANIC=1 cargo build -p libm --target "$target" --no-default-features --release

From 88176ee60db8048463678d126b34bc50dc712f7e Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Tue, 14 Jan 2025 03:24:14 +0000
Subject: [PATCH 136/279] Change `.yml` files to the canonical extension
 `.yaml`

---
 .github/workflows/{main.yml => main.yaml}       | 0
 .github/workflows/{publish.yml => publish.yaml} | 0
 2 files changed, 0 insertions(+), 0 deletions(-)
 rename .github/workflows/{main.yml => main.yaml} (100%)
 rename .github/workflows/{publish.yml => publish.yaml} (100%)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yaml
similarity index 100%
rename from .github/workflows/main.yml
rename to .github/workflows/main.yaml
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yaml
similarity index 100%
rename from .github/workflows/publish.yml
rename to .github/workflows/publish.yaml

From c63ab7beb8f62d1eefd90491a1773e73dc356b9c Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Tue, 14 Jan 2025 07:46:20 +0000
Subject: [PATCH 137/279] Slightly restructure
 `ci/calculate-exhaustive-matrix.py`

Change this script into a generic CI utility that we will be able to
expand in the future.
---
 .github/workflows/main.yaml                   |  3 +-
 ...culate-exhaustive-matrix.py => ci-util.py} | 30 +++++++++++++++----
 2 files changed, 26 insertions(+), 7 deletions(-)
 rename ci/{calculate-exhaustive-matrix.py => ci-util.py} (87%)

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index 30976d472..40b67c4c2 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -7,7 +7,6 @@ on:
 
 env:
   CARGO_TERM_COLOR: always
-  CARGO_TERM_VERBOSE: true
   RUSTDOCFLAGS: -Dwarnings
   RUSTFLAGS: -Dwarnings
   RUST_BACKTRACE: full
@@ -202,7 +201,7 @@ jobs:
       - name: Fetch pull request ref
         run: git fetch origin "$GITHUB_REF:$GITHUB_REF"
         if: github.event_name == 'pull_request'
-      - run: python3 ci/calculate-exhaustive-matrix.py >> "$GITHUB_OUTPUT"
+      - run: python3 ci/ci-util.py generate-matrix >> "$GITHUB_OUTPUT"
         id: script
 
   extensive:
diff --git a/ci/calculate-exhaustive-matrix.py b/ci/ci-util.py
similarity index 87%
rename from ci/calculate-exhaustive-matrix.py
rename to ci/ci-util.py
index 8b42f9389..733ec26fa 100755
--- a/ci/calculate-exhaustive-matrix.py
+++ b/ci/ci-util.py
@@ -1,18 +1,30 @@
 #!/usr/bin/env python3
-"""Calculate which exhaustive tests should be run as part of CI.
+"""Utilities for CI.
 
 This dynamically prepares a list of routines that had a source file change based on
 git history.
 """
 
+import json
 import subprocess as sp
 import sys
-import json
 from dataclasses import dataclass
+from inspect import cleandoc
 from os import getenv
 from pathlib import Path
 from typing import TypedDict
 
+USAGE = cleandoc(
+    """
+    usage:
+
+    ./ci/ci-util.py <SUBCOMMAND>
+
+    SUBCOMMAND:
+        generate-matrix    Calculate a matrix of which functions had source change,
+                           print that as JSON object.
+    """
+)
 
 REPO_ROOT = Path(__file__).parent.parent
 GIT = ["git", "-C", REPO_ROOT]
@@ -139,9 +151,17 @@ def eprint(*args, **kwargs):
 
 
 def main():
-    ctx = Context()
-    output = ctx.make_workflow_output()
-    print(f"matrix={output}")
+    match sys.argv[1:]:
+        case ["generate-matrix"]:
+            ctx = Context()
+            output = ctx.make_workflow_output()
+            print(f"matrix={output}")
+        case ["--help" | "-h"]:
+            print(USAGE)
+            exit()
+        case _:
+            eprint(USAGE)
+            exit(1)
 
 
 if __name__ == "__main__":

From f6434c8dcb4db54570e40c104f67c0136847d922 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 15 Jan 2025 00:54:55 +0000
Subject: [PATCH 138/279] Pass --max-fail to nextest so it doesn't fail fast

---
 ci/run.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/run.sh b/ci/run.sh
index 08ffaa81c..bb749b72a 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -93,7 +93,7 @@ if [ "$nextest" = "1" ]; then
         cfg_flag="--config-file $cfg_file"
     fi
     
-    cmd="cargo nextest run ${cfg_flag:-} $flags"
+    cmd="cargo nextest run ${cfg_flag:-} --max-fail=10 $flags"
     profile="--cargo-profile"
 fi
 

From 9965e9309211cf04fc029ceec545e26bbd59f16d Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 15 Jan 2025 01:05:38 +0000
Subject: [PATCH 139/279] Add an override for a recent failure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Failed on i686:

    ──── STDERR:             libm-test::bench/random y1f/crate

    thread 'main' panicked at crates/libm-test/benches/random.rs:76:65:
    called `Result::unwrap()` on an `Err` value: ynf

    Caused by:
        0:
               input:    (213, 109.15641) (0x000000d5, 0x42da5015)
               expected: -3.3049217e38          0xff78a27a
               actual:   -inf                   0xff800000
        1: mismatched infinities
---
 crates/libm-test/src/precision.rs | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/crates/libm-test/src/precision.rs b/crates/libm-test/src/precision.rs
index 15913fe6d..0b3fe89be 100644
--- a/crates/libm-test/src/precision.rs
+++ b/crates/libm-test/src/precision.rs
@@ -454,6 +454,13 @@ impl MaybeOverride<(i32, f32)> for SpecialCase {
                 XFAIL
             }
 
+            // `ynf(213, 109.15641) = -inf` with our library, should be finite.
+            (_, BaseName::Yn)
+                if input.0 > 200 && !expected.is_infinite() && actual.is_infinite() =>
+            {
+                XFAIL
+            }
+
             _ => None,
         }
     }

From e3daee588367172c5f3d661b410a897d930c8c0b Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Tue, 7 Jan 2025 06:28:04 +0000
Subject: [PATCH 140/279] Replace `HasDomain` to enable multi-argument edge
 case and domain tests

This also allows reusing the same generator logic between logspace tests
and extensive tests, so comes with a nice bit of cleanup.

Changes:

* Make the generator part of `CheckCtx` since a `Generator` and
  `CheckCtx` are almost always passed together.
* Rename `domain_logspace` to `spaced` since this no longer only
  operates within a domain and we may want to handle integer spacing.
* Domain is now calculated at runtime rather than using traits, which is
  much easier to work with.
* With the above, domains for multidimensional functions are added.
* The extensive test generator code tests has been combined with the
  domain_logspace generator code. With this, the domain tests have just
  become a subset of extensive tests. These were renamed to "quickspace"
  since, technically, the extensive tests are also "domain" or "domain
  logspace" tests.
* Edge case generators now handle functions with multiple inputs.
* The test runners can be significantly cleaned up and deduplicated.
---
 crates/libm-test/benches/random.rs            |   4 +-
 crates/libm-test/examples/plot_domains.rs     |  28 +-
 crates/libm-test/src/domain.rs                | 303 ++++++++++--------
 crates/libm-test/src/gen.rs                   |   3 +-
 crates/libm-test/src/gen/domain_logspace.rs   |  31 --
 crates/libm-test/src/gen/edge_cases.rs        | 185 +++++++++--
 crates/libm-test/src/gen/random.rs            |  26 +-
 .../src/gen/{extensive.rs => spaced.rs}       |  93 +++---
 crates/libm-test/src/run_cfg.rs               |  60 +++-
 crates/libm-test/tests/compare_built_musl.rs  | 132 ++------
 crates/libm-test/tests/multiprecision.rs      | 142 ++------
 crates/libm-test/tests/z_extensive/run.rs     |  24 +-
 12 files changed, 525 insertions(+), 506 deletions(-)
 delete mode 100644 crates/libm-test/src/gen/domain_logspace.rs
 rename crates/libm-test/src/gen/{extensive.rs => spaced.rs} (76%)

diff --git a/crates/libm-test/benches/random.rs b/crates/libm-test/benches/random.rs
index e79002277..dcc7c1aca 100644
--- a/crates/libm-test/benches/random.rs
+++ b/crates/libm-test/benches/random.rs
@@ -4,7 +4,7 @@ use std::time::Duration;
 use criterion::{Criterion, criterion_main};
 use libm_test::gen::random;
 use libm_test::gen::random::RandomInput;
-use libm_test::{CheckBasis, CheckCtx, MathOp, TupleCall};
+use libm_test::{CheckBasis, CheckCtx, GeneratorKind, MathOp, TupleCall};
 
 /// Benchmark with this many items to get a variety
 const BENCH_ITER_ITEMS: usize = if cfg!(feature = "short-benchmarks") { 50 } else { 500 };
@@ -52,7 +52,7 @@ where
 {
     let name = Op::NAME;
 
-    let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Musl);
+    let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Musl, GeneratorKind::Random);
     let benchvec: Vec<_> =
         random::get_test_cases::<Op::RustArgs>(&ctx).take(BENCH_ITER_ITEMS).collect();
 
diff --git a/crates/libm-test/examples/plot_domains.rs b/crates/libm-test/examples/plot_domains.rs
index 626511245..fb7b854df 100644
--- a/crates/libm-test/examples/plot_domains.rs
+++ b/crates/libm-test/examples/plot_domains.rs
@@ -12,9 +12,9 @@ use std::path::Path;
 use std::process::Command;
 use std::{env, fs};
 
-use libm_test::domain::HasDomain;
-use libm_test::gen::{domain_logspace, edge_cases};
-use libm_test::{CheckBasis, CheckCtx, MathOp, op};
+use libm_test::gen::spaced::SpacedInput;
+use libm_test::gen::{edge_cases, spaced};
+use libm_test::{CheckBasis, CheckCtx, GeneratorKind, MathOp, op};
 
 const JL_PLOT: &str = "examples/plot_file.jl";
 
@@ -52,23 +52,13 @@ fn main() {
 /// Run multiple generators for a single operator.
 fn plot_one_operator<Op>(out_dir: &Path, config: &mut String)
 where
-    Op: MathOp<FTy = f32> + HasDomain<f32>,
+    Op: MathOp<FTy = f32, RustArgs = (f32,)>,
+    Op::RustArgs: SpacedInput<Op>,
 {
-    let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Mpfr);
-    plot_one_generator(
-        out_dir,
-        &ctx,
-        "logspace",
-        config,
-        domain_logspace::get_test_cases::<Op>(&ctx),
-    );
-    plot_one_generator(
-        out_dir,
-        &ctx,
-        "edge_cases",
-        config,
-        edge_cases::get_test_cases::<Op, _>(&ctx),
-    );
+    let mut ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Mpfr, GeneratorKind::QuickSpaced);
+    plot_one_generator(out_dir, &ctx, "logspace", config, spaced::get_test_cases::<Op>(&ctx).0);
+    ctx.gen_kind = GeneratorKind::EdgeCases;
+    plot_one_generator(out_dir, &ctx, "edge_cases", config, edge_cases::get_test_cases::<Op>(&ctx));
 }
 
 /// Plot the output of a single generator.
diff --git a/crates/libm-test/src/domain.rs b/crates/libm-test/src/domain.rs
index 68b91bf02..5d650c00a 100644
--- a/crates/libm-test/src/domain.rs
+++ b/crates/libm-test/src/domain.rs
@@ -1,11 +1,13 @@
 //! Traits and operations related to bounds of a function.
 
 use std::fmt;
-use std::ops::{self, Bound};
+use std::ops::Bound;
 
-use crate::{Float, FloatExt};
+use libm::support::Int;
 
-/// Representation of a function's domain.
+use crate::{BaseName, Float, FloatExt, Identifier};
+
+/// Representation of a single dimension of a function's domain.
 #[derive(Clone, Debug)]
 pub struct Domain<T> {
     /// Start of the region for which a function is defined (ignoring poles).
@@ -39,56 +41,131 @@ impl<F: FloatExt> Domain<F> {
     }
 }
 
+/// A value that may be any float type or any integer type.
+#[derive(Clone, Debug)]
+pub enum EitherPrim<F, I> {
+    Float(F),
+    Int(I),
+}
+
+impl<F: fmt::Debug, I: fmt::Debug> EitherPrim<F, I> {
+    pub fn unwrap_float(self) -> F {
+        match self {
+            EitherPrim::Float(f) => f,
+            EitherPrim::Int(_) => panic!("expected float; got {self:?}"),
+        }
+    }
+
+    pub fn unwrap_int(self) -> I {
+        match self {
+            EitherPrim::Float(_) => panic!("expected int; got {self:?}"),
+            EitherPrim::Int(i) => i,
+        }
+    }
+}
+
+/// Convenience 1-dimensional float domains.
 impl<F: Float> Domain<F> {
     /// x ∈ ℝ
-    pub const UNBOUNDED: Self =
+    const UNBOUNDED: Self =
         Self { start: Bound::Unbounded, end: Bound::Unbounded, check_points: None };
 
     /// x ∈ ℝ >= 0
-    pub const POSITIVE: Self =
+    const POSITIVE: Self =
         Self { start: Bound::Included(F::ZERO), end: Bound::Unbounded, check_points: None };
 
     /// x ∈ ℝ > 0
-    pub const STRICTLY_POSITIVE: Self =
+    const STRICTLY_POSITIVE: Self =
         Self { start: Bound::Excluded(F::ZERO), end: Bound::Unbounded, check_points: None };
 
+    /// Wrap in the float variant of [`EitherPrim`].
+    const fn into_prim_float<I>(self) -> EitherPrim<Self, Domain<I>> {
+        EitherPrim::Float(self)
+    }
+}
+
+/// Convenience 1-dimensional integer domains.
+impl<I: Int> Domain<I> {
+    /// x ∈ ℝ
+    const UNBOUNDED_INT: Self =
+        Self { start: Bound::Unbounded, end: Bound::Unbounded, check_points: None };
+
+    /// Wrap in the int variant of [`EitherPrim`].
+    const fn into_prim_int<F>(self) -> EitherPrim<Domain<F>, Self> {
+        EitherPrim::Int(self)
+    }
+}
+
+/// Multidimensional domains, represented as an array of 1-D domains.
+impl<F: Float, I: Int> EitherPrim<Domain<F>, Domain<I>> {
+    /// x ∈ ℝ
+    const UNBOUNDED1: [Self; 1] =
+        [Domain { start: Bound::Unbounded, end: Bound::Unbounded, check_points: None }
+            .into_prim_float()];
+
+    /// {x1, x2} ∈ ℝ
+    const UNBOUNDED2: [Self; 2] =
+        [Domain::UNBOUNDED.into_prim_float(), Domain::UNBOUNDED.into_prim_float()];
+
+    /// {x1, x2, x3} ∈ ℝ
+    const UNBOUNDED3: [Self; 3] = [
+        Domain::UNBOUNDED.into_prim_float(),
+        Domain::UNBOUNDED.into_prim_float(),
+        Domain::UNBOUNDED.into_prim_float(),
+    ];
+
+    /// {x1, x2} ∈ ℝ, one float and one int
+    const UNBOUNDED_F_I: [Self; 2] =
+        [Domain::UNBOUNDED.into_prim_float(), Domain::UNBOUNDED_INT.into_prim_int()];
+
+    /// x ∈ ℝ >= 0
+    const POSITIVE: [Self; 1] = [Domain::POSITIVE.into_prim_float()];
+
+    /// x ∈ ℝ > 0
+    const STRICTLY_POSITIVE: [Self; 1] = [Domain::STRICTLY_POSITIVE.into_prim_float()];
+
     /// Used for versions of `asin` and `acos`.
-    pub const INVERSE_TRIG_PERIODIC: Self = Self {
+    const INVERSE_TRIG_PERIODIC: [Self; 1] = [Domain {
         start: Bound::Included(F::NEG_ONE),
         end: Bound::Included(F::ONE),
         check_points: None,
-    };
+    }
+    .into_prim_float()];
 
     /// Domain for `acosh`
-    pub const ACOSH: Self =
-        Self { start: Bound::Included(F::ONE), end: Bound::Unbounded, check_points: None };
+    const ACOSH: [Self; 1] =
+        [Domain { start: Bound::Included(F::ONE), end: Bound::Unbounded, check_points: None }
+            .into_prim_float()];
 
     /// Domain for `atanh`
-    pub const ATANH: Self = Self {
+    const ATANH: [Self; 1] = [Domain {
         start: Bound::Excluded(F::NEG_ONE),
         end: Bound::Excluded(F::ONE),
         check_points: None,
-    };
+    }
+    .into_prim_float()];
 
     /// Domain for `sin`, `cos`, and `tan`
-    pub const TRIG: Self = Self {
-        // TODO
+    const TRIG: [Self; 1] = [Domain {
+        // Trig functions have special behavior at fractions of π.
         check_points: Some(|| Box::new([-F::PI, -F::FRAC_PI_2, F::FRAC_PI_2, F::PI].into_iter())),
-        ..Self::UNBOUNDED
-    };
+        ..Domain::UNBOUNDED
+    }
+    .into_prim_float()];
 
     /// Domain for `log` in various bases
-    pub const LOG: Self = Self::STRICTLY_POSITIVE;
+    const LOG: [Self; 1] = Self::STRICTLY_POSITIVE;
 
     /// Domain for `log1p` i.e. `log(1 + x)`
-    pub const LOG1P: Self =
-        Self { start: Bound::Excluded(F::NEG_ONE), end: Bound::Unbounded, check_points: None };
+    const LOG1P: [Self; 1] =
+        [Domain { start: Bound::Excluded(F::NEG_ONE), end: Bound::Unbounded, check_points: None }
+            .into_prim_float()];
 
     /// Domain for `sqrt`
-    pub const SQRT: Self = Self::POSITIVE;
+    const SQRT: [Self; 1] = Self::POSITIVE;
 
     /// Domain for `gamma`
-    pub const GAMMA: Self = Self {
+    const GAMMA: [Self; 1] = [Domain {
         check_points: Some(|| {
             // Negative integers are asymptotes
             Box::new((0..u8::MAX).map(|scale| {
@@ -100,122 +177,84 @@ impl<F: Float> Domain<F> {
             }))
         }),
         // Whether or not gamma is defined for negative numbers is implementation dependent
-        ..Self::UNBOUNDED
-    };
+        ..Domain::UNBOUNDED
+    }
+    .into_prim_float()];
 
     /// Domain for `loggamma`
-    pub const LGAMMA: Self = Self::STRICTLY_POSITIVE;
-}
+    const LGAMMA: [Self; 1] = Self::STRICTLY_POSITIVE;
 
-/// Implement on `op::*` types to indicate how they are bounded.
-pub trait HasDomain<T>
-where
-    T: Copy + fmt::Debug + ops::Add<Output = T> + ops::Sub<Output = T> + PartialOrd + 'static,
-{
-    const DOMAIN: Domain<T>;
+    /// Domain for `jn` and `yn`.
+    // FIXME: the domain should provide some sort of "reasonable range" so we don't actually test
+    // the entire system unbounded.
+    const BESSEL_N: [Self; 2] =
+        [Domain::UNBOUNDED_INT.into_prim_int(), Domain::UNBOUNDED.into_prim_float()];
 }
 
-/// Implement [`HasDomain`] for both the `f32` and `f64` variants of a function.
-macro_rules! impl_has_domain {
-    ($($fn_name:ident => $domain:expr;)*) => {
-        paste::paste! {
-            $(
-                // Implement for f64 functions
-                impl HasDomain<f64> for $crate::op::$fn_name::Routine {
-                    const DOMAIN: Domain<f64> = Domain::<f64>::$domain;
-                }
-
-                // Implement for f32 functions
-                impl HasDomain<f32> for $crate::op::[< $fn_name f >]::Routine {
-                    const DOMAIN: Domain<f32> = Domain::<f32>::$domain;
-                }
-            )*
-        }
+/// Get the domain for a given function.
+pub fn get_domain<F: Float, I: Int>(
+    id: Identifier,
+    argnum: usize,
+) -> EitherPrim<Domain<F>, Domain<I>> {
+    let x = match id.base_name() {
+        BaseName::Acos => &EitherPrim::INVERSE_TRIG_PERIODIC[..],
+        BaseName::Acosh => &EitherPrim::ACOSH[..],
+        BaseName::Asin => &EitherPrim::INVERSE_TRIG_PERIODIC[..],
+        BaseName::Asinh => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Atan => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Atan2 => &EitherPrim::UNBOUNDED2[..],
+        BaseName::Cbrt => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Atanh => &EitherPrim::ATANH[..],
+        BaseName::Ceil => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Cosh => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Copysign => &EitherPrim::UNBOUNDED2[..],
+        BaseName::Cos => &EitherPrim::TRIG[..],
+        BaseName::Exp => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Erf => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Erfc => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Expm1 => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Exp10 => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Exp2 => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Frexp => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Fabs => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Fdim => &EitherPrim::UNBOUNDED2[..],
+        BaseName::Floor => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Fma => &EitherPrim::UNBOUNDED3[..],
+        BaseName::Fmax => &EitherPrim::UNBOUNDED2[..],
+        BaseName::Fmin => &EitherPrim::UNBOUNDED2[..],
+        BaseName::Fmod => &EitherPrim::UNBOUNDED2[..],
+        BaseName::Hypot => &EitherPrim::UNBOUNDED2[..],
+        BaseName::Ilogb => &EitherPrim::UNBOUNDED1[..],
+        BaseName::J0 => &EitherPrim::UNBOUNDED1[..],
+        BaseName::J1 => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Jn => &EitherPrim::BESSEL_N[..],
+        BaseName::Ldexp => &EitherPrim::UNBOUNDED_F_I[..],
+        BaseName::Lgamma => &EitherPrim::LGAMMA[..],
+        BaseName::LgammaR => &EitherPrim::LGAMMA[..],
+        BaseName::Log => &EitherPrim::LOG[..],
+        BaseName::Log10 => &EitherPrim::LOG[..],
+        BaseName::Log1p => &EitherPrim::LOG1P[..],
+        BaseName::Log2 => &EitherPrim::LOG[..],
+        BaseName::Modf => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Nextafter => &EitherPrim::UNBOUNDED2[..],
+        BaseName::Pow => &EitherPrim::UNBOUNDED2[..],
+        BaseName::Remainder => &EitherPrim::UNBOUNDED2[..],
+        BaseName::Remquo => &EitherPrim::UNBOUNDED2[..],
+        BaseName::Rint => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Round => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Scalbn => &EitherPrim::UNBOUNDED_F_I[..],
+        BaseName::Sin => &EitherPrim::TRIG[..],
+        BaseName::Sincos => &EitherPrim::TRIG[..],
+        BaseName::Sinh => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Sqrt => &EitherPrim::SQRT[..],
+        BaseName::Tan => &EitherPrim::TRIG[..],
+        BaseName::Tanh => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Tgamma => &EitherPrim::GAMMA[..],
+        BaseName::Trunc => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Y0 => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Y1 => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Yn => &EitherPrim::BESSEL_N[..],
     };
-}
-
-// Tie functions together with their domains.
-impl_has_domain! {
-    acos => INVERSE_TRIG_PERIODIC;
-    acosh => ACOSH;
-    asin => INVERSE_TRIG_PERIODIC;
-    asinh => UNBOUNDED;
-    atan => UNBOUNDED;
-    atanh => ATANH;
-    cbrt => UNBOUNDED;
-    ceil => UNBOUNDED;
-    cos => TRIG;
-    cosh => UNBOUNDED;
-    erf => UNBOUNDED;
-    erfc => UNBOUNDED;
-    exp => UNBOUNDED;
-    exp10 => UNBOUNDED;
-    exp2 => UNBOUNDED;
-    expm1 => UNBOUNDED;
-    fabs => UNBOUNDED;
-    floor => UNBOUNDED;
-    frexp => UNBOUNDED;
-    ilogb => UNBOUNDED;
-    j0 => UNBOUNDED;
-    j1 => UNBOUNDED;
-    lgamma => LGAMMA;
-    log => LOG;
-    log10 => LOG;
-    log1p => LOG1P;
-    log2 => LOG;
-    modf => UNBOUNDED;
-    rint => UNBOUNDED;
-    round => UNBOUNDED;
-    sin => TRIG;
-    sincos => TRIG;
-    sinh => UNBOUNDED;
-    sqrt => SQRT;
-    tan => TRIG;
-    tanh => UNBOUNDED;
-    tgamma => GAMMA;
-    trunc => UNBOUNDED;
-    y0 => UNBOUNDED;
-    y1 => UNBOUNDED;
-}
-
-/* Manual implementations, these functions don't follow `foo`->`foof` naming */
-
-impl HasDomain<f32> for crate::op::lgammaf_r::Routine {
-    const DOMAIN: Domain<f32> = Domain::<f32>::LGAMMA;
-}
-
-impl HasDomain<f64> for crate::op::lgamma_r::Routine {
-    const DOMAIN: Domain<f64> = Domain::<f64>::LGAMMA;
-}
-
-/* Not all `f16` and `f128` functions exist yet so we can't easily use the macros. */
-
-#[cfg(f16_enabled)]
-impl HasDomain<f16> for crate::op::fabsf16::Routine {
-    const DOMAIN: Domain<f16> = Domain::<f16>::UNBOUNDED;
-}
-
-#[cfg(f128_enabled)]
-impl HasDomain<f128> for crate::op::fabsf128::Routine {
-    const DOMAIN: Domain<f128> = Domain::<f128>::UNBOUNDED;
-}
-
-#[cfg(f16_enabled)]
-impl HasDomain<f16> for crate::op::fdimf16::Routine {
-    const DOMAIN: Domain<f16> = Domain::<f16>::UNBOUNDED;
-}
-
-#[cfg(f128_enabled)]
-impl HasDomain<f128> for crate::op::fdimf128::Routine {
-    const DOMAIN: Domain<f128> = Domain::<f128>::UNBOUNDED;
-}
-
-#[cfg(f16_enabled)]
-impl HasDomain<f16> for crate::op::truncf16::Routine {
-    const DOMAIN: Domain<f16> = Domain::<f16>::UNBOUNDED;
-}
 
-#[cfg(f128_enabled)]
-impl HasDomain<f128> for crate::op::truncf128::Routine {
-    const DOMAIN: Domain<f128> = Domain::<f128>::UNBOUNDED;
+    x[argnum].clone()
 }
diff --git a/crates/libm-test/src/gen.rs b/crates/libm-test/src/gen.rs
index e2bfcdf34..e0a7f5766 100644
--- a/crates/libm-test/src/gen.rs
+++ b/crates/libm-test/src/gen.rs
@@ -1,9 +1,8 @@
 //! Different generators that can create random or systematic bit patterns.
 
-pub mod domain_logspace;
 pub mod edge_cases;
-pub mod extensive;
 pub mod random;
+pub mod spaced;
 
 /// A wrapper to turn any iterator into an `ExactSizeIterator`. Asserts the final result to ensure
 /// the provided size was correct.
diff --git a/crates/libm-test/src/gen/domain_logspace.rs b/crates/libm-test/src/gen/domain_logspace.rs
deleted file mode 100644
index c6963ad43..000000000
--- a/crates/libm-test/src/gen/domain_logspace.rs
+++ /dev/null
@@ -1,31 +0,0 @@
-//! A generator that produces logarithmically spaced values within domain bounds.
-
-use std::ops::RangeInclusive;
-
-use libm::support::{IntTy, MinInt};
-
-use crate::domain::HasDomain;
-use crate::op::OpITy;
-use crate::run_cfg::{GeneratorKind, iteration_count};
-use crate::{CheckCtx, MathOp, logspace};
-
-/// Create a range of logarithmically spaced inputs within a function's domain.
-///
-/// This allows us to get reasonably thorough coverage without wasting time on values that are
-/// NaN or out of range. Random tests will still cover values that are excluded here.
-pub fn get_test_cases<Op>(ctx: &CheckCtx) -> impl Iterator<Item = (Op::FTy,)>
-where
-    Op: MathOp + HasDomain<Op::FTy>,
-    IntTy<Op::FTy>: TryFrom<u64>,
-    RangeInclusive<IntTy<Op::FTy>>: Iterator,
-{
-    let domain = Op::DOMAIN;
-    let ntests = iteration_count(ctx, GeneratorKind::Domain, 0);
-
-    // We generate logspaced inputs within a specific range, excluding values that are out of
-    // range in order to make iterations useful (random tests still cover the full range).
-    let start = domain.range_start();
-    let end = domain.range_end();
-    let steps = OpITy::<Op>::try_from(ntests).unwrap_or(OpITy::<Op>::MAX);
-    logspace(start, end, steps).0.map(|v| (v,))
-}
diff --git a/crates/libm-test/src/gen/edge_cases.rs b/crates/libm-test/src/gen/edge_cases.rs
index 1f27c1467..d4014bdb3 100644
--- a/crates/libm-test/src/gen/edge_cases.rs
+++ b/crates/libm-test/src/gen/edge_cases.rs
@@ -1,20 +1,28 @@
 //! A generator that checks a handful of cases near infinities, zeros, asymptotes, and NaNs.
 
-use libm::support::Float;
+use libm::support::{Float, Int};
 
-use crate::domain::HasDomain;
+use crate::domain::get_domain;
+use crate::gen::KnownSize;
 use crate::run_cfg::{check_near_count, check_point_count};
-use crate::{CheckCtx, FloatExt, MathOp};
+use crate::{CheckCtx, FloatExt, MathOp, test_log};
+
+/// Generate a sequence of edge cases, e.g. numbers near zeroes and infiniteis.
+pub trait EdgeCaseInput<Op> {
+    fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> + Send;
+}
 
 /// Create a list of values around interesting points (infinities, zeroes, NaNs).
-pub fn get_test_cases<Op, F>(ctx: &CheckCtx) -> impl Iterator<Item = (F,)>
+fn float_edge_cases<Op>(
+    ctx: &CheckCtx,
+    argnum: usize,
+) -> (impl Iterator<Item = Op::FTy> + Clone, u64)
 where
-    Op: MathOp<FTy = F> + HasDomain<F>,
-    F: Float,
+    Op: MathOp,
 {
     let mut ret = Vec::new();
     let values = &mut ret;
-    let domain = Op::DOMAIN;
+    let domain = get_domain::<_, i8>(ctx.fn_ident, argnum).unwrap_float();
     let domain_start = domain.range_start();
     let domain_end = domain.range_end();
 
@@ -22,17 +30,17 @@ where
     let near_points = check_near_count(ctx);
 
     // Check near some notable constants
-    count_up(F::ONE, near_points, values);
-    count_up(F::ZERO, near_points, values);
-    count_up(F::NEG_ONE, near_points, values);
-    count_down(F::ONE, near_points, values);
-    count_down(F::ZERO, near_points, values);
-    count_down(F::NEG_ONE, near_points, values);
-    values.push(F::NEG_ZERO);
+    count_up(Op::FTy::ONE, near_points, values);
+    count_up(Op::FTy::ZERO, near_points, values);
+    count_up(Op::FTy::NEG_ONE, near_points, values);
+    count_down(Op::FTy::ONE, near_points, values);
+    count_down(Op::FTy::ZERO, near_points, values);
+    count_down(Op::FTy::NEG_ONE, near_points, values);
+    values.push(Op::FTy::NEG_ZERO);
 
     // Check values near the extremes
-    count_up(F::NEG_INFINITY, near_points, values);
-    count_down(F::INFINITY, near_points, values);
+    count_up(Op::FTy::NEG_INFINITY, near_points, values);
+    count_down(Op::FTy::INFINITY, near_points, values);
     count_down(domain_end, near_points, values);
     count_up(domain_start, near_points, values);
     count_down(domain_start, near_points, values);
@@ -40,8 +48,8 @@ where
     count_down(domain_end, near_points, values);
 
     // Check some special values that aren't included in the above ranges
-    values.push(F::NAN);
-    values.extend(F::consts().iter());
+    values.push(Op::FTy::NAN);
+    values.extend(Op::FTy::consts().iter());
 
     // Check around asymptotes
     if let Some(f) = domain.check_points {
@@ -56,7 +64,18 @@ where
     values.sort_by_key(|x| x.to_bits());
     values.dedup_by_key(|x| x.to_bits());
 
-    ret.into_iter().map(|v| (v,))
+    let count = ret.len().try_into().unwrap();
+
+    test_log(&format!(
+        "{gen_kind:?} {basis:?} {fn_ident} arg {arg}/{args}: {count} edge cases",
+        gen_kind = ctx.gen_kind,
+        basis = ctx.basis,
+        fn_ident = ctx.fn_ident,
+        arg = argnum + 1,
+        args = ctx.input_count(),
+    ));
+
+    (ret.into_iter(), count)
 }
 
 /// Add `AROUND` values starting at and including `x` and counting up. Uses the smallest possible
@@ -84,3 +103,131 @@ fn count_down<F: Float>(mut x: F, points: u64, values: &mut Vec<F>) {
         count += 1;
     }
 }
+
+/// Create a list of values around interesting integer points (min, zero, max).
+pub fn int_edge_cases<I: Int>(
+    ctx: &CheckCtx,
+    _argnum: usize,
+) -> (impl Iterator<Item = I> + Clone, u64) {
+    let mut values = Vec::new();
+    let near_points = check_near_count(ctx);
+
+    for up_from in [I::MIN, I::ZERO] {
+        let mut x = up_from;
+        for _ in 0..near_points {
+            values.push(x);
+            x += I::ONE;
+        }
+    }
+
+    for down_from in [I::ZERO, I::MAX] {
+        let mut x = down_from;
+        for _ in 0..near_points {
+            values.push(x);
+            x -= I::ONE;
+        }
+    }
+
+    values.sort();
+    values.dedup();
+    let len = values.len().try_into().unwrap();
+    (values.into_iter(), len)
+}
+
+macro_rules! impl_edge_case_input {
+    ($fty:ty) => {
+        impl<Op> EdgeCaseInput<Op> for ($fty,)
+        where
+            Op: MathOp<RustArgs = Self, FTy = $fty>,
+        {
+            fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
+                let (iter0, steps0) = float_edge_cases::<Op>(ctx, 0);
+                let iter0 = iter0.map(|v| (v,));
+                KnownSize::new(iter0, steps0)
+            }
+        }
+
+        impl<Op> EdgeCaseInput<Op> for ($fty, $fty)
+        where
+            Op: MathOp<RustArgs = Self, FTy = $fty>,
+        {
+            fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
+                let (iter0, steps0) = float_edge_cases::<Op>(ctx, 0);
+                let (iter1, steps1) = float_edge_cases::<Op>(ctx, 1);
+                let iter =
+                    iter0.flat_map(move |first| iter1.clone().map(move |second| (first, second)));
+                let count = steps0.checked_mul(steps1).unwrap();
+                KnownSize::new(iter, count)
+            }
+        }
+
+        impl<Op> EdgeCaseInput<Op> for ($fty, $fty, $fty)
+        where
+            Op: MathOp<RustArgs = Self, FTy = $fty>,
+        {
+            fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
+                let (iter0, steps0) = float_edge_cases::<Op>(ctx, 0);
+                let (iter1, steps1) = float_edge_cases::<Op>(ctx, 1);
+                let (iter2, steps2) = float_edge_cases::<Op>(ctx, 2);
+
+                let iter = iter0
+                    .flat_map(move |first| iter1.clone().map(move |second| (first, second)))
+                    .flat_map(move |(first, second)| {
+                        iter2.clone().map(move |third| (first, second, third))
+                    });
+                let count = steps0.checked_mul(steps1).unwrap().checked_mul(steps2).unwrap();
+
+                KnownSize::new(iter, count)
+            }
+        }
+
+        impl<Op> EdgeCaseInput<Op> for (i32, $fty)
+        where
+            Op: MathOp<RustArgs = Self, FTy = $fty>,
+        {
+            fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
+                let (iter0, steps0) = int_edge_cases(ctx, 0);
+                let (iter1, steps1) = float_edge_cases::<Op>(ctx, 1);
+
+                let iter =
+                    iter0.flat_map(move |first| iter1.clone().map(move |second| (first, second)));
+                let count = steps0.checked_mul(steps1).unwrap();
+
+                KnownSize::new(iter, count)
+            }
+        }
+
+        impl<Op> EdgeCaseInput<Op> for ($fty, i32)
+        where
+            Op: MathOp<RustArgs = Self, FTy = $fty>,
+        {
+            fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
+                let (iter0, steps0) = float_edge_cases::<Op>(ctx, 0);
+                let (iter1, steps1) = int_edge_cases(ctx, 1);
+
+                let iter =
+                    iter0.flat_map(move |first| iter1.clone().map(move |second| (first, second)));
+                let count = steps0.checked_mul(steps1).unwrap();
+
+                KnownSize::new(iter, count)
+            }
+        }
+    };
+}
+
+#[cfg(f16_enabled)]
+impl_edge_case_input!(f16);
+impl_edge_case_input!(f32);
+impl_edge_case_input!(f64);
+#[cfg(f128_enabled)]
+impl_edge_case_input!(f128);
+
+pub fn get_test_cases<Op>(
+    ctx: &CheckCtx,
+) -> impl ExactSizeIterator<Item = Op::RustArgs> + use<'_, Op>
+where
+    Op: MathOp,
+    Op::RustArgs: EdgeCaseInput<Op>,
+{
+    Op::RustArgs::get_cases(ctx)
+}
diff --git a/crates/libm-test/src/gen/random.rs b/crates/libm-test/src/gen/random.rs
index 6b08e560d..56c39981a 100644
--- a/crates/libm-test/src/gen/random.rs
+++ b/crates/libm-test/src/gen/random.rs
@@ -9,8 +9,8 @@ use rand::{Rng, SeedableRng};
 use rand_chacha::ChaCha8Rng;
 
 use super::KnownSize;
+use crate::CheckCtx;
 use crate::run_cfg::{int_range, iteration_count};
-use crate::{CheckCtx, GeneratorKind};
 
 pub(crate) const SEED_ENV: &str = "LIBM_SEED";
 
@@ -52,7 +52,7 @@ macro_rules! impl_random_input {
     ($fty:ty) => {
         impl RandomInput for ($fty,) {
             fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
-                let count = iteration_count(ctx, GeneratorKind::Random, 0);
+                let count = iteration_count(ctx, 0);
                 let iter = random_floats(count).map(|f: $fty| (f,));
                 KnownSize::new(iter, count)
             }
@@ -60,8 +60,8 @@ macro_rules! impl_random_input {
 
         impl RandomInput for ($fty, $fty) {
             fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
-                let count0 = iteration_count(ctx, GeneratorKind::Random, 0);
-                let count1 = iteration_count(ctx, GeneratorKind::Random, 1);
+                let count0 = iteration_count(ctx, 0);
+                let count1 = iteration_count(ctx, 1);
                 let iter = random_floats(count0)
                     .flat_map(move |f1: $fty| random_floats(count1).map(move |f2: $fty| (f1, f2)));
                 KnownSize::new(iter, count0 * count1)
@@ -70,9 +70,9 @@ macro_rules! impl_random_input {
 
         impl RandomInput for ($fty, $fty, $fty) {
             fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
-                let count0 = iteration_count(ctx, GeneratorKind::Random, 0);
-                let count1 = iteration_count(ctx, GeneratorKind::Random, 1);
-                let count2 = iteration_count(ctx, GeneratorKind::Random, 2);
+                let count0 = iteration_count(ctx, 0);
+                let count1 = iteration_count(ctx, 1);
+                let count2 = iteration_count(ctx, 2);
                 let iter = random_floats(count0).flat_map(move |f1: $fty| {
                     random_floats(count1).flat_map(move |f2: $fty| {
                         random_floats(count2).map(move |f3: $fty| (f1, f2, f3))
@@ -84,9 +84,9 @@ macro_rules! impl_random_input {
 
         impl RandomInput for (i32, $fty) {
             fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
-                let count0 = iteration_count(ctx, GeneratorKind::Random, 0);
-                let count1 = iteration_count(ctx, GeneratorKind::Random, 1);
-                let range0 = int_range(ctx, GeneratorKind::Random, 0);
+                let count0 = iteration_count(ctx, 0);
+                let count1 = iteration_count(ctx, 1);
+                let range0 = int_range(ctx, 0);
                 let iter = random_ints(count0, range0)
                     .flat_map(move |f1: i32| random_floats(count1).map(move |f2: $fty| (f1, f2)));
                 KnownSize::new(iter, count0 * count1)
@@ -95,9 +95,9 @@ macro_rules! impl_random_input {
 
         impl RandomInput for ($fty, i32) {
             fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
-                let count0 = iteration_count(ctx, GeneratorKind::Random, 0);
-                let count1 = iteration_count(ctx, GeneratorKind::Random, 1);
-                let range1 = int_range(ctx, GeneratorKind::Random, 1);
+                let count0 = iteration_count(ctx, 0);
+                let count1 = iteration_count(ctx, 1);
+                let range1 = int_range(ctx, 1);
                 let iter = random_floats(count0).flat_map(move |f1: $fty| {
                     random_ints(count1, range1.clone()).map(move |f2: i32| (f1, f2))
                 });
diff --git a/crates/libm-test/src/gen/extensive.rs b/crates/libm-test/src/gen/spaced.rs
similarity index 76%
rename from crates/libm-test/src/gen/extensive.rs
rename to crates/libm-test/src/gen/spaced.rs
index fb709e546..bea3f4c7e 100644
--- a/crates/libm-test/src/gen/extensive.rs
+++ b/crates/libm-test/src/gen/spaced.rs
@@ -3,23 +3,23 @@ use std::ops::RangeInclusive;
 
 use libm::support::{Float, MinInt};
 
-use crate::domain::HasDomain;
+use crate::domain::get_domain;
 use crate::op::OpITy;
 use crate::run_cfg::{int_range, iteration_count};
-use crate::{CheckCtx, GeneratorKind, MathOp, linear_ints, logspace};
+use crate::{CheckCtx, MathOp, linear_ints, logspace};
 
-/// Generate a sequence of inputs that either cover the domain in completeness (for smaller float
+/// Generate a sequence of inputs that eiher cover the domain in completeness (for smaller float
 /// types and single argument functions) or provide evenly spaced inputs across the domain with
 /// approximately `u32::MAX` total iterations.
-pub trait ExtensiveInput<Op> {
+pub trait SpacedInput<Op> {
     fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self> + Send, u64);
 }
 
 /// Construct an iterator from `logspace` and also calculate the total number of steps expected
 /// for that iterator.
 fn logspace_steps<Op>(
-    start: Op::FTy,
-    end: Op::FTy,
+    ctx: &CheckCtx,
+    argnum: usize,
     max_steps: u64,
 ) -> (impl Iterator<Item = Op::FTy> + Clone, u64)
 where
@@ -28,6 +28,11 @@ where
     u64: TryFrom<OpITy<Op>, Error: fmt::Debug>,
     RangeInclusive<OpITy<Op>>: Iterator,
 {
+    // i8 is a dummy type here, it can be any integer.
+    let domain = get_domain::<Op::FTy, i8>(ctx.fn_ident, argnum).unwrap_float();
+    let start = domain.range_start();
+    let end = domain.range_end();
+
     let max_steps = OpITy::<Op>::try_from(max_steps).unwrap_or(OpITy::<Op>::MAX);
     let (iter, steps) = logspace(start, end, max_steps);
 
@@ -76,15 +81,14 @@ where
     (F::Int::MIN..=F::Int::MAX).map(|bits| F::from_bits(bits))
 }
 
-macro_rules! impl_extensive_input {
+macro_rules! impl_spaced_input {
     ($fty:ty) => {
-        impl<Op> ExtensiveInput<Op> for ($fty,)
+        impl<Op> SpacedInput<Op> for ($fty,)
         where
             Op: MathOp<RustArgs = Self, FTy = $fty>,
-            Op: HasDomain<Op::FTy>,
         {
             fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
-                let max_steps0 = iteration_count(ctx, GeneratorKind::Extensive, 0);
+                let max_steps0 = iteration_count(ctx, 0);
                 // `f16` and `f32` can have exhaustive tests.
                 match value_count::<Op::FTy>() {
                     Some(steps0) if steps0 <= max_steps0 => {
@@ -93,9 +97,7 @@ macro_rules! impl_extensive_input {
                         (EitherIter::A(iter0), steps0)
                     }
                     _ => {
-                        let start = Op::DOMAIN.range_start();
-                        let end = Op::DOMAIN.range_end();
-                        let (iter0, steps0) = logspace_steps::<Op>(start, end, max_steps0);
+                        let (iter0, steps0) = logspace_steps::<Op>(ctx, 0, max_steps0);
                         let iter0 = iter0.map(|v| (v,));
                         (EitherIter::B(iter0), steps0)
                     }
@@ -103,13 +105,13 @@ macro_rules! impl_extensive_input {
             }
         }
 
-        impl<Op> ExtensiveInput<Op> for ($fty, $fty)
+        impl<Op> SpacedInput<Op> for ($fty, $fty)
         where
             Op: MathOp<RustArgs = Self, FTy = $fty>,
         {
             fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
-                let max_steps0 = iteration_count(ctx, GeneratorKind::Extensive, 0);
-                let max_steps1 = iteration_count(ctx, GeneratorKind::Extensive, 1);
+                let max_steps0 = iteration_count(ctx, 0);
+                let max_steps1 = iteration_count(ctx, 1);
                 // `f16` can have exhaustive tests.
                 match value_count::<Op::FTy>() {
                     Some(count) if count <= max_steps0 && count <= max_steps1 => {
@@ -118,10 +120,8 @@ macro_rules! impl_extensive_input {
                         (EitherIter::A(iter), count.checked_mul(count).unwrap())
                     }
                     _ => {
-                        let start = <$fty>::NEG_INFINITY;
-                        let end = <$fty>::INFINITY;
-                        let (iter0, steps0) = logspace_steps::<Op>(start, end, max_steps0);
-                        let (iter1, steps1) = logspace_steps::<Op>(start, end, max_steps1);
+                        let (iter0, steps0) = logspace_steps::<Op>(ctx, 0, max_steps0);
+                        let (iter1, steps1) = logspace_steps::<Op>(ctx, 1, max_steps1);
                         let iter = iter0.flat_map(move |first| {
                             iter1.clone().map(move |second| (first, second))
                         });
@@ -132,14 +132,14 @@ macro_rules! impl_extensive_input {
             }
         }
 
-        impl<Op> ExtensiveInput<Op> for ($fty, $fty, $fty)
+        impl<Op> SpacedInput<Op> for ($fty, $fty, $fty)
         where
             Op: MathOp<RustArgs = Self, FTy = $fty>,
         {
             fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
-                let max_steps0 = iteration_count(ctx, GeneratorKind::Extensive, 0);
-                let max_steps1 = iteration_count(ctx, GeneratorKind::Extensive, 1);
-                let max_steps2 = iteration_count(ctx, GeneratorKind::Extensive, 2);
+                let max_steps0 = iteration_count(ctx, 0);
+                let max_steps1 = iteration_count(ctx, 1);
+                let max_steps2 = iteration_count(ctx, 2);
                 // `f16` can be exhaustive tested if `LIBM_EXTENSIVE_TESTS` is incresed.
                 match value_count::<Op::FTy>() {
                     Some(count)
@@ -153,12 +153,9 @@ macro_rules! impl_extensive_input {
                         (EitherIter::A(iter), count.checked_pow(3).unwrap())
                     }
                     _ => {
-                        let start = <$fty>::NEG_INFINITY;
-                        let end = <$fty>::INFINITY;
-
-                        let (iter0, steps0) = logspace_steps::<Op>(start, end, max_steps0);
-                        let (iter1, steps1) = logspace_steps::<Op>(start, end, max_steps1);
-                        let (iter2, steps2) = logspace_steps::<Op>(start, end, max_steps2);
+                        let (iter0, steps0) = logspace_steps::<Op>(ctx, 0, max_steps0);
+                        let (iter1, steps1) = logspace_steps::<Op>(ctx, 1, max_steps1);
+                        let (iter2, steps2) = logspace_steps::<Op>(ctx, 2, max_steps2);
 
                         let iter = iter0
                             .flat_map(move |first| iter1.clone().map(move |second| (first, second)))
@@ -174,14 +171,14 @@ macro_rules! impl_extensive_input {
             }
         }
 
-        impl<Op> ExtensiveInput<Op> for (i32, $fty)
+        impl<Op> SpacedInput<Op> for (i32, $fty)
         where
             Op: MathOp<RustArgs = Self, FTy = $fty>,
         {
             fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
-                let range0 = int_range(ctx, GeneratorKind::Extensive, 0);
-                let max_steps0 = iteration_count(ctx, GeneratorKind::Extensive, 0);
-                let max_steps1 = iteration_count(ctx, GeneratorKind::Extensive, 1);
+                let range0 = int_range(ctx, 0);
+                let max_steps0 = iteration_count(ctx, 0);
+                let max_steps1 = iteration_count(ctx, 1);
                 match value_count::<Op::FTy>() {
                     Some(count1) if count1 <= max_steps1 => {
                         let (iter0, steps0) = linear_ints(range0, max_steps0);
@@ -190,11 +187,8 @@ macro_rules! impl_extensive_input {
                         (EitherIter::A(iter), steps0.checked_mul(count1).unwrap())
                     }
                     _ => {
-                        let start = <$fty>::NEG_INFINITY;
-                        let end = <$fty>::INFINITY;
-
                         let (iter0, steps0) = linear_ints(range0, max_steps0);
-                        let (iter1, steps1) = logspace_steps::<Op>(start, end, max_steps1);
+                        let (iter1, steps1) = logspace_steps::<Op>(ctx, 1, max_steps1);
 
                         let iter = iter0.flat_map(move |first| {
                             iter1.clone().map(move |second| (first, second))
@@ -207,14 +201,14 @@ macro_rules! impl_extensive_input {
             }
         }
 
-        impl<Op> ExtensiveInput<Op> for ($fty, i32)
+        impl<Op> SpacedInput<Op> for ($fty, i32)
         where
             Op: MathOp<RustArgs = Self, FTy = $fty>,
         {
             fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
-                let max_steps0 = iteration_count(ctx, GeneratorKind::Extensive, 0);
-                let range1 = int_range(ctx, GeneratorKind::Extensive, 1);
-                let max_steps1 = iteration_count(ctx, GeneratorKind::Extensive, 1);
+                let max_steps0 = iteration_count(ctx, 0);
+                let range1 = int_range(ctx, 1);
+                let max_steps1 = iteration_count(ctx, 1);
                 match value_count::<Op::FTy>() {
                     Some(count0) if count0 <= max_steps0 => {
                         let (iter1, steps1) = linear_ints(range1, max_steps1);
@@ -224,10 +218,7 @@ macro_rules! impl_extensive_input {
                         (EitherIter::A(iter), count0.checked_mul(steps1).unwrap())
                     }
                     _ => {
-                        let start = <$fty>::NEG_INFINITY;
-                        let end = <$fty>::INFINITY;
-
-                        let (iter0, steps0) = logspace_steps::<Op>(start, end, max_steps0);
+                        let (iter0, steps0) = logspace_steps::<Op>(ctx, 0, max_steps0);
                         let (iter1, steps1) = linear_ints(range1, max_steps1);
 
                         let iter = iter0.flat_map(move |first| {
@@ -244,11 +235,11 @@ macro_rules! impl_extensive_input {
 }
 
 #[cfg(f16_enabled)]
-impl_extensive_input!(f16);
-impl_extensive_input!(f32);
-impl_extensive_input!(f64);
+impl_spaced_input!(f16);
+impl_spaced_input!(f32);
+impl_spaced_input!(f64);
 #[cfg(f128_enabled)]
-impl_extensive_input!(f128);
+impl_spaced_input!(f128);
 
 /// Create a test case iterator for extensive inputs. Also returns the total test case count.
 pub fn get_test_cases<Op>(
@@ -256,7 +247,7 @@ pub fn get_test_cases<Op>(
 ) -> (impl Iterator<Item = Op::RustArgs> + Send + use<'_, Op>, u64)
 where
     Op: MathOp,
-    Op::RustArgs: ExtensiveInput<Op>,
+    Op::RustArgs: SpacedInput<Op>,
 {
     Op::RustArgs::get_cases(ctx)
 }
diff --git a/crates/libm-test/src/run_cfg.rs b/crates/libm-test/src/run_cfg.rs
index 4a52091fe..6763de8bc 100644
--- a/crates/libm-test/src/run_cfg.rs
+++ b/crates/libm-test/src/run_cfg.rs
@@ -39,11 +39,12 @@ pub struct CheckCtx {
     pub base_name_str: &'static str,
     /// Source of truth for tests.
     pub basis: CheckBasis,
+    pub gen_kind: GeneratorKind,
 }
 
 impl CheckCtx {
     /// Create a new check context, using the default ULP for the function.
-    pub fn new(fn_ident: Identifier, basis: CheckBasis) -> Self {
+    pub fn new(fn_ident: Identifier, basis: CheckBasis, gen_kind: GeneratorKind) -> Self {
         let mut ret = Self {
             ulp: 0,
             fn_ident,
@@ -51,10 +52,16 @@ impl CheckCtx {
             base_name: fn_ident.base_name(),
             base_name_str: fn_ident.base_name().as_str(),
             basis,
+            gen_kind,
         };
         ret.ulp = crate::default_ulp(&ret);
         ret
     }
+
+    /// The number of input arguments for this function.
+    pub fn input_count(&self) -> usize {
+        self.fn_ident.math_op().rust_sig.args.len()
+    }
 }
 
 /// Possible items to test against
@@ -66,11 +73,13 @@ pub enum CheckBasis {
     Mpfr,
 }
 
-/// The different kinds of generators that provide test input.
+/// The different kinds of generators that provide test input, which account for input pattern
+/// and quantity.
 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
 pub enum GeneratorKind {
-    Domain,
+    EdgeCases,
     Extensive,
+    QuickSpaced,
     Random,
 }
 
@@ -155,7 +164,7 @@ impl TestEnv {
 }
 
 /// The number of iterations to run for a given test.
-pub fn iteration_count(ctx: &CheckCtx, gen_kind: GeneratorKind, argnum: usize) -> u64 {
+pub fn iteration_count(ctx: &CheckCtx, argnum: usize) -> u64 {
     let t_env = TestEnv::from_env(ctx);
 
     // Ideally run 5M tests
@@ -185,10 +194,13 @@ pub fn iteration_count(ctx: &CheckCtx, gen_kind: GeneratorKind, argnum: usize) -
     // Run fewer random tests than domain tests.
     let random_iter_count = domain_iter_count / 100;
 
-    let mut total_iterations = match gen_kind {
-        GeneratorKind::Domain => domain_iter_count,
+    let mut total_iterations = match ctx.gen_kind {
+        GeneratorKind::QuickSpaced => domain_iter_count,
         GeneratorKind::Random => random_iter_count,
         GeneratorKind::Extensive => *EXTENSIVE_MAX_ITERATIONS,
+        GeneratorKind::EdgeCases => {
+            unimplemented!("edge case tests shoudn't need `iteration_count`")
+        }
     };
 
     // FMA has a huge domain but is reasonably fast to run, so increase iterations.
@@ -213,16 +225,18 @@ pub fn iteration_count(ctx: &CheckCtx, gen_kind: GeneratorKind, argnum: usize) -
     };
     let total = ntests.pow(t_env.input_count.try_into().unwrap());
 
-    let seed_msg = match gen_kind {
-        GeneratorKind::Domain | GeneratorKind::Extensive => String::new(),
+    let seed_msg = match ctx.gen_kind {
+        GeneratorKind::QuickSpaced | GeneratorKind::Extensive => String::new(),
         GeneratorKind::Random => {
             format!(" using `{SEED_ENV}={}`", str::from_utf8(SEED.as_slice()).unwrap())
         }
+        GeneratorKind::EdgeCases => unreachable!(),
     };
 
     test_log(&format!(
         "{gen_kind:?} {basis:?} {fn_ident} arg {arg}/{args}: {ntests} iterations \
          ({total} total){seed_msg}",
+        gen_kind = ctx.gen_kind,
         basis = ctx.basis,
         fn_ident = ctx.fn_ident,
         arg = argnum + 1,
@@ -233,7 +247,7 @@ pub fn iteration_count(ctx: &CheckCtx, gen_kind: GeneratorKind, argnum: usize) -
 }
 
 /// Some tests require that an integer be kept within reasonable limits; generate that here.
-pub fn int_range(ctx: &CheckCtx, gen_kind: GeneratorKind, argnum: usize) -> RangeInclusive<i32> {
+pub fn int_range(ctx: &CheckCtx, argnum: usize) -> RangeInclusive<i32> {
     let t_env = TestEnv::from_env(ctx);
 
     if !matches!(ctx.base_name, BaseName::Jn | BaseName::Yn) {
@@ -252,22 +266,42 @@ pub fn int_range(ctx: &CheckCtx, gen_kind: GeneratorKind, argnum: usize) -> Rang
 
     let extensive_range = (-0xfff)..=0xfffff;
 
-    match gen_kind {
+    match ctx.gen_kind {
         GeneratorKind::Extensive => extensive_range,
-        GeneratorKind::Domain | GeneratorKind::Random => non_extensive_range,
+        GeneratorKind::QuickSpaced | GeneratorKind::Random => non_extensive_range,
+        GeneratorKind::EdgeCases => extensive_range,
     }
 }
 
 /// For domain tests, limit how many asymptotes or specified check points we test.
 pub fn check_point_count(ctx: &CheckCtx) -> usize {
+    assert_eq!(
+        ctx.gen_kind,
+        GeneratorKind::EdgeCases,
+        "check_point_count is intended for edge case tests"
+    );
     let t_env = TestEnv::from_env(ctx);
     if t_env.slow_platform || !cfg!(optimizations_enabled) { 4 } else { 10 }
 }
 
 /// When validating points of interest (e.g. asymptotes, inflection points, extremes), also check
 /// this many surrounding values.
-pub fn check_near_count(_ctx: &CheckCtx) -> u64 {
-    if cfg!(optimizations_enabled) { 100 } else { 10 }
+pub fn check_near_count(ctx: &CheckCtx) -> u64 {
+    assert_eq!(
+        ctx.gen_kind,
+        GeneratorKind::EdgeCases,
+        "check_near_count is intended for edge case tests"
+    );
+    if cfg!(optimizations_enabled) {
+        // Taper based on the number of inputs.
+        match ctx.input_count() {
+            1 | 2 => 100,
+            3 => 50,
+            x => panic!("unexpected argument count {x}"),
+        }
+    } else {
+        10
+    }
 }
 
 /// Check whether extensive actions should be run or skipped.
diff --git a/crates/libm-test/tests/compare_built_musl.rs b/crates/libm-test/tests/compare_built_musl.rs
index 836c425a5..f540a0b15 100644
--- a/crates/libm-test/tests/compare_built_musl.rs
+++ b/crates/libm-test/tests/compare_built_musl.rs
@@ -9,150 +9,78 @@
 // There are some targets we can't build musl for
 #![cfg(feature = "build-musl")]
 
-use libm_test::domain::HasDomain;
-use libm_test::gen::random::RandomInput;
-use libm_test::gen::{domain_logspace, edge_cases, random};
-use libm_test::{CheckBasis, CheckCtx, CheckOutput, MathOp, TupleCall};
+use libm_test::gen::{edge_cases, random, spaced};
+use libm_test::{CheckBasis, CheckCtx, CheckOutput, GeneratorKind, MathOp, TupleCall};
 
-macro_rules! musl_rand_tests {
-    (
-        fn_name: $fn_name:ident,
-        attrs: [$($attr:meta),*],
-    ) => {
-        paste::paste! {
-            #[test]
-            $(#[$attr])*
-            fn [< musl_random_ $fn_name >]() {
-                test_one_random::<libm_test::op::$fn_name::Routine>(musl_math_sys::$fn_name);
-            }
-        }
-    };
-}
-
-fn test_one_random<Op>(musl_fn: Op::CFn)
-where
-    Op: MathOp,
-    Op::RustArgs: RandomInput,
-{
-    let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Musl);
-    let cases = random::get_test_cases::<Op::RustArgs>(&ctx);
+const BASIS: CheckBasis = CheckBasis::Musl;
 
+fn musl_runner<Op: MathOp>(
+    ctx: &CheckCtx,
+    cases: impl Iterator<Item = Op::RustArgs>,
+    musl_fn: Op::CFn,
+) {
     for input in cases {
         let musl_res = input.call(musl_fn);
         let crate_res = input.call(Op::ROUTINE);
 
-        crate_res.validate(musl_res, input, &ctx).unwrap();
+        crate_res.validate(musl_res, input, ctx).unwrap();
     }
 }
 
-libm_macros::for_each_function! {
-    callback: musl_rand_tests,
-    // Musl does not support `f16` and `f128` on all platforms.
-    skip: [
-        copysignf128,
-        copysignf16,
-        fabsf128,
-        fabsf16,
-        fdimf128,
-        fdimf16,
-        truncf128,
-        truncf16,
-    ],
-    attributes: [
-        #[cfg_attr(x86_no_sse, ignore)] // FIXME(correctness): wrong result on i586
-        [exp10, exp10f, exp2, exp2f, rint]
-    ],
-}
-
 /// Test against musl with generators from a domain.
-macro_rules! musl_domain_tests {
+macro_rules! musl_tests {
     (
         fn_name: $fn_name:ident,
         attrs: [$($attr:meta),*],
     ) => {
         paste::paste! {
+            #[test]
+            $(#[$attr])*
+            fn [< musl_random_ $fn_name >]() {
+                type Op = libm_test::op::$fn_name::Routine;
+                let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::Random);
+                let cases = random::get_test_cases::<<Op as MathOp>::RustArgs>(&ctx);
+                musl_runner::<Op>(&ctx, cases, musl_math_sys::$fn_name);
+            }
+
             #[test]
             $(#[$attr])*
             fn [< musl_edge_case_ $fn_name >]() {
                 type Op = libm_test::op::$fn_name::Routine;
-                domain_test_runner::<Op, _>(
-                    edge_cases::get_test_cases::<Op, _>,
-                    musl_math_sys::$fn_name,
-                );
+                let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::EdgeCases);
+                let cases = edge_cases::get_test_cases::<Op>(&ctx);
+                musl_runner::<Op>(&ctx, cases, musl_math_sys::$fn_name);
             }
 
             #[test]
             $(#[$attr])*
-            fn [< musl_logspace_ $fn_name >]() {
+            fn [< musl_quickspace_ $fn_name >]() {
                 type Op = libm_test::op::$fn_name::Routine;
-                domain_test_runner::<Op, _>(
-                    domain_logspace::get_test_cases::<Op>,
-                    musl_math_sys::$fn_name,
-                );
+                let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::QuickSpaced);
+                let cases = spaced::get_test_cases::<Op>(&ctx).0;
+                musl_runner::<Op>(&ctx, cases, musl_math_sys::$fn_name);
             }
         }
     };
 }
 
-/// Test a single routine against domaine-aware inputs.
-fn domain_test_runner<Op, I>(gen: impl FnOnce(&CheckCtx) -> I, musl_fn: Op::CFn)
-where
-    Op: MathOp,
-    Op: HasDomain<Op::FTy>,
-    I: Iterator<Item = Op::RustArgs>,
-{
-    let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Musl);
-    let cases = gen(&ctx);
-
-    for input in cases {
-        let musl_res = input.call(musl_fn);
-        let crate_res = input.call(Op::ROUTINE);
-
-        crate_res.validate(musl_res, input, &ctx).unwrap();
-    }
-}
-
 libm_macros::for_each_function! {
-    callback: musl_domain_tests,
+    callback: musl_tests,
     attributes: [],
     skip: [
-        // Functions with multiple inputs
-        atan2,
-        atan2f,
-        copysign,
-        copysignf,
-        copysignf16,
-        copysignf128,
-        fdim,
-        fdimf,
-        fma,
-        fmaf,
-        fmax,
-        fmaxf,
-        fmin,
-        fminf,
-        fmod,
-        fmodf,
-        hypot,
-        hypotf,
+        // TODO integer inputs
         jn,
         jnf,
         ldexp,
         ldexpf,
-        nextafter,
-        nextafterf,
-        pow,
-        powf,
-        remainder,
-        remainderf,
-        remquo,
-        remquof,
         scalbn,
         scalbnf,
         yn,
         ynf,
 
         // Not provided by musl
+        copysignf128,
+        copysignf16,
         fabsf128,
         fabsf16,
         fdimf128,
diff --git a/crates/libm-test/tests/multiprecision.rs b/crates/libm-test/tests/multiprecision.rs
index 123abfdaf..761ca1f85 100644
--- a/crates/libm-test/tests/multiprecision.rs
+++ b/crates/libm-test/tests/multiprecision.rs
@@ -2,151 +2,69 @@
 
 #![cfg(feature = "build-mpfr")]
 
-use libm_test::domain::HasDomain;
-use libm_test::gen::random::RandomInput;
-use libm_test::gen::{domain_logspace, edge_cases, random};
+use libm_test::gen::{edge_cases, random, spaced};
 use libm_test::mpfloat::MpOp;
-use libm_test::{CheckBasis, CheckCtx, CheckOutput, MathOp, OpFTy, OpRustFn, OpRustRet, TupleCall};
+use libm_test::{CheckBasis, CheckCtx, CheckOutput, GeneratorKind, MathOp, TupleCall};
 
-/// Test against MPFR with random inputs.
-macro_rules! mp_rand_tests {
-    (
-        fn_name: $fn_name:ident,
-        attrs: [$($attr:meta),*],
-    ) => {
-        paste::paste! {
-            #[test]
-            $(#[$attr])*
-            fn [< mp_random_ $fn_name >]() {
-                test_one_random::<libm_test::op::$fn_name::Routine>();
-            }
-        }
-    };
-}
+const BASIS: CheckBasis = CheckBasis::Mpfr;
 
-/// Test a single routine with random inputs
-fn test_one_random<Op>()
-where
-    Op: MathOp + MpOp,
-    Op::RustArgs: RandomInput,
-{
+fn mp_runner<Op: MathOp + MpOp>(ctx: &CheckCtx, cases: impl Iterator<Item = Op::RustArgs>) {
     let mut mp_vals = Op::new_mp();
-    let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Mpfr);
-    let cases = random::get_test_cases::<Op::RustArgs>(&ctx);
-
     for input in cases {
         let mp_res = Op::run(&mut mp_vals, input);
         let crate_res = input.call(Op::ROUTINE);
 
-        crate_res.validate(mp_res, input, &ctx).unwrap();
+        crate_res.validate(mp_res, input, ctx).unwrap();
     }
 }
 
-libm_macros::for_each_function! {
-    callback: mp_rand_tests,
-    attributes: [
-        // Also an assertion failure on i686: at `MPFR_ASSERTN (! mpfr_erangeflag_p ())`
-        #[ignore = "large values are infeasible in MPFR"]
-        [jn, jnf, yn, ynf],
-    ],
-    skip: [
-        // FIXME: test needed, see
-        // https://github.com/rust-lang/libm/pull/311#discussion_r1818273392
-        nextafter,
-        nextafterf,
-    ],
-}
-
-/// Test against MPFR with generators from a domain.
-macro_rules! mp_domain_tests {
+macro_rules! mp_tests {
     (
         fn_name: $fn_name:ident,
         attrs: [$($attr:meta),*],
     ) => {
         paste::paste! {
+            #[test]
+            $(#[$attr])*
+            fn [< mp_random_ $fn_name >]() {
+                type Op = libm_test::op::$fn_name::Routine;
+                let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::Random);
+                let cases = random::get_test_cases::<<Op as MathOp>::RustArgs>(&ctx);
+                mp_runner::<Op>(&ctx, cases);
+            }
+
             #[test]
             $(#[$attr])*
             fn [< mp_edge_case_ $fn_name >]() {
                 type Op = libm_test::op::$fn_name::Routine;
-                domain_test_runner::<Op, _>(edge_cases::get_test_cases::<Op, _>);
+                let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::EdgeCases);
+                let cases = edge_cases::get_test_cases::<Op>(&ctx);
+                mp_runner::<Op>(&ctx, cases);
             }
 
             #[test]
             $(#[$attr])*
-            fn [< mp_logspace_ $fn_name >]() {
+            fn [< mp_quickspace_ $fn_name >]() {
                 type Op = libm_test::op::$fn_name::Routine;
-                domain_test_runner::<Op, _>(domain_logspace::get_test_cases::<Op>);
+                let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::QuickSpaced);
+                let cases = spaced::get_test_cases::<Op>(&ctx).0;
+                mp_runner::<Op>(&ctx, cases);
             }
         }
     };
 }
 
-/// Test a single routine against domaine-aware inputs.
-fn domain_test_runner<Op, I>(gen: impl FnOnce(&CheckCtx) -> I)
-where
-    // Complicated generics...
-    // The operation must take a single float argument (unary only)
-    Op: MathOp<RustArgs = (<Op as MathOp>::FTy,)>,
-    // It must also support multiprecision operations
-    Op: MpOp,
-    // And it must have a domain specified
-    Op: HasDomain<Op::FTy>,
-    // The single float argument tuple must be able to call the `RustFn` and return `RustRet`
-    (OpFTy<Op>,): TupleCall<OpRustFn<Op>, Output = OpRustRet<Op>>,
-    I: Iterator<Item = (Op::FTy,)>,
-{
-    let mut mp_vals = Op::new_mp();
-    let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Mpfr);
-    let cases = gen(&ctx);
-
-    for input in cases {
-        let mp_res = Op::run(&mut mp_vals, input);
-        let crate_res = input.call(Op::ROUTINE);
-
-        crate_res.validate(mp_res, input, &ctx).unwrap();
-    }
-}
-
 libm_macros::for_each_function! {
-    callback: mp_domain_tests,
-    attributes: [],
+    callback: mp_tests,
+    attributes: [
+        // Also an assertion failure on i686: at `MPFR_ASSERTN (! mpfr_erangeflag_p ())`
+        #[ignore = "large values are infeasible in MPFR"]
+        [jn, jnf, yn, ynf],
+    ],
     skip: [
-        // Functions with multiple inputs
-        atan2,
-        atan2f,
-        copysign,
-        copysignf,
-        copysignf16,
-        copysignf128,
-        fdim,
-        fdimf,
-        fdimf16,
-        fdimf128,
-        fma,
-        fmaf,
-        fmax,
-        fmaxf,
-        fmin,
-        fminf,
-        fmod,
-        fmodf,
-        hypot,
-        hypotf,
-        jn,
-        jnf,
-        ldexp,
-        ldexpf,
+        // FIXME: test needed, see
+        // https://github.com/rust-lang/libm/pull/311#discussion_r1818273392
         nextafter,
         nextafterf,
-        pow,
-        powf,
-        remainder,
-        remainderf,
-        remquo,
-        remquof,
-        scalbn,
-        scalbnf,
-        yn,
-        ynf,
     ],
 }
diff --git a/crates/libm-test/tests/z_extensive/run.rs b/crates/libm-test/tests/z_extensive/run.rs
index 7ee967851..a323c9110 100644
--- a/crates/libm-test/tests/z_extensive/run.rs
+++ b/crates/libm-test/tests/z_extensive/run.rs
@@ -6,13 +6,18 @@ use std::sync::atomic::{AtomicU64, Ordering};
 use std::time::Duration;
 
 use indicatif::{ProgressBar, ProgressStyle};
-use libm_test::gen::extensive::{self, ExtensiveInput};
+use libm_test::gen::spaced;
 use libm_test::mpfloat::MpOp;
 use libm_test::{
-    CheckBasis, CheckCtx, CheckOutput, MathOp, TestResult, TupleCall, skip_extensive_test,
+    CheckBasis, CheckCtx, CheckOutput, GeneratorKind, MathOp, TestResult, TupleCall,
+    skip_extensive_test,
 };
 use libtest_mimic::{Arguments, Trial};
 use rayon::prelude::*;
+use spaced::SpacedInput;
+
+const BASIS: CheckBasis = CheckBasis::Mpfr;
+const GEN_KIND: GeneratorKind = GeneratorKind::Extensive;
 
 /// Run the extensive test suite.
 pub fn run() {
@@ -62,10 +67,10 @@ fn register_all_tests() -> Vec<Trial> {
 fn register_single_test<Op>(all: &mut Vec<Trial>)
 where
     Op: MathOp + MpOp,
-    Op::RustArgs: ExtensiveInput<Op> + Send,
+    Op::RustArgs: SpacedInput<Op> + Send,
 {
     let test_name = format!("mp_extensive_{}", Op::NAME);
-    let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Mpfr);
+    let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GEN_KIND);
     let skip = skip_extensive_test(&ctx);
 
     let runner = move || {
@@ -73,7 +78,7 @@ where
             panic!("extensive tests should be run with --release");
         }
 
-        let res = run_single_test::<Op>();
+        let res = run_single_test::<Op>(&ctx);
         let e = match res {
             Ok(()) => return Ok(()),
             Err(e) => e,
@@ -91,18 +96,17 @@ where
 }
 
 /// Test runner for a signle routine.
-fn run_single_test<Op>() -> TestResult
+fn run_single_test<Op>(ctx: &CheckCtx) -> TestResult
 where
     Op: MathOp + MpOp,
-    Op::RustArgs: ExtensiveInput<Op> + Send,
+    Op::RustArgs: SpacedInput<Op> + Send,
 {
     // Small delay before printing anything so other output from the runner has a chance to flush.
     std::thread::sleep(Duration::from_millis(500));
     eprintln!();
 
     let completed = AtomicU64::new(0);
-    let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Mpfr);
-    let (ref mut cases, total) = extensive::get_test_cases::<Op>(&ctx);
+    let (ref mut cases, total) = spaced::get_test_cases::<Op>(ctx);
     let pb = Progress::new(Op::NAME, total);
 
     let test_single_chunk = |mp_vals: &mut Op::MpTy, input_vec: Vec<Op::RustArgs>| -> TestResult {
@@ -110,7 +114,7 @@ where
             // Test the input.
             let mp_res = Op::run(mp_vals, input);
             let crate_res = input.call(Op::ROUTINE);
-            crate_res.validate(mp_res, input, &ctx)?;
+            crate_res.validate(mp_res, input, ctx)?;
 
             let completed = completed.fetch_add(1, Ordering::Relaxed) + 1;
             pb.update(completed, input);

From 154f7a5ceebca42fa5e6177c36977ed7eef13c9f Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 13 Jan 2025 21:10:12 +0000
Subject: [PATCH 141/279] Adjust precision and add xfails based on new tests

---
 crates/libm-test/src/precision.rs | 149 +++++++++++++++++++++++++++---
 src/math/support/float_traits.rs  |   7 +-
 2 files changed, 143 insertions(+), 13 deletions(-)

diff --git a/crates/libm-test/src/precision.rs b/crates/libm-test/src/precision.rs
index 0b3fe89be..3cb5e420f 100644
--- a/crates/libm-test/src/precision.rs
+++ b/crates/libm-test/src/precision.rs
@@ -102,6 +102,15 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 {
         }
     }
 
+    if cfg!(target_arch = "x86") {
+        match ctx.fn_ident {
+            // Input `fma(0.999999999999999, 1.0000000000000013, 0.0) = 1.0000000000000002` is
+            // incorrect on i586 and i686.
+            Id::Fma => ulp = 1,
+            _ => (),
+        }
+    }
+
     // In some cases, our implementation is less accurate than musl on i586.
     if cfg!(x86_no_sse) {
         match ctx.fn_ident {
@@ -370,59 +379,129 @@ fn maybe_check_nan_bits<F: Float>(actual: F, expected: F, ctx: &CheckCtx) -> Opt
 impl MaybeOverride<(f16, f16)> for SpecialCase {
     fn check_float<F: Float>(
         input: (f16, f16),
-        _actual: F,
+        actual: F,
         expected: F,
         _ulp: &mut u32,
         ctx: &CheckCtx,
     ) -> Option<TestResult> {
-        maybe_skip_binop_nan(input, expected, ctx)
+        binop_common(input, actual, expected, ctx)
     }
 }
 
 impl MaybeOverride<(f32, f32)> for SpecialCase {
     fn check_float<F: Float>(
         input: (f32, f32),
-        _actual: F,
+        actual: F,
         expected: F,
         _ulp: &mut u32,
         ctx: &CheckCtx,
     ) -> Option<TestResult> {
-        maybe_skip_binop_nan(input, expected, ctx)
+        if ctx.base_name == BaseName::Fmin
+            && input.0.biteq(f32::NEG_ZERO)
+            && input.1.biteq(f32::ZERO)
+            && expected.biteq(F::NEG_ZERO)
+            && actual.biteq(F::ZERO)
+        {
+            return XFAIL;
+        }
+
+        binop_common(input, actual, expected, ctx)
+    }
+
+    fn check_int<I: Int>(
+        _input: (f32, f32),
+        actual: I,
+        expected: I,
+        ctx: &CheckCtx,
+    ) -> Option<TestResult> {
+        remquo_common(actual, expected, ctx)
     }
 }
 
 impl MaybeOverride<(f64, f64)> for SpecialCase {
     fn check_float<F: Float>(
         input: (f64, f64),
-        _actual: F,
+        actual: F,
         expected: F,
         _ulp: &mut u32,
         ctx: &CheckCtx,
     ) -> Option<TestResult> {
-        maybe_skip_binop_nan(input, expected, ctx)
+        if ctx.base_name == BaseName::Fmin
+            && input.0.biteq(f64::NEG_ZERO)
+            && input.1.biteq(f64::ZERO)
+            && expected.biteq(F::ZERO)
+            && actual.biteq(F::NEG_ZERO)
+        {
+            return XFAIL;
+        }
+
+        binop_common(input, actual, expected, ctx)
+    }
+
+    fn check_int<I: Int>(
+        _input: (f64, f64),
+        actual: I,
+        expected: I,
+        ctx: &CheckCtx,
+    ) -> Option<TestResult> {
+        remquo_common(actual, expected, ctx)
+    }
+}
+
+fn remquo_common<I: Int>(actual: I, expected: I, ctx: &CheckCtx) -> Option<TestResult> {
+    // FIXME: Our MPFR implementation disagrees with musl and may need to be updated.
+    if ctx.basis == CheckBasis::Mpfr
+        && ctx.base_name == BaseName::Remquo
+        && expected == I::MIN
+        && actual == I::ZERO
+    {
+        return XFAIL;
     }
+
+    None
 }
 
 #[cfg(f128_enabled)]
 impl MaybeOverride<(f128, f128)> for SpecialCase {
     fn check_float<F: Float>(
         input: (f128, f128),
-        _actual: F,
+        actual: F,
         expected: F,
         _ulp: &mut u32,
         ctx: &CheckCtx,
     ) -> Option<TestResult> {
-        maybe_skip_binop_nan(input, expected, ctx)
+        binop_common(input, actual, expected, ctx)
     }
 }
 
-/// Musl propagates NaNs if one is provided as the input, but we return the other input.
 // F1 and F2 are always the same type, this is just to please generics
-fn maybe_skip_binop_nan<F1: Float, F2: Float>(
+fn binop_common<F1: Float, F2: Float>(
     input: (F1, F1),
+    actual: F2,
     expected: F2,
     ctx: &CheckCtx,
 ) -> Option<TestResult> {
+    /* FIXME(#439): we do not compare signed zeros */
+
+    if ctx.base_name == BaseName::Fmin
+        && input.0.biteq(F1::NEG_ZERO)
+        && input.1.biteq(F1::ZERO)
+        && expected.biteq(F2::NEG_ZERO)
+        && actual.biteq(F2::ZERO)
+    {
+        return XFAIL;
+    }
+
+    if ctx.base_name == BaseName::Fmax
+        && input.0.biteq(F1::NEG_ZERO)
+        && input.1.biteq(F1::ZERO)
+        && expected.biteq(F2::ZERO)
+        && actual.biteq(F2::NEG_ZERO)
+    {
+        return XFAIL;
+    }
+
+    // Musl propagates NaNs if one is provided as the input, but we return the other input.
     match (&ctx.basis, ctx.base_name) {
         (Musl, BaseName::Fmin | BaseName::Fmax)
             if (input.0.is_nan() || input.1.is_nan()) && expected.is_nan() =>
@@ -509,7 +588,53 @@ fn bessel_prec_dropoff<F: Float>(
     None
 }
 
-impl MaybeOverride<(f32, f32, f32)> for SpecialCase {}
-impl MaybeOverride<(f64, f64, f64)> for SpecialCase {}
 impl MaybeOverride<(f32, i32)> for SpecialCase {}
 impl MaybeOverride<(f64, i32)> for SpecialCase {}
+
+impl MaybeOverride<(f32, f32, f32)> for SpecialCase {
+    fn check_float<F: Float>(
+        input: (f32, f32, f32),
+        actual: F,
+        expected: F,
+        _ulp: &mut u32,
+        ctx: &CheckCtx,
+    ) -> Option<TestResult> {
+        ternop_common(input, actual, expected, ctx)
+    }
+}
+impl MaybeOverride<(f64, f64, f64)> for SpecialCase {
+    fn check_float<F: Float>(
+        input: (f64, f64, f64),
+        actual: F,
+        expected: F,
+        _ulp: &mut u32,
+        ctx: &CheckCtx,
+    ) -> Option<TestResult> {
+        ternop_common(input, actual, expected, ctx)
+    }
+}
+
+// F1 and F2 are always the same type, this is just to please generics
+fn ternop_common<F1: Float, F2: Float>(
+    input: (F1, F1, F1),
+    actual: F2,
+    expected: F2,
+    ctx: &CheckCtx,
+) -> Option<TestResult> {
+    // FIXME(fma): 754-2020 says "When the exact result of (a × b) + c is non-zero yet the result
+    // of fusedMultiplyAdd is zero because of rounding, the zero result takes the sign of the
+    // exact result". Our implementation returns the wrong sign:
+    //     fma(5e-324, -5e-324, 0.0) = 0.0 (should be -0.0)
+    if ctx.base_name == BaseName::Fma
+        && (input.0.is_sign_negative() ^ input.1.is_sign_negative())
+        && input.0 != F1::ZERO
+        && input.1 != F1::ZERO
+        && input.2.biteq(F1::ZERO)
+        && expected.biteq(F2::NEG_ZERO)
+        && actual.biteq(F2::ZERO)
+    {
+        return XFAIL;
+    }
+
+    None
+}
diff --git a/src/math/support/float_traits.rs b/src/math/support/float_traits.rs
index 3aa0d844a..647f4f5e2 100644
--- a/src/math/support/float_traits.rs
+++ b/src/math/support/float_traits.rs
@@ -93,9 +93,14 @@ pub trait Float:
     /// Returns true if the value is +inf or -inf.
     fn is_infinite(self) -> bool;
 
-    /// Returns true if the sign is negative.
+    /// Returns true if the sign is negative. Extracts the sign bit regardless of zero or NaN.
     fn is_sign_negative(self) -> bool;
 
+    /// Returns true if the sign is positive. Extracts the sign bit regardless of zero or NaN.
+    fn is_sign_positive(self) -> bool {
+        !self.is_sign_negative()
+    }
+
     /// Returns if `self` is subnormal
     fn is_subnormal(self) -> bool {
         (self.to_bits() & Self::EXP_MASK) == Self::Int::ZERO

From 2fab7e0108d4e0fdcc1f03332d9d14b6038457db Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 13 Jan 2025 22:48:03 +0000
Subject: [PATCH 142/279] Increase the CI timeout

---
 .github/workflows/main.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index 40b67c4c2..99a32a82e 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -14,7 +14,7 @@ env:
 jobs:
   test:
     name: Build and test
-    timeout-minutes: 25
+    timeout-minutes: 40
     strategy:
       fail-fast: false
       matrix:

From 342600b71b8d6b6c6b9fc906356f547bba1dc605 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 16 Jan 2025 07:30:51 +0000
Subject: [PATCH 143/279] Provide a way to override iteration count

Benchmarks need a way to limit how many iterations get run. Introuce a
way to inject this information here.
---
 crates/libm-test/src/run_cfg.rs | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/crates/libm-test/src/run_cfg.rs b/crates/libm-test/src/run_cfg.rs
index 6763de8bc..3e91101f6 100644
--- a/crates/libm-test/src/run_cfg.rs
+++ b/crates/libm-test/src/run_cfg.rs
@@ -40,6 +40,8 @@ pub struct CheckCtx {
     /// Source of truth for tests.
     pub basis: CheckBasis,
     pub gen_kind: GeneratorKind,
+    /// If specified, this value will override the value returned by [`iteration_count`].
+    pub override_iterations: Option<u64>,
 }
 
 impl CheckCtx {
@@ -53,6 +55,7 @@ impl CheckCtx {
             base_name_str: fn_ident.base_name().as_str(),
             basis,
             gen_kind,
+            override_iterations: None,
         };
         ret.ulp = crate::default_ulp(&ret);
         ret
@@ -62,6 +65,10 @@ impl CheckCtx {
     pub fn input_count(&self) -> usize {
         self.fn_ident.math_op().rust_sig.args.len()
     }
+
+    pub fn override_iterations(&mut self, count: u64) {
+        self.override_iterations = Some(count)
+    }
 }
 
 /// Possible items to test against
@@ -71,6 +78,8 @@ pub enum CheckBasis {
     Musl,
     /// Check against infinite precision (MPFR).
     Mpfr,
+    /// Benchmarks or other times when this is not relevant.
+    None,
 }
 
 /// The different kinds of generators that provide test input, which account for input pattern
@@ -216,6 +225,12 @@ pub fn iteration_count(ctx: &CheckCtx, argnum: usize) -> u64 {
         total_iterations = 800;
     }
 
+    let mut overridden = false;
+    if let Some(count) = ctx.override_iterations {
+        total_iterations = count;
+        overridden = true;
+    }
+
     // Adjust for the number of inputs
     let ntests = match t_env.input_count {
         1 => total_iterations,
@@ -223,6 +238,7 @@ pub fn iteration_count(ctx: &CheckCtx, argnum: usize) -> u64 {
         3 => (total_iterations as f64).cbrt().ceil() as u64,
         _ => panic!("test has more than three arguments"),
     };
+
     let total = ntests.pow(t_env.input_count.try_into().unwrap());
 
     let seed_msg = match ctx.gen_kind {
@@ -235,12 +251,13 @@ pub fn iteration_count(ctx: &CheckCtx, argnum: usize) -> u64 {
 
     test_log(&format!(
         "{gen_kind:?} {basis:?} {fn_ident} arg {arg}/{args}: {ntests} iterations \
-         ({total} total){seed_msg}",
+         ({total} total){seed_msg}{omsg}",
         gen_kind = ctx.gen_kind,
         basis = ctx.basis,
         fn_ident = ctx.fn_ident,
         arg = argnum + 1,
         args = t_env.input_count,
+        omsg = if overridden { " (overridden)" } else { "" }
     ));
 
     ntests

From 697ce770b5c0bf6f08ccf37985421a7df46302e9 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 16 Jan 2025 07:30:38 +0000
Subject: [PATCH 144/279] Add benchmarks using iai-callgrind

Running walltime benchmarks in CI is notoriously unstable, Introduce
benchmarks that instead use instruction count and other more
reproducible metrics, using `iai-callgrind` [1], which we are able to
run in CI with a high degree of reproducibility.

Inputs to this benchmark are a logspace sweep, which gives an
approximation for real-world use, but may fail to indicate outlier
cases.

[1]: https://github.com/iai-callgrind/iai-callgrind
---
 Cargo.toml                         |   4 +
 crates/libm-test/Cargo.toml        |   9 ++
 crates/libm-test/benches/icount.rs | 175 +++++++++++++++++++++++++++++
 crates/libm-test/src/lib.rs        |   3 +-
 crates/libm-test/src/op.rs         |   2 +
 5 files changed, 192 insertions(+), 1 deletion(-)
 create mode 100644 crates/libm-test/benches/icount.rs

diff --git a/Cargo.toml b/Cargo.toml
index f84f3eac6..18d89997d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -73,3 +73,7 @@ debug-assertions = true
 inherits = "release"
 lto = "fat"
 overflow-checks = true
+
+[profile.bench]
+# Required for iai-callgrind
+debug = true
diff --git a/crates/libm-test/Cargo.toml b/crates/libm-test/Cargo.toml
index d3f18ab3e..3a1ba8796 100644
--- a/crates/libm-test/Cargo.toml
+++ b/crates/libm-test/Cargo.toml
@@ -20,6 +20,9 @@ build-musl = ["dep:musl-math-sys"]
 # Enable report generation without bringing in more dependencies by default
 benchmarking-reports = ["criterion/plotters", "criterion/html_reports"]
 
+# Enable icount benchmarks (requires iai-callgrind and valgrind)
+icount = ["dep:iai-callgrind"]
+
 # Run with a reduced set of benchmarks, such as for CI
 short-benchmarks = []
 
@@ -27,6 +30,7 @@ short-benchmarks = []
 anyhow = "1.0.90"
 az = { version = "1.2.1", optional = true }
 gmp-mpfr-sys = { version = "1.6.4", optional = true, default-features = false, features = ["mpfr"] }
+iai-callgrind = { version = "0.14.0", optional = true }
 indicatif = { version = "0.17.9", default-features = false }
 libm = { path = "../..", features = ["unstable-public-internals"] }
 libm-macros = { path = "../libm-macros" }
@@ -48,6 +52,11 @@ rand = { version = "0.8.5", optional = true }
 criterion = { version = "0.5.1", default-features = false, features = ["cargo_bench_support"] }
 libtest-mimic = "0.8.1"
 
+[[bench]]
+name = "icount"
+harness = false
+required-features = ["icount"]
+
 [[bench]]
 name = "random"
 harness = false
diff --git a/crates/libm-test/benches/icount.rs b/crates/libm-test/benches/icount.rs
new file mode 100644
index 000000000..3a66249e8
--- /dev/null
+++ b/crates/libm-test/benches/icount.rs
@@ -0,0 +1,175 @@
+//! Benchmarks that use `iai-cachegrind` to be reasonably CI-stable.
+
+use std::hint::black_box;
+
+use iai_callgrind::{library_benchmark, library_benchmark_group, main};
+use libm_test::gen::spaced;
+use libm_test::{CheckBasis, CheckCtx, GeneratorKind, MathOp, OpRustArgs, TupleCall, op};
+
+const BENCH_ITER_ITEMS: u64 = 500;
+
+macro_rules! icount_benches {
+    (
+        fn_name: $fn_name:ident,
+        attrs: [$($_attr:meta),*],
+    ) => {
+        paste::paste! {
+            // Construct benchmark inputs from the logspace generator.
+            fn [< setup_ $fn_name >]() -> Vec<OpRustArgs<op::$fn_name::Routine>> {
+                type Op = op::$fn_name::Routine;
+                let mut ctx = CheckCtx::new(
+                    Op::IDENTIFIER,
+                    CheckBasis::None,
+                    GeneratorKind::QuickSpaced
+                );
+                ctx.override_iterations(BENCH_ITER_ITEMS);
+                let ret = spaced::get_test_cases::<Op>(&ctx).0.collect::<Vec<_>>();
+                println!("operation {}, {} steps", Op::NAME, ret.len());
+                ret
+            }
+
+            // Run benchmarks with the above inputs.
+            #[library_benchmark]
+            #[bench::logspace([< setup_ $fn_name >]())]
+            fn [< icount_bench_ $fn_name >](cases: Vec<OpRustArgs<op::$fn_name::Routine>>) {
+                type Op = op::$fn_name::Routine;
+                let f = black_box(Op::ROUTINE);
+                for input in cases.iter().copied() {
+                    input.call(f);
+                }
+            }
+
+            library_benchmark_group!(
+                name = [< icount_bench_ $fn_name _group  >];
+                benchmarks = [< icount_bench_ $fn_name >]
+            );
+        }
+    };
+}
+
+libm_macros::for_each_function! {
+    callback: icount_benches,
+}
+
+main!(
+    library_benchmark_groups = icount_bench_acos_group,
+    icount_bench_acosf_group,
+    icount_bench_acosh_group,
+    icount_bench_acoshf_group,
+    icount_bench_asin_group,
+    icount_bench_asinf_group,
+    icount_bench_asinh_group,
+    icount_bench_asinhf_group,
+    icount_bench_atan2_group,
+    icount_bench_atan2f_group,
+    icount_bench_atan_group,
+    icount_bench_atanf_group,
+    icount_bench_atanh_group,
+    icount_bench_atanhf_group,
+    icount_bench_cbrt_group,
+    icount_bench_cbrtf_group,
+    icount_bench_ceil_group,
+    icount_bench_ceilf_group,
+    icount_bench_copysign_group,
+    icount_bench_copysignf128_group,
+    icount_bench_copysignf16_group,
+    icount_bench_copysignf_group,
+    icount_bench_cos_group,
+    icount_bench_cosf_group,
+    icount_bench_cosh_group,
+    icount_bench_coshf_group,
+    icount_bench_erf_group,
+    icount_bench_erfc_group,
+    icount_bench_erfcf_group,
+    icount_bench_erff_group,
+    icount_bench_exp10_group,
+    icount_bench_exp10f_group,
+    icount_bench_exp2_group,
+    icount_bench_exp2f_group,
+    icount_bench_exp_group,
+    icount_bench_expf_group,
+    icount_bench_expm1_group,
+    icount_bench_expm1f_group,
+    icount_bench_fabs_group,
+    icount_bench_fabsf128_group,
+    icount_bench_fabsf16_group,
+    icount_bench_fabsf_group,
+    icount_bench_fdim_group,
+    icount_bench_fdimf128_group,
+    icount_bench_fdimf16_group,
+    icount_bench_fdimf_group,
+    icount_bench_floor_group,
+    icount_bench_floorf_group,
+    icount_bench_fma_group,
+    icount_bench_fmaf_group,
+    icount_bench_fmax_group,
+    icount_bench_fmaxf_group,
+    icount_bench_fmin_group,
+    icount_bench_fminf_group,
+    icount_bench_fmod_group,
+    icount_bench_fmodf_group,
+    icount_bench_frexp_group,
+    icount_bench_frexpf_group,
+    icount_bench_hypot_group,
+    icount_bench_hypotf_group,
+    icount_bench_ilogb_group,
+    icount_bench_ilogbf_group,
+    icount_bench_j0_group,
+    icount_bench_j0f_group,
+    icount_bench_j1_group,
+    icount_bench_j1f_group,
+    icount_bench_jn_group,
+    icount_bench_jnf_group,
+    icount_bench_ldexp_group,
+    icount_bench_ldexpf_group,
+    icount_bench_lgamma_group,
+    icount_bench_lgamma_r_group,
+    icount_bench_lgammaf_group,
+    icount_bench_lgammaf_r_group,
+    icount_bench_log10_group,
+    icount_bench_log10f_group,
+    icount_bench_log1p_group,
+    icount_bench_log1pf_group,
+    icount_bench_log2_group,
+    icount_bench_log2f_group,
+    icount_bench_log_group,
+    icount_bench_logf_group,
+    icount_bench_modf_group,
+    icount_bench_modff_group,
+    icount_bench_nextafter_group,
+    icount_bench_nextafterf_group,
+    icount_bench_pow_group,
+    icount_bench_powf_group,
+    icount_bench_remainder_group,
+    icount_bench_remainderf_group,
+    icount_bench_remquo_group,
+    icount_bench_remquof_group,
+    icount_bench_rint_group,
+    icount_bench_rintf_group,
+    icount_bench_round_group,
+    icount_bench_roundf_group,
+    icount_bench_scalbn_group,
+    icount_bench_scalbnf_group,
+    icount_bench_sin_group,
+    icount_bench_sinf_group,
+    icount_bench_sinh_group,
+    icount_bench_sinhf_group,
+    icount_bench_sqrt_group,
+    icount_bench_sqrtf_group,
+    icount_bench_tan_group,
+    icount_bench_tanf_group,
+    icount_bench_tanh_group,
+    icount_bench_tanhf_group,
+    icount_bench_tgamma_group,
+    icount_bench_tgammaf_group,
+    icount_bench_trunc_group,
+    icount_bench_truncf128_group,
+    icount_bench_truncf16_group,
+    icount_bench_truncf_group,
+    icount_bench_y0_group,
+    icount_bench_y0f_group,
+    icount_bench_y1_group,
+    icount_bench_y1f_group,
+    icount_bench_yn_group,
+    icount_bench_ynf_group,
+);
diff --git a/crates/libm-test/src/lib.rs b/crates/libm-test/src/lib.rs
index cb89f1c8b..b90423c1b 100644
--- a/crates/libm-test/src/lib.rs
+++ b/crates/libm-test/src/lib.rs
@@ -24,7 +24,8 @@ pub use f8_impl::f8;
 pub use libm::support::{Float, Int, IntTy, MinInt};
 pub use num::{FloatExt, linear_ints, logspace};
 pub use op::{
-    BaseName, FloatTy, Identifier, MathOp, OpCFn, OpCRet, OpFTy, OpRustFn, OpRustRet, Ty,
+    BaseName, FloatTy, Identifier, MathOp, OpCFn, OpCRet, OpFTy, OpRustArgs, OpRustFn, OpRustRet,
+    Ty,
 };
 pub use precision::{MaybeOverride, SpecialCase, default_ulp};
 use run_cfg::EXTENSIVE_MAX_ITERATIONS;
diff --git a/crates/libm-test/src/op.rs b/crates/libm-test/src/op.rs
index 8329d3424..239c9a3e1 100644
--- a/crates/libm-test/src/op.rs
+++ b/crates/libm-test/src/op.rs
@@ -100,6 +100,8 @@ pub type OpCFn<Op> = <Op as MathOp>::CFn;
 pub type OpCRet<Op> = <Op as MathOp>::CRet;
 /// Access the associated `RustFn` type from an op (helper to avoid ambiguous associated types).
 pub type OpRustFn<Op> = <Op as MathOp>::RustFn;
+/// Access the associated `RustArgs` type from an op (helper to avoid ambiguous associated types).
+pub type OpRustArgs<Op> = <Op as MathOp>::RustArgs;
 /// Access the associated `RustRet` type from an op (helper to avoid ambiguous associated types).
 pub type OpRustRet<Op> = <Op as MathOp>::RustRet;
 

From 6e6ab787ac0a9b7e4ce8a5d6a40883f544ddbe78 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 16 Jan 2025 07:31:34 +0000
Subject: [PATCH 145/279] Run iai-callgrind benchmarks in CI

Add support in `ci-util.py` for finding the most recent baseline and
downloading it, which new tests can then be compared against.

Arbitrarily select nightly-2025-01-16 as the rustc version to pin to in
benchmarks.
---
 .github/workflows/main.yaml |  58 ++++++++++++-
 ci/ci-util.py               | 159 ++++++++++++++++++++++++++++++++++--
 2 files changed, 207 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index 99a32a82e..9face9311 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -10,6 +10,7 @@ env:
   RUSTDOCFLAGS: -Dwarnings
   RUSTFLAGS: -Dwarnings
   RUST_BACKTRACE: full
+  BENCHMARK_RUSTC: nightly-2025-01-16 # Pin the toolchain for reproducable results
 
 jobs:
   test:
@@ -147,19 +148,70 @@ jobs:
   benchmarks:
     name: Benchmarks
     runs-on: ubuntu-24.04
+    timeout-minutes: 20
     steps:
     - uses: actions/checkout@master
-    - name: Install Rust
-      run: rustup update nightly --no-self-update && rustup default nightly
+    - uses: taiki-e/install-action@cargo-binstall
+
+    - name: Set up dependencies
+      run: |
+        rustup update "$BENCHMARK_RUSTC" --no-self-update
+        rustup default "$BENCHMARK_RUSTC"
+        # Install the version of iai-callgrind-runner that is specified in Cargo.toml
+        iai_version="$(cargo metadata --format-version=1 --features icount |
+           jq -r '.packages[] | select(.name == "iai-callgrind").version')"
+        cargo binstall -y iai-callgrind-runner --version "$iai_version"
+        sudo apt-get install valgrind
+
     - uses: Swatinem/rust-cache@v2
     - name: Download musl source
       run: ./ci/download-musl.sh
-    - run: |
+
+    - name: Run icount benchmarks
+      env:
+        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      run: |
+        set -eux
+        iai_home="iai-home"
+        # Download the baseline from master
+        ./ci/ci-util.py locate-baseline --download --extract
+
+        # Run iai-callgrind benchmarks
+        cargo bench --no-default-features \
+          --features unstable,unstable-float,icount \
+          --bench icount \
+          -- \
+          --save-baseline=default \
+          --home "$(pwd)/$iai_home" \
+          --regression='ir=5.0' \
+          --save-summary
+        # NB: iai-callgrind should exit on error but does not, so we inspect the sumary
+        # for errors. See  https://github.com/iai-callgrind/iai-callgrind/issues/337
+        ./ci/ci-util.py check-regressions "$iai_home"
+
+        # Name and tar the new baseline
+        name="baseline-icount-$(date -u +'%Y%m%d%H%M')-${GITHUB_SHA:0:12}"
+        echo "BASELINE_NAME=$name" >> "$GITHUB_ENV"
+        tar cJf "$name.tar.xz" "$iai_home"
+
+    - name: Upload the benchmark baseline
+      uses: actions/upload-artifact@v4
+      with:
+        name: ${{ env.BASELINE_NAME }}
+        path: ${{ env.BASELINE_NAME }}.tar.xz
+    
+    - name: Run wall time benchmarks
+      run: |
         # Always use the same seed for benchmarks. Ideally we should switch to a
         # non-random generator.
         export LIBM_SEED=benchesbenchesbenchesbencheswoo!
         cargo bench --all --features libm-test/short-benchmarks,libm-test/build-musl
 
+    - name: Print test logs if available
+      if: always()
+      run: if [ -f "target/test-log.txt" ]; then cat target/test-log.txt; fi
+      shell: bash
+
   msrv:
     name: Check MSRV
     runs-on: ubuntu-24.04
diff --git a/ci/ci-util.py b/ci/ci-util.py
index 733ec26fa..1ec69b002 100755
--- a/ci/ci-util.py
+++ b/ci/ci-util.py
@@ -9,6 +9,7 @@
 import subprocess as sp
 import sys
 from dataclasses import dataclass
+from glob import glob, iglob
 from inspect import cleandoc
 from os import getenv
 from pathlib import Path
@@ -18,16 +19,33 @@
     """
     usage:
 
-    ./ci/ci-util.py <SUBCOMMAND>
+    ./ci/ci-util.py <COMMAND> [flags]
 
-    SUBCOMMAND:
-        generate-matrix    Calculate a matrix of which functions had source change,
-                           print that as JSON object.
+    COMMAND:
+        generate-matrix
+            Calculate a matrix of which functions had source change, print that as
+             a JSON object.
+
+        locate-baseline [--download] [--extract]
+            Locate the most recent benchmark baseline available in CI and, if flags
+            specify, download and extract it. Never exits with nonzero status if
+            downloading fails.
+
+            Note that `--extract` will overwrite files in `iai-home`.
+
+        check-regressions [iai-home]
+            Check `iai-home` (or `iai-home` if unspecified) for `summary.json`
+            files and see if there are any regressions. This is used as a workaround
+            for `iai-callgrind` not exiting with error status; see
+            <https://github.com/iai-callgrind/iai-callgrind/issues/337>.
     """
 )
 
 REPO_ROOT = Path(__file__).parent.parent
 GIT = ["git", "-C", REPO_ROOT]
+DEFAULT_BRANCH = "master"
+WORKFLOW_NAME = "CI"  # Workflow that generates the benchmark artifacts
+ARTIFACT_GLOB = "baseline-icount*"
 
 # Don't run exhaustive tests if these files change, even if they contaiin a function
 # definition.
@@ -40,6 +58,11 @@
 TYPES = ["f16", "f32", "f64", "f128"]
 
 
+def eprint(*args, **kwargs):
+    """Print to stderr."""
+    print(*args, file=sys.stderr, **kwargs)
+
+
 class FunctionDef(TypedDict):
     """Type for an entry in `function-definitions.json`"""
 
@@ -145,9 +168,125 @@ def make_workflow_output(self) -> str:
         return output
 
 
-def eprint(*args, **kwargs):
-    """Print to stderr."""
-    print(*args, file=sys.stderr, **kwargs)
+def locate_baseline(flags: list[str]) -> None:
+    """Find the most recent baseline from CI, download it if specified.
+
+    This returns rather than erroring, even if the `gh` commands fail. This is to avoid
+    erroring in CI if the baseline is unavailable (artifact time limit exceeded, first
+    run on the branch, etc).
+    """
+
+    download = False
+    extract = False
+
+    while len(flags) > 0:
+        match flags[0]:
+            case "--download":
+                download = True
+            case "--extract":
+                extract = True
+            case _:
+                eprint(USAGE)
+                exit(1)
+        flags = flags[1:]
+
+    if extract and not download:
+        eprint("cannot extract without downloading")
+        exit(1)
+
+    try:
+        # Locate the most recent job to complete with success on our branch
+        latest_job = sp.check_output(
+            [
+                "gh",
+                "run",
+                "list",
+                "--limit=1",
+                "--status=success",
+                f"--branch={DEFAULT_BRANCH}",
+                "--json=databaseId,url,headSha,conclusion,createdAt,"
+                "status,workflowDatabaseId,workflowName",
+                f'--jq=select(.[].workflowName == "{WORKFLOW_NAME}")',
+            ],
+            text=True,
+        )
+        eprint(f"latest: '{latest_job}'")
+    except sp.CalledProcessError as e:
+        eprint(f"failed to run github command: {e}")
+        return
+
+    try:
+        latest = json.loads(latest_job)[0]
+        eprint("latest job: ", json.dumps(latest, indent=4))
+    except json.JSONDecodeError as e:
+        eprint(f"failed to decode json '{latest_job}', {e}")
+        return
+
+    if not download:
+        eprint("--download not specified, returning")
+        return
+
+    job_id = latest.get("databaseId")
+    if job_id is None:
+        eprint("skipping download step")
+        return
+
+    sp.run(
+        ["gh", "run", "download", str(job_id), f"--pattern={ARTIFACT_GLOB}"],
+        check=False,
+    )
+
+    if not extract:
+        eprint("skipping extraction step")
+        return
+
+    # Find the baseline with the most recent timestamp. GH downloads the files to e.g.
+    # `some-dirname/some-dirname.tar.xz`, so just glob the whole thing together.
+    candidate_baselines = glob(f"{ARTIFACT_GLOB}/{ARTIFACT_GLOB}")
+    if len(candidate_baselines) == 0:
+        eprint("no possible baseline directories found")
+        return
+
+    candidate_baselines.sort(reverse=True)
+    baseline_archive = candidate_baselines[0]
+    eprint(f"extracting {baseline_archive}")
+    sp.run(["tar", "xJvf", baseline_archive], check=True)
+    eprint("baseline extracted successfully")
+
+
+def check_iai_regressions(iai_home: str | None | Path):
+    """Find regressions in iai summary.json files, exit with failure if any are
+    found.
+    """
+    if iai_home is None:
+        iai_home = "iai-home"
+    iai_home = Path(iai_home)
+
+    found_summaries = False
+    regressions = []
+    for summary_path in iglob("**/summary.json", root_dir=iai_home, recursive=True):
+        found_summaries = True
+        with open(iai_home / summary_path, "r") as f:
+            summary = json.load(f)
+
+        summary_regs = []
+        run = summary["callgrind_summary"]["callgrind_run"]
+        name_entry = {"name": f"{summary["function_name"]}.{summary["id"]}"}
+
+        for segment in run["segments"]:
+            summary_regs.extend(segment["regressions"])
+
+        summary_regs.extend(run["total"]["regressions"])
+
+        regressions.extend(name_entry | reg for reg in summary_regs)
+
+    if not found_summaries:
+        eprint(f"did not find any summary.json files within {iai_home}")
+        exit(1)
+
+    if len(regressions) > 0:
+        eprint("Found regressions:", json.dumps(regressions, indent=4))
+        exit(1)
 
 
 def main():
@@ -156,6 +295,12 @@ def main():
             ctx = Context()
             output = ctx.make_workflow_output()
             print(f"matrix={output}")
+        case ["locate-baseline", *flags]:
+            locate_baseline(flags)
+        case ["check-regressions"]:
+            check_iai_regressions(None)
+        case ["check-regressions", iai_home]:
+            check_iai_regressions(iai_home)
         case ["--help" | "-h"]:
             print(USAGE)
             exit()

From 31b41154e9632b4d57d9bdde04036447d2130db7 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 16 Jan 2025 07:31:07 +0000
Subject: [PATCH 146/279] Reduce the warm up and measurement time for
 `short-benchmarks`

The icount benchmarks are what we will be relying on in CI more than the
existing benchmarks. There isn't much reason to keep these around, but
there isn't much point in dropping them either. So, just reduce the
runtime.
---
 crates/libm-test/benches/random.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/crates/libm-test/benches/random.rs b/crates/libm-test/benches/random.rs
index dcc7c1aca..888161265 100644
--- a/crates/libm-test/benches/random.rs
+++ b/crates/libm-test/benches/random.rs
@@ -151,8 +151,8 @@ pub fn musl_random() {
     // about the same time as other tests.
     if cfg!(feature = "short-benchmarks") {
         criterion = criterion
-            .warm_up_time(Duration::from_millis(500))
-            .measurement_time(Duration::from_millis(1000));
+            .warm_up_time(Duration::from_millis(200))
+            .measurement_time(Duration::from_millis(600));
     }
 
     criterion = criterion.configure_from_args();

From 8fc8d413acf8f624476dea713c36575ebdb2a8a3 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 16 Jan 2025 09:47:00 +0000
Subject: [PATCH 147/279] Add an xfail for recent ynf failures

This failed a couple of times recently in CI, once on i686 and once on
aarch64-apple:

    thread 'main' panicked at crates/libm-test/benches/random.rs:76:65:
    called `Result::unwrap()` on an `Err` value: ynf

    Caused by:
        0:
               input:    (681, 509.90924) (0x000002a9, 0x43fef462)
               expected: -3.2161271e38          0xff71f45b
               actual:   -inf                   0xff800000
        1: mismatched infinities

    thread 'main' panicked at crates/libm-test/benches/random.rs:76:65:
    called `Result::unwrap()` on an `Err` value: ynf

    Caused by:
        0:
               input:    (132, 50.46604) (0x00000084, 0x4249dd3a)
               expected: -3.3364996e38          0xff7b02a5
               actual:   -inf                   0xff800000
        1: mismatched infinities

Add a new override to account for this.
---
 crates/libm-test/src/precision.rs | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/crates/libm-test/src/precision.rs b/crates/libm-test/src/precision.rs
index 3cb5e420f..9d17ab8cc 100644
--- a/crates/libm-test/src/precision.rs
+++ b/crates/libm-test/src/precision.rs
@@ -4,6 +4,7 @@
 use core::f32;
 
 use CheckBasis::{Mpfr, Musl};
+use libm::support::CastFrom;
 use {BaseName as Bn, Identifier as Id};
 
 use crate::{BaseName, CheckBasis, CheckCtx, Float, Identifier, Int, TestResult};
@@ -524,7 +525,7 @@ impl MaybeOverride<(i32, f32)> for SpecialCase {
         ctx: &CheckCtx,
     ) -> Option<TestResult> {
         match (&ctx.basis, ctx.base_name) {
-            (Musl, _) => bessel_prec_dropoff(input, ulp, ctx),
+            (Musl, _) => bessel_prec_dropoff(input, actual, expected, ulp, ctx),
 
             // We return +0.0, MPFR returns -0.0
             (Mpfr, BaseName::Jn | BaseName::Yn)
@@ -554,7 +555,7 @@ impl MaybeOverride<(i32, f64)> for SpecialCase {
         ctx: &CheckCtx,
     ) -> Option<TestResult> {
         match (&ctx.basis, ctx.base_name) {
-            (Musl, _) => bessel_prec_dropoff(input, ulp, ctx),
+            (Musl, _) => bessel_prec_dropoff(input, actual, expected, ulp, ctx),
 
             // We return +0.0, MPFR returns -0.0
             (Mpfr, BaseName::Jn | BaseName::Yn)
@@ -569,8 +570,10 @@ impl MaybeOverride<(i32, f64)> for SpecialCase {
 }
 
 /// Our bessel functions blow up with large N values
-fn bessel_prec_dropoff<F: Float>(
-    input: (i32, F),
+fn bessel_prec_dropoff<F1: Float, F2: Float>(
+    input: (i32, F1),
+    actual: F2,
+    expected: F2,
     ulp: &mut u32,
     ctx: &CheckCtx,
 ) -> Option<TestResult> {
@@ -585,6 +588,17 @@ fn bessel_prec_dropoff<F: Float>(
         }
     }
 
+    // Values near infinity sometimes get cut off for us. `ynf(681, 509.90924) = -inf` but should
+    // be -3.2161271e38.
+    if ctx.fn_ident == Identifier::Ynf
+        && !expected.is_infinite()
+        && actual.is_infinite()
+        && (expected.abs().to_bits().abs_diff(actual.abs().to_bits())
+            < F2::Int::cast_from(1_000_000u32))
+    {
+        return XFAIL;
+    }
+
     None
 }
 

From 04443d5066e353f88a6f81612e5b24f56bc13192 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 16 Jan 2025 20:30:47 +0000
Subject: [PATCH 148/279] Remove the limit for querying a baseline

`--limit=1` seems to apply before `jq` filtering, meaning our
`WORKFLOW_NAME` ("CI") workflow may not appear in the input to the jq
query. Removing `--limit` provides a default amount of inputs that jq
can then filter from, so this works better.
---
 ci/ci-util.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/ci/ci-util.py b/ci/ci-util.py
index 1ec69b002..7a9f1bd2b 100755
--- a/ci/ci-util.py
+++ b/ci/ci-util.py
@@ -201,22 +201,24 @@ def locate_baseline(flags: list[str]) -> None:
                 "gh",
                 "run",
                 "list",
-                "--limit=1",
                 "--status=success",
                 f"--branch={DEFAULT_BRANCH}",
                 "--json=databaseId,url,headSha,conclusion,createdAt,"
                 "status,workflowDatabaseId,workflowName",
-                f'--jq=select(.[].workflowName == "{WORKFLOW_NAME}")',
+                # Return the first array element matching our workflow name. NB: cannot
+                # just use `--limit=1`, jq filtering happens after limiting. We also
+                # cannot just use `--workflow` because GH gets confused from
+                # different file names in history.
+                f'--jq=[.[] | select(.workflowName == "{WORKFLOW_NAME}")][0]',
             ],
             text=True,
         )
-        eprint(f"latest: '{latest_job}'")
     except sp.CalledProcessError as e:
         eprint(f"failed to run github command: {e}")
         return
 
     try:
-        latest = json.loads(latest_job)[0]
+        latest = json.loads(latest_job)
         eprint("latest job: ", json.dumps(latest, indent=4))
     except json.JSONDecodeError as e:
         eprint(f"failed to decode json '{latest_job}', {e}")

From a7c61786495a8ae966d3c7126f5b7e0b29a488a5 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 16 Jan 2025 19:50:29 +0000
Subject: [PATCH 149/279] Switch to the arm-linux runner and enable MPFR

The free arm64 Linux runners are now available [1]. Switch to using this
image in CI, and enable tests against MPFR since this is now a native
platform.

[1]: https://github.blog/changelog/2025-01-16-linux-arm64-hosted-runners-now-available-for-free-in-public-repositories-public-preview/
---
 .github/workflows/main.yaml |  2 +-
 ci/run.sh                   | 12 +++++-------
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index 9face9311..8c0ff237d 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -23,7 +23,7 @@ jobs:
         - target: aarch64-apple-darwin
           os: macos-15
         - target: aarch64-unknown-linux-gnu
-          os: ubuntu-24.04
+          os: ubuntu-24.04-arm
         - target: aarch64-pc-windows-msvc
           os: windows-2025
           build_only: 1 # Can't run on x86 hosts
diff --git a/ci/run.sh b/ci/run.sh
index bb749b72a..6b2f07ab2 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -49,14 +49,12 @@ case "$target" in
     *windows-msvc*) ;;
     # FIXME: MinGW should be able to build MPFR, but setup in CI is nontrivial.
     *windows-gnu*) ;;
-    # Targets that aren't cross compiled work fine
-    # FIXME(ci): we should be able to enable aarch64 Linux here once GHA
-    # support rolls out.
-    x86_64*) flags="$flags --features libm-test/build-mpfr" ;;
-    i686*) flags="$flags --features libm-test/build-mpfr" ;;
-    i586*) flags="$flags --features libm-test/build-mpfr --features gmp-mpfr-sys/force-cross" ;;
-    # Apple aarch64 is native
+    # Targets that aren't cross compiled in CI work fine
     aarch64*apple*) flags="$flags --features libm-test/build-mpfr" ;;
+    aarch64*linux*) flags="$flags --features libm-test/build-mpfr" ;;
+    i586*) flags="$flags --features libm-test/build-mpfr --features gmp-mpfr-sys/force-cross" ;;
+    i686*) flags="$flags --features libm-test/build-mpfr" ;;
+    x86_64*) flags="$flags --features libm-test/build-mpfr" ;;
 esac
 
 # FIXME: `STATUS_DLL_NOT_FOUND` testing macros on CI.

From 4d5fc87b3270249d44e9af92637c7e7a124e34fe Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Tue, 21 Jan 2025 07:44:13 +0000
Subject: [PATCH 150/279] Run icount benchmarks once with softfloat and once
 with hardfloat

These benchmarks are fast to run, so the time cost here is pretty
minimal. Running softfloat benchmarks just ensures that we don't e.g.
test the performance of `_mm_sqrt_ss` rather than our implementation,
and running without softfloat gives us a way to see the effect of arch
intrinsics.
---
 .github/workflows/main.yaml | 24 +----------------
 ci/bench-icount.sh          | 53 +++++++++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+), 23 deletions(-)
 create mode 100755 ci/bench-icount.sh

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index 8c0ff237d..f9d3a5a15 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -170,29 +170,7 @@ jobs:
     - name: Run icount benchmarks
       env:
         GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-      run: |
-        set -eux
-        iai_home="iai-home"
-        # Download the baseline from master
-        ./ci/ci-util.py locate-baseline --download --extract
-
-        # Run iai-callgrind benchmarks
-        cargo bench --no-default-features \
-          --features unstable,unstable-float,icount \
-          --bench icount \
-          -- \
-          --save-baseline=default \
-          --home "$(pwd)/$iai_home" \
-          --regression='ir=5.0' \
-          --save-summary
-        # NB: iai-callgrind should exit on error but does not, so we inspect the sumary
-        # for errors. See  https://github.com/iai-callgrind/iai-callgrind/issues/337
-        ./ci/ci-util.py check-regressions "$iai_home"
-
-        # Name and tar the new baseline
-        name="baseline-icount-$(date -u +'%Y%m%d%H%M')-${GITHUB_SHA:0:12}"
-        echo "BASELINE_NAME=$name" >> "$GITHUB_ENV"
-        tar cJf "$name.tar.xz" "$iai_home"
+      run: ./ci/bench-icount.sh
 
     - name: Upload the benchmark baseline
       uses: actions/upload-artifact@v4
diff --git a/ci/bench-icount.sh b/ci/bench-icount.sh
new file mode 100755
index 000000000..40b3ac95c
--- /dev/null
+++ b/ci/bench-icount.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+
+set -eux
+
+iai_home="iai-home"
+
+# Download the baseline from master
+./ci/ci-util.py locate-baseline --download --extract
+
+# Run benchmarks once 
+function run_icount_benchmarks() {
+    cargo_args=(
+        "--bench" "icount"
+        "--no-default-features"
+        "--features" "unstable,unstable-float,icount"
+    )
+
+    iai_args=(
+        "--home" "$(pwd)/$iai_home"
+        "--regression=ir=5.0"
+        "--save-summary"
+    )
+
+    # Parse `cargo_arg0 cargo_arg1 -- iai_arg0 iai_arg1` syntax
+    parsing_iai_args=0
+    while [ "$#" -gt 0 ]; do
+        if [ "$parsing_iai_args" == "1" ]; then
+            iai_args+=("$1")
+        elif [ "$1" == "--" ]; then
+            parsing_iai_args=1
+        else
+            cargo_args+=("$1")
+        fi
+
+        shift
+    done
+
+    # Run iai-callgrind benchmarks
+    cargo bench "${cargo_args[@]}" -- "${iai_args[@]}"
+
+    # NB: iai-callgrind should exit on error but does not, so we inspect the sumary
+    # for errors. See  https://github.com/iai-callgrind/iai-callgrind/issues/337
+    ./ci/ci-util.py check-regressions --home "$iai_home" || true
+}
+
+# Run once with softfloats, once with arch instructions enabled
+run_icount_benchmarks --features force-soft-floats -- --save-baseline=softfloat
+run_icount_benchmarks -- --save-baseline=hardfloat
+
+# Name and tar the new baseline
+name="baseline-icount-$(date -u +'%Y%m%d%H%M')-${GITHUB_SHA:0:12}"
+echo "BASELINE_NAME=$name" >>"$GITHUB_ENV"
+tar cJf "$name.tar.xz" "$iai_home"

From a7bc18508b167a37dba761f6fb2898d4f2907310 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Tue, 21 Jan 2025 07:45:44 +0000
Subject: [PATCH 151/279] Run wall time benchmarks with `--features
 force-soft-floats`

Similar to changes for `icount` benchmarks, this ensures we aren't
testing the throughput of architecture instructions.
---
 .github/workflows/main.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index f9d3a5a15..f019c73f8 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -183,7 +183,7 @@ jobs:
         # Always use the same seed for benchmarks. Ideally we should switch to a
         # non-random generator.
         export LIBM_SEED=benchesbenchesbenchesbencheswoo!
-        cargo bench --all --features libm-test/short-benchmarks,libm-test/build-musl
+        cargo bench --all --features short-benchmarks,build-musl,force-soft-floats
 
     - name: Print test logs if available
       if: always()

From 6d6042808425c82d0a198b90213649a039c05735 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Tue, 21 Jan 2025 07:47:41 +0000
Subject: [PATCH 152/279] Add a way to ignore benchmark regression checks

Introduce a way to ignore the results of icount regression tests, by
specifying `allow-regressions` in the pull request body. This should
apply to both pull requests and the merges based on them, since `gh pr
view` automatically handles both.
---
 .github/workflows/main.yaml |  1 +
 ci/bench-icount.sh          |  7 ++++-
 ci/ci-util.py               | 63 ++++++++++++++++++++++++++++++-------
 3 files changed, 59 insertions(+), 12 deletions(-)

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index f019c73f8..7693de655 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -170,6 +170,7 @@ jobs:
     - name: Run icount benchmarks
       env:
         GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        PR_NUMBER: ${{ github.event.pull_request.number }}
       run: ./ci/bench-icount.sh
 
     - name: Upload the benchmark baseline
diff --git a/ci/bench-icount.sh b/ci/bench-icount.sh
index 40b3ac95c..3a2155f50 100755
--- a/ci/bench-icount.sh
+++ b/ci/bench-icount.sh
@@ -40,7 +40,12 @@ function run_icount_benchmarks() {
 
     # NB: iai-callgrind should exit on error but does not, so we inspect the sumary
     # for errors. See  https://github.com/iai-callgrind/iai-callgrind/issues/337
-    ./ci/ci-util.py check-regressions --home "$iai_home" || true
+    if [ -n "${PR_NUMBER:-}" ]; then
+        # If this is for a pull request, ignore regressions if specified.
+        ./ci/ci-util.py check-regressions --home "$iai_home" --allow-pr-override "$PR_NUMBER"
+    else
+        ./ci/ci-util.py check-regressions --home "$iai_home" || true
+    fi
 }
 
 # Run once with softfloats, once with arch instructions enabled
diff --git a/ci/ci-util.py b/ci/ci-util.py
index 7a9f1bd2b..7464fd425 100755
--- a/ci/ci-util.py
+++ b/ci/ci-util.py
@@ -33,11 +33,14 @@
 
             Note that `--extract` will overwrite files in `iai-home`.
 
-        check-regressions [iai-home]
+        check-regressions [--home iai-home] [--allow-pr-override pr_number]
             Check `iai-home` (or `iai-home` if unspecified) for `summary.json`
             files and see if there are any regressions. This is used as a workaround
             for `iai-callgrind` not exiting with error status; see
             <https://github.com/iai-callgrind/iai-callgrind/issues/337>.
+
+            If `--allow-pr-override` is specified, the regression check will not exit
+            with failure if any line in the PR starts with `allow-regressions`.
     """
 )
 
@@ -46,6 +49,8 @@
 DEFAULT_BRANCH = "master"
 WORKFLOW_NAME = "CI"  # Workflow that generates the benchmark artifacts
 ARTIFACT_GLOB = "baseline-icount*"
+# Place this in a PR body to skip regression checks (must be at the start of a line).
+REGRESSION_DIRECTIVE = "ci: allow-regressions"
 
 # Don't run exhaustive tests if these files change, even if they contaiin a function
 # definition.
@@ -256,12 +261,26 @@ def locate_baseline(flags: list[str]) -> None:
     eprint("baseline extracted successfully")
 
 
-def check_iai_regressions(iai_home: str | None | Path):
+def check_iai_regressions(args: list[str]):
     """Find regressions in iai summary.json files, exit with failure if any are
     found.
     """
-    if iai_home is None:
-        iai_home = "iai-home"
+
+    iai_home = "iai-home"
+    pr_number = False
+
+    while len(args) > 0:
+        match args:
+            case ["--home", home, *rest]:
+                iai_home = home
+                args = rest
+            case ["--allow-pr-override", pr_num, *rest]:
+                pr_number = pr_num
+                args = rest
+            case _:
+                eprint(USAGE)
+                exit(1)
+
     iai_home = Path(iai_home)
 
     found_summaries = False
@@ -286,9 +305,33 @@ def check_iai_regressions(iai_home: str | None | Path):
         eprint(f"did not find any summary.json files within {iai_home}")
         exit(1)
 
-    if len(regressions) > 0:
-        eprint("Found regressions:", json.dumps(regressions, indent=4))
-        exit(1)
+    if len(regressions) == 0:
+        eprint("No regressions found")
+        return
+
+    eprint("Found regressions:", json.dumps(regressions, indent=4))
+
+    if pr_number is not None:
+        pr_info = sp.check_output(
+            [
+                "gh",
+                "pr",
+                "view",
+                str(pr_number),
+                "--json=number,commits,body,createdAt",
+                "--jq=.commits |= map(.oid)",
+            ],
+            text=True,
+        )
+        pr = json.loads(pr_info)
+        eprint("PR info:", json.dumps(pr, indent=4))
+
+        lines = pr["body"].splitlines()
+        if any(line.startswith(REGRESSION_DIRECTIVE) for line in lines):
+            eprint("PR allows regressions, returning")
+            return
+
+    exit(1)
 
 
 def main():
@@ -299,10 +342,8 @@ def main():
             print(f"matrix={output}")
         case ["locate-baseline", *flags]:
             locate_baseline(flags)
-        case ["check-regressions"]:
-            check_iai_regressions(None)
-        case ["check-regressions", iai_home]:
-            check_iai_regressions(iai_home)
+        case ["check-regressions", *args]:
+            check_iai_regressions(args)
         case ["--help" | "-h"]:
             print(USAGE)
             exit()

From 32dcbd930e36cc6e322ecbb33fc95b6918173f5e Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Tue, 21 Jan 2025 07:52:26 +0000
Subject: [PATCH 153/279] Ignore files relevant to benchmarking

---
 .gitignore | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.gitignore b/.gitignore
index 4e9c9c03d..a447c34cd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,7 @@ target
 Cargo.lock
 musl/
 **.tar.gz
+
+# Benchmark cache
+iai-home
+baseline-*

From 339511f2146c2c03f62f128cfaa350e049f53f21 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 22 Jan 2025 00:58:33 +0000
Subject: [PATCH 154/279] Remove trailing whitespace in scripts, run
 JuliaFormatter

---
 ci/bench-icount.sh                     |  2 +-
 ci/run.sh                              |  4 +-
 crates/libm-test/examples/plot_file.jl | 52 ++++++++++++++++----------
 3 files changed, 36 insertions(+), 22 deletions(-)

diff --git a/ci/bench-icount.sh b/ci/bench-icount.sh
index 3a2155f50..4d93e257a 100755
--- a/ci/bench-icount.sh
+++ b/ci/bench-icount.sh
@@ -7,7 +7,7 @@ iai_home="iai-home"
 # Download the baseline from master
 ./ci/ci-util.py locate-baseline --download --extract
 
-# Run benchmarks once 
+# Run benchmarks once
 function run_icount_benchmarks() {
     cargo_args=(
         "--bench" "icount"
diff --git a/ci/run.sh b/ci/run.sh
index 6b2f07ab2..296986d97 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -90,7 +90,7 @@ if [ "$nextest" = "1" ]; then
         echo "dir = \"$CARGO_TARGET_DIR/nextest\"" >> "$cfg_file"
         cfg_flag="--config-file $cfg_file"
     fi
-    
+
     cmd="cargo nextest run ${cfg_flag:-} --max-fail=10 $flags"
     profile="--cargo-profile"
 fi
@@ -111,7 +111,7 @@ $cmd --features unstable-intrinsics --benches
 
 # Test the same in release mode, which also increases coverage. Also ensure
 # the soft float routines are checked.
-$cmd "$profile" release-checked 
+$cmd "$profile" release-checked
 $cmd "$profile" release-checked --features force-soft-floats
 $cmd "$profile" release-checked --features unstable-intrinsics
 $cmd "$profile" release-checked --features unstable-intrinsics --benches
diff --git a/crates/libm-test/examples/plot_file.jl b/crates/libm-test/examples/plot_file.jl
index 14a128303..acffd9756 100644
--- a/crates/libm-test/examples/plot_file.jl
+++ b/crates/libm-test/examples/plot_file.jl
@@ -13,7 +13,7 @@ using CairoMakie
 using TOML
 
 function main()::Nothing
-    CairoMakie.activate!(px_per_unit=10)
+    CairoMakie.activate!(px_per_unit = 10)
     config_path = ARGS[1]
 
     cfg = Dict()
@@ -75,15 +75,25 @@ function plot_one(
     gen_x = map((v) -> parse(Float32, v), inputs)
 
     do_plot(
-        fig, gen_x, func, xlims[1], xlims[2],
+        fig,
+        gen_x,
+        func,
+        xlims[1],
+        xlims[2],
         "$fn_name $gen_name (linear scale)",
-        lin_out_file, false,
+        lin_out_file,
+        false,
     )
 
     do_plot(
-        fig, gen_x, func, xlims_log[1], xlims_log[2],
+        fig,
+        gen_x,
+        func,
+        xlims_log[1],
+        xlims_log[2],
         "$fn_name $gen_name (log scale)",
-        log_out_file, true,
+        log_out_file,
+        true,
     )
 end
 
@@ -97,7 +107,7 @@ function do_plot(
     title::String,
     out_file::String,
     logscale::Bool,
-)::Nothing where F<:AbstractFloat
+)::Nothing where {F<:AbstractFloat}
     println("plotting $title")
 
     # `gen_x` is the values the generator produces. `actual_x` is for plotting a
@@ -116,32 +126,36 @@ function do_plot(
         actual_x = LinRange(input_min, input_max, steps)
         xscale = identity
     end
-    
+
     gen_y = @. func(gen_x)
     actual_y = @. func(actual_x)
 
-    ax = Axis(fig[1, 1], xscale=xscale, title=title)
+    ax = Axis(fig[1, 1], xscale = xscale, title = title)
 
     lines!(
-        ax, actual_x, actual_y, color=(:lightblue, 0.6),
-        linewidth=6.0, label="true function",
+        ax,
+        actual_x,
+        actual_y,
+        color = (:lightblue, 0.6),
+        linewidth = 6.0,
+        label = "true function",
     )
     scatter!(
-        ax, gen_x, gen_y, color=(:darkblue, 0.9),
-        markersize=markersize, label="checked inputs",
+        ax,
+        gen_x,
+        gen_y,
+        color = (:darkblue, 0.9),
+        markersize = markersize,
+        label = "checked inputs",
     )
-    axislegend(ax, position=:rb, framevisible=false)
+    axislegend(ax, position = :rb, framevisible = false)
 
     save(out_file, fig)
     delete!(ax)
 end
 
 "Apply a function, returning the default if there is a domain error"
-function map_or(
-    input::AbstractFloat,
-    f::Function,
-    default::Any
-)::Union{AbstractFloat,Any}
+function map_or(input::AbstractFloat, f::Function, default::Any)::Union{AbstractFloat,Any}
     try
         return f(input)
     catch
@@ -151,7 +165,7 @@ end
 
 # Operations for logarithms that are symmetric about 0
 C = 10
-symlog10(x::Number) = sign(x) * (log10(1 + abs(x)/(10^C)))
+symlog10(x::Number) = sign(x) * (log10(1 + abs(x) / (10^C)))
 sympow10(x::Number) = (10^C) * (10^x - 1)
 
 main()

From e4cfb0d6ca268ad4b2ab449561feca3cd07ae80c Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 22 Jan 2025 02:35:29 +0000
Subject: [PATCH 155/279] Add a retry to the musl download

This download has occasionally been failing in CI recently. Add a retry
so this is less likely to cause the workflow to fail.
---
 ci/download-musl.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/download-musl.sh b/ci/download-musl.sh
index 039e96157..8a8c58550 100755
--- a/ci/download-musl.sh
+++ b/ci/download-musl.sh
@@ -7,7 +7,7 @@ fname=musl-1.2.5.tar.gz
 sha=a9a118bbe84d8764da0ea0d28b3ab3fae8477fc7e4085d90102b8596fc7c75e4
 
 mkdir musl
-curl -L "https://musl.libc.org/releases/$fname" -O
+curl -L "https://musl.libc.org/releases/$fname" -O --retry 5
 
 case "$(uname -s)" in
     MINGW*)

From de51755e299f63bb877c666fcbc563dfcbf14a13 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 15 Jan 2025 11:34:17 +0000
Subject: [PATCH 156/279] Don't set `opt_level` in the musl build script

`cc` automatically reads this from Cargo's `OPT_LEVEL` variable so we
don't need to set it explicitly. Remove this so running in a debugger
makes more sense.
---
 crates/musl-math-sys/build.rs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/crates/musl-math-sys/build.rs b/crates/musl-math-sys/build.rs
index 03deb4ff0..d75748159 100644
--- a/crates/musl-math-sys/build.rs
+++ b/crates/musl-math-sys/build.rs
@@ -151,7 +151,6 @@ fn build_musl_math(cfg: &Config) {
         .flag_if_supported("-ffreestanding")
         .flag_if_supported("-nostdinc")
         .define("_ALL_SOURCE", "1")
-        .opt_level(3)
         .define(
             "ROOT_INCLUDE_FEATURES",
             Some(musl_dir.join("include/features.h").to_str().unwrap()),

From 855e15ac4d1033e3dc0789de09514f7a0b000e12 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 22 Jan 2025 05:29:36 +0000
Subject: [PATCH 157/279] Enable `force-soft-floats` for extensive tests

Any architecture-specific float operations are likely to consist of only
a few instructions, but the softfloat implementations are much more
complex. Ensure this is what gets tested.
---
 .github/workflows/main.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index 7693de655..89c5facef 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -270,7 +270,7 @@ jobs:
           fi
 
           LIBM_EXTENSIVE_TESTS="$CHANGED" cargo t \
-            --features build-mpfr,unstable \
+            --features build-mpfr,unstable,force-soft-floats \
             --profile release-checked \
             -- extensive
       - name: Print test logs if available

From 3cc2c03b0d34a92f29dfd1d3b18b54cbab102071 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sun, 12 Jan 2025 11:45:40 +0000
Subject: [PATCH 158/279] Port the most recent version of Musl's `sqrt` as a
 generic algorithm

Musl commit 97e9b73d59 ("math: new software sqrt") adds a new algorithm
using Goldschmidt division. Port this algorithm to Rust and make it
generic, which shows a notable performance improvement over the existing
algorithm.

This also allows adding square root routines for `f16` and `f128`.
---
 etc/function-definitions.json  |   2 +
 src/math/generic/mod.rs        |   2 +
 src/math/generic/sqrt.rs       | 419 +++++++++++++++++++++++++++++++++
 src/math/sqrt.rs               | 252 +-------------------
 src/math/sqrtf.rs              | 133 +----------
 src/math/support/int_traits.rs |   2 +
 src/math/support/macros.rs     |  22 +-
 src/math/support/mod.rs        |  11 +
 8 files changed, 450 insertions(+), 393 deletions(-)
 create mode 100644 src/math/generic/sqrt.rs

diff --git a/etc/function-definitions.json b/etc/function-definitions.json
index dbaac931c..9f7c8ab25 100644
--- a/etc/function-definitions.json
+++ b/etc/function-definitions.json
@@ -704,6 +704,7 @@
             "src/libm_helper.rs",
             "src/math/arch/i686.rs",
             "src/math/arch/wasm32.rs",
+            "src/math/generic/sqrt.rs",
             "src/math/sqrt.rs"
         ],
         "type": "f64"
@@ -712,6 +713,7 @@
         "sources": [
             "src/math/arch/i686.rs",
             "src/math/arch/wasm32.rs",
+            "src/math/generic/sqrt.rs",
             "src/math/sqrtf.rs"
         ],
         "type": "f32"
diff --git a/src/math/generic/mod.rs b/src/math/generic/mod.rs
index 2b068d6c5..3b5a2c3ef 100644
--- a/src/math/generic/mod.rs
+++ b/src/math/generic/mod.rs
@@ -1,9 +1,11 @@
 mod copysign;
 mod fabs;
 mod fdim;
+mod sqrt;
 mod trunc;
 
 pub use copysign::copysign;
 pub use fabs::fabs;
 pub use fdim::fdim;
+pub use sqrt::sqrt;
 pub use trunc::trunc;
diff --git a/src/math/generic/sqrt.rs b/src/math/generic/sqrt.rs
new file mode 100644
index 000000000..a2e054f3c
--- /dev/null
+++ b/src/math/generic/sqrt.rs
@@ -0,0 +1,419 @@
+/* SPDX-License-Identifier: MIT */
+/* origin: musl src/math/sqrt.c. Ported to generic Rust algorithm in 2025, TG. */
+
+//! Generic square root algorithm.
+//!
+//! This routine operates around `m_u2`, a U.2 (fixed point with two integral bits) mantissa
+//! within the range [1, 4). A table lookup provides an initial estimate, then goldschmidt
+//! iterations at various widths are used to approach the real values.
+//!
+//! For the iterations, `r` is a U0 number that approaches `1/sqrt(m_u2)`, and `s` is a U2 number
+//! that approaches `sqrt(m_u2)`. Recall that m_u2 ∈ [1, 4).
+//!
+//! With Newton-Raphson iterations, this would be:
+//!
+//! - `w = r * r           w ~ 1 / m`
+//! - `u = 3 - m * w       u ~ 3 - m * w = 3 - m / m = 2`
+//! - `r = r * u / 2       r ~ r`
+//!
+//! (Note that the righthand column does not show anything analytically meaningful (i.e. r ~ r),
+//! since the value of performing one iteration is in reducing the error representable by `~`).
+//!
+//! Instead of Newton-Raphson iterations, Goldschmidt iterations are used to calculate
+//! `s = m * r`:
+//!
+//! - `s = m * r           s ~ m / sqrt(m)`
+//! - `u = 3 - s * r       u ~ 3 - (m / sqrt(m)) * (1 / sqrt(m)) = 3 - m / m = 2`
+//! - `r = r * u / 2       r ~ r`
+//! - `s = s * u / 2       s ~ s`
+//!
+//! The above is precise because it uses the original value `m`. There is also a faster version
+//! that performs fewer steps but does not use `m`:
+//!
+//! - `u = 3 - s * r       u ~ 3 - 1`
+//! - `r = r * u / 2       r ~ r`
+//! - `s = s * u / 2       s ~ s`
+//!
+//! Rounding errors accumulate faster with the second version, so it is only used for subsequent
+//! iterations within the same width integer. The first version is always used for the first
+//! iteration at a new width in order to avoid this accumulation.
+//!
+//! Goldschmidt has the advantage over Newton-Raphson that `sqrt(x)` and `1/sqrt(x)` are
+//! computed at the same time, i.e. there is no need to calculate `1/sqrt(x)` and invert it.
+
+use super::super::support::{IntTy, cold_path, raise_invalid};
+use super::super::{CastFrom, CastInto, DInt, Float, HInt, Int, MinInt};
+
+pub fn sqrt<F>(x: F) -> F
+where
+    F: Float + SqrtHelper,
+    F::Int: HInt,
+    F::Int: From<u8>,
+    F::Int: From<F::ISet2>,
+    F::Int: CastInto<F::ISet1>,
+    F::Int: CastInto<F::ISet2>,
+    u32: CastInto<F::Int>,
+{
+    let zero = IntTy::<F>::ZERO;
+    let one = IntTy::<F>::ONE;
+
+    let mut ix = x.to_bits();
+
+    // Top is the exponent and sign, which may or may not be shifted. If the float fits into a
+    // `u32`, we can get by without paying shifting costs.
+    let noshift = F::BITS <= u32::BITS;
+    let (mut top, special_case) = if noshift {
+        let exp_lsb = one << F::SIG_BITS;
+        let special_case = ix.wrapping_sub(exp_lsb) >= F::EXP_MASK - exp_lsb;
+        (Exp::NoShift(()), special_case)
+    } else {
+        let top = u32::cast_from(ix >> F::SIG_BITS);
+        let special_case = top.wrapping_sub(1) >= F::EXP_MAX - 1;
+        (Exp::Shifted(top), special_case)
+    };
+
+    // Handle NaN, zero, and out of domain (<= 0)
+    if special_case {
+        cold_path();
+
+        // +/-0
+        if ix << 1 == zero {
+            return x;
+        }
+
+        // Positive infinity
+        if ix == F::EXP_MASK {
+            return x;
+        }
+
+        // NaN or negative
+        if ix > F::EXP_MASK {
+            return raise_invalid(x);
+        }
+
+        // Normalize subnormals by multiplying by 1.0 << SIG_BITS (e.g. 0x1p52 for doubles).
+        let scaled = x * F::from_parts(false, (F::SIG_BITS + F::EXP_BIAS) as i32, zero);
+        ix = scaled.to_bits();
+        match top {
+            Exp::Shifted(ref mut v) => {
+                *v = scaled.exp().unsigned();
+                *v = (*v).wrapping_sub(F::SIG_BITS);
+            }
+            Exp::NoShift(()) => {
+                ix = ix.wrapping_sub((F::SIG_BITS << F::SIG_BITS).cast());
+            }
+        }
+    }
+
+    // Reduce arguments such that `x = 4^e * m`:
+    //
+    // - m_u2 ∈ [1, 4), a fixed point U2.BITS number
+    // - 2^e is the exponent part of the result
+    let (m_u2, exp) = match top {
+        Exp::Shifted(top) => {
+            // We now know `x` is positive, so `top` is just its (biased) exponent
+            let mut e = top;
+            // Construct a fixed point representation of the mantissa.
+            let mut m_u2 = (ix | F::IMPLICIT_BIT) << F::EXP_BITS;
+            let even = (e & 1) != 0;
+            if even {
+                m_u2 >>= 1;
+            }
+            e = (e.wrapping_add(F::EXP_MAX >> 1)) >> 1;
+            (m_u2, Exp::Shifted(e))
+        }
+        Exp::NoShift(()) => {
+            let even = ix & (one << F::SIG_BITS) != zero;
+
+            // Exponent part of the return value
+            let mut e_noshift = ix >> 1;
+            // ey &= (F::EXP_MASK << 2) >> 2; // clear the top exponent bit (result = 1.0)
+            e_noshift += (F::EXP_MASK ^ (F::SIGN_MASK >> 1)) >> 1;
+            e_noshift &= F::EXP_MASK;
+
+            let m1 = (ix << F::EXP_BITS) | F::SIGN_MASK;
+            let m0 = (ix << (F::EXP_BITS - 1)) & !F::SIGN_MASK;
+            let m_u2 = if even { m0 } else { m1 };
+
+            (m_u2, Exp::NoShift(e_noshift))
+        }
+    };
+
+    // Extract the top 6 bits of the significand with the lowest bit of the exponent.
+    let i = usize::cast_from(ix >> (F::SIG_BITS - 6)) & 0b1111111;
+
+    // Start with an initial guess for `r = 1 / sqrt(m)` from the table, and shift `m` as an
+    // initial value for `s = sqrt(m)`. See the module documentation for details.
+    let r1_u0: F::ISet1 = F::ISet1::cast_from(RSQRT_TAB[i]) << (F::ISet1::BITS - 16);
+    let s1_u2: F::ISet1 = ((m_u2) >> (F::BITS - F::ISet1::BITS)).cast();
+
+    // Perform iterations, if any, at quarter width (used for `f128`).
+    let (r1_u0, _s1_u2) = goldschmidt::<F, F::ISet1>(r1_u0, s1_u2, F::SET1_ROUNDS, false);
+
+    // Widen values and perform iterations at half width (used for `f64` and `f128`).
+    let r2_u0: F::ISet2 = F::ISet2::from(r1_u0) << (F::ISet2::BITS - F::ISet1::BITS);
+    let s2_u2: F::ISet2 = ((m_u2) >> (F::BITS - F::ISet2::BITS)).cast();
+    let (r2_u0, _s2_u2) = goldschmidt::<F, F::ISet2>(r2_u0, s2_u2, F::SET2_ROUNDS, false);
+
+    // Perform final iterations at full width (used for all float types).
+    let r_u0: F::Int = F::Int::from(r2_u0) << (F::BITS - F::ISet2::BITS);
+    let s_u2: F::Int = m_u2;
+    let (_r_u0, s_u2) = goldschmidt::<F, F::Int>(r_u0, s_u2, F::FINAL_ROUNDS, true);
+
+    // Shift back to mantissa position.
+    let mut m = s_u2 >> (F::EXP_BITS - 2);
+
+    // The musl source includes the following comment (with literals replaced):
+    //
+    // > s < sqrt(m) < s + 0x1.09p-SIG_BITS
+    // > compute nearest rounded result: the nearest result to SIG_BITS bits is either s or
+    // > s+0x1p-SIG_BITS, we can decide by comparing (2^SIG_BITS s + 0.5)^2 to 2^(2*SIG_BITS) m.
+    //
+    // Expanding this with , with `SIG_BITS = p` and adjusting based on the operations done to
+    // `d0` and `d1`:
+    //
+    // - `2^(2p)m ≟ ((2^p)m + 0.5)^2`
+    // - `2^(2p)m ≟ 2^(2p)m^2 + (2^p)m + 0.25`
+    // - `2^(2p)m - m^2 ≟ (2^(2p) - 1)m^2 + (2^p)m + 0.25`
+    // - `(1 - 2^(2p))m + m^2 ≟ (1 - 2^(2p))m^2 + (1 - 2^p)m + 0.25` (?)
+    //
+    // I do not follow how the rounding bit is extracted from this comparison with the below
+    // operations. In any case, the algorithm is well tested.
+
+    // The value needed to shift `m_u2` by to create `m*2^(2p)`. `2p = 2 * F::SIG_BITS`,
+    // `F::BITS - 2` accounts for the offset that `m_u2` already has.
+    let shift = 2 * F::SIG_BITS - (F::BITS - 2);
+
+    // `2^(2p)m - m^2`
+    let d0 = (m_u2 << shift).wrapping_sub(m.wrapping_mul(m));
+    // `m - 2^(2p)m + m^2`
+    let d1 = m.wrapping_sub(d0);
+    m += d1 >> (F::BITS - 1);
+    m &= F::SIG_MASK;
+
+    match exp {
+        Exp::Shifted(e) => m |= IntTy::<F>::cast_from(e) << F::SIG_BITS,
+        Exp::NoShift(e) => m |= e,
+    };
+
+    let mut y = F::from_bits(m);
+
+    // FIXME(f16): the fenv math does not work for `f16`
+    if F::BITS > 16 {
+        // Handle rounding and inexact. `(m + 1)^2 == 2^shift m` is exact; for all other cases, add
+        // a tiny value to cause fenv effects.
+        let d2 = d1.wrapping_add(m).wrapping_add(one);
+        let mut tiny = if d2 == zero {
+            cold_path();
+            zero
+        } else {
+            F::IMPLICIT_BIT
+        };
+
+        tiny |= (d1 ^ d2) & F::SIGN_MASK;
+        let t = F::from_bits(tiny);
+        y = y + t;
+    }
+
+    y
+}
+
+/// Multiply at the wider integer size, returning the high half.
+fn wmulh<I: HInt>(a: I, b: I) -> I {
+    a.widen_mul(b).hi()
+}
+
+/// Perform `count` goldschmidt iterations, returning `(r_u0, s_u?)`.
+///
+/// - `r_u0` is the reciprocal `r ~ 1 / sqrt(m)`, as U0.
+/// - `s_u2` is the square root, `s ~ sqrt(m)`, as U2.
+/// - `count` is the number of iterations to perform.
+/// - `final_set` should be true if this is the last round (same-sized integer). If so, the
+///   returned `s` will be U3, for later shifting. Otherwise, the returned `s` is U2.
+///
+/// Note that performance relies on the optimizer being able to unroll these loops (reasonably
+/// trivial, `count` is a constant when called).
+#[inline]
+fn goldschmidt<F, I>(mut r_u0: I, mut s_u2: I, count: u32, final_set: bool) -> (I, I)
+where
+    F: SqrtHelper,
+    I: HInt + From<u8>,
+{
+    let three_u2 = I::from(0b11u8) << (I::BITS - 2);
+    let mut u_u0 = r_u0;
+
+    for i in 0..count {
+        // First iteration: `s = m*r` (`u_u0 = r_u0` set above)
+        // Subsequent iterations: `s=s*u/2`
+        s_u2 = wmulh(s_u2, u_u0);
+
+        // Perform `s /= 2` if:
+        //
+        // 1. This is not the first iteration (the first iteration is `s = m*r`)...
+        // 2. ... and this is not the last set of iterations
+        // 3. ... or, if this is the last set, it is not the last iteration
+        //
+        // This step is not performed for the final iteration because the shift is combined with
+        // a later shift (moving `s` into the mantissa).
+        if i > 0 && (!final_set || i + 1 < count) {
+            s_u2 <<= 1;
+        }
+
+        // u = 3 - s*r
+        let d_u2 = wmulh(s_u2, r_u0);
+        u_u0 = three_u2.wrapping_sub(d_u2);
+
+        // r = r*u/2
+        r_u0 = wmulh(r_u0, u_u0) << 1;
+    }
+
+    (r_u0, s_u2)
+}
+
+/// Representation of whether we shift the exponent into a `u32`, or modify it in place to save
+/// the shift operations.
+enum Exp<T> {
+    /// The exponent has been shifted to a `u32` and is LSB-aligned.
+    Shifted(u32),
+    /// The exponent is in its natural position in integer repr.
+    NoShift(T),
+}
+
+/// Size-specific constants related to the square root routine.
+pub trait SqrtHelper: Float {
+    /// Integer for the first set of rounds. If unused, set to the same type as the next set.
+    type ISet1: HInt + Into<Self::ISet2> + CastFrom<Self::Int> + From<u8>;
+    /// Integer for the second set of rounds. If unused, set to the same type as the next set.
+    type ISet2: HInt + From<Self::ISet1> + From<u8>;
+
+    /// Number of rounds at `ISet1`.
+    const SET1_ROUNDS: u32 = 0;
+    /// Number of rounds at `ISet2`.
+    const SET2_ROUNDS: u32 = 0;
+    /// Number of rounds at `Self::Int`.
+    const FINAL_ROUNDS: u32;
+}
+
+impl SqrtHelper for f32 {
+    type ISet1 = u32; // unused
+    type ISet2 = u32; // unused
+
+    const FINAL_ROUNDS: u32 = 3;
+}
+
+impl SqrtHelper for f64 {
+    type ISet1 = u32; // unused
+    type ISet2 = u32;
+
+    const SET2_ROUNDS: u32 = 2;
+    const FINAL_ROUNDS: u32 = 2;
+}
+
+/// A U0.16 representation of `1/sqrt(x)`.
+///
+// / The index is a 7-bit number consisting of a single exponent bit and 6 bits of significand.
+#[rustfmt::skip]
+static RSQRT_TAB: [u16; 128] = [
+    0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
+    0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
+    0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
+    0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
+    0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
+    0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
+    0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
+    0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
+    0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
+    0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
+    0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
+    0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
+    0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
+    0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
+    0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
+    0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
+];
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Test against edge cases from https://en.cppreference.com/w/cpp/numeric/math/sqrt
+    fn spec_test<F>()
+    where
+        F: Float + SqrtHelper,
+        F::Int: HInt,
+        F::Int: From<u8>,
+        F::Int: From<F::ISet2>,
+        F::Int: CastInto<F::ISet1>,
+        F::Int: CastInto<F::ISet2>,
+        u32: CastInto<F::Int>,
+    {
+        // Not Asserted: FE_INVALID exception is raised if argument is negative.
+        assert!(sqrt(F::NEG_ONE).is_nan());
+        assert!(sqrt(F::NAN).is_nan());
+        for f in [F::ZERO, F::NEG_ZERO, F::INFINITY].iter().copied() {
+            assert_biteq!(sqrt(f), f);
+        }
+    }
+
+    #[test]
+    fn sanity_check_f32() {
+        assert_biteq!(sqrt(100.0f32), 10.0);
+        assert_biteq!(sqrt(4.0f32), 2.0);
+    }
+
+    #[test]
+    fn spec_tests_f32() {
+        spec_test::<f32>();
+    }
+
+    #[test]
+    #[allow(clippy::approx_constant)]
+    fn conformance_tests_f32() {
+        let cases = [
+            (f32::PI, 0x3fe2dfc5_u32),
+            (10000.0f32, 0x42c80000_u32),
+            (f32::from_bits(0x0000000f), 0x1b2f456f_u32),
+            (f32::INFINITY, f32::INFINITY.to_bits()),
+        ];
+
+        for (input, output) in cases {
+            assert_biteq!(
+                sqrt(input),
+                f32::from_bits(output),
+                "input: {input:?} ({:#018x})",
+                input.to_bits()
+            );
+        }
+    }
+
+    #[test]
+    fn sanity_check_f64() {
+        assert_biteq!(sqrt(100.0f64), 10.0);
+        assert_biteq!(sqrt(4.0f64), 2.0);
+    }
+
+    #[test]
+    fn spec_tests_f64() {
+        spec_test::<f64>();
+    }
+
+    #[test]
+    #[allow(clippy::approx_constant)]
+    fn conformance_tests_f64() {
+        let cases = [
+            (f64::PI, 0x3ffc5bf891b4ef6a_u64),
+            (10000.0, 0x4059000000000000_u64),
+            (f64::from_bits(0x0000000f), 0x1e7efbdeb14f4eda_u64),
+            (f64::INFINITY, f64::INFINITY.to_bits()),
+        ];
+
+        for (input, output) in cases {
+            assert_biteq!(
+                sqrt(input),
+                f64::from_bits(output),
+                "input: {input:?} ({:#018x})",
+                input.to_bits()
+            );
+        }
+    }
+}
diff --git a/src/math/sqrt.rs b/src/math/sqrt.rs
index 2fd7070b1..0e1d0cd2c 100644
--- a/src/math/sqrt.rs
+++ b/src/math/sqrt.rs
@@ -1,83 +1,3 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/e_sqrt.c */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunSoft, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-/* sqrt(x)
- * Return correctly rounded sqrt.
- *           ------------------------------------------
- *           |  Use the hardware sqrt if you have one |
- *           ------------------------------------------
- * Method:
- *   Bit by bit method using integer arithmetic. (Slow, but portable)
- *   1. Normalization
- *      Scale x to y in [1,4) with even powers of 2:
- *      find an integer k such that  1 <= (y=x*2^(2k)) < 4, then
- *              sqrt(x) = 2^k * sqrt(y)
- *   2. Bit by bit computation
- *      Let q  = sqrt(y) truncated to i bit after binary point (q = 1),
- *           i                                                   0
- *                                     i+1         2
- *          s  = 2*q , and      y  =  2   * ( y - q  ).         (1)
- *           i      i            i                 i
- *
- *      To compute q    from q , one checks whether
- *                  i+1       i
- *
- *                            -(i+1) 2
- *                      (q + 2      ) <= y.                     (2)
- *                        i
- *                                                            -(i+1)
- *      If (2) is false, then q   = q ; otherwise q   = q  + 2      .
- *                             i+1   i             i+1   i
- *
- *      With some algebraic manipulation, it is not difficult to see
- *      that (2) is equivalent to
- *                             -(i+1)
- *                      s  +  2       <= y                      (3)
- *                       i                i
- *
- *      The advantage of (3) is that s  and y  can be computed by
- *                                    i      i
- *      the following recurrence formula:
- *          if (3) is false
- *
- *          s     =  s  ,       y    = y   ;                    (4)
- *           i+1      i          i+1    i
- *
- *          otherwise,
- *                         -i                     -(i+1)
- *          s     =  s  + 2  ,  y    = y  -  s  - 2             (5)
- *           i+1      i          i+1    i     i
- *
- *      One may easily use induction to prove (4) and (5).
- *      Note. Since the left hand side of (3) contain only i+2 bits,
- *            it does not necessary to do a full (53-bit) comparison
- *            in (3).
- *   3. Final rounding
- *      After generating the 53 bits result, we compute one more bit.
- *      Together with the remainder, we can decide whether the
- *      result is exact, bigger than 1/2ulp, or less than 1/2ulp
- *      (it will never equal to 1/2ulp).
- *      The rounding mode can be detected by checking whether
- *      huge + tiny is equal to huge, and whether huge - tiny is
- *      equal to huge for some floating point number "huge" and "tiny".
- *
- * Special cases:
- *      sqrt(+-0) = +-0         ... exact
- *      sqrt(inf) = inf
- *      sqrt(-ve) = NaN         ... with invalid signal
- *      sqrt(NaN) = NaN         ... with invalid signal for signaling NaN
- */
-
-use core::f64;
-
 /// The square root of `x` (f64).
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn sqrt(x: f64) -> f64 {
@@ -90,175 +10,5 @@ pub fn sqrt(x: f64) -> f64 {
         args: x,
     }
 
-    use core::num::Wrapping;
-
-    const TINY: f64 = 1.0e-300;
-
-    let mut z: f64;
-    let sign: Wrapping<u32> = Wrapping(0x80000000);
-    let mut ix0: i32;
-    let mut s0: i32;
-    let mut q: i32;
-    let mut m: i32;
-    let mut t: i32;
-    let mut i: i32;
-    let mut r: Wrapping<u32>;
-    let mut t1: Wrapping<u32>;
-    let mut s1: Wrapping<u32>;
-    let mut ix1: Wrapping<u32>;
-    let mut q1: Wrapping<u32>;
-
-    ix0 = (x.to_bits() >> 32) as i32;
-    ix1 = Wrapping(x.to_bits() as u32);
-
-    /* take care of Inf and NaN */
-    if (ix0 & 0x7ff00000) == 0x7ff00000 {
-        return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */
-    }
-    /* take care of zero */
-    if ix0 <= 0 {
-        if ((ix0 & !(sign.0 as i32)) | ix1.0 as i32) == 0 {
-            return x; /* sqrt(+-0) = +-0 */
-        }
-        if ix0 < 0 {
-            return (x - x) / (x - x); /* sqrt(-ve) = sNaN */
-        }
-    }
-    /* normalize x */
-    m = ix0 >> 20;
-    if m == 0 {
-        /* subnormal x */
-        while ix0 == 0 {
-            m -= 21;
-            ix0 |= (ix1 >> 11).0 as i32;
-            ix1 <<= 21;
-        }
-        i = 0;
-        while (ix0 & 0x00100000) == 0 {
-            i += 1;
-            ix0 <<= 1;
-        }
-        m -= i - 1;
-        ix0 |= (ix1 >> (32 - i) as usize).0 as i32;
-        ix1 = ix1 << i as usize;
-    }
-    m -= 1023; /* unbias exponent */
-    ix0 = (ix0 & 0x000fffff) | 0x00100000;
-    if (m & 1) == 1 {
-        /* odd m, double x to make it even */
-        ix0 *= 2;
-        ix0 += ((ix1 & sign) >> 31).0 as i32;
-        ix1 += ix1;
-    }
-    m >>= 1; /* m = [m/2] */
-
-    /* generate sqrt(x) bit by bit */
-    ix0 *= 2;
-    ix0 += ((ix1 & sign) >> 31).0 as i32;
-    ix1 += ix1;
-    q = 0; /* [q,q1] = sqrt(x) */
-    q1 = Wrapping(0);
-    s0 = 0;
-    s1 = Wrapping(0);
-    r = Wrapping(0x00200000); /* r = moving bit from right to left */
-
-    while r != Wrapping(0) {
-        t = s0 + r.0 as i32;
-        if t <= ix0 {
-            s0 = t + r.0 as i32;
-            ix0 -= t;
-            q += r.0 as i32;
-        }
-        ix0 *= 2;
-        ix0 += ((ix1 & sign) >> 31).0 as i32;
-        ix1 += ix1;
-        r >>= 1;
-    }
-
-    r = sign;
-    while r != Wrapping(0) {
-        t1 = s1 + r;
-        t = s0;
-        if t < ix0 || (t == ix0 && t1 <= ix1) {
-            s1 = t1 + r;
-            if (t1 & sign) == sign && (s1 & sign) == Wrapping(0) {
-                s0 += 1;
-            }
-            ix0 -= t;
-            if ix1 < t1 {
-                ix0 -= 1;
-            }
-            ix1 -= t1;
-            q1 += r;
-        }
-        ix0 *= 2;
-        ix0 += ((ix1 & sign) >> 31).0 as i32;
-        ix1 += ix1;
-        r >>= 1;
-    }
-
-    /* use floating add to find out rounding direction */
-    if (ix0 as u32 | ix1.0) != 0 {
-        z = 1.0 - TINY; /* raise inexact flag */
-        if z >= 1.0 {
-            z = 1.0 + TINY;
-            if q1.0 == 0xffffffff {
-                q1 = Wrapping(0);
-                q += 1;
-            } else if z > 1.0 {
-                if q1.0 == 0xfffffffe {
-                    q += 1;
-                }
-                q1 += Wrapping(2);
-            } else {
-                q1 += q1 & Wrapping(1);
-            }
-        }
-    }
-    ix0 = (q >> 1) + 0x3fe00000;
-    ix1 = q1 >> 1;
-    if (q & 1) == 1 {
-        ix1 |= sign;
-    }
-    ix0 += m << 20;
-    f64::from_bits(((ix0 as u64) << 32) | ix1.0 as u64)
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn sanity_check() {
-        assert_eq!(sqrt(100.0), 10.0);
-        assert_eq!(sqrt(4.0), 2.0);
-    }
-
-    /// The spec: https://en.cppreference.com/w/cpp/numeric/math/sqrt
-    #[test]
-    fn spec_tests() {
-        // Not Asserted: FE_INVALID exception is raised if argument is negative.
-        assert!(sqrt(-1.0).is_nan());
-        assert!(sqrt(f64::NAN).is_nan());
-        for f in [0.0, -0.0, f64::INFINITY].iter().copied() {
-            assert_eq!(sqrt(f), f);
-        }
-    }
-
-    #[test]
-    #[allow(clippy::approx_constant)]
-    fn conformance_tests() {
-        let values = [3.14159265359, 10000.0, f64::from_bits(0x0000000f), f64::INFINITY];
-        let results = [
-            4610661241675116657u64,
-            4636737291354636288u64,
-            2197470602079456986u64,
-            9218868437227405312u64,
-        ];
-
-        for i in 0..values.len() {
-            let bits = f64::to_bits(sqrt(values[i]));
-            assert_eq!(results[i], bits);
-        }
-    }
+    super::generic::sqrt(x)
 }
diff --git a/src/math/sqrtf.rs b/src/math/sqrtf.rs
index 319335163..2e69a4b66 100644
--- a/src/math/sqrtf.rs
+++ b/src/math/sqrtf.rs
@@ -1,18 +1,3 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/e_sqrtf.c */
-/*
- * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
- */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-
 /// The square root of `x` (f32).
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn sqrtf(x: f32) -> f32 {
@@ -25,121 +10,5 @@ pub fn sqrtf(x: f32) -> f32 {
         args: x,
     }
 
-    const TINY: f32 = 1.0e-30;
-
-    let mut z: f32;
-    let sign: i32 = 0x80000000u32 as i32;
-    let mut ix: i32;
-    let mut s: i32;
-    let mut q: i32;
-    let mut m: i32;
-    let mut t: i32;
-    let mut i: i32;
-    let mut r: u32;
-
-    ix = x.to_bits() as i32;
-
-    /* take care of Inf and NaN */
-    if (ix as u32 & 0x7f800000) == 0x7f800000 {
-        return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */
-    }
-
-    /* take care of zero */
-    if ix <= 0 {
-        if (ix & !sign) == 0 {
-            return x; /* sqrt(+-0) = +-0 */
-        }
-        if ix < 0 {
-            return (x - x) / (x - x); /* sqrt(-ve) = sNaN */
-        }
-    }
-
-    /* normalize x */
-    m = ix >> 23;
-    if m == 0 {
-        /* subnormal x */
-        i = 0;
-        while ix & 0x00800000 == 0 {
-            ix <<= 1;
-            i = i + 1;
-        }
-        m -= i - 1;
-    }
-    m -= 127; /* unbias exponent */
-    ix = (ix & 0x007fffff) | 0x00800000;
-    if m & 1 == 1 {
-        /* odd m, double x to make it even */
-        ix += ix;
-    }
-    m >>= 1; /* m = [m/2] */
-
-    /* generate sqrt(x) bit by bit */
-    ix += ix;
-    q = 0;
-    s = 0;
-    r = 0x01000000; /* r = moving bit from right to left */
-
-    while r != 0 {
-        t = s + r as i32;
-        if t <= ix {
-            s = t + r as i32;
-            ix -= t;
-            q += r as i32;
-        }
-        ix += ix;
-        r >>= 1;
-    }
-
-    /* use floating add to find out rounding direction */
-    if ix != 0 {
-        z = 1.0 - TINY; /* raise inexact flag */
-        if z >= 1.0 {
-            z = 1.0 + TINY;
-            if z > 1.0 {
-                q += 2;
-            } else {
-                q += q & 1;
-            }
-        }
-    }
-
-    ix = (q >> 1) + 0x3f000000;
-    ix += m << 23;
-    f32::from_bits(ix as u32)
-}
-
-// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520
-#[cfg(not(target_arch = "powerpc64"))]
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn sanity_check() {
-        assert_eq!(sqrtf(100.0), 10.0);
-        assert_eq!(sqrtf(4.0), 2.0);
-    }
-
-    /// The spec: https://en.cppreference.com/w/cpp/numeric/math/sqrt
-    #[test]
-    fn spec_tests() {
-        // Not Asserted: FE_INVALID exception is raised if argument is negative.
-        assert!(sqrtf(-1.0).is_nan());
-        assert!(sqrtf(f32::NAN).is_nan());
-        for f in [0.0, -0.0, f32::INFINITY].iter().copied() {
-            assert_eq!(sqrtf(f), f);
-        }
-    }
-
-    #[test]
-    #[allow(clippy::approx_constant)]
-    fn conformance_tests() {
-        let values = [3.14159265359f32, 10000.0f32, f32::from_bits(0x0000000f), f32::INFINITY];
-        let results = [1071833029u32, 1120403456u32, 456082799u32, 2139095040u32];
-
-        for i in 0..values.len() {
-            let bits = f32::to_bits(sqrtf(values[i]));
-            assert_eq!(results[i], bits);
-        }
-    }
+    super::generic::sqrt(x)
 }
diff --git a/src/math/support/int_traits.rs b/src/math/support/int_traits.rs
index db799c030..cf19762e8 100644
--- a/src/math/support/int_traits.rs
+++ b/src/math/support/int_traits.rs
@@ -55,10 +55,12 @@ pub trait Int:
     + ops::BitAnd<Output = Self>
     + cmp::Ord
     + CastFrom<i32>
+    + CastFrom<u16>
     + CastFrom<u32>
     + CastFrom<u8>
     + CastFrom<usize>
     + CastInto<i32>
+    + CastInto<u16>
     + CastInto<u32>
     + CastInto<u8>
     + CastInto<usize>
diff --git a/src/math/support/macros.rs b/src/math/support/macros.rs
index 076fdf1f7..c9a36c0db 100644
--- a/src/math/support/macros.rs
+++ b/src/math/support/macros.rs
@@ -110,19 +110,21 @@ macro_rules! hf64 {
 /// Assert `F::biteq` with better messages.
 #[cfg(test)]
 macro_rules! assert_biteq {
-    ($left:expr, $right:expr, $($arg:tt)*) => {{
-        let bits = ($left.to_bits() * 0).leading_zeros(); // hack to get the width from the value
+    ($left:expr, $right:expr, $($tt:tt)*) => {{
+        let l = $left;
+        let r = $right;
+        let bits = (l.to_bits() - l.to_bits()).leading_zeros(); // hack to get the width from the value
         assert!(
-            $left.biteq($right),
-            "\nl: {l:?} ({lb:#0width$x})\nr: {r:?} ({rb:#0width$x})",
-            l = $left,
-            lb = $left.to_bits(),
-            r = $right,
-            rb = $right.to_bits(),
-            width = ((bits / 4) + 2) as usize
+            l.biteq(r),
+            "{}\nl: {l:?} ({lb:#0width$x})\nr: {r:?} ({rb:#0width$x})",
+            format_args!($($tt)*),
+            lb = l.to_bits(),
+            rb = r.to_bits(),
+            width = ((bits / 4) + 2) as usize,
+
         );
     }};
     ($left:expr, $right:expr $(,)?) => {
-        assert_biteq!($left, $right,)
+        assert_biteq!($left, $right, "")
     };
 }
diff --git a/src/math/support/mod.rs b/src/math/support/mod.rs
index e2f4e0e98..b4a57a34e 100644
--- a/src/math/support/mod.rs
+++ b/src/math/support/mod.rs
@@ -10,3 +10,14 @@ pub(crate) use float_traits::{f32_from_bits, f64_from_bits};
 #[allow(unused_imports)]
 pub use hex_float::{hf32, hf64};
 pub use int_traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt};
+
+/// Hint to the compiler that the current path is cold.
+pub fn cold_path() {
+    #[cfg(intrinsics_enabled)]
+    core::intrinsics::cold_path();
+}
+
+/// Return `x`, first raising `FE_INVALID`.
+pub fn raise_invalid<F: Float>(x: F) -> F {
+    (x - x) / (x - x)
+}

From 2a29b7ac29564a642550fc65761919bb334f9b5d Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 15 Jan 2025 11:49:28 +0000
Subject: [PATCH 159/279] Copy the u256 implementation from compiler_builtins

---
 src/math/support/big.rs       | 302 ++++++++++++++++++++++++++++++++++
 src/math/support/big/tests.rs | 110 +++++++++++++
 src/math/support/mod.rs       |   1 +
 3 files changed, 413 insertions(+)
 create mode 100644 src/math/support/big.rs
 create mode 100644 src/math/support/big/tests.rs

diff --git a/src/math/support/big.rs b/src/math/support/big.rs
new file mode 100644
index 000000000..e0f5e5263
--- /dev/null
+++ b/src/math/support/big.rs
@@ -0,0 +1,302 @@
+//! Integers used for wide operations, larger than `u128`.
+
+#![allow(unused)]
+
+#[cfg(test)]
+mod tests;
+
+use core::{fmt, ops};
+
+use super::{DInt, HInt, Int, MinInt};
+
+const WORD_LO_MASK: u64 = 0x00000000ffffffff;
+const WORD_HI_MASK: u64 = 0xffffffff00000000;
+const WORD_FULL_MASK: u64 = 0xffffffffffffffff;
+const U128_LO_MASK: u128 = u64::MAX as u128;
+const U128_HI_MASK: u128 = (u64::MAX as u128) << 64;
+
+/// A 256-bit unsigned integer represented as 4 64-bit limbs.
+///
+/// Each limb is a native-endian number, but the array is little-limb-endian.
+#[allow(non_camel_case_types)]
+#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)]
+pub struct u256(pub [u64; 4]);
+
+impl u256 {
+    pub const MAX: Self = Self([u64::MAX, u64::MAX, u64::MAX, u64::MAX]);
+
+    /// Reinterpret as a signed integer
+    pub fn signed(self) -> i256 {
+        i256(self.0)
+    }
+}
+
+/// A 256-bit signed integer represented as 4 64-bit limbs.
+///
+/// Each limb is a native-endian number, but the array is little-limb-endian.
+#[allow(non_camel_case_types)]
+#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)]
+pub struct i256(pub [u64; 4]);
+
+impl i256 {
+    /// Reinterpret as an unsigned integer
+    pub fn unsigned(self) -> u256 {
+        u256(self.0)
+    }
+}
+
+impl MinInt for u256 {
+    type OtherSign = i256;
+
+    type Unsigned = u256;
+
+    const SIGNED: bool = false;
+    const BITS: u32 = 256;
+    const ZERO: Self = Self([0u64; 4]);
+    const ONE: Self = Self([1, 0, 0, 0]);
+    const MIN: Self = Self([0u64; 4]);
+    const MAX: Self = Self([u64::MAX; 4]);
+}
+
+impl MinInt for i256 {
+    type OtherSign = u256;
+
+    type Unsigned = u256;
+
+    const SIGNED: bool = false;
+    const BITS: u32 = 256;
+    const ZERO: Self = Self([0u64; 4]);
+    const ONE: Self = Self([1, 0, 0, 0]);
+    const MIN: Self = Self([0, 0, 0, 1 << 63]);
+    const MAX: Self = Self([u64::MAX, u64::MAX, u64::MAX, u64::MAX << 1]);
+}
+
+macro_rules! impl_common {
+    ($ty:ty) => {
+        impl ops::BitOr for $ty {
+            type Output = Self;
+
+            fn bitor(mut self, rhs: Self) -> Self::Output {
+                self.0[0] |= rhs.0[0];
+                self.0[1] |= rhs.0[1];
+                self.0[2] |= rhs.0[2];
+                self.0[3] |= rhs.0[3];
+                self
+            }
+        }
+
+        impl ops::Not for $ty {
+            type Output = Self;
+
+            fn not(self) -> Self::Output {
+                Self([!self.0[0], !self.0[1], !self.0[2], !self.0[3]])
+            }
+        }
+
+        impl ops::Shl<u32> for $ty {
+            type Output = Self;
+
+            fn shl(self, rhs: u32) -> Self::Output {
+                unimplemented!("only used to meet trait bounds")
+            }
+        }
+    };
+}
+
+impl_common!(i256);
+impl_common!(u256);
+
+impl ops::Shr<u32> for u256 {
+    type Output = Self;
+
+    fn shr(self, rhs: u32) -> Self::Output {
+        assert!(rhs < Self::BITS, "attempted to shift right with overflow");
+
+        if rhs == 0 {
+            return self;
+        }
+
+        let mut ret = self;
+        let byte_shift = rhs / 64;
+        let bit_shift = rhs % 64;
+
+        for idx in 0..4 {
+            let base_idx = idx + byte_shift as usize;
+
+            // FIXME(msrv): could be let...else.
+            let base = match ret.0.get(base_idx) {
+                Some(v) => v,
+                None => {
+                    ret.0[idx] = 0;
+                    continue;
+                }
+            };
+
+            let mut new_val = base >> bit_shift;
+
+            if let Some(new) = ret.0.get(base_idx + 1) {
+                new_val |= new.overflowing_shl(64 - bit_shift).0;
+            }
+
+            ret.0[idx] = new_val;
+        }
+
+        ret
+    }
+}
+
+macro_rules! word {
+    (1, $val:expr) => {
+        (($val >> (32 * 3)) & Self::from(WORD_LO_MASK)) as u64
+    };
+    (2, $val:expr) => {
+        (($val >> (32 * 2)) & Self::from(WORD_LO_MASK)) as u64
+    };
+    (3, $val:expr) => {
+        (($val >> (32 * 1)) & Self::from(WORD_LO_MASK)) as u64
+    };
+    (4, $val:expr) => {
+        (($val >> (32 * 0)) & Self::from(WORD_LO_MASK)) as u64
+    };
+}
+
+impl HInt for u128 {
+    type D = u256;
+
+    fn widen(self) -> Self::D {
+        let w0 = self & u128::from(u64::MAX);
+        let w1 = (self >> u64::BITS) & u128::from(u64::MAX);
+        u256([w0 as u64, w1 as u64, 0, 0])
+    }
+
+    fn zero_widen(self) -> Self::D {
+        self.widen()
+    }
+
+    fn zero_widen_mul(self, rhs: Self) -> Self::D {
+        let product11: u64 = word!(1, self) * word!(1, rhs);
+        let product12: u64 = word!(1, self) * word!(2, rhs);
+        let product13: u64 = word!(1, self) * word!(3, rhs);
+        let product14: u64 = word!(1, self) * word!(4, rhs);
+        let product21: u64 = word!(2, self) * word!(1, rhs);
+        let product22: u64 = word!(2, self) * word!(2, rhs);
+        let product23: u64 = word!(2, self) * word!(3, rhs);
+        let product24: u64 = word!(2, self) * word!(4, rhs);
+        let product31: u64 = word!(3, self) * word!(1, rhs);
+        let product32: u64 = word!(3, self) * word!(2, rhs);
+        let product33: u64 = word!(3, self) * word!(3, rhs);
+        let product34: u64 = word!(3, self) * word!(4, rhs);
+        let product41: u64 = word!(4, self) * word!(1, rhs);
+        let product42: u64 = word!(4, self) * word!(2, rhs);
+        let product43: u64 = word!(4, self) * word!(3, rhs);
+        let product44: u64 = word!(4, self) * word!(4, rhs);
+
+        let sum0: u128 = u128::from(product44);
+        let sum1: u128 = u128::from(product34) + u128::from(product43);
+        let sum2: u128 = u128::from(product24) + u128::from(product33) + u128::from(product42);
+        let sum3: u128 = u128::from(product14)
+            + u128::from(product23)
+            + u128::from(product32)
+            + u128::from(product41);
+        let sum4: u128 = u128::from(product13) + u128::from(product22) + u128::from(product31);
+        let sum5: u128 = u128::from(product12) + u128::from(product21);
+        let sum6: u128 = u128::from(product11);
+
+        let r0: u128 =
+            (sum0 & u128::from(WORD_FULL_MASK)) + ((sum1 & u128::from(WORD_LO_MASK)) << 32);
+        let r1: u128 = (sum0 >> 64)
+            + ((sum1 >> 32) & u128::from(WORD_FULL_MASK))
+            + (sum2 & u128::from(WORD_FULL_MASK))
+            + ((sum3 << 32) & u128::from(WORD_HI_MASK));
+
+        let (lo, carry) = r0.overflowing_add(r1 << 64);
+        let hi = (r1 >> 64)
+            + (sum1 >> 96)
+            + (sum2 >> 64)
+            + (sum3 >> 32)
+            + sum4
+            + (sum5 << 32)
+            + (sum6 << 64)
+            + u128::from(carry);
+
+        u256([
+            (lo & U128_LO_MASK) as u64,
+            ((lo >> 64) & U128_LO_MASK) as u64,
+            (hi & U128_LO_MASK) as u64,
+            ((hi >> 64) & U128_LO_MASK) as u64,
+        ])
+    }
+
+    fn widen_mul(self, rhs: Self) -> Self::D {
+        self.zero_widen_mul(rhs)
+    }
+
+    fn widen_hi(self) -> Self::D {
+        self.widen() << <Self as MinInt>::BITS
+    }
+}
+
+impl HInt for i128 {
+    type D = i256;
+
+    fn widen(self) -> Self::D {
+        let mut ret = self.unsigned().zero_widen().signed();
+        if self.is_negative() {
+            ret.0[2] = u64::MAX;
+            ret.0[3] = u64::MAX;
+        }
+        ret
+    }
+
+    fn zero_widen(self) -> Self::D {
+        self.unsigned().zero_widen().signed()
+    }
+
+    fn zero_widen_mul(self, rhs: Self) -> Self::D {
+        self.unsigned().zero_widen_mul(rhs.unsigned()).signed()
+    }
+
+    fn widen_mul(self, rhs: Self) -> Self::D {
+        unimplemented!("signed i128 widening multiply is not used")
+    }
+
+    fn widen_hi(self) -> Self::D {
+        self.widen() << <Self as MinInt>::BITS
+    }
+}
+
+impl DInt for u256 {
+    type H = u128;
+
+    fn lo(self) -> Self::H {
+        let mut tmp = [0u8; 16];
+        tmp[..8].copy_from_slice(&self.0[0].to_le_bytes());
+        tmp[8..].copy_from_slice(&self.0[1].to_le_bytes());
+        u128::from_le_bytes(tmp)
+    }
+
+    fn hi(self) -> Self::H {
+        let mut tmp = [0u8; 16];
+        tmp[..8].copy_from_slice(&self.0[2].to_le_bytes());
+        tmp[8..].copy_from_slice(&self.0[3].to_le_bytes());
+        u128::from_le_bytes(tmp)
+    }
+}
+
+impl DInt for i256 {
+    type H = i128;
+
+    fn lo(self) -> Self::H {
+        let mut tmp = [0u8; 16];
+        tmp[..8].copy_from_slice(&self.0[0].to_le_bytes());
+        tmp[8..].copy_from_slice(&self.0[1].to_le_bytes());
+        i128::from_le_bytes(tmp)
+    }
+
+    fn hi(self) -> Self::H {
+        let mut tmp = [0u8; 16];
+        tmp[..8].copy_from_slice(&self.0[2].to_le_bytes());
+        tmp[8..].copy_from_slice(&self.0[3].to_le_bytes());
+        i128::from_le_bytes(tmp)
+    }
+}
diff --git a/src/math/support/big/tests.rs b/src/math/support/big/tests.rs
new file mode 100644
index 000000000..f95f82973
--- /dev/null
+++ b/src/math/support/big/tests.rs
@@ -0,0 +1,110 @@
+extern crate std;
+use std::string::String;
+use std::vec::Vec;
+use std::{eprintln, format};
+
+use super::{HInt, MinInt, i256, u256};
+
+const LOHI_SPLIT: u128 = 0xaaaaaaaaaaaaaaaaffffffffffffffff;
+
+/// Print a `u256` as hex since we can't add format implementations
+fn hexu(v: u256) -> String {
+    format!("0x{:016x}{:016x}{:016x}{:016x}", v.0[3], v.0[2], v.0[1], v.0[0])
+}
+
+#[test]
+fn widen_u128() {
+    assert_eq!(u128::MAX.widen(), u256([u64::MAX, u64::MAX, 0, 0]));
+    assert_eq!(LOHI_SPLIT.widen(), u256([u64::MAX, 0xaaaaaaaaaaaaaaaa, 0, 0]));
+}
+
+#[test]
+fn widen_i128() {
+    assert_eq!((-1i128).widen(), u256::MAX.signed());
+    assert_eq!(
+        (LOHI_SPLIT as i128).widen(),
+        i256([u64::MAX, 0xaaaaaaaaaaaaaaaa, u64::MAX, u64::MAX])
+    );
+    assert_eq!((-1i128).zero_widen().unsigned(), (u128::MAX).widen());
+}
+
+#[test]
+fn widen_mul_u128() {
+    let tests = [
+        (u128::MAX / 2, 2_u128, u256([u64::MAX - 1, u64::MAX, 0, 0])),
+        (u128::MAX, 2_u128, u256([u64::MAX - 1, u64::MAX, 1, 0])),
+        (u128::MAX, u128::MAX, u256([1, 0, u64::MAX - 1, u64::MAX])),
+        (u128::MIN, u128::MIN, u256::ZERO),
+        (1234, 0, u256::ZERO),
+        (0, 1234, u256::ZERO),
+    ];
+
+    let mut errors = Vec::new();
+    for (i, (a, b, exp)) in tests.iter().copied().enumerate() {
+        let res = a.widen_mul(b);
+        let res_z = a.zero_widen_mul(b);
+        assert_eq!(res, res_z);
+        if res != exp {
+            errors.push((i, a, b, exp, res));
+        }
+    }
+
+    for (i, a, b, exp, res) in &errors {
+        eprintln!("FAILURE ({i}): {a:#034x} * {b:#034x} = {} got {}", hexu(*exp), hexu(*res));
+    }
+    assert!(errors.is_empty());
+}
+
+#[test]
+fn not_u128() {
+    assert_eq!(!u256::ZERO, u256::MAX);
+}
+
+#[test]
+fn shr_u128() {
+    let only_low = [1, u16::MAX.into(), u32::MAX.into(), u64::MAX.into(), u128::MAX];
+
+    let mut errors = Vec::new();
+
+    for a in only_low {
+        for perturb in 0..10 {
+            let a = a.saturating_add(perturb);
+            for shift in 0..128 {
+                let res = a.widen() >> shift;
+                let expected = (a >> shift).widen();
+                if res != expected {
+                    errors.push((a.widen(), shift, res, expected));
+                }
+            }
+        }
+    }
+
+    let check = [
+        (u256::MAX, 1, u256([u64::MAX, u64::MAX, u64::MAX, u64::MAX >> 1])),
+        (u256::MAX, 5, u256([u64::MAX, u64::MAX, u64::MAX, u64::MAX >> 5])),
+        (u256::MAX, 63, u256([u64::MAX, u64::MAX, u64::MAX, 1])),
+        (u256::MAX, 64, u256([u64::MAX, u64::MAX, u64::MAX, 0])),
+        (u256::MAX, 65, u256([u64::MAX, u64::MAX, u64::MAX >> 1, 0])),
+        (u256::MAX, 127, u256([u64::MAX, u64::MAX, 1, 0])),
+        (u256::MAX, 128, u256([u64::MAX, u64::MAX, 0, 0])),
+        (u256::MAX, 129, u256([u64::MAX, u64::MAX >> 1, 0, 0])),
+        (u256::MAX, 191, u256([u64::MAX, 1, 0, 0])),
+        (u256::MAX, 192, u256([u64::MAX, 0, 0, 0])),
+        (u256::MAX, 193, u256([u64::MAX >> 1, 0, 0, 0])),
+        (u256::MAX, 191, u256([u64::MAX, 1, 0, 0])),
+        (u256::MAX, 254, u256([0b11, 0, 0, 0])),
+        (u256::MAX, 255, u256([1, 0, 0, 0])),
+    ];
+
+    for (input, shift, expected) in check {
+        let res = input >> shift;
+        if res != expected {
+            errors.push((input, shift, res, expected));
+        }
+    }
+
+    for (a, b, res, expected) in &errors {
+        eprintln!("FAILURE: {} >> {b} = {} got {}", hexu(*a), hexu(*expected), hexu(*res),);
+    }
+    assert!(errors.is_empty());
+}
diff --git a/src/math/support/mod.rs b/src/math/support/mod.rs
index b4a57a34e..ddfc2e3e0 100644
--- a/src/math/support/mod.rs
+++ b/src/math/support/mod.rs
@@ -1,5 +1,6 @@
 #[macro_use]
 pub mod macros;
+mod big;
 mod float_traits;
 mod hex_float;
 mod int_traits;

From 839c72a0417afdf42c0fcbec4aadbdd06eadb015 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Tue, 21 Jan 2025 23:59:07 +0000
Subject: [PATCH 160/279] Add `sqrtf16` and `sqrtf128`

Use the generic algorithms to provide implementations for these
routines.
---
 crates/libm-macros/src/shared.rs             |  4 +-
 crates/libm-test/benches/icount.rs           |  2 +
 crates/libm-test/benches/random.rs           |  2 +
 crates/libm-test/tests/compare_built_musl.rs |  2 +
 crates/util/src/main.rs                      |  2 +
 etc/function-definitions.json                | 14 +++
 etc/function-list.txt                        |  2 +
 src/math/generic/sqrt.rs                     | 92 ++++++++++++++++++++
 src/math/mod.rs                              |  4 +
 src/math/sqrtf128.rs                         |  5 ++
 src/math/sqrtf16.rs                          |  5 ++
 11 files changed, 132 insertions(+), 2 deletions(-)
 create mode 100644 src/math/sqrtf128.rs
 create mode 100644 src/math/sqrtf16.rs

diff --git a/crates/libm-macros/src/shared.rs b/crates/libm-macros/src/shared.rs
index 608381962..d17bc6ffc 100644
--- a/crates/libm-macros/src/shared.rs
+++ b/crates/libm-macros/src/shared.rs
@@ -9,7 +9,7 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
         FloatTy::F16,
         Signature { args: &[Ty::F16], returns: &[Ty::F16] },
         None,
-        &["fabsf16", "truncf16"],
+        &["fabsf16", "sqrtf16", "truncf16"],
     ),
     (
         // `fn(f32) -> f32`
@@ -40,7 +40,7 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
         FloatTy::F128,
         Signature { args: &[Ty::F128], returns: &[Ty::F128] },
         None,
-        &["fabsf128", "truncf128"],
+        &["fabsf128", "sqrtf128", "truncf128"],
     ),
     (
         // `(f16, f16) -> f16`
diff --git a/crates/libm-test/benches/icount.rs b/crates/libm-test/benches/icount.rs
index 3a66249e8..c8451f88c 100644
--- a/crates/libm-test/benches/icount.rs
+++ b/crates/libm-test/benches/icount.rs
@@ -155,6 +155,8 @@ main!(
     icount_bench_sinh_group,
     icount_bench_sinhf_group,
     icount_bench_sqrt_group,
+    icount_bench_sqrtf128_group,
+    icount_bench_sqrtf16_group,
     icount_bench_sqrtf_group,
     icount_bench_tan_group,
     icount_bench_tanf_group,
diff --git a/crates/libm-test/benches/random.rs b/crates/libm-test/benches/random.rs
index 888161265..026841202 100644
--- a/crates/libm-test/benches/random.rs
+++ b/crates/libm-test/benches/random.rs
@@ -123,6 +123,8 @@ libm_macros::for_each_function! {
         | fabsf16
         | fdimf128
         | fdimf16
+        | sqrtf16
+        | sqrtf128
         | truncf128
         | truncf16 => (false, None),
 
diff --git a/crates/libm-test/tests/compare_built_musl.rs b/crates/libm-test/tests/compare_built_musl.rs
index f540a0b15..24703f273 100644
--- a/crates/libm-test/tests/compare_built_musl.rs
+++ b/crates/libm-test/tests/compare_built_musl.rs
@@ -87,5 +87,7 @@ libm_macros::for_each_function! {
         fdimf16,
         truncf128,
         truncf16,
+        sqrtf16,
+        sqrtf128,
     ],
 }
diff --git a/crates/util/src/main.rs b/crates/util/src/main.rs
index b979c60ad..cd68d9afd 100644
--- a/crates/util/src/main.rs
+++ b/crates/util/src/main.rs
@@ -90,6 +90,8 @@ fn do_eval(basis: &str, op: &str, inputs: &[&str]) {
             | fabsf16
             | fdimf128
             | fdimf16
+            | sqrtf128
+            | sqrtf16
             | truncf128
             | truncf16  => None,
             _ => Some(musl_math_sys::MACRO_FN_NAME)
diff --git a/etc/function-definitions.json b/etc/function-definitions.json
index 9f7c8ab25..2d0af3bcf 100644
--- a/etc/function-definitions.json
+++ b/etc/function-definitions.json
@@ -718,6 +718,20 @@
         ],
         "type": "f32"
     },
+    "sqrtf128": {
+        "sources": [
+            "src/math/generic/sqrt.rs",
+            "src/math/sqrtf128.rs"
+        ],
+        "type": "f128"
+    },
+    "sqrtf16": {
+        "sources": [
+            "src/math/generic/sqrt.rs",
+            "src/math/sqrtf16.rs"
+        ],
+        "type": "f16"
+    },
     "tan": {
         "sources": [
             "src/libm_helper.rs",
diff --git a/etc/function-list.txt b/etc/function-list.txt
index 7f96a4362..47c34ab90 100644
--- a/etc/function-list.txt
+++ b/etc/function-list.txt
@@ -105,6 +105,8 @@ sinh
 sinhf
 sqrt
 sqrtf
+sqrtf128
+sqrtf16
 tan
 tanf
 tanh
diff --git a/src/math/generic/sqrt.rs b/src/math/generic/sqrt.rs
index a2e054f3c..c892f9997 100644
--- a/src/math/generic/sqrt.rs
+++ b/src/math/generic/sqrt.rs
@@ -294,6 +294,14 @@ pub trait SqrtHelper: Float {
     const FINAL_ROUNDS: u32;
 }
 
+#[cfg(f16_enabled)]
+impl SqrtHelper for f16 {
+    type ISet1 = u16; // unused
+    type ISet2 = u16; // unused
+
+    const FINAL_ROUNDS: u32 = 2;
+}
+
 impl SqrtHelper for f32 {
     type ISet1 = u32; // unused
     type ISet2 = u32; // unused
@@ -309,6 +317,16 @@ impl SqrtHelper for f64 {
     const FINAL_ROUNDS: u32 = 2;
 }
 
+#[cfg(f128_enabled)]
+impl SqrtHelper for f128 {
+    type ISet1 = u32;
+    type ISet2 = u64;
+
+    const SET1_ROUNDS: u32 = 1;
+    const SET2_ROUNDS: u32 = 2;
+    const FINAL_ROUNDS: u32 = 2;
+}
+
 /// A U0.16 representation of `1/sqrt(x)`.
 ///
 // / The index is a 7-bit number consisting of a single exponent bit and 6 bits of significand.
@@ -355,6 +373,42 @@ mod tests {
         }
     }
 
+    #[test]
+    #[cfg(f16_enabled)]
+    fn sanity_check_f16() {
+        assert_biteq!(sqrt(100.0f16), 10.0);
+        assert_biteq!(sqrt(4.0f16), 2.0);
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn spec_tests_f16() {
+        spec_test::<f16>();
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    #[allow(clippy::approx_constant)]
+    fn conformance_tests_f16() {
+        let cases = [
+            (f16::PI, 0x3f17_u16),
+            // 10_000.0, using a hex literal for MSRV hack (Rust < 1.67 checks literal widths as
+            // part of the AST, so the `cfg` is irrelevant here).
+            (f16::from_bits(0x70e2), 0x5640_u16),
+            (f16::from_bits(0x0000000f), 0x13bf_u16),
+            (f16::INFINITY, f16::INFINITY.to_bits()),
+        ];
+
+        for (input, output) in cases {
+            assert_biteq!(
+                sqrt(input),
+                f16::from_bits(output),
+                "input: {input:?} ({:#018x})",
+                input.to_bits()
+            );
+        }
+    }
+
     #[test]
     fn sanity_check_f32() {
         assert_biteq!(sqrt(100.0f32), 10.0);
@@ -416,4 +470,42 @@ mod tests {
             );
         }
     }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn sanity_check_f128() {
+        assert_biteq!(sqrt(100.0f128), 10.0);
+        assert_biteq!(sqrt(4.0f128), 2.0);
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn spec_tests_f128() {
+        spec_test::<f128>();
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    #[allow(clippy::approx_constant)]
+    fn conformance_tests_f128() {
+        let cases = [
+            (f128::PI, 0x3fffc5bf891b4ef6aa79c3b0520d5db9_u128),
+            // 10_000.0, see `f16` for reasoning.
+            (
+                f128::from_bits(0x400c3880000000000000000000000000),
+                0x40059000000000000000000000000000_u128,
+            ),
+            (f128::from_bits(0x0000000f), 0x1fc9efbdeb14f4ed9b17ae807907e1e9_u128),
+            (f128::INFINITY, f128::INFINITY.to_bits()),
+        ];
+
+        for (input, output) in cases {
+            assert_biteq!(
+                sqrt(input),
+                f128::from_bits(output),
+                "input: {input:?} ({:#018x})",
+                input.to_bits()
+            );
+        }
+    }
 }
diff --git a/src/math/mod.rs b/src/math/mod.rs
index 03adb6be1..3684025a6 100644
--- a/src/math/mod.rs
+++ b/src/math/mod.rs
@@ -344,11 +344,13 @@ cfg_if! {
         mod copysignf16;
         mod fabsf16;
         mod fdimf16;
+        mod sqrtf16;
         mod truncf16;
 
         pub use self::copysignf16::copysignf16;
         pub use self::fabsf16::fabsf16;
         pub use self::fdimf16::fdimf16;
+        pub use self::sqrtf16::sqrtf16;
         pub use self::truncf16::truncf16;
     }
 }
@@ -358,11 +360,13 @@ cfg_if! {
         mod copysignf128;
         mod fabsf128;
         mod fdimf128;
+        mod sqrtf128;
         mod truncf128;
 
         pub use self::copysignf128::copysignf128;
         pub use self::fabsf128::fabsf128;
         pub use self::fdimf128::fdimf128;
+        pub use self::sqrtf128::sqrtf128;
         pub use self::truncf128::truncf128;
     }
 }
diff --git a/src/math/sqrtf128.rs b/src/math/sqrtf128.rs
new file mode 100644
index 000000000..eaef6ae0c
--- /dev/null
+++ b/src/math/sqrtf128.rs
@@ -0,0 +1,5 @@
+/// The square root of `x` (f128).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn sqrtf128(x: f128) -> f128 {
+    return super::generic::sqrt(x);
+}
diff --git a/src/math/sqrtf16.rs b/src/math/sqrtf16.rs
new file mode 100644
index 000000000..549bf902c
--- /dev/null
+++ b/src/math/sqrtf16.rs
@@ -0,0 +1,5 @@
+/// The square root of `x` (f16).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn sqrtf16(x: f16) -> f16 {
+    return super::generic::sqrt(x);
+}

From 0ce530812492b8c8089e94a13025b370f792d8df Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 13 Jan 2025 11:58:17 +0000
Subject: [PATCH 161/279] Shift then mask, rather than mask then shift

This may allow for small optimizations with larger float types since
`u32` math can be used after shifting. LLVM may be already getting this
anyway.
---
 src/math/support/float_traits.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/math/support/float_traits.rs b/src/math/support/float_traits.rs
index 647f4f5e2..2e467111f 100644
--- a/src/math/support/float_traits.rs
+++ b/src/math/support/float_traits.rs
@@ -1,6 +1,6 @@
 use core::{fmt, mem, ops};
 
-use super::int_traits::{CastFrom, CastInto, Int, MinInt};
+use super::int_traits::{CastFrom, Int, MinInt};
 
 /// Trait for some basic operations on floats
 #[allow(dead_code)]
@@ -108,7 +108,7 @@ pub trait Float:
 
     /// Returns the exponent, not adjusting for bias, not accounting for subnormals or zero.
     fn exp(self) -> i32 {
-        ((self.to_bits() & Self::EXP_MASK) >> Self::SIG_BITS).cast()
+        (u32::cast_from(self.to_bits() >> Self::SIG_BITS) & Self::EXP_MAX).signed()
     }
 
     /// Extract the exponent and adjust it for bias, not accounting for subnormals or zero.

From b5cdbd4de30f116d83ab03e1a43ea6ff9ec7104b Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 22 Jan 2025 07:13:45 +0000
Subject: [PATCH 162/279] Make `Float::exp` return an unsigned integer

`exp` does not perform any form of unbiasing, so there isn't any reason
it should be signed. Change this.

Additionally, add `EPSILON` to the `Float` trait.
---
 crates/libm-test/src/f8_impl.rs  | 2 ++
 src/math/generic/sqrt.rs         | 2 +-
 src/math/support/float_traits.rs | 8 +++++---
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/crates/libm-test/src/f8_impl.rs b/crates/libm-test/src/f8_impl.rs
index 299553d20..96b783924 100644
--- a/crates/libm-test/src/f8_impl.rs
+++ b/crates/libm-test/src/f8_impl.rs
@@ -30,6 +30,8 @@ impl Float for f8 {
     const INFINITY: Self = Self(0b0_1111_000);
     const NEG_INFINITY: Self = Self(0b1_1111_000);
     const NAN: Self = Self(0b0_1111_100);
+    // FIXME: incorrect values
+    const EPSILON: Self = Self::ZERO;
     const PI: Self = Self::ZERO;
     const NEG_PI: Self = Self::ZERO;
     const FRAC_PI_2: Self = Self::ZERO;
diff --git a/src/math/generic/sqrt.rs b/src/math/generic/sqrt.rs
index c892f9997..22ee93f57 100644
--- a/src/math/generic/sqrt.rs
+++ b/src/math/generic/sqrt.rs
@@ -96,7 +96,7 @@ where
         ix = scaled.to_bits();
         match top {
             Exp::Shifted(ref mut v) => {
-                *v = scaled.exp().unsigned();
+                *v = scaled.exp();
                 *v = (*v).wrapping_sub(F::SIG_BITS);
             }
             Exp::NoShift(()) => {
diff --git a/src/math/support/float_traits.rs b/src/math/support/float_traits.rs
index 2e467111f..1abb7c4de 100644
--- a/src/math/support/float_traits.rs
+++ b/src/math/support/float_traits.rs
@@ -34,6 +34,7 @@ pub trait Float:
     const NAN: Self;
     const MAX: Self;
     const MIN: Self;
+    const EPSILON: Self;
     const PI: Self;
     const NEG_PI: Self;
     const FRAC_PI_2: Self;
@@ -107,13 +108,13 @@ pub trait Float:
     }
 
     /// Returns the exponent, not adjusting for bias, not accounting for subnormals or zero.
-    fn exp(self) -> i32 {
-        (u32::cast_from(self.to_bits() >> Self::SIG_BITS) & Self::EXP_MAX).signed()
+    fn exp(self) -> u32 {
+        u32::cast_from(self.to_bits() >> Self::SIG_BITS) & Self::EXP_MAX
     }
 
     /// Extract the exponent and adjust it for bias, not accounting for subnormals or zero.
     fn exp_unbiased(self) -> i32 {
-        self.exp() - (Self::EXP_BIAS as i32)
+        self.exp().signed() - (Self::EXP_BIAS as i32)
     }
 
     /// Returns the significand with no implicit bit (or the "fractional" part)
@@ -180,6 +181,7 @@ macro_rules! float_impl {
             const MAX: Self = -Self::MIN;
             // Sign bit set, saturated mantissa, saturated exponent with last bit zeroed
             const MIN: Self = $from_bits(Self::Int::MAX & !(1 << Self::SIG_BITS));
+            const EPSILON: Self = <$ty>::EPSILON;
 
             const PI: Self = core::$ty::consts::PI;
             const NEG_PI: Self = -Self::PI;

From 11ff0e19a766db71d373ce66026bb0ff1a2122d9 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 13 Jan 2025 11:17:03 +0000
Subject: [PATCH 163/279] Add a generic version of `ceil`

Additionally, make use of this version to implement `ceil` and `ceilf`.

Musl's `ceilf` algorithm seems to work better for all versions of the
functions. Testing with a generic version of musl's `ceil` routine
showed the following regressions:

    icount::icount_bench_ceil_group::icount_bench_ceil logspace:setup_ceil()
    Performance has regressed: Instructions (14064 > 13171) regressed by +6.78005% (>+5.00000)
      Baselines:                      softfloat|softfloat
      Instructions:                       14064|13171                (+6.78005%) [+1.06780x]
      L1 Hits:                            16697|15803                (+5.65715%) [+1.05657x]
      L2 Hits:                                0|0                    (No change)
      RAM Hits:                               7|8                    (-12.5000%) [-1.14286x]
      Total read+write:                   16704|15811                (+5.64797%) [+1.05648x]
      Estimated Cycles:                   16942|16083                (+5.34104%) [+1.05341x]
    icount::icount_bench_ceilf_group::icount_bench_ceilf logspace:setup_ceilf()
    Performance has regressed: Instructions (14732 > 9901) regressed by +48.7931% (>+5.00000)
      Baselines:                      softfloat|softfloat
      Instructions:                       14732|9901                 (+48.7931%) [+1.48793x]
      L1 Hits:                            17494|12611                (+38.7202%) [+1.38720x]
      L2 Hits:                                0|0                    (No change)
      RAM Hits:                               6|6                    (No change)
      Total read+write:                   17500|12617                (+38.7018%) [+1.38702x]
      Estimated Cycles:                   17704|12821                (+38.0860%) [+1.38086x]
---
 src/math/ceil.rs         | 42 +------------------
 src/math/ceilf.rs        | 51 +----------------------
 src/math/generic/ceil.rs | 87 ++++++++++++++++++++++++++++++++++++++++
 src/math/generic/mod.rs  |  2 +
 4 files changed, 91 insertions(+), 91 deletions(-)
 create mode 100644 src/math/generic/ceil.rs

diff --git a/src/math/ceil.rs b/src/math/ceil.rs
index 398bfee47..535f434ac 100644
--- a/src/math/ceil.rs
+++ b/src/math/ceil.rs
@@ -1,8 +1,3 @@
-#![allow(unreachable_code)]
-use core::f64;
-
-const TOINT: f64 = 1. / f64::EPSILON;
-
 /// Ceil (f64)
 ///
 /// Finds the nearest integer greater than or equal to `x`.
@@ -15,40 +10,5 @@ pub fn ceil(x: f64) -> f64 {
         args: x,
     }
 
-    let u: u64 = x.to_bits();
-    let e: i64 = ((u >> 52) & 0x7ff) as i64;
-    let y: f64;
-
-    if e >= 0x3ff + 52 || x == 0. {
-        return x;
-    }
-    // y = int(x) - x, where int(x) is an integer neighbor of x
-    y = if (u >> 63) != 0 { x - TOINT + TOINT - x } else { x + TOINT - TOINT - x };
-    // special case because of non-nearest rounding modes
-    if e < 0x3ff {
-        force_eval!(y);
-        return if (u >> 63) != 0 { -0. } else { 1. };
-    }
-    if y < 0. { x + y + 1. } else { x + y }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn sanity_check() {
-        assert_eq!(ceil(1.1), 2.0);
-        assert_eq!(ceil(2.9), 3.0);
-    }
-
-    /// The spec: https://en.cppreference.com/w/cpp/numeric/math/ceil
-    #[test]
-    fn spec_tests() {
-        // Not Asserted: that the current rounding mode has no effect.
-        assert!(ceil(f64::NAN).is_nan());
-        for f in [0.0, -0.0, f64::INFINITY, f64::NEG_INFINITY].iter().copied() {
-            assert_eq!(ceil(f), f);
-        }
-    }
+    super::generic::ceil(x)
 }
diff --git a/src/math/ceilf.rs b/src/math/ceilf.rs
index 9e8e78e3e..66d44189c 100644
--- a/src/math/ceilf.rs
+++ b/src/math/ceilf.rs
@@ -1,5 +1,3 @@
-use core::f32;
-
 /// Ceil (f32)
 ///
 /// Finds the nearest integer greater than or equal to `x`.
@@ -11,52 +9,5 @@ pub fn ceilf(x: f32) -> f32 {
         args: x,
     }
 
-    let mut ui = x.to_bits();
-    let e = (((ui >> 23) & 0xff).wrapping_sub(0x7f)) as i32;
-
-    if e >= 23 {
-        return x;
-    }
-    if e >= 0 {
-        let m = 0x007fffff >> e;
-        if (ui & m) == 0 {
-            return x;
-        }
-        force_eval!(x + f32::from_bits(0x7b800000));
-        if ui >> 31 == 0 {
-            ui += m;
-        }
-        ui &= !m;
-    } else {
-        force_eval!(x + f32::from_bits(0x7b800000));
-        if ui >> 31 != 0 {
-            return -0.0;
-        } else if ui << 1 != 0 {
-            return 1.0;
-        }
-    }
-    f32::from_bits(ui)
-}
-
-// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520
-#[cfg(not(target_arch = "powerpc64"))]
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn sanity_check() {
-        assert_eq!(ceilf(1.1), 2.0);
-        assert_eq!(ceilf(2.9), 3.0);
-    }
-
-    /// The spec: https://en.cppreference.com/w/cpp/numeric/math/ceil
-    #[test]
-    fn spec_tests() {
-        // Not Asserted: that the current rounding mode has no effect.
-        assert!(ceilf(f32::NAN).is_nan());
-        for f in [0.0, -0.0, f32::INFINITY, f32::NEG_INFINITY].iter().copied() {
-            assert_eq!(ceilf(f), f);
-        }
-    }
+    super::generic::ceil(x)
 }
diff --git a/src/math/generic/ceil.rs b/src/math/generic/ceil.rs
new file mode 100644
index 000000000..d16d06572
--- /dev/null
+++ b/src/math/generic/ceil.rs
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: MIT */
+/* origin: musl src/math/ceilf.c */
+
+//! Generic `ceil` algorithm.
+//!
+//! Note that this uses the algorithm from musl's `ceilf` rather than `ceil` or `ceill` because
+//! performance seems to be better (based on icount) and it does not seem to experience rounding
+//! errors on i386.
+
+use super::super::{Float, Int, IntTy, MinInt};
+
+pub fn ceil<F: Float>(x: F) -> F {
+    let zero = IntTy::<F>::ZERO;
+
+    let mut ix = x.to_bits();
+    let e = x.exp_unbiased();
+
+    // If the represented value has no fractional part, no truncation is needed.
+    if e >= F::SIG_BITS as i32 {
+        return x;
+    }
+
+    if e >= 0 {
+        // |x| >= 1.0
+
+        let m = F::SIG_MASK >> e.unsigned();
+        if (ix & m) == zero {
+            // Portion to be masked is already zero; no adjustment needed.
+            return x;
+        }
+
+        // Otherwise, raise an inexact exception.
+        force_eval!(x + F::MAX);
+        if x.is_sign_positive() {
+            ix += m;
+        }
+        ix &= !m;
+    } else {
+        // |x| < 1.0, raise an inexact exception since truncation will happen (unless x == 0).
+        force_eval!(x + F::MAX);
+
+        if x.is_sign_negative() {
+            // -1.0 < x <= -0.0; rounding up goes toward -0.0.
+            return F::NEG_ZERO;
+        } else if ix << 1 != zero {
+            // 0.0 < x < 1.0; rounding up goes toward +1.0.
+            return F::ONE;
+        }
+    }
+
+    F::from_bits(ix)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Test against https://en.cppreference.com/w/cpp/numeric/math/ceil
+    fn spec_test<F: Float>() {
+        // Not Asserted: that the current rounding mode has no effect.
+        for f in [F::ZERO, F::NEG_ZERO, F::INFINITY, F::NEG_INFINITY].iter().copied() {
+            assert_biteq!(ceil(f), f);
+        }
+    }
+
+    #[test]
+    fn sanity_check_f32() {
+        assert_eq!(ceil(1.1f32), 2.0);
+        assert_eq!(ceil(2.9f32), 3.0);
+    }
+
+    #[test]
+    fn spec_tests_f32() {
+        spec_test::<f32>();
+    }
+
+    #[test]
+    fn sanity_check_f64() {
+        assert_eq!(ceil(1.1f64), 2.0);
+        assert_eq!(ceil(2.9f64), 3.0);
+    }
+
+    #[test]
+    fn spec_tests_f64() {
+        spec_test::<f64>();
+    }
+}
diff --git a/src/math/generic/mod.rs b/src/math/generic/mod.rs
index 3b5a2c3ef..f8bb9fa6a 100644
--- a/src/math/generic/mod.rs
+++ b/src/math/generic/mod.rs
@@ -1,9 +1,11 @@
+mod ceil;
 mod copysign;
 mod fabs;
 mod fdim;
 mod sqrt;
 mod trunc;
 
+pub use ceil::ceil;
 pub use copysign::copysign;
 pub use fabs::fabs;
 pub use fdim::fdim;

From a836f6ea74ecf3b34028ca4c4bfc742439e97cb5 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 13 Jan 2025 11:17:17 +0000
Subject: [PATCH 164/279] Add `ceilf16` and `ceilf128`

Use the generic algorithms to provide implementations for these
routines.
---
 crates/libm-macros/src/shared.rs             |  4 ++--
 crates/libm-test/benches/icount.rs           |  2 ++
 crates/libm-test/benches/random.rs           |  4 +++-
 crates/libm-test/src/mpfloat.rs              |  4 ++++
 crates/libm-test/tests/compare_built_musl.rs |  2 ++
 crates/util/src/main.rs                      |  4 +++-
 etc/function-definitions.json                | 20 ++++++++++++++++++--
 etc/function-list.txt                        |  2 ++
 src/math/ceilf128.rs                         |  7 +++++++
 src/math/ceilf16.rs                          |  7 +++++++
 src/math/generic/ceil.rs                     | 14 ++++++++++++++
 src/math/mod.rs                              |  4 ++++
 12 files changed, 68 insertions(+), 6 deletions(-)
 create mode 100644 src/math/ceilf128.rs
 create mode 100644 src/math/ceilf16.rs

diff --git a/crates/libm-macros/src/shared.rs b/crates/libm-macros/src/shared.rs
index d17bc6ffc..e7d3d18d9 100644
--- a/crates/libm-macros/src/shared.rs
+++ b/crates/libm-macros/src/shared.rs
@@ -9,7 +9,7 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
         FloatTy::F16,
         Signature { args: &[Ty::F16], returns: &[Ty::F16] },
         None,
-        &["fabsf16", "sqrtf16", "truncf16"],
+        &["ceilf16", "fabsf16", "sqrtf16", "truncf16"],
     ),
     (
         // `fn(f32) -> f32`
@@ -40,7 +40,7 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
         FloatTy::F128,
         Signature { args: &[Ty::F128], returns: &[Ty::F128] },
         None,
-        &["fabsf128", "sqrtf128", "truncf128"],
+        &["ceilf128", "fabsf128", "sqrtf128", "truncf128"],
     ),
     (
         // `(f16, f16) -> f16`
diff --git a/crates/libm-test/benches/icount.rs b/crates/libm-test/benches/icount.rs
index c8451f88c..84be3d524 100644
--- a/crates/libm-test/benches/icount.rs
+++ b/crates/libm-test/benches/icount.rs
@@ -69,6 +69,8 @@ main!(
     icount_bench_cbrt_group,
     icount_bench_cbrtf_group,
     icount_bench_ceil_group,
+    icount_bench_ceilf128_group,
+    icount_bench_ceilf16_group,
     icount_bench_ceilf_group,
     icount_bench_copysign_group,
     icount_bench_copysignf128_group,
diff --git a/crates/libm-test/benches/random.rs b/crates/libm-test/benches/random.rs
index 026841202..511e26d91 100644
--- a/crates/libm-test/benches/random.rs
+++ b/crates/libm-test/benches/random.rs
@@ -117,7 +117,9 @@ libm_macros::for_each_function! {
         exp10 | exp10f | exp2 | exp2f => (true, Some(musl_math_sys::MACRO_FN_NAME)),
 
         // Musl does not provide `f16` and `f128` functions
-        copysignf128
+        ceilf128
+        | ceilf16
+        | copysignf128
         | copysignf16
         | fabsf128
         | fabsf16
diff --git a/crates/libm-test/src/mpfloat.rs b/crates/libm-test/src/mpfloat.rs
index 9d95356d3..bbd19dbb0 100644
--- a/crates/libm-test/src/mpfloat.rs
+++ b/crates/libm-test/src/mpfloat.rs
@@ -137,6 +137,8 @@ libm_macros::for_each_function! {
         // Most of these need a manual implementation
         ceil,
         ceilf,
+        ceilf128,
+        ceilf16,
         copysign,
         copysignf,
         copysignf128,
@@ -237,12 +239,14 @@ impl_no_round! {
 #[cfg(f16_enabled)]
 impl_no_round! {
     fabsf16 => abs_mut;
+    ceilf16 => ceil_mut;
     truncf16 => trunc_mut;
 }
 
 #[cfg(f128_enabled)]
 impl_no_round! {
     fabsf128 => abs_mut;
+    ceilf128 => ceil_mut;
     truncf128 => trunc_mut;
 }
 
diff --git a/crates/libm-test/tests/compare_built_musl.rs b/crates/libm-test/tests/compare_built_musl.rs
index 24703f273..e13acf3de 100644
--- a/crates/libm-test/tests/compare_built_musl.rs
+++ b/crates/libm-test/tests/compare_built_musl.rs
@@ -79,6 +79,8 @@ libm_macros::for_each_function! {
         ynf,
 
         // Not provided by musl
+        ceilf128,
+        ceilf16,
         copysignf128,
         copysignf16,
         fabsf128,
diff --git a/crates/util/src/main.rs b/crates/util/src/main.rs
index cd68d9afd..810919339 100644
--- a/crates/util/src/main.rs
+++ b/crates/util/src/main.rs
@@ -84,7 +84,9 @@ fn do_eval(basis: &str, op: &str, inputs: &[&str]) {
         emit_types: [CFn, RustFn, RustArgs],
         extra: (basis, op, inputs),
         fn_extra: match MACRO_FN_NAME {
-            copysignf128
+            ceilf128
+            | ceilf16
+            | copysignf128
             | copysignf16
             | fabsf128
             | fabsf16
diff --git a/etc/function-definitions.json b/etc/function-definitions.json
index 2d0af3bcf..c75152f63 100644
--- a/etc/function-definitions.json
+++ b/etc/function-definitions.json
@@ -109,17 +109,33 @@
             "src/libm_helper.rs",
             "src/math/arch/i586.rs",
             "src/math/arch/wasm32.rs",
-            "src/math/ceil.rs"
+            "src/math/ceil.rs",
+            "src/math/generic/ceil.rs"
         ],
         "type": "f64"
     },
     "ceilf": {
         "sources": [
             "src/math/arch/wasm32.rs",
-            "src/math/ceilf.rs"
+            "src/math/ceilf.rs",
+            "src/math/generic/ceil.rs"
         ],
         "type": "f32"
     },
+    "ceilf128": {
+        "sources": [
+            "src/math/ceilf128.rs",
+            "src/math/generic/ceil.rs"
+        ],
+        "type": "f128"
+    },
+    "ceilf16": {
+        "sources": [
+            "src/math/ceilf16.rs",
+            "src/math/generic/ceil.rs"
+        ],
+        "type": "f16"
+    },
     "copysign": {
         "sources": [
             "src/libm_helper.rs",
diff --git a/etc/function-list.txt b/etc/function-list.txt
index 47c34ab90..337e7e434 100644
--- a/etc/function-list.txt
+++ b/etc/function-list.txt
@@ -17,6 +17,8 @@ cbrt
 cbrtf
 ceil
 ceilf
+ceilf128
+ceilf16
 copysign
 copysignf
 copysignf128
diff --git a/src/math/ceilf128.rs b/src/math/ceilf128.rs
new file mode 100644
index 000000000..89980858e
--- /dev/null
+++ b/src/math/ceilf128.rs
@@ -0,0 +1,7 @@
+/// Ceil (f128)
+///
+/// Finds the nearest integer greater than or equal to `x`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn ceilf128(x: f128) -> f128 {
+    super::generic::ceil(x)
+}
diff --git a/src/math/ceilf16.rs b/src/math/ceilf16.rs
new file mode 100644
index 000000000..2af67eff0
--- /dev/null
+++ b/src/math/ceilf16.rs
@@ -0,0 +1,7 @@
+/// Ceil (f16)
+///
+/// Finds the nearest integer greater than or equal to `x`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn ceilf16(x: f16) -> f16 {
+    super::generic::ceil(x)
+}
diff --git a/src/math/generic/ceil.rs b/src/math/generic/ceil.rs
index d16d06572..34261faf7 100644
--- a/src/math/generic/ceil.rs
+++ b/src/math/generic/ceil.rs
@@ -63,6 +63,14 @@ mod tests {
         }
     }
 
+    /* Skipping f16 / f128 "sanity_check"s due to rejected literal lexing at MSRV */
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn spec_tests_f16() {
+        spec_test::<f16>();
+    }
+
     #[test]
     fn sanity_check_f32() {
         assert_eq!(ceil(1.1f32), 2.0);
@@ -84,4 +92,10 @@ mod tests {
     fn spec_tests_f64() {
         spec_test::<f64>();
     }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn spec_tests_f128() {
+        spec_test::<f128>();
+    }
 }
diff --git a/src/math/mod.rs b/src/math/mod.rs
index 3684025a6..5228e78b7 100644
--- a/src/math/mod.rs
+++ b/src/math/mod.rs
@@ -341,12 +341,14 @@ pub use self::truncf::truncf;
 
 cfg_if! {
     if #[cfg(f16_enabled)] {
+        mod ceilf16;
         mod copysignf16;
         mod fabsf16;
         mod fdimf16;
         mod sqrtf16;
         mod truncf16;
 
+        pub use self::ceilf16::ceilf16;
         pub use self::copysignf16::copysignf16;
         pub use self::fabsf16::fabsf16;
         pub use self::fdimf16::fdimf16;
@@ -357,12 +359,14 @@ cfg_if! {
 
 cfg_if! {
     if #[cfg(f128_enabled)] {
+        mod ceilf128;
         mod copysignf128;
         mod fabsf128;
         mod fdimf128;
         mod sqrtf128;
         mod truncf128;
 
+        pub use self::ceilf128::ceilf128;
         pub use self::copysignf128::copysignf128;
         pub use self::fabsf128::fabsf128;
         pub use self::fdimf128::fdimf128;

From 42647c637581e7ee71fee1244af6eb075f8fa7a4 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 13 Jan 2025 13:27:22 +0000
Subject: [PATCH 165/279] Add a generic version of `floor`

Additionally, make use of this version to implement `floor` and
`floorf`.

Similar to `ceil`, musl'f `ceilf` routine seems to work better for all
float widths than the `ceil` algorithm. Trying with the `ceil` (`f64`)
algorithm produced the following regressions:

    icount::icount_bench_floor_group::icount_bench_floor logspace:setup_floor()
    Performance has regressed: Instructions (14064 > 13171) regressed by +6.78005% (>+5.00000)
      Baselines:                      softfloat|softfloat
      Instructions:                       14064|13171                (+6.78005%) [+1.06780x]
      L1 Hits:                            16821|15802                (+6.44855%) [+1.06449x]
      L2 Hits:                                0|0                    (No change)
      RAM Hits:                               8|9                    (-11.1111%) [-1.12500x]
      Total read+write:                   16829|15811                (+6.43856%) [+1.06439x]
      Estimated Cycles:                   17101|16117                (+6.10535%) [+1.06105x]
    icount::icount_bench_floorf128_group::icount_bench_floorf128 logspace:setup_floorf128()
      Baselines:                      softfloat|softfloat
      Instructions:                      166868|N/A                  (*********)
      L1 Hits:                           221429|N/A                  (*********)
      L2 Hits:                                1|N/A                  (*********)
      RAM Hits:                              34|N/A                  (*********)
      Total read+write:                  221464|N/A                  (*********)
      Estimated Cycles:                  222624|N/A                  (*********)
    icount::icount_bench_floorf16_group::icount_bench_floorf16 logspace:setup_floorf16()
      Baselines:                      softfloat|softfloat
      Instructions:                      143029|N/A                  (*********)
      L1 Hits:                           176517|N/A                  (*********)
      L2 Hits:                                1|N/A                  (*********)
      RAM Hits:                              13|N/A                  (*********)
      Total read+write:                  176531|N/A                  (*********)
      Estimated Cycles:                  176977|N/A                  (*********)
    icount::icount_bench_floorf_group::icount_bench_floorf logspace:setup_floorf()
    Performance has regressed: Instructions (14732 > 10441) regressed by +41.0976% (>+5.00000)
      Baselines:                      softfloat|softfloat
      Instructions:                       14732|10441                (+41.0976%) [+1.41098x]
      L1 Hits:                            17616|13027                (+35.2268%) [+1.35227x]
      L2 Hits:                                0|0                    (No change)
      RAM Hits:                               8|6                    (+33.3333%) [+1.33333x]
      Total read+write:                   17624|13033                (+35.2260%) [+1.35226x]
      Estimated Cycles:                   17896|13237                (+35.1968%) [+1.35197x]
---
 etc/function-definitions.json |   6 +-
 src/math/floor.rs             |  41 +------------
 src/math/floorf.rs            |  52 +----------------
 src/math/generic/floor.rs     | 106 ++++++++++++++++++++++++++++++++++
 src/math/generic/mod.rs       |   2 +
 5 files changed, 114 insertions(+), 93 deletions(-)
 create mode 100644 src/math/generic/floor.rs

diff --git a/etc/function-definitions.json b/etc/function-definitions.json
index c75152f63..6a865f427 100644
--- a/etc/function-definitions.json
+++ b/etc/function-definitions.json
@@ -336,14 +336,16 @@
             "src/libm_helper.rs",
             "src/math/arch/i586.rs",
             "src/math/arch/wasm32.rs",
-            "src/math/floor.rs"
+            "src/math/floor.rs",
+            "src/math/generic/floor.rs"
         ],
         "type": "f64"
     },
     "floorf": {
         "sources": [
             "src/math/arch/wasm32.rs",
-            "src/math/floorf.rs"
+            "src/math/floorf.rs",
+            "src/math/generic/floor.rs"
         ],
         "type": "f32"
     },
diff --git a/src/math/floor.rs b/src/math/floor.rs
index 2823bf44d..b4f02abc4 100644
--- a/src/math/floor.rs
+++ b/src/math/floor.rs
@@ -1,8 +1,3 @@
-#![allow(unreachable_code)]
-use core::f64;
-
-const TOINT: f64 = 1. / f64::EPSILON;
-
 /// Floor (f64)
 ///
 /// Finds the nearest integer less than or equal to `x`.
@@ -15,39 +10,5 @@ pub fn floor(x: f64) -> f64 {
         args: x,
     }
 
-    let ui = x.to_bits();
-    let e = ((ui >> 52) & 0x7ff) as i32;
-
-    if (e >= 0x3ff + 52) || (x == 0.) {
-        return x;
-    }
-    /* y = int(x) - x, where int(x) is an integer neighbor of x */
-    let y = if (ui >> 63) != 0 { x - TOINT + TOINT - x } else { x + TOINT - TOINT - x };
-    /* special case because of non-nearest rounding modes */
-    if e < 0x3ff {
-        force_eval!(y);
-        return if (ui >> 63) != 0 { -1. } else { 0. };
-    }
-    if y > 0. { x + y - 1. } else { x + y }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn sanity_check() {
-        assert_eq!(floor(1.1), 1.0);
-        assert_eq!(floor(2.9), 2.0);
-    }
-
-    /// The spec: https://en.cppreference.com/w/cpp/numeric/math/floor
-    #[test]
-    fn spec_tests() {
-        // Not Asserted: that the current rounding mode has no effect.
-        assert!(floor(f64::NAN).is_nan());
-        for f in [0.0, -0.0, f64::INFINITY, f64::NEG_INFINITY].iter().copied() {
-            assert_eq!(floor(f), f);
-        }
-    }
+    return super::generic::floor(x);
 }
diff --git a/src/math/floorf.rs b/src/math/floorf.rs
index 23a18c0f7..16957b7f3 100644
--- a/src/math/floorf.rs
+++ b/src/math/floorf.rs
@@ -1,5 +1,3 @@
-use core::f32;
-
 /// Floor (f32)
 ///
 /// Finds the nearest integer less than or equal to `x`.
@@ -11,53 +9,5 @@ pub fn floorf(x: f32) -> f32 {
         args: x,
     }
 
-    let mut ui = x.to_bits();
-    let e = (((ui >> 23) as i32) & 0xff) - 0x7f;
-
-    if e >= 23 {
-        return x;
-    }
-    if e >= 0 {
-        let m: u32 = 0x007fffff >> e;
-        if (ui & m) == 0 {
-            return x;
-        }
-        force_eval!(x + f32::from_bits(0x7b800000));
-        if ui >> 31 != 0 {
-            ui += m;
-        }
-        ui &= !m;
-    } else {
-        force_eval!(x + f32::from_bits(0x7b800000));
-        if ui >> 31 == 0 {
-            ui = 0;
-        } else if ui << 1 != 0 {
-            return -1.0;
-        }
-    }
-    f32::from_bits(ui)
-}
-
-// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520
-#[cfg(not(target_arch = "powerpc64"))]
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn sanity_check() {
-        assert_eq!(floorf(0.5), 0.0);
-        assert_eq!(floorf(1.1), 1.0);
-        assert_eq!(floorf(2.9), 2.0);
-    }
-
-    /// The spec: https://en.cppreference.com/w/cpp/numeric/math/floor
-    #[test]
-    fn spec_tests() {
-        // Not Asserted: that the current rounding mode has no effect.
-        assert!(floorf(f32::NAN).is_nan());
-        for f in [0.0, -0.0, f32::INFINITY, f32::NEG_INFINITY].iter().copied() {
-            assert_eq!(floorf(f), f);
-        }
-    }
+    return super::generic::floor(x);
 }
diff --git a/src/math/generic/floor.rs b/src/math/generic/floor.rs
new file mode 100644
index 000000000..6754c08f8
--- /dev/null
+++ b/src/math/generic/floor.rs
@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: MIT
+ * origin: musl src/math/floor.c */
+
+//! Generic `floor` algorithm.
+//!
+//! Note that this uses the algorithm from musl's `floorf` rather than `floor` or `floorl` because
+//! performance seems to be better (based on icount) and it does not seem to experience rounding
+//! errors on i386.
+
+use super::super::{Float, Int, IntTy, MinInt};
+
+pub fn floor<F: Float>(x: F) -> F {
+    let zero = IntTy::<F>::ZERO;
+
+    let mut ix = x.to_bits();
+    let e = x.exp_unbiased();
+
+    // If the represented value has no fractional part, no truncation is needed.
+    if e >= F::SIG_BITS as i32 {
+        return x;
+    }
+
+    if e >= 0 {
+        // |x| >= 1.0
+
+        let m = F::SIG_MASK >> e.unsigned();
+        if ix & m == zero {
+            // Portion to be masked is already zero; no adjustment needed.
+            return x;
+        }
+
+        // Otherwise, raise an inexact exception.
+        force_eval!(x + F::MAX);
+
+        if x.is_sign_negative() {
+            ix += m;
+        }
+
+        ix &= !m;
+        F::from_bits(ix)
+    } else {
+        // |x| < 1.0, raise an inexact exception since truncation will happen (unless x == 0).
+        force_eval!(x + F::MAX);
+
+        if x.is_sign_positive() {
+            // 0.0 <= x < 1.0; rounding down goes toward +0.0.
+            F::ZERO
+        } else if ix << 1 != zero {
+            // -1.0 < x < 0.0; rounding down goes toward -1.0.
+            F::NEG_ONE
+        } else {
+            // -0.0 remains unchanged
+            x
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Test against https://en.cppreference.com/w/cpp/numeric/math/floor
+    fn spec_test<F: Float>() {
+        // Not Asserted: that the current rounding mode has no effect.
+        for f in [F::ZERO, F::NEG_ZERO, F::INFINITY, F::NEG_INFINITY].iter().copied() {
+            assert_biteq!(floor(f), f);
+        }
+    }
+
+    /* Skipping f16 / f128 "sanity_check"s due to rejected literal lexing at MSRV */
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn spec_tests_f16() {
+        spec_test::<f16>();
+    }
+
+    #[test]
+    fn sanity_check_f32() {
+        assert_eq!(floor(0.5f32), 0.0);
+        assert_eq!(floor(1.1f32), 1.0);
+        assert_eq!(floor(2.9f32), 2.0);
+    }
+
+    #[test]
+    fn spec_tests_f32() {
+        spec_test::<f32>();
+    }
+
+    #[test]
+    fn sanity_check_f64() {
+        assert_eq!(floor(1.1f64), 1.0);
+        assert_eq!(floor(2.9f64), 2.0);
+    }
+
+    #[test]
+    fn spec_tests_f64() {
+        spec_test::<f64>();
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn spec_tests_f128() {
+        spec_test::<f128>();
+    }
+}
diff --git a/src/math/generic/mod.rs b/src/math/generic/mod.rs
index f8bb9fa6a..b08a77d5d 100644
--- a/src/math/generic/mod.rs
+++ b/src/math/generic/mod.rs
@@ -2,6 +2,7 @@ mod ceil;
 mod copysign;
 mod fabs;
 mod fdim;
+mod floor;
 mod sqrt;
 mod trunc;
 
@@ -9,5 +10,6 @@ pub use ceil::ceil;
 pub use copysign::copysign;
 pub use fabs::fabs;
 pub use fdim::fdim;
+pub use floor::floor;
 pub use sqrt::sqrt;
 pub use trunc::trunc;

From 23d616bef599e5ea0bbb2af714076d67d914a973 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 13 Jan 2025 13:33:24 +0000
Subject: [PATCH 166/279] Add `floorf16` and `floorf128`

Use the generic algorithms to provide implementations for these
routines.
---
 crates/compiler-builtins-smoke-test/src/lib.rs |  2 ++
 crates/libm-macros/src/shared.rs               |  4 ++--
 crates/libm-test/benches/icount.rs             |  2 ++
 crates/libm-test/benches/random.rs             |  4 +++-
 crates/libm-test/src/mpfloat.rs                |  4 ++++
 crates/libm-test/tests/compare_built_musl.rs   |  2 ++
 crates/util/src/main.rs                        |  2 ++
 etc/function-definitions.json                  | 14 ++++++++++++++
 etc/function-list.txt                          |  2 ++
 src/math/floorf128.rs                          |  7 +++++++
 src/math/floorf16.rs                           |  7 +++++++
 src/math/mod.rs                                |  4 ++++
 12 files changed, 51 insertions(+), 3 deletions(-)
 create mode 100644 src/math/floorf128.rs
 create mode 100644 src/math/floorf16.rs

diff --git a/crates/compiler-builtins-smoke-test/src/lib.rs b/crates/compiler-builtins-smoke-test/src/lib.rs
index b9521eb07..4834ba256 100644
--- a/crates/compiler-builtins-smoke-test/src/lib.rs
+++ b/crates/compiler-builtins-smoke-test/src/lib.rs
@@ -97,6 +97,8 @@ no_mangle! {
     fdimf16(x: f16, y: f16) -> f16;
     floor(x: f64) -> f64;
     floorf(x: f32) -> f32;
+    floorf128(x: f128) -> f128;
+    floorf16(x: f16) -> f16;
     fma(x: f64, y: f64, z: f64) -> f64;
     fmaf(x: f32, y: f32, z: f32) -> f32;
     fmax(x: f64, y: f64) -> f64;
diff --git a/crates/libm-macros/src/shared.rs b/crates/libm-macros/src/shared.rs
index e7d3d18d9..64623658d 100644
--- a/crates/libm-macros/src/shared.rs
+++ b/crates/libm-macros/src/shared.rs
@@ -9,7 +9,7 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
         FloatTy::F16,
         Signature { args: &[Ty::F16], returns: &[Ty::F16] },
         None,
-        &["ceilf16", "fabsf16", "sqrtf16", "truncf16"],
+        &["ceilf16", "fabsf16", "floorf16", "sqrtf16", "truncf16"],
     ),
     (
         // `fn(f32) -> f32`
@@ -40,7 +40,7 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
         FloatTy::F128,
         Signature { args: &[Ty::F128], returns: &[Ty::F128] },
         None,
-        &["ceilf128", "fabsf128", "sqrtf128", "truncf128"],
+        &["ceilf128", "fabsf128", "floorf128", "sqrtf128", "truncf128"],
     ),
     (
         // `(f16, f16) -> f16`
diff --git a/crates/libm-test/benches/icount.rs b/crates/libm-test/benches/icount.rs
index 84be3d524..eae63619c 100644
--- a/crates/libm-test/benches/icount.rs
+++ b/crates/libm-test/benches/icount.rs
@@ -101,6 +101,8 @@ main!(
     icount_bench_fdimf16_group,
     icount_bench_fdimf_group,
     icount_bench_floor_group,
+    icount_bench_floorf128_group,
+    icount_bench_floorf16_group,
     icount_bench_floorf_group,
     icount_bench_fma_group,
     icount_bench_fmaf_group,
diff --git a/crates/libm-test/benches/random.rs b/crates/libm-test/benches/random.rs
index 511e26d91..bd7b35971 100644
--- a/crates/libm-test/benches/random.rs
+++ b/crates/libm-test/benches/random.rs
@@ -125,8 +125,10 @@ libm_macros::for_each_function! {
         | fabsf16
         | fdimf128
         | fdimf16
-        | sqrtf16
+        | floorf128
+        | floorf16
         | sqrtf128
+        | sqrtf16
         | truncf128
         | truncf16 => (false, None),
 
diff --git a/crates/libm-test/src/mpfloat.rs b/crates/libm-test/src/mpfloat.rs
index bbd19dbb0..53fade7d0 100644
--- a/crates/libm-test/src/mpfloat.rs
+++ b/crates/libm-test/src/mpfloat.rs
@@ -148,6 +148,8 @@ libm_macros::for_each_function! {
         fabsf128,
         fabsf16,floor,
         floorf,
+        floorf128,
+        floorf16,
         fmod,
         fmodf,
         frexp,
@@ -240,6 +242,7 @@ impl_no_round! {
 impl_no_round! {
     fabsf16 => abs_mut;
     ceilf16 => ceil_mut;
+    floorf16 => floor_mut;
     truncf16 => trunc_mut;
 }
 
@@ -247,6 +250,7 @@ impl_no_round! {
 impl_no_round! {
     fabsf128 => abs_mut;
     ceilf128 => ceil_mut;
+    floorf128 => floor_mut;
     truncf128 => trunc_mut;
 }
 
diff --git a/crates/libm-test/tests/compare_built_musl.rs b/crates/libm-test/tests/compare_built_musl.rs
index e13acf3de..335496fce 100644
--- a/crates/libm-test/tests/compare_built_musl.rs
+++ b/crates/libm-test/tests/compare_built_musl.rs
@@ -87,6 +87,8 @@ libm_macros::for_each_function! {
         fabsf16,
         fdimf128,
         fdimf16,
+        floorf128,
+        floorf16,
         truncf128,
         truncf16,
         sqrtf16,
diff --git a/crates/util/src/main.rs b/crates/util/src/main.rs
index 810919339..988c01d07 100644
--- a/crates/util/src/main.rs
+++ b/crates/util/src/main.rs
@@ -92,6 +92,8 @@ fn do_eval(basis: &str, op: &str, inputs: &[&str]) {
             | fabsf16
             | fdimf128
             | fdimf16
+            | floorf128
+            | floorf16
             | sqrtf128
             | sqrtf16
             | truncf128
diff --git a/etc/function-definitions.json b/etc/function-definitions.json
index 6a865f427..eef176fb5 100644
--- a/etc/function-definitions.json
+++ b/etc/function-definitions.json
@@ -349,6 +349,20 @@
         ],
         "type": "f32"
     },
+    "floorf128": {
+        "sources": [
+            "src/math/floorf128.rs",
+            "src/math/generic/floor.rs"
+        ],
+        "type": "f128"
+    },
+    "floorf16": {
+        "sources": [
+            "src/math/floorf16.rs",
+            "src/math/generic/floor.rs"
+        ],
+        "type": "f16"
+    },
     "fma": {
         "sources": [
             "src/libm_helper.rs",
diff --git a/etc/function-list.txt b/etc/function-list.txt
index 337e7e434..3bb895f4a 100644
--- a/etc/function-list.txt
+++ b/etc/function-list.txt
@@ -49,6 +49,8 @@ fdimf128
 fdimf16
 floor
 floorf
+floorf128
+floorf16
 fma
 fmaf
 fmax
diff --git a/src/math/floorf128.rs b/src/math/floorf128.rs
new file mode 100644
index 000000000..9a9fe4151
--- /dev/null
+++ b/src/math/floorf128.rs
@@ -0,0 +1,7 @@
+/// Floor (f128)
+///
+/// Finds the nearest integer less than or equal to `x`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn floorf128(x: f128) -> f128 {
+    return super::generic::floor(x);
+}
diff --git a/src/math/floorf16.rs b/src/math/floorf16.rs
new file mode 100644
index 000000000..f9b868e04
--- /dev/null
+++ b/src/math/floorf16.rs
@@ -0,0 +1,7 @@
+/// Floor (f16)
+///
+/// Finds the nearest integer less than or equal to `x`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn floorf16(x: f16) -> f16 {
+    return super::generic::floor(x);
+}
diff --git a/src/math/mod.rs b/src/math/mod.rs
index 5228e78b7..68d201524 100644
--- a/src/math/mod.rs
+++ b/src/math/mod.rs
@@ -345,6 +345,7 @@ cfg_if! {
         mod copysignf16;
         mod fabsf16;
         mod fdimf16;
+        mod floorf16;
         mod sqrtf16;
         mod truncf16;
 
@@ -352,6 +353,7 @@ cfg_if! {
         pub use self::copysignf16::copysignf16;
         pub use self::fabsf16::fabsf16;
         pub use self::fdimf16::fdimf16;
+        pub use self::floorf16::floorf16;
         pub use self::sqrtf16::sqrtf16;
         pub use self::truncf16::truncf16;
     }
@@ -363,6 +365,7 @@ cfg_if! {
         mod copysignf128;
         mod fabsf128;
         mod fdimf128;
+        mod floorf128;
         mod sqrtf128;
         mod truncf128;
 
@@ -370,6 +373,7 @@ cfg_if! {
         pub use self::copysignf128::copysignf128;
         pub use self::fabsf128::fabsf128;
         pub use self::fdimf128::fdimf128;
+        pub use self::floorf128::floorf128;
         pub use self::sqrtf128::sqrtf128;
         pub use self::truncf128::truncf128;
     }

From 31f080219adc55756be2727623bf1aea68bedaf7 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 22 Jan 2025 08:48:02 +0000
Subject: [PATCH 167/279] Adjust `ceil` style to be more similar to `floor`

---
 crates/compiler-builtins-smoke-test/src/lib.rs |  2 ++
 src/math/generic/ceil.rs                       | 12 ++++++++----
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/crates/compiler-builtins-smoke-test/src/lib.rs b/crates/compiler-builtins-smoke-test/src/lib.rs
index 4834ba256..1a7aa983e 100644
--- a/crates/compiler-builtins-smoke-test/src/lib.rs
+++ b/crates/compiler-builtins-smoke-test/src/lib.rs
@@ -67,6 +67,8 @@ no_mangle! {
     cbrtf(x: f32) -> f32;
     ceil(x: f64) -> f64;
     ceilf(x: f32) -> f32;
+    ceilf128(x: f128) -> f128;
+    ceilf16(x: f16) -> f16;
     copysign(x: f64, y: f64) -> f64;
     copysignf(x: f32, y: f32) -> f32;
     copysignf128(x: f128, y: f128) -> f128;
diff --git a/src/math/generic/ceil.rs b/src/math/generic/ceil.rs
index 34261faf7..971a4d3d8 100644
--- a/src/math/generic/ceil.rs
+++ b/src/math/generic/ceil.rs
@@ -31,24 +31,28 @@ pub fn ceil<F: Float>(x: F) -> F {
 
         // Otherwise, raise an inexact exception.
         force_eval!(x + F::MAX);
+
         if x.is_sign_positive() {
             ix += m;
         }
+
         ix &= !m;
+        F::from_bits(ix)
     } else {
         // |x| < 1.0, raise an inexact exception since truncation will happen (unless x == 0).
         force_eval!(x + F::MAX);
 
         if x.is_sign_negative() {
             // -1.0 < x <= -0.0; rounding up goes toward -0.0.
-            return F::NEG_ZERO;
+            F::NEG_ZERO
         } else if ix << 1 != zero {
             // 0.0 < x < 1.0; rounding up goes toward +1.0.
-            return F::ONE;
+            F::ONE
+        } else {
+            // +0.0 remains unchanged
+            x
         }
     }
-
-    F::from_bits(ix)
 }
 
 #[cfg(test)]

From a9cfff95f7388119513d85194fc797c30c9f329d Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 22 Jan 2025 09:06:26 +0000
Subject: [PATCH 168/279] Add a generic version of `rint`

Use this to implement `rint` and `rintf`.
---
 etc/function-definitions.json |  2 +
 src/math/generic/mod.rs       |  2 +
 src/math/generic/rint.rs      | 72 +++++++++++++++++++++++++++++++++++
 src/math/rint.rs              | 48 +----------------------
 src/math/rintf.rs             | 48 +----------------------
 5 files changed, 78 insertions(+), 94 deletions(-)
 create mode 100644 src/math/generic/rint.rs

diff --git a/etc/function-definitions.json b/etc/function-definitions.json
index eef176fb5..86b088292 100644
--- a/etc/function-definitions.json
+++ b/etc/function-definitions.json
@@ -654,6 +654,7 @@
             "src/libm_helper.rs",
             "src/math/arch/aarch64.rs",
             "src/math/arch/wasm32.rs",
+            "src/math/generic/rint.rs",
             "src/math/rint.rs"
         ],
         "type": "f64"
@@ -662,6 +663,7 @@
         "sources": [
             "src/math/arch/aarch64.rs",
             "src/math/arch/wasm32.rs",
+            "src/math/generic/rint.rs",
             "src/math/rintf.rs"
         ],
         "type": "f32"
diff --git a/src/math/generic/mod.rs b/src/math/generic/mod.rs
index b08a77d5d..d3df650e1 100644
--- a/src/math/generic/mod.rs
+++ b/src/math/generic/mod.rs
@@ -3,6 +3,7 @@ mod copysign;
 mod fabs;
 mod fdim;
 mod floor;
+mod rint;
 mod sqrt;
 mod trunc;
 
@@ -11,5 +12,6 @@ pub use copysign::copysign;
 pub use fabs::fabs;
 pub use fdim::fdim;
 pub use floor::floor;
+pub use rint::rint;
 pub use sqrt::sqrt;
 pub use trunc::trunc;
diff --git a/src/math/generic/rint.rs b/src/math/generic/rint.rs
new file mode 100644
index 000000000..80ba1faac
--- /dev/null
+++ b/src/math/generic/rint.rs
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: MIT */
+/* origin: musl src/math/rint.c */
+
+use super::super::Float;
+
+pub fn rint<F: Float>(x: F) -> F {
+    let toint = F::ONE / F::EPSILON;
+    let e = x.exp();
+    let positive = x.is_sign_positive();
+
+    // On i386 `force_eval!` must be used to force rounding via storage to memory. Otherwise,
+    // the excess precission from x87 would cause an incorrect final result.
+    let use_force = cfg!(x86_no_sse) && F::BITS == 32 || F::BITS == 64;
+
+    if e >= F::EXP_BIAS + F::SIG_BITS {
+        // No fractional part; exact result can be returned.
+        x
+    } else {
+        // Apply a net-zero adjustment that nudges `y` in the direction of the rounding mode.
+        let y = if positive {
+            let tmp = if use_force { force_eval!(x) } else { x } + toint;
+            (if use_force { force_eval!(tmp) } else { tmp } - toint)
+        } else {
+            let tmp = if use_force { force_eval!(x) } else { x } - toint;
+            (if use_force { force_eval!(tmp) } else { tmp } + toint)
+        };
+
+        if y == F::ZERO {
+            // A zero result takes the sign of the input.
+            if positive { F::ZERO } else { F::NEG_ZERO }
+        } else {
+            y
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn zeroes_f32() {
+        assert_biteq!(rint(0.0_f32), 0.0_f32);
+        assert_biteq!(rint(-0.0_f32), -0.0_f32);
+    }
+
+    #[test]
+    fn sanity_check_f32() {
+        assert_biteq!(rint(-1.0_f32), -1.0);
+        assert_biteq!(rint(2.8_f32), 3.0);
+        assert_biteq!(rint(-0.5_f32), -0.0);
+        assert_biteq!(rint(0.5_f32), 0.0);
+        assert_biteq!(rint(-1.5_f32), -2.0);
+        assert_biteq!(rint(1.5_f32), 2.0);
+    }
+
+    #[test]
+    fn zeroes_f64() {
+        assert_biteq!(rint(0.0_f64), 0.0_f64);
+        assert_biteq!(rint(-0.0_f64), -0.0_f64);
+    }
+
+    #[test]
+    fn sanity_check_f64() {
+        assert_biteq!(rint(-1.0_f64), -1.0);
+        assert_biteq!(rint(2.8_f64), 3.0);
+        assert_biteq!(rint(-0.5_f64), -0.0);
+        assert_biteq!(rint(0.5_f64), 0.0);
+        assert_biteq!(rint(-1.5_f64), -2.0);
+        assert_biteq!(rint(1.5_f64), 2.0);
+    }
+}
diff --git a/src/math/rint.rs b/src/math/rint.rs
index c9ea6402e..592583aa5 100644
--- a/src/math/rint.rs
+++ b/src/math/rint.rs
@@ -9,51 +9,5 @@ pub fn rint(x: f64) -> f64 {
         args: x,
     }
 
-    let one_over_e = 1.0 / f64::EPSILON;
-    let as_u64: u64 = x.to_bits();
-    let exponent: u64 = (as_u64 >> 52) & 0x7ff;
-    let is_positive = (as_u64 >> 63) == 0;
-    if exponent >= 0x3ff + 52 {
-        x
-    } else {
-        let ans = if is_positive {
-            #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
-            let x = force_eval!(x);
-            let xplusoneovere = x + one_over_e;
-            #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
-            let xplusoneovere = force_eval!(xplusoneovere);
-            xplusoneovere - one_over_e
-        } else {
-            #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
-            let x = force_eval!(x);
-            let xminusoneovere = x - one_over_e;
-            #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
-            let xminusoneovere = force_eval!(xminusoneovere);
-            xminusoneovere + one_over_e
-        };
-
-        if ans == 0.0 { if is_positive { 0.0 } else { -0.0 } } else { ans }
-    }
-}
-
-// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520
-#[cfg(not(target_arch = "powerpc64"))]
-#[cfg(test)]
-mod tests {
-    use super::rint;
-
-    #[test]
-    fn negative_zero() {
-        assert_eq!(rint(-0.0_f64).to_bits(), (-0.0_f64).to_bits());
-    }
-
-    #[test]
-    fn sanity_check() {
-        assert_eq!(rint(-1.0), -1.0);
-        assert_eq!(rint(2.8), 3.0);
-        assert_eq!(rint(-0.5), -0.0);
-        assert_eq!(rint(0.5), 0.0);
-        assert_eq!(rint(-1.5), -2.0);
-        assert_eq!(rint(1.5), 2.0);
-    }
+    super::generic::rint(x)
 }
diff --git a/src/math/rintf.rs b/src/math/rintf.rs
index 33b5b3dde..56666df11 100644
--- a/src/math/rintf.rs
+++ b/src/math/rintf.rs
@@ -9,51 +9,5 @@ pub fn rintf(x: f32) -> f32 {
         args: x,
     }
 
-    let one_over_e = 1.0 / f32::EPSILON;
-    let as_u32: u32 = x.to_bits();
-    let exponent: u32 = (as_u32 >> 23) & 0xff;
-    let is_positive = (as_u32 >> 31) == 0;
-    if exponent >= 0x7f + 23 {
-        x
-    } else {
-        let ans = if is_positive {
-            #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
-            let x = force_eval!(x);
-            let xplusoneovere = x + one_over_e;
-            #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
-            let xplusoneovere = force_eval!(xplusoneovere);
-            xplusoneovere - one_over_e
-        } else {
-            #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
-            let x = force_eval!(x);
-            let xminusoneovere = x - one_over_e;
-            #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
-            let xminusoneovere = force_eval!(xminusoneovere);
-            xminusoneovere + one_over_e
-        };
-
-        if ans == 0.0 { if is_positive { 0.0 } else { -0.0 } } else { ans }
-    }
-}
-
-// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520
-#[cfg(not(target_arch = "powerpc64"))]
-#[cfg(test)]
-mod tests {
-    use super::rintf;
-
-    #[test]
-    fn negative_zero() {
-        assert_eq!(rintf(-0.0_f32).to_bits(), (-0.0_f32).to_bits());
-    }
-
-    #[test]
-    fn sanity_check() {
-        assert_eq!(rintf(-1.0), -1.0);
-        assert_eq!(rintf(2.8), 3.0);
-        assert_eq!(rintf(-0.5), -0.0);
-        assert_eq!(rintf(0.5), 0.0);
-        assert_eq!(rintf(-1.5), -2.0);
-        assert_eq!(rintf(1.5), 2.0);
-    }
+    super::generic::rint(x)
 }

From d2da7f7bbe4ef9c09f97deceddaba7810cb1d8b2 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 22 Jan 2025 10:58:29 +0000
Subject: [PATCH 169/279] Add `rintf16` and `rintf128`

Use the generic algorithms to provide implementations for these
routines.
---
 crates/compiler-builtins-smoke-test/src/lib.rs |  2 ++
 crates/libm-macros/src/shared.rs               |  4 ++--
 crates/libm-test/benches/icount.rs             |  2 ++
 crates/libm-test/benches/random.rs             |  2 ++
 crates/libm-test/src/mpfloat.rs                |  8 ++++++--
 crates/libm-test/tests/compare_built_musl.rs   |  6 ++++--
 crates/util/src/main.rs                        |  2 ++
 etc/function-definitions.json                  | 14 ++++++++++++++
 etc/function-list.txt                          |  2 ++
 src/math/mod.rs                                |  4 ++++
 src/math/rint.rs                               |  1 +
 src/math/rintf.rs                              |  1 +
 src/math/rintf128.rs                           |  5 +++++
 src/math/rintf16.rs                            |  5 +++++
 14 files changed, 52 insertions(+), 6 deletions(-)
 create mode 100644 src/math/rintf128.rs
 create mode 100644 src/math/rintf16.rs

diff --git a/crates/compiler-builtins-smoke-test/src/lib.rs b/crates/compiler-builtins-smoke-test/src/lib.rs
index 1a7aa983e..ccd0642a2 100644
--- a/crates/compiler-builtins-smoke-test/src/lib.rs
+++ b/crates/compiler-builtins-smoke-test/src/lib.rs
@@ -145,6 +145,8 @@ no_mangle! {
     remquof(x: f32, y: f32 | q: &mut c_int) -> f32;
     rint(x: f64) -> f64;
     rintf(x: f32) -> f32;
+    rintf128(x: f128) -> f128;
+    rintf16(x: f16) -> f16;
     round(x: f64) -> f64;
     roundf(x: f32) -> f32;
     scalbn(x: f64, y: c_int) -> f64;
diff --git a/crates/libm-macros/src/shared.rs b/crates/libm-macros/src/shared.rs
index 64623658d..80bd3e907 100644
--- a/crates/libm-macros/src/shared.rs
+++ b/crates/libm-macros/src/shared.rs
@@ -9,7 +9,7 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
         FloatTy::F16,
         Signature { args: &[Ty::F16], returns: &[Ty::F16] },
         None,
-        &["ceilf16", "fabsf16", "floorf16", "sqrtf16", "truncf16"],
+        &["ceilf16", "fabsf16", "floorf16", "rintf16", "sqrtf16", "truncf16"],
     ),
     (
         // `fn(f32) -> f32`
@@ -40,7 +40,7 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
         FloatTy::F128,
         Signature { args: &[Ty::F128], returns: &[Ty::F128] },
         None,
-        &["ceilf128", "fabsf128", "floorf128", "sqrtf128", "truncf128"],
+        &["ceilf128", "fabsf128", "floorf128", "rintf128", "sqrtf128", "truncf128"],
     ),
     (
         // `(f16, f16) -> f16`
diff --git a/crates/libm-test/benches/icount.rs b/crates/libm-test/benches/icount.rs
index eae63619c..84f953262 100644
--- a/crates/libm-test/benches/icount.rs
+++ b/crates/libm-test/benches/icount.rs
@@ -149,6 +149,8 @@ main!(
     icount_bench_remquo_group,
     icount_bench_remquof_group,
     icount_bench_rint_group,
+    icount_bench_rintf128_group,
+    icount_bench_rintf16_group,
     icount_bench_rintf_group,
     icount_bench_round_group,
     icount_bench_roundf_group,
diff --git a/crates/libm-test/benches/random.rs b/crates/libm-test/benches/random.rs
index bd7b35971..4d050e817 100644
--- a/crates/libm-test/benches/random.rs
+++ b/crates/libm-test/benches/random.rs
@@ -127,6 +127,8 @@ libm_macros::for_each_function! {
         | fdimf16
         | floorf128
         | floorf16
+        | rintf128
+        | rintf16
         | sqrtf128
         | sqrtf16
         | truncf128
diff --git a/crates/libm-test/src/mpfloat.rs b/crates/libm-test/src/mpfloat.rs
index 53fade7d0..a404f227b 100644
--- a/crates/libm-test/src/mpfloat.rs
+++ b/crates/libm-test/src/mpfloat.rs
@@ -170,6 +170,8 @@ libm_macros::for_each_function! {
         remquof,
         rint,
         rintf,
+        rintf128,
+        rintf16,
         round,
         roundf,
         scalbn,
@@ -240,17 +242,19 @@ impl_no_round! {
 
 #[cfg(f16_enabled)]
 impl_no_round! {
-    fabsf16 => abs_mut;
     ceilf16 => ceil_mut;
+    fabsf16 => abs_mut;
     floorf16 => floor_mut;
+    rintf16 => round_even_mut; // FIXME: respect rounding mode
     truncf16 => trunc_mut;
 }
 
 #[cfg(f128_enabled)]
 impl_no_round! {
-    fabsf128 => abs_mut;
     ceilf128 => ceil_mut;
+    fabsf128 => abs_mut;
     floorf128 => floor_mut;
+    rintf128 => round_even_mut; // FIXME: respect rounding mode
     truncf128 => trunc_mut;
 }
 
diff --git a/crates/libm-test/tests/compare_built_musl.rs b/crates/libm-test/tests/compare_built_musl.rs
index 335496fce..f009816c9 100644
--- a/crates/libm-test/tests/compare_built_musl.rs
+++ b/crates/libm-test/tests/compare_built_musl.rs
@@ -89,9 +89,11 @@ libm_macros::for_each_function! {
         fdimf16,
         floorf128,
         floorf16,
+        rintf128,
+        rintf16,
+        sqrtf128,
+        sqrtf16,
         truncf128,
         truncf16,
-        sqrtf16,
-        sqrtf128,
     ],
 }
diff --git a/crates/util/src/main.rs b/crates/util/src/main.rs
index 988c01d07..41d995b3b 100644
--- a/crates/util/src/main.rs
+++ b/crates/util/src/main.rs
@@ -94,6 +94,8 @@ fn do_eval(basis: &str, op: &str, inputs: &[&str]) {
             | fdimf16
             | floorf128
             | floorf16
+            | rintf128
+            | rintf16
             | sqrtf128
             | sqrtf16
             | truncf128
diff --git a/etc/function-definitions.json b/etc/function-definitions.json
index 86b088292..d3810b940 100644
--- a/etc/function-definitions.json
+++ b/etc/function-definitions.json
@@ -668,6 +668,20 @@
         ],
         "type": "f32"
     },
+    "rintf128": {
+        "sources": [
+            "src/math/generic/rint.rs",
+            "src/math/rintf128.rs"
+        ],
+        "type": "f128"
+    },
+    "rintf16": {
+        "sources": [
+            "src/math/generic/rint.rs",
+            "src/math/rintf16.rs"
+        ],
+        "type": "f16"
+    },
     "round": {
         "sources": [
             "src/libm_helper.rs",
diff --git a/etc/function-list.txt b/etc/function-list.txt
index 3bb895f4a..41bb4e06b 100644
--- a/etc/function-list.txt
+++ b/etc/function-list.txt
@@ -97,6 +97,8 @@ remquo
 remquof
 rint
 rintf
+rintf128
+rintf16
 round
 roundf
 scalbn
diff --git a/src/math/mod.rs b/src/math/mod.rs
index 68d201524..53d06974c 100644
--- a/src/math/mod.rs
+++ b/src/math/mod.rs
@@ -346,6 +346,7 @@ cfg_if! {
         mod fabsf16;
         mod fdimf16;
         mod floorf16;
+        mod rintf16;
         mod sqrtf16;
         mod truncf16;
 
@@ -354,6 +355,7 @@ cfg_if! {
         pub use self::fabsf16::fabsf16;
         pub use self::fdimf16::fdimf16;
         pub use self::floorf16::floorf16;
+        pub use self::rintf16::rintf16;
         pub use self::sqrtf16::sqrtf16;
         pub use self::truncf16::truncf16;
     }
@@ -366,6 +368,7 @@ cfg_if! {
         mod fabsf128;
         mod fdimf128;
         mod floorf128;
+        mod rintf128;
         mod sqrtf128;
         mod truncf128;
 
@@ -374,6 +377,7 @@ cfg_if! {
         pub use self::fabsf128::fabsf128;
         pub use self::fdimf128::fdimf128;
         pub use self::floorf128::floorf128;
+        pub use self::rintf128::rintf128;
         pub use self::sqrtf128::sqrtf128;
         pub use self::truncf128::truncf128;
     }
diff --git a/src/math/rint.rs b/src/math/rint.rs
index 592583aa5..f409ec282 100644
--- a/src/math/rint.rs
+++ b/src/math/rint.rs
@@ -1,3 +1,4 @@
+/// Round `x` to the nearest integer, breaking ties toward even.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn rint(x: f64) -> f64 {
     select_implementation! {
diff --git a/src/math/rintf.rs b/src/math/rintf.rs
index 56666df11..5e9f5f718 100644
--- a/src/math/rintf.rs
+++ b/src/math/rintf.rs
@@ -1,3 +1,4 @@
+/// Round `x` to the nearest integer, breaking ties toward even.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn rintf(x: f32) -> f32 {
     select_implementation! {
diff --git a/src/math/rintf128.rs b/src/math/rintf128.rs
new file mode 100644
index 000000000..6b16fcd84
--- /dev/null
+++ b/src/math/rintf128.rs
@@ -0,0 +1,5 @@
+/// Round `x` to the nearest integer, breaking ties toward even.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn rintf128(x: f128) -> f128 {
+    super::generic::rint(x)
+}
diff --git a/src/math/rintf16.rs b/src/math/rintf16.rs
new file mode 100644
index 000000000..84d792561
--- /dev/null
+++ b/src/math/rintf16.rs
@@ -0,0 +1,5 @@
+/// Round `x` to the nearest integer, breaking ties toward even.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn rintf16(x: f16) -> f16 {
+    super::generic::rint(x)
+}

From 29697dcf6fdc92a5e2e169b0769711c398b679fc Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 22 Jan 2025 21:31:06 +0000
Subject: [PATCH 170/279] Add the ability to parse hex, binary, and float hex
 with util

---
 crates/util/src/main.rs | 94 ++++++++++++++++++++++++++++++++++++++---
 1 file changed, 89 insertions(+), 5 deletions(-)

diff --git a/crates/util/src/main.rs b/crates/util/src/main.rs
index 41d995b3b..23aed06c0 100644
--- a/crates/util/src/main.rs
+++ b/crates/util/src/main.rs
@@ -5,10 +5,12 @@
 
 use std::any::type_name;
 use std::env;
+use std::num::ParseIntError;
 use std::str::FromStr;
 
 #[cfg(feature = "build-mpfr")]
 use az::Az;
+use libm::support::{hf32, hf64};
 #[cfg(feature = "build-mpfr")]
 use libm_test::mpfloat::MpOp;
 use libm_test::{MathOp, TupleCall};
@@ -238,21 +240,103 @@ impl_parse_tuple_via_rug!(f16);
 impl_parse_tuple_via_rug!(f128);
 
 /// Try to parse the number, printing a nice message on failure.
-fn parse<F: FromStr>(input: &[&str], idx: usize) -> F {
+fn parse<T: FromStr + FromStrRadix>(input: &[&str], idx: usize) -> T {
     let s = input[idx];
-    s.parse().unwrap_or_else(|_| panic!("invalid {} input '{s}'", type_name::<F>()))
+
+    let msg = || format!("invalid {} input '{s}'", type_name::<T>());
+
+    if s.starts_with("0x") {
+        return T::from_str_radix(s, 16).unwrap_or_else(|_| panic!("{}", msg()));
+    }
+
+    if s.starts_with("0b") {
+        return T::from_str_radix(s, 2).unwrap_or_else(|_| panic!("{}", msg()));
+    }
+
+    s.parse().unwrap_or_else(|_| panic!("{}", msg()))
 }
 
 /// Try to parse the float type going via `rug`, for `f16` and `f128` which don't yet implement
 /// `FromStr`.
 #[cfg(feature = "build-mpfr")]
-fn parse_rug<F: libm_test::Float>(input: &[&str], idx: usize) -> F
+fn parse_rug<F>(input: &[&str], idx: usize) -> F
 where
+    F: libm_test::Float + FromStrRadix,
     rug::Float: az::Cast<F>,
 {
     let s = input[idx];
-    let x =
-        rug::Float::parse(s).unwrap_or_else(|_| panic!("invalid {} input '{s}'", type_name::<F>()));
+
+    let msg = || format!("invalid {} input '{s}'", type_name::<F>());
+
+    if s.starts_with("0x") {
+        return F::from_str_radix(s, 16).unwrap_or_else(|_| panic!("{}", msg()));
+    }
+
+    if s.starts_with("0b") {
+        return F::from_str_radix(s, 2).unwrap_or_else(|_| panic!("{}", msg()));
+    }
+
+    let x = rug::Float::parse(s).unwrap_or_else(|_| panic!("{}", msg()));
     let x = rug::Float::with_val(F::BITS, x);
     x.az()
 }
+
+trait FromStrRadix: Sized {
+    fn from_str_radix(s: &str, radix: u32) -> Result<Self, ParseIntError>;
+}
+
+impl FromStrRadix for i32 {
+    fn from_str_radix(s: &str, radix: u32) -> Result<Self, ParseIntError> {
+        let s = strip_radix_prefix(s, radix);
+        i32::from_str_radix(s, radix)
+    }
+}
+
+#[cfg(f16_enabled)]
+impl FromStrRadix for f16 {
+    fn from_str_radix(s: &str, radix: u32) -> Result<Self, ParseIntError> {
+        let s = strip_radix_prefix(s, radix);
+        u16::from_str_radix(s, radix).map(Self::from_bits)
+    }
+}
+
+impl FromStrRadix for f32 {
+    fn from_str_radix(s: &str, radix: u32) -> Result<Self, ParseIntError> {
+        if radix == 16 && s.contains("p") {
+            // Parse as hex float
+            return Ok(hf32(s));
+        }
+
+        let s = strip_radix_prefix(s, radix);
+        u32::from_str_radix(s, radix).map(Self::from_bits)
+    }
+}
+
+impl FromStrRadix for f64 {
+    fn from_str_radix(s: &str, radix: u32) -> Result<Self, ParseIntError> {
+        if s.contains("p") {
+            return Ok(hf64(s));
+        }
+
+        let s = strip_radix_prefix(s, radix);
+        u64::from_str_radix(s, radix).map(Self::from_bits)
+    }
+}
+
+#[cfg(f128_enabled)]
+impl FromStrRadix for f128 {
+    fn from_str_radix(s: &str, radix: u32) -> Result<Self, ParseIntError> {
+        let s = strip_radix_prefix(s, radix);
+        u128::from_str_radix(s, radix).map(Self::from_bits)
+    }
+}
+
+fn strip_radix_prefix(s: &str, radix: u32) -> &str {
+    if radix == 16 {
+        s.strip_prefix("0x").unwrap()
+    } else if radix == 2 {
+        s.strip_prefix("0b").unwrap()
+    } else {
+        s
+    }
+}

From bc6a615676110faca4577f2a6b7f26e443df7b5c Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 22 Jan 2025 23:01:29 +0000
Subject: [PATCH 171/279] Fix the parsing of three-item tuples in `util`

---
 crates/util/src/main.rs | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/crates/util/src/main.rs b/crates/util/src/main.rs
index 23aed06c0..889823d2e 100644
--- a/crates/util/src/main.rs
+++ b/crates/util/src/main.rs
@@ -146,8 +146,8 @@ macro_rules! impl_parse_tuple {
 
         impl ParseTuple for ($ty, $ty, $ty) {
             fn parse(input: &[&str]) -> Self {
-                assert_eq!(input.len(), 2, "expected three arguments, got {input:?}");
-                (parse(input, 0), parse(input, 1), parse(input, 3))
+                assert_eq!(input.len(), 3, "expected three arguments, got {input:?}");
+                (parse(input, 0), parse(input, 1), parse(input, 2))
             }
         }
     };
@@ -187,8 +187,8 @@ macro_rules! impl_parse_tuple_via_rug {
 
         impl ParseTuple for ($ty, $ty, $ty) {
             fn parse(input: &[&str]) -> Self {
-                assert_eq!(input.len(), 2, "expected three arguments, got {input:?}");
-                (parse_rug(input, 0), parse_rug(input, 1), parse_rug(input, 3))
+                assert_eq!(input.len(), 3, "expected three arguments, got {input:?}");
+                (parse_rug(input, 0), parse_rug(input, 1), parse_rug(input, 2))
             }
         }
     };

From eb2946772ee6a8d679f0d74a4179e694c965d81f Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 22 Jan 2025 22:41:49 +0000
Subject: [PATCH 172/279] Add `hf16!` and `hf128!`

Expand the existing hex float functions and macros with versions that
work with `f16` and `f128`.
---
 src/math/support/hex_float.rs | 246 +++++++++++++++++++++++++++++++++-
 src/math/support/macros.rs    |  22 +++
 src/math/support/mod.rs       |   4 +
 3 files changed, 266 insertions(+), 6 deletions(-)

diff --git a/src/math/support/hex_float.rs b/src/math/support/hex_float.rs
index 1666c6153..949f21a57 100644
--- a/src/math/support/hex_float.rs
+++ b/src/math/support/hex_float.rs
@@ -4,6 +4,12 @@
 
 use super::{f32_from_bits, f64_from_bits};
 
+/// Construct a 16-bit float from hex float representation (C-style)
+#[cfg(f16_enabled)]
+pub const fn hf16(s: &str) -> f16 {
+    f16::from_bits(parse_any(s, 16, 10) as u16)
+}
+
 /// Construct a 32-bit float from hex float representation (C-style)
 pub const fn hf32(s: &str) -> f32 {
     f32_from_bits(parse_any(s, 32, 23) as u32)
@@ -14,6 +20,12 @@ pub const fn hf64(s: &str) -> f64 {
     f64_from_bits(parse_any(s, 64, 52) as u64)
 }
 
+/// Construct a 128-bit float from hex float representation (C-style)
+#[cfg(f128_enabled)]
+pub const fn hf128(s: &str) -> f128 {
+    f128::from_bits(parse_any(s, 128, 112))
+}
+
 const fn parse_any(s: &str, bits: u32, sig_bits: u32) -> u128 {
     let exp_bits: u32 = bits - sig_bits - 1;
     let max_msb: i32 = (1 << (exp_bits - 1)) - 1;
@@ -230,6 +242,57 @@ mod tests {
         }
     }
 
+    // HACK(msrv): 1.63 rejects unknown width float literals at an AST level, so use a macro to
+    // hide them from the AST.
+    #[cfg(f16_enabled)]
+    macro_rules! f16_tests {
+        () => {
+            #[test]
+            fn test_f16() {
+                let checks = [
+                    ("0x.1234p+16", (0x1234 as f16).to_bits()),
+                    ("0x1.234p+12", (0x1234 as f16).to_bits()),
+                    ("0x12.34p+8", (0x1234 as f16).to_bits()),
+                    ("0x123.4p+4", (0x1234 as f16).to_bits()),
+                    ("0x1234p+0", (0x1234 as f16).to_bits()),
+                    ("0x1234.p+0", (0x1234 as f16).to_bits()),
+                    ("0x1234.0p+0", (0x1234 as f16).to_bits()),
+                    ("0x1.ffcp+15", f16::MAX.to_bits()),
+                    ("0x1.0p+1", 2.0f16.to_bits()),
+                    ("0x1.0p+0", 1.0f16.to_bits()),
+                    ("0x1.ffp+8", 0x5ffc),
+                    ("+0x1.ffp+8", 0x5ffc),
+                    ("0x1p+0", 0x3c00),
+                    ("0x1.998p-4", 0x2e66),
+                    ("0x1.9p+6", 0x5640),
+                    ("0x0.0p0", 0.0f16.to_bits()),
+                    ("-0x0.0p0", (-0.0f16).to_bits()),
+                    ("0x1.0p0", 1.0f16.to_bits()),
+                    ("0x1.998p-4", (0.1f16).to_bits()),
+                    ("-0x1.998p-4", (-0.1f16).to_bits()),
+                    ("0x0.123p-12", 0x0123),
+                    ("0x1p-24", 0x0001),
+                ];
+                for (s, exp) in checks {
+                    println!("parsing {s}");
+                    let act = hf16(s).to_bits();
+                    assert_eq!(
+                        act, exp,
+                        "parsing {s}: {act:#06x} != {exp:#06x}\nact: {act:#018b}\nexp: {exp:#018b}"
+                    );
+                }
+            }
+
+            #[test]
+            fn test_macros_f16() {
+                assert_eq!(hf16!("0x1.ffp+8").to_bits(), 0x5ffc_u16);
+            }
+        };
+    }
+
+    #[cfg(f16_enabled)]
+    f16_tests!();
+
     #[test]
     fn test_f32() {
         let checks = [
@@ -308,16 +371,67 @@ mod tests {
         }
     }
 
-    #[test]
-    fn test_f32_almost_extra_precision() {
-        // Exact maximum precision allowed
-        hf32("0x1.abcdeep+0");
+    // HACK(msrv): 1.63 rejects unknown width float literals at an AST level, so use a macro to
+    // hide them from the AST.
+    #[cfg(f128_enabled)]
+    macro_rules! f128_tests {
+        () => {
+            #[test]
+            fn test_f128() {
+                let checks = [
+                    ("0x.1234p+16", (0x1234 as f128).to_bits()),
+                    ("0x1.234p+12", (0x1234 as f128).to_bits()),
+                    ("0x12.34p+8", (0x1234 as f128).to_bits()),
+                    ("0x123.4p+4", (0x1234 as f128).to_bits()),
+                    ("0x1234p+0", (0x1234 as f128).to_bits()),
+                    ("0x1234.p+0", (0x1234 as f128).to_bits()),
+                    ("0x1234.0p+0", (0x1234 as f128).to_bits()),
+                    ("0x1.ffffffffffffffffffffffffffffp+16383", f128::MAX.to_bits()),
+                    ("0x1.0p+1", 2.0f128.to_bits()),
+                    ("0x1.0p+0", 1.0f128.to_bits()),
+                    ("0x1.ffep+8", 0x4007ffe0000000000000000000000000),
+                    ("+0x1.ffep+8", 0x4007ffe0000000000000000000000000),
+                    ("0x1p+0", 0x3fff0000000000000000000000000000),
+                    ("0x1.999999999999999999999999999ap-4", 0x3ffb999999999999999999999999999a),
+                    ("0x1.9p+6", 0x40059000000000000000000000000000),
+                    ("0x0.0p0", 0.0f128.to_bits()),
+                    ("-0x0.0p0", (-0.0f128).to_bits()),
+                    ("0x1.0p0", 1.0f128.to_bits()),
+                    ("0x1.999999999999999999999999999ap-4", (0.1f128).to_bits()),
+                    ("-0x1.999999999999999999999999999ap-4", (-0.1f128).to_bits()),
+                    ("0x0.abcdef0123456789abcdef012345p-16382", 0x0000abcdef0123456789abcdef012345),
+                    ("0x1p-16494", 0x00000000000000000000000000000001),
+                ];
+                for (s, exp) in checks {
+                    println!("parsing {s}");
+                    let act = hf128(s).to_bits();
+                    assert_eq!(
+                        act, exp,
+                        "parsing {s}: {act:#034x} != {exp:#034x}\nact: {act:#0130b}\nexp: {exp:#0130b}"
+                    );
+                }
+            }
+
+            #[test]
+            fn test_macros_f128() {
+                assert_eq!(hf128!("0x1.ffep+8").to_bits(), 0x4007ffe0000000000000000000000000_u128);
+            }
+        }
     }
 
+    #[cfg(f128_enabled)]
+    f128_tests!();
+
     #[test]
     fn test_macros() {
-        assert_eq!(hf32!("0x1.ffep+8").to_bits(), 0x43fff000u32);
-        assert_eq!(hf64!("0x1.ffep+8").to_bits(), 0x407ffe0000000000u64);
+        // FIXME(msrv): enable once parsing works
+        // #[cfg(f16_enabled)]
+        // assert_eq!(hf16!("0x1.ffp+8").to_bits(), 0x5ffc_u16);
+        assert_eq!(hf32!("0x1.ffep+8").to_bits(), 0x43fff000_u32);
+        assert_eq!(hf64!("0x1.ffep+8").to_bits(), 0x407ffe0000000000_u64);
+        // FIXME(msrv): enable once parsing works
+        // #[cfg(f128_enabled)]
+        // assert_eq!(hf128!("0x1.ffep+8").to_bits(), 0x4007ffe0000000000000000000000000_u128);
     }
 }
 
@@ -328,6 +442,69 @@ mod tests_panicking {
     extern crate std;
     use super::*;
 
+    // HACK(msrv): 1.63 rejects unknown width float literals at an AST level, so use a macro to
+    // hide them from the AST.
+    #[cfg(f16_enabled)]
+    macro_rules! f16_tests {
+        () => {
+            #[test]
+            fn test_f16_almost_extra_precision() {
+                // Exact maximum precision allowed
+                hf16("0x1.ffcp+0");
+            }
+
+            #[test]
+            #[should_panic(expected = "the value is too precise")]
+            fn test_f16_extra_precision() {
+                // One bit more than the above.
+                hf16("0x1.ffdp+0");
+            }
+
+            #[test]
+            #[should_panic(expected = "the value is too huge")]
+            fn test_f16_overflow() {
+                // One bit more than the above.
+                hf16("0x1p+16");
+            }
+
+            #[test]
+            fn test_f16_tiniest() {
+                let x = hf16("0x1.p-24");
+                let y = hf16("0x0.001p-12");
+                let z = hf16("0x0.8p-23");
+                assert_eq!(x, y);
+                assert_eq!(x, z);
+            }
+
+            #[test]
+            #[should_panic(expected = "the value is too tiny")]
+            fn test_f16_too_tiny() {
+                hf16("0x1.p-25");
+            }
+
+            #[test]
+            #[should_panic(expected = "the value is too tiny")]
+            fn test_f16_also_too_tiny() {
+                hf16("0x0.8p-24");
+            }
+
+            #[test]
+            #[should_panic(expected = "the value is too tiny")]
+            fn test_f16_again_too_tiny() {
+                hf16("0x0.001p-13");
+            }
+        };
+    }
+
+    #[cfg(f16_enabled)]
+    f16_tests!();
+
+    #[test]
+    fn test_f32_almost_extra_precision() {
+        // Exact maximum precision allowed
+        hf32("0x1.abcdeep+0");
+    }
+
     #[test]
     #[should_panic]
     fn test_f32_extra_precision2() {
@@ -388,4 +565,61 @@ mod tests_panicking {
         // One bit more than the above.
         hf64("0x1.abcdabcdabcdf8p+0");
     }
+
+    // HACK(msrv): 1.63 rejects unknown width float literals at an AST level, so use a macro to
+    // hide them from the AST.
+    #[cfg(f128_enabled)]
+    macro_rules! f128_tests {
+        () => {
+            #[test]
+            fn test_f128_almost_extra_precision() {
+                // Exact maximum precision allowed
+                hf128("0x1.ffffffffffffffffffffffffffffp+16383");
+            }
+
+            #[test]
+            #[should_panic(expected = "the value is too precise")]
+            fn test_f128_extra_precision() {
+                // One bit more than the above.
+                hf128("0x1.ffffffffffffffffffffffffffff8p+16383");
+            }
+
+            #[test]
+            #[should_panic(expected = "the value is too huge")]
+            fn test_f128_overflow() {
+                // One bit more than the above.
+                hf128("0x1p+16384");
+            }
+
+            #[test]
+            fn test_f128_tiniest() {
+                let x = hf128("0x1.p-16494");
+                let y = hf128("0x0.0000000000000001p-16430");
+                let z = hf128("0x0.8p-16493");
+                assert_eq!(x, y);
+                assert_eq!(x, z);
+            }
+
+            #[test]
+            #[should_panic(expected = "the value is too tiny")]
+            fn test_f128_too_tiny() {
+                hf128("0x1.p-16495");
+            }
+
+            #[test]
+            #[should_panic(expected = "the value is too tiny")]
+            fn test_f128_again_too_tiny() {
+                hf128("0x0.0000000000000001p-16431");
+            }
+
+            #[test]
+            #[should_panic(expected = "the value is too tiny")]
+            fn test_f128_also_too_tiny() {
+                hf128("0x0.8p-16494");
+            }
+        };
+    }
+
+    #[cfg(f128_enabled)]
+    f128_tests!();
 }
diff --git a/src/math/support/macros.rs b/src/math/support/macros.rs
index c9a36c0db..d8ba04cff 100644
--- a/src/math/support/macros.rs
+++ b/src/math/support/macros.rs
@@ -87,6 +87,17 @@ macro_rules! select_implementation {
     (@cfg $provided:meta; $ex:expr) => { #[cfg($provided)] $ex };
 }
 
+/// Construct a 16-bit float from hex float representation (C-style), guaranteed to
+/// evaluate at compile time.
+#[cfg(f16_enabled)]
+#[allow(unused_macros)]
+macro_rules! hf16 {
+    ($s:literal) => {{
+        const X: f16 = $crate::math::support::hf16($s);
+        X
+    }};
+}
+
 /// Construct a 32-bit float from hex float representation (C-style), guaranteed to
 /// evaluate at compile time.
 #[allow(unused_macros)]
@@ -107,6 +118,17 @@ macro_rules! hf64 {
     }};
 }
 
+/// Construct a 128-bit float from hex float representation (C-style), guaranteed to
+/// evaluate at compile time.
+#[cfg(f128_enabled)]
+#[allow(unused_macros)]
+macro_rules! hf128 {
+    ($s:literal) => {{
+        const X: f128 = $crate::math::support::hf128($s);
+        X
+    }};
+}
+
 /// Assert `F::biteq` with better messages.
 #[cfg(test)]
 macro_rules! assert_biteq {
diff --git a/src/math/support/mod.rs b/src/math/support/mod.rs
index ddfc2e3e0..da9e2c9ed 100644
--- a/src/math/support/mod.rs
+++ b/src/math/support/mod.rs
@@ -8,6 +8,10 @@ mod int_traits;
 #[allow(unused_imports)]
 pub use float_traits::{Float, IntTy};
 pub(crate) use float_traits::{f32_from_bits, f64_from_bits};
+#[cfg(f16_enabled)]
+pub use hex_float::hf16;
+#[cfg(f128_enabled)]
+pub use hex_float::hf128;
 #[allow(unused_imports)]
 pub use hex_float::{hf32, hf64};
 pub use int_traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt};

From 122ba48cf527b58fe337a8aa0e7122c224f686cb Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 23 Jan 2025 05:08:10 +0000
Subject: [PATCH 173/279] Introduce XFAILs that assert failure

Currently our XFAILs are open ended; we do not check that it actually
fails, so we have no easy way of knowing that a previously-failing test
starts passing. Introduce a new enum that we return from overrides to
give us more flexibility here, including the ability to assert that
expected failures happen.

With the new enum, it is also possible to specify ULP via return value
rather than passing a `&mut u32` parameter.

This includes refactoring of `precision.rs` to be more accurate about
where errors come from, if possible.

Fixes: https://github.com/rust-lang/libm/issues/455
---
 crates/libm-test/src/precision.rs   | 461 +++++++++++++---------------
 crates/libm-test/src/test_traits.rs |  60 +++-
 2 files changed, 268 insertions(+), 253 deletions(-)

diff --git a/crates/libm-test/src/precision.rs b/crates/libm-test/src/precision.rs
index 9d17ab8cc..800425f12 100644
--- a/crates/libm-test/src/precision.rs
+++ b/crates/libm-test/src/precision.rs
@@ -118,13 +118,13 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 {
             // FIXME(#401): these need to be correctly rounded but are not.
             Id::Fmaf => ulp = 1,
             Id::Fdim => ulp = 1,
+            Id::Round => ulp = 1,
 
             Id::Asinh => ulp = 3,
             Id::Asinhf => ulp = 3,
             Id::Exp10 | Id::Exp10f => ulp = 1_000_000,
             Id::Exp2 | Id::Exp2f => ulp = 10_000_000,
             Id::Log1p | Id::Log1pf => ulp = 2,
-            Id::Round => ulp = 1,
             Id::Tan => ulp = 2,
             _ => (),
         }
@@ -133,12 +133,42 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 {
     ulp
 }
 
+/// Result of checking for possible overrides.
+#[derive(Debug, Default)]
+pub enum CheckAction {
+    /// The check should pass. Default case.
+    #[default]
+    AssertSuccess,
+
+    /// Override the ULP for this check.
+    AssertWithUlp(u32),
+
+    /// Failure is expected, ensure this is the case (xfail). Takes a contxt string to help trace
+    /// back exactly why we expect this to fail.
+    AssertFailure(&'static str),
+
+    /// The override somehow validated the result, here it is.
+    Custom(TestResult),
+
+    /// Disregard the output.
+    Skip,
+}
+
 /// Don't run further validation on this test case.
-const SKIP: Option<TestResult> = Some(Ok(()));
+const SKIP: CheckAction = CheckAction::Skip;
 
-/// Return this to skip checks on a test that currently fails but shouldn't. Looks
-/// the same as skip, but we keep them separate to better indicate purpose.
-const XFAIL: Option<TestResult> = Some(Ok(()));
+/// Return this to skip checks on a test that currently fails but shouldn't. Takes a description
+/// of context.
+const XFAIL: fn(&'static str) -> CheckAction = CheckAction::AssertFailure;
+
+/// Indicates that we expect a test to fail but we aren't asserting that it does (e.g. some results
+/// within a range do actually pass).
+///
+/// Same as `SKIP`, just indicates we have something to eventually fix.
+const XFAIL_NOCHECK: CheckAction = CheckAction::Skip;
+
+/// By default, all tests should pass.
+const DEFAULT: CheckAction = CheckAction::AssertSuccess;
 
 /// Allow overriding the outputs of specific test cases.
 ///
@@ -158,19 +188,13 @@ pub trait MaybeOverride<Input> {
         _input: Input,
         _actual: F,
         _expected: F,
-        _ulp: &mut u32,
         _ctx: &CheckCtx,
-    ) -> Option<TestResult> {
-        None
+    ) -> CheckAction {
+        DEFAULT
     }
 
-    fn check_int<I: Int>(
-        _input: Input,
-        _actual: I,
-        _expected: I,
-        _ctx: &CheckCtx,
-    ) -> Option<TestResult> {
-        None
+    fn check_int<I: Int>(_input: Input, _actual: I, _expected: I, _ctx: &CheckCtx) -> CheckAction {
+        DEFAULT
     }
 }
 
@@ -178,33 +202,35 @@ pub trait MaybeOverride<Input> {
 impl MaybeOverride<(f16,)> for SpecialCase {}
 
 impl MaybeOverride<(f32,)> for SpecialCase {
-    fn check_float<F: Float>(
-        input: (f32,),
-        actual: F,
-        expected: F,
-        _ulp: &mut u32,
-        ctx: &CheckCtx,
-    ) -> Option<TestResult> {
-        if ctx.base_name == BaseName::Expm1 && input.0 > 80.0 && actual.is_infinite() {
+    fn check_float<F: Float>(input: (f32,), actual: F, expected: F, ctx: &CheckCtx) -> CheckAction {
+        if ctx.base_name == BaseName::Expm1
+            && !input.0.is_infinite()
+            && input.0 > 80.0
+            && actual.is_infinite()
+            && !expected.is_infinite()
+        {
             // we return infinity but the number is representable
-            return XFAIL;
-        }
-
-        if ctx.base_name == BaseName::Sinh && input.0.abs() > 80.0 && actual.is_nan() {
-            // we return some NaN that should be real values or infinite
-            return XFAIL;
+            if ctx.basis == CheckBasis::Musl {
+                return XFAIL_NOCHECK;
+            }
+            return XFAIL("expm1 representable numbers");
         }
 
-        if ctx.base_name == BaseName::Acosh && input.0 < -1.0 {
-            // acoshf is undefined for x <= 1.0, but we return a random result at lower
-            // values.
-            return XFAIL;
+        if cfg!(x86_no_sse)
+            && ctx.base_name == BaseName::Exp2
+            && !expected.is_infinite()
+            && actual.is_infinite()
+        {
+            // We return infinity when there is a representable value. Test input: 127.97238
+            return XFAIL("586 exp2 representable numbers");
         }
 
-        if ctx.base_name == BaseName::Lgamma || ctx.base_name == BaseName::LgammaR && input.0 < 0.0
-        {
-            // loggamma should not be defined for x < 0, yet we both return results
-            return XFAIL;
+        if ctx.base_name == BaseName::Sinh && input.0.abs() > 80.0 && actual.is_nan() {
+            // we return some NaN that should be real values or infinite
+            if ctx.basis == CheckBasis::Musl {
+                return XFAIL_NOCHECK;
+            }
+            return XFAIL("sinh unexpected NaN");
         }
 
         if (ctx.base_name == BaseName::Lgamma || ctx.base_name == BaseName::LgammaR)
@@ -213,32 +239,18 @@ impl MaybeOverride<(f32,)> for SpecialCase {
             && !actual.is_infinite()
         {
             // This result should saturate but we return a finite value.
-            return XFAIL;
+            return XFAIL_NOCHECK;
         }
 
         if ctx.base_name == BaseName::J0 && input.0 < -1e34 {
             // Errors get huge close to -inf
-            return XFAIL;
-        }
-
-        if cfg!(x86_no_sse)
-            && ctx.base_name == BaseName::Exp2
-            && !expected.is_infinite()
-            && actual.is_infinite()
-        {
-            // We return infinity when there is a representable value. Test input: 127.97238
-            return XFAIL;
+            return XFAIL_NOCHECK;
         }
 
-        maybe_check_nan_bits(actual, expected, ctx)
+        unop_common(input, actual, expected, ctx)
     }
 
-    fn check_int<I: Int>(
-        input: (f32,),
-        actual: I,
-        expected: I,
-        ctx: &CheckCtx,
-    ) -> Option<anyhow::Result<()>> {
+    fn check_int<I: Int>(input: (f32,), actual: I, expected: I, ctx: &CheckCtx) -> CheckAction {
         // On MPFR for lgammaf_r, we set -1 as the integer result for negative infinity but MPFR
         // sets +1
         if ctx.basis == CheckBasis::Mpfr
@@ -246,37 +258,25 @@ impl MaybeOverride<(f32,)> for SpecialCase {
             && input.0 == f32::NEG_INFINITY
             && actual.abs() == expected.abs()
         {
-            XFAIL
-        } else {
-            None
+            return XFAIL("lgammar integer result");
         }
+
+        DEFAULT
     }
 }
 
 impl MaybeOverride<(f64,)> for SpecialCase {
-    fn check_float<F: Float>(
-        input: (f64,),
-        actual: F,
-        expected: F,
-        _ulp: &mut u32,
-        ctx: &CheckCtx,
-    ) -> Option<TestResult> {
-        if ctx.basis == CheckBasis::Musl {
-            if cfg!(target_arch = "x86") && ctx.base_name == BaseName::Acosh && input.0 < 1.0 {
-                // The function is undefined, both implementations return random results
-                return SKIP;
-            }
-
-            if cfg!(x86_no_sse)
-                && ctx.base_name == BaseName::Ceil
-                && input.0 < 0.0
-                && input.0 > -1.0
-                && expected == F::ZERO
-                && actual == F::ZERO
-            {
-                // musl returns -0.0, we return +0.0
-                return XFAIL;
-            }
+    fn check_float<F: Float>(input: (f64,), actual: F, expected: F, ctx: &CheckCtx) -> CheckAction {
+        if cfg!(x86_no_sse)
+            && ctx.base_name == BaseName::Ceil
+            && ctx.basis == CheckBasis::Musl
+            && input.0 < 0.0
+            && input.0 > -1.0
+            && expected == F::ZERO
+            && actual == F::ZERO
+        {
+            // musl returns -0.0, we return +0.0
+            return XFAIL("i586 ceil signed zero");
         }
 
         if cfg!(x86_no_sse)
@@ -285,53 +285,37 @@ impl MaybeOverride<(f64,)> for SpecialCase {
             && (expected - actual).abs() > F::ZERO
         {
             // Our rounding mode is incorrect.
-            return XFAIL;
-        }
-
-        if ctx.base_name == BaseName::Acosh && input.0 < 1.0 {
-            // The function is undefined for the inputs, musl and our libm both return
-            // random results.
-            return XFAIL;
-        }
-
-        if ctx.base_name == BaseName::Lgamma || ctx.base_name == BaseName::LgammaR && input.0 < 0.0
-        {
-            // loggamma should not be defined for x < 0, yet we both return results
-            return XFAIL;
+            return XFAIL("i586 rint rounding mode");
         }
 
-        if ctx.base_name == BaseName::J0 && input.0 < -1e300 {
-            // Errors get huge close to -inf
-            return XFAIL;
-        }
-
-        if (ctx.fn_ident == Identifier::Ceil || ctx.fn_ident == Identifier::Floor)
-            && cfg!(x86_no_sse)
+        if cfg!(x86_no_sse)
+            && (ctx.fn_ident == Identifier::Ceil || ctx.fn_ident == Identifier::Floor)
             && expected.eq_repr(F::NEG_ZERO)
             && actual.eq_repr(F::ZERO)
         {
             // FIXME: the x87 implementations do not keep the distinction between -0.0 and 0.0.
             // See https://github.com/rust-lang/libm/pull/404#issuecomment-2572399955
-            return XFAIL;
+            return XFAIL("i586 ceil/floor signed zero");
         }
 
-        if (ctx.fn_ident == Identifier::Exp10 || ctx.fn_ident == Identifier::Exp2)
-            && cfg!(x86_no_sse)
+        if cfg!(x86_no_sse)
+            && (ctx.fn_ident == Identifier::Exp10 || ctx.fn_ident == Identifier::Exp2)
         {
             // FIXME: i586 has very imprecise results with ULP > u32::MAX for these
             // operations so we can't reasonably provide a limit.
-            return XFAIL;
+            return XFAIL_NOCHECK;
         }
 
-        maybe_check_nan_bits(actual, expected, ctx)
+        if ctx.base_name == BaseName::J0 && input.0 < -1e300 {
+            // Errors get huge close to -inf
+            return XFAIL_NOCHECK;
+        }
+
+        // maybe_check_nan_bits(actual, expected, ctx)
+        unop_common(input, actual, expected, ctx)
     }
 
-    fn check_int<I: Int>(
-        input: (f64,),
-        actual: I,
-        expected: I,
-        ctx: &CheckCtx,
-    ) -> Option<anyhow::Result<()>> {
+    fn check_int<I: Int>(input: (f64,), actual: I, expected: I, ctx: &CheckCtx) -> CheckAction {
         // On MPFR for lgamma_r, we set -1 as the integer result for negative infinity but MPFR
         // sets +1
         if ctx.basis == CheckBasis::Mpfr
@@ -339,41 +323,68 @@ impl MaybeOverride<(f64,)> for SpecialCase {
             && input.0 == f64::NEG_INFINITY
             && actual.abs() == expected.abs()
         {
-            XFAIL
-        } else {
-            None
+            return XFAIL("lgammar integer result");
         }
+
+        DEFAULT
     }
 }
 
 #[cfg(f128_enabled)]
 impl MaybeOverride<(f128,)> for SpecialCase {}
 
-/// Check NaN bits if the function requires it
-fn maybe_check_nan_bits<F: Float>(actual: F, expected: F, ctx: &CheckCtx) -> Option<TestResult> {
-    if !(ctx.base_name == BaseName::Fabs || ctx.base_name == BaseName::Copysign) {
-        return None;
-    }
+// F1 and F2 are always the same type, this is just to please generics
+fn unop_common<F1: Float, F2: Float>(
+    input: (F1,),
+    actual: F2,
+    expected: F2,
+    ctx: &CheckCtx,
+) -> CheckAction {
+    if ctx.base_name == BaseName::Acosh
+        && input.0 < F1::NEG_ONE
+        && !(expected.is_nan() && actual.is_nan())
+    {
+        // acoshf is undefined for x <= 1.0, but we return a random result at lower values.
 
-    // LLVM currently uses x87 instructions which quieten signalling NaNs to handle the i686
-    // `extern "C"` `f32`/`f64` return ABI.
-    // LLVM issue <https://github.com/llvm/llvm-project/issues/66803>
-    // Rust issue <https://github.com/rust-lang/rust/issues/115567>
-    if cfg!(target_arch = "x86") && ctx.basis == CheckBasis::Musl {
-        return SKIP;
+        if ctx.basis == CheckBasis::Musl {
+            return XFAIL_NOCHECK;
+        }
+
+        return XFAIL("acoshf undefined");
     }
 
-    // MPFR only has one NaN bitpattern; allow the default `.is_nan()` checks to validate.
-    if ctx.basis == CheckBasis::Mpfr {
-        return SKIP;
+    if (ctx.base_name == BaseName::Lgamma || ctx.base_name == BaseName::LgammaR)
+        && input.0 < F1::ZERO
+        && !input.0.is_infinite()
+    {
+        // loggamma should not be defined for x < 0, yet we both return results
+        return XFAIL_NOCHECK;
     }
 
-    // abs and copysign require signaling NaNs to be propagated, so verify bit equality.
-    if actual.to_bits() == expected.to_bits() {
-        SKIP
-    } else {
-        Some(Err(anyhow::anyhow!("NaNs have different bitpatterns")))
+    // fabs and copysign must leave NaNs untouched.
+    if ctx.base_name == BaseName::Fabs && input.0.is_nan() {
+        // LLVM currently uses x87 instructions which quieten signalling NaNs to handle the i686
+        // `extern "C"` `f32`/`f64` return ABI.
+        // LLVM issue <https://github.com/llvm/llvm-project/issues/66803>
+        // Rust issue <https://github.com/rust-lang/rust/issues/115567>
+        if cfg!(target_arch = "x86") && ctx.basis == CheckBasis::Musl && actual.is_nan() {
+            return XFAIL_NOCHECK;
+        }
+
+        // MPFR only has one NaN bitpattern; allow the default `.is_nan()` checks to validate.
+        if ctx.basis == CheckBasis::Mpfr {
+            return DEFAULT;
+        }
+
+        // abs and copysign require signaling NaNs to be propagated, so verify bit equality.
+        if actual.to_bits() == expected.to_bits() {
+            return CheckAction::Custom(Ok(()));
+        } else {
+            return CheckAction::Custom(Err(anyhow::anyhow!("NaNs have different bitpatterns")));
+        }
     }
+
+    DEFAULT
 }
 
 #[cfg(f16_enabled)]
@@ -382,9 +393,8 @@ impl MaybeOverride<(f16, f16)> for SpecialCase {
         input: (f16, f16),
         actual: F,
         expected: F,
-        _ulp: &mut u32,
         ctx: &CheckCtx,
-    ) -> Option<TestResult> {
+    ) -> CheckAction {
         binop_common(input, actual, expected, ctx)
     }
 }
@@ -394,18 +404,8 @@ impl MaybeOverride<(f32, f32)> for SpecialCase {
         input: (f32, f32),
         actual: F,
         expected: F,
-        _ulp: &mut u32,
         ctx: &CheckCtx,
-    ) -> Option<TestResult> {
-        if ctx.base_name == BaseName::Fmin
-            && input.0.biteq(f32::NEG_ZERO)
-            && input.1.biteq(f32::ZERO)
-            && expected.biteq(F::NEG_ZERO)
-            && actual.biteq(F::ZERO)
-        {
-            return XFAIL;
-        }
-
+    ) -> CheckAction {
         binop_common(input, actual, expected, ctx)
     }
 
@@ -414,7 +414,7 @@ impl MaybeOverride<(f32, f32)> for SpecialCase {
         actual: I,
         expected: I,
         ctx: &CheckCtx,
-    ) -> Option<TestResult> {
+    ) -> CheckAction {
         remquo_common(actual, expected, ctx)
     }
 }
@@ -424,18 +424,8 @@ impl MaybeOverride<(f64, f64)> for SpecialCase {
         input: (f64, f64),
         actual: F,
         expected: F,
-        _ulp: &mut u32,
         ctx: &CheckCtx,
-    ) -> Option<TestResult> {
-        if ctx.base_name == BaseName::Fmin
-            && input.0.biteq(f64::NEG_ZERO)
-            && input.1.biteq(f64::ZERO)
-            && expected.biteq(F::ZERO)
-            && actual.biteq(F::NEG_ZERO)
-        {
-            return XFAIL;
-        }
-
+    ) -> CheckAction {
         binop_common(input, actual, expected, ctx)
     }
 
@@ -444,33 +434,19 @@ impl MaybeOverride<(f64, f64)> for SpecialCase {
         actual: I,
         expected: I,
         ctx: &CheckCtx,
-    ) -> Option<TestResult> {
+    ) -> CheckAction {
         remquo_common(actual, expected, ctx)
     }
 }
 
-fn remquo_common<I: Int>(actual: I, expected: I, ctx: &CheckCtx) -> Option<TestResult> {
-    // FIXME: Our MPFR implementation disagrees with musl and may need to be updated.
-    if ctx.basis == CheckBasis::Mpfr
-        && ctx.base_name == BaseName::Remquo
-        && expected == I::MIN
-        && actual == I::ZERO
-    {
-        return XFAIL;
-    }
-
-    None
-}
-
 #[cfg(f128_enabled)]
 impl MaybeOverride<(f128, f128)> for SpecialCase {
     fn check_float<F: Float>(
         input: (f128, f128),
         actual: F,
         expected: F,
-        _ulp: &mut u32,
         ctx: &CheckCtx,
-    ) -> Option<TestResult> {
+    ) -> CheckAction {
         binop_common(input, actual, expected, ctx)
     }
 }
@@ -481,8 +457,17 @@ fn binop_common<F1: Float, F2: Float>(
     actual: F2,
     expected: F2,
     ctx: &CheckCtx,
-) -> Option<TestResult> {
-    /* FIXME(#439): we do not compare signed zeros */
+) -> CheckAction {
+    // MPFR only has one NaN bitpattern; allow the default `.is_nan()` checks to validate. Skip if
+    // the first input (magnitude source) is NaN and the output is also a NaN, or if the second
+    // input (sign source) is NaN.
+    if ctx.basis == CheckBasis::Mpfr
+        && ((input.0.is_nan() && actual.is_nan() && expected.is_nan()) || input.1.is_nan())
+    {
+        return SKIP;
+    }
+
+    /* FIXME(#439): our fmin and fmax do not compare signed zeros */
 
     if ctx.base_name == BaseName::Fmin
         && input.0.biteq(F1::NEG_ZERO)
@@ -490,7 +475,7 @@ fn binop_common<F1: Float, F2: Float>(
         && expected.biteq(F2::NEG_ZERO)
         && actual.biteq(F2::ZERO)
     {
-        return XFAIL;
+        return XFAIL("fmin signed zeroes");
     }
 
     if ctx.base_name == BaseName::Fmax
@@ -499,21 +484,32 @@ fn binop_common<F1: Float, F2: Float>(
         && expected.biteq(F2::ZERO)
         && actual.biteq(F2::NEG_ZERO)
     {
-        return XFAIL;
+        return XFAIL("fmax signed zeroes");
     }
 
     // Musl propagates NaNs if one is provided as the input, but we return the other input.
-    match (&ctx.basis, ctx.base_name) {
-        (Musl, BaseName::Fmin | BaseName::Fmax)
-            if (input.0.is_nan() || input.1.is_nan()) && expected.is_nan() =>
-        {
-            XFAIL
-        }
+    if (ctx.base_name == BaseName::Fmax || ctx.base_name == BaseName::Fmin)
+        && ctx.basis == Musl
+        && (input.0.is_nan() ^ input.1.is_nan())
+        && expected.is_nan()
+    {
+        return XFAIL("fmax/fmin musl NaN");
+    }
 
-        (Mpfr, BaseName::Copysign) if input.1.is_nan() => SKIP,
+    DEFAULT
+}
 
-        _ => None,
+fn remquo_common<I: Int>(actual: I, expected: I, ctx: &CheckCtx) -> CheckAction {
+    // FIXME: Our MPFR implementation disagrees with musl and may need to be updated.
+    if ctx.basis == CheckBasis::Mpfr
+        && ctx.base_name == BaseName::Remquo
+        && expected == I::MIN
+        && actual == I::ZERO
+    {
+        return XFAIL("remquo integer mismatch");
     }
+
+    DEFAULT
 }
 
 impl MaybeOverride<(i32, f32)> for SpecialCase {
@@ -521,28 +517,19 @@ impl MaybeOverride<(i32, f32)> for SpecialCase {
         input: (i32, f32),
         actual: F,
         expected: F,
-        ulp: &mut u32,
         ctx: &CheckCtx,
-    ) -> Option<TestResult> {
-        match (&ctx.basis, ctx.base_name) {
-            (Musl, _) => bessel_prec_dropoff(input, actual, expected, ulp, ctx),
-
-            // We return +0.0, MPFR returns -0.0
-            (Mpfr, BaseName::Jn | BaseName::Yn)
-                if input.1 == f32::NEG_INFINITY && actual == F::ZERO && expected == F::ZERO =>
-            {
-                XFAIL
-            }
-
-            // `ynf(213, 109.15641) = -inf` with our library, should be finite.
-            (_, BaseName::Yn)
-                if input.0 > 200 && !expected.is_infinite() && actual.is_infinite() =>
-            {
-                XFAIL
-            }
-
-            _ => None,
+    ) -> CheckAction {
+        // `ynf(213, 109.15641) = -inf` with our library, should be finite.
+        if ctx.basis == Mpfr
+            && ctx.base_name == BaseName::Yn
+            && input.0 > 200
+            && !expected.is_infinite()
+            && actual.is_infinite()
+        {
+            return XFAIL("ynf infinity mismatch");
         }
+
+        int_float_common(input, actual, expected, ctx)
     }
 }
 
@@ -551,55 +538,51 @@ impl MaybeOverride<(i32, f64)> for SpecialCase {
         input: (i32, f64),
         actual: F,
         expected: F,
-        ulp: &mut u32,
         ctx: &CheckCtx,
-    ) -> Option<TestResult> {
-        match (&ctx.basis, ctx.base_name) {
-            (Musl, _) => bessel_prec_dropoff(input, actual, expected, ulp, ctx),
-
-            // We return +0.0, MPFR returns -0.0
-            (Mpfr, BaseName::Jn | BaseName::Yn)
-                if input.1 == f64::NEG_INFINITY && actual == F::ZERO && expected == F::ZERO =>
-            {
-                XFAIL
-            }
-
-            _ => None,
-        }
+    ) -> CheckAction {
+        int_float_common(input, actual, expected, ctx)
     }
 }
 
-/// Our bessel functions blow up with large N values
-fn bessel_prec_dropoff<F1: Float, F2: Float>(
+fn int_float_common<F1: Float, F2: Float>(
     input: (i32, F1),
     actual: F2,
     expected: F2,
-    ulp: &mut u32,
     ctx: &CheckCtx,
-) -> Option<TestResult> {
-    if ctx.base_name == BaseName::Jn || ctx.base_name == BaseName::Yn {
+) -> CheckAction {
+    if ctx.basis == Mpfr
+        && (ctx.base_name == BaseName::Jn || ctx.base_name == BaseName::Yn)
+        && input.1 == F1::NEG_INFINITY
+        && actual == F2::ZERO
+        && expected == F2::ZERO
+    {
+        return XFAIL("mpfr b");
+    }
+
+    // Our bessel functions blow up with large N values
+    if ctx.basis == Musl && (ctx.base_name == BaseName::Jn || ctx.base_name == BaseName::Yn) {
         if input.0 > 4000 {
-            return XFAIL;
+            return XFAIL_NOCHECK;
         } else if input.0 > 2000 {
-            // *ulp = 20_000;
-            *ulp = 20000;
+            return CheckAction::AssertWithUlp(20_000);
         } else if input.0 > 1000 {
-            *ulp = 4000;
+            return CheckAction::AssertWithUlp(4_000);
         }
     }
 
     // Values near infinity sometimes get cut off for us. `ynf(681, 509.90924) = -inf` but should
     // be -3.2161271e38.
-    if ctx.fn_ident == Identifier::Ynf
+    if ctx.basis == Musl
+        && ctx.fn_ident == Identifier::Ynf
         && !expected.is_infinite()
         && actual.is_infinite()
         && (expected.abs().to_bits().abs_diff(actual.abs().to_bits())
             < F2::Int::cast_from(1_000_000u32))
     {
-        return XFAIL;
+        return XFAIL_NOCHECK;
     }
 
-    None
+    DEFAULT
 }
 
 impl MaybeOverride<(f32, i32)> for SpecialCase {}
@@ -610,9 +593,8 @@ impl MaybeOverride<(f32, f32, f32)> for SpecialCase {
         input: (f32, f32, f32),
         actual: F,
         expected: F,
-        _ulp: &mut u32,
         ctx: &CheckCtx,
-    ) -> Option<TestResult> {
+    ) -> CheckAction {
         ternop_common(input, actual, expected, ctx)
     }
 }
@@ -621,9 +603,8 @@ impl MaybeOverride<(f64, f64, f64)> for SpecialCase {
         input: (f64, f64, f64),
         actual: F,
         expected: F,
-        _ulp: &mut u32,
         ctx: &CheckCtx,
-    ) -> Option<TestResult> {
+    ) -> CheckAction {
         ternop_common(input, actual, expected, ctx)
     }
 }
@@ -634,7 +615,7 @@ fn ternop_common<F1: Float, F2: Float>(
     actual: F2,
     expected: F2,
     ctx: &CheckCtx,
-) -> Option<TestResult> {
+) -> CheckAction {
     // FIXME(fma): 754-2020 says "When the exact result of (a × b) + c is non-zero yet the result
     // of fusedMultiplyAdd is zero because of rounding, the zero result takes the sign of the
     // exact result". Our implementation returns the wrong sign:
@@ -647,8 +628,8 @@ fn ternop_common<F1: Float, F2: Float>(
         && expected.biteq(F2::NEG_ZERO)
         && actual.biteq(F2::ZERO)
     {
-        return XFAIL;
+        return XFAIL("fma sign");
     }
 
-    None
+    DEFAULT
 }
diff --git a/crates/libm-test/src/test_traits.rs b/crates/libm-test/src/test_traits.rs
index 0a4baa2e3..a5806943e 100644
--- a/crates/libm-test/src/test_traits.rs
+++ b/crates/libm-test/src/test_traits.rs
@@ -8,8 +8,9 @@
 
 use std::fmt;
 
-use anyhow::{Context, bail, ensure};
+use anyhow::{Context, anyhow, bail, ensure};
 
+use crate::precision::CheckAction;
 use crate::{CheckCtx, Float, Int, MaybeOverride, SpecialCase, TestResult};
 
 /// Trait for calling a function with a tuple as arguments.
@@ -185,20 +186,34 @@ where
     Input: Hex + fmt::Debug,
     SpecialCase: MaybeOverride<Input>,
 {
-    if let Some(res) = SpecialCase::check_int(input, actual, expected, ctx) {
-        return res;
-    }
+    let (result, xfail_msg) = match SpecialCase::check_int(input, actual, expected, ctx) {
+        CheckAction::AssertSuccess => (actual == expected, None),
+        CheckAction::AssertFailure(msg) => (actual != expected, Some(msg)),
+        CheckAction::Custom(res) => return res,
+        CheckAction::Skip => return Ok(()),
+        CheckAction::AssertWithUlp(_) => panic!("ulp has no meaning for integer checks"),
+    };
+
+    let make_xfail_msg = || match xfail_msg {
+        Some(m) => format!(
+            "expected failure but test passed. Does an XFAIL need to be updated?\n\
+            failed at: {m}",
+        ),
+        None => String::new(),
+    };
 
     anyhow::ensure!(
-        actual == expected,
+        result,
         "\
         \n    input:    {input:?} {ibits}\
         \n    expected: {expected:<22?} {expbits}\
         \n    actual:   {actual:<22?} {actbits}\
+        \n    {msg}\
         ",
         actbits = actual.hex(),
         expbits = expected.hex(),
         ibits = input.hex(),
+        msg = make_xfail_msg()
     );
 
     Ok(())
@@ -246,15 +261,19 @@ where
     u32: TryFrom<F::SignedInt, Error: fmt::Debug>,
     SpecialCase: MaybeOverride<Input>,
 {
+    let mut assert_failure_msg = None;
+
     // Create a wrapper function so we only need to `.with_context` once.
-    let inner = || -> TestResult {
+    let mut inner = || -> TestResult {
         let mut allowed_ulp = ctx.ulp;
 
-        // If the tested function requires a nonstandard test, run it here.
-        if let Some(res) = SpecialCase::check_float(input, actual, expected, &mut allowed_ulp, ctx)
-        {
-            return res;
-        }
+        match SpecialCase::check_float(input, actual, expected, ctx) {
+            CheckAction::AssertSuccess => (),
+            CheckAction::AssertFailure(msg) => assert_failure_msg = Some(msg),
+            CheckAction::Custom(res) => return res,
+            CheckAction::Skip => return Ok(()),
+            CheckAction::AssertWithUlp(ulp_override) => allowed_ulp = ulp_override,
+        };
 
         // Check when both are NaNs
         if actual.is_nan() && expected.is_nan() {
@@ -280,14 +299,29 @@ where
         let ulp_diff = act_bits.checked_sub(exp_bits).unwrap().abs();
 
         let ulp_u32 = u32::try_from(ulp_diff)
-            .map_err(|e| anyhow::anyhow!("{e:?}: ulp of {ulp_diff} exceeds u32::MAX"))?;
+            .map_err(|e| anyhow!("{e:?}: ulp of {ulp_diff} exceeds u32::MAX"))?;
 
         ensure!(ulp_u32 <= allowed_ulp, "ulp {ulp_diff} > {allowed_ulp}",);
 
         Ok(())
     };
 
-    inner().with_context(|| {
+    let mut res = inner();
+
+    if let Some(msg) = assert_failure_msg {
+        // Invert `Ok` and `Err` if the test is an xfail.
+        if res.is_ok() {
+            let e = anyhow!(
+                "expected failure but test passed. Does an XFAIL need to be updated?\n\
+                failed at: {msg}",
+            );
+            res = Err(e)
+        } else {
+            res = Ok(())
+        }
+    }
+
+    res.with_context(|| {
         format!(
             "\
             \n    input:    {input:?} {ibits}\

From 459dd80e3557b910be4c87f44d2ce051bb744b20 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 23 Jan 2025 09:15:43 +0000
Subject: [PATCH 174/279] Change `from_parts` to take a `u32` exponent rather
 than `i32`

Make things more consistent with other API that works with a bitwise
representation of the exponent. That is, use `u32` when working with a
bitwise (biased) representation, use `i32` when the bitwise
representation has been adjusted for bias and ay be negative.

Every place this has been used so far has an `as i32`, so this change
makes things cleaner anyway.
---
 src/math/generic/sqrt.rs         |  2 +-
 src/math/support/float_traits.rs | 16 ++++++++--------
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/math/generic/sqrt.rs b/src/math/generic/sqrt.rs
index 22ee93f57..c20c0f205 100644
--- a/src/math/generic/sqrt.rs
+++ b/src/math/generic/sqrt.rs
@@ -92,7 +92,7 @@ where
         }
 
         // Normalize subnormals by multiplying by 1.0 << SIG_BITS (e.g. 0x1p52 for doubles).
-        let scaled = x * F::from_parts(false, (F::SIG_BITS + F::EXP_BIAS) as i32, zero);
+        let scaled = x * F::from_parts(false, F::SIG_BITS + F::EXP_BIAS, zero);
         ix = scaled.to_bits();
         match top {
             Exp::Shifted(ref mut v) => {
diff --git a/src/math/support/float_traits.rs b/src/math/support/float_traits.rs
index 1abb7c4de..57e4aebec 100644
--- a/src/math/support/float_traits.rs
+++ b/src/math/support/float_traits.rs
@@ -131,11 +131,11 @@ pub trait Float:
     fn from_bits(a: Self::Int) -> Self;
 
     /// Constructs a `Self` from its parts. Inputs are treated as bits and shifted into position.
-    fn from_parts(negative: bool, exponent: i32, significand: Self::Int) -> Self {
+    fn from_parts(negative: bool, exponent: u32, significand: Self::Int) -> Self {
         let sign = if negative { Self::Int::ONE } else { Self::Int::ZERO };
         Self::from_bits(
             (sign << (Self::BITS - 1))
-                | (Self::Int::cast_from(exponent as u32 & Self::EXP_MAX) << Self::SIG_BITS)
+                | (Self::Int::cast_from(exponent & Self::EXP_MAX) << Self::SIG_BITS)
                 | (significand & Self::SIG_MASK),
         )
     }
@@ -282,7 +282,7 @@ mod tests {
         assert_eq!(f16::from_bits(0x1).exp_unbiased(), -15);
 
         // `from_parts`
-        assert_biteq!(f16::from_parts(true, f16::EXP_BIAS as i32, 0), -1.0f16);
+        assert_biteq!(f16::from_parts(true, f16::EXP_BIAS, 0), -1.0f16);
         assert_biteq!(f16::from_parts(false, 0, 1), f16::from_bits(0x1));
     }
 
@@ -304,8 +304,8 @@ mod tests {
         assert_eq!(f32::from_bits(0x1).exp_unbiased(), -127);
 
         // `from_parts`
-        assert_biteq!(f32::from_parts(true, f32::EXP_BIAS as i32, 0), -1.0f32);
-        assert_biteq!(f32::from_parts(false, 10 + f32::EXP_BIAS as i32, 0), hf32!("0x1p10"));
+        assert_biteq!(f32::from_parts(true, f32::EXP_BIAS, 0), -1.0f32);
+        assert_biteq!(f32::from_parts(false, 10 + f32::EXP_BIAS, 0), hf32!("0x1p10"));
         assert_biteq!(f32::from_parts(false, 0, 1), f32::from_bits(0x1));
     }
 
@@ -327,8 +327,8 @@ mod tests {
         assert_eq!(f64::from_bits(0x1).exp_unbiased(), -1023);
 
         // `from_parts`
-        assert_biteq!(f64::from_parts(true, f64::EXP_BIAS as i32, 0), -1.0f64);
-        assert_biteq!(f64::from_parts(false, 10 + f64::EXP_BIAS as i32, 0), hf64!("0x1p10"));
+        assert_biteq!(f64::from_parts(true, f64::EXP_BIAS, 0), -1.0f64);
+        assert_biteq!(f64::from_parts(false, 10 + f64::EXP_BIAS, 0), hf64!("0x1p10"));
         assert_biteq!(f64::from_parts(false, 0, 1), f64::from_bits(0x1));
     }
 
@@ -351,7 +351,7 @@ mod tests {
         assert_eq!(f128::from_bits(0x1).exp_unbiased(), -16383);
 
         // `from_parts`
-        assert_biteq!(f128::from_parts(true, f128::EXP_BIAS as i32, 0), -1.0f128);
+        assert_biteq!(f128::from_parts(true, f128::EXP_BIAS, 0), -1.0f128);
         assert_biteq!(f128::from_parts(false, 0, 1), f128::from_bits(0x1));
     }
 }

From 220c8e5745f3f60c0191a9ee7a2598fc24814ea6 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 23 Jan 2025 22:02:22 +0000
Subject: [PATCH 175/279] Add a generic version of `scalbn`

This replaces the `f32` and `f64` versions of `scalbn` and `ldexp`.
---
 crates/libm-test/src/mpfloat.rs |   3 +-
 etc/function-definitions.json   |   2 +
 src/math/generic/mod.rs         |   2 +
 src/math/generic/scalbn.rs      | 123 ++++++++++++++++++++++++++++++++
 src/math/scalbn.rs              |  33 +--------
 src/math/scalbnf.rs             |  29 +-------
 6 files changed, 133 insertions(+), 59 deletions(-)
 create mode 100644 src/math/generic/scalbn.rs

diff --git a/crates/libm-test/src/mpfloat.rs b/crates/libm-test/src/mpfloat.rs
index a404f227b..4ac70c2eb 100644
--- a/crates/libm-test/src/mpfloat.rs
+++ b/crates/libm-test/src/mpfloat.rs
@@ -158,7 +158,8 @@ libm_macros::for_each_function! {
         ilogbf,
         jn,
         jnf,
-        ldexp,ldexpf,
+        ldexp,
+        ldexpf,
         lgamma_r,
         lgammaf_r,
         modf,
diff --git a/etc/function-definitions.json b/etc/function-definitions.json
index d3810b940..bbb2b40f1 100644
--- a/etc/function-definitions.json
+++ b/etc/function-definitions.json
@@ -698,12 +698,14 @@
     "scalbn": {
         "sources": [
             "src/libm_helper.rs",
+            "src/math/generic/scalbn.rs",
             "src/math/scalbn.rs"
         ],
         "type": "f64"
     },
     "scalbnf": {
         "sources": [
+            "src/math/generic/scalbn.rs",
             "src/math/scalbnf.rs"
         ],
         "type": "f32"
diff --git a/src/math/generic/mod.rs b/src/math/generic/mod.rs
index d3df650e1..c7741cb46 100644
--- a/src/math/generic/mod.rs
+++ b/src/math/generic/mod.rs
@@ -4,6 +4,7 @@ mod fabs;
 mod fdim;
 mod floor;
 mod rint;
+mod scalbn;
 mod sqrt;
 mod trunc;
 
@@ -13,5 +14,6 @@ pub use fabs::fabs;
 pub use fdim::fdim;
 pub use floor::floor;
 pub use rint::rint;
+pub use scalbn::scalbn;
 pub use sqrt::sqrt;
 pub use trunc::trunc;
diff --git a/src/math/generic/scalbn.rs b/src/math/generic/scalbn.rs
new file mode 100644
index 000000000..f036c15cc
--- /dev/null
+++ b/src/math/generic/scalbn.rs
@@ -0,0 +1,123 @@
+use super::super::{CastFrom, CastInto, Float, IntTy, MinInt};
+
+/// Scale the exponent.
+///
+/// From N3220:
+///
+/// > The scalbn and scalbln functions compute `x * b^n`, where `b = FLT_RADIX` if the return type
+/// > of the function is a standard floating type, or `b = 10` if the return type of the function
+/// > is a decimal floating type. A range error occurs for some finite x, depending on n.
+/// >
+/// > [...]
+/// >
+/// > * `scalbn(±0, n)` returns `±0`.
+/// > * `scalbn(x, 0)` returns `x`.
+/// > * `scalbn(±∞, n)` returns `±∞`.
+/// >
+/// > If the calculation does not overflow or underflow, the returned value is exact and
+/// > independent of the current rounding direction mode.
+pub fn scalbn<F: Float>(mut x: F, mut n: i32) -> F
+where
+    u32: CastInto<F::Int>,
+    F::Int: CastFrom<i32>,
+    F::Int: CastFrom<u32>,
+{
+    let zero = IntTy::<F>::ZERO;
+
+    // Bits including the implicit bit
+    let sig_total_bits = F::SIG_BITS + 1;
+
+    // Maximum and minimum values when biased
+    let exp_max: i32 = F::EXP_BIAS as i32;
+    let exp_min = -(exp_max - 1);
+
+    // 2 ^ Emax, where Emax is the maximum biased exponent value (1023 for f64)
+    let f_exp_max = F::from_parts(false, F::EXP_BIAS << 1, zero);
+
+    // 2 ^ Emin, where Emin is the minimum biased exponent value (-1022 for f64)
+    let f_exp_min = F::from_parts(false, 1, zero);
+
+    // 2 ^ sig_total_bits, representation of what can be accounted for with subnormals
+    let f_exp_subnorm = F::from_parts(false, sig_total_bits + F::EXP_BIAS, zero);
+
+    if n > exp_max {
+        x *= f_exp_max;
+        n -= exp_max;
+        if n > exp_max {
+            x *= f_exp_max;
+            n -= exp_max;
+            if n > exp_max {
+                n = exp_max;
+            }
+        }
+    } else if n < exp_min {
+        let mul = f_exp_min * f_exp_subnorm;
+        let add = (exp_max - 1) - sig_total_bits as i32;
+
+        x *= mul;
+        n += add;
+        if n < exp_min {
+            x *= mul;
+            n += add;
+            if n < exp_min {
+                n = exp_min;
+            }
+        }
+    }
+
+    x * F::from_parts(false, (F::EXP_BIAS as i32 + n) as u32, zero)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::super::super::Int;
+    use super::*;
+
+    // Tests against N3220
+    fn spec_test<F: Float>()
+    where
+        u32: CastInto<F::Int>,
+        F::Int: CastFrom<i32>,
+        F::Int: CastFrom<u32>,
+    {
+        // `scalbn(±0, n)` returns `±0`.
+        assert_biteq!(scalbn(F::NEG_ZERO, 10), F::NEG_ZERO);
+        assert_biteq!(scalbn(F::NEG_ZERO, 0), F::NEG_ZERO);
+        assert_biteq!(scalbn(F::NEG_ZERO, -10), F::NEG_ZERO);
+        assert_biteq!(scalbn(F::ZERO, 10), F::ZERO);
+        assert_biteq!(scalbn(F::ZERO, 0), F::ZERO);
+        assert_biteq!(scalbn(F::ZERO, -10), F::ZERO);
+
+        // `scalbn(x, 0)` returns `x`.
+        assert_biteq!(scalbn(F::MIN, 0), F::MIN);
+        assert_biteq!(scalbn(F::MAX, 0), F::MAX);
+        assert_biteq!(scalbn(F::INFINITY, 0), F::INFINITY);
+        assert_biteq!(scalbn(F::NEG_INFINITY, 0), F::NEG_INFINITY);
+        assert_biteq!(scalbn(F::ZERO, 0), F::ZERO);
+        assert_biteq!(scalbn(F::NEG_ZERO, 0), F::NEG_ZERO);
+
+        // `scalbn(±∞, n)` returns `±∞`.
+        assert_biteq!(scalbn(F::INFINITY, 10), F::INFINITY);
+        assert_biteq!(scalbn(F::INFINITY, -10), F::INFINITY);
+        assert_biteq!(scalbn(F::NEG_INFINITY, 10), F::NEG_INFINITY);
+        assert_biteq!(scalbn(F::NEG_INFINITY, -10), F::NEG_INFINITY);
+
+        // NaN should remain NaNs.
+        assert!(scalbn(F::NAN, 10).is_nan());
+        assert!(scalbn(F::NAN, 0).is_nan());
+        assert!(scalbn(F::NAN, -10).is_nan());
+        assert!(scalbn(-F::NAN, 10).is_nan());
+        assert!(scalbn(-F::NAN, 0).is_nan());
+        assert!(scalbn(-F::NAN, -10).is_nan());
+    }
+
+    #[test]
+    fn spec_test_f32() {
+        spec_test::<f32>();
+    }
+
+    #[test]
+    fn spec_test_f64() {
+        spec_test::<f64>();
+    }
+}
diff --git a/src/math/scalbn.rs b/src/math/scalbn.rs
index 00c455a10..f809dad51 100644
--- a/src/math/scalbn.rs
+++ b/src/math/scalbn.rs
@@ -1,33 +1,4 @@
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn scalbn(x: f64, mut n: i32) -> f64 {
-    let x1p1023 = f64::from_bits(0x7fe0000000000000); // 0x1p1023 === 2 ^ 1023
-    let x1p53 = f64::from_bits(0x4340000000000000); // 0x1p53 === 2 ^ 53
-    let x1p_1022 = f64::from_bits(0x0010000000000000); // 0x1p-1022 === 2 ^ (-1022)
-
-    let mut y = x;
-
-    if n > 1023 {
-        y *= x1p1023;
-        n -= 1023;
-        if n > 1023 {
-            y *= x1p1023;
-            n -= 1023;
-            if n > 1023 {
-                n = 1023;
-            }
-        }
-    } else if n < -1022 {
-        /* make sure final n < -53 to avoid double
-        rounding in the subnormal range */
-        y *= x1p_1022 * x1p53;
-        n += 1022 - 53;
-        if n < -1022 {
-            y *= x1p_1022 * x1p53;
-            n += 1022 - 53;
-            if n < -1022 {
-                n = -1022;
-            }
-        }
-    }
-    y * f64::from_bits(((0x3ff + n) as u64) << 52)
+pub fn scalbn(x: f64, n: i32) -> f64 {
+    super::generic::scalbn(x, n)
 }
diff --git a/src/math/scalbnf.rs b/src/math/scalbnf.rs
index 73f4bb57a..57e7ba76f 100644
--- a/src/math/scalbnf.rs
+++ b/src/math/scalbnf.rs
@@ -1,29 +1,4 @@
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn scalbnf(mut x: f32, mut n: i32) -> f32 {
-    let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 127
-    let x1p_126 = f32::from_bits(0x800000); // 0x1p-126f === 2 ^ -126
-    let x1p24 = f32::from_bits(0x4b800000); // 0x1p24f === 2 ^ 24
-
-    if n > 127 {
-        x *= x1p127;
-        n -= 127;
-        if n > 127 {
-            x *= x1p127;
-            n -= 127;
-            if n > 127 {
-                n = 127;
-            }
-        }
-    } else if n < -126 {
-        x *= x1p_126 * x1p24;
-        n += 126 - 24;
-        if n < -126 {
-            x *= x1p_126 * x1p24;
-            n += 126 - 24;
-            if n < -126 {
-                n = -126;
-            }
-        }
-    }
-    x * f32::from_bits(((0x7f + n) as u32) << 23)
+pub fn scalbnf(x: f32, n: i32) -> f32 {
+    super::generic::scalbn(x, n)
 }

From 9c68f74216c5b6fcd462595642674ff2768791d1 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 24 Jan 2025 01:49:23 +0000
Subject: [PATCH 176/279] Add a generic version of `round`

This replaces `round` and `roundf`.
---
 etc/function-definitions.json |  2 ++
 src/math/generic/mod.rs       |  2 ++
 src/math/generic/round.rs     | 46 +++++++++++++++++++++++++++++++++++
 src/math/round.rs             | 27 ++------------------
 src/math/roundf.rs            | 29 ++--------------------
 5 files changed, 54 insertions(+), 52 deletions(-)
 create mode 100644 src/math/generic/round.rs

diff --git a/etc/function-definitions.json b/etc/function-definitions.json
index bbb2b40f1..4aea45a07 100644
--- a/etc/function-definitions.json
+++ b/etc/function-definitions.json
@@ -685,12 +685,14 @@
     "round": {
         "sources": [
             "src/libm_helper.rs",
+            "src/math/generic/round.rs",
             "src/math/round.rs"
         ],
         "type": "f64"
     },
     "roundf": {
         "sources": [
+            "src/math/generic/round.rs",
             "src/math/roundf.rs"
         ],
         "type": "f32"
diff --git a/src/math/generic/mod.rs b/src/math/generic/mod.rs
index c7741cb46..1f557719f 100644
--- a/src/math/generic/mod.rs
+++ b/src/math/generic/mod.rs
@@ -4,6 +4,7 @@ mod fabs;
 mod fdim;
 mod floor;
 mod rint;
+mod round;
 mod scalbn;
 mod sqrt;
 mod trunc;
@@ -14,6 +15,7 @@ pub use fabs::fabs;
 pub use fdim::fdim;
 pub use floor::floor;
 pub use rint::rint;
+pub use round::round;
 pub use scalbn::scalbn;
 pub use sqrt::sqrt;
 pub use trunc::trunc;
diff --git a/src/math/generic/round.rs b/src/math/generic/round.rs
new file mode 100644
index 000000000..fc9a1b675
--- /dev/null
+++ b/src/math/generic/round.rs
@@ -0,0 +1,46 @@
+use super::super::{Float, MinInt};
+use super::{copysign, trunc};
+
+pub fn round<F: Float>(x: F) -> F {
+    let f0p5 = F::from_parts(false, F::EXP_BIAS - 1, F::Int::ZERO); // 0.5
+    let f0p25 = F::from_parts(false, F::EXP_BIAS - 2, F::Int::ZERO); // 0.25
+
+    trunc(x + copysign(f0p5 - f0p25 * F::EPSILON, x))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn zeroes_f32() {
+        assert_biteq!(round(0.0_f32), 0.0_f32);
+        assert_biteq!(round(-0.0_f32), -0.0_f32);
+    }
+
+    #[test]
+    fn sanity_check_f32() {
+        assert_eq!(round(-1.0_f32), -1.0);
+        assert_eq!(round(2.8_f32), 3.0);
+        assert_eq!(round(-0.5_f32), -1.0);
+        assert_eq!(round(0.5_f32), 1.0);
+        assert_eq!(round(-1.5_f32), -2.0);
+        assert_eq!(round(1.5_f32), 2.0);
+    }
+
+    #[test]
+    fn zeroes_f64() {
+        assert_biteq!(round(0.0_f64), 0.0_f64);
+        assert_biteq!(round(-0.0_f64), -0.0_f64);
+    }
+
+    #[test]
+    fn sanity_check_f64() {
+        assert_eq!(round(-1.0_f64), -1.0);
+        assert_eq!(round(2.8_f64), 3.0);
+        assert_eq!(round(-0.5_f64), -1.0);
+        assert_eq!(round(0.5_f64), 1.0);
+        assert_eq!(round(-1.5_f64), -2.0);
+        assert_eq!(round(1.5_f64), 2.0);
+    }
+}
diff --git a/src/math/round.rs b/src/math/round.rs
index b81ebaa1d..36e0eb1f2 100644
--- a/src/math/round.rs
+++ b/src/math/round.rs
@@ -1,28 +1,5 @@
-use core::f64;
-
-use super::{copysign, trunc};
-
+/// Round `x` to the nearest integer, breaking ties away from zero.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn round(x: f64) -> f64 {
-    trunc(x + copysign(0.5 - 0.25 * f64::EPSILON, x))
-}
-
-#[cfg(test)]
-mod tests {
-    use super::round;
-
-    #[test]
-    fn negative_zero() {
-        assert_eq!(round(-0.0_f64).to_bits(), (-0.0_f64).to_bits());
-    }
-
-    #[test]
-    fn sanity_check() {
-        assert_eq!(round(-1.0), -1.0);
-        assert_eq!(round(2.8), 3.0);
-        assert_eq!(round(-0.5), -1.0);
-        assert_eq!(round(0.5), 1.0);
-        assert_eq!(round(-1.5), -2.0);
-        assert_eq!(round(1.5), 2.0);
-    }
+    super::generic::round(x)
 }
diff --git a/src/math/roundf.rs b/src/math/roundf.rs
index fb974bbfe..b5d7c9d69 100644
--- a/src/math/roundf.rs
+++ b/src/math/roundf.rs
@@ -1,30 +1,5 @@
-use core::f32;
-
-use super::{copysignf, truncf};
-
+/// Round `x` to the nearest integer, breaking ties away from zero.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn roundf(x: f32) -> f32 {
-    truncf(x + copysignf(0.5 - 0.25 * f32::EPSILON, x))
-}
-
-// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520
-#[cfg(not(target_arch = "powerpc64"))]
-#[cfg(test)]
-mod tests {
-    use super::roundf;
-
-    #[test]
-    fn negative_zero() {
-        assert_eq!(roundf(-0.0_f32).to_bits(), (-0.0_f32).to_bits());
-    }
-
-    #[test]
-    fn sanity_check() {
-        assert_eq!(roundf(-1.0), -1.0);
-        assert_eq!(roundf(2.8), 3.0);
-        assert_eq!(roundf(-0.5), -1.0);
-        assert_eq!(roundf(0.5), 1.0);
-        assert_eq!(roundf(-1.5), -2.0);
-        assert_eq!(roundf(1.5), 2.0);
-    }
+    super::generic::round(x)
 }

From e2ac6efaa71a4d5b971c675148a6dd96cc50c39c Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 24 Jan 2025 01:57:12 +0000
Subject: [PATCH 177/279] Add `roundf16` and `roundf128`

---
 crates/libm-macros/src/shared.rs             |  4 +--
 crates/libm-test/benches/random.rs           |  2 ++
 crates/libm-test/src/mpfloat.rs              |  4 +++
 crates/libm-test/tests/compare_built_musl.rs |  2 ++
 crates/util/src/main.rs                      |  2 ++
 etc/function-definitions.json                | 14 ++++++++
 etc/function-list.txt                        |  2 ++
 src/math/generic/round.rs                    | 36 ++++++++++++++++++++
 src/math/mod.rs                              |  4 +++
 src/math/roundf128.rs                        |  5 +++
 src/math/roundf16.rs                         |  5 +++
 11 files changed, 78 insertions(+), 2 deletions(-)
 create mode 100644 src/math/roundf128.rs
 create mode 100644 src/math/roundf16.rs

diff --git a/crates/libm-macros/src/shared.rs b/crates/libm-macros/src/shared.rs
index 80bd3e907..b233e34f1 100644
--- a/crates/libm-macros/src/shared.rs
+++ b/crates/libm-macros/src/shared.rs
@@ -9,7 +9,7 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
         FloatTy::F16,
         Signature { args: &[Ty::F16], returns: &[Ty::F16] },
         None,
-        &["ceilf16", "fabsf16", "floorf16", "rintf16", "sqrtf16", "truncf16"],
+        &["ceilf16", "fabsf16", "floorf16", "rintf16", "roundf16", "sqrtf16", "truncf16"],
     ),
     (
         // `fn(f32) -> f32`
@@ -40,7 +40,7 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
         FloatTy::F128,
         Signature { args: &[Ty::F128], returns: &[Ty::F128] },
         None,
-        &["ceilf128", "fabsf128", "floorf128", "rintf128", "sqrtf128", "truncf128"],
+        &["ceilf128", "fabsf128", "floorf128", "rintf128", "roundf128", "sqrtf128", "truncf128"],
     ),
     (
         // `(f16, f16) -> f16`
diff --git a/crates/libm-test/benches/random.rs b/crates/libm-test/benches/random.rs
index 4d050e817..d0ecd851e 100644
--- a/crates/libm-test/benches/random.rs
+++ b/crates/libm-test/benches/random.rs
@@ -129,6 +129,8 @@ libm_macros::for_each_function! {
         | floorf16
         | rintf128
         | rintf16
+        | roundf128
+        | roundf16
         | sqrtf128
         | sqrtf16
         | truncf128
diff --git a/crates/libm-test/src/mpfloat.rs b/crates/libm-test/src/mpfloat.rs
index 4ac70c2eb..4422ab88d 100644
--- a/crates/libm-test/src/mpfloat.rs
+++ b/crates/libm-test/src/mpfloat.rs
@@ -175,6 +175,8 @@ libm_macros::for_each_function! {
         rintf16,
         round,
         roundf,
+        roundf128,
+        roundf16,
         scalbn,
         scalbnf,
         sincos,sincosf,
@@ -247,6 +249,7 @@ impl_no_round! {
     fabsf16 => abs_mut;
     floorf16 => floor_mut;
     rintf16 => round_even_mut; // FIXME: respect rounding mode
+    roundf16 => round_mut;
     truncf16 => trunc_mut;
 }
 
@@ -256,6 +259,7 @@ impl_no_round! {
     fabsf128 => abs_mut;
     floorf128 => floor_mut;
     rintf128 => round_even_mut; // FIXME: respect rounding mode
+    roundf128 => round_mut;
     truncf128 => trunc_mut;
 }
 
diff --git a/crates/libm-test/tests/compare_built_musl.rs b/crates/libm-test/tests/compare_built_musl.rs
index f009816c9..0fc1b0df1 100644
--- a/crates/libm-test/tests/compare_built_musl.rs
+++ b/crates/libm-test/tests/compare_built_musl.rs
@@ -91,6 +91,8 @@ libm_macros::for_each_function! {
         floorf16,
         rintf128,
         rintf16,
+        roundf128,
+        roundf16,
         sqrtf128,
         sqrtf16,
         truncf128,
diff --git a/crates/util/src/main.rs b/crates/util/src/main.rs
index 889823d2e..aaedda6d1 100644
--- a/crates/util/src/main.rs
+++ b/crates/util/src/main.rs
@@ -98,6 +98,8 @@ fn do_eval(basis: &str, op: &str, inputs: &[&str]) {
             | floorf16
             | rintf128
             | rintf16
+            | roundf128
+            | roundf16
             | sqrtf128
             | sqrtf16
             | truncf128
diff --git a/etc/function-definitions.json b/etc/function-definitions.json
index 4aea45a07..8c5903e93 100644
--- a/etc/function-definitions.json
+++ b/etc/function-definitions.json
@@ -697,6 +697,20 @@
         ],
         "type": "f32"
     },
+    "roundf128": {
+        "sources": [
+            "src/math/generic/round.rs",
+            "src/math/roundf128.rs"
+        ],
+        "type": "f128"
+    },
+    "roundf16": {
+        "sources": [
+            "src/math/generic/round.rs",
+            "src/math/roundf16.rs"
+        ],
+        "type": "f16"
+    },
     "scalbn": {
         "sources": [
             "src/libm_helper.rs",
diff --git a/etc/function-list.txt b/etc/function-list.txt
index 41bb4e06b..0b6eed828 100644
--- a/etc/function-list.txt
+++ b/etc/function-list.txt
@@ -101,6 +101,8 @@ rintf128
 rintf16
 round
 roundf
+roundf128
+roundf16
 scalbn
 scalbnf
 sin
diff --git a/src/math/generic/round.rs b/src/math/generic/round.rs
index fc9a1b675..8b5138188 100644
--- a/src/math/generic/round.rs
+++ b/src/math/generic/round.rs
@@ -12,6 +12,24 @@ pub fn round<F: Float>(x: F) -> F {
 mod tests {
     use super::*;
 
+    #[test]
+    #[cfg(f16_enabled)]
+    fn zeroes_f16() {
+        assert_biteq!(round(0.0_f16), 0.0_f16);
+        assert_biteq!(round(-0.0_f16), -0.0_f16);
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn sanity_check_f16() {
+        assert_eq!(round(-1.0_f16), -1.0);
+        assert_eq!(round(2.8_f16), 3.0);
+        assert_eq!(round(-0.5_f16), -1.0);
+        assert_eq!(round(0.5_f16), 1.0);
+        assert_eq!(round(-1.5_f16), -2.0);
+        assert_eq!(round(1.5_f16), 2.0);
+    }
+
     #[test]
     fn zeroes_f32() {
         assert_biteq!(round(0.0_f32), 0.0_f32);
@@ -43,4 +61,22 @@ mod tests {
         assert_eq!(round(-1.5_f64), -2.0);
         assert_eq!(round(1.5_f64), 2.0);
     }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn zeroes_f128() {
+        assert_biteq!(round(0.0_f128), 0.0_f128);
+        assert_biteq!(round(-0.0_f128), -0.0_f128);
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn sanity_check_f128() {
+        assert_eq!(round(-1.0_f128), -1.0);
+        assert_eq!(round(2.8_f128), 3.0);
+        assert_eq!(round(-0.5_f128), -1.0);
+        assert_eq!(round(0.5_f128), 1.0);
+        assert_eq!(round(-1.5_f128), -2.0);
+        assert_eq!(round(1.5_f128), 2.0);
+    }
 }
diff --git a/src/math/mod.rs b/src/math/mod.rs
index 53d06974c..8db17a02d 100644
--- a/src/math/mod.rs
+++ b/src/math/mod.rs
@@ -347,6 +347,7 @@ cfg_if! {
         mod fdimf16;
         mod floorf16;
         mod rintf16;
+        mod roundf16;
         mod sqrtf16;
         mod truncf16;
 
@@ -356,6 +357,7 @@ cfg_if! {
         pub use self::fdimf16::fdimf16;
         pub use self::floorf16::floorf16;
         pub use self::rintf16::rintf16;
+        pub use self::roundf16::roundf16;
         pub use self::sqrtf16::sqrtf16;
         pub use self::truncf16::truncf16;
     }
@@ -369,6 +371,7 @@ cfg_if! {
         mod fdimf128;
         mod floorf128;
         mod rintf128;
+        mod roundf128;
         mod sqrtf128;
         mod truncf128;
 
@@ -378,6 +381,7 @@ cfg_if! {
         pub use self::fdimf128::fdimf128;
         pub use self::floorf128::floorf128;
         pub use self::rintf128::rintf128;
+        pub use self::roundf128::roundf128;
         pub use self::sqrtf128::sqrtf128;
         pub use self::truncf128::truncf128;
     }
diff --git a/src/math/roundf128.rs b/src/math/roundf128.rs
new file mode 100644
index 000000000..fc3164929
--- /dev/null
+++ b/src/math/roundf128.rs
@@ -0,0 +1,5 @@
+/// Round `x` to the nearest integer, breaking ties away from zero.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn roundf128(x: f128) -> f128 {
+    super::generic::round(x)
+}
diff --git a/src/math/roundf16.rs b/src/math/roundf16.rs
new file mode 100644
index 000000000..8b356eaab
--- /dev/null
+++ b/src/math/roundf16.rs
@@ -0,0 +1,5 @@
+/// Round `x` to the nearest integer, breaking ties away from zero.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn roundf16(x: f16) -> f16 {
+    super::generic::round(x)
+}

From 581db99592b02892e4994305c830cf1464561cf4 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 24 Jan 2025 01:57:41 +0000
Subject: [PATCH 178/279] Remove an outdated note about precision

---
 crates/libm-test/src/precision.rs | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/crates/libm-test/src/precision.rs b/crates/libm-test/src/precision.rs
index 800425f12..bed615882 100644
--- a/crates/libm-test/src/precision.rs
+++ b/crates/libm-test/src/precision.rs
@@ -13,9 +13,6 @@ use crate::{BaseName, CheckBasis, CheckCtx, Float, Identifier, Int, TestResult};
 pub struct SpecialCase;
 
 /// ULP allowed to differ from the results returned by a test basis.
-///
-/// Note that these results were obtained using 400M rounds of random inputs, which
-/// is not a value used by default.
 pub fn default_ulp(ctx: &CheckCtx) -> u32 {
     // ULP compared to the infinite (MPFR) result.
     let mut ulp = match ctx.base_name {

From 97d8bfa459c7c8d77293680805d051aa3763704f Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 24 Jan 2025 02:10:52 +0000
Subject: [PATCH 179/279] Add a generic version of `fmin` and `fmax`

These can be used for `fmin`, `fminf`, `fmax`, and `fmaxf`. No changes
to the implementation are made, so [1] is not fixed.

[1]: https://github.com/rust-lang/libm/issues/439
---
 etc/function-definitions.json | 12 ++++++++----
 src/math/fmax.rs              | 11 ++---------
 src/math/fmaxf.rs             | 11 ++---------
 src/math/fmin.rs              | 11 ++---------
 src/math/fminf.rs             | 11 ++---------
 src/math/generic/fmax.rs      | 14 ++++++++++++++
 src/math/generic/fmin.rs      | 13 +++++++++++++
 src/math/generic/mod.rs       |  4 ++++
 8 files changed, 47 insertions(+), 40 deletions(-)
 create mode 100644 src/math/generic/fmax.rs
 create mode 100644 src/math/generic/fmin.rs

diff --git a/etc/function-definitions.json b/etc/function-definitions.json
index 8c5903e93..7ffe91ead 100644
--- a/etc/function-definitions.json
+++ b/etc/function-definitions.json
@@ -379,26 +379,30 @@
     "fmax": {
         "sources": [
             "src/libm_helper.rs",
-            "src/math/fmax.rs"
+            "src/math/fmax.rs",
+            "src/math/generic/fmax.rs"
         ],
         "type": "f64"
     },
     "fmaxf": {
         "sources": [
-            "src/math/fmaxf.rs"
+            "src/math/fmaxf.rs",
+            "src/math/generic/fmax.rs"
         ],
         "type": "f32"
     },
     "fmin": {
         "sources": [
             "src/libm_helper.rs",
-            "src/math/fmin.rs"
+            "src/math/fmin.rs",
+            "src/math/generic/fmin.rs"
         ],
         "type": "f64"
     },
     "fminf": {
         "sources": [
-            "src/math/fminf.rs"
+            "src/math/fminf.rs",
+            "src/math/generic/fmin.rs"
         ],
         "type": "f32"
     },
diff --git a/src/math/fmax.rs b/src/math/fmax.rs
index 93c97bc61..d5d9b513b 100644
--- a/src/math/fmax.rs
+++ b/src/math/fmax.rs
@@ -1,12 +1,5 @@
+/// Return the greater of two arguments or, if either argument is NaN, the other argument.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn fmax(x: f64, y: f64) -> f64 {
-    // IEEE754 says: maxNum(x, y) is the canonicalized number y if x < y, x if y < x, the
-    // canonicalized number if one operand is a number and the other a quiet NaN. Otherwise it
-    // is either x or y, canonicalized (this means results might differ among implementations).
-    // When either x or y is a signalingNaN, then the result is according to 6.2.
-    //
-    // Since we do not support sNaN in Rust yet, we do not need to handle them.
-    // FIXME(nagisa): due to https://bugs.llvm.org/show_bug.cgi?id=33303 we canonicalize by
-    // multiplying by 1.0. Should switch to the `canonicalize` when it works.
-    (if x.is_nan() || x < y { y } else { x }) * 1.0
+    super::generic::fmax(x, y)
 }
diff --git a/src/math/fmaxf.rs b/src/math/fmaxf.rs
index 607746647..3197d5cf2 100644
--- a/src/math/fmaxf.rs
+++ b/src/math/fmaxf.rs
@@ -1,12 +1,5 @@
+/// Return the greater of two arguments or, if either argument is NaN, the other argument.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn fmaxf(x: f32, y: f32) -> f32 {
-    // IEEE754 says: maxNum(x, y) is the canonicalized number y if x < y, x if y < x, the
-    // canonicalized number if one operand is a number and the other a quiet NaN. Otherwise it
-    // is either x or y, canonicalized (this means results might differ among implementations).
-    // When either x or y is a signalingNaN, then the result is according to 6.2.
-    //
-    // Since we do not support sNaN in Rust yet, we do not need to handle them.
-    // FIXME(nagisa): due to https://bugs.llvm.org/show_bug.cgi?id=33303 we canonicalize by
-    // multiplying by 1.0. Should switch to the `canonicalize` when it works.
-    (if x.is_nan() || x < y { y } else { x }) * 1.0
+    super::generic::fmax(x, y)
 }
diff --git a/src/math/fmin.rs b/src/math/fmin.rs
index ab1509f34..df8ff7c32 100644
--- a/src/math/fmin.rs
+++ b/src/math/fmin.rs
@@ -1,12 +1,5 @@
+/// Return the lesser of two arguments or, if either argument is NaN, the other argument.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn fmin(x: f64, y: f64) -> f64 {
-    // IEEE754 says: minNum(x, y) is the canonicalized number x if x < y, y if y < x, the
-    // canonicalized number if one operand is a number and the other a quiet NaN. Otherwise it
-    // is either x or y, canonicalized (this means results might differ among implementations).
-    // When either x or y is a signalingNaN, then the result is according to 6.2.
-    //
-    // Since we do not support sNaN in Rust yet, we do not need to handle them.
-    // FIXME(nagisa): due to https://bugs.llvm.org/show_bug.cgi?id=33303 we canonicalize by
-    // multiplying by 1.0. Should switch to the `canonicalize` when it works.
-    (if y.is_nan() || x < y { x } else { y }) * 1.0
+    super::generic::fmin(x, y)
 }
diff --git a/src/math/fminf.rs b/src/math/fminf.rs
index 0049e7117..b2cdfe89d 100644
--- a/src/math/fminf.rs
+++ b/src/math/fminf.rs
@@ -1,12 +1,5 @@
+/// Return the lesser of two arguments or, if either argument is NaN, the other argument.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn fminf(x: f32, y: f32) -> f32 {
-    // IEEE754 says: minNum(x, y) is the canonicalized number x if x < y, y if y < x, the
-    // canonicalized number if one operand is a number and the other a quiet NaN. Otherwise it
-    // is either x or y, canonicalized (this means results might differ among implementations).
-    // When either x or y is a signalingNaN, then the result is according to 6.2.
-    //
-    // Since we do not support sNaN in Rust yet, we do not need to handle them.
-    // FIXME(nagisa): due to https://bugs.llvm.org/show_bug.cgi?id=33303 we canonicalize by
-    // multiplying by 1.0. Should switch to the `canonicalize` when it works.
-    (if y.is_nan() || x < y { x } else { y }) * 1.0
+    super::generic::fmin(x, y)
 }
diff --git a/src/math/generic/fmax.rs b/src/math/generic/fmax.rs
new file mode 100644
index 000000000..97803052b
--- /dev/null
+++ b/src/math/generic/fmax.rs
@@ -0,0 +1,14 @@
+use super::super::Float;
+
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmax<F: Float>(x: F, y: F) -> F {
+    // IEEE754 says: maxNum(x, y) is the canonicalized number y if x < y, x if y < x, the
+    // canonicalized number if one operand is a number and the other a quiet NaN. Otherwise it
+    // is either x or y, canonicalized (this means results might differ among implementations).
+    // When either x or y is a signalingNaN, then the result is according to 6.2.
+    //
+    // Since we do not support sNaN in Rust yet, we do not need to handle them.
+    // FIXME(nagisa): due to https://bugs.llvm.org/show_bug.cgi?id=33303 we canonicalize by
+    // multiplying by 1.0. Should switch to the `canonicalize` when it works.
+    (if x.is_nan() || x < y { y } else { x }) * F::ONE
+}
diff --git a/src/math/generic/fmin.rs b/src/math/generic/fmin.rs
new file mode 100644
index 000000000..697f72004
--- /dev/null
+++ b/src/math/generic/fmin.rs
@@ -0,0 +1,13 @@
+use super::super::Float;
+
+pub fn fmin<F: Float>(x: F, y: F) -> F {
+    // IEEE754 says: minNum(x, y) is the canonicalized number x if x < y, y if y < x, the
+    // canonicalized number if one operand is a number and the other a quiet NaN. Otherwise it
+    // is either x or y, canonicalized (this means results might differ among implementations).
+    // When either x or y is a signalingNaN, then the result is according to 6.2.
+    //
+    // Since we do not support sNaN in Rust yet, we do not need to handle them.
+    // FIXME(nagisa): due to https://bugs.llvm.org/show_bug.cgi?id=33303 we canonicalize by
+    // multiplying by 1.0. Should switch to the `canonicalize` when it works.
+    (if y.is_nan() || x < y { x } else { y }) * F::ONE
+}
diff --git a/src/math/generic/mod.rs b/src/math/generic/mod.rs
index 1f557719f..819781a21 100644
--- a/src/math/generic/mod.rs
+++ b/src/math/generic/mod.rs
@@ -3,6 +3,8 @@ mod copysign;
 mod fabs;
 mod fdim;
 mod floor;
+mod fmax;
+mod fmin;
 mod rint;
 mod round;
 mod scalbn;
@@ -14,6 +16,8 @@ pub use copysign::copysign;
 pub use fabs::fabs;
 pub use fdim::fdim;
 pub use floor::floor;
+pub use fmax::fmax;
+pub use fmin::fmin;
 pub use rint::rint;
 pub use round::round;
 pub use scalbn::scalbn;

From f65487bb1676cd6b57896acb4d6410a2a38aeb40 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 24 Jan 2025 02:57:35 +0000
Subject: [PATCH 180/279] Add `fminf16`, `fmaxf16`, `fminf128`, and `fmaxf128`

---
 crates/libm-macros/src/shared.rs             |  4 +--
 crates/libm-test/benches/random.rs           |  4 +++
 crates/libm-test/src/mpfloat.rs              |  4 +--
 crates/libm-test/tests/compare_built_musl.rs |  4 +++
 crates/util/src/main.rs                      |  4 +++
 etc/function-definitions.json                | 28 ++++++++++++++++++++
 etc/function-list.txt                        |  4 +++
 src/math/fmaxf128.rs                         |  5 ++++
 src/math/fmaxf16.rs                          |  5 ++++
 src/math/fminf128.rs                         |  5 ++++
 src/math/fminf16.rs                          |  5 ++++
 src/math/mod.rs                              |  8 ++++++
 12 files changed, 76 insertions(+), 4 deletions(-)
 create mode 100644 src/math/fmaxf128.rs
 create mode 100644 src/math/fmaxf16.rs
 create mode 100644 src/math/fminf128.rs
 create mode 100644 src/math/fminf16.rs

diff --git a/crates/libm-macros/src/shared.rs b/crates/libm-macros/src/shared.rs
index b233e34f1..fbe0702a6 100644
--- a/crates/libm-macros/src/shared.rs
+++ b/crates/libm-macros/src/shared.rs
@@ -47,7 +47,7 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
         FloatTy::F16,
         Signature { args: &[Ty::F16, Ty::F16], returns: &[Ty::F16] },
         None,
-        &["copysignf16", "fdimf16"],
+        &["copysignf16", "fdimf16", "fmaxf16", "fminf16"],
     ),
     (
         // `(f32, f32) -> f32`
@@ -90,7 +90,7 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
         FloatTy::F128,
         Signature { args: &[Ty::F128, Ty::F128], returns: &[Ty::F128] },
         None,
-        &["copysignf128", "fdimf128"],
+        &["copysignf128", "fdimf128", "fmaxf128", "fminf128"],
     ),
     (
         // `(f32, f32, f32) -> f32`
diff --git a/crates/libm-test/benches/random.rs b/crates/libm-test/benches/random.rs
index d0ecd851e..aac8379fd 100644
--- a/crates/libm-test/benches/random.rs
+++ b/crates/libm-test/benches/random.rs
@@ -127,6 +127,10 @@ libm_macros::for_each_function! {
         | fdimf16
         | floorf128
         | floorf16
+        | fmaxf128
+        | fmaxf16
+        | fminf128
+        | fminf16
         | rintf128
         | rintf16
         | roundf128
diff --git a/crates/libm-test/src/mpfloat.rs b/crates/libm-test/src/mpfloat.rs
index 4422ab88d..da674c162 100644
--- a/crates/libm-test/src/mpfloat.rs
+++ b/crates/libm-test/src/mpfloat.rs
@@ -192,8 +192,8 @@ libm_macros::for_each_function! {
         fabs | fabsf => abs,
         fdim | fdimf | fdimf16 | fdimf128  => positive_diff,
         fma | fmaf => mul_add,
-        fmax | fmaxf => max,
-        fmin | fminf => min,
+        fmax | fmaxf | fmaxf16 | fmaxf128 => max,
+        fmin | fminf | fminf16 | fminf128 => min,
         lgamma | lgammaf => ln_gamma,
         log | logf => ln,
         log1p | log1pf => ln_1p,
diff --git a/crates/libm-test/tests/compare_built_musl.rs b/crates/libm-test/tests/compare_built_musl.rs
index 0fc1b0df1..ca070e8f6 100644
--- a/crates/libm-test/tests/compare_built_musl.rs
+++ b/crates/libm-test/tests/compare_built_musl.rs
@@ -89,6 +89,10 @@ libm_macros::for_each_function! {
         fdimf16,
         floorf128,
         floorf16,
+        fmaxf128,
+        fmaxf16,
+        fminf128,
+        fminf16,
         rintf128,
         rintf16,
         roundf128,
diff --git a/crates/util/src/main.rs b/crates/util/src/main.rs
index aaedda6d1..eb8e37589 100644
--- a/crates/util/src/main.rs
+++ b/crates/util/src/main.rs
@@ -96,6 +96,10 @@ fn do_eval(basis: &str, op: &str, inputs: &[&str]) {
             | fdimf16
             | floorf128
             | floorf16
+            | fmaxf128
+            | fmaxf16
+            | fminf128
+            | fminf16
             | rintf128
             | rintf16
             | roundf128
diff --git a/etc/function-definitions.json b/etc/function-definitions.json
index 7ffe91ead..b6653295c 100644
--- a/etc/function-definitions.json
+++ b/etc/function-definitions.json
@@ -391,6 +391,20 @@
         ],
         "type": "f32"
     },
+    "fmaxf128": {
+        "sources": [
+            "src/math/fmaxf128.rs",
+            "src/math/generic/fmax.rs"
+        ],
+        "type": "f128"
+    },
+    "fmaxf16": {
+        "sources": [
+            "src/math/fmaxf16.rs",
+            "src/math/generic/fmax.rs"
+        ],
+        "type": "f16"
+    },
     "fmin": {
         "sources": [
             "src/libm_helper.rs",
@@ -406,6 +420,20 @@
         ],
         "type": "f32"
     },
+    "fminf128": {
+        "sources": [
+            "src/math/fminf128.rs",
+            "src/math/generic/fmin.rs"
+        ],
+        "type": "f128"
+    },
+    "fminf16": {
+        "sources": [
+            "src/math/fminf16.rs",
+            "src/math/generic/fmin.rs"
+        ],
+        "type": "f16"
+    },
     "fmod": {
         "sources": [
             "src/libm_helper.rs",
diff --git a/etc/function-list.txt b/etc/function-list.txt
index 0b6eed828..25b92e58b 100644
--- a/etc/function-list.txt
+++ b/etc/function-list.txt
@@ -55,8 +55,12 @@ fma
 fmaf
 fmax
 fmaxf
+fmaxf128
+fmaxf16
 fmin
 fminf
+fminf128
+fminf16
 fmod
 fmodf
 frexp
diff --git a/src/math/fmaxf128.rs b/src/math/fmaxf128.rs
new file mode 100644
index 000000000..bace9ab53
--- /dev/null
+++ b/src/math/fmaxf128.rs
@@ -0,0 +1,5 @@
+/// Return the greater of two arguments or, if either argument is NaN, the other argument.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmaxf128(x: f128, y: f128) -> f128 {
+    super::generic::fmax(x, y)
+}
diff --git a/src/math/fmaxf16.rs b/src/math/fmaxf16.rs
new file mode 100644
index 000000000..fea15be8f
--- /dev/null
+++ b/src/math/fmaxf16.rs
@@ -0,0 +1,5 @@
+/// Return the greater of two arguments or, if either argument is NaN, the other argument.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmaxf16(x: f16, y: f16) -> f16 {
+    super::generic::fmax(x, y)
+}
diff --git a/src/math/fminf128.rs b/src/math/fminf128.rs
new file mode 100644
index 000000000..a9224c22a
--- /dev/null
+++ b/src/math/fminf128.rs
@@ -0,0 +1,5 @@
+/// Return the lesser of two arguments or, if either argument is NaN, the other argument.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fminf128(x: f128, y: f128) -> f128 {
+    super::generic::fmin(x, y)
+}
diff --git a/src/math/fminf16.rs b/src/math/fminf16.rs
new file mode 100644
index 000000000..6d936be34
--- /dev/null
+++ b/src/math/fminf16.rs
@@ -0,0 +1,5 @@
+/// Return the lesser of two arguments or, if either argument is NaN, the other argument.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fminf16(x: f16, y: f16) -> f16 {
+    super::generic::fmin(x, y)
+}
diff --git a/src/math/mod.rs b/src/math/mod.rs
index 8db17a02d..cb83b2587 100644
--- a/src/math/mod.rs
+++ b/src/math/mod.rs
@@ -346,6 +346,8 @@ cfg_if! {
         mod fabsf16;
         mod fdimf16;
         mod floorf16;
+        mod fmaxf16;
+        mod fminf16;
         mod rintf16;
         mod roundf16;
         mod sqrtf16;
@@ -356,6 +358,8 @@ cfg_if! {
         pub use self::fabsf16::fabsf16;
         pub use self::fdimf16::fdimf16;
         pub use self::floorf16::floorf16;
+        pub use self::fmaxf16::fmaxf16;
+        pub use self::fminf16::fminf16;
         pub use self::rintf16::rintf16;
         pub use self::roundf16::roundf16;
         pub use self::sqrtf16::sqrtf16;
@@ -370,6 +374,8 @@ cfg_if! {
         mod fabsf128;
         mod fdimf128;
         mod floorf128;
+        mod fmaxf128;
+        mod fminf128;
         mod rintf128;
         mod roundf128;
         mod sqrtf128;
@@ -380,6 +386,8 @@ cfg_if! {
         pub use self::fabsf128::fabsf128;
         pub use self::fdimf128::fdimf128;
         pub use self::floorf128::floorf128;
+        pub use self::fmaxf128::fmaxf128;
+        pub use self::fminf128::fminf128;
         pub use self::rintf128::rintf128;
         pub use self::roundf128::roundf128;
         pub use self::sqrtf128::sqrtf128;

From 566e8e12e1050db92f9da94a2a531b2880121271 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 24 Jan 2025 05:02:47 +0000
Subject: [PATCH 181/279] Add a generic version of `fmod`

This can replace `fmod` and `fmodf`. As part of this change I was able
to replace some of the `while` loops with `leading_zeros`.
---
 etc/function-definitions.json  |  6 ++-
 src/math/fmod.rs               | 77 +-----------------------------
 src/math/fmodf.rs              | 87 +---------------------------------
 src/math/generic/fmod.rs       | 84 ++++++++++++++++++++++++++++++++
 src/math/generic/mod.rs        |  2 +
 src/math/support/int_traits.rs |  2 +
 6 files changed, 96 insertions(+), 162 deletions(-)
 create mode 100644 src/math/generic/fmod.rs

diff --git a/etc/function-definitions.json b/etc/function-definitions.json
index b6653295c..866e9a439 100644
--- a/etc/function-definitions.json
+++ b/etc/function-definitions.json
@@ -437,13 +437,15 @@
     "fmod": {
         "sources": [
             "src/libm_helper.rs",
-            "src/math/fmod.rs"
+            "src/math/fmod.rs",
+            "src/math/generic/fmod.rs"
         ],
         "type": "f64"
     },
     "fmodf": {
         "sources": [
-            "src/math/fmodf.rs"
+            "src/math/fmodf.rs",
+            "src/math/generic/fmod.rs"
         ],
         "type": "f32"
     },
diff --git a/src/math/fmod.rs b/src/math/fmod.rs
index b68e6b0ea..d9786b53d 100644
--- a/src/math/fmod.rs
+++ b/src/math/fmod.rs
@@ -1,78 +1,5 @@
+/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn fmod(x: f64, y: f64) -> f64 {
-    let mut uxi = x.to_bits();
-    let mut uyi = y.to_bits();
-    let mut ex = ((uxi >> 52) & 0x7ff) as i64;
-    let mut ey = ((uyi >> 52) & 0x7ff) as i64;
-    let sx = uxi >> 63;
-    let mut i;
-
-    if uyi << 1 == 0 || y.is_nan() || ex == 0x7ff {
-        return (x * y) / (x * y);
-    }
-    if uxi << 1 <= uyi << 1 {
-        if uxi << 1 == uyi << 1 {
-            return 0.0 * x;
-        }
-        return x;
-    }
-
-    /* normalize x and y */
-    if ex == 0 {
-        i = uxi << 12;
-        while i >> 63 == 0 {
-            ex -= 1;
-            i <<= 1;
-        }
-        uxi <<= -ex + 1;
-    } else {
-        uxi &= u64::MAX >> 12;
-        uxi |= 1 << 52;
-    }
-    if ey == 0 {
-        i = uyi << 12;
-        while i >> 63 == 0 {
-            ey -= 1;
-            i <<= 1;
-        }
-        uyi <<= -ey + 1;
-    } else {
-        uyi &= u64::MAX >> 12;
-        uyi |= 1 << 52;
-    }
-
-    /* x mod y */
-    while ex > ey {
-        i = uxi.wrapping_sub(uyi);
-        if i >> 63 == 0 {
-            if i == 0 {
-                return 0.0 * x;
-            }
-            uxi = i;
-        }
-        uxi <<= 1;
-        ex -= 1;
-    }
-    i = uxi.wrapping_sub(uyi);
-    if i >> 63 == 0 {
-        if i == 0 {
-            return 0.0 * x;
-        }
-        uxi = i;
-    }
-    while uxi >> 52 == 0 {
-        uxi <<= 1;
-        ex -= 1;
-    }
-
-    /* scale result */
-    if ex > 0 {
-        uxi -= 1 << 52;
-        uxi |= (ex as u64) << 52;
-    } else {
-        uxi >>= -ex + 1;
-    }
-    uxi |= sx << 63;
-
-    f64::from_bits(uxi)
+    super::generic::fmod(x, y)
 }
diff --git a/src/math/fmodf.rs b/src/math/fmodf.rs
index 4de181957..4e95696e2 100644
--- a/src/math/fmodf.rs
+++ b/src/math/fmodf.rs
@@ -1,88 +1,5 @@
-use core::f32;
-
+/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn fmodf(x: f32, y: f32) -> f32 {
-    let mut uxi = x.to_bits();
-    let mut uyi = y.to_bits();
-    let mut ex = ((uxi >> 23) & 0xff) as i32;
-    let mut ey = ((uyi >> 23) & 0xff) as i32;
-    let sx = uxi & 0x80000000;
-    let mut i;
-
-    if uyi << 1 == 0 || y.is_nan() || ex == 0xff {
-        return (x * y) / (x * y);
-    }
-
-    if uxi << 1 <= uyi << 1 {
-        if uxi << 1 == uyi << 1 {
-            return 0.0 * x;
-        }
-
-        return x;
-    }
-
-    /* normalize x and y */
-    if ex == 0 {
-        i = uxi << 9;
-        while i >> 31 == 0 {
-            ex -= 1;
-            i <<= 1;
-        }
-
-        uxi <<= -ex + 1;
-    } else {
-        uxi &= u32::MAX >> 9;
-        uxi |= 1 << 23;
-    }
-
-    if ey == 0 {
-        i = uyi << 9;
-        while i >> 31 == 0 {
-            ey -= 1;
-            i <<= 1;
-        }
-
-        uyi <<= -ey + 1;
-    } else {
-        uyi &= u32::MAX >> 9;
-        uyi |= 1 << 23;
-    }
-
-    /* x mod y */
-    while ex > ey {
-        i = uxi.wrapping_sub(uyi);
-        if i >> 31 == 0 {
-            if i == 0 {
-                return 0.0 * x;
-            }
-            uxi = i;
-        }
-        uxi <<= 1;
-
-        ex -= 1;
-    }
-
-    i = uxi.wrapping_sub(uyi);
-    if i >> 31 == 0 {
-        if i == 0 {
-            return 0.0 * x;
-        }
-        uxi = i;
-    }
-
-    while uxi >> 23 == 0 {
-        uxi <<= 1;
-        ex -= 1;
-    }
-
-    /* scale result up */
-    if ex > 0 {
-        uxi -= 1 << 23;
-        uxi |= (ex as u32) << 23;
-    } else {
-        uxi >>= -ex + 1;
-    }
-    uxi |= sx;
-
-    f32::from_bits(uxi)
+    super::generic::fmod(x, y)
 }
diff --git a/src/math/generic/fmod.rs b/src/math/generic/fmod.rs
new file mode 100644
index 000000000..93da6c51e
--- /dev/null
+++ b/src/math/generic/fmod.rs
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: MIT */
+/* origin: musl src/math/fmod.c. Ported to generic Rust algorithm in 2025, TG. */
+
+use super::super::{CastFrom, Float, Int, MinInt};
+
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmod<F: Float>(x: F, y: F) -> F {
+    let zero = F::Int::ZERO;
+    let one = F::Int::ONE;
+    let mut ix = x.to_bits();
+    let mut iy = y.to_bits();
+    let mut ex = x.exp().signed();
+    let mut ey = y.exp().signed();
+    let sx = ix & F::SIGN_MASK;
+
+    if iy << 1 == zero || y.is_nan() || ex == F::EXP_MAX as i32 {
+        return (x * y) / (x * y);
+    }
+
+    if ix << 1 <= iy << 1 {
+        if ix << 1 == iy << 1 {
+            return F::ZERO * x;
+        }
+        return x;
+    }
+
+    /* normalize x and y */
+    if ex == 0 {
+        let i = ix << F::EXP_BITS;
+        ex -= i.leading_zeros() as i32;
+        ix <<= -ex + 1;
+    } else {
+        ix &= F::Int::MAX >> F::EXP_BITS;
+        ix |= one << F::SIG_BITS;
+    }
+
+    if ey == 0 {
+        let i = iy << F::EXP_BITS;
+        ey -= i.leading_zeros() as i32;
+        iy <<= -ey + 1;
+    } else {
+        iy &= F::Int::MAX >> F::EXP_BITS;
+        iy |= one << F::SIG_BITS;
+    }
+
+    /* x mod y */
+    while ex > ey {
+        let i = ix.wrapping_sub(iy);
+        if i >> (F::BITS - 1) == zero {
+            if i == zero {
+                return F::ZERO * x;
+            }
+            ix = i;
+        }
+
+        ix <<= 1;
+        ex -= 1;
+    }
+
+    let i = ix.wrapping_sub(iy);
+    if i >> (F::BITS - 1) == zero {
+        if i == zero {
+            return F::ZERO * x;
+        }
+
+        ix = i;
+    }
+
+    let shift = ix.leading_zeros().saturating_sub(F::EXP_BITS);
+    ix <<= shift;
+    ex -= shift as i32;
+
+    /* scale result */
+    if ex > 0 {
+        ix -= one << F::SIG_BITS;
+        ix |= F::Int::cast_from(ex) << F::SIG_BITS;
+    } else {
+        ix >>= -ex + 1;
+    }
+
+    ix |= sx;
+
+    F::from_bits(ix)
+}
diff --git a/src/math/generic/mod.rs b/src/math/generic/mod.rs
index 819781a21..68686b0b2 100644
--- a/src/math/generic/mod.rs
+++ b/src/math/generic/mod.rs
@@ -5,6 +5,7 @@ mod fdim;
 mod floor;
 mod fmax;
 mod fmin;
+mod fmod;
 mod rint;
 mod round;
 mod scalbn;
@@ -18,6 +19,7 @@ pub use fdim::fdim;
 pub use floor::floor;
 pub use fmax::fmax;
 pub use fmin::fmin;
+pub use fmod::fmod;
 pub use rint::rint;
 pub use round::round;
 pub use scalbn::scalbn;
diff --git a/src/math/support/int_traits.rs b/src/math/support/int_traits.rs
index cf19762e8..b403c658c 100644
--- a/src/math/support/int_traits.rs
+++ b/src/math/support/int_traits.rs
@@ -45,7 +45,9 @@ pub trait Int:
     + ops::BitOrAssign
     + ops::BitXorAssign
     + ops::ShlAssign<i32>
+    + ops::ShlAssign<u32>
     + ops::ShrAssign<u32>
+    + ops::ShrAssign<i32>
     + ops::Add<Output = Self>
     + ops::Sub<Output = Self>
     + ops::Mul<Output = Self>

From 856e314b04f2c1ce9cd9b2587402ea7106f91f01 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 24 Jan 2025 05:09:08 +0000
Subject: [PATCH 182/279] Add `fmodf16` using the generic implementation

---
 crates/libm-macros/src/shared.rs             |  2 +-
 crates/libm-test/benches/icount.rs           |  1 +
 crates/libm-test/benches/random.rs           |  1 +
 crates/libm-test/src/mpfloat.rs              | 17 +++++++++++++++++
 crates/libm-test/tests/compare_built_musl.rs |  1 +
 crates/util/src/main.rs                      |  1 +
 etc/function-definitions.json                |  7 +++++++
 etc/function-list.txt                        |  1 +
 src/math/fmodf16.rs                          |  5 +++++
 src/math/mod.rs                              |  2 ++
 10 files changed, 37 insertions(+), 1 deletion(-)
 create mode 100644 src/math/fmodf16.rs

diff --git a/crates/libm-macros/src/shared.rs b/crates/libm-macros/src/shared.rs
index fbe0702a6..69fe45e03 100644
--- a/crates/libm-macros/src/shared.rs
+++ b/crates/libm-macros/src/shared.rs
@@ -47,7 +47,7 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
         FloatTy::F16,
         Signature { args: &[Ty::F16, Ty::F16], returns: &[Ty::F16] },
         None,
-        &["copysignf16", "fdimf16", "fmaxf16", "fminf16"],
+        &["copysignf16", "fdimf16", "fmaxf16", "fminf16", "fmodf16"],
     ),
     (
         // `(f32, f32) -> f32`
diff --git a/crates/libm-test/benches/icount.rs b/crates/libm-test/benches/icount.rs
index 84f953262..97e78d8f1 100644
--- a/crates/libm-test/benches/icount.rs
+++ b/crates/libm-test/benches/icount.rs
@@ -111,6 +111,7 @@ main!(
     icount_bench_fmin_group,
     icount_bench_fminf_group,
     icount_bench_fmod_group,
+    icount_bench_fmodf16_group,
     icount_bench_fmodf_group,
     icount_bench_frexp_group,
     icount_bench_frexpf_group,
diff --git a/crates/libm-test/benches/random.rs b/crates/libm-test/benches/random.rs
index aac8379fd..3e816e81a 100644
--- a/crates/libm-test/benches/random.rs
+++ b/crates/libm-test/benches/random.rs
@@ -131,6 +131,7 @@ libm_macros::for_each_function! {
         | fmaxf16
         | fminf128
         | fminf16
+        | fmodf16
         | rintf128
         | rintf16
         | roundf128
diff --git a/crates/libm-test/src/mpfloat.rs b/crates/libm-test/src/mpfloat.rs
index da674c162..56234b14a 100644
--- a/crates/libm-test/src/mpfloat.rs
+++ b/crates/libm-test/src/mpfloat.rs
@@ -152,6 +152,7 @@ libm_macros::for_each_function! {
         floorf16,
         fmod,
         fmodf,
+        fmodf16,
         frexp,
         frexpf,
         ilogb,
@@ -525,6 +526,22 @@ impl MpOp for crate::op::lgammaf_r::Routine {
     }
 }
 
+// No fmodf128 yet
+impl MpOp for crate::op::fmodf16::Routine {
+    type MpTy = (MpFloat, MpFloat);
+
+    fn new_mp() -> Self::MpTy {
+        (new_mpfloat::<Self::FTy>(), new_mpfloat::<Self::FTy>())
+    }
+
+    fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+        this.0.assign(input.0);
+        this.1.assign(input.1);
+        let ord = this.0.rem_assign_round(&this.1, Nearest);
+        prep_retval::<Self::RustRet>(&mut this.0, ord)
+    }
+}
+
 /* stub implementations so we don't need to special case them */
 
 impl MpOp for crate::op::nextafter::Routine {
diff --git a/crates/libm-test/tests/compare_built_musl.rs b/crates/libm-test/tests/compare_built_musl.rs
index ca070e8f6..46474c046 100644
--- a/crates/libm-test/tests/compare_built_musl.rs
+++ b/crates/libm-test/tests/compare_built_musl.rs
@@ -93,6 +93,7 @@ libm_macros::for_each_function! {
         fmaxf16,
         fminf128,
         fminf16,
+        fmodf16,
         rintf128,
         rintf16,
         roundf128,
diff --git a/crates/util/src/main.rs b/crates/util/src/main.rs
index eb8e37589..999b03af9 100644
--- a/crates/util/src/main.rs
+++ b/crates/util/src/main.rs
@@ -100,6 +100,7 @@ fn do_eval(basis: &str, op: &str, inputs: &[&str]) {
             | fmaxf16
             | fminf128
             | fminf16
+            | fmodf16
             | rintf128
             | rintf16
             | roundf128
diff --git a/etc/function-definitions.json b/etc/function-definitions.json
index 866e9a439..966060f77 100644
--- a/etc/function-definitions.json
+++ b/etc/function-definitions.json
@@ -449,6 +449,13 @@
         ],
         "type": "f32"
     },
+    "fmodf16": {
+        "sources": [
+            "src/math/fmodf16.rs",
+            "src/math/generic/fmod.rs"
+        ],
+        "type": "f16"
+    },
     "frexp": {
         "sources": [
             "src/libm_helper.rs",
diff --git a/etc/function-list.txt b/etc/function-list.txt
index 25b92e58b..ff4de0cb5 100644
--- a/etc/function-list.txt
+++ b/etc/function-list.txt
@@ -63,6 +63,7 @@ fminf128
 fminf16
 fmod
 fmodf
+fmodf16
 frexp
 frexpf
 hypot
diff --git a/src/math/fmodf16.rs b/src/math/fmodf16.rs
new file mode 100644
index 000000000..11972a7de
--- /dev/null
+++ b/src/math/fmodf16.rs
@@ -0,0 +1,5 @@
+/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmodf16(x: f16, y: f16) -> f16 {
+    super::generic::fmod(x, y)
+}
diff --git a/src/math/mod.rs b/src/math/mod.rs
index cb83b2587..aab551bed 100644
--- a/src/math/mod.rs
+++ b/src/math/mod.rs
@@ -348,6 +348,7 @@ cfg_if! {
         mod floorf16;
         mod fmaxf16;
         mod fminf16;
+        mod fmodf16;
         mod rintf16;
         mod roundf16;
         mod sqrtf16;
@@ -360,6 +361,7 @@ cfg_if! {
         pub use self::floorf16::floorf16;
         pub use self::fmaxf16::fmaxf16;
         pub use self::fminf16::fminf16;
+        pub use self::fmodf16::fmodf16;
         pub use self::rintf16::rintf16;
         pub use self::roundf16::roundf16;
         pub use self::sqrtf16::sqrtf16;

From bae270f22075ecdcd05385f14c15bc1df318b7f9 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 24 Jan 2025 06:24:45 +0000
Subject: [PATCH 183/279] Increase or set CI timeouts

With the new routines, some of our tests are running close to their
timeouts. Increase the timeout for test jobs, and set a short timeout
for all other jobs that did not have one.
---
 .github/workflows/main.yaml | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index 89c5facef..599552711 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -15,7 +15,7 @@ env:
 jobs:
   test:
     name: Build and test
-    timeout-minutes: 40
+    timeout-minutes: 60
     strategy:
       fail-fast: false
       matrix:
@@ -123,6 +123,7 @@ jobs:
   clippy:
     name: Clippy
     runs-on: ubuntu-24.04
+    timeout-minutes: 10
     steps:
     - uses: actions/checkout@master
     - name: Install Rust
@@ -138,6 +139,7 @@ jobs:
   builtins:
     name: Check use with compiler-builtins
     runs-on: ubuntu-24.04
+    timeout-minutes: 10
     steps:
     - uses: actions/checkout@master
     - name: Install Rust
@@ -194,6 +196,7 @@ jobs:
   msrv:
     name: Check MSRV
     runs-on: ubuntu-24.04
+    timeout-minutes: 10
     env:
       RUSTFLAGS: # No need to check warnings on old MSRV, unset `-Dwarnings`
     steps:
@@ -210,6 +213,7 @@ jobs:
   rustfmt:
     name: Rustfmt
     runs-on: ubuntu-24.04
+    timeout-minutes: 10
     steps:
     - uses: actions/checkout@master
     - name: Install Rust
@@ -223,6 +227,7 @@ jobs:
   calculate_extensive_matrix:
     name: Calculate job matrix
     runs-on: ubuntu-24.04
+    timeout-minutes: 10
     outputs:
       matrix: ${{ steps.script.outputs.matrix }}
     steps:
@@ -242,7 +247,7 @@ jobs:
       - clippy
       - calculate_extensive_matrix
     runs-on: ubuntu-24.04
-    timeout-minutes: 80
+    timeout-minutes: 180
     strategy:
       matrix:
         # Use the output from `calculate_extensive_matrix` to calculate the matrix
@@ -286,6 +291,7 @@ jobs:
       - rustfmt
       - extensive
     runs-on: ubuntu-24.04
+    timeout-minutes: 10
     # GitHub branch protection is exceedingly silly and treats "jobs skipped because a dependency
     # failed" as success. So we have to do some contortions to ensure the job fails if any of its
     # dependencies fails.

From a9d3ba4cec6f8ce914af1a6d05ca3bdf1f8245ad Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 24 Jan 2025 07:59:09 +0000
Subject: [PATCH 184/279] Add way to override the number of iterations for
 specific tests

Certain functions (`fmodf128`) are significantly slower than others,
to the point that running the default number of tests adds tens of
minutes to PR CI and extensive test time increases to ~1day. It does not
make sense to do this by default; so, introduce `EXTREMELY_SLOW_TESTS`
to test configuration that allows setting specific tests that need to
have a reduced iteration count.
---
 crates/libm-test/src/lib.rs     |  4 ++--
 crates/libm-test/src/run_cfg.rs | 34 ++++++++++++++++++++++++++-------
 2 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/crates/libm-test/src/lib.rs b/crates/libm-test/src/lib.rs
index b90423c1b..78b011b1f 100644
--- a/crates/libm-test/src/lib.rs
+++ b/crates/libm-test/src/lib.rs
@@ -28,7 +28,7 @@ pub use op::{
     Ty,
 };
 pub use precision::{MaybeOverride, SpecialCase, default_ulp};
-use run_cfg::EXTENSIVE_MAX_ITERATIONS;
+use run_cfg::extensive_max_iterations;
 pub use run_cfg::{CheckBasis, CheckCtx, EXTENSIVE_ENV, GeneratorKind, skip_extensive_test};
 pub use test_traits::{CheckOutput, Hex, TupleCall};
 
@@ -89,7 +89,7 @@ pub fn test_log(s: &str) {
         writeln!(f, "cargo features: {}", env!("CFG_CARGO_FEATURES")).unwrap();
         writeln!(f, "opt level: {}", env!("CFG_OPT_LEVEL")).unwrap();
         writeln!(f, "target features: {}", env!("CFG_TARGET_FEATURES")).unwrap();
-        writeln!(f, "extensive iterations {}", *EXTENSIVE_MAX_ITERATIONS).unwrap();
+        writeln!(f, "extensive iterations {}", extensive_max_iterations()).unwrap();
 
         Some(f)
     });
diff --git a/crates/libm-test/src/run_cfg.rs b/crates/libm-test/src/run_cfg.rs
index 3e91101f6..c76b6699f 100644
--- a/crates/libm-test/src/run_cfg.rs
+++ b/crates/libm-test/src/run_cfg.rs
@@ -13,18 +13,27 @@ pub const EXTENSIVE_ENV: &str = "LIBM_EXTENSIVE_TESTS";
 /// Specify the number of iterations via this environment variable, rather than using the default.
 pub const EXTENSIVE_ITER_ENV: &str = "LIBM_EXTENSIVE_ITERATIONS";
 
+/// The override value, if set by the above environment.
+static EXTENSIVE_ITER_OVERRIDE: LazyLock<Option<u64>> = LazyLock::new(|| {
+    env::var(EXTENSIVE_ITER_ENV).map(|v| v.parse().expect("failed to parse iteration count")).ok()
+});
+
+/// Specific tests that need to have a reduced amount of iterations to complete in a reasonable
+/// amount of time.
+///
+/// Contains the itentifier+generator combo to match on, plus the factor to reduce by.
+const EXTEMELY_SLOW_TESTS: &[(Identifier, GeneratorKind, u64)] = &[];
+
 /// Maximum number of iterations to run for a single routine.
 ///
 /// The default value of one greater than `u32::MAX` allows testing single-argument `f32` routines
 /// and single- or double-argument `f16` routines exhaustively. `f64` and `f128` can't feasibly
 /// be tested exhaustively; however, [`EXTENSIVE_ITER_ENV`] can be set to run tests for multiple
 /// hours.
-pub static EXTENSIVE_MAX_ITERATIONS: LazyLock<u64> = LazyLock::new(|| {
-    let default = 1 << 32;
-    env::var(EXTENSIVE_ITER_ENV)
-        .map(|v| v.parse().expect("failed to parse iteration count"))
-        .unwrap_or(default)
-});
+pub fn extensive_max_iterations() -> u64 {
+    let default = 1 << 32; // default value
+    EXTENSIVE_ITER_OVERRIDE.unwrap_or(default)
+}
 
 /// Context passed to [`CheckOutput`].
 #[derive(Clone, Debug, PartialEq, Eq)]
@@ -206,12 +215,23 @@ pub fn iteration_count(ctx: &CheckCtx, argnum: usize) -> u64 {
     let mut total_iterations = match ctx.gen_kind {
         GeneratorKind::QuickSpaced => domain_iter_count,
         GeneratorKind::Random => random_iter_count,
-        GeneratorKind::Extensive => *EXTENSIVE_MAX_ITERATIONS,
+        GeneratorKind::Extensive => extensive_max_iterations(),
         GeneratorKind::EdgeCases => {
             unimplemented!("edge case tests shoudn't need `iteration_count`")
         }
     };
 
+    // Some tests are significantly slower than others and need to be further reduced.
+    if let Some((_id, _gen, scale)) = EXTEMELY_SLOW_TESTS
+        .iter()
+        .find(|(id, gen, _scale)| *id == ctx.fn_ident && *gen == ctx.gen_kind)
+    {
+        // However, do not override if the extensive iteration count has been manually set.
+        if !(ctx.gen_kind == GeneratorKind::Extensive && EXTENSIVE_ITER_OVERRIDE.is_some()) {
+            total_iterations /= scale;
+        }
+    }
+
     // FMA has a huge domain but is reasonably fast to run, so increase iterations.
     if ctx.base_name == BaseName::Fma {
         total_iterations *= 4;

From 68b903648a81a7ad010ed6ba3f4d01b068e7a8f9 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 24 Jan 2025 05:58:08 +0000
Subject: [PATCH 185/279] Add `fmodf128`

This function is significantly slower than all others so includes an
override in `EXTREMELY_SLOW_TESTS`. Without it, PR CI takes ~1hour and
the extensive tests in CI take ~1day.
---
 crates/libm-macros/src/shared.rs             |  2 +-
 crates/libm-test/benches/icount.rs           |  1 +
 crates/libm-test/benches/random.rs           |  1 +
 crates/libm-test/src/mpfloat.rs              | 47 +++++++-------------
 crates/libm-test/src/run_cfg.rs              |  5 ++-
 crates/libm-test/tests/compare_built_musl.rs |  1 +
 crates/util/src/main.rs                      |  1 +
 etc/function-definitions.json                |  7 +++
 etc/function-list.txt                        |  1 +
 src/math/fmodf128.rs                         |  5 +++
 src/math/mod.rs                              |  2 +
 11 files changed, 40 insertions(+), 33 deletions(-)
 create mode 100644 src/math/fmodf128.rs

diff --git a/crates/libm-macros/src/shared.rs b/crates/libm-macros/src/shared.rs
index 69fe45e03..b1f4f46cc 100644
--- a/crates/libm-macros/src/shared.rs
+++ b/crates/libm-macros/src/shared.rs
@@ -90,7 +90,7 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
         FloatTy::F128,
         Signature { args: &[Ty::F128, Ty::F128], returns: &[Ty::F128] },
         None,
-        &["copysignf128", "fdimf128", "fmaxf128", "fminf128"],
+        &["copysignf128", "fdimf128", "fmaxf128", "fminf128", "fmodf128"],
     ),
     (
         // `(f32, f32, f32) -> f32`
diff --git a/crates/libm-test/benches/icount.rs b/crates/libm-test/benches/icount.rs
index 97e78d8f1..46a659524 100644
--- a/crates/libm-test/benches/icount.rs
+++ b/crates/libm-test/benches/icount.rs
@@ -111,6 +111,7 @@ main!(
     icount_bench_fmin_group,
     icount_bench_fminf_group,
     icount_bench_fmod_group,
+    icount_bench_fmodf128_group,
     icount_bench_fmodf16_group,
     icount_bench_fmodf_group,
     icount_bench_frexp_group,
diff --git a/crates/libm-test/benches/random.rs b/crates/libm-test/benches/random.rs
index 3e816e81a..ca9e86c10 100644
--- a/crates/libm-test/benches/random.rs
+++ b/crates/libm-test/benches/random.rs
@@ -131,6 +131,7 @@ libm_macros::for_each_function! {
         | fmaxf16
         | fminf128
         | fminf16
+        | fmodf128
         | fmodf16
         | rintf128
         | rintf16
diff --git a/crates/libm-test/src/mpfloat.rs b/crates/libm-test/src/mpfloat.rs
index 56234b14a..98b80505f 100644
--- a/crates/libm-test/src/mpfloat.rs
+++ b/crates/libm-test/src/mpfloat.rs
@@ -152,6 +152,7 @@ libm_macros::for_each_function! {
         floorf16,
         fmod,
         fmodf,
+        fmodf128,
         fmodf16,
         frexp,
         frexpf,
@@ -301,21 +302,6 @@ macro_rules! impl_op_for_ty {
                 }
             }
 
-            impl MpOp for crate::op::[<fmod $suffix>]::Routine {
-                type MpTy = (MpFloat, MpFloat);
-
-                fn new_mp() -> Self::MpTy {
-                    (new_mpfloat::<Self::FTy>(), new_mpfloat::<Self::FTy>())
-                }
-
-                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
-                    this.0.assign(input.0);
-                    this.1.assign(input.1);
-                    let ord = this.0.rem_assign_round(&this.1, Nearest);
-                    prep_retval::<Self::RustRet>(&mut this.0, ord)
-                }
-            }
-
             impl MpOp for crate::op::[<frexp $suffix>]::Routine {
                 type MpTy = MpFloat;
 
@@ -481,6 +467,21 @@ macro_rules! impl_op_for_ty_all {
                     prep_retval::<Self::RustRet>(&mut this.0, Ordering::Equal)
                 }
             }
+
+            impl MpOp for crate::op::[<fmod $suffix>]::Routine {
+                type MpTy = (MpFloat, MpFloat);
+
+                fn new_mp() -> Self::MpTy {
+                    (new_mpfloat::<Self::FTy>(), new_mpfloat::<Self::FTy>())
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.0.assign(input.0);
+                    this.1.assign(input.1);
+                    let ord = this.0.rem_assign_round(&this.1, Nearest);
+                    prep_retval::<Self::RustRet>(&mut this.0, ord)
+                }
+            }
         }
     };
 }
@@ -526,22 +527,6 @@ impl MpOp for crate::op::lgammaf_r::Routine {
     }
 }
 
-// No fmodf128 yet
-impl MpOp for crate::op::fmodf16::Routine {
-    type MpTy = (MpFloat, MpFloat);
-
-    fn new_mp() -> Self::MpTy {
-        (new_mpfloat::<Self::FTy>(), new_mpfloat::<Self::FTy>())
-    }
-
-    fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
-        this.0.assign(input.0);
-        this.1.assign(input.1);
-        let ord = this.0.rem_assign_round(&this.1, Nearest);
-        prep_retval::<Self::RustRet>(&mut this.0, ord)
-    }
-}
-
 /* stub implementations so we don't need to special case them */
 
 impl MpOp for crate::op::nextafter::Routine {
diff --git a/crates/libm-test/src/run_cfg.rs b/crates/libm-test/src/run_cfg.rs
index c76b6699f..783142e37 100644
--- a/crates/libm-test/src/run_cfg.rs
+++ b/crates/libm-test/src/run_cfg.rs
@@ -22,7 +22,10 @@ static EXTENSIVE_ITER_OVERRIDE: LazyLock<Option<u64>> = LazyLock::new(|| {
 /// amount of time.
 ///
 /// Contains the itentifier+generator combo to match on, plus the factor to reduce by.
-const EXTEMELY_SLOW_TESTS: &[(Identifier, GeneratorKind, u64)] = &[];
+const EXTEMELY_SLOW_TESTS: &[(Identifier, GeneratorKind, u64)] = &[
+    (Identifier::Fmodf128, GeneratorKind::QuickSpaced, 40),
+    (Identifier::Fmodf128, GeneratorKind::Extensive, 40),
+];
 
 /// Maximum number of iterations to run for a single routine.
 ///
diff --git a/crates/libm-test/tests/compare_built_musl.rs b/crates/libm-test/tests/compare_built_musl.rs
index 46474c046..5466edf4f 100644
--- a/crates/libm-test/tests/compare_built_musl.rs
+++ b/crates/libm-test/tests/compare_built_musl.rs
@@ -93,6 +93,7 @@ libm_macros::for_each_function! {
         fmaxf16,
         fminf128,
         fminf16,
+        fmodf128,
         fmodf16,
         rintf128,
         rintf16,
diff --git a/crates/util/src/main.rs b/crates/util/src/main.rs
index 999b03af9..f4ee8fd2e 100644
--- a/crates/util/src/main.rs
+++ b/crates/util/src/main.rs
@@ -100,6 +100,7 @@ fn do_eval(basis: &str, op: &str, inputs: &[&str]) {
             | fmaxf16
             | fminf128
             | fminf16
+            | fmodf128
             | fmodf16
             | rintf128
             | rintf16
diff --git a/etc/function-definitions.json b/etc/function-definitions.json
index 966060f77..574ffea2e 100644
--- a/etc/function-definitions.json
+++ b/etc/function-definitions.json
@@ -449,6 +449,13 @@
         ],
         "type": "f32"
     },
+    "fmodf128": {
+        "sources": [
+            "src/math/fmodf128.rs",
+            "src/math/generic/fmod.rs"
+        ],
+        "type": "f128"
+    },
     "fmodf16": {
         "sources": [
             "src/math/fmodf16.rs",
diff --git a/etc/function-list.txt b/etc/function-list.txt
index ff4de0cb5..d82838b32 100644
--- a/etc/function-list.txt
+++ b/etc/function-list.txt
@@ -63,6 +63,7 @@ fminf128
 fminf16
 fmod
 fmodf
+fmodf128
 fmodf16
 frexp
 frexpf
diff --git a/src/math/fmodf128.rs b/src/math/fmodf128.rs
new file mode 100644
index 000000000..ff0e0493e
--- /dev/null
+++ b/src/math/fmodf128.rs
@@ -0,0 +1,5 @@
+/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmodf128(x: f128, y: f128) -> f128 {
+    super::generic::fmod(x, y)
+}
diff --git a/src/math/mod.rs b/src/math/mod.rs
index aab551bed..969c1bfd9 100644
--- a/src/math/mod.rs
+++ b/src/math/mod.rs
@@ -378,6 +378,7 @@ cfg_if! {
         mod floorf128;
         mod fmaxf128;
         mod fminf128;
+        mod fmodf128;
         mod rintf128;
         mod roundf128;
         mod sqrtf128;
@@ -390,6 +391,7 @@ cfg_if! {
         pub use self::floorf128::floorf128;
         pub use self::fmaxf128::fmaxf128;
         pub use self::fminf128::fminf128;
+        pub use self::fmodf128::fmodf128;
         pub use self::rintf128::rintf128;
         pub use self::roundf128::roundf128;
         pub use self::sqrtf128::sqrtf128;

From b67b4ccb773ce60582944a927768f6459e92a0b5 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 24 Jan 2025 09:11:07 +0000
Subject: [PATCH 186/279] Enable missing icount benchmarks

A few new functions were added but this list did not get updated. Do so
here.
---
 crates/libm-test/benches/icount.rs | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/crates/libm-test/benches/icount.rs b/crates/libm-test/benches/icount.rs
index 46a659524..d5026f461 100644
--- a/crates/libm-test/benches/icount.rs
+++ b/crates/libm-test/benches/icount.rs
@@ -107,8 +107,12 @@ main!(
     icount_bench_fma_group,
     icount_bench_fmaf_group,
     icount_bench_fmax_group,
+    icount_bench_fmaxf128_group,
+    icount_bench_fmaxf16_group,
     icount_bench_fmaxf_group,
     icount_bench_fmin_group,
+    icount_bench_fminf128_group,
+    icount_bench_fminf16_group,
     icount_bench_fminf_group,
     icount_bench_fmod_group,
     icount_bench_fmodf128_group,
@@ -155,6 +159,8 @@ main!(
     icount_bench_rintf16_group,
     icount_bench_rintf_group,
     icount_bench_round_group,
+    icount_bench_roundf128_group,
+    icount_bench_roundf16_group,
     icount_bench_roundf_group,
     icount_bench_scalbn_group,
     icount_bench_scalbnf_group,

From 5bcd31f461cf3231ab9403e7b62a7c8797b7af9b Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sat, 25 Jan 2025 00:50:02 +0000
Subject: [PATCH 187/279] Upgrade all dependencies to the latest version

In particular, this includes updates to Rug that we can make use of [1],
[2], [3], [4].

[1]: https://gitlab.com/tspiteri/rug/-/issues/78
[2]: https://gitlab.com/tspiteri/rug/-/issues/80
[3]: https://gitlab.com/tspiteri/rug/-/issues/76
[4]: https://gitlab.com/tspiteri/rug/-/issues/73
---
 Cargo.toml                      | 2 +-
 crates/libm-macros/Cargo.toml   | 6 +++---
 crates/libm-test/Cargo.toml     | 4 ++--
 crates/musl-math-sys/Cargo.toml | 2 +-
 crates/util/Cargo.toml          | 2 +-
 5 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 18d89997d..7b6f9e1ce 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -59,7 +59,7 @@ exclude = [
 ]
 
 [dev-dependencies]
-no-panic = "0.1.30"
+no-panic = "0.1.33"
 
 [profile.release]
 # Options for no-panic to correctly detect the lack of panics
diff --git a/crates/libm-macros/Cargo.toml b/crates/libm-macros/Cargo.toml
index 9194232b2..f0de0e176 100644
--- a/crates/libm-macros/Cargo.toml
+++ b/crates/libm-macros/Cargo.toml
@@ -9,9 +9,9 @@ proc-macro = true
 
 [dependencies]
 heck = "0.5.0"
-proc-macro2 = "1.0.88"
-quote = "1.0.37"
-syn = { version = "2.0.79", features = ["full", "extra-traits", "visit-mut"] }
+proc-macro2 = "1.0.93"
+quote = "1.0.38"
+syn = { version = "2.0.96", features = ["full", "extra-traits", "visit-mut"] }
 
 [lints.rust]
 # Values used during testing
diff --git a/crates/libm-test/Cargo.toml b/crates/libm-test/Cargo.toml
index 3a1ba8796..137b81464 100644
--- a/crates/libm-test/Cargo.toml
+++ b/crates/libm-test/Cargo.toml
@@ -27,7 +27,7 @@ icount = ["dep:iai-callgrind"]
 short-benchmarks = []
 
 [dependencies]
-anyhow = "1.0.90"
+anyhow = "1.0.95"
 az = { version = "1.2.1", optional = true }
 gmp-mpfr-sys = { version = "1.6.4", optional = true, default-features = false, features = ["mpfr"] }
 iai-callgrind = { version = "0.14.0", optional = true }
@@ -39,7 +39,7 @@ paste = "1.0.15"
 rand = "0.8.5"
 rand_chacha = "0.3.1"
 rayon = "1.10.0"
-rug = { version = "1.26.1", optional = true, default-features = false, features = ["float", "integer", "std"] }
+rug = { version = "1.27.0", optional = true, default-features = false, features = ["float", "integer", "std"] }
 
 [target.'cfg(target_family = "wasm")'.dependencies]
 # Enable randomness on WASM
diff --git a/crates/musl-math-sys/Cargo.toml b/crates/musl-math-sys/Cargo.toml
index 7f6272d79..cde78fd3c 100644
--- a/crates/musl-math-sys/Cargo.toml
+++ b/crates/musl-math-sys/Cargo.toml
@@ -10,4 +10,4 @@ publish = false
 libm = { path = "../../" }
 
 [build-dependencies]
-cc = "1.1.24"
+cc = "1.2.10"
diff --git a/crates/util/Cargo.toml b/crates/util/Cargo.toml
index acf5db704..51f44dddf 100644
--- a/crates/util/Cargo.toml
+++ b/crates/util/Cargo.toml
@@ -16,4 +16,4 @@ libm = { path = "../..", default-features = false }
 libm-macros = { path = "../libm-macros" }
 libm-test = { path = "../libm-test", default-features = false }
 musl-math-sys = { path = "../musl-math-sys", optional = true }
-rug = { version = "1.26.1", optional = true, default-features = false, features = ["float", "std"] }
+rug = { version = "1.27.0", optional = true, default-features = false, features = ["float", "std"] }

From bd2ea1ceabfdcaf36c4c78222f2fe55ce0b98b1f Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sat, 25 Jan 2025 00:55:03 +0000
Subject: [PATCH 188/279] Use `az` exported from Rug

Since Rug 1.27.0, `az` is reexported. This means we no longer need to
track it as a separate dependency.
---
 crates/libm-test/Cargo.toml     | 3 +--
 crates/libm-test/src/mpfloat.rs | 2 +-
 crates/util/Cargo.toml          | 3 +--
 crates/util/src/main.rs         | 4 ++--
 4 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/crates/libm-test/Cargo.toml b/crates/libm-test/Cargo.toml
index 137b81464..31cbf6e68 100644
--- a/crates/libm-test/Cargo.toml
+++ b/crates/libm-test/Cargo.toml
@@ -12,7 +12,7 @@ unstable-float = ["libm/unstable-float", "rug?/nightly-float"]
 
 # Generate tests which are random inputs and the outputs are calculated with
 # musl libc.
-build-mpfr = ["dep:az", "dep:rug", "dep:gmp-mpfr-sys"]
+build-mpfr = ["dep:rug", "dep:gmp-mpfr-sys"]
 
 # Build our own musl for testing and benchmarks
 build-musl = ["dep:musl-math-sys"]
@@ -28,7 +28,6 @@ short-benchmarks = []
 
 [dependencies]
 anyhow = "1.0.95"
-az = { version = "1.2.1", optional = true }
 gmp-mpfr-sys = { version = "1.6.4", optional = true, default-features = false, features = ["mpfr"] }
 iai-callgrind = { version = "0.14.0", optional = true }
 indicatif = { version = "0.17.9", default-features = false }
diff --git a/crates/libm-test/src/mpfloat.rs b/crates/libm-test/src/mpfloat.rs
index 98b80505f..e2be6584d 100644
--- a/crates/libm-test/src/mpfloat.rs
+++ b/crates/libm-test/src/mpfloat.rs
@@ -6,10 +6,10 @@
 use std::cmp::Ordering;
 use std::ffi::{c_int, c_long};
 
-use az::Az;
 use gmp_mpfr_sys::mpfr::rnd_t;
 use rug::Assign;
 pub use rug::Float as MpFloat;
+use rug::az::{self, Az};
 use rug::float::Round;
 use rug::float::Round::Nearest;
 use rug::ops::{PowAssignRound, RemAssignRound};
diff --git a/crates/util/Cargo.toml b/crates/util/Cargo.toml
index 51f44dddf..8005459db 100644
--- a/crates/util/Cargo.toml
+++ b/crates/util/Cargo.toml
@@ -7,11 +7,10 @@ publish = false
 [features]
 default = ["build-musl", "build-mpfr", "unstable-float"]
 build-musl = ["libm-test/build-musl", "dep:musl-math-sys"]
-build-mpfr = ["libm-test/build-mpfr", "dep:az", "dep:rug"]
+build-mpfr = ["libm-test/build-mpfr", "dep:rug"]
 unstable-float = ["libm/unstable-float", "libm-test/unstable-float", "rug?/nightly-float"]
 
 [dependencies]
-az = { version = "1.2.1", optional = true }
 libm = { path = "../..", default-features = false }
 libm-macros = { path = "../libm-macros" }
 libm-test = { path = "../libm-test", default-features = false }
diff --git a/crates/util/src/main.rs b/crates/util/src/main.rs
index f4ee8fd2e..6ea1be3d9 100644
--- a/crates/util/src/main.rs
+++ b/crates/util/src/main.rs
@@ -8,12 +8,12 @@ use std::env;
 use std::num::ParseIntError;
 use std::str::FromStr;
 
-#[cfg(feature = "build-mpfr")]
-use az::Az;
 use libm::support::{hf32, hf64};
 #[cfg(feature = "build-mpfr")]
 use libm_test::mpfloat::MpOp;
 use libm_test::{MathOp, TupleCall};
+#[cfg(feature = "build-mpfr")]
+use rug::az::{self, Az};
 
 const USAGE: &str = "\
 usage:

From a6ba2494b7a821c390fe4193f5950815a9be34d3 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sat, 25 Jan 2025 01:00:01 +0000
Subject: [PATCH 189/279] Use `frexp` from Rug

Rug 1.27.0 exposes `frexp`. Make use of it for our tests.
---
 crates/libm-test/src/mpfloat.rs | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/crates/libm-test/src/mpfloat.rs b/crates/libm-test/src/mpfloat.rs
index e2be6584d..6896425d1 100644
--- a/crates/libm-test/src/mpfloat.rs
+++ b/crates/libm-test/src/mpfloat.rs
@@ -310,13 +310,8 @@ macro_rules! impl_op_for_ty {
                 }
 
                 fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
-                    // Implementation taken from `rug::Float::to_f32_exp`.
                     this.assign(input.0);
-                    let exp = this.get_exp().unwrap_or(0);
-                    if exp != 0 {
-                        *this >>= exp;
-                    }
-
+                    let exp = this.frexp_mut();
                     (prep_retval::<Self::FTy>(this, Ordering::Equal), exp)
                 }
             }

From 857ba9cabaa19376d37e25c309cd992ab2c2ddb5 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sat, 25 Jan 2025 01:01:24 +0000
Subject: [PATCH 190/279] Use `remquo` from Rug

Rug 1.27.0 exposes `remquo`; make use of it for our tests. Removing our
workaround also allows removing the direct dependency on `gmp-mpfr-sys`
---
 crates/libm-test/Cargo.toml       |  3 ++-
 crates/libm-test/src/mpfloat.rs   | 38 +++----------------------------
 crates/libm-test/src/precision.rs | 31 -------------------------
 3 files changed, 5 insertions(+), 67 deletions(-)

diff --git a/crates/libm-test/Cargo.toml b/crates/libm-test/Cargo.toml
index 31cbf6e68..dcbddb667 100644
--- a/crates/libm-test/Cargo.toml
+++ b/crates/libm-test/Cargo.toml
@@ -28,7 +28,8 @@ short-benchmarks = []
 
 [dependencies]
 anyhow = "1.0.95"
-gmp-mpfr-sys = { version = "1.6.4", optional = true, default-features = false, features = ["mpfr"] }
+# This is not directly used but is required so we can enable `gmp-mpfr-sys/force-cross`.
+gmp-mpfr-sys = { version = "1.6.4", optional = true, default-features = false }
 iai-callgrind = { version = "0.14.0", optional = true }
 indicatif = { version = "0.17.9", default-features = false }
 libm = { path = "../..", features = ["unstable-public-internals"] }
diff --git a/crates/libm-test/src/mpfloat.rs b/crates/libm-test/src/mpfloat.rs
index 6896425d1..3d84740cc 100644
--- a/crates/libm-test/src/mpfloat.rs
+++ b/crates/libm-test/src/mpfloat.rs
@@ -4,13 +4,10 @@
 //! a struct named `Operation` that implements [`MpOp`].
 
 use std::cmp::Ordering;
-use std::ffi::{c_int, c_long};
 
-use gmp_mpfr_sys::mpfr::rnd_t;
 use rug::Assign;
 pub use rug::Float as MpFloat;
 use rug::az::{self, Az};
-use rug::float::Round;
 use rug::float::Round::Nearest;
 use rug::ops::{PowAssignRound, RemAssignRound};
 
@@ -401,28 +398,20 @@ macro_rules! impl_op_for_ty {
             }
 
             impl MpOp for crate::op::[<remquo $suffix>]::Routine {
-                type MpTy = (MpFloat, MpFloat, MpFloat);
+                type MpTy = (MpFloat, MpFloat);
 
                 fn new_mp() -> Self::MpTy {
                     (
                         new_mpfloat::<Self::FTy>(),
                         new_mpfloat::<Self::FTy>(),
-                        new_mpfloat::<Self::FTy>()
                     )
                 }
 
                 fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
                     this.0.assign(input.0);
                     this.1.assign(input.1);
-                    let (ord, ql) = mpfr_remquo(&mut this.2, &this.0, &this.1, Nearest);
-
-                    // `remquo` integer results are sign-magnitude representation. Transfer the
-                    // sign bit from the long result to the int result.
-                    let clear = !(1 << (c_int::BITS - 1));
-                    let sign = ((ql >> (c_long::BITS - 1)) as i32) << (c_int::BITS - 1);
-                    let q = (ql as i32) & clear | sign;
-
-                    (prep_retval::<Self::FTy>(&mut this.2, ord), q)
+                    let (ord, q) = this.0.remainder_quo31_round(&this.1, Nearest);
+                    (prep_retval::<Self::FTy>(&mut this.0, ord), q)
                 }
             }
 
@@ -547,24 +536,3 @@ impl MpOp for crate::op::nextafterf::Routine {
         unimplemented!("nextafter does not yet have a MPFR operation");
     }
 }
-
-/// `rug` does not provide `remquo` so this exposes `mpfr_remquo`. See rug#76.
-fn mpfr_remquo(r: &mut MpFloat, x: &MpFloat, y: &MpFloat, round: Round) -> (Ordering, c_long) {
-    let r = r.as_raw_mut();
-    let x = x.as_raw();
-    let y = y.as_raw();
-    let mut q: c_long = 0;
-
-    let round = match round {
-        Round::Nearest => rnd_t::RNDN,
-        Round::Zero => rnd_t::RNDZ,
-        Round::Up => rnd_t::RNDU,
-        Round::Down => rnd_t::RNDD,
-        Round::AwayZero => rnd_t::RNDA,
-        _ => unreachable!(),
-    };
-
-    // SAFETY: mutable and const pointers are valid and do not alias, by Rust's rules.
-    let ord = unsafe { gmp_mpfr_sys::mpfr::remquo(r, &mut q, x, y, round) };
-    (ord.cmp(&0), q)
-}
diff --git a/crates/libm-test/src/precision.rs b/crates/libm-test/src/precision.rs
index bed615882..ffb322e38 100644
--- a/crates/libm-test/src/precision.rs
+++ b/crates/libm-test/src/precision.rs
@@ -405,15 +405,6 @@ impl MaybeOverride<(f32, f32)> for SpecialCase {
     ) -> CheckAction {
         binop_common(input, actual, expected, ctx)
     }
-
-    fn check_int<I: Int>(
-        _input: (f32, f32),
-        actual: I,
-        expected: I,
-        ctx: &CheckCtx,
-    ) -> CheckAction {
-        remquo_common(actual, expected, ctx)
-    }
 }
 
 impl MaybeOverride<(f64, f64)> for SpecialCase {
@@ -425,15 +416,6 @@ impl MaybeOverride<(f64, f64)> for SpecialCase {
     ) -> CheckAction {
         binop_common(input, actual, expected, ctx)
     }
-
-    fn check_int<I: Int>(
-        _input: (f64, f64),
-        actual: I,
-        expected: I,
-        ctx: &CheckCtx,
-    ) -> CheckAction {
-        remquo_common(actual, expected, ctx)
-    }
 }
 
 #[cfg(f128_enabled)]
@@ -496,19 +478,6 @@ fn binop_common<F1: Float, F2: Float>(
     DEFAULT
 }
 
-fn remquo_common<I: Int>(actual: I, expected: I, ctx: &CheckCtx) -> CheckAction {
-    // FIXME: Our MPFR implementation disagrees with musl and may need to be updated.
-    if ctx.basis == CheckBasis::Mpfr
-        && ctx.base_name == BaseName::Remquo
-        && expected == I::MIN
-        && actual == I::ZERO
-    {
-        return XFAIL("remquo integer mismatch");
-    }
-
-    DEFAULT
-}
-
 impl MaybeOverride<(i32, f32)> for SpecialCase {
     fn check_float<F: Float>(
         input: (i32, f32),

From a2074e5bb64d43e3376bc547ef83b0fd3099dc31 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sat, 25 Jan 2025 04:18:44 +0000
Subject: [PATCH 191/279] Remove remnants of the `checked` feature

The Cargo feature `checked` was added in d166a30c1137 ("Overhaul tests")
and later removed in 5e0eca75fb14 ("swap stable to be unstable, checked
is now debug_assertions"). However, there are a few remaining uses of
`feature = "checked"` that did not get removed. Clean these up here.
---
 build.rs                                      | 19 +++++++------------
 .../compiler-builtins-smoke-test/Cargo.toml   |  1 -
 src/math/rem_pio2_large.rs                    |  5 +++--
 3 files changed, 10 insertions(+), 15 deletions(-)

diff --git a/build.rs b/build.rs
index ca4a639a1..caf5a108a 100644
--- a/build.rs
+++ b/build.rs
@@ -8,18 +8,13 @@ fn main() {
     println!("cargo:rerun-if-changed=build.rs");
     println!("cargo:rustc-check-cfg=cfg(assert_no_panic)");
 
-    println!("cargo:rustc-check-cfg=cfg(feature, values(\"checked\"))");
-
-    #[allow(unexpected_cfgs)]
-    if !cfg!(feature = "checked") {
-        let lvl = env::var("OPT_LEVEL").unwrap();
-        if lvl != "0" && !cfg!(debug_assertions) {
-            println!("cargo:rustc-cfg=assert_no_panic");
-        } else if env::var("ENSURE_NO_PANIC").is_ok() {
-            // Give us a defensive way of ensureing that no-panic is checked  when we
-            // expect it to be.
-            panic!("`assert_no_panic `was not enabled");
-        }
+    let lvl = env::var("OPT_LEVEL").unwrap();
+    if lvl != "0" && !cfg!(debug_assertions) {
+        println!("cargo:rustc-cfg=assert_no_panic");
+    } else if env::var("ENSURE_NO_PANIC").is_ok() {
+        // Give us a defensive way of ensureing that no-panic is checked  when we
+        // expect it to be.
+        panic!("`assert_no_panic `was not enabled");
     }
 
     configure::emit_libm_config(&cfg);
diff --git a/crates/compiler-builtins-smoke-test/Cargo.toml b/crates/compiler-builtins-smoke-test/Cargo.toml
index d578b0dcd..24b33645e 100644
--- a/crates/compiler-builtins-smoke-test/Cargo.toml
+++ b/crates/compiler-builtins-smoke-test/Cargo.toml
@@ -22,7 +22,6 @@ unexpected_cfgs = { level = "warn", check-cfg = [
   "cfg(arch_enabled)",
   "cfg(assert_no_panic)",
   "cfg(intrinsics_enabled)",
-  'cfg(feature, values("checked"))',
   'cfg(feature, values("force-soft-floats"))',
   'cfg(feature, values("unstable"))',
   'cfg(feature, values("unstable-intrinsics"))',
diff --git a/src/math/rem_pio2_large.rs b/src/math/rem_pio2_large.rs
index ec8397f4b..6d679bbe9 100644
--- a/src/math/rem_pio2_large.rs
+++ b/src/math/rem_pio2_large.rs
@@ -226,8 +226,9 @@ pub(crate) fn rem_pio2_large(x: &[f64], y: &mut [f64], e0: i32, prec: usize) ->
     let x1p24 = f64::from_bits(0x4170000000000000); // 0x1p24 === 2 ^ 24
     let x1p_24 = f64::from_bits(0x3e70000000000000); // 0x1p_24 === 2 ^ (-24)
 
-    #[cfg(all(target_pointer_width = "64", feature = "checked"))]
-    assert!(e0 <= 16360);
+    if cfg!(target_pointer_width = "64") {
+        debug_assert!(e0 <= 16360);
+    }
 
     let nx = x.len();
 

From b1e7ea093ecca1f866269c625975e0df1bbca46f Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sat, 25 Jan 2025 05:22:11 +0000
Subject: [PATCH 192/279] Rework the available Cargo profiles

Currently the default release profile enables LTO and single CGU builds,
which is very slow to build. Most tests are better run with
optimizations enabled since it allows testing a much larger number of
inputs, so it is inconvenient that building can sometimes take
significantly longer than the tests.

Remedy this by doing the following:

* Move the existing `release` profile to `release-opt`.
* With the above, the default `release` profile is untouched (16 CGUs
  and thin local LTO).
* `release-checked` inherits `release`, so no LTO or single CGU.

This means that the simple `cargo test --release` becomes much faster
for local development. We are able to enable the other profiles as
needed in CI.

Tests should ideally still be run with `--profile release-checked` to
ensure there are no debug assetions or unexpected wrapping math hit.

`no-panic` still needs a single CGU, so must be run with `--profile
release-opt`. Since it is not possible to detect CGU or profilel
configuration from within build scripts, the `ENSURE_NO_PANIC`
environment variable must now always be set.
---
 Cargo.toml | 14 ++++++++------
 build.rs   |  8 ++------
 ci/run.sh  | 12 +++++++++++-
 3 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 7b6f9e1ce..08342a929 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -61,18 +61,20 @@ exclude = [
 [dev-dependencies]
 no-panic = "0.1.33"
 
-[profile.release]
-# Options for no-panic to correctly detect the lack of panics
-codegen-units = 1
-lto = "fat"
+# The default release profile is unchanged.
 
 # Release mode with debug assertions
 [profile.release-checked]
-codegen-units = 1
+inherits = "release"
 debug-assertions = true
+overflow-checks = true
+
+# Release with maximum optimizations, which is very slow to build. This is also
+# what is needed to check `no-panic`.
+[profile.release-opt]
 inherits = "release"
+codegen-units = 1
 lto = "fat"
-overflow-checks = true
 
 [profile.bench]
 # Required for iai-callgrind
diff --git a/build.rs b/build.rs
index caf5a108a..7042b54d7 100644
--- a/build.rs
+++ b/build.rs
@@ -8,13 +8,9 @@ fn main() {
     println!("cargo:rerun-if-changed=build.rs");
     println!("cargo:rustc-check-cfg=cfg(assert_no_panic)");
 
-    let lvl = env::var("OPT_LEVEL").unwrap();
-    if lvl != "0" && !cfg!(debug_assertions) {
+    // If set, enable `no-panic`. Requires LTO (`release-opt` profile).
+    if env::var("ENSURE_NO_PANIC").is_ok() {
         println!("cargo:rustc-cfg=assert_no_panic");
-    } else if env::var("ENSURE_NO_PANIC").is_ok() {
-        // Give us a defensive way of ensureing that no-panic is checked  when we
-        // expect it to be.
-        panic!("`assert_no_panic `was not enabled");
     }
 
     configure::emit_libm_config(&cfg);
diff --git a/ci/run.sh b/ci/run.sh
index 296986d97..a946d325e 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -117,4 +117,14 @@ $cmd "$profile" release-checked --features unstable-intrinsics
 $cmd "$profile" release-checked --features unstable-intrinsics --benches
 
 # Ensure that the routines do not panic.
-ENSURE_NO_PANIC=1 cargo build -p libm --target "$target" --no-default-features --release
+# 
+# `--tests` must be passed because no-panic is only enabled as a dev
+# dependency. The `release-opt` profile must be used to enable LTO and a
+# single CGU.
+ENSURE_NO_PANIC=1 cargo build \
+     -p libm \
+    --target "$target" \
+    --no-default-features \
+    --features unstable-float \
+    --tests \
+    --profile release-opt

From f5f789adec1c7d257ecb676acb4a225dbbb6484f Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 27 Jan 2025 11:37:01 +0000
Subject: [PATCH 193/279] Ignore specific `atan2` and `sin` tests on i586

There seems to be a case of unsoundness with the `i586` version of
`atan2`. For the following test:

    assert_eq!(atan2(2.0, -1.0), atan(2.0 / -1.0) + PI);atan2(2.0, -1.0)

The output is optimization-dependent. The new `release-checked` profile
produces the following failure:

    thread 'math::atan2::sanity_check' panicked at src/math/atan2.rs:123:5:
    assertion `left == right` failed
      left: 2.0344439357957027
     right: 2.0344439357957027

Similarly, `sin::test_near_pi` fails with the following:

    thread 'math::sin::test_near_pi' panicked at src/math/sin.rs:91:5:
    assertion `left == right` failed
      left: 6.273720864039203e-7
     right: 6.273720864039205e-7

Mark the tests ignored on `i586` for now.
---
 src/math/atan2.rs | 22 ++++++++++++++--------
 src/math/sin.rs   | 19 +++++++++++--------
 2 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/src/math/atan2.rs b/src/math/atan2.rs
index b9bf0da93..c668731cf 100644
--- a/src/math/atan2.rs
+++ b/src/math/atan2.rs
@@ -114,12 +114,18 @@ pub fn atan2(y: f64, x: f64) -> f64 {
     }
 }
 
-#[test]
-fn sanity_check() {
-    assert_eq!(atan2(0.0, 1.0), 0.0);
-    assert_eq!(atan2(0.0, -1.0), PI);
-    assert_eq!(atan2(-0.0, -1.0), -PI);
-    assert_eq!(atan2(3.0, 2.0), atan(3.0 / 2.0));
-    assert_eq!(atan2(2.0, -1.0), atan(2.0 / -1.0) + PI);
-    assert_eq!(atan2(-2.0, -1.0), atan(-2.0 / -1.0) - PI);
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    #[cfg_attr(x86_no_sse, ignore = "FIXME(i586): possible incorrect rounding")]
+    fn sanity_check() {
+        assert_eq!(atan2(0.0, 1.0), 0.0);
+        assert_eq!(atan2(0.0, -1.0), PI);
+        assert_eq!(atan2(-0.0, -1.0), -PI);
+        assert_eq!(atan2(3.0, 2.0), atan(3.0 / 2.0));
+        assert_eq!(atan2(2.0, -1.0), atan(2.0 / -1.0) + PI);
+        assert_eq!(atan2(-2.0, -1.0), atan(-2.0 / -1.0) - PI);
+    }
 }
diff --git a/src/math/sin.rs b/src/math/sin.rs
index e04e0d6a0..229fa4bef 100644
--- a/src/math/sin.rs
+++ b/src/math/sin.rs
@@ -81,12 +81,15 @@ pub fn sin(x: f64) -> f64 {
     }
 }
 
-#[test]
-fn test_near_pi() {
-    let x = f64::from_bits(0x400921fb000FD5DD); // 3.141592026217707
-    let sx = f64::from_bits(0x3ea50d15ced1a4a2); // 6.273720864039205e-7
-    let result = sin(x);
-    #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
-    let result = force_eval!(result);
-    assert_eq!(result, sx);
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    #[cfg_attr(x86_no_sse, ignore = "FIXME(i586): possible incorrect rounding")]
+    fn test_near_pi() {
+        let x = f64::from_bits(0x400921fb000FD5DD); // 3.141592026217707
+        let sx = f64::from_bits(0x3ea50d15ced1a4a2); // 6.273720864039205e-7
+        assert_eq!(sin(x), sx);
+    }
 }

From 0c7362cb099d3ab8d1855c7464a062770142bb78 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 27 Jan 2025 23:25:00 +0000
Subject: [PATCH 194/279] Switch musl from a script download to a submodule

Rather than keeping a script that downloads the tarball, we can just add
musl as a submodule and let git handle the synchronizatoin. Do so here.
---
 .github/workflows/main.yaml   | 18 ++++++++----------
 .gitignore                    |  1 -
 .gitmodules                   |  4 ++++
 CONTRIBUTING.md               |  4 ++++
 ci/download-musl.sh           | 24 ------------------------
 crates/musl-math-sys/build.rs | 14 +++-----------
 crates/musl-math-sys/musl     |  1 +
 7 files changed, 20 insertions(+), 46 deletions(-)
 create mode 100644 .gitmodules
 delete mode 100755 ci/download-musl.sh
 create mode 160000 crates/musl-math-sys/musl

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index 599552711..e03d7ecd3 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -79,6 +79,8 @@ jobs:
     - name: Print runner information
       run: uname -a
     - uses: actions/checkout@v4
+      with:
+        submodules: true
     - name: Install Rust (rustup)
       shell: bash
       run: |
@@ -94,10 +96,6 @@ jobs:
       with:
         key: ${{ matrix.target }}
 
-    - name: Download musl source
-      run: ./ci/download-musl.sh
-      shell: bash
-
     - name: Verify API list
       if: matrix.os == 'ubuntu-24.04'
       run: python3 etc/update-api-list.py --check
@@ -126,14 +124,14 @@ jobs:
     timeout-minutes: 10
     steps:
     - uses: actions/checkout@master
+      with:
+        submodules: true
     - name: Install Rust
       run: |
         rustup update nightly --no-self-update
         rustup default nightly
         rustup component add clippy
     - uses: Swatinem/rust-cache@v2
-    - name: Download musl source
-      run: ./ci/download-musl.sh
     - run: cargo clippy --all --all-features --all-targets
 
   builtins:
@@ -153,6 +151,8 @@ jobs:
     timeout-minutes: 20
     steps:
     - uses: actions/checkout@master
+      with:
+        submodules: true
     - uses: taiki-e/install-action@cargo-binstall
 
     - name: Set up dependencies
@@ -166,8 +166,6 @@ jobs:
         sudo apt-get install valgrind
 
     - uses: Swatinem/rust-cache@v2
-    - name: Download musl source
-      run: ./ci/download-musl.sh
 
     - name: Run icount benchmarks
       env:
@@ -259,13 +257,13 @@ jobs:
       CHANGED: ${{ matrix.changed }}
     steps:
       - uses: actions/checkout@v4
+        with:
+          submodules: true
       - name: Install Rust
         run: |
           rustup update nightly --no-self-update
           rustup default nightly
       - uses: Swatinem/rust-cache@v2
-      - name: Download musl source
-        run: ./ci/download-musl.sh
       - name: Run extensive tests
         run: |
           echo "Changed: '$CHANGED'"
diff --git a/.gitignore b/.gitignore
index a447c34cd..d5caba1a0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,7 +4,6 @@
 /math/src
 target
 Cargo.lock
-musl/
 **.tar.gz
 
 # Benchmark cache
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 000000000..35b269ead
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,4 @@
+[submodule "musl"]
+	path = crates/musl-math-sys/musl
+	url = https://git.musl-libc.org/git/musl
+	shallow = true
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index ba7f78ca0..dc4006035 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -62,6 +62,10 @@ Check [PR #65] for an example.
 Normal tests can be executed with:
 
 ```sh
+# Tests against musl require that the submodule is up to date.
+git submodule init
+git submodule update
+
 # `--release` ables more test cases
 cargo test --release
 ```
diff --git a/ci/download-musl.sh b/ci/download-musl.sh
deleted file mode 100755
index 8a8c58550..000000000
--- a/ci/download-musl.sh
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/bin/sh
-# Download the expected version of musl to a directory `musl`
-
-set -eux
-
-fname=musl-1.2.5.tar.gz
-sha=a9a118bbe84d8764da0ea0d28b3ab3fae8477fc7e4085d90102b8596fc7c75e4
-
-mkdir musl
-curl -L "https://musl.libc.org/releases/$fname" -O --retry 5
-
-case "$(uname -s)" in
-    MINGW*)
-        # Need to extract the second line because certutil does human output
-        fsha=$(certutil -hashfile "$fname" SHA256 | sed -n '2p')
-        [ "$sha" = "$fsha" ] || exit 1
-    ;;
-    *)
-        echo "$sha  $fname" | shasum -a 256 --check || exit 1
-    ;;
-esac
-
-tar -xzf "$fname" -C musl --strip-components 1
-rm "$fname"
diff --git a/crates/musl-math-sys/build.rs b/crates/musl-math-sys/build.rs
index d75748159..f06d84ee2 100644
--- a/crates/musl-math-sys/build.rs
+++ b/crates/musl-math-sys/build.rs
@@ -79,17 +79,12 @@ impl Config {
         let target_features = env::var("CARGO_CFG_TARGET_FEATURE")
             .map(|feats| feats.split(',').map(ToOwned::to_owned).collect())
             .unwrap_or_default();
-
-        // Default to the `{workspace_root}/musl` if not specified
-        let musl_dir = env::var("MUSL_SOURCE_DIR")
-            .map(PathBuf::from)
-            .unwrap_or_else(|_| manifest_dir.parent().unwrap().parent().unwrap().join("musl"));
+        let musl_dir = manifest_dir.join("musl");
 
         let target_arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap();
         let musl_arch = if target_arch == "x86" { "i386".to_owned() } else { target_arch.clone() };
 
         println!("cargo::rerun-if-changed={}/c_patches", manifest_dir.display());
-        println!("cargo::rerun-if-env-changed=MUSL_SOURCE_DIR");
         println!("cargo::rerun-if-changed={}", musl_dir.display());
 
         Self {
@@ -111,13 +106,10 @@ impl Config {
 /// Build musl math symbols to a static library
 fn build_musl_math(cfg: &Config) {
     let musl_dir = &cfg.musl_dir;
-    assert!(
-        musl_dir.exists(),
-        "musl source is missing. it can be downloaded with ./ci/download-musl.sh"
-    );
-
     let math = musl_dir.join("src/math");
     let arch_dir = musl_dir.join("arch").join(&cfg.musl_arch);
+    assert!(math.exists(), "musl source not found. Is the submodule up to date?");
+
     let source_map = find_math_source(&math, cfg);
     let out_path = cfg.out_dir.join(format!("lib{LIB_NAME}.a"));
 
diff --git a/crates/musl-math-sys/musl b/crates/musl-math-sys/musl
new file mode 160000
index 000000000..0784374d5
--- /dev/null
+++ b/crates/musl-math-sys/musl
@@ -0,0 +1 @@
+Subproject commit 0784374d561435f7c787a555aeab8ede699ed298

From e901b3eba7e8387ae4f901fe417336f270b836ba Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Tue, 28 Jan 2025 10:31:33 +0000
Subject: [PATCH 195/279] Temporarily pin `indicatif` to 0.17.9

0.17.10 introduced a change that removes `Sync` from `ProgressStyle`,
which makes it more difficult to share in a callback. Pin the dependency
for now until we see if `indicatif` will change this back or if we need
to find a workaround.
---
 crates/libm-test/Cargo.toml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/crates/libm-test/Cargo.toml b/crates/libm-test/Cargo.toml
index dcbddb667..63e75260e 100644
--- a/crates/libm-test/Cargo.toml
+++ b/crates/libm-test/Cargo.toml
@@ -31,7 +31,8 @@ anyhow = "1.0.95"
 # This is not directly used but is required so we can enable `gmp-mpfr-sys/force-cross`.
 gmp-mpfr-sys = { version = "1.6.4", optional = true, default-features = false }
 iai-callgrind = { version = "0.14.0", optional = true }
-indicatif = { version = "0.17.9", default-features = false }
+# 0.17.10 made `ProgressStyle` non-`Sync`
+indicatif = { version = "=0.17.9", default-features = false }
 libm = { path = "../..", features = ["unstable-public-internals"] }
 libm-macros = { path = "../libm-macros" }
 musl-math-sys = { path = "../musl-math-sys", optional = true }

From a89add35ebda33250cfeec0fdd942ea43a758120 Mon Sep 17 00:00:00 2001
From: Trevor Gross <t.gross35@gmail.com>
Date: Tue, 28 Jan 2025 13:54:26 -0600
Subject: [PATCH 196/279] Revert "Temporarily pin `indicatif` to 0.17.9"

This reverts commit e901b3eba7e8387ae4f901fe417336f270b836ba.
---
 crates/libm-test/Cargo.toml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/crates/libm-test/Cargo.toml b/crates/libm-test/Cargo.toml
index 63e75260e..dcbddb667 100644
--- a/crates/libm-test/Cargo.toml
+++ b/crates/libm-test/Cargo.toml
@@ -31,8 +31,7 @@ anyhow = "1.0.95"
 # This is not directly used but is required so we can enable `gmp-mpfr-sys/force-cross`.
 gmp-mpfr-sys = { version = "1.6.4", optional = true, default-features = false }
 iai-callgrind = { version = "0.14.0", optional = true }
-# 0.17.10 made `ProgressStyle` non-`Sync`
-indicatif = { version = "=0.17.9", default-features = false }
+indicatif = { version = "0.17.9", default-features = false }
 libm = { path = "../..", features = ["unstable-public-internals"] }
 libm-macros = { path = "../libm-macros" }
 musl-math-sys = { path = "../musl-math-sys", optional = true }

From 2f770cda0e85e12da5dc2ed9d250742cf8e42d02 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Tue, 28 Jan 2025 09:50:21 +0000
Subject: [PATCH 197/279] Support parsing NaN and infinities from the `hf*`
 functions

This isn't very useful for constants since the trait constants are
available, but does enable roundtripping via hex float syntax.
---
 crates/libm-test/src/f8_impl.rs |  6 ++++
 crates/libm-test/src/lib.rs     |  2 +-
 src/math/support/hex_float.rs   | 63 +++++++++++++++++++++++++++------
 src/math/support/mod.rs         |  2 +-
 4 files changed, 61 insertions(+), 12 deletions(-)

diff --git a/crates/libm-test/src/f8_impl.rs b/crates/libm-test/src/f8_impl.rs
index 96b783924..5dce9be18 100644
--- a/crates/libm-test/src/f8_impl.rs
+++ b/crates/libm-test/src/f8_impl.rs
@@ -3,6 +3,8 @@
 use std::cmp::{self, Ordering};
 use std::{fmt, ops};
 
+use libm::support::hex_float::parse_any;
+
 use crate::Float;
 
 /// Sometimes verifying float logic is easiest when all values can quickly be checked exhaustively
@@ -490,3 +492,7 @@ impl fmt::LowerHex for f8 {
         self.0.fmt(f)
     }
 }
+
+pub const fn hf8(s: &str) -> f8 {
+    f8(parse_any(s, 8, 3) as u8)
+}
diff --git a/crates/libm-test/src/lib.rs b/crates/libm-test/src/lib.rs
index 78b011b1f..d2fef2325 100644
--- a/crates/libm-test/src/lib.rs
+++ b/crates/libm-test/src/lib.rs
@@ -20,7 +20,7 @@ use std::path::PathBuf;
 use std::sync::LazyLock;
 use std::time::SystemTime;
 
-pub use f8_impl::f8;
+pub use f8_impl::{f8, hf8};
 pub use libm::support::{Float, Int, IntTy, MinInt};
 pub use num::{FloatExt, linear_ints, logspace};
 pub use op::{
diff --git a/src/math/support/hex_float.rs b/src/math/support/hex_float.rs
index 949f21a57..6eb1bd67a 100644
--- a/src/math/support/hex_float.rs
+++ b/src/math/support/hex_float.rs
@@ -26,17 +26,25 @@ pub const fn hf128(s: &str) -> f128 {
     f128::from_bits(parse_any(s, 128, 112))
 }
 
-const fn parse_any(s: &str, bits: u32, sig_bits: u32) -> u128 {
+/// Parse any float from hex to its bitwise representation.
+///
+/// `nan_repr` is passed rather than constructed so the platform-specific NaN is returned.
+pub const fn parse_any(s: &str, bits: u32, sig_bits: u32) -> u128 {
     let exp_bits: u32 = bits - sig_bits - 1;
     let max_msb: i32 = (1 << (exp_bits - 1)) - 1;
     // The exponent of one ULP in the subnormals
     let min_lsb: i32 = 1 - max_msb - sig_bits as i32;
 
-    let (neg, mut sig, exp) = parse_hex(s.as_bytes());
+    let exp_mask = ((1 << exp_bits) - 1) << sig_bits;
 
-    if sig == 0 {
-        return (neg as u128) << (bits - 1);
-    }
+    let (neg, mut sig, exp) = match parse_hex(s.as_bytes()) {
+        Parsed::Finite { neg, sig: 0, .. } => return (neg as u128) << (bits - 1),
+        Parsed::Finite { neg, sig, exp } => (neg, sig, exp),
+        Parsed::Infinite { neg } => return ((neg as u128) << (bits - 1)) | exp_mask,
+        Parsed::Nan { neg } => {
+            return ((neg as u128) << (bits - 1)) | exp_mask | 1 << (sig_bits - 1);
+        }
+    };
 
     // exponents of the least and most significant bits in the value
     let lsb = sig.trailing_zeros() as i32;
@@ -76,11 +84,24 @@ const fn parse_any(s: &str, bits: u32, sig_bits: u32) -> u128 {
     sig | ((neg as u128) << (bits - 1))
 }
 
+/// A parsed floating point number.
+enum Parsed {
+    /// Absolute value sig * 2^e
+    Finite {
+        neg: bool,
+        sig: u128,
+        exp: i32,
+    },
+    Infinite {
+        neg: bool,
+    },
+    Nan {
+        neg: bool,
+    },
+}
+
 /// Parse a hexadecimal float x
-/// returns (s,n,e):
-///     s == x.is_sign_negative()
-///     n * 2^e == x.abs()
-const fn parse_hex(mut b: &[u8]) -> (bool, u128, i32) {
+const fn parse_hex(mut b: &[u8]) -> Parsed {
     let mut neg = false;
     let mut sig: u128 = 0;
     let mut exp: i32 = 0;
@@ -90,6 +111,12 @@ const fn parse_hex(mut b: &[u8]) -> (bool, u128, i32) {
         neg = c == b'-';
     }
 
+    match *b {
+        [b'i' | b'I', b'n' | b'N', b'f' | b'F'] => return Parsed::Infinite { neg },
+        [b'n' | b'N', b'a' | b'A', b'n' | b'N'] => return Parsed::Nan { neg },
+        _ => (),
+    }
+
     if let &[b'0', b'x' | b'X', ref rest @ ..] = b {
         b = rest;
     } else {
@@ -152,7 +179,7 @@ const fn parse_hex(mut b: &[u8]) -> (bool, u128, i32) {
         exp += pexp;
     }
 
-    (neg, sig, exp)
+    Parsed::Finite { neg, sig, exp }
 }
 
 const fn dec_digit(c: u8) -> u8 {
@@ -272,6 +299,10 @@ mod tests {
                     ("-0x1.998p-4", (-0.1f16).to_bits()),
                     ("0x0.123p-12", 0x0123),
                     ("0x1p-24", 0x0001),
+                    ("nan", f16::NAN.to_bits()),
+                    ("-nan", (-f16::NAN).to_bits()),
+                    ("inf", f16::INFINITY.to_bits()),
+                    ("-inf", f16::NEG_INFINITY.to_bits()),
                 ];
                 for (s, exp) in checks {
                     println!("parsing {s}");
@@ -322,6 +353,10 @@ mod tests {
             ("0x1.111114p-127", 0x00444445),
             ("0x1.23456p-130", 0x00091a2b),
             ("0x1p-149", 0x00000001),
+            ("nan", f32::NAN.to_bits()),
+            ("-nan", (-f32::NAN).to_bits()),
+            ("inf", f32::INFINITY.to_bits()),
+            ("-inf", f32::NEG_INFINITY.to_bits()),
         ];
         for (s, exp) in checks {
             println!("parsing {s}");
@@ -360,6 +395,10 @@ mod tests {
             ("0x0.8000000000001p-1022", 0x0008000000000001),
             ("0x0.123456789abcdp-1022", 0x000123456789abcd),
             ("0x0.0000000000002p-1022", 0x0000000000000002),
+            ("nan", f64::NAN.to_bits()),
+            ("-nan", (-f64::NAN).to_bits()),
+            ("inf", f64::INFINITY.to_bits()),
+            ("-inf", f64::NEG_INFINITY.to_bits()),
         ];
         for (s, exp) in checks {
             println!("parsing {s}");
@@ -401,6 +440,10 @@ mod tests {
                     ("-0x1.999999999999999999999999999ap-4", (-0.1f128).to_bits()),
                     ("0x0.abcdef0123456789abcdef012345p-16382", 0x0000abcdef0123456789abcdef012345),
                     ("0x1p-16494", 0x00000000000000000000000000000001),
+                    ("nan", f128::NAN.to_bits()),
+                    ("-nan", (-f128::NAN).to_bits()),
+                    ("inf", f128::INFINITY.to_bits()),
+                    ("-inf", f128::NEG_INFINITY.to_bits()),
                 ];
                 for (s, exp) in checks {
                     println!("parsing {s}");
diff --git a/src/math/support/mod.rs b/src/math/support/mod.rs
index da9e2c9ed..b82a2ea05 100644
--- a/src/math/support/mod.rs
+++ b/src/math/support/mod.rs
@@ -2,7 +2,7 @@
 pub mod macros;
 mod big;
 mod float_traits;
-mod hex_float;
+pub mod hex_float;
 mod int_traits;
 
 #[allow(unused_imports)]

From 6d959375e25d4c0fd7df771f371d4c6ecf548f4d Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Tue, 28 Jan 2025 09:51:43 +0000
Subject: [PATCH 198/279] Introduce a wrapper type for IEEE hex float
 formatting

---
 src/math/support/hex_float.rs | 183 +++++++++++++++++++++++++++++++++-
 src/math/support/mod.rs       |   2 +-
 2 files changed, 181 insertions(+), 4 deletions(-)

diff --git a/src/math/support/hex_float.rs b/src/math/support/hex_float.rs
index 6eb1bd67a..da41622f2 100644
--- a/src/math/support/hex_float.rs
+++ b/src/math/support/hex_float.rs
@@ -2,7 +2,9 @@
 
 #![allow(dead_code)] // FIXME: remove once this gets used
 
-use super::{f32_from_bits, f64_from_bits};
+use core::fmt;
+
+use super::{Float, f32_from_bits, f64_from_bits};
 
 /// Construct a 16-bit float from hex float representation (C-style)
 #[cfg(f16_enabled)]
@@ -42,7 +44,7 @@ pub const fn parse_any(s: &str, bits: u32, sig_bits: u32) -> u128 {
         Parsed::Finite { neg, sig, exp } => (neg, sig, exp),
         Parsed::Infinite { neg } => return ((neg as u128) << (bits - 1)) | exp_mask,
         Parsed::Nan { neg } => {
-            return ((neg as u128) << (bits - 1)) | exp_mask | 1 << (sig_bits - 1);
+            return ((neg as u128) << (bits - 1)) | exp_mask | (1 << (sig_bits - 1));
         }
     };
 
@@ -206,8 +208,107 @@ const fn u128_ilog2(v: u128) -> u32 {
     u128::BITS - 1 - v.leading_zeros()
 }
 
+/// Format a floating point number as its IEEE hex (`%a`) representation.
+pub struct Hexf<F>(pub F);
+
+// Adapted from https://github.com/ericseppanen/hexfloat2/blob/a5c27932f0ff/src/format.rs
+fn fmt_any_hex<F: Float>(x: &F, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+    if x.is_sign_negative() {
+        write!(f, "-")?;
+    }
+
+    if x.is_nan() {
+        return write!(f, "NaN");
+    } else if x.is_infinite() {
+        return write!(f, "inf");
+    } else if *x == F::ZERO {
+        return write!(f, "0x0p+0");
+    }
+
+    let mut exponent = x.exp_unbiased();
+    let sig = x.to_bits() & F::SIG_MASK;
+
+    let bias = F::EXP_BIAS as i32;
+    // The mantissa MSB needs to be shifted up to the nearest nibble.
+    let mshift = (4 - (F::SIG_BITS % 4)) % 4;
+    let sig = sig << mshift;
+    // The width is rounded up to the nearest char (4 bits)
+    let mwidth = (F::SIG_BITS as usize + 3) / 4;
+    let leading = if exponent == -bias {
+        // subnormal number means we shift our output by 1 bit.
+        exponent += 1;
+        "0."
+    } else {
+        "1."
+    };
+
+    write!(f, "0x{leading}{sig:0mwidth$x}p{exponent:+}")
+}
+
+#[cfg(f16_enabled)]
+impl fmt::LowerHex for Hexf<f16> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        fmt_any_hex(&self.0, f)
+    }
+}
+
+impl fmt::LowerHex for Hexf<f32> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        fmt_any_hex(&self.0, f)
+    }
+}
+
+impl fmt::LowerHex for Hexf<f64> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        fmt_any_hex(&self.0, f)
+    }
+}
+
+#[cfg(f128_enabled)]
+impl fmt::LowerHex for Hexf<f128> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        fmt_any_hex(&self.0, f)
+    }
+}
+
+impl fmt::LowerHex for Hexf<i32> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        fmt::LowerHex::fmt(&self.0, f)
+    }
+}
+
+impl<T1, T2> fmt::LowerHex for Hexf<(T1, T2)>
+where
+    T1: Copy,
+    T2: Copy,
+    Hexf<T1>: fmt::LowerHex,
+    Hexf<T2>: fmt::LowerHex,
+{
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "({:x}, {:x})", Hexf(self.0.0), Hexf(self.0.1))
+    }
+}
+
+impl<T> fmt::Debug for Hexf<T>
+where
+    Hexf<T>: fmt::LowerHex,
+{
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        fmt::LowerHex::fmt(self, f)
+    }
+}
+
+impl<T> fmt::Display for Hexf<T>
+where
+    Hexf<T>: fmt::LowerHex,
+{
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        fmt::LowerHex::fmt(self, f)
+    }
+}
+
 #[cfg(test)]
-mod tests {
+mod parse_tests {
     extern crate std;
     use std::{format, println};
 
@@ -666,3 +767,79 @@ mod tests_panicking {
     #[cfg(f128_enabled)]
     f128_tests!();
 }
+
+#[cfg(test)]
+mod print_tests {
+    extern crate std;
+    use std::string::ToString;
+
+    use super::*;
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn test_f16() {
+        use std::format;
+        // Exhaustively check that `f16` roundtrips.
+        for x in 0..=u16::MAX {
+            let f = f16::from_bits(x);
+            let s = format!("{}", Hexf(f));
+            let from_s = hf16(&s);
+
+            if f.is_nan() && from_s.is_nan() {
+                continue;
+            }
+
+            assert_eq!(
+                f.to_bits(),
+                from_s.to_bits(),
+                "{f:?} formatted as {s} but parsed as {from_s:?}"
+            );
+        }
+    }
+
+    #[test]
+    fn spot_checks() {
+        assert_eq!(Hexf(f32::MAX).to_string(), "0x1.fffffep+127");
+        assert_eq!(Hexf(f64::MAX).to_string(), "0x1.fffffffffffffp+1023");
+
+        assert_eq!(Hexf(f32::MIN).to_string(), "-0x1.fffffep+127");
+        assert_eq!(Hexf(f64::MIN).to_string(), "-0x1.fffffffffffffp+1023");
+
+        assert_eq!(Hexf(f32::ZERO).to_string(), "0x0p+0");
+        assert_eq!(Hexf(f64::ZERO).to_string(), "0x0p+0");
+
+        assert_eq!(Hexf(f32::NEG_ZERO).to_string(), "-0x0p+0");
+        assert_eq!(Hexf(f64::NEG_ZERO).to_string(), "-0x0p+0");
+
+        assert_eq!(Hexf(f32::NAN).to_string(), "NaN");
+        assert_eq!(Hexf(f64::NAN).to_string(), "NaN");
+
+        assert_eq!(Hexf(f32::INFINITY).to_string(), "inf");
+        assert_eq!(Hexf(f64::INFINITY).to_string(), "inf");
+
+        assert_eq!(Hexf(f32::NEG_INFINITY).to_string(), "-inf");
+        assert_eq!(Hexf(f64::NEG_INFINITY).to_string(), "-inf");
+
+        #[cfg(f16_enabled)]
+        {
+            assert_eq!(Hexf(f16::MAX).to_string(), "0x1.ffcp+15");
+            assert_eq!(Hexf(f16::MIN).to_string(), "-0x1.ffcp+15");
+            assert_eq!(Hexf(f16::ZERO).to_string(), "0x0p+0");
+            assert_eq!(Hexf(f16::NEG_ZERO).to_string(), "-0x0p+0");
+            assert_eq!(Hexf(f16::NAN).to_string(), "NaN");
+            assert_eq!(Hexf(f16::INFINITY).to_string(), "inf");
+            assert_eq!(Hexf(f16::NEG_INFINITY).to_string(), "-inf");
+        }
+
+        #[cfg(f128_enabled)]
+        {
+            assert_eq!(Hexf(f128::MAX).to_string(), "0x1.ffffffffffffffffffffffffffffp+16383");
+            assert_eq!(Hexf(f128::MIN).to_string(), "-0x1.ffffffffffffffffffffffffffffp+16383");
+            assert_eq!(Hexf(f128::ZERO).to_string(), "0x0p+0");
+            assert_eq!(Hexf(f128::NEG_ZERO).to_string(), "-0x0p+0");
+            assert_eq!(Hexf(f128::NAN).to_string(), "NaN");
+            assert_eq!(Hexf(f128::INFINITY).to_string(), "inf");
+            assert_eq!(Hexf(f128::NEG_INFINITY).to_string(), "-inf");
+        }
+    }
+}
diff --git a/src/math/support/mod.rs b/src/math/support/mod.rs
index b82a2ea05..d471c5b70 100644
--- a/src/math/support/mod.rs
+++ b/src/math/support/mod.rs
@@ -13,7 +13,7 @@ pub use hex_float::hf16;
 #[cfg(f128_enabled)]
 pub use hex_float::hf128;
 #[allow(unused_imports)]
-pub use hex_float::{hf32, hf64};
+pub use hex_float::{Hexf, hf32, hf64};
 pub use int_traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt};
 
 /// Hint to the compiler that the current path is cold.

From 151cd29dccae781b144eed517aa7684160f38139 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Tue, 28 Jan 2025 10:09:27 +0000
Subject: [PATCH 199/279] Util: also print the hex float format for outputs

---
 crates/util/src/main.rs | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/crates/util/src/main.rs b/crates/util/src/main.rs
index 6ea1be3d9..357df6b4f 100644
--- a/crates/util/src/main.rs
+++ b/crates/util/src/main.rs
@@ -8,7 +8,7 @@ use std::env;
 use std::num::ParseIntError;
 use std::str::FromStr;
 
-use libm::support::{hf32, hf64};
+use libm::support::{Hexf, hf32, hf64};
 #[cfg(feature = "build-mpfr")]
 use libm_test::mpfloat::MpOp;
 use libm_test::{MathOp, TupleCall};
@@ -73,7 +73,7 @@ macro_rules! handle_call {
                 }
                 _ => panic!("unrecognized or disabled basis '{}'", $basis),
             };
-            println!("{output:?}");
+            println!("{output:?} {:x}", Hexf(output));
             return;
         }
     };
@@ -303,6 +303,10 @@ impl FromStrRadix for i32 {
 #[cfg(f16_enabled)]
 impl FromStrRadix for f16 {
     fn from_str_radix(s: &str, radix: u32) -> Result<Self, ParseIntError> {
+        if radix == 16 && s.contains("p") {
+            return Ok(libm::support::hf16(s));
+        }
+
         let s = strip_radix_prefix(s, radix);
         u16::from_str_radix(s, radix).map(Self::from_bits)
     }
@@ -334,6 +338,9 @@ impl FromStrRadix for f64 {
 #[cfg(f128_enabled)]
 impl FromStrRadix for f128 {
     fn from_str_radix(s: &str, radix: u32) -> Result<Self, ParseIntError> {
+        if radix == 16 && s.contains("p") {
+            return Ok(libm::support::hf128(s));
+        }
         let s = strip_radix_prefix(s, radix);
         u128::from_str_radix(s, radix).map(Self::from_bits)
     }

From 45870ef2a5cf6e5d47f979dbbfdaca498001d2d1 Mon Sep 17 00:00:00 2001
From: Manish Goregaokar <manishsmail@gmail.com>
Date: Thu, 30 Jan 2025 13:06:41 -0800
Subject: [PATCH 200/279] Specify license as just MIT

Simplify the SPDX string to the user-facing version to make it easier for
users and tooling to understand. Contributions must still be `MIT OR Apache-2.0`.

[ add commit body with context - Trevor ]
---
 Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Cargo.toml b/Cargo.toml
index 08342a929..f24f4423c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -4,7 +4,7 @@ categories = ["no-std"]
 description = "libm in pure Rust"
 documentation = "https://docs.rs/libm"
 keywords = ["libm", "math"]
-license = "MIT AND (MIT OR Apache-2.0)"
+license = "MIT"
 name = "libm"
 readme = "README.md"
 repository = "https://github.com/rust-lang/libm"

From f66e0f1100d9cc06fa61f084e5633e25fbe6c67a Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 31 Jan 2025 12:31:35 +0000
Subject: [PATCH 201/279] Rename `EXP_MAX` to `EXP_SAT`

`EXP_MAX` sounds like it would be the maximum value representable by
that float type's exponent, rather than the maximum unsigned value of
its bits. Clarify this by renaming to `EXP_SAT`, the "saturated"
exponent representation.
---
 src/math/generic/fmod.rs         |  2 +-
 src/math/generic/sqrt.rs         |  4 ++--
 src/math/support/float_traits.rs | 21 ++++++++++++---------
 3 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/src/math/generic/fmod.rs b/src/math/generic/fmod.rs
index 93da6c51e..ca1cda383 100644
--- a/src/math/generic/fmod.rs
+++ b/src/math/generic/fmod.rs
@@ -13,7 +13,7 @@ pub fn fmod<F: Float>(x: F, y: F) -> F {
     let mut ey = y.exp().signed();
     let sx = ix & F::SIGN_MASK;
 
-    if iy << 1 == zero || y.is_nan() || ex == F::EXP_MAX as i32 {
+    if iy << 1 == zero || y.is_nan() || ex == F::EXP_SAT as i32 {
         return (x * y) / (x * y);
     }
 
diff --git a/src/math/generic/sqrt.rs b/src/math/generic/sqrt.rs
index c20c0f205..90d6c01e9 100644
--- a/src/math/generic/sqrt.rs
+++ b/src/math/generic/sqrt.rs
@@ -68,7 +68,7 @@ where
         (Exp::NoShift(()), special_case)
     } else {
         let top = u32::cast_from(ix >> F::SIG_BITS);
-        let special_case = top.wrapping_sub(1) >= F::EXP_MAX - 1;
+        let special_case = top.wrapping_sub(1) >= F::EXP_SAT - 1;
         (Exp::Shifted(top), special_case)
     };
 
@@ -119,7 +119,7 @@ where
             if even {
                 m_u2 >>= 1;
             }
-            e = (e.wrapping_add(F::EXP_MAX >> 1)) >> 1;
+            e = (e.wrapping_add(F::EXP_SAT >> 1)) >> 1;
             (m_u2, Exp::Shifted(e))
         }
         Exp::NoShift(()) => {
diff --git a/src/math/support/float_traits.rs b/src/math/support/float_traits.rs
index 57e4aebec..1fe2cb424 100644
--- a/src/math/support/float_traits.rs
+++ b/src/math/support/float_traits.rs
@@ -48,11 +48,14 @@ pub trait Float:
     /// The bitwidth of the exponent
     const EXP_BITS: u32 = Self::BITS - Self::SIG_BITS - 1;
 
-    /// The saturated value of the exponent (infinite representation), in the rightmost postiion.
-    const EXP_MAX: u32 = (1 << Self::EXP_BITS) - 1;
+    /// The saturated (maximum bitpattern) value of the exponent, i.e. the infinite
+    /// representation.
+    ///
+    /// This shifted fully right, use `EXP_MASK` for the shifted value.
+    const EXP_SAT: u32 = (1 << Self::EXP_BITS) - 1;
 
     /// The exponent bias value
-    const EXP_BIAS: u32 = Self::EXP_MAX >> 1;
+    const EXP_BIAS: u32 = Self::EXP_SAT >> 1;
 
     /// A mask for the sign bit
     const SIGN_MASK: Self::Int;
@@ -109,7 +112,7 @@ pub trait Float:
 
     /// Returns the exponent, not adjusting for bias, not accounting for subnormals or zero.
     fn exp(self) -> u32 {
-        u32::cast_from(self.to_bits() >> Self::SIG_BITS) & Self::EXP_MAX
+        u32::cast_from(self.to_bits() >> Self::SIG_BITS) & Self::EXP_SAT
     }
 
     /// Extract the exponent and adjust it for bias, not accounting for subnormals or zero.
@@ -135,7 +138,7 @@ pub trait Float:
         let sign = if negative { Self::Int::ONE } else { Self::Int::ZERO };
         Self::from_bits(
             (sign << (Self::BITS - 1))
-                | (Self::Int::cast_from(exponent & Self::EXP_MAX) << Self::SIG_BITS)
+                | (Self::Int::cast_from(exponent & Self::EXP_SAT) << Self::SIG_BITS)
                 | (significand & Self::SIG_MASK),
         )
     }
@@ -267,7 +270,7 @@ mod tests {
     #[cfg(f16_enabled)]
     fn check_f16() {
         // Constants
-        assert_eq!(f16::EXP_MAX, 0b11111);
+        assert_eq!(f16::EXP_SAT, 0b11111);
         assert_eq!(f16::EXP_BIAS, 15);
 
         // `exp_unbiased`
@@ -289,7 +292,7 @@ mod tests {
     #[test]
     fn check_f32() {
         // Constants
-        assert_eq!(f32::EXP_MAX, 0b11111111);
+        assert_eq!(f32::EXP_SAT, 0b11111111);
         assert_eq!(f32::EXP_BIAS, 127);
 
         // `exp_unbiased`
@@ -312,7 +315,7 @@ mod tests {
     #[test]
     fn check_f64() {
         // Constants
-        assert_eq!(f64::EXP_MAX, 0b11111111111);
+        assert_eq!(f64::EXP_SAT, 0b11111111111);
         assert_eq!(f64::EXP_BIAS, 1023);
 
         // `exp_unbiased`
@@ -336,7 +339,7 @@ mod tests {
     #[cfg(f128_enabled)]
     fn check_f128() {
         // Constants
-        assert_eq!(f128::EXP_MAX, 0b111111111111111);
+        assert_eq!(f128::EXP_SAT, 0b111111111111111);
         assert_eq!(f128::EXP_BIAS, 16383);
 
         // `exp_unbiased`

From ac0714ba65579b158fb5d49ddad6aae569d5ea47 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 30 Jan 2025 12:52:35 +0000
Subject: [PATCH 202/279] Fix hex float trait recursion problem

---
 src/math/support/hex_float.rs | 30 +++++-------------------------
 1 file changed, 5 insertions(+), 25 deletions(-)

diff --git a/src/math/support/hex_float.rs b/src/math/support/hex_float.rs
index da41622f2..ebc4f7c64 100644
--- a/src/math/support/hex_float.rs
+++ b/src/math/support/hex_float.rs
@@ -245,29 +245,21 @@ fn fmt_any_hex<F: Float>(x: &F, f: &mut fmt::Formatter<'_>) -> fmt::Result {
     write!(f, "0x{leading}{sig:0mwidth$x}p{exponent:+}")
 }
 
-#[cfg(f16_enabled)]
-impl fmt::LowerHex for Hexf<f16> {
+impl<F: Float> fmt::LowerHex for Hexf<F> {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         fmt_any_hex(&self.0, f)
     }
 }
 
-impl fmt::LowerHex for Hexf<f32> {
+impl<F: Float> fmt::LowerHex for Hexf<(F, F)> {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        fmt_any_hex(&self.0, f)
-    }
-}
-
-impl fmt::LowerHex for Hexf<f64> {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        fmt_any_hex(&self.0, f)
+        write!(f, "({:x}, {:x})", Hexf(self.0.0), Hexf(self.0.1))
     }
 }
 
-#[cfg(f128_enabled)]
-impl fmt::LowerHex for Hexf<f128> {
+impl<F: Float> fmt::LowerHex for Hexf<(F, i32)> {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        fmt_any_hex(&self.0, f)
+        write!(f, "({:x}, {:x})", Hexf(self.0.0), Hexf(self.0.1))
     }
 }
 
@@ -277,18 +269,6 @@ impl fmt::LowerHex for Hexf<i32> {
     }
 }
 
-impl<T1, T2> fmt::LowerHex for Hexf<(T1, T2)>
-where
-    T1: Copy,
-    T2: Copy,
-    Hexf<T1>: fmt::LowerHex,
-    Hexf<T2>: fmt::LowerHex,
-{
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(f, "({:x}, {:x})", Hexf(self.0.0), Hexf(self.0.1))
-    }
-}
-
 impl<T> fmt::Debug for Hexf<T>
 where
     Hexf<T>: fmt::LowerHex,

From 9c93e011fa110b8b183b20a32845230c7729c7e9 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 3 Jan 2025 04:34:21 +0000
Subject: [PATCH 203/279] Add `scalbnf16`, `scalbnf128`, `ldexpf16`, and
 `ldexpf128`

Use the generic `scalbn` to provide `f16` and `f128` versions, which
also work for `ldexp`.

This involves a new algorithm for `f16` because the default does not
converge fast enough with a limited number of rounds.
---
 crates/libm-macros/src/shared.rs             | 14 ++++
 crates/libm-test/benches/icount.rs           |  4 +
 crates/libm-test/benches/random.rs           |  4 +
 crates/libm-test/src/mpfloat.rs              | 61 +++++++-------
 crates/libm-test/src/precision.rs            |  4 +
 crates/libm-test/tests/compare_built_musl.rs |  4 +
 crates/util/src/main.rs                      |  4 +
 etc/function-definitions.json                | 26 ++++++
 etc/function-list.txt                        |  4 +
 src/math/generic/scalbn.rs                   | 85 +++++++++++++++++---
 src/math/ldexpf128.rs                        |  4 +
 src/math/ldexpf16.rs                         |  4 +
 src/math/mod.rs                              |  8 ++
 src/math/scalbnf128.rs                       |  4 +
 src/math/scalbnf16.rs                        |  4 +
 15 files changed, 195 insertions(+), 39 deletions(-)
 create mode 100644 src/math/ldexpf128.rs
 create mode 100644 src/math/ldexpf16.rs
 create mode 100644 src/math/scalbnf128.rs
 create mode 100644 src/math/scalbnf16.rs

diff --git a/crates/libm-macros/src/shared.rs b/crates/libm-macros/src/shared.rs
index b1f4f46cc..4fd0834f6 100644
--- a/crates/libm-macros/src/shared.rs
+++ b/crates/libm-macros/src/shared.rs
@@ -134,6 +134,13 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
         None,
         &["jn", "yn"],
     ),
+    (
+        // `(f16, i32) -> f16`
+        FloatTy::F16,
+        Signature { args: &[Ty::F16, Ty::I32], returns: &[Ty::F16] },
+        None,
+        &["scalbnf16", "ldexpf16"],
+    ),
     (
         // `(f32, i32) -> f32`
         FloatTy::F32,
@@ -148,6 +155,13 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
         None,
         &["scalbn", "ldexp"],
     ),
+    (
+        // `(f128, i32) -> f128`
+        FloatTy::F128,
+        Signature { args: &[Ty::F128, Ty::I32], returns: &[Ty::F128] },
+        None,
+        &["scalbnf128", "ldexpf128"],
+    ),
     (
         // `(f32, &mut f32) -> f32` as `(f32) -> (f32, f32)`
         FloatTy::F32,
diff --git a/crates/libm-test/benches/icount.rs b/crates/libm-test/benches/icount.rs
index d5026f461..13de799c7 100644
--- a/crates/libm-test/benches/icount.rs
+++ b/crates/libm-test/benches/icount.rs
@@ -131,6 +131,8 @@ main!(
     icount_bench_jn_group,
     icount_bench_jnf_group,
     icount_bench_ldexp_group,
+    icount_bench_ldexpf128_group,
+    icount_bench_ldexpf16_group,
     icount_bench_ldexpf_group,
     icount_bench_lgamma_group,
     icount_bench_lgamma_r_group,
@@ -163,6 +165,8 @@ main!(
     icount_bench_roundf16_group,
     icount_bench_roundf_group,
     icount_bench_scalbn_group,
+    icount_bench_scalbnf128_group,
+    icount_bench_scalbnf16_group,
     icount_bench_scalbnf_group,
     icount_bench_sin_group,
     icount_bench_sinf_group,
diff --git a/crates/libm-test/benches/random.rs b/crates/libm-test/benches/random.rs
index ca9e86c10..56d288c33 100644
--- a/crates/libm-test/benches/random.rs
+++ b/crates/libm-test/benches/random.rs
@@ -133,10 +133,14 @@ libm_macros::for_each_function! {
         | fminf16
         | fmodf128
         | fmodf16
+        | ldexpf128
+        | ldexpf16
         | rintf128
         | rintf16
         | roundf128
         | roundf16
+        | scalbnf128
+        | scalbnf16
         | sqrtf128
         | sqrtf16
         | truncf128
diff --git a/crates/libm-test/src/mpfloat.rs b/crates/libm-test/src/mpfloat.rs
index 3d84740cc..e3211b913 100644
--- a/crates/libm-test/src/mpfloat.rs
+++ b/crates/libm-test/src/mpfloat.rs
@@ -159,6 +159,8 @@ libm_macros::for_each_function! {
         jnf,
         ldexp,
         ldexpf,
+        ldexpf128,
+        ldexpf16,
         lgamma_r,
         lgammaf_r,
         modf,
@@ -178,6 +180,8 @@ libm_macros::for_each_function! {
         roundf16,
         scalbn,
         scalbnf,
+        scalbnf128,
+        scalbnf16,
         sincos,sincosf,
         trunc,
         truncf,
@@ -351,34 +355,6 @@ macro_rules! impl_op_for_ty {
                 }
             }
 
-            // `ldexp` and `scalbn` are the same for binary floating point, so just forward all
-            // methods.
-            impl MpOp for crate::op::[<ldexp $suffix>]::Routine {
-                type MpTy = <crate::op::[<scalbn $suffix>]::Routine as MpOp>::MpTy;
-
-                fn new_mp() -> Self::MpTy {
-                    <crate::op::[<scalbn $suffix>]::Routine as MpOp>::new_mp()
-                }
-
-                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
-                    <crate::op::[<scalbn $suffix>]::Routine as MpOp>::run(this, input)
-                }
-            }
-
-            impl MpOp for crate::op::[<scalbn $suffix>]::Routine {
-                type MpTy = MpFloat;
-
-                fn new_mp() -> Self::MpTy {
-                    new_mpfloat::<Self::FTy>()
-                }
-
-                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
-                    this.assign(input.0);
-                    *this <<= input.1;
-                    prep_retval::<Self::FTy>(this, Ordering::Equal)
-                }
-            }
-
             impl MpOp for crate::op::[<sincos $suffix>]::Routine {
                 type MpTy = (MpFloat, MpFloat);
 
@@ -464,6 +440,35 @@ macro_rules! impl_op_for_ty_all {
                     this.1.assign(input.1);
                     let ord = this.0.rem_assign_round(&this.1, Nearest);
                     prep_retval::<Self::RustRet>(&mut this.0, ord)
+
+                }
+            }
+
+            // `ldexp` and `scalbn` are the same for binary floating point, so just forward all
+            // methods.
+            impl MpOp for crate::op::[<ldexp $suffix>]::Routine {
+                type MpTy = <crate::op::[<scalbn $suffix>]::Routine as MpOp>::MpTy;
+
+                fn new_mp() -> Self::MpTy {
+                    <crate::op::[<scalbn $suffix>]::Routine as MpOp>::new_mp()
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    <crate::op::[<scalbn $suffix>]::Routine as MpOp>::run(this, input)
+                }
+            }
+
+            impl MpOp for crate::op::[<scalbn $suffix>]::Routine {
+                type MpTy = MpFloat;
+
+                fn new_mp() -> Self::MpTy {
+                    new_mpfloat::<Self::FTy>()
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.assign(input.0);
+                    *this <<= input.1;
+                    prep_retval::<Self::FTy>(this, Ordering::Equal)
                 }
             }
         }
diff --git a/crates/libm-test/src/precision.rs b/crates/libm-test/src/precision.rs
index ffb322e38..051960b7a 100644
--- a/crates/libm-test/src/precision.rs
+++ b/crates/libm-test/src/precision.rs
@@ -551,8 +551,12 @@ fn int_float_common<F1: Float, F2: Float>(
     DEFAULT
 }
 
+#[cfg(f16_enabled)]
+impl MaybeOverride<(f16, i32)> for SpecialCase {}
 impl MaybeOverride<(f32, i32)> for SpecialCase {}
 impl MaybeOverride<(f64, i32)> for SpecialCase {}
+#[cfg(f128_enabled)]
+impl MaybeOverride<(f128, i32)> for SpecialCase {}
 
 impl MaybeOverride<(f32, f32, f32)> for SpecialCase {
     fn check_float<F: Float>(
diff --git a/crates/libm-test/tests/compare_built_musl.rs b/crates/libm-test/tests/compare_built_musl.rs
index 5466edf4f..191c7e69d 100644
--- a/crates/libm-test/tests/compare_built_musl.rs
+++ b/crates/libm-test/tests/compare_built_musl.rs
@@ -95,10 +95,14 @@ libm_macros::for_each_function! {
         fminf16,
         fmodf128,
         fmodf16,
+        ldexpf128,
+        ldexpf16,
         rintf128,
         rintf16,
         roundf128,
         roundf16,
+        scalbnf128,
+        scalbnf16,
         sqrtf128,
         sqrtf16,
         truncf128,
diff --git a/crates/util/src/main.rs b/crates/util/src/main.rs
index 357df6b4f..e5d6f374a 100644
--- a/crates/util/src/main.rs
+++ b/crates/util/src/main.rs
@@ -102,10 +102,14 @@ fn do_eval(basis: &str, op: &str, inputs: &[&str]) {
             | fminf16
             | fmodf128
             | fmodf16
+            | ldexpf128
+            | ldexpf16
             | rintf128
             | rintf16
             | roundf128
             | roundf16
+            | scalbnf128
+            | scalbnf16
             | sqrtf128
             | sqrtf16
             | truncf128
diff --git a/etc/function-definitions.json b/etc/function-definitions.json
index 574ffea2e..e38dfd236 100644
--- a/etc/function-definitions.json
+++ b/etc/function-definitions.json
@@ -554,6 +554,18 @@
         ],
         "type": "f32"
     },
+    "ldexpf128": {
+        "sources": [
+            "src/math/ldexpf128.rs"
+        ],
+        "type": "f128"
+    },
+    "ldexpf16": {
+        "sources": [
+            "src/math/ldexpf16.rs"
+        ],
+        "type": "f16"
+    },
     "lgamma": {
         "sources": [
             "src/libm_helper.rs",
@@ -774,6 +786,20 @@
         ],
         "type": "f32"
     },
+    "scalbnf128": {
+        "sources": [
+            "src/math/generic/scalbn.rs",
+            "src/math/scalbnf128.rs"
+        ],
+        "type": "f128"
+    },
+    "scalbnf16": {
+        "sources": [
+            "src/math/generic/scalbn.rs",
+            "src/math/scalbnf16.rs"
+        ],
+        "type": "f16"
+    },
     "sin": {
         "sources": [
             "src/libm_helper.rs",
diff --git a/etc/function-list.txt b/etc/function-list.txt
index d82838b32..c92eaf9e2 100644
--- a/etc/function-list.txt
+++ b/etc/function-list.txt
@@ -79,6 +79,8 @@ jn
 jnf
 ldexp
 ldexpf
+ldexpf128
+ldexpf16
 lgamma
 lgamma_r
 lgammaf
@@ -111,6 +113,8 @@ roundf128
 roundf16
 scalbn
 scalbnf
+scalbnf128
+scalbnf16
 sin
 sincos
 sincosf
diff --git a/src/math/generic/scalbn.rs b/src/math/generic/scalbn.rs
index f036c15cc..f15cb75d6 100644
--- a/src/math/generic/scalbn.rs
+++ b/src/math/generic/scalbn.rs
@@ -31,16 +31,27 @@ where
     let exp_max: i32 = F::EXP_BIAS as i32;
     let exp_min = -(exp_max - 1);
 
-    // 2 ^ Emax, where Emax is the maximum biased exponent value (1023 for f64)
+    // 2 ^ Emax, maximum positive with null significand (0x1p1023 for f64)
     let f_exp_max = F::from_parts(false, F::EXP_BIAS << 1, zero);
 
-    // 2 ^ Emin, where Emin is the minimum biased exponent value (-1022 for f64)
+    // 2 ^ Emin, minimum positive normal with null significand (0x1p-1022 for f64)
     let f_exp_min = F::from_parts(false, 1, zero);
 
-    // 2 ^ sig_total_bits, representation of what can be accounted for with subnormals
-    let f_exp_subnorm = F::from_parts(false, sig_total_bits + F::EXP_BIAS, zero);
+    // 2 ^ sig_total_bits, moltiplier to normalize subnormals (0x1p53 for f64)
+    let f_pow_subnorm = F::from_parts(false, sig_total_bits + F::EXP_BIAS, zero);
+
+    /*
+     * The goal is to multiply `x` by a scale factor that applies `n`. However, there are cases
+     * where `2^n` is not representable by `F` but the result should be, e.g. `x = 2^Emin` with
+     * `n = -EMin + 2` (one out of range of 2^Emax). To get around this, reduce the magnitude of
+     * the final scale operation by prescaling by the max/min power representable by `F`.
+     */
 
     if n > exp_max {
+        // Worse case positive `n`: `x`  is the minimum subnormal value, the result is `F::MAX`.
+        // This can be reached by three scaling multiplications (two here and one final).
+        debug_assert!(-exp_min + F::SIG_BITS as i32 + exp_max <= exp_max * 3);
+
         x *= f_exp_max;
         n -= exp_max;
         if n > exp_max {
@@ -51,21 +62,61 @@ where
             }
         }
     } else if n < exp_min {
-        let mul = f_exp_min * f_exp_subnorm;
-        let add = (exp_max - 1) - sig_total_bits as i32;
+        // When scaling toward 0, the prescaling is limited to a value that does not allow `x` to
+        // go subnormal. This avoids double rounding.
+        if F::BITS > 16 {
+            // `mul` s.t. `!(x * mul).is_subnormal() ∀ x`
+            let mul = f_exp_min * f_pow_subnorm;
+            let add = -exp_min - sig_total_bits as i32;
+
+            // Worse case negative `n`: `x`  is the maximum positive value, the result is `F::MIN`.
+            // This must be reachable by three scaling multiplications (two here and one final).
+            debug_assert!(-exp_min + F::SIG_BITS as i32 + exp_max <= add * 2 + -exp_min);
 
-        x *= mul;
-        n += add;
-        if n < exp_min {
             x *= mul;
             n += add;
+
             if n < exp_min {
-                n = exp_min;
+                x *= mul;
+                n += add;
+
+                if n < exp_min {
+                    n = exp_min;
+                }
+            }
+        } else {
+            // `f16` is unique compared to other float types in that the difference between the
+            // minimum exponent and the significand bits (`add = -exp_min - sig_total_bits`) is
+            // small, only three. The above method depend on decrementing `n` by `add` two times;
+            // for other float types this works out because `add` is a substantial fraction of
+            // the exponent range. For `f16`, however, 3 is relatively small compared to the
+            // exponent range (which is 39), so that requires ~10 prescale rounds rather than two.
+            //
+            // Work aroudn this by using a different algorithm that calculates the prescale
+            // dynamically based on the maximum possible value. This adds more operations per round
+            // since it needs to construct the scale, but works better in the general case.
+            let add = -(n + sig_total_bits as i32).clamp(exp_min, sig_total_bits as i32);
+            let mul = F::from_parts(false, (F::EXP_BIAS as i32 - add) as u32, zero);
+
+            x *= mul;
+            n += add;
+
+            if n < exp_min {
+                let add = -(n + sig_total_bits as i32).clamp(exp_min, sig_total_bits as i32);
+                let mul = F::from_parts(false, (F::EXP_BIAS as i32 - add) as u32, zero);
+
+                x *= mul;
+                n += add;
+
+                if n < exp_min {
+                    n = exp_min;
+                }
             }
         }
     }
 
-    x * F::from_parts(false, (F::EXP_BIAS as i32 + n) as u32, zero)
+    let scale = F::from_parts(false, (F::EXP_BIAS as i32 + n) as u32, zero);
+    x * scale
 }
 
 #[cfg(test)]
@@ -111,6 +162,12 @@ mod tests {
         assert!(scalbn(-F::NAN, -10).is_nan());
     }
 
+    #[test]
+    #[cfg(f16_enabled)]
+    fn spec_test_f16() {
+        spec_test::<f16>();
+    }
+
     #[test]
     fn spec_test_f32() {
         spec_test::<f32>();
@@ -120,4 +177,10 @@ mod tests {
     fn spec_test_f64() {
         spec_test::<f64>();
     }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn spec_test_f128() {
+        spec_test::<f128>();
+    }
 }
diff --git a/src/math/ldexpf128.rs b/src/math/ldexpf128.rs
new file mode 100644
index 000000000..b35277d15
--- /dev/null
+++ b/src/math/ldexpf128.rs
@@ -0,0 +1,4 @@
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn ldexpf128(x: f128, n: i32) -> f128 {
+    super::scalbnf128(x, n)
+}
diff --git a/src/math/ldexpf16.rs b/src/math/ldexpf16.rs
new file mode 100644
index 000000000..8de6cffd6
--- /dev/null
+++ b/src/math/ldexpf16.rs
@@ -0,0 +1,4 @@
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn ldexpf16(x: f16, n: i32) -> f16 {
+    super::scalbnf16(x, n)
+}
diff --git a/src/math/mod.rs b/src/math/mod.rs
index 969c1bfd9..9b07dc8a7 100644
--- a/src/math/mod.rs
+++ b/src/math/mod.rs
@@ -349,8 +349,10 @@ cfg_if! {
         mod fmaxf16;
         mod fminf16;
         mod fmodf16;
+        mod ldexpf16;
         mod rintf16;
         mod roundf16;
+        mod scalbnf16;
         mod sqrtf16;
         mod truncf16;
 
@@ -362,8 +364,10 @@ cfg_if! {
         pub use self::fmaxf16::fmaxf16;
         pub use self::fminf16::fminf16;
         pub use self::fmodf16::fmodf16;
+        pub use self::ldexpf16::ldexpf16;
         pub use self::rintf16::rintf16;
         pub use self::roundf16::roundf16;
+        pub use self::scalbnf16::scalbnf16;
         pub use self::sqrtf16::sqrtf16;
         pub use self::truncf16::truncf16;
     }
@@ -379,8 +383,10 @@ cfg_if! {
         mod fmaxf128;
         mod fminf128;
         mod fmodf128;
+        mod ldexpf128;
         mod rintf128;
         mod roundf128;
+        mod scalbnf128;
         mod sqrtf128;
         mod truncf128;
 
@@ -392,8 +398,10 @@ cfg_if! {
         pub use self::fmaxf128::fmaxf128;
         pub use self::fminf128::fminf128;
         pub use self::fmodf128::fmodf128;
+        pub use self::ldexpf128::ldexpf128;
         pub use self::rintf128::rintf128;
         pub use self::roundf128::roundf128;
+        pub use self::scalbnf128::scalbnf128;
         pub use self::sqrtf128::sqrtf128;
         pub use self::truncf128::truncf128;
     }
diff --git a/src/math/scalbnf128.rs b/src/math/scalbnf128.rs
new file mode 100644
index 000000000..c1d2b4855
--- /dev/null
+++ b/src/math/scalbnf128.rs
@@ -0,0 +1,4 @@
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn scalbnf128(x: f128, n: i32) -> f128 {
+    super::generic::scalbn(x, n)
+}
diff --git a/src/math/scalbnf16.rs b/src/math/scalbnf16.rs
new file mode 100644
index 000000000..2209e1a17
--- /dev/null
+++ b/src/math/scalbnf16.rs
@@ -0,0 +1,4 @@
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn scalbnf16(x: f16, n: i32) -> f16 {
+    super::generic::scalbn(x, n)
+}

From c3b13e566ab2c546a308f93990e8face1fdb9a97 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 5 Feb 2025 15:00:04 +0000
Subject: [PATCH 204/279] Add a check in the `shared.rs` that the function list
 is sorted

---
 crates/libm-macros/src/shared.rs | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/crates/libm-macros/src/shared.rs b/crates/libm-macros/src/shared.rs
index 4fd0834f6..da16cd8e2 100644
--- a/crates/libm-macros/src/shared.rs
+++ b/crates/libm-macros/src/shared.rs
@@ -18,7 +18,7 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
         None,
         &[
             "acosf", "acoshf", "asinf", "asinhf", "atanf", "atanhf", "cbrtf", "ceilf", "cosf",
-            "coshf", "erff", "erfcf", "exp10f", "exp2f", "expf", "expm1f", "fabsf", "floorf",
+            "coshf", "erfcf", "erff", "exp10f", "exp2f", "expf", "expm1f", "fabsf", "floorf",
             "j0f", "j1f", "lgammaf", "log10f", "log1pf", "log2f", "logf", "rintf", "roundf",
             "sinf", "sinhf", "sqrtf", "tanf", "tanhf", "tgammaf", "truncf", "y0f", "y1f",
         ],
@@ -30,8 +30,8 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
         None,
         &[
             "acos", "acosh", "asin", "asinh", "atan", "atanh", "cbrt", "ceil", "cos", "cosh",
-            "erf", "erfc", "exp10", "exp2", "exp", "expm1", "fabs", "floor", "j0", "j1", "lgamma",
-            "log10", "log1p", "log2", "log", "rint", "round", "sin", "sinh", "sqrt", "tan", "tanh",
+            "erf", "erfc", "exp", "exp10", "exp2", "expm1", "fabs", "floor", "j0", "j1", "lgamma",
+            "log", "log10", "log1p", "log2", "rint", "round", "sin", "sinh", "sqrt", "tan", "tanh",
             "tgamma", "trunc", "y0", "y1",
         ],
     ),
@@ -139,28 +139,28 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
         FloatTy::F16,
         Signature { args: &[Ty::F16, Ty::I32], returns: &[Ty::F16] },
         None,
-        &["scalbnf16", "ldexpf16"],
+        &["ldexpf16", "scalbnf16"],
     ),
     (
         // `(f32, i32) -> f32`
         FloatTy::F32,
         Signature { args: &[Ty::F32, Ty::I32], returns: &[Ty::F32] },
         None,
-        &["scalbnf", "ldexpf"],
+        &["ldexpf", "scalbnf"],
     ),
     (
         // `(f64, i64) -> f64`
         FloatTy::F64,
         Signature { args: &[Ty::F64, Ty::I32], returns: &[Ty::F64] },
         None,
-        &["scalbn", "ldexp"],
+        &["ldexp", "scalbn"],
     ),
     (
         // `(f128, i32) -> f128`
         FloatTy::F128,
         Signature { args: &[Ty::F128, Ty::I32], returns: &[Ty::F128] },
         None,
-        &["scalbnf128", "ldexpf128"],
+        &["ldexpf128", "scalbnf128"],
     ),
     (
         // `(f32, &mut f32) -> f32` as `(f32) -> (f32, f32)`
@@ -312,6 +312,12 @@ pub static ALL_OPERATIONS: LazyLock<Vec<MathOpInfo>> = LazyLock::new(|| {
             };
             ret.push(api);
         }
+
+        if !names.is_sorted() {
+            let mut sorted = (*names).to_owned();
+            sorted.sort_unstable();
+            panic!("names list is not sorted: {names:?}\nExpected: {sorted:?}");
+        }
     }
 
     ret.sort_by_key(|item| item.name);

From a72828820fa7e3eabe2c66814ae4dd86002ebb02 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 5 Feb 2025 15:02:17 +0000
Subject: [PATCH 205/279] Do not add `libm_helper.rs` to the sources list

This is just a collection of all functions and should not trigger
extensive tests when changed.
---
 etc/function-definitions.json | 57 -----------------------------------
 etc/update-api-list.py        |  9 ++++++
 2 files changed, 9 insertions(+), 57 deletions(-)

diff --git a/etc/function-definitions.json b/etc/function-definitions.json
index e38dfd236..a1d3adf59 100644
--- a/etc/function-definitions.json
+++ b/etc/function-definitions.json
@@ -2,7 +2,6 @@
     "__comment": "Autogenerated by update-api-list.py. List of files that define a function with a given name. This file is checked in to make it obvious if refactoring breaks things",
     "acos": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/acos.rs"
         ],
         "type": "f64"
@@ -15,7 +14,6 @@
     },
     "acosh": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/acosh.rs"
         ],
         "type": "f64"
@@ -28,7 +26,6 @@
     },
     "asin": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/asin.rs"
         ],
         "type": "f64"
@@ -41,7 +38,6 @@
     },
     "asinh": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/asinh.rs"
         ],
         "type": "f64"
@@ -54,14 +50,12 @@
     },
     "atan": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/atan.rs"
         ],
         "type": "f64"
     },
     "atan2": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/atan2.rs"
         ],
         "type": "f64"
@@ -80,7 +74,6 @@
     },
     "atanh": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/atanh.rs"
         ],
         "type": "f64"
@@ -93,7 +86,6 @@
     },
     "cbrt": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/cbrt.rs"
         ],
         "type": "f64"
@@ -106,7 +98,6 @@
     },
     "ceil": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/arch/i586.rs",
             "src/math/arch/wasm32.rs",
             "src/math/ceil.rs",
@@ -138,7 +129,6 @@
     },
     "copysign": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/copysign.rs",
             "src/math/generic/copysign.rs",
             "src/math/support/float_traits.rs"
@@ -168,7 +158,6 @@
     },
     "cos": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/cos.rs"
         ],
         "type": "f64"
@@ -181,7 +170,6 @@
     },
     "cosh": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/cosh.rs"
         ],
         "type": "f64"
@@ -194,14 +182,12 @@
     },
     "erf": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/erf.rs"
         ],
         "type": "f64"
     },
     "erfc": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/erf.rs"
         ],
         "type": "f64"
@@ -220,7 +206,6 @@
     },
     "exp": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/exp.rs",
             "src/math/support/float_traits.rs"
         ],
@@ -228,7 +213,6 @@
     },
     "exp10": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/exp10.rs"
         ],
         "type": "f64"
@@ -241,7 +225,6 @@
     },
     "exp2": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/exp2.rs"
         ],
         "type": "f64"
@@ -260,7 +243,6 @@
     },
     "expm1": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/expm1.rs"
         ],
         "type": "f64"
@@ -273,7 +255,6 @@
     },
     "fabs": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/arch/wasm32.rs",
             "src/math/fabs.rs",
             "src/math/generic/fabs.rs"
@@ -304,7 +285,6 @@
     },
     "fdim": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/fdim.rs",
             "src/math/generic/fdim.rs"
         ],
@@ -333,7 +313,6 @@
     },
     "floor": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/arch/i586.rs",
             "src/math/arch/wasm32.rs",
             "src/math/floor.rs",
@@ -365,7 +344,6 @@
     },
     "fma": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/fma.rs"
         ],
         "type": "f64"
@@ -378,7 +356,6 @@
     },
     "fmax": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/fmax.rs",
             "src/math/generic/fmax.rs"
         ],
@@ -407,7 +384,6 @@
     },
     "fmin": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/fmin.rs",
             "src/math/generic/fmin.rs"
         ],
@@ -436,7 +412,6 @@
     },
     "fmod": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/fmod.rs",
             "src/math/generic/fmod.rs"
         ],
@@ -465,7 +440,6 @@
     },
     "frexp": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/frexp.rs"
         ],
         "type": "f64"
@@ -478,7 +452,6 @@
     },
     "hypot": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/hypot.rs"
         ],
         "type": "f64"
@@ -491,7 +464,6 @@
     },
     "ilogb": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/ilogb.rs"
         ],
         "type": "f64"
@@ -504,7 +476,6 @@
     },
     "j0": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/j0.rs"
         ],
         "type": "f64"
@@ -517,7 +488,6 @@
     },
     "j1": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/j1.rs"
         ],
         "type": "f64"
@@ -530,7 +500,6 @@
     },
     "jn": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/jn.rs"
         ],
         "type": "f64"
@@ -543,7 +512,6 @@
     },
     "ldexp": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/ldexp.rs"
         ],
         "type": "f64"
@@ -568,14 +536,12 @@
     },
     "lgamma": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/lgamma.rs"
         ],
         "type": "f64"
     },
     "lgamma_r": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/lgamma_r.rs"
         ],
         "type": "f64"
@@ -594,14 +560,12 @@
     },
     "log": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/log.rs"
         ],
         "type": "f64"
     },
     "log10": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/log10.rs"
         ],
         "type": "f64"
@@ -614,7 +578,6 @@
     },
     "log1p": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/log1p.rs"
         ],
         "type": "f64"
@@ -627,7 +590,6 @@
     },
     "log2": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/log2.rs"
         ],
         "type": "f64"
@@ -646,7 +608,6 @@
     },
     "modf": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/modf.rs"
         ],
         "type": "f64"
@@ -659,7 +620,6 @@
     },
     "nextafter": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/nextafter.rs"
         ],
         "type": "f64"
@@ -672,7 +632,6 @@
     },
     "pow": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/pow.rs"
         ],
         "type": "f64"
@@ -685,7 +644,6 @@
     },
     "remainder": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/remainder.rs"
         ],
         "type": "f64"
@@ -698,7 +656,6 @@
     },
     "remquo": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/remquo.rs"
         ],
         "type": "f64"
@@ -711,7 +668,6 @@
     },
     "rint": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/arch/aarch64.rs",
             "src/math/arch/wasm32.rs",
             "src/math/generic/rint.rs",
@@ -744,7 +700,6 @@
     },
     "round": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/generic/round.rs",
             "src/math/round.rs"
         ],
@@ -773,7 +728,6 @@
     },
     "scalbn": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/generic/scalbn.rs",
             "src/math/scalbn.rs"
         ],
@@ -802,14 +756,12 @@
     },
     "sin": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/sin.rs"
         ],
         "type": "f64"
     },
     "sincos": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/sincos.rs"
         ],
         "type": "f64"
@@ -828,7 +780,6 @@
     },
     "sinh": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/sinh.rs"
         ],
         "type": "f64"
@@ -841,7 +792,6 @@
     },
     "sqrt": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/arch/i686.rs",
             "src/math/arch/wasm32.rs",
             "src/math/generic/sqrt.rs",
@@ -874,7 +824,6 @@
     },
     "tan": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/tan.rs"
         ],
         "type": "f64"
@@ -887,7 +836,6 @@
     },
     "tanh": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/tanh.rs"
         ],
         "type": "f64"
@@ -900,7 +848,6 @@
     },
     "tgamma": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/tgamma.rs"
         ],
         "type": "f64"
@@ -913,7 +860,6 @@
     },
     "trunc": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/arch/wasm32.rs",
             "src/math/generic/trunc.rs",
             "src/math/trunc.rs"
@@ -944,7 +890,6 @@
     },
     "y0": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/j0.rs"
         ],
         "type": "f64"
@@ -957,7 +902,6 @@
     },
     "y1": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/j1.rs"
         ],
         "type": "f64"
@@ -970,7 +914,6 @@
     },
     "yn": {
         "sources": [
-            "src/libm_helper.rs",
             "src/math/jn.rs"
         ],
         "type": "f64"
diff --git a/etc/update-api-list.py b/etc/update-api-list.py
index 67f73e59c..54da13257 100755
--- a/etc/update-api-list.py
+++ b/etc/update-api-list.py
@@ -1,6 +1,9 @@
 #!/usr/bin/env python3
 """Create a text file listing all public API. This can be used to ensure that all
 functions are covered by our macros.
+
+This file additionally does tidy-esque checks that all functions are listed where
+needed, or that lists are sorted.
 """
 
 import difflib
@@ -15,6 +18,9 @@
 ETC_DIR = Path(__file__).parent
 ROOT_DIR = ETC_DIR.parent
 
+# These files do not trigger a retest.
+IGNORED_SOURCES = ["src/libm_helper.rs"]
+
 IndexTy: TypeAlias = dict[str, dict[str, Any]]
 """Type of the `index` item in rustdoc's JSON output"""
 
@@ -120,6 +126,9 @@ def _init_defs(self, index: IndexTy) -> None:
             for src in (s for s in base_sources if "generic" in s):
                 sources.add(src)
 
+            for src in IGNORED_SOURCES:
+                sources.discard(src)
+
         # Sort the set
         self.defs = {k: sorted(v) for (k, v) in defs.items()}
 

From ad83c9a5198c7a6a442f4493e5599cd7baae822d Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 5 Feb 2025 15:03:34 +0000
Subject: [PATCH 206/279] Add checks via annotation that lists are sorted or
 exhaustive

This crate has a handful of lists that need to list all API and can't
easily be verified. Additionally, some longer lists should be kept
sorted so they are easier to look through. Resolve both of these by
adding a check in `update-api-list.py` that looks for annotations and
verifies the contents are as expected.

Annotations are `verify-apilist-start`, `verify-apilist-end`,
`verify-sorted-start`, and `verify-sorted-end`.

This includes fixes for anything that did not meet the criteria.
---
 crates/libm-test/benches/icount.rs           |   9 +-
 crates/libm-test/src/mpfloat.rs              |   2 +
 crates/libm-test/tests/compare_built_musl.rs |   2 +
 etc/update-api-list.py                       | 143 +++++++++++++++++--
 src/libm_helper.rs                           |  44 +++++-
 src/math/mod.rs                              |   8 ++
 6 files changed, 187 insertions(+), 21 deletions(-)

diff --git a/crates/libm-test/benches/icount.rs b/crates/libm-test/benches/icount.rs
index 13de799c7..53ecb5a37 100644
--- a/crates/libm-test/benches/icount.rs
+++ b/crates/libm-test/benches/icount.rs
@@ -52,7 +52,10 @@ libm_macros::for_each_function! {
 }
 
 main!(
-    library_benchmark_groups = icount_bench_acos_group,
+    library_benchmark_groups =
+    // verify-apilist-start
+    // verify-sorted-start
+    icount_bench_acos_group,
     icount_bench_acosf_group,
     icount_bench_acosh_group,
     icount_bench_acoshf_group,
@@ -169,6 +172,8 @@ main!(
     icount_bench_scalbnf16_group,
     icount_bench_scalbnf_group,
     icount_bench_sin_group,
+    icount_bench_sincos_group,
+    icount_bench_sincosf_group,
     icount_bench_sinf_group,
     icount_bench_sinh_group,
     icount_bench_sinhf_group,
@@ -192,4 +197,6 @@ main!(
     icount_bench_y1f_group,
     icount_bench_yn_group,
     icount_bench_ynf_group,
+    // verify-sorted-end
+    // verify-apilist-end
 );
diff --git a/crates/libm-test/src/mpfloat.rs b/crates/libm-test/src/mpfloat.rs
index e3211b913..ab77d541c 100644
--- a/crates/libm-test/src/mpfloat.rs
+++ b/crates/libm-test/src/mpfloat.rs
@@ -132,6 +132,7 @@ libm_macros::for_each_function! {
     emit_types: [RustFn],
     skip: [
         // Most of these need a manual implementation
+        // verify-sorted-start
         ceil,
         ceilf,
         ceilf128,
@@ -188,6 +189,7 @@ libm_macros::for_each_function! {
         truncf128,
         truncf16,yn,
         ynf,
+        // verify-sorted-end
     ],
     fn_extra: match MACRO_FN_NAME {
         // Remap function names that are different between mpfr and libm
diff --git a/crates/libm-test/tests/compare_built_musl.rs b/crates/libm-test/tests/compare_built_musl.rs
index 191c7e69d..0b0a9f097 100644
--- a/crates/libm-test/tests/compare_built_musl.rs
+++ b/crates/libm-test/tests/compare_built_musl.rs
@@ -79,6 +79,7 @@ libm_macros::for_each_function! {
         ynf,
 
         // Not provided by musl
+        // verify-sorted-start
         ceilf128,
         ceilf16,
         copysignf128,
@@ -107,5 +108,6 @@ libm_macros::for_each_function! {
         sqrtf16,
         truncf128,
         truncf16,
+        // verify-sorted-end
     ],
 }
diff --git a/etc/update-api-list.py b/etc/update-api-list.py
index 54da13257..9cf625554 100755
--- a/etc/update-api-list.py
+++ b/etc/update-api-list.py
@@ -8,16 +8,21 @@
 
 import difflib
 import json
+import re
 import subprocess as sp
 import sys
 from dataclasses import dataclass
-from glob import glob
+from glob import glob, iglob
 from pathlib import Path
-from typing import Any, TypeAlias
+from typing import Any, Callable, TypeAlias
 
-ETC_DIR = Path(__file__).parent
+SELF_PATH = Path(__file__)
+ETC_DIR = SELF_PATH.parent
 ROOT_DIR = ETC_DIR.parent
 
+# Loose approximation of what gets checked in to git, without needing `git ls-files`.
+DIRECTORIES = [".github", "ci", "crates", "etc", "src"]
+
 # These files do not trigger a retest.
 IGNORED_SOURCES = ["src/libm_helper.rs"]
 
@@ -25,6 +30,11 @@
 """Type of the `index` item in rustdoc's JSON output"""
 
 
+def eprint(*args, **kwargs):
+    """Print to stderr."""
+    print(*args, file=sys.stderr, **kwargs)
+
+
 @dataclass
 class Crate:
     """Representation of public interfaces and function defintion locations in
@@ -146,7 +156,7 @@ def write_function_list(self, check: bool) -> None:
         if check:
             with open(out_file, "r") as f:
                 current = f.read()
-            diff_and_exit(current, output)
+            diff_and_exit(current, output, "function list")
         else:
             with open(out_file, "w") as f:
                 f.write(output)
@@ -171,18 +181,115 @@ def write_function_defs(self, check: bool) -> None:
         if check:
             with open(out_file, "r") as f:
                 current = f.read()
-            diff_and_exit(current, output)
+            diff_and_exit(current, output, "source list")
         else:
             with open(out_file, "w") as f:
                 f.write(output)
 
+    def tidy_lists(self) -> None:
+        """In each file, check annotations indicating blocks of code should be sorted or should
+        include all public API.
+        """
+        for dirname in DIRECTORIES:
+            dir = ROOT_DIR.joinpath(dirname)
+            for fname in iglob("**", root_dir=dir, recursive=True):
+                fpath = dir.joinpath(fname)
+                if fpath.is_dir() or fpath == SELF_PATH:
+                    continue
+
+                lines = fpath.read_text().splitlines()
+
+                validate_delimited_block(
+                    fpath,
+                    lines,
+                    "verify-sorted-start",
+                    "verify-sorted-end",
+                    ensure_sorted,
+                )
+
+                validate_delimited_block(
+                    fpath,
+                    lines,
+                    "verify-apilist-start",
+                    "verify-apilist-end",
+                    lambda p, n, lines: self.ensure_contains_api(p, n, lines),
+                )
+
+    def ensure_contains_api(self, fpath: Path, line_num: int, lines: list[str]):
+        """Given a list of strings, ensure that each public function we have is named
+        somewhere.
+        """
+        not_found = []
+        for func in self.public_functions:
+            # The function name may be on its own or somewhere in a snake case string.
+            pat = re.compile(rf"(\b|_){func}(\b|_)")
+            found = next((line for line in lines if pat.search(line)), None)
+
+            if found is None:
+                not_found.append(func)
+
+        if len(not_found) == 0:
+            return
+
+        relpath = fpath.relative_to(ROOT_DIR)
+        eprint(f"functions not found at {relpath}:{line_num}: {not_found}")
+        exit(1)
+
+
+def validate_delimited_block(
+    fpath: Path,
+    lines: list[str],
+    start: str,
+    end: str,
+    validate: Callable[[Path, int, list[str]], None],
+) -> None:
+    """Identify blocks of code wrapped within `start` and `end`, collect their contents
+    to a list of strings, and call `validate` for each of those lists.
+    """
+    relpath = fpath.relative_to(ROOT_DIR)
+    block_lines = []
+    block_start_line: None | int = None
+    for line_num, line in enumerate(lines):
+        line_num += 1
+
+        if start in line:
+            block_start_line = line_num
+            continue
+
+        if end in line:
+            if block_start_line is None:
+                eprint(f"`{end}` without `{start}` at {relpath}:{line_num}")
+                exit(1)
+
+            validate(fpath, block_start_line, block_lines)
+            block_lines = []
+            block_start_line = None
+            continue
+
+        if block_start_line is not None:
+            block_lines.append(line)
+
+    if block_start_line is not None:
+        eprint(f"`{start}` without `{end}` at {relpath}:{block_start_line}")
+        exit(1)
+
+
+def ensure_sorted(fpath: Path, block_start_line: int, lines: list[str]) -> None:
+    """Ensure that a list of lines is sorted, otherwise print a diff and exit."""
+    relpath = fpath.relative_to(ROOT_DIR)
+    diff_and_exit(
+        "".join(lines),
+        "".join(sorted(lines)),
+        f"sorted block at {relpath}:{block_start_line}",
+    )
 
-def diff_and_exit(actual: str, expected: str):
+
+def diff_and_exit(actual: str, expected: str, name: str):
     """If the two strings are different, print a diff between them and then exit
     with an error.
     """
     if actual == expected:
-        print("output matches expected; success")
+        print(f"{name} output matches expected; success")
         return
 
     a = [f"{line}\n" for line in actual.splitlines()]
@@ -190,7 +297,7 @@ def diff_and_exit(actual: str, expected: str):
 
     diff = difflib.unified_diff(a, b, "actual", "expected")
     sys.stdout.writelines(diff)
-    print("mismatched function list")
+    print(f"mismatched {name}")
     exit(1)
 
 
@@ -223,23 +330,31 @@ def base_name(name: str) -> tuple[str, str]:
     return (name, "f64")
 
 
+def ensure_updated_list(check: bool) -> None:
+    """Runner to update the function list and JSON, or check that it is already up
+    to date.
+    """
+    crate = Crate()
+    crate.write_function_list(check)
+    crate.write_function_defs(check)
+
+    if check:
+        crate.tidy_lists()
+
+
 def main():
     """By default overwrite the file. If `--check` is passed, print a diff instead and
     error if the files are different.
     """
     match sys.argv:
         case [_]:
-            check = False
+            ensure_updated_list(False)
         case [_, "--check"]:
-            check = True
+            ensure_updated_list(True)
         case _:
             print("unrecognized arguments")
             exit(1)
 
-    crate = Crate()
-    crate.write_function_list(check)
-    crate.write_function_defs(check)
-
 
 if __name__ == "__main__":
     main()
diff --git a/src/libm_helper.rs b/src/libm_helper.rs
index 73bae4567..0768839c7 100644
--- a/src/libm_helper.rs
+++ b/src/libm_helper.rs
@@ -44,9 +44,11 @@ macro_rules! libm_helper {
     };
 }
 
+// verify-apilist-start
 libm_helper! {
     f32,
     funcs: {
+        // verify-sorted-start
         (fn acos(x: f32) -> (f32);                  => acosf);
         (fn acosh(x: f32) -> (f32);                 => acoshf);
         (fn asin(x: f32) -> (f32);                  => asinf);
@@ -62,8 +64,8 @@ libm_helper! {
         (fn erf(x: f32) -> (f32);                   => erff);
         (fn erfc(x: f32) -> (f32);                  => erfcf);
         (fn exp(x: f32) -> (f32);                   => expf);
-        (fn exp2(x: f32) -> (f32);                  => exp2f);
         (fn exp10(x: f32) -> (f32);                 => exp10f);
+        (fn exp2(x: f32) -> (f32);                  => exp2f);
         (fn expm1(x: f32) -> (f32);                 => expm1f);
         (fn fabs(x: f32) -> (f32);                  => fabsf);
         (fn fdim(x: f32, y: f32) -> (f32);          => fdimf);
@@ -79,12 +81,12 @@ libm_helper! {
         (fn j1(x: f32) -> (f32);                    => j1f);
         (fn jn(n: i32, x: f32) -> (f32);            => jnf);
         (fn ldexp(x: f32, n: i32) -> (f32);         => ldexpf);
-        (fn lgamma_r(x: f32) -> (f32, i32);         => lgammaf_r);
         (fn lgamma(x: f32) -> (f32);                => lgammaf);
+        (fn lgamma_r(x: f32) -> (f32, i32);         => lgammaf_r);
         (fn log(x: f32) -> (f32);                   => logf);
+        (fn log10(x: f32) -> (f32);                 => log10f);
         (fn log1p(x: f32) -> (f32);                 => log1pf);
         (fn log2(x: f32) -> (f32);                  => log2f);
-        (fn log10(x: f32) -> (f32);                 => log10f);
         (fn modf(x: f32) -> (f32, f32);             => modff);
         (fn nextafter(x: f32, y: f32) -> (f32);     => nextafterf);
         (fn pow(x: f32, y: f32) -> (f32);           => powf);
@@ -104,12 +106,14 @@ libm_helper! {
         (fn y0(x: f32) -> (f32);                    => y0f);
         (fn y1(x: f32) -> (f32);                    => y1f);
         (fn yn(n: i32, x: f32) -> (f32);            => ynf);
+        // verify-sorted-end
     }
 }
 
 libm_helper! {
     f64,
     funcs: {
+        // verify-sorted-start
         (fn acos(x: f64) -> (f64);                  => acos);
         (fn acosh(x: f64) -> (f64);                 => acosh);
         (fn asin(x: f64) -> (f64);                  => asin);
@@ -125,8 +129,8 @@ libm_helper! {
         (fn erf(x: f64) -> (f64);                   => erf);
         (fn erfc(x: f64) -> (f64);                  => erfc);
         (fn exp(x: f64) -> (f64);                   => exp);
-        (fn exp2(x: f64) -> (f64);                  => exp2);
         (fn exp10(x: f64) -> (f64);                 => exp10);
+        (fn exp2(x: f64) -> (f64);                  => exp2);
         (fn expm1(x: f64) -> (f64);                 => expm1);
         (fn fabs(x: f64) -> (f64);                  => fabs);
         (fn fdim(x: f64, y: f64) -> (f64);          => fdim);
@@ -142,12 +146,12 @@ libm_helper! {
         (fn j1(x: f64) -> (f64);                    => j1);
         (fn jn(n: i32, x: f64) -> (f64);            => jn);
         (fn ldexp(x: f64, n: i32) -> (f64);         => ldexp);
-        (fn lgamma_r(x: f64) -> (f64, i32);         => lgamma_r);
         (fn lgamma(x: f64) -> (f64);                => lgamma);
+        (fn lgamma_r(x: f64) -> (f64, i32);         => lgamma_r);
         (fn log(x: f64) -> (f64);                   => log);
+        (fn log10(x: f64) -> (f64);                 => log10);
         (fn log1p(x: f64) -> (f64);                 => log1p);
         (fn log2(x: f64) -> (f64);                  => log2);
-        (fn log10(x: f64) -> (f64);                 => log10);
         (fn modf(x: f64) -> (f64, f64);             => modf);
         (fn nextafter(x: f64, y: f64) -> (f64);     => nextafter);
         (fn pow(x: f64, y: f64) -> (f64);           => pow);
@@ -167,6 +171,7 @@ libm_helper! {
         (fn y0(x: f64) -> (f64);                    => y0);
         (fn y1(x: f64) -> (f64);                    => y1);
         (fn yn(n: i32, x: f64) -> (f64);            => yn);
+        // verify-sorted-end
     }
 }
 
@@ -174,9 +179,22 @@ libm_helper! {
 libm_helper! {
     f16,
     funcs: {
+        // verify-sorted-start
+        (fn ceilf(x: f16) -> (f16);                 => ceilf16);
         (fn copysign(x: f16, y: f16) -> (f16);      => copysignf16);
         (fn fabs(x: f16) -> (f16);                  => fabsf16);
         (fn fdim(x: f16, y: f16) -> (f16);          => fdimf16);
+        (fn floorf(x: f16) -> (f16);                => floorf16);
+        (fn fmaxf(x: f16, y: f16) -> (f16);         => fmaxf16);
+        (fn fminf(x: f16, y: f16) -> (f16);         => fminf16);
+        (fn fmodf(x: f16, y: f16) -> (f16);         => fmodf16);
+        (fn ldexpf16(x: f16, n: i32) -> (f16);      => ldexpf16);
+        (fn rintf(x: f16) -> (f16);                 => rintf16);
+        (fn roundf(x: f16) -> (f16);                => roundf16);
+        (fn scalbnf16(x: f16, n: i32) -> (f16);     => ldexpf16);
+        (fn sqrtf(x: f16) -> (f16);                 => sqrtf16);
+        (fn truncf(x: f16) -> (f16);                => truncf16);
+        // verify-sorted-end
     }
 }
 
@@ -184,8 +202,22 @@ libm_helper! {
 libm_helper! {
     f128,
     funcs: {
+        // verify-sorted-start
+        (fn ceil(x: f128) -> (f128);                => ceilf128);
         (fn copysign(x: f128, y: f128) -> (f128);   => copysignf128);
         (fn fabs(x: f128) -> (f128);                => fabsf128);
         (fn fdim(x: f128, y: f128) -> (f128);       => fdimf128);
+        (fn floor(x: f128) -> (f128);               => floorf128);
+        (fn fmax(x: f128, y: f128) -> (f128);       => fmaxf128);
+        (fn fmin(x: f128, y: f128) -> (f128);       => fminf128);
+        (fn fmod(x: f128, y: f128) -> (f128);       => fmodf128);
+        (fn ldexpf128(x: f128, n: i32) -> (f128);   => ldexpf128);
+        (fn rint(x: f128) -> (f128);                => rintf128);
+        (fn round(x: f128) -> (f128);               => roundf128);
+        (fn scalbnf128(x: f128, n: i32) -> (f128);  => ldexpf128);
+        (fn sqrt(x: f128) -> (f128);                => sqrtf128);
+        (fn trunc(x: f128) -> (f128);               => truncf128);
+        // verify-sorted-end
     }
 }
+// verify-apilist-end
diff --git a/src/math/mod.rs b/src/math/mod.rs
index 9b07dc8a7..f0698ad02 100644
--- a/src/math/mod.rs
+++ b/src/math/mod.rs
@@ -341,6 +341,7 @@ pub use self::truncf::truncf;
 
 cfg_if! {
     if #[cfg(f16_enabled)] {
+        // verify-sorted-start
         mod ceilf16;
         mod copysignf16;
         mod fabsf16;
@@ -355,7 +356,9 @@ cfg_if! {
         mod scalbnf16;
         mod sqrtf16;
         mod truncf16;
+        // verify-sorted-end
 
+        // verify-sorted-start
         pub use self::ceilf16::ceilf16;
         pub use self::copysignf16::copysignf16;
         pub use self::fabsf16::fabsf16;
@@ -370,11 +373,13 @@ cfg_if! {
         pub use self::scalbnf16::scalbnf16;
         pub use self::sqrtf16::sqrtf16;
         pub use self::truncf16::truncf16;
+        // verify-sorted-end
     }
 }
 
 cfg_if! {
     if #[cfg(f128_enabled)] {
+        // verify-sorted-start
         mod ceilf128;
         mod copysignf128;
         mod fabsf128;
@@ -389,7 +394,9 @@ cfg_if! {
         mod scalbnf128;
         mod sqrtf128;
         mod truncf128;
+        // verify-sorted-end
 
+        // verify-sorted-start
         pub use self::ceilf128::ceilf128;
         pub use self::copysignf128::copysignf128;
         pub use self::fabsf128::fabsf128;
@@ -404,6 +411,7 @@ cfg_if! {
         pub use self::scalbnf128::scalbnf128;
         pub use self::sqrtf128::sqrtf128;
         pub use self::truncf128::truncf128;
+        // verify-sorted-end
     }
 }
 

From d6551262f4e83f9cb743e2dd264b18469602a0ef Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 23 Jan 2025 08:28:58 +0000
Subject: [PATCH 207/279] Start converting `fma` to a generic function

This is the first step toward making `fma` usable for `f128`, and
possibly `f32` on platforms where growing to `f64` is not fast. This
does not yet work for anything other than `f64`.
---
 etc/function-definitions.json    |   6 +-
 src/math/fma.rs                  | 192 +-------------------------
 src/math/generic/fma.rs          | 227 +++++++++++++++++++++++++++++++
 src/math/generic/mod.rs          |   2 +
 src/math/support/float_traits.rs |   4 +-
 src/math/support/int_traits.rs   |  39 ++++++
 6 files changed, 278 insertions(+), 192 deletions(-)
 create mode 100644 src/math/generic/fma.rs

diff --git a/etc/function-definitions.json b/etc/function-definitions.json
index a1d3adf59..243862075 100644
--- a/etc/function-definitions.json
+++ b/etc/function-definitions.json
@@ -344,13 +344,15 @@
     },
     "fma": {
         "sources": [
-            "src/math/fma.rs"
+            "src/math/fma.rs",
+            "src/math/generic/fma.rs"
         ],
         "type": "f64"
     },
     "fmaf": {
         "sources": [
-            "src/math/fmaf.rs"
+            "src/math/fmaf.rs",
+            "src/math/generic/fma.rs"
         ],
         "type": "f32"
     },
diff --git a/src/math/fma.rs b/src/math/fma.rs
index 826143d5a..69cc3eb67 100644
--- a/src/math/fma.rs
+++ b/src/math/fma.rs
@@ -1,195 +1,9 @@
-use core::{f32, f64};
-
-use super::scalbn;
-
-const ZEROINFNAN: i32 = 0x7ff - 0x3ff - 52 - 1;
-
-struct Num {
-    m: u64,
-    e: i32,
-    sign: i32,
-}
-
-fn normalize(x: f64) -> Num {
-    let x1p63: f64 = f64::from_bits(0x43e0000000000000); // 0x1p63 === 2 ^ 63
-
-    let mut ix: u64 = x.to_bits();
-    let mut e: i32 = (ix >> 52) as i32;
-    let sign: i32 = e & 0x800;
-    e &= 0x7ff;
-    if e == 0 {
-        ix = (x * x1p63).to_bits();
-        e = (ix >> 52) as i32 & 0x7ff;
-        e = if e != 0 { e - 63 } else { 0x800 };
-    }
-    ix &= (1 << 52) - 1;
-    ix |= 1 << 52;
-    ix <<= 1;
-    e -= 0x3ff + 52 + 1;
-    Num { m: ix, e, sign }
-}
-
-#[inline]
-fn mul(x: u64, y: u64) -> (u64, u64) {
-    let t = (x as u128).wrapping_mul(y as u128);
-    ((t >> 64) as u64, t as u64)
-}
-
-/// Floating multiply add (f64)
+/// Fused multiply add (f64)
 ///
-/// Computes `(x*y)+z`, rounded as one ternary operation:
-/// Computes the value (as if) to infinite precision and rounds once to the result format,
-/// according to the rounding mode characterized by the value of FLT_ROUNDS.
+/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision).
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn fma(x: f64, y: f64, z: f64) -> f64 {
-    let x1p63: f64 = f64::from_bits(0x43e0000000000000); // 0x1p63 === 2 ^ 63
-    let x0_ffffff8p_63 = f64::from_bits(0x3bfffffff0000000); // 0x0.ffffff8p-63
-
-    /* normalize so top 10bits and last bit are 0 */
-    let nx = normalize(x);
-    let ny = normalize(y);
-    let nz = normalize(z);
-
-    if nx.e >= ZEROINFNAN || ny.e >= ZEROINFNAN {
-        return x * y + z;
-    }
-    if nz.e >= ZEROINFNAN {
-        if nz.e > ZEROINFNAN {
-            /* z==0 */
-            return x * y + z;
-        }
-        return z;
-    }
-
-    /* mul: r = x*y */
-    let zhi: u64;
-    let zlo: u64;
-    let (mut rhi, mut rlo) = mul(nx.m, ny.m);
-    /* either top 20 or 21 bits of rhi and last 2 bits of rlo are 0 */
-
-    /* align exponents */
-    let mut e: i32 = nx.e + ny.e;
-    let mut d: i32 = nz.e - e;
-    /* shift bits z<<=kz, r>>=kr, so kz+kr == d, set e = e+kr (== ez-kz) */
-    if d > 0 {
-        if d < 64 {
-            zlo = nz.m << d;
-            zhi = nz.m >> (64 - d);
-        } else {
-            zlo = 0;
-            zhi = nz.m;
-            e = nz.e - 64;
-            d -= 64;
-            if d == 0 {
-            } else if d < 64 {
-                rlo = (rhi << (64 - d)) | (rlo >> d) | ((rlo << (64 - d)) != 0) as u64;
-                rhi = rhi >> d;
-            } else {
-                rlo = 1;
-                rhi = 0;
-            }
-        }
-    } else {
-        zhi = 0;
-        d = -d;
-        if d == 0 {
-            zlo = nz.m;
-        } else if d < 64 {
-            zlo = (nz.m >> d) | ((nz.m << (64 - d)) != 0) as u64;
-        } else {
-            zlo = 1;
-        }
-    }
-
-    /* add */
-    let mut sign: i32 = nx.sign ^ ny.sign;
-    let samesign: bool = (sign ^ nz.sign) == 0;
-    let mut nonzero: i32 = 1;
-    if samesign {
-        /* r += z */
-        rlo = rlo.wrapping_add(zlo);
-        rhi += zhi + (rlo < zlo) as u64;
-    } else {
-        /* r -= z */
-        let (res, borrow) = rlo.overflowing_sub(zlo);
-        rlo = res;
-        rhi = rhi.wrapping_sub(zhi.wrapping_add(borrow as u64));
-        if (rhi >> 63) != 0 {
-            rlo = (rlo as i64).wrapping_neg() as u64;
-            rhi = (rhi as i64).wrapping_neg() as u64 - (rlo != 0) as u64;
-            sign = (sign == 0) as i32;
-        }
-        nonzero = (rhi != 0) as i32;
-    }
-
-    /* set rhi to top 63bit of the result (last bit is sticky) */
-    if nonzero != 0 {
-        e += 64;
-        d = rhi.leading_zeros() as i32 - 1;
-        /* note: d > 0 */
-        rhi = (rhi << d) | (rlo >> (64 - d)) | ((rlo << d) != 0) as u64;
-    } else if rlo != 0 {
-        d = rlo.leading_zeros() as i32 - 1;
-        if d < 0 {
-            rhi = (rlo >> 1) | (rlo & 1);
-        } else {
-            rhi = rlo << d;
-        }
-    } else {
-        /* exact +-0 */
-        return x * y + z;
-    }
-    e -= d;
-
-    /* convert to double */
-    let mut i: i64 = rhi as i64; /* i is in [1<<62,(1<<63)-1] */
-    if sign != 0 {
-        i = -i;
-    }
-    let mut r: f64 = i as f64; /* |r| is in [0x1p62,0x1p63] */
-
-    if e < -1022 - 62 {
-        /* result is subnormal before rounding */
-        if e == -1022 - 63 {
-            let mut c: f64 = x1p63;
-            if sign != 0 {
-                c = -c;
-            }
-            if r == c {
-                /* min normal after rounding, underflow depends
-                on arch behaviour which can be imitated by
-                a double to float conversion */
-                let fltmin: f32 = (x0_ffffff8p_63 * f32::MIN_POSITIVE as f64 * r) as f32;
-                return f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * fltmin as f64;
-            }
-            /* one bit is lost when scaled, add another top bit to
-            only round once at conversion if it is inexact */
-            if (rhi << 53) != 0 {
-                i = ((rhi >> 1) | (rhi & 1) | (1 << 62)) as i64;
-                if sign != 0 {
-                    i = -i;
-                }
-                r = i as f64;
-                r = 2. * r - c; /* remove top bit */
-
-                /* raise underflow portably, such that it
-                cannot be optimized away */
-                {
-                    let tiny: f64 = f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * r;
-                    r += (tiny * tiny) * (r - r);
-                }
-            }
-        } else {
-            /* only round once when scaled */
-            d = 10;
-            i = (((rhi >> d) | ((rhi << (64 - d)) != 0) as u64) << d) as i64;
-            if sign != 0 {
-                i = -i;
-            }
-            r = i as f64;
-        }
-    }
-    scalbn(r, e)
+    return super::generic::fma(x, y, z);
 }
 
 #[cfg(test)]
diff --git a/src/math/generic/fma.rs b/src/math/generic/fma.rs
new file mode 100644
index 000000000..3d5459f1a
--- /dev/null
+++ b/src/math/generic/fma.rs
@@ -0,0 +1,227 @@
+use core::{f32, f64};
+
+use super::super::support::{DInt, HInt, IntTy};
+use super::super::{CastFrom, CastInto, Float, Int, MinInt};
+
+const ZEROINFNAN: i32 = 0x7ff - 0x3ff - 52 - 1;
+
+/// Fused multiply-add that works when there is not a larger float size available. Currently this
+/// is still specialized only for `f64`. Computes `(x * y) + z`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fma<F>(x: F, y: F, z: F) -> F
+where
+    F: Float + FmaHelper,
+    F: CastFrom<F::SignedInt>,
+    F: CastFrom<i8>,
+    F::Int: HInt,
+    u32: CastInto<F::Int>,
+{
+    let one = IntTy::<F>::ONE;
+    let zero = IntTy::<F>::ZERO;
+    let magic = F::from_parts(false, F::BITS - 1 + F::EXP_BIAS, zero);
+
+    /* normalize so top 10bits and last bit are 0 */
+    let nx = Norm::from_float(x);
+    let ny = Norm::from_float(y);
+    let nz = Norm::from_float(z);
+
+    if nx.e >= ZEROINFNAN || ny.e >= ZEROINFNAN {
+        return x * y + z;
+    }
+    if nz.e >= ZEROINFNAN {
+        if nz.e > ZEROINFNAN {
+            /* z==0 */
+            return x * y + z;
+        }
+        return z;
+    }
+
+    /* mul: r = x*y */
+    let zhi: F::Int;
+    let zlo: F::Int;
+    let (mut rlo, mut rhi) = nx.m.widen_mul(ny.m).lo_hi();
+
+    /* either top 20 or 21 bits of rhi and last 2 bits of rlo are 0 */
+
+    /* align exponents */
+    let mut e: i32 = nx.e + ny.e;
+    let mut d: i32 = nz.e - e;
+    let sbits = F::BITS as i32;
+
+    /* shift bits z<<=kz, r>>=kr, so kz+kr == d, set e = e+kr (== ez-kz) */
+    if d > 0 {
+        if d < sbits {
+            zlo = nz.m << d;
+            zhi = nz.m >> (sbits - d);
+        } else {
+            zlo = zero;
+            zhi = nz.m;
+            e = nz.e - sbits;
+            d -= sbits;
+            if d == 0 {
+            } else if d < sbits {
+                rlo = (rhi << (sbits - d))
+                    | (rlo >> d)
+                    | IntTy::<F>::from((rlo << (sbits - d)) != zero);
+                rhi = rhi >> d;
+            } else {
+                rlo = one;
+                rhi = zero;
+            }
+        }
+    } else {
+        zhi = zero;
+        d = -d;
+        if d == 0 {
+            zlo = nz.m;
+        } else if d < sbits {
+            zlo = (nz.m >> d) | IntTy::<F>::from((nz.m << (sbits - d)) != zero);
+        } else {
+            zlo = one;
+        }
+    }
+
+    /* add */
+    let mut neg = nx.neg ^ ny.neg;
+    let samesign: bool = !neg ^ nz.neg;
+    let mut nonzero: i32 = 1;
+    if samesign {
+        /* r += z */
+        rlo = rlo.wrapping_add(zlo);
+        rhi += zhi + IntTy::<F>::from(rlo < zlo);
+    } else {
+        /* r -= z */
+        let (res, borrow) = rlo.overflowing_sub(zlo);
+        rlo = res;
+        rhi = rhi.wrapping_sub(zhi.wrapping_add(IntTy::<F>::from(borrow)));
+        if (rhi >> (F::BITS - 1)) != zero {
+            rlo = rlo.signed().wrapping_neg().unsigned();
+            rhi = rhi.signed().wrapping_neg().unsigned() - IntTy::<F>::from(rlo != zero);
+            neg = !neg;
+        }
+        nonzero = (rhi != zero) as i32;
+    }
+
+    /* set rhi to top 63bit of the result (last bit is sticky) */
+    if nonzero != 0 {
+        e += sbits;
+        d = rhi.leading_zeros() as i32 - 1;
+        /* note: d > 0 */
+        rhi = (rhi << d) | (rlo >> (sbits - d)) | IntTy::<F>::from((rlo << d) != zero);
+    } else if rlo != zero {
+        d = rlo.leading_zeros() as i32 - 1;
+        if d < 0 {
+            rhi = (rlo >> 1) | (rlo & one);
+        } else {
+            rhi = rlo << d;
+        }
+    } else {
+        /* exact +-0 */
+        return x * y + z;
+    }
+    e -= d;
+
+    /* convert to double */
+    let mut i: F::SignedInt = rhi.signed(); /* i is in [1<<62,(1<<63)-1] */
+    if neg {
+        i = -i;
+    }
+
+    let mut r: F = F::cast_from_lossy(i); /* |r| is in [0x1p62,0x1p63] */
+
+    if e < -(F::EXP_BIAS as i32 - 1) - (sbits - 2) {
+        /* result is subnormal before rounding */
+        if e == -(F::EXP_BIAS as i32 - 1) - (sbits - 1) {
+            let mut c: F = magic;
+            if neg {
+                c = -c;
+            }
+            if r == c {
+                /* min normal after rounding, underflow depends
+                 * on arch behaviour which can be imitated by
+                 * a double to float conversion */
+                return r.raise_underflow();
+            }
+            /* one bit is lost when scaled, add another top bit to
+             * only round once at conversion if it is inexact */
+            if (rhi << F::SIG_BITS) != zero {
+                let iu: F::Int = (rhi >> 1) | (rhi & one) | (one << 62);
+                i = iu.signed();
+                if neg {
+                    i = -i;
+                }
+                r = F::cast_from_lossy(i);
+                r = F::cast_from(2i8) * r - c; /* remove top bit */
+
+                /* raise underflow portably, such that it
+                 * cannot be optimized away */
+                r += r.raise_underflow2();
+            }
+        } else {
+            /* only round once when scaled */
+            d = 10;
+            i = (((rhi >> d) | IntTy::<F>::from(rhi << (F::BITS as i32 - d) != zero)) << d)
+                .signed();
+            if neg {
+                i = -i;
+            }
+            r = F::cast_from(i);
+        }
+    }
+
+    super::scalbn(r, e)
+}
+
+/// Representation of `F` that has handled subnormals.
+struct Norm<F: Float> {
+    /// Normalized significand with one guard bit.
+    m: F::Int,
+    /// Unbiased exponent, normalized.
+    e: i32,
+    neg: bool,
+}
+
+impl<F: Float> Norm<F> {
+    fn from_float(x: F) -> Self {
+        let mut ix = x.to_bits();
+        let mut e = x.exp() as i32;
+        let neg = x.is_sign_negative();
+        if e == 0 {
+            // Normalize subnormals by multiplication
+            let magic = F::from_parts(false, F::BITS - 1 + F::EXP_BIAS, F::Int::ZERO);
+            let scaled = x * magic;
+            ix = scaled.to_bits();
+            e = scaled.exp() as i32;
+            e = if e != 0 { e - (F::BITS as i32 - 1) } else { 0x800 };
+        }
+
+        e -= F::EXP_BIAS as i32 + 52 + 1;
+
+        ix &= F::SIG_MASK;
+        ix |= F::IMPLICIT_BIT;
+        ix <<= 1; // add a guard bit
+
+        Self { m: ix, e, neg }
+    }
+}
+
+/// Type-specific helpers that are not needed outside of fma.
+pub trait FmaHelper {
+    fn raise_underflow(self) -> Self;
+    fn raise_underflow2(self) -> Self;
+}
+
+impl FmaHelper for f64 {
+    fn raise_underflow(self) -> Self {
+        let x0_ffffff8p_63 = f64::from_bits(0x3bfffffff0000000); // 0x0.ffffff8p-63
+        let fltmin: f32 = (x0_ffffff8p_63 * f32::MIN_POSITIVE as f64 * self) as f32;
+        f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * fltmin as f64
+    }
+
+    fn raise_underflow2(self) -> Self {
+        /* raise underflow portably, such that it
+         * cannot be optimized away */
+        let tiny: f64 = f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * self;
+        (tiny * tiny) * (self - self)
+    }
+}
diff --git a/src/math/generic/mod.rs b/src/math/generic/mod.rs
index 68686b0b2..e19cc83a9 100644
--- a/src/math/generic/mod.rs
+++ b/src/math/generic/mod.rs
@@ -3,6 +3,7 @@ mod copysign;
 mod fabs;
 mod fdim;
 mod floor;
+mod fma;
 mod fmax;
 mod fmin;
 mod fmod;
@@ -17,6 +18,7 @@ pub use copysign::copysign;
 pub use fabs::fabs;
 pub use fdim::fdim;
 pub use floor::floor;
+pub use fma::fma;
 pub use fmax::fmax;
 pub use fmin::fmin;
 pub use fmod::fmod;
diff --git a/src/math/support/float_traits.rs b/src/math/support/float_traits.rs
index 1fe2cb424..24cf7d4b0 100644
--- a/src/math/support/float_traits.rs
+++ b/src/math/support/float_traits.rs
@@ -23,7 +23,9 @@ pub trait Float:
     type Int: Int<OtherSign = Self::SignedInt, Unsigned = Self::Int>;
 
     /// A int of the same width as the float
-    type SignedInt: Int + MinInt<OtherSign = Self::Int, Unsigned = Self::Int>;
+    type SignedInt: Int
+        + MinInt<OtherSign = Self::Int, Unsigned = Self::Int>
+        + ops::Neg<Output = Self::SignedInt>;
 
     const ZERO: Self;
     const NEG_ZERO: Self;
diff --git a/src/math/support/int_traits.rs b/src/math/support/int_traits.rs
index b403c658c..793a0f306 100644
--- a/src/math/support/int_traits.rs
+++ b/src/math/support/int_traits.rs
@@ -52,10 +52,14 @@ pub trait Int:
     + ops::Sub<Output = Self>
     + ops::Mul<Output = Self>
     + ops::Div<Output = Self>
+    + ops::Shl<i32, Output = Self>
+    + ops::Shl<u32, Output = Self>
+    + ops::Shr<i32, Output = Self>
     + ops::Shr<u32, Output = Self>
     + ops::BitXor<Output = Self>
     + ops::BitAnd<Output = Self>
     + cmp::Ord
+    + From<bool>
     + CastFrom<i32>
     + CastFrom<u16>
     + CastFrom<u32>
@@ -92,6 +96,7 @@ pub trait Int:
     fn wrapping_shr(self, other: u32) -> Self;
     fn rotate_left(self, other: u32) -> Self;
     fn overflowing_add(self, other: Self) -> (Self, bool);
+    fn overflowing_sub(self, other: Self) -> (Self, bool);
     fn leading_zeros(self) -> u32;
     fn ilog2(self) -> u32;
 }
@@ -150,6 +155,10 @@ macro_rules! int_impl_common {
             <Self>::overflowing_add(self, other)
         }
 
+        fn overflowing_sub(self, other: Self) -> (Self, bool) {
+            <Self>::overflowing_sub(self, other)
+        }
+
         fn leading_zeros(self) -> u32 {
             <Self>::leading_zeros(self)
         }
@@ -399,6 +408,30 @@ macro_rules! cast_into {
     )*};
 }
 
+macro_rules! cast_into_float {
+    ($ty:ty) => {
+        #[cfg(f16_enabled)]
+        cast_into_float!($ty; f16);
+
+        cast_into_float!($ty; f32, f64);
+
+        #[cfg(f128_enabled)]
+        cast_into_float!($ty; f128);
+    };
+    ($ty:ty; $($into:ty),*) => {$(
+        impl CastInto<$into> for $ty {
+            fn cast(self) -> $into {
+                debug_assert_eq!(self as $into as $ty, self, "inexact float cast");
+                self as $into
+            }
+
+            fn cast_lossy(self) -> $into {
+                self as $into
+            }
+        }
+    )*};
+}
+
 cast_into!(usize);
 cast_into!(isize);
 cast_into!(u8);
@@ -411,3 +444,9 @@ cast_into!(u64);
 cast_into!(i64);
 cast_into!(u128);
 cast_into!(i128);
+
+cast_into_float!(i8);
+cast_into_float!(i16);
+cast_into_float!(i32);
+cast_into_float!(i64);
+cast_into_float!(i128);

From fd7592ec1d0da229812613dc3a377b46baa16d66 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 5 Feb 2025 21:18:33 +0000
Subject: [PATCH 208/279] Commonize the signature for all instances of
 `get_test_cases`

In order to make these more interchangeable in more places, always
return `(impl Iterator, u64)`. This will facilitate using other
generators for extensive tests.
---
 crates/libm-test/benches/random.rs           |  2 +-
 crates/libm-test/examples/plot_domains.rs    |  8 ++++-
 crates/libm-test/src/gen/edge_cases.rs       | 29 ++++++++++--------
 crates/libm-test/src/gen/random.rs           | 31 +++++++++++---------
 crates/libm-test/tests/compare_built_musl.rs |  4 +--
 crates/libm-test/tests/multiprecision.rs     |  4 +--
 6 files changed, 45 insertions(+), 33 deletions(-)

diff --git a/crates/libm-test/benches/random.rs b/crates/libm-test/benches/random.rs
index 56d288c33..66486a56a 100644
--- a/crates/libm-test/benches/random.rs
+++ b/crates/libm-test/benches/random.rs
@@ -54,7 +54,7 @@ where
 
     let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Musl, GeneratorKind::Random);
     let benchvec: Vec<_> =
-        random::get_test_cases::<Op::RustArgs>(&ctx).take(BENCH_ITER_ITEMS).collect();
+        random::get_test_cases::<Op::RustArgs>(&ctx).0.take(BENCH_ITER_ITEMS).collect();
 
     // Perform a sanity check that we are benchmarking the same thing
     // Don't test against musl if it is not available
diff --git a/crates/libm-test/examples/plot_domains.rs b/crates/libm-test/examples/plot_domains.rs
index fb7b854df..441889c69 100644
--- a/crates/libm-test/examples/plot_domains.rs
+++ b/crates/libm-test/examples/plot_domains.rs
@@ -58,7 +58,13 @@ where
     let mut ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Mpfr, GeneratorKind::QuickSpaced);
     plot_one_generator(out_dir, &ctx, "logspace", config, spaced::get_test_cases::<Op>(&ctx).0);
     ctx.gen_kind = GeneratorKind::EdgeCases;
-    plot_one_generator(out_dir, &ctx, "edge_cases", config, edge_cases::get_test_cases::<Op>(&ctx));
+    plot_one_generator(
+        out_dir,
+        &ctx,
+        "edge_cases",
+        config,
+        edge_cases::get_test_cases::<Op>(&ctx).0,
+    );
 }
 
 /// Plot the output of a single generator.
diff --git a/crates/libm-test/src/gen/edge_cases.rs b/crates/libm-test/src/gen/edge_cases.rs
index d4014bdb3..8de954ae3 100644
--- a/crates/libm-test/src/gen/edge_cases.rs
+++ b/crates/libm-test/src/gen/edge_cases.rs
@@ -9,7 +9,7 @@ use crate::{CheckCtx, FloatExt, MathOp, test_log};
 
 /// Generate a sequence of edge cases, e.g. numbers near zeroes and infiniteis.
 pub trait EdgeCaseInput<Op> {
-    fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> + Send;
+    fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self> + Send, u64);
 }
 
 /// Create a list of values around interesting points (infinities, zeroes, NaNs).
@@ -140,10 +140,10 @@ macro_rules! impl_edge_case_input {
         where
             Op: MathOp<RustArgs = Self, FTy = $fty>,
         {
-            fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
+            fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
                 let (iter0, steps0) = float_edge_cases::<Op>(ctx, 0);
                 let iter0 = iter0.map(|v| (v,));
-                KnownSize::new(iter0, steps0)
+                (iter0, steps0)
             }
         }
 
@@ -151,13 +151,13 @@ macro_rules! impl_edge_case_input {
         where
             Op: MathOp<RustArgs = Self, FTy = $fty>,
         {
-            fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
+            fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
                 let (iter0, steps0) = float_edge_cases::<Op>(ctx, 0);
                 let (iter1, steps1) = float_edge_cases::<Op>(ctx, 1);
                 let iter =
                     iter0.flat_map(move |first| iter1.clone().map(move |second| (first, second)));
                 let count = steps0.checked_mul(steps1).unwrap();
-                KnownSize::new(iter, count)
+                (iter, count)
             }
         }
 
@@ -165,7 +165,7 @@ macro_rules! impl_edge_case_input {
         where
             Op: MathOp<RustArgs = Self, FTy = $fty>,
         {
-            fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
+            fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
                 let (iter0, steps0) = float_edge_cases::<Op>(ctx, 0);
                 let (iter1, steps1) = float_edge_cases::<Op>(ctx, 1);
                 let (iter2, steps2) = float_edge_cases::<Op>(ctx, 2);
@@ -177,7 +177,7 @@ macro_rules! impl_edge_case_input {
                     });
                 let count = steps0.checked_mul(steps1).unwrap().checked_mul(steps2).unwrap();
 
-                KnownSize::new(iter, count)
+                (iter, count)
             }
         }
 
@@ -185,7 +185,7 @@ macro_rules! impl_edge_case_input {
         where
             Op: MathOp<RustArgs = Self, FTy = $fty>,
         {
-            fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
+            fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
                 let (iter0, steps0) = int_edge_cases(ctx, 0);
                 let (iter1, steps1) = float_edge_cases::<Op>(ctx, 1);
 
@@ -193,7 +193,7 @@ macro_rules! impl_edge_case_input {
                     iter0.flat_map(move |first| iter1.clone().map(move |second| (first, second)));
                 let count = steps0.checked_mul(steps1).unwrap();
 
-                KnownSize::new(iter, count)
+                (iter, count)
             }
         }
 
@@ -201,7 +201,7 @@ macro_rules! impl_edge_case_input {
         where
             Op: MathOp<RustArgs = Self, FTy = $fty>,
         {
-            fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
+            fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
                 let (iter0, steps0) = float_edge_cases::<Op>(ctx, 0);
                 let (iter1, steps1) = int_edge_cases(ctx, 1);
 
@@ -209,7 +209,7 @@ macro_rules! impl_edge_case_input {
                     iter0.flat_map(move |first| iter1.clone().map(move |second| (first, second)));
                 let count = steps0.checked_mul(steps1).unwrap();
 
-                KnownSize::new(iter, count)
+                (iter, count)
             }
         }
     };
@@ -224,10 +224,13 @@ impl_edge_case_input!(f128);
 
 pub fn get_test_cases<Op>(
     ctx: &CheckCtx,
-) -> impl ExactSizeIterator<Item = Op::RustArgs> + use<'_, Op>
+) -> (impl Iterator<Item = Op::RustArgs> + Send + use<'_, Op>, u64)
 where
     Op: MathOp,
     Op::RustArgs: EdgeCaseInput<Op>,
 {
-    Op::RustArgs::get_cases(ctx)
+    let (iter, count) = Op::RustArgs::get_cases(ctx);
+
+    // Wrap in `KnownSize` so we get an assertion if the cuunt is wrong.
+    (KnownSize::new(iter, count), count)
 }
diff --git a/crates/libm-test/src/gen/random.rs b/crates/libm-test/src/gen/random.rs
index 56c39981a..5b127f38d 100644
--- a/crates/libm-test/src/gen/random.rs
+++ b/crates/libm-test/src/gen/random.rs
@@ -26,8 +26,8 @@ pub(crate) static SEED: LazyLock<[u8; 32]> = LazyLock::new(|| {
 });
 
 /// Generate a sequence of random values of this type.
-pub trait RandomInput {
-    fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self>;
+pub trait RandomInput: Sized {
+    fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self> + Send, u64);
 }
 
 /// Generate a sequence of deterministically random floats.
@@ -51,25 +51,25 @@ fn random_ints(count: u64, range: RangeInclusive<i32>) -> impl Iterator<Item = i
 macro_rules! impl_random_input {
     ($fty:ty) => {
         impl RandomInput for ($fty,) {
-            fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
+            fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
                 let count = iteration_count(ctx, 0);
                 let iter = random_floats(count).map(|f: $fty| (f,));
-                KnownSize::new(iter, count)
+                (iter, count)
             }
         }
 
         impl RandomInput for ($fty, $fty) {
-            fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
+            fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
                 let count0 = iteration_count(ctx, 0);
                 let count1 = iteration_count(ctx, 1);
                 let iter = random_floats(count0)
                     .flat_map(move |f1: $fty| random_floats(count1).map(move |f2: $fty| (f1, f2)));
-                KnownSize::new(iter, count0 * count1)
+                (iter, count0 * count1)
             }
         }
 
         impl RandomInput for ($fty, $fty, $fty) {
-            fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
+            fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
                 let count0 = iteration_count(ctx, 0);
                 let count1 = iteration_count(ctx, 1);
                 let count2 = iteration_count(ctx, 2);
@@ -78,30 +78,30 @@ macro_rules! impl_random_input {
                         random_floats(count2).map(move |f3: $fty| (f1, f2, f3))
                     })
                 });
-                KnownSize::new(iter, count0 * count1 * count2)
+                (iter, count0 * count1 * count2)
             }
         }
 
         impl RandomInput for (i32, $fty) {
-            fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
+            fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
                 let count0 = iteration_count(ctx, 0);
                 let count1 = iteration_count(ctx, 1);
                 let range0 = int_range(ctx, 0);
                 let iter = random_ints(count0, range0)
                     .flat_map(move |f1: i32| random_floats(count1).map(move |f2: $fty| (f1, f2)));
-                KnownSize::new(iter, count0 * count1)
+                (iter, count0 * count1)
             }
         }
 
         impl RandomInput for ($fty, i32) {
-            fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
+            fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
                 let count0 = iteration_count(ctx, 0);
                 let count1 = iteration_count(ctx, 1);
                 let range1 = int_range(ctx, 1);
                 let iter = random_floats(count0).flat_map(move |f1: $fty| {
                     random_ints(count1, range1.clone()).map(move |f2: i32| (f1, f2))
                 });
-                KnownSize::new(iter, count0 * count1)
+                (iter, count0 * count1)
             }
         }
     };
@@ -117,6 +117,9 @@ impl_random_input!(f128);
 /// Create a test case iterator.
 pub fn get_test_cases<RustArgs: RandomInput>(
     ctx: &CheckCtx,
-) -> impl Iterator<Item = RustArgs> + use<'_, RustArgs> {
-    RustArgs::get_cases(ctx)
+) -> (impl Iterator<Item = RustArgs> + Send + use<'_, RustArgs>, u64) {
+    let (iter, count) = RustArgs::get_cases(ctx);
+
+    // Wrap in `KnownSize` so we get an assertion if the cuunt is wrong.
+    (KnownSize::new(iter, count), count)
 }
diff --git a/crates/libm-test/tests/compare_built_musl.rs b/crates/libm-test/tests/compare_built_musl.rs
index 0b0a9f097..c8beaffc3 100644
--- a/crates/libm-test/tests/compare_built_musl.rs
+++ b/crates/libm-test/tests/compare_built_musl.rs
@@ -39,7 +39,7 @@ macro_rules! musl_tests {
             fn [< musl_random_ $fn_name >]() {
                 type Op = libm_test::op::$fn_name::Routine;
                 let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::Random);
-                let cases = random::get_test_cases::<<Op as MathOp>::RustArgs>(&ctx);
+                let cases = random::get_test_cases::<<Op as MathOp>::RustArgs>(&ctx).0;
                 musl_runner::<Op>(&ctx, cases, musl_math_sys::$fn_name);
             }
 
@@ -48,7 +48,7 @@ macro_rules! musl_tests {
             fn [< musl_edge_case_ $fn_name >]() {
                 type Op = libm_test::op::$fn_name::Routine;
                 let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::EdgeCases);
-                let cases = edge_cases::get_test_cases::<Op>(&ctx);
+                let cases = edge_cases::get_test_cases::<Op>(&ctx).0;
                 musl_runner::<Op>(&ctx, cases, musl_math_sys::$fn_name);
             }
 
diff --git a/crates/libm-test/tests/multiprecision.rs b/crates/libm-test/tests/multiprecision.rs
index 761ca1f85..0d5c5e60c 100644
--- a/crates/libm-test/tests/multiprecision.rs
+++ b/crates/libm-test/tests/multiprecision.rs
@@ -29,7 +29,7 @@ macro_rules! mp_tests {
             fn [< mp_random_ $fn_name >]() {
                 type Op = libm_test::op::$fn_name::Routine;
                 let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::Random);
-                let cases = random::get_test_cases::<<Op as MathOp>::RustArgs>(&ctx);
+                let cases = random::get_test_cases::<<Op as MathOp>::RustArgs>(&ctx).0;
                 mp_runner::<Op>(&ctx, cases);
             }
 
@@ -38,7 +38,7 @@ macro_rules! mp_tests {
             fn [< mp_edge_case_ $fn_name >]() {
                 type Op = libm_test::op::$fn_name::Routine;
                 let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::EdgeCases);
-                let cases = edge_cases::get_test_cases::<Op>(&ctx);
+                let cases = edge_cases::get_test_cases::<Op>(&ctx).0;
                 mp_runner::<Op>(&ctx, cases);
             }
 

From 9612576b896a8a448dc06cb15e6b53dcc5368c85 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 6 Feb 2025 00:02:00 +0000
Subject: [PATCH 209/279] Print the hex float format upon failure

Now that we have a hex float formatter, make use of it for test output.
This produces values that are easier to read than the bitwise hex
representation.

Example:

    thread 'mp_quickspace_fmaf128' panicked at crates/libm-test/tests/multiprecision.rs:17:48:
    called `Result::unwrap()` on an `Err` value:
        input:    (0xe38d71c71c71c71c71c71c71c71c71c8, 0xe38d71c71c71c71c71c71c71c71c71c8, 0xffff0000000000000000000000000000)
        as hex:   (-0x1.71c71c71c71c71c71c71c71c71c8p+9102, -0x1.71c71c71c71c71c71c71c71c71c8p+9102, -inf)
        as bits:  (0xe38d71c71c71c71c71c71c71c71c71c8, 0xe38d71c71c71c71c71c71c71c71c71c8, 0xffff0000000000000000000000000000)
        expected: 0xffff0000000000000000000000000000 -inf 0xffff0000000000000000000000000000
        actual:   0x7fff8000000000000000000000000000 NaN 0x7fff8000000000000000000000000000

    Caused by:
        real value != NaN
---
 crates/libm-test/src/test_traits.rs | 46 ++++++++++++++++++++++++-----
 1 file changed, 39 insertions(+), 7 deletions(-)

diff --git a/crates/libm-test/src/test_traits.rs b/crates/libm-test/src/test_traits.rs
index a5806943e..1bd5bce16 100644
--- a/crates/libm-test/src/test_traits.rs
+++ b/crates/libm-test/src/test_traits.rs
@@ -9,6 +9,7 @@
 use std::fmt;
 
 use anyhow::{Context, anyhow, bail, ensure};
+use libm::support::Hexf;
 
 use crate::precision::CheckAction;
 use crate::{CheckCtx, Float, Int, MaybeOverride, SpecialCase, TestResult};
@@ -35,7 +36,10 @@ pub trait CheckOutput<Input>: Sized {
 ///
 /// This is only used for printing errors so allocating is okay.
 pub trait Hex: Copy {
+    /// Hex integer syntax.
     fn hex(self) -> String;
+    /// Hex float syntax.
+    fn hexf(self) -> String;
 }
 
 /* implement `TupleCall` */
@@ -128,6 +132,10 @@ where
     fn hex(self) -> String {
         format!("({},)", self.0.hex())
     }
+
+    fn hexf(self) -> String {
+        format!("({},)", self.0.hexf())
+    }
 }
 
 impl<T1, T2> Hex for (T1, T2)
@@ -138,6 +146,10 @@ where
     fn hex(self) -> String {
         format!("({}, {})", self.0.hex(), self.1.hex())
     }
+
+    fn hexf(self) -> String {
+        format!("({}, {})", self.0.hexf(), self.1.hexf())
+    }
 }
 
 impl<T1, T2, T3> Hex for (T1, T2, T3)
@@ -149,6 +161,10 @@ where
     fn hex(self) -> String {
         format!("({}, {}, {})", self.0.hex(), self.1.hex(), self.2.hex())
     }
+
+    fn hexf(self) -> String {
+        format!("({}, {}, {})", self.0.hexf(), self.1.hexf(), self.2.hexf())
+    }
 }
 
 /* trait implementations for ints */
@@ -160,6 +176,10 @@ macro_rules! impl_int {
                 fn hex(self) -> String {
                     format!("{self:#0width$x}", width = ((Self::BITS / 4) + 2) as usize)
                 }
+
+                fn hexf(self) -> String {
+                    String::new()
+                }
             }
 
             impl<Input> $crate::CheckOutput<Input> for $ty
@@ -234,6 +254,10 @@ macro_rules! impl_float {
                         width = ((Self::BITS / 4) + 2) as usize
                     )
                 }
+
+                fn hexf(self) -> String {
+                    format!("{}", Hexf(self))
+                }
             }
 
             impl<Input> $crate::CheckOutput<Input> for $ty
@@ -324,13 +348,18 @@ where
     res.with_context(|| {
         format!(
             "\
-            \n    input:    {input:?} {ibits}\
-            \n    expected: {expected:<22?} {expbits}\
-            \n    actual:   {actual:<22?} {actbits}\
+            \n    input:    {input:?}\
+            \n    as hex:   {ihex}\
+            \n    as bits:  {ibits}\
+            \n    expected: {expected:<22?} {exphex} {expbits}\
+            \n    actual:   {actual:<22?} {acthex} {actbits}\
             ",
-            actbits = actual.hex(),
-            expbits = expected.hex(),
+            ihex = input.hexf(),
             ibits = input.hex(),
+            exphex = expected.hexf(),
+            expbits = expected.hex(),
+            actbits = actual.hex(),
+            acthex = actual.hexf(),
         )
     })
 }
@@ -365,12 +394,15 @@ macro_rules! impl_tuples {
                         .with_context(|| format!(
                             "full context:\
                             \n    input:    {input:?} {ibits}\
+                            \n    as hex:   {ihex}\
+                            \n    as bits:  {ibits}\
                             \n    expected: {expected:?} {expbits}\
                             \n    actual:   {self:?} {actbits}\
                             ",
-                            actbits = self.hex(),
-                            expbits = expected.hex(),
+                            ihex = input.hexf(),
                             ibits = input.hex(),
+                            expbits = expected.hex(),
+                            actbits = self.hex(),
                         ))
                 }
             }

From b2a1e35b9de3b1747eaa67131343edbcd0dd44b5 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 6 Feb 2025 00:34:56 +0000
Subject: [PATCH 210/279] fma: Ensure zero has the correct sign

Currently, `fma(tiny, -tiny, 0.0)` returns 0.0 while the answer should
be -0.0. This is because `-0.0 + 0.0 = +0.0` in the default rounding
mode; however, the result should be negative. Musl has the same pattern
but that version worked because the C compiler was contracting `x*y + z`
to (ironically) `fmadd`.

Musl was fixed in 9683bd6241 ("math: fix fma(x,y,0) when x*y rounds to
-0"). Add the same fix here, which allows dropping the xfails.
---
 crates/libm-test/src/precision.rs | 47 ++-----------------------------
 src/math/generic/fma.rs           |  2 +-
 2 files changed, 3 insertions(+), 46 deletions(-)

diff --git a/crates/libm-test/src/precision.rs b/crates/libm-test/src/precision.rs
index 051960b7a..596f91fe1 100644
--- a/crates/libm-test/src/precision.rs
+++ b/crates/libm-test/src/precision.rs
@@ -558,48 +558,5 @@ impl MaybeOverride<(f64, i32)> for SpecialCase {}
 #[cfg(f128_enabled)]
 impl MaybeOverride<(f128, i32)> for SpecialCase {}
 
-impl MaybeOverride<(f32, f32, f32)> for SpecialCase {
-    fn check_float<F: Float>(
-        input: (f32, f32, f32),
-        actual: F,
-        expected: F,
-        ctx: &CheckCtx,
-    ) -> CheckAction {
-        ternop_common(input, actual, expected, ctx)
-    }
-}
-impl MaybeOverride<(f64, f64, f64)> for SpecialCase {
-    fn check_float<F: Float>(
-        input: (f64, f64, f64),
-        actual: F,
-        expected: F,
-        ctx: &CheckCtx,
-    ) -> CheckAction {
-        ternop_common(input, actual, expected, ctx)
-    }
-}
-
-// F1 and F2 are always the same type, this is just to please generics
-fn ternop_common<F1: Float, F2: Float>(
-    input: (F1, F1, F1),
-    actual: F2,
-    expected: F2,
-    ctx: &CheckCtx,
-) -> CheckAction {
-    // FIXME(fma): 754-2020 says "When the exact result of (a × b) + c is non-zero yet the result
-    // of fusedMultiplyAdd is zero because of rounding, the zero result takes the sign of the
-    // exact result". Our implementation returns the wrong sign:
-    //     fma(5e-324, -5e-324, 0.0) = 0.0 (should be -0.0)
-    if ctx.base_name == BaseName::Fma
-        && (input.0.is_sign_negative() ^ input.1.is_sign_negative())
-        && input.0 != F1::ZERO
-        && input.1 != F1::ZERO
-        && input.2.biteq(F1::ZERO)
-        && expected.biteq(F2::NEG_ZERO)
-        && actual.biteq(F2::ZERO)
-    {
-        return XFAIL("fma sign");
-    }
-
-    DEFAULT
-}
+impl MaybeOverride<(f32, f32, f32)> for SpecialCase {}
+impl MaybeOverride<(f64, f64, f64)> for SpecialCase {}
diff --git a/src/math/generic/fma.rs b/src/math/generic/fma.rs
index 3d5459f1a..b0e2117ea 100644
--- a/src/math/generic/fma.rs
+++ b/src/math/generic/fma.rs
@@ -31,7 +31,7 @@ where
     if nz.e >= ZEROINFNAN {
         if nz.e > ZEROINFNAN {
             /* z==0 */
-            return x * y + z;
+            return x * y;
         }
         return z;
     }

From 41558695b7fb45cacaf5667e717e84cd5443f2af Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 6 Feb 2025 01:59:44 +0000
Subject: [PATCH 211/279] Switch `musl` to track `master`

A few bugs have been fixed, including the sign of `fma(tiny, -tiny,
0.0)`. Switch to tracking `master` rather than the latest tag so we
don't need to xfail these tests.
---
 crates/musl-math-sys/musl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/musl-math-sys/musl b/crates/musl-math-sys/musl
index 0784374d5..61399d4bd 160000
--- a/crates/musl-math-sys/musl
+++ b/crates/musl-math-sys/musl
@@ -1 +1 @@
-Subproject commit 0784374d561435f7c787a555aeab8ede699ed298
+Subproject commit 61399d4bd02ae1ec03068445aa7ffe9174466bfd

From 3f4d1e01330168f6e6db52994fabe735f5d30726 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 6 Feb 2025 04:03:11 +0000
Subject: [PATCH 212/279] Add an integration test that verifies a list of cases

We need someplace to collect known failures, previous regressions, edge
cases that are difficult to construct from generics, and similar.
Introduce this here.
---
 crates/libm-test/src/gen.rs                  |   1 +
 crates/libm-test/src/gen/case_list.rs        | 686 +++++++++++++++++++
 crates/libm-test/src/run_cfg.rs              |   8 +-
 crates/libm-test/tests/compare_built_musl.rs |  11 +-
 crates/libm-test/tests/multiprecision.rs     |  11 +-
 crates/libm-test/tests/standalone.rs         |  38 +
 6 files changed, 750 insertions(+), 5 deletions(-)
 create mode 100644 crates/libm-test/src/gen/case_list.rs
 create mode 100644 crates/libm-test/tests/standalone.rs

diff --git a/crates/libm-test/src/gen.rs b/crates/libm-test/src/gen.rs
index e0a7f5766..89ca09a7a 100644
--- a/crates/libm-test/src/gen.rs
+++ b/crates/libm-test/src/gen.rs
@@ -1,5 +1,6 @@
 //! Different generators that can create random or systematic bit patterns.
 
+pub mod case_list;
 pub mod edge_cases;
 pub mod random;
 pub mod spaced;
diff --git a/crates/libm-test/src/gen/case_list.rs b/crates/libm-test/src/gen/case_list.rs
new file mode 100644
index 000000000..9720f68e9
--- /dev/null
+++ b/crates/libm-test/src/gen/case_list.rs
@@ -0,0 +1,686 @@
+//! Test cases to verify specific values.
+//!
+//! Each routine can have a set of inputs and, optinoally, outputs. If an output is provided, it
+//! will be used to check against. If only inputs are provided, the case will be checked against
+//! a basis.
+//!
+//! This is useful for adding regression tests or expected failures.
+
+use crate::{CheckBasis, CheckCtx, GeneratorKind, MathOp, op};
+
+pub struct TestCase<Op: MathOp> {
+    pub input: Op::RustArgs,
+    pub output: Option<Op::RustRet>,
+}
+
+impl<Op: MathOp> TestCase<Op> {
+    #[expect(dead_code)]
+    fn append_inputs(v: &mut Vec<Self>, l: &[Op::RustArgs]) {
+        v.extend(l.iter().copied().map(|input| Self { input, output: None }));
+    }
+
+    fn append_pairs(v: &mut Vec<Self>, l: &[(Op::RustArgs, Option<Op::RustRet>)])
+    where
+        Op::RustRet: Copy,
+    {
+        v.extend(l.iter().copied().map(|(input, output)| Self { input, output }));
+    }
+}
+
+fn acos_cases() -> Vec<TestCase<op::acos::Routine>> {
+    vec![]
+}
+
+fn acosf_cases() -> Vec<TestCase<op::acosf::Routine>> {
+    vec![]
+}
+
+fn acosh_cases() -> Vec<TestCase<op::acosh::Routine>> {
+    vec![]
+}
+
+fn acoshf_cases() -> Vec<TestCase<op::acoshf::Routine>> {
+    vec![]
+}
+
+fn asin_cases() -> Vec<TestCase<op::asin::Routine>> {
+    vec![]
+}
+
+fn asinf_cases() -> Vec<TestCase<op::asinf::Routine>> {
+    vec![]
+}
+
+fn asinh_cases() -> Vec<TestCase<op::asinh::Routine>> {
+    vec![]
+}
+
+fn asinhf_cases() -> Vec<TestCase<op::asinhf::Routine>> {
+    vec![]
+}
+
+fn atan_cases() -> Vec<TestCase<op::atan::Routine>> {
+    vec![]
+}
+
+fn atan2_cases() -> Vec<TestCase<op::atan2::Routine>> {
+    vec![]
+}
+
+fn atan2f_cases() -> Vec<TestCase<op::atan2f::Routine>> {
+    vec![]
+}
+
+fn atanf_cases() -> Vec<TestCase<op::atanf::Routine>> {
+    vec![]
+}
+
+fn atanh_cases() -> Vec<TestCase<op::atanh::Routine>> {
+    vec![]
+}
+
+fn atanhf_cases() -> Vec<TestCase<op::atanhf::Routine>> {
+    vec![]
+}
+
+fn cbrt_cases() -> Vec<TestCase<op::cbrt::Routine>> {
+    vec![]
+}
+
+fn cbrtf_cases() -> Vec<TestCase<op::cbrtf::Routine>> {
+    vec![]
+}
+
+fn ceil_cases() -> Vec<TestCase<op::ceil::Routine>> {
+    vec![]
+}
+
+fn ceilf_cases() -> Vec<TestCase<op::ceilf::Routine>> {
+    vec![]
+}
+
+#[cfg(f128_enabled)]
+fn ceilf128_cases() -> Vec<TestCase<op::ceilf128::Routine>> {
+    vec![]
+}
+
+#[cfg(f16_enabled)]
+fn ceilf16_cases() -> Vec<TestCase<op::ceilf16::Routine>> {
+    vec![]
+}
+
+fn copysign_cases() -> Vec<TestCase<op::copysign::Routine>> {
+    vec![]
+}
+
+fn copysignf_cases() -> Vec<TestCase<op::copysignf::Routine>> {
+    vec![]
+}
+
+#[cfg(f128_enabled)]
+fn copysignf128_cases() -> Vec<TestCase<op::copysignf128::Routine>> {
+    vec![]
+}
+
+#[cfg(f16_enabled)]
+fn copysignf16_cases() -> Vec<TestCase<op::copysignf16::Routine>> {
+    vec![]
+}
+
+fn cos_cases() -> Vec<TestCase<op::cos::Routine>> {
+    vec![]
+}
+
+fn cosf_cases() -> Vec<TestCase<op::cosf::Routine>> {
+    vec![]
+}
+
+fn cosh_cases() -> Vec<TestCase<op::cosh::Routine>> {
+    vec![]
+}
+
+fn coshf_cases() -> Vec<TestCase<op::coshf::Routine>> {
+    vec![]
+}
+
+fn erf_cases() -> Vec<TestCase<op::erf::Routine>> {
+    vec![]
+}
+
+fn erfc_cases() -> Vec<TestCase<op::erfc::Routine>> {
+    vec![]
+}
+
+fn erfcf_cases() -> Vec<TestCase<op::erfcf::Routine>> {
+    vec![]
+}
+
+fn erff_cases() -> Vec<TestCase<op::erff::Routine>> {
+    vec![]
+}
+
+fn exp_cases() -> Vec<TestCase<op::exp::Routine>> {
+    vec![]
+}
+
+fn exp10_cases() -> Vec<TestCase<op::exp10::Routine>> {
+    vec![]
+}
+
+fn exp10f_cases() -> Vec<TestCase<op::exp10f::Routine>> {
+    vec![]
+}
+
+fn exp2_cases() -> Vec<TestCase<op::exp2::Routine>> {
+    vec![]
+}
+
+fn exp2f_cases() -> Vec<TestCase<op::exp2f::Routine>> {
+    vec![]
+}
+
+fn expf_cases() -> Vec<TestCase<op::expf::Routine>> {
+    vec![]
+}
+
+fn expm1_cases() -> Vec<TestCase<op::expm1::Routine>> {
+    vec![]
+}
+
+fn expm1f_cases() -> Vec<TestCase<op::expm1f::Routine>> {
+    vec![]
+}
+
+fn fabs_cases() -> Vec<TestCase<op::fabs::Routine>> {
+    vec![]
+}
+
+fn fabsf_cases() -> Vec<TestCase<op::fabsf::Routine>> {
+    vec![]
+}
+
+#[cfg(f128_enabled)]
+fn fabsf128_cases() -> Vec<TestCase<op::fabsf128::Routine>> {
+    vec![]
+}
+
+#[cfg(f16_enabled)]
+fn fabsf16_cases() -> Vec<TestCase<op::fabsf16::Routine>> {
+    vec![]
+}
+
+fn fdim_cases() -> Vec<TestCase<op::fdim::Routine>> {
+    vec![]
+}
+
+fn fdimf_cases() -> Vec<TestCase<op::fdimf::Routine>> {
+    vec![]
+}
+
+#[cfg(f128_enabled)]
+fn fdimf128_cases() -> Vec<TestCase<op::fdimf128::Routine>> {
+    vec![]
+}
+
+#[cfg(f16_enabled)]
+fn fdimf16_cases() -> Vec<TestCase<op::fdimf16::Routine>> {
+    vec![]
+}
+
+fn floor_cases() -> Vec<TestCase<op::floor::Routine>> {
+    vec![]
+}
+
+fn floorf_cases() -> Vec<TestCase<op::floorf::Routine>> {
+    vec![]
+}
+
+#[cfg(f128_enabled)]
+fn floorf128_cases() -> Vec<TestCase<op::floorf128::Routine>> {
+    vec![]
+}
+
+#[cfg(f16_enabled)]
+fn floorf16_cases() -> Vec<TestCase<op::floorf16::Routine>> {
+    vec![]
+}
+
+fn fma_cases() -> Vec<TestCase<op::fma::Routine>> {
+    let mut v = vec![];
+    TestCase::append_pairs(
+        &mut v,
+        &[
+            // Previously failure with incorrect sign
+            ((5e-324, -5e-324, 0.0), Some(-0.0)),
+        ],
+    );
+    v
+}
+
+fn fmaf_cases() -> Vec<TestCase<op::fmaf::Routine>> {
+    vec![]
+}
+
+fn fmax_cases() -> Vec<TestCase<op::fmax::Routine>> {
+    vec![]
+}
+
+fn fmaxf_cases() -> Vec<TestCase<op::fmaxf::Routine>> {
+    vec![]
+}
+
+#[cfg(f128_enabled)]
+fn fmaxf128_cases() -> Vec<TestCase<op::fmaxf128::Routine>> {
+    vec![]
+}
+
+#[cfg(f16_enabled)]
+fn fmaxf16_cases() -> Vec<TestCase<op::fmaxf16::Routine>> {
+    vec![]
+}
+
+fn fmin_cases() -> Vec<TestCase<op::fmin::Routine>> {
+    vec![]
+}
+
+fn fminf_cases() -> Vec<TestCase<op::fminf::Routine>> {
+    vec![]
+}
+
+#[cfg(f128_enabled)]
+fn fminf128_cases() -> Vec<TestCase<op::fminf128::Routine>> {
+    vec![]
+}
+
+#[cfg(f16_enabled)]
+fn fminf16_cases() -> Vec<TestCase<op::fminf16::Routine>> {
+    vec![]
+}
+
+fn fmod_cases() -> Vec<TestCase<op::fmod::Routine>> {
+    vec![]
+}
+
+fn fmodf_cases() -> Vec<TestCase<op::fmodf::Routine>> {
+    vec![]
+}
+
+#[cfg(f128_enabled)]
+fn fmodf128_cases() -> Vec<TestCase<op::fmodf128::Routine>> {
+    vec![]
+}
+
+#[cfg(f16_enabled)]
+fn fmodf16_cases() -> Vec<TestCase<op::fmodf16::Routine>> {
+    vec![]
+}
+
+fn frexp_cases() -> Vec<TestCase<op::frexp::Routine>> {
+    vec![]
+}
+
+fn frexpf_cases() -> Vec<TestCase<op::frexpf::Routine>> {
+    vec![]
+}
+
+fn hypot_cases() -> Vec<TestCase<op::hypot::Routine>> {
+    vec![]
+}
+
+fn hypotf_cases() -> Vec<TestCase<op::hypotf::Routine>> {
+    vec![]
+}
+
+fn ilogb_cases() -> Vec<TestCase<op::ilogb::Routine>> {
+    vec![]
+}
+
+fn ilogbf_cases() -> Vec<TestCase<op::ilogbf::Routine>> {
+    vec![]
+}
+
+fn j0_cases() -> Vec<TestCase<op::j0::Routine>> {
+    vec![]
+}
+
+fn j0f_cases() -> Vec<TestCase<op::j0f::Routine>> {
+    vec![]
+}
+
+fn j1_cases() -> Vec<TestCase<op::j1::Routine>> {
+    vec![]
+}
+
+fn j1f_cases() -> Vec<TestCase<op::j1f::Routine>> {
+    vec![]
+}
+
+fn jn_cases() -> Vec<TestCase<op::jn::Routine>> {
+    vec![]
+}
+
+fn jnf_cases() -> Vec<TestCase<op::jnf::Routine>> {
+    vec![]
+}
+
+fn ldexp_cases() -> Vec<TestCase<op::ldexp::Routine>> {
+    vec![]
+}
+
+fn ldexpf_cases() -> Vec<TestCase<op::ldexpf::Routine>> {
+    vec![]
+}
+
+#[cfg(f128_enabled)]
+fn ldexpf128_cases() -> Vec<TestCase<op::ldexpf128::Routine>> {
+    vec![]
+}
+
+#[cfg(f16_enabled)]
+fn ldexpf16_cases() -> Vec<TestCase<op::ldexpf16::Routine>> {
+    vec![]
+}
+
+fn lgamma_cases() -> Vec<TestCase<op::lgamma::Routine>> {
+    vec![]
+}
+
+fn lgamma_r_cases() -> Vec<TestCase<op::lgamma_r::Routine>> {
+    vec![]
+}
+
+fn lgammaf_cases() -> Vec<TestCase<op::lgammaf::Routine>> {
+    vec![]
+}
+
+fn lgammaf_r_cases() -> Vec<TestCase<op::lgammaf_r::Routine>> {
+    vec![]
+}
+
+fn log_cases() -> Vec<TestCase<op::log::Routine>> {
+    vec![]
+}
+
+fn log10_cases() -> Vec<TestCase<op::log10::Routine>> {
+    vec![]
+}
+
+fn log10f_cases() -> Vec<TestCase<op::log10f::Routine>> {
+    vec![]
+}
+
+fn log1p_cases() -> Vec<TestCase<op::log1p::Routine>> {
+    vec![]
+}
+
+fn log1pf_cases() -> Vec<TestCase<op::log1pf::Routine>> {
+    vec![]
+}
+
+fn log2_cases() -> Vec<TestCase<op::log2::Routine>> {
+    vec![]
+}
+
+fn log2f_cases() -> Vec<TestCase<op::log2f::Routine>> {
+    vec![]
+}
+
+fn logf_cases() -> Vec<TestCase<op::logf::Routine>> {
+    vec![]
+}
+
+fn modf_cases() -> Vec<TestCase<op::modf::Routine>> {
+    vec![]
+}
+
+fn modff_cases() -> Vec<TestCase<op::modff::Routine>> {
+    vec![]
+}
+
+fn nextafter_cases() -> Vec<TestCase<op::nextafter::Routine>> {
+    vec![]
+}
+
+fn nextafterf_cases() -> Vec<TestCase<op::nextafterf::Routine>> {
+    vec![]
+}
+
+fn pow_cases() -> Vec<TestCase<op::pow::Routine>> {
+    vec![]
+}
+
+fn powf_cases() -> Vec<TestCase<op::powf::Routine>> {
+    vec![]
+}
+
+fn remainder_cases() -> Vec<TestCase<op::remainder::Routine>> {
+    vec![]
+}
+
+fn remainderf_cases() -> Vec<TestCase<op::remainderf::Routine>> {
+    vec![]
+}
+
+fn remquo_cases() -> Vec<TestCase<op::remquo::Routine>> {
+    vec![]
+}
+
+fn remquof_cases() -> Vec<TestCase<op::remquof::Routine>> {
+    vec![]
+}
+
+fn rint_cases() -> Vec<TestCase<op::rint::Routine>> {
+    vec![]
+}
+
+fn rintf_cases() -> Vec<TestCase<op::rintf::Routine>> {
+    vec![]
+}
+
+#[cfg(f128_enabled)]
+fn rintf128_cases() -> Vec<TestCase<op::rintf128::Routine>> {
+    vec![]
+}
+
+#[cfg(f16_enabled)]
+fn rintf16_cases() -> Vec<TestCase<op::rintf16::Routine>> {
+    vec![]
+}
+
+fn round_cases() -> Vec<TestCase<op::round::Routine>> {
+    vec![]
+}
+
+fn roundf_cases() -> Vec<TestCase<op::roundf::Routine>> {
+    vec![]
+}
+
+#[cfg(f128_enabled)]
+fn roundf128_cases() -> Vec<TestCase<op::roundf128::Routine>> {
+    vec![]
+}
+
+#[cfg(f16_enabled)]
+fn roundf16_cases() -> Vec<TestCase<op::roundf16::Routine>> {
+    vec![]
+}
+
+fn scalbn_cases() -> Vec<TestCase<op::scalbn::Routine>> {
+    vec![]
+}
+
+fn scalbnf_cases() -> Vec<TestCase<op::scalbnf::Routine>> {
+    vec![]
+}
+
+#[cfg(f128_enabled)]
+fn scalbnf128_cases() -> Vec<TestCase<op::scalbnf128::Routine>> {
+    vec![]
+}
+
+#[cfg(f16_enabled)]
+fn scalbnf16_cases() -> Vec<TestCase<op::scalbnf16::Routine>> {
+    vec![]
+}
+
+fn sin_cases() -> Vec<TestCase<op::sin::Routine>> {
+    vec![]
+}
+
+fn sincos_cases() -> Vec<TestCase<op::sincos::Routine>> {
+    vec![]
+}
+
+fn sincosf_cases() -> Vec<TestCase<op::sincosf::Routine>> {
+    vec![]
+}
+
+fn sinf_cases() -> Vec<TestCase<op::sinf::Routine>> {
+    vec![]
+}
+
+fn sinh_cases() -> Vec<TestCase<op::sinh::Routine>> {
+    vec![]
+}
+
+fn sinhf_cases() -> Vec<TestCase<op::sinhf::Routine>> {
+    vec![]
+}
+
+fn sqrt_cases() -> Vec<TestCase<op::sqrt::Routine>> {
+    vec![]
+}
+
+fn sqrtf_cases() -> Vec<TestCase<op::sqrtf::Routine>> {
+    vec![]
+}
+
+#[cfg(f128_enabled)]
+fn sqrtf128_cases() -> Vec<TestCase<op::sqrtf128::Routine>> {
+    vec![]
+}
+
+#[cfg(f16_enabled)]
+fn sqrtf16_cases() -> Vec<TestCase<op::sqrtf16::Routine>> {
+    vec![]
+}
+
+fn tan_cases() -> Vec<TestCase<op::tan::Routine>> {
+    vec![]
+}
+
+fn tanf_cases() -> Vec<TestCase<op::tanf::Routine>> {
+    vec![]
+}
+
+fn tanh_cases() -> Vec<TestCase<op::tanh::Routine>> {
+    vec![]
+}
+
+fn tanhf_cases() -> Vec<TestCase<op::tanhf::Routine>> {
+    vec![]
+}
+
+fn tgamma_cases() -> Vec<TestCase<op::tgamma::Routine>> {
+    vec![]
+}
+
+fn tgammaf_cases() -> Vec<TestCase<op::tgammaf::Routine>> {
+    vec![]
+}
+
+fn trunc_cases() -> Vec<TestCase<op::trunc::Routine>> {
+    vec![]
+}
+
+fn truncf_cases() -> Vec<TestCase<op::truncf::Routine>> {
+    vec![]
+}
+
+#[cfg(f128_enabled)]
+fn truncf128_cases() -> Vec<TestCase<op::truncf128::Routine>> {
+    vec![]
+}
+
+#[cfg(f16_enabled)]
+fn truncf16_cases() -> Vec<TestCase<op::truncf16::Routine>> {
+    vec![]
+}
+
+fn y0_cases() -> Vec<TestCase<op::y0::Routine>> {
+    vec![]
+}
+
+fn y0f_cases() -> Vec<TestCase<op::y0f::Routine>> {
+    vec![]
+}
+
+fn y1_cases() -> Vec<TestCase<op::y1::Routine>> {
+    vec![]
+}
+
+fn y1f_cases() -> Vec<TestCase<op::y1f::Routine>> {
+    vec![]
+}
+
+fn yn_cases() -> Vec<TestCase<op::yn::Routine>> {
+    vec![]
+}
+
+fn ynf_cases() -> Vec<TestCase<op::ynf::Routine>> {
+    vec![]
+}
+
+pub trait CaseListInput: MathOp + Sized {
+    fn get_cases() -> Vec<TestCase<Self>>;
+}
+
+macro_rules! impl_case_list {
+    (
+        fn_name: $fn_name:ident,
+        attrs: [$($attr:meta),*],
+    ) => {
+        paste::paste! {
+            $(#[$attr])*
+            impl CaseListInput for crate::op::$fn_name::Routine {
+                fn get_cases() -> Vec<TestCase<Self>> {
+                    [< $fn_name _cases >]()
+                }
+            }
+        }
+    };
+}
+
+libm_macros::for_each_function! {
+    callback: impl_case_list,
+}
+
+/// This is the test generator for standalone tests, i.e. those with no basis. For this, it
+/// only extracts tests with a known output.
+pub fn get_test_cases_standalone<Op>(
+    ctx: &CheckCtx,
+) -> impl Iterator<Item = (Op::RustArgs, Op::RustRet)> + use<'_, Op>
+where
+    Op: MathOp + CaseListInput,
+{
+    assert_eq!(ctx.basis, CheckBasis::None);
+    assert_eq!(ctx.gen_kind, GeneratorKind::List);
+    Op::get_cases().into_iter().filter_map(|x| x.output.map(|o| (x.input, o)))
+}
+
+/// Opposite of the above; extract only test cases that don't have a known output, to be run
+/// against a basis.
+pub fn get_test_cases_basis<Op>(
+    ctx: &CheckCtx,
+) -> (impl Iterator<Item = Op::RustArgs> + use<'_, Op>, u64)
+where
+    Op: MathOp + CaseListInput,
+{
+    assert_ne!(ctx.basis, CheckBasis::None);
+    assert_eq!(ctx.gen_kind, GeneratorKind::List);
+
+    let cases = Op::get_cases();
+    let count: u64 = cases.iter().filter(|case| case.output.is_none()).count().try_into().unwrap();
+
+    (cases.into_iter().filter(|x| x.output.is_none()).map(|x| x.input), count)
+}
diff --git a/crates/libm-test/src/run_cfg.rs b/crates/libm-test/src/run_cfg.rs
index 783142e37..5728c3b2e 100644
--- a/crates/libm-test/src/run_cfg.rs
+++ b/crates/libm-test/src/run_cfg.rs
@@ -102,6 +102,7 @@ pub enum GeneratorKind {
     Extensive,
     QuickSpaced,
     Random,
+    List,
 }
 
 /// A list of all functions that should get extensive tests.
@@ -219,8 +220,8 @@ pub fn iteration_count(ctx: &CheckCtx, argnum: usize) -> u64 {
         GeneratorKind::QuickSpaced => domain_iter_count,
         GeneratorKind::Random => random_iter_count,
         GeneratorKind::Extensive => extensive_max_iterations(),
-        GeneratorKind::EdgeCases => {
-            unimplemented!("edge case tests shoudn't need `iteration_count`")
+        GeneratorKind::EdgeCases | GeneratorKind::List => {
+            unimplemented!("shoudn't need `iteration_count` for {:?}", ctx.gen_kind)
         }
     };
 
@@ -269,7 +270,7 @@ pub fn iteration_count(ctx: &CheckCtx, argnum: usize) -> u64 {
         GeneratorKind::Random => {
             format!(" using `{SEED_ENV}={}`", str::from_utf8(SEED.as_slice()).unwrap())
         }
-        GeneratorKind::EdgeCases => unreachable!(),
+        GeneratorKind::EdgeCases | GeneratorKind::List => unimplemented!(),
     };
 
     test_log(&format!(
@@ -310,6 +311,7 @@ pub fn int_range(ctx: &CheckCtx, argnum: usize) -> RangeInclusive<i32> {
         GeneratorKind::Extensive => extensive_range,
         GeneratorKind::QuickSpaced | GeneratorKind::Random => non_extensive_range,
         GeneratorKind::EdgeCases => extensive_range,
+        GeneratorKind::List => unimplemented!("shoudn't need range for {:?}", ctx.gen_kind),
     }
 }
 
diff --git a/crates/libm-test/tests/compare_built_musl.rs b/crates/libm-test/tests/compare_built_musl.rs
index c8beaffc3..927cb25af 100644
--- a/crates/libm-test/tests/compare_built_musl.rs
+++ b/crates/libm-test/tests/compare_built_musl.rs
@@ -9,7 +9,7 @@
 // There are some targets we can't build musl for
 #![cfg(feature = "build-musl")]
 
-use libm_test::gen::{edge_cases, random, spaced};
+use libm_test::gen::{case_list, edge_cases, random, spaced};
 use libm_test::{CheckBasis, CheckCtx, CheckOutput, GeneratorKind, MathOp, TupleCall};
 
 const BASIS: CheckBasis = CheckBasis::Musl;
@@ -34,6 +34,15 @@ macro_rules! musl_tests {
         attrs: [$($attr:meta),*],
     ) => {
         paste::paste! {
+            #[test]
+            $(#[$attr])*
+            fn [< musl_case_list_ $fn_name >]() {
+                type Op = libm_test::op::$fn_name::Routine;
+                let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::List);
+                let cases = case_list::get_test_cases_basis::<Op>(&ctx).0;
+                musl_runner::<Op>(&ctx, cases, musl_math_sys::$fn_name);
+            }
+
             #[test]
             $(#[$attr])*
             fn [< musl_random_ $fn_name >]() {
diff --git a/crates/libm-test/tests/multiprecision.rs b/crates/libm-test/tests/multiprecision.rs
index 0d5c5e60c..fd1f11610 100644
--- a/crates/libm-test/tests/multiprecision.rs
+++ b/crates/libm-test/tests/multiprecision.rs
@@ -2,7 +2,7 @@
 
 #![cfg(feature = "build-mpfr")]
 
-use libm_test::gen::{edge_cases, random, spaced};
+use libm_test::gen::{case_list, edge_cases, random, spaced};
 use libm_test::mpfloat::MpOp;
 use libm_test::{CheckBasis, CheckCtx, CheckOutput, GeneratorKind, MathOp, TupleCall};
 
@@ -24,6 +24,15 @@ macro_rules! mp_tests {
         attrs: [$($attr:meta),*],
     ) => {
         paste::paste! {
+            #[test]
+            $(#[$attr])*
+            fn [< mp_case_list_ $fn_name >]() {
+                type Op = libm_test::op::$fn_name::Routine;
+                let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::List);
+                let cases = case_list::get_test_cases_basis::<Op>(&ctx).0;
+                mp_runner::<Op>(&ctx, cases);
+            }
+
             #[test]
             $(#[$attr])*
             fn [< mp_random_ $fn_name >]() {
diff --git a/crates/libm-test/tests/standalone.rs b/crates/libm-test/tests/standalone.rs
new file mode 100644
index 000000000..d6417acac
--- /dev/null
+++ b/crates/libm-test/tests/standalone.rs
@@ -0,0 +1,38 @@
+//! Test cases that have both an input and an output, so do not require a basis.
+
+use libm_test::gen::case_list;
+use libm_test::{CheckBasis, CheckCtx, CheckOutput, GeneratorKind, MathOp, TupleCall};
+
+const BASIS: CheckBasis = CheckBasis::None;
+
+fn standalone_runner<Op: MathOp>(
+    ctx: &CheckCtx,
+    cases: impl Iterator<Item = (Op::RustArgs, Op::RustRet)>,
+) {
+    for (input, expected) in cases {
+        let crate_res = input.call(Op::ROUTINE);
+        crate_res.validate(expected, input, ctx).unwrap();
+    }
+}
+
+macro_rules! mp_tests {
+    (
+        fn_name: $fn_name:ident,
+        attrs: [$($attr:meta),*],
+    ) => {
+        paste::paste! {
+            #[test]
+            $(#[$attr])*
+            fn [< standalone_ $fn_name >]() {
+                type Op = libm_test::op::$fn_name::Routine;
+                let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::List);
+                let cases = case_list::get_test_cases_standalone::<Op>(&ctx);
+                standalone_runner::<Op>(&ctx, cases);
+            }
+        }
+    };
+}
+
+libm_macros::for_each_function! {
+    callback: mp_tests,
+}

From 3e49c09010a22d66a7b5e952ea5293641c2686e5 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 6 Feb 2025 23:17:27 +0000
Subject: [PATCH 213/279] Improve tidy output

Print a better diff when lists are unsorted, and always check tidy lists
even if `--check` is not passed.
---
 etc/update-api-list.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/etc/update-api-list.py b/etc/update-api-list.py
index 9cf625554..c0b6e41d3 100755
--- a/etc/update-api-list.py
+++ b/etc/update-api-list.py
@@ -278,8 +278,8 @@ def ensure_sorted(fpath: Path, block_start_line: int, lines: list[str]) -> None:
     """Ensure that a list of lines is sorted, otherwise print a diff and exit."""
     relpath = fpath.relative_to(ROOT_DIR)
     diff_and_exit(
-        "".join(lines),
-        "".join(sorted(lines)),
+        "\n".join(lines),
+        "\n".join(sorted(lines)),
         f"sorted block at {relpath}:{block_start_line}",
     )
 
@@ -338,8 +338,7 @@ def ensure_updated_list(check: bool) -> None:
     crate.write_function_list(check)
     crate.write_function_defs(check)
 
-    if check:
-        crate.tidy_lists()
+    crate.tidy_lists()
 
 
 def main():

From c8aec8fe3077b508b7bf28fa1d397809105bc55f Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 6 Feb 2025 23:18:31 +0000
Subject: [PATCH 214/279] Make it possible to use `hf32!` and similar macros
 outside of `libm`

Adjust paths such that these macros don't go through the private `math`
module. `feature = "private-test-deps"` is still needed.

Additionally, ensure that `cargo check` for this crate gets run in CI
because `cargo test` does not seem to identify this problem.

`compiler_builtins` will need to reexport the `support` module.
---
 .github/workflows/main.yaml                    |  1 +
 crates/compiler-builtins-smoke-test/src/lib.rs |  3 +++
 src/math/mod.rs                                |  2 +-
 src/math/support/float_traits.rs               |  2 +-
 src/math/support/macros.rs                     | 12 ++++++++----
 src/math/support/mod.rs                        |  2 ++
 6 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index e03d7ecd3..e1d263dea 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -143,6 +143,7 @@ jobs:
     - name: Install Rust
       run: rustup update nightly --no-self-update && rustup default nightly
     - uses: Swatinem/rust-cache@v2
+    - run: cargo check --manifest-path crates/compiler-builtins-smoke-test/Cargo.toml
     - run: cargo test --manifest-path crates/compiler-builtins-smoke-test/Cargo.toml
 
   benchmarks:
diff --git a/crates/compiler-builtins-smoke-test/src/lib.rs b/crates/compiler-builtins-smoke-test/src/lib.rs
index ccd0642a2..77a4666a1 100644
--- a/crates/compiler-builtins-smoke-test/src/lib.rs
+++ b/crates/compiler-builtins-smoke-test/src/lib.rs
@@ -17,6 +17,9 @@ pub mod libm;
 
 use core::ffi::c_int;
 
+// Required for macro paths.
+use libm::support;
+
 /// Mark functions `#[no_mangle]` and with the C ABI.
 macro_rules! no_mangle {
     ($( $name:ident( $($tt:tt)+ ) -> $ret:ty; )+) => {
diff --git a/src/math/mod.rs b/src/math/mod.rs
index f0698ad02..7ad808cf7 100644
--- a/src/math/mod.rs
+++ b/src/math/mod.rs
@@ -81,7 +81,7 @@ pub mod support;
 
 #[macro_use]
 #[cfg(not(feature = "unstable-public-internals"))]
-mod support;
+pub(crate) mod support;
 
 cfg_if! {
     if #[cfg(feature = "unstable-public-internals")] {
diff --git a/src/math/support/float_traits.rs b/src/math/support/float_traits.rs
index 24cf7d4b0..328b70610 100644
--- a/src/math/support/float_traits.rs
+++ b/src/math/support/float_traits.rs
@@ -107,7 +107,7 @@ pub trait Float:
         !self.is_sign_negative()
     }
 
-    /// Returns if `self` is subnormal
+    /// Returns if `self` is subnormal.
     fn is_subnormal(self) -> bool {
         (self.to_bits() & Self::EXP_MASK) == Self::Int::ZERO
     }
diff --git a/src/math/support/macros.rs b/src/math/support/macros.rs
index d8ba04cff..c80e77511 100644
--- a/src/math/support/macros.rs
+++ b/src/math/support/macros.rs
@@ -90,10 +90,11 @@ macro_rules! select_implementation {
 /// Construct a 16-bit float from hex float representation (C-style), guaranteed to
 /// evaluate at compile time.
 #[cfg(f16_enabled)]
+#[cfg_attr(feature = "unstable-public-internals", macro_export)]
 #[allow(unused_macros)]
 macro_rules! hf16 {
     ($s:literal) => {{
-        const X: f16 = $crate::math::support::hf16($s);
+        const X: f16 = $crate::support::hf16($s);
         X
     }};
 }
@@ -101,9 +102,10 @@ macro_rules! hf16 {
 /// Construct a 32-bit float from hex float representation (C-style), guaranteed to
 /// evaluate at compile time.
 #[allow(unused_macros)]
+#[cfg_attr(feature = "unstable-public-internals", macro_export)]
 macro_rules! hf32 {
     ($s:literal) => {{
-        const X: f32 = $crate::math::support::hf32($s);
+        const X: f32 = $crate::support::hf32($s);
         X
     }};
 }
@@ -111,9 +113,10 @@ macro_rules! hf32 {
 /// Construct a 64-bit float from hex float representation (C-style), guaranteed to
 /// evaluate at compile time.
 #[allow(unused_macros)]
+#[cfg_attr(feature = "unstable-public-internals", macro_export)]
 macro_rules! hf64 {
     ($s:literal) => {{
-        const X: f64 = $crate::math::support::hf64($s);
+        const X: f64 = $crate::support::hf64($s);
         X
     }};
 }
@@ -122,9 +125,10 @@ macro_rules! hf64 {
 /// evaluate at compile time.
 #[cfg(f128_enabled)]
 #[allow(unused_macros)]
+#[cfg_attr(feature = "unstable-public-internals", macro_export)]
 macro_rules! hf128 {
     ($s:literal) => {{
-        const X: f128 = $crate::math::support::hf128($s);
+        const X: f128 = $crate::support::hf128($s);
         X
     }};
 }
diff --git a/src/math/support/mod.rs b/src/math/support/mod.rs
index d471c5b70..68f1e49e5 100644
--- a/src/math/support/mod.rs
+++ b/src/math/support/mod.rs
@@ -9,8 +9,10 @@ mod int_traits;
 pub use float_traits::{Float, IntTy};
 pub(crate) use float_traits::{f32_from_bits, f64_from_bits};
 #[cfg(f16_enabled)]
+#[allow(unused_imports)]
 pub use hex_float::hf16;
 #[cfg(f128_enabled)]
+#[allow(unused_imports)]
 pub use hex_float::hf128;
 #[allow(unused_imports)]
 pub use hex_float::{Hexf, hf32, hf64};

From c967c7e2b2076b9a1e3b503acea3a871615c7899 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 5 Feb 2025 23:45:14 +0000
Subject: [PATCH 215/279] Add `fmaf128`

Resolve all remaining `f64`-specific items in the generic version of
`fma`, then expose `fmaf128`.
---
 crates/libm-macros/src/shared.rs             |   7 +
 crates/libm-test/benches/icount.rs           |   1 +
 crates/libm-test/benches/random.rs           |   1 +
 crates/libm-test/src/gen/case_list.rs        |  23 +-
 crates/libm-test/src/mpfloat.rs              |   2 +-
 crates/libm-test/src/precision.rs            |   2 +
 crates/libm-test/tests/compare_built_musl.rs |   1 +
 crates/util/src/main.rs                      |   1 +
 etc/function-definitions.json                |   7 +
 etc/function-list.txt                        |   1 +
 src/libm_helper.rs                           |   1 +
 src/math/fmaf128.rs                          |   7 +
 src/math/generic/fma.rs                      | 248 ++++++++++++++-----
 src/math/mod.rs                              |   2 +
 14 files changed, 237 insertions(+), 67 deletions(-)
 create mode 100644 src/math/fmaf128.rs

diff --git a/crates/libm-macros/src/shared.rs b/crates/libm-macros/src/shared.rs
index da16cd8e2..48d19c50d 100644
--- a/crates/libm-macros/src/shared.rs
+++ b/crates/libm-macros/src/shared.rs
@@ -106,6 +106,13 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
         None,
         &["fma"],
     ),
+    (
+        // `(f128, f128, f128) -> f128`
+        FloatTy::F128,
+        Signature { args: &[Ty::F128, Ty::F128, Ty::F128], returns: &[Ty::F128] },
+        None,
+        &["fmaf128"],
+    ),
     (
         // `(f32) -> i32`
         FloatTy::F32,
diff --git a/crates/libm-test/benches/icount.rs b/crates/libm-test/benches/icount.rs
index 53ecb5a37..c41cef24e 100644
--- a/crates/libm-test/benches/icount.rs
+++ b/crates/libm-test/benches/icount.rs
@@ -108,6 +108,7 @@ main!(
     icount_bench_floorf16_group,
     icount_bench_floorf_group,
     icount_bench_fma_group,
+    icount_bench_fmaf128_group,
     icount_bench_fmaf_group,
     icount_bench_fmax_group,
     icount_bench_fmaxf128_group,
diff --git a/crates/libm-test/benches/random.rs b/crates/libm-test/benches/random.rs
index 66486a56a..6e8a33479 100644
--- a/crates/libm-test/benches/random.rs
+++ b/crates/libm-test/benches/random.rs
@@ -127,6 +127,7 @@ libm_macros::for_each_function! {
         | fdimf16
         | floorf128
         | floorf16
+        | fmaf128
         | fmaxf128
         | fmaxf16
         | fminf128
diff --git a/crates/libm-test/src/gen/case_list.rs b/crates/libm-test/src/gen/case_list.rs
index 9720f68e9..302d5c391 100644
--- a/crates/libm-test/src/gen/case_list.rs
+++ b/crates/libm-test/src/gen/case_list.rs
@@ -6,6 +6,9 @@
 //!
 //! This is useful for adding regression tests or expected failures.
 
+#[cfg(f128_enabled)]
+use libm::hf128;
+
 use crate::{CheckBasis, CheckCtx, GeneratorKind, MathOp, op};
 
 pub struct TestCase<Op: MathOp> {
@@ -250,7 +253,7 @@ fn fma_cases() -> Vec<TestCase<op::fma::Routine>> {
     TestCase::append_pairs(
         &mut v,
         &[
-            // Previously failure with incorrect sign
+            // Previous failure with incorrect sign
             ((5e-324, -5e-324, 0.0), Some(-0.0)),
         ],
     );
@@ -261,6 +264,24 @@ fn fmaf_cases() -> Vec<TestCase<op::fmaf::Routine>> {
     vec![]
 }
 
+#[cfg(f128_enabled)]
+fn fmaf128_cases() -> Vec<TestCase<op::fmaf128::Routine>> {
+    let mut v = vec![];
+    TestCase::append_pairs(
+        &mut v,
+        &[(
+            // Tricky rounding case that previously failed in extensive tests
+            (
+                hf128!("-0x1.1966cc01966cc01966cc01966f06p-25"),
+                hf128!("-0x1.669933fe69933fe69933fe6997c9p-16358"),
+                hf128!("-0x0.000000000000000000000000048ap-16382"),
+            ),
+            Some(hf128!("0x0.c5171470a3ff5e0f68d751491b18p-16382")),
+        )],
+    );
+    v
+}
+
 fn fmax_cases() -> Vec<TestCase<op::fmax::Routine>> {
     vec![]
 }
diff --git a/crates/libm-test/src/mpfloat.rs b/crates/libm-test/src/mpfloat.rs
index ab77d541c..f4a9ff7ff 100644
--- a/crates/libm-test/src/mpfloat.rs
+++ b/crates/libm-test/src/mpfloat.rs
@@ -196,7 +196,7 @@ libm_macros::for_each_function! {
         expm1 | expm1f => exp_m1,
         fabs | fabsf => abs,
         fdim | fdimf | fdimf16 | fdimf128  => positive_diff,
-        fma | fmaf => mul_add,
+        fma | fmaf | fmaf128 => mul_add,
         fmax | fmaxf | fmaxf16 | fmaxf128 => max,
         fmin | fminf | fminf16 | fminf128 => min,
         lgamma | lgammaf => ln_gamma,
diff --git a/crates/libm-test/src/precision.rs b/crates/libm-test/src/precision.rs
index 596f91fe1..20aa96b6a 100644
--- a/crates/libm-test/src/precision.rs
+++ b/crates/libm-test/src/precision.rs
@@ -560,3 +560,5 @@ impl MaybeOverride<(f128, i32)> for SpecialCase {}
 
 impl MaybeOverride<(f32, f32, f32)> for SpecialCase {}
 impl MaybeOverride<(f64, f64, f64)> for SpecialCase {}
+#[cfg(f128_enabled)]
+impl MaybeOverride<(f128, f128, f128)> for SpecialCase {}
diff --git a/crates/libm-test/tests/compare_built_musl.rs b/crates/libm-test/tests/compare_built_musl.rs
index 927cb25af..7fa77e832 100644
--- a/crates/libm-test/tests/compare_built_musl.rs
+++ b/crates/libm-test/tests/compare_built_musl.rs
@@ -99,6 +99,7 @@ libm_macros::for_each_function! {
         fdimf16,
         floorf128,
         floorf16,
+        fmaf128,
         fmaxf128,
         fmaxf16,
         fminf128,
diff --git a/crates/util/src/main.rs b/crates/util/src/main.rs
index e5d6f374a..0f845a1c4 100644
--- a/crates/util/src/main.rs
+++ b/crates/util/src/main.rs
@@ -96,6 +96,7 @@ fn do_eval(basis: &str, op: &str, inputs: &[&str]) {
             | fdimf16
             | floorf128
             | floorf16
+            | fmaf128
             | fmaxf128
             | fmaxf16
             | fminf128
diff --git a/etc/function-definitions.json b/etc/function-definitions.json
index 243862075..5742ed585 100644
--- a/etc/function-definitions.json
+++ b/etc/function-definitions.json
@@ -356,6 +356,13 @@
         ],
         "type": "f32"
     },
+    "fmaf128": {
+        "sources": [
+            "src/math/fmaf128.rs",
+            "src/math/generic/fma.rs"
+        ],
+        "type": "f128"
+    },
     "fmax": {
         "sources": [
             "src/math/fmax.rs",
diff --git a/etc/function-list.txt b/etc/function-list.txt
index c92eaf9e2..1c9c5e3bc 100644
--- a/etc/function-list.txt
+++ b/etc/function-list.txt
@@ -53,6 +53,7 @@ floorf128
 floorf16
 fma
 fmaf
+fmaf128
 fmax
 fmaxf
 fmaxf128
diff --git a/src/libm_helper.rs b/src/libm_helper.rs
index 0768839c7..68f1fb362 100644
--- a/src/libm_helper.rs
+++ b/src/libm_helper.rs
@@ -208,6 +208,7 @@ libm_helper! {
         (fn fabs(x: f128) -> (f128);                => fabsf128);
         (fn fdim(x: f128, y: f128) -> (f128);       => fdimf128);
         (fn floor(x: f128) -> (f128);               => floorf128);
+        (fn fmaf128(x: f128, y: f128, z: f128) -> (f128); => fmaf128);
         (fn fmax(x: f128, y: f128) -> (f128);       => fmaxf128);
         (fn fmin(x: f128, y: f128) -> (f128);       => fminf128);
         (fn fmod(x: f128, y: f128) -> (f128);       => fmodf128);
diff --git a/src/math/fmaf128.rs b/src/math/fmaf128.rs
new file mode 100644
index 000000000..50f7360de
--- /dev/null
+++ b/src/math/fmaf128.rs
@@ -0,0 +1,7 @@
+/// Fused multiply add (f128)
+///
+/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmaf128(x: f128, y: f128, z: f128) -> f128 {
+    return super::generic::fma(x, y, z);
+}
diff --git a/src/math/generic/fma.rs b/src/math/generic/fma.rs
index b0e2117ea..ac53acadf 100644
--- a/src/math/generic/fma.rs
+++ b/src/math/generic/fma.rs
@@ -1,10 +1,11 @@
+/* SPDX-License-Identifier: MIT */
+/* origin: musl src/math/fma.c. Ported to generic Rust algorithm in 2025, TG. */
+
 use core::{f32, f64};
 
 use super::super::support::{DInt, HInt, IntTy};
 use super::super::{CastFrom, CastInto, Float, Int, MinInt};
 
-const ZEROINFNAN: i32 = 0x7ff - 0x3ff - 52 - 1;
-
 /// Fused multiply-add that works when there is not a larger float size available. Currently this
 /// is still specialized only for `f64`. Computes `(x * y) + z`.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
@@ -18,79 +19,99 @@ where
 {
     let one = IntTy::<F>::ONE;
     let zero = IntTy::<F>::ZERO;
-    let magic = F::from_parts(false, F::BITS - 1 + F::EXP_BIAS, zero);
 
-    /* normalize so top 10bits and last bit are 0 */
+    // Normalize such that the top of the mantissa is zero and we have a guard bit.
     let nx = Norm::from_float(x);
     let ny = Norm::from_float(y);
     let nz = Norm::from_float(z);
 
-    if nx.e >= ZEROINFNAN || ny.e >= ZEROINFNAN {
+    if nx.is_zero_nan_inf() || ny.is_zero_nan_inf() {
+        // Value will overflow, defer to non-fused operations.
         return x * y + z;
     }
-    if nz.e >= ZEROINFNAN {
-        if nz.e > ZEROINFNAN {
-            /* z==0 */
+
+    if nz.is_zero_nan_inf() {
+        if nz.is_zero() {
+            // Empty add component means we only need to multiply.
             return x * y;
         }
+        // `z` is NaN or infinity, which sets the result.
         return z;
     }
 
-    /* mul: r = x*y */
+    // multiply: r = x * y
     let zhi: F::Int;
     let zlo: F::Int;
     let (mut rlo, mut rhi) = nx.m.widen_mul(ny.m).lo_hi();
 
-    /* either top 20 or 21 bits of rhi and last 2 bits of rlo are 0 */
-
-    /* align exponents */
+    // Exponent result of multiplication
     let mut e: i32 = nx.e + ny.e;
+    // Needed shift to align `z` to the multiplication result
     let mut d: i32 = nz.e - e;
     let sbits = F::BITS as i32;
 
-    /* shift bits z<<=kz, r>>=kr, so kz+kr == d, set e = e+kr (== ez-kz) */
+    // Scale `z`. Shift `z <<= kz`, `r >>= kr`, so `kz+kr == d`, set `e = e+kr` (== ez-kz)
     if d > 0 {
+        // The magnitude of `z` is larger than `x * y`
         if d < sbits {
+            // Maximum shift of one `F::BITS` means shifted `z` will fit into `2 * F::BITS`. Shift
+            // it into `(zhi, zlo)`. No exponent adjustment necessary.
             zlo = nz.m << d;
             zhi = nz.m >> (sbits - d);
         } else {
+            // Shift larger than `sbits`, `z` only needs the top half `zhi`. Place it there (acts
+            // as a shift by `sbits`).
             zlo = zero;
             zhi = nz.m;
-            e = nz.e - sbits;
             d -= sbits;
+
+            // `z`'s exponent is large enough that it now needs to be taken into account.
+            e = nz.e - sbits;
+
             if d == 0 {
+                // Exactly `sbits`, nothing to do
             } else if d < sbits {
-                rlo = (rhi << (sbits - d))
-                    | (rlo >> d)
-                    | IntTy::<F>::from((rlo << (sbits - d)) != zero);
+                // Remaining shift fits within `sbits`. Leave `z` in place, shift `x * y`
+                rlo = (rhi << (sbits - d)) | (rlo >> d);
+                // Set the sticky bit
+                rlo |= IntTy::<F>::from((rlo << (sbits - d)) != zero);
                 rhi = rhi >> d;
             } else {
+                // `z`'s magnitude is enough that `x * y` is irrelevant. It was nonzero, so set
+                // the sticky bit.
                 rlo = one;
                 rhi = zero;
             }
         }
     } else {
+        // `z`'s magnitude once shifted fits entirely within `zlo`
         zhi = zero;
         d = -d;
         if d == 0 {
+            // No shift needed
             zlo = nz.m;
         } else if d < sbits {
-            zlo = (nz.m >> d) | IntTy::<F>::from((nz.m << (sbits - d)) != zero);
+            // Shift s.t. `nz.m` fits into `zlo`
+            let sticky = IntTy::<F>::from((nz.m << (sbits - d)) != zero);
+            zlo = (nz.m >> d) | sticky;
         } else {
+            // Would be entirely shifted out, only set the sticky bit
             zlo = one;
         }
     }
 
-    /* add */
+    /* addition */
+
     let mut neg = nx.neg ^ ny.neg;
     let samesign: bool = !neg ^ nz.neg;
-    let mut nonzero: i32 = 1;
+    let mut rhi_nonzero = true;
+
     if samesign {
-        /* r += z */
+        // r += z
         rlo = rlo.wrapping_add(zlo);
         rhi += zhi + IntTy::<F>::from(rlo < zlo);
     } else {
-        /* r -= z */
+        // r -= z
         let (res, borrow) = rlo.overflowing_sub(zlo);
         rlo = res;
         rhi = rhi.wrapping_sub(zhi.wrapping_add(IntTy::<F>::from(borrow)));
@@ -99,129 +120,226 @@ where
             rhi = rhi.signed().wrapping_neg().unsigned() - IntTy::<F>::from(rlo != zero);
             neg = !neg;
         }
-        nonzero = (rhi != zero) as i32;
+        rhi_nonzero = rhi != zero;
     }
 
-    /* set rhi to top 63bit of the result (last bit is sticky) */
-    if nonzero != 0 {
+    /* Construct result */
+
+    // Shift result into `rhi`, left-aligned. Last bit is sticky
+    if rhi_nonzero {
+        // `d` > 0, need to shift both `rhi` and `rlo` into result
         e += sbits;
         d = rhi.leading_zeros() as i32 - 1;
-        /* note: d > 0 */
-        rhi = (rhi << d) | (rlo >> (sbits - d)) | IntTy::<F>::from((rlo << d) != zero);
+        rhi = (rhi << d) | (rlo >> (sbits - d));
+        // Update sticky
+        rhi |= IntTy::<F>::from((rlo << d) != zero);
     } else if rlo != zero {
+        // `rhi` is zero, `rlo` is the entire result and needs to be shifted
         d = rlo.leading_zeros() as i32 - 1;
         if d < 0 {
+            // Shift and set sticky
             rhi = (rlo >> 1) | (rlo & one);
         } else {
             rhi = rlo << d;
         }
     } else {
-        /* exact +-0 */
+        // exact +/- 0.0
         return x * y + z;
     }
     e -= d;
 
-    /* convert to double */
-    let mut i: F::SignedInt = rhi.signed(); /* i is in [1<<62,(1<<63)-1] */
+    // Use int->float conversion to populate the significand.
+    // i is in [1 << (BITS - 2), (1 << (BITS - 1)) - 1]
+    let mut i: F::SignedInt = rhi.signed();
+
     if neg {
         i = -i;
     }
 
-    let mut r: F = F::cast_from_lossy(i); /* |r| is in [0x1p62,0x1p63] */
+    // `|r|` is in `[0x1p62,0x1p63]` for `f64`
+    let mut r: F = F::cast_from_lossy(i);
+
+    /* Account for subnormal and rounding */
+
+    // Unbiased exponent for the maximum value of `r`
+    let max_pow = F::BITS - 1 + F::EXP_BIAS;
 
-    if e < -(F::EXP_BIAS as i32 - 1) - (sbits - 2) {
-        /* result is subnormal before rounding */
-        if e == -(F::EXP_BIAS as i32 - 1) - (sbits - 1) {
-            let mut c: F = magic;
+    if e < -(max_pow as i32 - 2) {
+        // Result is subnormal before rounding
+        if e == -(max_pow as i32 - 1) {
+            let mut c = F::from_parts(false, max_pow, zero);
             if neg {
                 c = -c;
             }
+
             if r == c {
-                /* min normal after rounding, underflow depends
-                 * on arch behaviour which can be imitated by
-                 * a double to float conversion */
-                return r.raise_underflow();
+                // Min normal after rounding,
+                return r.raise_underflow_ret_self();
             }
-            /* one bit is lost when scaled, add another top bit to
-             * only round once at conversion if it is inexact */
-            if (rhi << F::SIG_BITS) != zero {
-                let iu: F::Int = (rhi >> 1) | (rhi & one) | (one << 62);
+
+            if (rhi << (F::SIG_BITS + 1)) != zero {
+                // Account for truncated bits. One bit will be lost in the `scalbn` call, add
+                // another top bit to avoid double rounding if inexact.
+                let iu: F::Int = (rhi >> 1) | (rhi & one) | (one << (F::BITS - 2));
                 i = iu.signed();
+
                 if neg {
                     i = -i;
                 }
+
                 r = F::cast_from_lossy(i);
-                r = F::cast_from(2i8) * r - c; /* remove top bit */
 
-                /* raise underflow portably, such that it
-                 * cannot be optimized away */
-                r += r.raise_underflow2();
+                // Remove the top bit
+                r = F::cast_from(2i8) * r - c;
+                r += r.raise_underflow_ret_zero();
             }
         } else {
-            /* only round once when scaled */
-            d = 10;
-            i = (((rhi >> d) | IntTy::<F>::from(rhi << (F::BITS as i32 - d) != zero)) << d)
-                .signed();
+            // Only round once when scaled
+            d = F::EXP_BITS as i32 - 1;
+            let sticky = IntTy::<F>::from(rhi << (F::BITS as i32 - d) != zero);
+            i = (((rhi >> d) | sticky) << d).signed();
+
             if neg {
                 i = -i;
             }
-            r = F::cast_from(i);
+
+            r = F::cast_from_lossy(i);
         }
     }
 
+    // Use our exponent to scale the final value.
     super::scalbn(r, e)
 }
 
 /// Representation of `F` that has handled subnormals.
+#[derive(Clone, Copy, Debug)]
 struct Norm<F: Float> {
-    /// Normalized significand with one guard bit.
+    /// Normalized significand with one guard bit, unsigned.
     m: F::Int,
-    /// Unbiased exponent, normalized.
+    /// Exponent of the mantissa such that `m * 2^e = x`. Accounts for the shift in the mantissa
+    /// and the guard bit; that is, 1.0 will normalize as `m = 1 << 53` and `e = -53`.
     e: i32,
     neg: bool,
 }
 
 impl<F: Float> Norm<F> {
+    /// Unbias the exponent and account for the mantissa's precision, including the guard bit.
+    const EXP_UNBIAS: u32 = F::EXP_BIAS + F::SIG_BITS + 1;
+
+    /// Values greater than this had a saturated exponent (infinity or NaN), OR were zero and we
+    /// adjusted the exponent such that it exceeds this threashold.
+    const ZERO_INF_NAN: u32 = F::EXP_SAT - Self::EXP_UNBIAS;
+
     fn from_float(x: F) -> Self {
         let mut ix = x.to_bits();
         let mut e = x.exp() as i32;
         let neg = x.is_sign_negative();
         if e == 0 {
             // Normalize subnormals by multiplication
-            let magic = F::from_parts(false, F::BITS - 1 + F::EXP_BIAS, F::Int::ZERO);
-            let scaled = x * magic;
+            let scale_i = F::BITS - 1;
+            let scale_f = F::from_parts(false, scale_i + F::EXP_BIAS, F::Int::ZERO);
+            let scaled = x * scale_f;
             ix = scaled.to_bits();
             e = scaled.exp() as i32;
-            e = if e != 0 { e - (F::BITS as i32 - 1) } else { 0x800 };
+            e = if e == 0 {
+                // If the exponent is still zero, the input was zero. Artifically set this value
+                // such that the final `e` will exceed `ZERO_INF_NAN`.
+                1 << F::EXP_BITS
+            } else {
+                // Otherwise, account for the scaling we just did.
+                e - scale_i as i32
+            };
         }
 
-        e -= F::EXP_BIAS as i32 + 52 + 1;
+        e -= Self::EXP_UNBIAS as i32;
 
+        // Absolute  value, set the implicit bit, and shift to create a guard bit
         ix &= F::SIG_MASK;
         ix |= F::IMPLICIT_BIT;
-        ix <<= 1; // add a guard bit
+        ix <<= 1;
 
         Self { m: ix, e, neg }
     }
+
+    /// True if the value was zero, infinity, or NaN.
+    fn is_zero_nan_inf(self) -> bool {
+        self.e >= Self::ZERO_INF_NAN as i32
+    }
+
+    /// The only value we have
+    fn is_zero(self) -> bool {
+        // The only exponent that strictly exceeds this value is our sentinel value for zero.
+        self.e > Self::ZERO_INF_NAN as i32
+    }
 }
 
 /// Type-specific helpers that are not needed outside of fma.
 pub trait FmaHelper {
-    fn raise_underflow(self) -> Self;
-    fn raise_underflow2(self) -> Self;
+    fn raise_underflow_ret_self(self) -> Self;
+    fn raise_underflow_ret_zero(self) -> Self;
 }
 
 impl FmaHelper for f64 {
-    fn raise_underflow(self) -> Self {
-        let x0_ffffff8p_63 = f64::from_bits(0x3bfffffff0000000); // 0x0.ffffff8p-63
-        let fltmin: f32 = (x0_ffffff8p_63 * f32::MIN_POSITIVE as f64 * self) as f32;
+    fn raise_underflow_ret_self(self) -> Self {
+        /* min normal after rounding, underflow depends
+         * on arch behaviour which can be imitated by
+         * a double to float conversion */
+        let fltmin: f32 = (hf64!("0x0.ffffff8p-63") * f32::MIN_POSITIVE as f64 * self) as f32;
         f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * fltmin as f64
     }
 
-    fn raise_underflow2(self) -> Self {
+    fn raise_underflow_ret_zero(self) -> Self {
         /* raise underflow portably, such that it
          * cannot be optimized away */
         let tiny: f64 = f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * self;
         (tiny * tiny) * (self - self)
     }
 }
+
+#[cfg(f128_enabled)]
+impl FmaHelper for f128 {
+    fn raise_underflow_ret_self(self) -> Self {
+        self
+    }
+
+    fn raise_underflow_ret_zero(self) -> Self {
+        f128::ZERO
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn spec_test<F>()
+    where
+        F: Float + FmaHelper,
+        F: CastFrom<F::SignedInt>,
+        F: CastFrom<i8>,
+        F::Int: HInt,
+        u32: CastInto<F::Int>,
+    {
+        let x = F::from_bits(F::Int::ONE);
+        let y = F::from_bits(F::Int::ONE);
+        let z = F::ZERO;
+
+        // 754-2020 says "When the exact result of (a × b) + c is non-zero yet the result of
+        // fusedMultiplyAdd is zero because of rounding, the zero result takes the sign of the
+        // exact result"
+        assert_biteq!(fma(x, y, z), F::ZERO);
+        assert_biteq!(fma(x, -y, z), F::NEG_ZERO);
+        assert_biteq!(fma(-x, y, z), F::NEG_ZERO);
+        assert_biteq!(fma(-x, -y, z), F::ZERO);
+    }
+
+    #[test]
+    fn spec_test_f64() {
+        spec_test::<f64>();
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn spec_test_f128() {
+        spec_test::<f128>();
+    }
+}
diff --git a/src/math/mod.rs b/src/math/mod.rs
index 7ad808cf7..677ed8d6e 100644
--- a/src/math/mod.rs
+++ b/src/math/mod.rs
@@ -385,6 +385,7 @@ cfg_if! {
         mod fabsf128;
         mod fdimf128;
         mod floorf128;
+        mod fmaf128;
         mod fmaxf128;
         mod fminf128;
         mod fmodf128;
@@ -402,6 +403,7 @@ cfg_if! {
         pub use self::fabsf128::fabsf128;
         pub use self::fdimf128::fdimf128;
         pub use self::floorf128::floorf128;
+        pub use self::fmaf128::fmaf128;
         pub use self::fmaxf128::fmaxf128;
         pub use self::fminf128::fminf128;
         pub use self::fmodf128::fmodf128;

From 724e47d30cfb4d630228aee550106e371cdc885a Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 7 Feb 2025 00:47:00 +0000
Subject: [PATCH 216/279] Add better edge case testing for `scalbn`

Include integer values around the minimum and maximum exponents which
require different behavior in the scale functions.
---
 crates/libm-test/src/gen/edge_cases.rs | 100 +++++++++++++++++++------
 src/math/generic/scalbn.rs             |   4 +-
 src/math/support/float_traits.rs       |  21 ++++++
 3 files changed, 101 insertions(+), 24 deletions(-)

diff --git a/crates/libm-test/src/gen/edge_cases.rs b/crates/libm-test/src/gen/edge_cases.rs
index 8de954ae3..8da635114 100644
--- a/crates/libm-test/src/gen/edge_cases.rs
+++ b/crates/libm-test/src/gen/edge_cases.rs
@@ -1,11 +1,11 @@
 //! A generator that checks a handful of cases near infinities, zeros, asymptotes, and NaNs.
 
-use libm::support::{Float, Int};
+use libm::support::{CastInto, Float, Int};
 
 use crate::domain::get_domain;
 use crate::gen::KnownSize;
 use crate::run_cfg::{check_near_count, check_point_count};
-use crate::{CheckCtx, FloatExt, MathOp, test_log};
+use crate::{BaseName, CheckCtx, FloatExt, FloatTy, MathOp, test_log};
 
 /// Generate a sequence of edge cases, e.g. numbers near zeroes and infiniteis.
 pub trait EdgeCaseInput<Op> {
@@ -78,7 +78,7 @@ where
     (ret.into_iter(), count)
 }
 
-/// Add `AROUND` values starting at and including `x` and counting up. Uses the smallest possible
+/// Add `points` values starting at and including `x` and counting up. Uses the smallest possible
 /// increments (1 ULP).
 fn count_up<F: Float>(mut x: F, points: u64, values: &mut Vec<F>) {
     assert!(!x.is_nan());
@@ -91,7 +91,7 @@ fn count_up<F: Float>(mut x: F, points: u64, values: &mut Vec<F>) {
     }
 }
 
-/// Add `AROUND` values starting at and including `x` and counting down. Uses the smallest possible
+/// Add `points` values starting at and including `x` and counting down. Uses the smallest possible
 /// increments (1 ULP).
 fn count_down<F: Float>(mut x: F, points: u64, values: &mut Vec<F>) {
     assert!(!x.is_nan());
@@ -107,31 +107,87 @@ fn count_down<F: Float>(mut x: F, points: u64, values: &mut Vec<F>) {
 /// Create a list of values around interesting integer points (min, zero, max).
 pub fn int_edge_cases<I: Int>(
     ctx: &CheckCtx,
-    _argnum: usize,
-) -> (impl Iterator<Item = I> + Clone, u64) {
+    argnum: usize,
+) -> (impl Iterator<Item = I> + Clone, u64)
+where
+    i32: CastInto<I>,
+{
     let mut values = Vec::new();
     let near_points = check_near_count(ctx);
 
-    for up_from in [I::MIN, I::ZERO] {
-        let mut x = up_from;
-        for _ in 0..near_points {
-            values.push(x);
-            x += I::ONE;
-        }
-    }
-
-    for down_from in [I::ZERO, I::MAX] {
-        let mut x = down_from;
-        for _ in 0..near_points {
-            values.push(x);
-            x -= I::ONE;
-        }
+    // Check around max/min and zero
+    int_count_around(I::MIN, near_points, &mut values);
+    int_count_around(I::MAX, near_points, &mut values);
+    int_count_around(I::ZERO, near_points, &mut values);
+    int_count_around(I::ZERO, near_points, &mut values);
+
+    if matches!(ctx.base_name, BaseName::Scalbn | BaseName::Ldexp) {
+        assert_eq!(argnum, 1, "scalbn integer argument should be arg1");
+        let (emax, emin, emin_sn) = match ctx.fn_ident.math_op().float_ty {
+            FloatTy::F16 => {
+                #[cfg(not(f16_enabled))]
+                unreachable!();
+                #[cfg(f16_enabled)]
+                (f16::EXP_MAX, f16::EXP_MIN, f16::EXP_MIN_SUBNORM)
+            }
+            FloatTy::F32 => (f32::EXP_MAX, f32::EXP_MIN, f32::EXP_MIN_SUBNORM),
+            FloatTy::F64 => (f64::EXP_MAX, f64::EXP_MIN, f64::EXP_MIN_SUBNORM),
+            FloatTy::F128 => {
+                #[cfg(not(f128_enabled))]
+                unreachable!();
+                #[cfg(f128_enabled)]
+                (f128::EXP_MAX, f128::EXP_MIN, f128::EXP_MIN_SUBNORM)
+            }
+        };
+
+        // `scalbn`/`ldexp` have their trickiest behavior around exponent limits
+        int_count_around(emax.cast(), near_points, &mut values);
+        int_count_around(emin.cast(), near_points, &mut values);
+        int_count_around(emin_sn.cast(), near_points, &mut values);
+        int_count_around((-emin_sn).cast(), near_points, &mut values);
+
+        // Also check values that cause the maximum possible difference in exponents
+        int_count_around((emax - emin).cast(), near_points, &mut values);
+        int_count_around((emin - emax).cast(), near_points, &mut values);
+        int_count_around((emax - emin_sn).cast(), near_points, &mut values);
+        int_count_around((emin_sn - emax).cast(), near_points, &mut values);
     }
 
     values.sort();
     values.dedup();
-    let len = values.len().try_into().unwrap();
-    (values.into_iter(), len)
+    let count = values.len().try_into().unwrap();
+
+    test_log(&format!(
+        "{gen_kind:?} {basis:?} {fn_ident} arg {arg}/{args}: {count} edge cases",
+        gen_kind = ctx.gen_kind,
+        basis = ctx.basis,
+        fn_ident = ctx.fn_ident,
+        arg = argnum + 1,
+        args = ctx.input_count(),
+    ));
+
+    (values.into_iter(), count)
+}
+
+/// Add `points` values both up and down, starting at and including `x`.
+fn int_count_around<I: Int>(x: I, points: u64, values: &mut Vec<I>) {
+    let mut current = x;
+    for _ in 0..points {
+        values.push(current);
+        current = match current.checked_add(I::ONE) {
+            Some(v) => v,
+            None => break,
+        };
+    }
+
+    current = x;
+    for _ in 0..points {
+        values.push(current);
+        current = match current.checked_sub(I::ONE) {
+            Some(v) => v,
+            None => break,
+        };
+    }
 }
 
 macro_rules! impl_edge_case_input {
diff --git a/src/math/generic/scalbn.rs b/src/math/generic/scalbn.rs
index f15cb75d6..5ba7f2ab2 100644
--- a/src/math/generic/scalbn.rs
+++ b/src/math/generic/scalbn.rs
@@ -28,8 +28,8 @@ where
     let sig_total_bits = F::SIG_BITS + 1;
 
     // Maximum and minimum values when biased
-    let exp_max: i32 = F::EXP_BIAS as i32;
-    let exp_min = -(exp_max - 1);
+    let exp_max = F::EXP_MAX;
+    let exp_min = F::EXP_MIN;
 
     // 2 ^ Emax, maximum positive with null significand (0x1p1023 for f64)
     let f_exp_max = F::from_parts(false, F::EXP_BIAS << 1, zero);
diff --git a/src/math/support/float_traits.rs b/src/math/support/float_traits.rs
index 328b70610..d6ce13f69 100644
--- a/src/math/support/float_traits.rs
+++ b/src/math/support/float_traits.rs
@@ -59,6 +59,15 @@ pub trait Float:
     /// The exponent bias value
     const EXP_BIAS: u32 = Self::EXP_SAT >> 1;
 
+    /// Maximum unbiased exponent value.
+    const EXP_MAX: i32 = Self::EXP_BIAS as i32;
+
+    /// Minimum *NORMAL* unbiased exponent value.
+    const EXP_MIN: i32 = -(Self::EXP_MAX - 1);
+
+    /// Minimum subnormal exponent value.
+    const EXP_MIN_SUBNORM: i32 = Self::EXP_MIN - Self::SIG_BITS as i32;
+
     /// A mask for the sign bit
     const SIGN_MASK: Self::Int;
 
@@ -274,6 +283,9 @@ mod tests {
         // Constants
         assert_eq!(f16::EXP_SAT, 0b11111);
         assert_eq!(f16::EXP_BIAS, 15);
+        assert_eq!(f16::EXP_MAX, 15);
+        assert_eq!(f16::EXP_MIN, -14);
+        assert_eq!(f16::EXP_MIN_SUBNORM, -24);
 
         // `exp_unbiased`
         assert_eq!(f16::FRAC_PI_2.exp_unbiased(), 0);
@@ -296,6 +308,9 @@ mod tests {
         // Constants
         assert_eq!(f32::EXP_SAT, 0b11111111);
         assert_eq!(f32::EXP_BIAS, 127);
+        assert_eq!(f32::EXP_MAX, 127);
+        assert_eq!(f32::EXP_MIN, -126);
+        assert_eq!(f32::EXP_MIN_SUBNORM, -149);
 
         // `exp_unbiased`
         assert_eq!(f32::FRAC_PI_2.exp_unbiased(), 0);
@@ -319,6 +334,9 @@ mod tests {
         // Constants
         assert_eq!(f64::EXP_SAT, 0b11111111111);
         assert_eq!(f64::EXP_BIAS, 1023);
+        assert_eq!(f64::EXP_MAX, 1023);
+        assert_eq!(f64::EXP_MIN, -1022);
+        assert_eq!(f64::EXP_MIN_SUBNORM, -1074);
 
         // `exp_unbiased`
         assert_eq!(f64::FRAC_PI_2.exp_unbiased(), 0);
@@ -343,6 +361,9 @@ mod tests {
         // Constants
         assert_eq!(f128::EXP_SAT, 0b111111111111111);
         assert_eq!(f128::EXP_BIAS, 16383);
+        assert_eq!(f128::EXP_MAX, 16383);
+        assert_eq!(f128::EXP_MIN, -16382);
+        assert_eq!(f128::EXP_MIN_SUBNORM, -16494);
 
         // `exp_unbiased`
         assert_eq!(f128::FRAC_PI_2.exp_unbiased(), 0);

From 34cd8f68a3a247ebe3d482199d584a99b781a624 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 7 Feb 2025 01:23:19 +0000
Subject: [PATCH 217/279] Run standard tests before running integration tests

To ensure we don't waste time running extensive tests when there is an
easily identifiable failure, run the normal test suite for relevant
functions before starting extensive tests.
---
 .github/workflows/main.yaml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index e1d263dea..de131639b 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -273,6 +273,9 @@ jobs:
             exit
           fi
 
+          # Run the non-extensive tests first to catch any easy failures
+          cargo t --profile release-checked -- "$CHANGED"
+
           LIBM_EXTENSIVE_TESTS="$CHANGED" cargo t \
             --features build-mpfr,unstable,force-soft-floats \
             --profile release-checked \

From 0fbdb7b93f2bae24ee625babd7d77b4fc21e8816 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 7 Feb 2025 01:05:38 +0000
Subject: [PATCH 218/279] Check more subnormal values during edge cases tests

Add checks at the max subnormal value and a couple values scatted
throughout the subnormal range. This helped identifiy a bug in
`fmaf128`.

As part of this, slightly reduce the amount of edge cases checked
without optimizations because the change makes it become noticible.
---
 crates/libm-test/src/gen/edge_cases.rs | 20 +++++++++++++++++++-
 crates/libm-test/src/run_cfg.rs        |  2 +-
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/crates/libm-test/src/gen/edge_cases.rs b/crates/libm-test/src/gen/edge_cases.rs
index 8da635114..69b59a105 100644
--- a/crates/libm-test/src/gen/edge_cases.rs
+++ b/crates/libm-test/src/gen/edge_cases.rs
@@ -1,9 +1,10 @@
 //! A generator that checks a handful of cases near infinities, zeros, asymptotes, and NaNs.
 
-use libm::support::{CastInto, Float, Int};
+use libm::support::{CastInto, Float, Int, MinInt};
 
 use crate::domain::get_domain;
 use crate::gen::KnownSize;
+use crate::op::OpITy;
 use crate::run_cfg::{check_near_count, check_point_count};
 use crate::{BaseName, CheckCtx, FloatExt, FloatTy, MathOp, test_log};
 
@@ -21,6 +22,7 @@ where
     Op: MathOp,
 {
     let mut ret = Vec::new();
+    let one = OpITy::<Op>::ONE;
     let values = &mut ret;
     let domain = get_domain::<_, i8>(ctx.fn_ident, argnum).unwrap_float();
     let domain_start = domain.range_start();
@@ -51,6 +53,22 @@ where
     values.push(Op::FTy::NAN);
     values.extend(Op::FTy::consts().iter());
 
+    // Check around the maximum subnormal value
+    let sub_max = Op::FTy::from_bits(Op::FTy::SIG_MASK);
+    count_up(sub_max, near_points, values);
+    count_down(sub_max, near_points, values);
+    count_up(-sub_max, near_points, values);
+    count_down(-sub_max, near_points, values);
+
+    // Check a few values around the subnormal range
+    for shift in (0..Op::FTy::SIG_BITS).step_by(Op::FTy::SIG_BITS as usize / 5) {
+        let v = Op::FTy::from_bits(one << shift);
+        count_up(v, 2, values);
+        count_down(v, 2, values);
+        count_up(-v, 2, values);
+        count_down(-v, 2, values);
+    }
+
     // Check around asymptotes
     if let Some(f) = domain.check_points {
         let iter = f();
diff --git a/crates/libm-test/src/run_cfg.rs b/crates/libm-test/src/run_cfg.rs
index 5728c3b2e..4dd43bdf3 100644
--- a/crates/libm-test/src/run_cfg.rs
+++ b/crates/libm-test/src/run_cfg.rs
@@ -342,7 +342,7 @@ pub fn check_near_count(ctx: &CheckCtx) -> u64 {
             x => panic!("unexpected argument count {x}"),
         }
     } else {
-        10
+        8
     }
 }
 

From 4e86806098219d0f0726ca934ddc841b101ad0fc Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 7 Feb 2025 00:52:56 +0000
Subject: [PATCH 219/279] fmaf128: fix exponent calculation for subnormals

When `fmaf128` was introduced in [1], it included a bug where `self`
gets returned rather than the expected minimum positive value. Resolve
this and add a regression test.

[1]: https://github.com/rust-lang/libm/pull/494
---
 crates/libm-test/src/gen/case_list.rs | 25 ++++++++++++++++++-------
 src/math/generic/fma.rs               | 13 ++++++++-----
 2 files changed, 26 insertions(+), 12 deletions(-)

diff --git a/crates/libm-test/src/gen/case_list.rs b/crates/libm-test/src/gen/case_list.rs
index 302d5c391..23226d5c2 100644
--- a/crates/libm-test/src/gen/case_list.rs
+++ b/crates/libm-test/src/gen/case_list.rs
@@ -269,15 +269,26 @@ fn fmaf128_cases() -> Vec<TestCase<op::fmaf128::Routine>> {
     let mut v = vec![];
     TestCase::append_pairs(
         &mut v,
-        &[(
-            // Tricky rounding case that previously failed in extensive tests
+        &[
+            (
+                // Tricky rounding case that previously failed in extensive tests
+                (
+                    hf128!("-0x1.1966cc01966cc01966cc01966f06p-25"),
+                    hf128!("-0x1.669933fe69933fe69933fe6997c9p-16358"),
+                    hf128!("-0x0.000000000000000000000000048ap-16382"),
+                ),
+                Some(hf128!("0x0.c5171470a3ff5e0f68d751491b18p-16382")),
+            ),
             (
-                hf128!("-0x1.1966cc01966cc01966cc01966f06p-25"),
-                hf128!("-0x1.669933fe69933fe69933fe6997c9p-16358"),
-                hf128!("-0x0.000000000000000000000000048ap-16382"),
+                // Subnormal edge case that caused a failure
+                (
+                    hf128!("0x0.7ffffffffffffffffffffffffff7p-16382"),
+                    hf128!("0x1.ffffffffffffffffffffffffffffp-1"),
+                    hf128!("0x0.8000000000000000000000000009p-16382"),
+                ),
+                Some(hf128!("0x1.0000000000000000000000000000p-16382")),
             ),
-            Some(hf128!("0x0.c5171470a3ff5e0f68d751491b18p-16382")),
-        )],
+        ],
     );
     v
 }
diff --git a/src/math/generic/fma.rs b/src/math/generic/fma.rs
index ac53acadf..4c6f1fad6 100644
--- a/src/math/generic/fma.rs
+++ b/src/math/generic/fma.rs
@@ -146,6 +146,7 @@ where
         // exact +/- 0.0
         return x * y + z;
     }
+
     e -= d;
 
     // Use int->float conversion to populate the significand.
@@ -174,7 +175,7 @@ where
 
             if r == c {
                 // Min normal after rounding,
-                return r.raise_underflow_ret_self();
+                return r.raise_underflow_as_min_positive();
             }
 
             if (rhi << (F::SIG_BITS + 1)) != zero {
@@ -275,12 +276,14 @@ impl<F: Float> Norm<F> {
 
 /// Type-specific helpers that are not needed outside of fma.
 pub trait FmaHelper {
-    fn raise_underflow_ret_self(self) -> Self;
+    /// Raise underflow and return the minimum positive normal value with the sign of `self`.
+    fn raise_underflow_as_min_positive(self) -> Self;
+    /// Raise underflow and return zero.
     fn raise_underflow_ret_zero(self) -> Self;
 }
 
 impl FmaHelper for f64 {
-    fn raise_underflow_ret_self(self) -> Self {
+    fn raise_underflow_as_min_positive(self) -> Self {
         /* min normal after rounding, underflow depends
          * on arch behaviour which can be imitated by
          * a double to float conversion */
@@ -298,8 +301,8 @@ impl FmaHelper for f64 {
 
 #[cfg(f128_enabled)]
 impl FmaHelper for f128 {
-    fn raise_underflow_ret_self(self) -> Self {
-        self
+    fn raise_underflow_as_min_positive(self) -> Self {
+        f128::MIN_POSITIVE.copysign(self)
     }
 
     fn raise_underflow_ret_zero(self) -> Self {

From 57a21a1204b9a57d6ae66a798c7e32668e1b49da Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 7 Feb 2025 02:36:58 +0000
Subject: [PATCH 220/279] Remove or reduce the scope of `allow(unused)` where
 possible

Now that we have more in this crate making use of traits, try to be more
specific about what is actually unused.
---
 crates/libm-macros/tests/basic.rs | 13 +++----------
 src/math/support/big.rs           | 11 +++++------
 src/math/support/float_traits.rs  | 17 ++++++++++-------
 src/math/support/int_traits.rs    |  7 ++-----
 src/math/support/mod.rs           |  1 -
 5 files changed, 20 insertions(+), 29 deletions(-)

diff --git a/crates/libm-macros/tests/basic.rs b/crates/libm-macros/tests/basic.rs
index 0aa417f13..5314e84bb 100644
--- a/crates/libm-macros/tests/basic.rs
+++ b/crates/libm-macros/tests/basic.rs
@@ -18,20 +18,14 @@ macro_rules! basic {
         fn_extra: $fn_extra:expr,
     ) => {
         $(#[$attr])*
-        mod $fn_name {
-            #[allow(unused)]
+        #[allow(dead_code)]
+        pub mod $fn_name {
             type FTy= $FTy;
-            #[allow(unused)]
             type CFnTy<'a> = $CFn;
-            #[allow(unused)]
             type RustFnTy = $RustFn;
-            #[allow(unused)]
             type RustArgsTy = $RustArgs;
-            #[allow(unused)]
             type RustRetTy = $RustRet;
-            #[allow(unused)]
             const A: &[&str] = &[$($extra_tt)*];
-            #[allow(unused)]
             fn foo(a: f32) -> f32 {
                 $fn_extra(a)
             }
@@ -92,10 +86,9 @@ macro_rules! specified_types {
         attrs: [$($attr:meta),*],
     ) => {
         $(#[$attr])*
+        #[allow(dead_code)]
         mod $fn_name {
-            #[allow(unused)]
             type RustFnTy = $RustFn;
-            #[allow(unused)]
             type RustArgsTy = $RustArgs;
         }
     };
diff --git a/src/math/support/big.rs b/src/math/support/big.rs
index e0f5e5263..bf47d2001 100644
--- a/src/math/support/big.rs
+++ b/src/math/support/big.rs
@@ -1,11 +1,9 @@
 //! Integers used for wide operations, larger than `u128`.
 
-#![allow(unused)]
-
 #[cfg(test)]
 mod tests;
 
-use core::{fmt, ops};
+use core::ops;
 
 use super::{DInt, HInt, Int, MinInt};
 
@@ -13,7 +11,6 @@ const WORD_LO_MASK: u64 = 0x00000000ffffffff;
 const WORD_HI_MASK: u64 = 0xffffffff00000000;
 const WORD_FULL_MASK: u64 = 0xffffffffffffffff;
 const U128_LO_MASK: u128 = u64::MAX as u128;
-const U128_HI_MASK: u128 = (u64::MAX as u128) << 64;
 
 /// A 256-bit unsigned integer represented as 4 64-bit limbs.
 ///
@@ -23,6 +20,7 @@ const U128_HI_MASK: u128 = (u64::MAX as u128) << 64;
 pub struct u256(pub [u64; 4]);
 
 impl u256 {
+    #[cfg(test)]
     pub const MAX: Self = Self([u64::MAX, u64::MAX, u64::MAX, u64::MAX]);
 
     /// Reinterpret as a signed integer
@@ -40,6 +38,7 @@ pub struct i256(pub [u64; 4]);
 
 impl i256 {
     /// Reinterpret as an unsigned integer
+    #[cfg(test)]
     pub fn unsigned(self) -> u256 {
         u256(self.0)
     }
@@ -96,7 +95,7 @@ macro_rules! impl_common {
         impl ops::Shl<u32> for $ty {
             type Output = Self;
 
-            fn shl(self, rhs: u32) -> Self::Output {
+            fn shl(self, _rhs: u32) -> Self::Output {
                 unimplemented!("only used to meet trait bounds")
             }
         }
@@ -256,7 +255,7 @@ impl HInt for i128 {
         self.unsigned().zero_widen_mul(rhs.unsigned()).signed()
     }
 
-    fn widen_mul(self, rhs: Self) -> Self::D {
+    fn widen_mul(self, _rhs: Self) -> Self::D {
         unimplemented!("signed i128 widening multiply is not used")
     }
 
diff --git a/src/math/support/float_traits.rs b/src/math/support/float_traits.rs
index d6ce13f69..3b27f8de5 100644
--- a/src/math/support/float_traits.rs
+++ b/src/math/support/float_traits.rs
@@ -3,7 +3,7 @@ use core::{fmt, mem, ops};
 use super::int_traits::{CastFrom, Int, MinInt};
 
 /// Trait for some basic operations on floats
-#[allow(dead_code)]
+// #[allow(dead_code)]
 pub trait Float:
     Copy
     + fmt::Debug
@@ -84,11 +84,13 @@ pub trait Float:
     fn to_bits(self) -> Self::Int;
 
     /// Returns `self` transmuted to `Self::SignedInt`
+    #[allow(dead_code)]
     fn to_bits_signed(self) -> Self::SignedInt {
         self.to_bits().signed()
     }
 
     /// Check bitwise equality.
+    #[allow(dead_code)]
     fn biteq(self, rhs: Self) -> bool {
         self.to_bits() == rhs.to_bits()
     }
@@ -98,6 +100,7 @@ pub trait Float:
     ///
     /// This method returns `true` if two NaNs are compared. Use [`biteq`](Self::biteq) instead
     /// if `NaN` should not be treated separately.
+    #[allow(dead_code)]
     fn eq_repr(self, rhs: Self) -> bool {
         if self.is_nan() && rhs.is_nan() { true } else { self.biteq(rhs) }
     }
@@ -117,6 +120,7 @@ pub trait Float:
     }
 
     /// Returns if `self` is subnormal.
+    #[allow(dead_code)]
     fn is_subnormal(self) -> bool {
         (self.to_bits() & Self::EXP_MASK) == Self::Int::ZERO
     }
@@ -132,15 +136,11 @@ pub trait Float:
     }
 
     /// Returns the significand with no implicit bit (or the "fractional" part)
+    #[allow(dead_code)]
     fn frac(self) -> Self::Int {
         self.to_bits() & Self::SIG_MASK
     }
 
-    /// Returns the significand with implicit bit.
-    fn imp_frac(self) -> Self::Int {
-        self.frac() | Self::IMPLICIT_BIT
-    }
-
     /// Returns a `Self::Int` transmuted back to `Self`
     fn from_bits(a: Self::Int) -> Self;
 
@@ -154,22 +154,25 @@ pub trait Float:
         )
     }
 
+    #[allow(dead_code)]
     fn abs(self) -> Self;
 
     /// Returns a number composed of the magnitude of self and the sign of sign.
+    #[allow(dead_code)]
     fn copysign(self, other: Self) -> Self;
 
     /// Returns (normalized exponent, normalized significand)
+    #[allow(dead_code)]
     fn normalize(significand: Self::Int) -> (i32, Self::Int);
 
     /// Returns a number that represents the sign of self.
+    #[allow(dead_code)]
     fn signum(self) -> Self {
         if self.is_nan() { self } else { Self::ONE.copysign(self) }
     }
 }
 
 /// Access the associated `Int` type from a float (helper to avoid ambiguous associated types).
-#[allow(dead_code)]
 pub type IntTy<F> = <F as Float>::Int;
 
 macro_rules! float_impl {
diff --git a/src/math/support/int_traits.rs b/src/math/support/int_traits.rs
index 793a0f306..d34797764 100644
--- a/src/math/support/int_traits.rs
+++ b/src/math/support/int_traits.rs
@@ -1,7 +1,6 @@
 use core::{cmp, fmt, ops};
 
 /// Minimal integer implementations needed on all integer types, including wide integers.
-#[allow(dead_code)]
 pub trait MinInt:
     Copy
     + fmt::Debug
@@ -261,7 +260,6 @@ int_impl!(i128, u128);
 
 /// Trait for integers twice the bit width of another integer. This is implemented for all
 /// primitives except for `u8`, because there is not a smaller primitive.
-#[allow(unused)]
 pub trait DInt: MinInt {
     /// Integer that is half the bit width of the integer this trait is implemented for
     type H: HInt<D = Self>;
@@ -275,6 +273,7 @@ pub trait DInt: MinInt {
         (self.lo(), self.hi())
     }
     /// Constructs an integer using lower and higher half parts
+    #[allow(unused)]
     fn from_lo_hi(lo: Self::H, hi: Self::H) -> Self {
         lo.zero_widen() | hi.widen_hi()
     }
@@ -282,7 +281,6 @@ pub trait DInt: MinInt {
 
 /// Trait for integers half the bit width of another integer. This is implemented for all
 /// primitives except for `u128`, because it there is not a larger primitive.
-#[allow(unused)]
 pub trait HInt: Int {
     /// Integer that is double the bit width of the integer this trait is implemented for
     type D: DInt<H = Self> + MinInt;
@@ -297,6 +295,7 @@ pub trait HInt: Int {
     /// around problems with associated type bounds (such as `Int<Othersign: DInt>`) being unstable
     fn zero_widen(self) -> Self::D;
     /// Widens the integer to have double bit width and shifts the integer into the higher bits
+    #[allow(unused)]
     fn widen_hi(self) -> Self::D;
     /// Widening multiplication with zero widening. This cannot overflow.
     fn zero_widen_mul(self, rhs: Self) -> Self::D;
@@ -360,7 +359,6 @@ impl_h_int!(
 );
 
 /// Trait to express (possibly lossy) casting of integers
-#[allow(unused)]
 pub trait CastInto<T: Copy>: Copy {
     /// By default, casts should be exact.
     fn cast(self) -> T;
@@ -369,7 +367,6 @@ pub trait CastInto<T: Copy>: Copy {
     fn cast_lossy(self) -> T;
 }
 
-#[allow(unused)]
 pub trait CastFrom<T: Copy>: Copy {
     /// By default, casts should be exact.
     fn cast_from(value: T) -> Self;
diff --git a/src/math/support/mod.rs b/src/math/support/mod.rs
index 68f1e49e5..d3c932b97 100644
--- a/src/math/support/mod.rs
+++ b/src/math/support/mod.rs
@@ -5,7 +5,6 @@ mod float_traits;
 pub mod hex_float;
 mod int_traits;
 
-#[allow(unused_imports)]
 pub use float_traits::{Float, IntTy};
 pub(crate) use float_traits::{f32_from_bits, f64_from_bits};
 #[cfg(f16_enabled)]

From 2795848eaed74f777e676efc87a2973525b5426b Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 7 Feb 2025 03:41:05 +0000
Subject: [PATCH 221/279] Convert `fmaf` to a generic implementation

Introduce a version of generic `fma` that works when there is a larger
hardware-backed float type available to compute the result with more
precision. This is currently used only for `f32`, but with some minor
adjustments it should work for `f16` as well.
---
 src/math/fmaf.rs                 | 96 +-------------------------------
 src/math/generic/fma.rs          | 67 +++++++++++++++++++++-
 src/math/generic/mod.rs          |  2 +-
 src/math/mod.rs                  |  2 +-
 src/math/support/float_traits.rs | 58 +++++++++++++++++++
 src/math/support/mod.rs          |  3 +-
 6 files changed, 129 insertions(+), 99 deletions(-)

diff --git a/src/math/fmaf.rs b/src/math/fmaf.rs
index 79371c836..40d7f40d6 100644
--- a/src/math/fmaf.rs
+++ b/src/math/fmaf.rs
@@ -1,103 +1,11 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/s_fmaf.c */
-/*-
- * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-use core::f32;
-use core::ptr::read_volatile;
-
-use super::fenv::{
-    FE_INEXACT, FE_TONEAREST, FE_UNDERFLOW, feclearexcept, fegetround, feraiseexcept, fetestexcept,
-};
-
-/*
- * Fused multiply-add: Compute x * y + z with a single rounding error.
- *
- * A double has more than twice as much precision than a float, so
- * direct double-precision arithmetic suffices, except where double
- * rounding occurs.
- */
-
 /// Floating multiply add (f32)
 ///
 /// Computes `(x*y)+z`, rounded as one ternary operation:
 /// Computes the value (as if) to infinite precision and rounds once to the result format,
 /// according to the rounding mode characterized by the value of FLT_ROUNDS.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn fmaf(x: f32, y: f32, mut z: f32) -> f32 {
-    let xy: f64;
-    let mut result: f64;
-    let mut ui: u64;
-    let e: i32;
-
-    xy = x as f64 * y as f64;
-    result = xy + z as f64;
-    ui = result.to_bits();
-    e = (ui >> 52) as i32 & 0x7ff;
-    /* Common case: The double precision result is fine. */
-    if (
-        /* not a halfway case */
-        ui & 0x1fffffff) != 0x10000000 ||
-        /* NaN */
-        e == 0x7ff ||
-        /* exact */
-        (result - xy == z as f64 && result - z as f64 == xy) ||
-        /* not round-to-nearest */
-        fegetround() != FE_TONEAREST
-    {
-        /*
-            underflow may not be raised correctly, example:
-            fmaf(0x1p-120f, 0x1p-120f, 0x1p-149f)
-        */
-        if ((0x3ff - 149)..(0x3ff - 126)).contains(&e) && fetestexcept(FE_INEXACT) != 0 {
-            feclearexcept(FE_INEXACT);
-            // prevent `xy + vz` from being CSE'd with `xy + z` above
-            let vz: f32 = unsafe { read_volatile(&z) };
-            result = xy + vz as f64;
-            if fetestexcept(FE_INEXACT) != 0 {
-                feraiseexcept(FE_UNDERFLOW);
-            } else {
-                feraiseexcept(FE_INEXACT);
-            }
-        }
-        z = result as f32;
-        return z;
-    }
-
-    /*
-     * If result is inexact, and exactly halfway between two float values,
-     * we need to adjust the low-order bit in the direction of the error.
-     */
-    let neg = ui >> 63 != 0;
-    let err = if neg == (z as f64 > xy) { xy - result + z as f64 } else { z as f64 - result + xy };
-    if neg == (err < 0.0) {
-        ui += 1;
-    } else {
-        ui -= 1;
-    }
-    f64::from_bits(ui) as f32
+pub fn fmaf(x: f32, y: f32, z: f32) -> f32 {
+    super::generic::fma_wide(x, y, z)
 }
 
 #[cfg(test)]
diff --git a/src/math/generic/fma.rs b/src/math/generic/fma.rs
index 4c6f1fad6..a40d7aaaf 100644
--- a/src/math/generic/fma.rs
+++ b/src/math/generic/fma.rs
@@ -1,10 +1,13 @@
 /* SPDX-License-Identifier: MIT */
-/* origin: musl src/math/fma.c. Ported to generic Rust algorithm in 2025, TG. */
+/* origin: musl src/math/{fma,fmaf}.c. Ported to generic Rust algorithm in 2025, TG. */
 
 use core::{f32, f64};
 
+use super::super::fenv::{
+    FE_INEXACT, FE_TONEAREST, FE_UNDERFLOW, feclearexcept, fegetround, feraiseexcept, fetestexcept,
+};
 use super::super::support::{DInt, HInt, IntTy};
-use super::super::{CastFrom, CastInto, Float, Int, MinInt};
+use super::super::{CastFrom, CastInto, DFloat, Float, HFloat, Int, MinInt};
 
 /// Fused multiply-add that works when there is not a larger float size available. Currently this
 /// is still specialized only for `f64`. Computes `(x * y) + z`.
@@ -212,6 +215,66 @@ where
     super::scalbn(r, e)
 }
 
+/// Fma implementation when a hardware-backed larger float type is available. For `f32` and `f64`,
+/// `f64` has enough precision to represent the `f32` in its entirety, except for double rounding.
+pub fn fma_wide<F, B>(x: F, y: F, z: F) -> F
+where
+    F: Float + HFloat<D = B>,
+    B: Float + DFloat<H = F>,
+    B::Int: CastInto<i32>,
+    i32: CastFrom<i32>,
+{
+    let one = IntTy::<B>::ONE;
+
+    let xy: B = x.widen() * y.widen();
+    let mut result: B = xy + z.widen();
+    let mut ui: B::Int = result.to_bits();
+    let re = result.exp();
+    let zb: B = z.widen();
+
+    let prec_diff = B::SIG_BITS - F::SIG_BITS;
+    let excess_prec = ui & ((one << prec_diff) - one);
+    let halfway = one << (prec_diff - 1);
+
+    // Common case: the larger precision is fine if...
+    // This is not a halfway case
+    if excess_prec != halfway
+        // Or the result is NaN
+        || re == B::EXP_SAT
+        // Or the result is exact
+        || (result - xy == zb && result - zb == xy)
+        // Or the mode is something other than round to nearest
+        || fegetround() != FE_TONEAREST
+    {
+        let min_inexact_exp = (B::EXP_BIAS as i32 + F::EXP_MIN_SUBNORM) as u32;
+        let max_inexact_exp = (B::EXP_BIAS as i32 + F::EXP_MIN) as u32;
+
+        if (min_inexact_exp..max_inexact_exp).contains(&re) && fetestexcept(FE_INEXACT) != 0 {
+            feclearexcept(FE_INEXACT);
+            // prevent `xy + vz` from being CSE'd with `xy + z` above
+            let vz: F = force_eval!(z);
+            result = xy + vz.widen();
+            if fetestexcept(FE_INEXACT) != 0 {
+                feraiseexcept(FE_UNDERFLOW);
+            } else {
+                feraiseexcept(FE_INEXACT);
+            }
+        }
+
+        return result.narrow();
+    }
+
+    let neg = ui >> (B::BITS - 1) != IntTy::<B>::ZERO;
+    let err = if neg == (zb > xy) { xy - result + zb } else { zb - result + xy };
+    if neg == (err < B::ZERO) {
+        ui += one;
+    } else {
+        ui -= one;
+    }
+
+    B::from_bits(ui).narrow()
+}
+
 /// Representation of `F` that has handled subnormals.
 #[derive(Clone, Copy, Debug)]
 struct Norm<F: Float> {
diff --git a/src/math/generic/mod.rs b/src/math/generic/mod.rs
index e19cc83a9..b34d3dfae 100644
--- a/src/math/generic/mod.rs
+++ b/src/math/generic/mod.rs
@@ -18,7 +18,7 @@ pub use copysign::copysign;
 pub use fabs::fabs;
 pub use fdim::fdim;
 pub use floor::floor;
-pub use fma::fma;
+pub use fma::{fma, fma_wide};
 pub use fmax::fmax;
 pub use fmin::fmin;
 pub use fmod::fmod;
diff --git a/src/math/mod.rs b/src/math/mod.rs
index 677ed8d6e..e32045021 100644
--- a/src/math/mod.rs
+++ b/src/math/mod.rs
@@ -121,7 +121,7 @@ use self::rem_pio2::rem_pio2;
 use self::rem_pio2_large::rem_pio2_large;
 use self::rem_pio2f::rem_pio2f;
 #[allow(unused_imports)]
-use self::support::{CastFrom, CastInto, DInt, Float, HInt, Int, IntTy, MinInt};
+use self::support::{CastFrom, CastInto, DFloat, DInt, Float, HFloat, HInt, Int, IntTy, MinInt};
 
 // Public modules
 mod acos;
diff --git a/src/math/support/float_traits.rs b/src/math/support/float_traits.rs
index 3b27f8de5..ee83c793d 100644
--- a/src/math/support/float_traits.rs
+++ b/src/math/support/float_traits.rs
@@ -276,6 +276,64 @@ pub const fn f64_from_bits(bits: u64) -> f64 {
     unsafe { mem::transmute::<u64, f64>(bits) }
 }
 
+/// Trait for floats twice the bit width of another integer.
+pub trait DFloat: Float {
+    /// Float that is half the bit width of the floatthis trait is implemented for.
+    type H: HFloat<D = Self>;
+
+    /// Narrow the float type.
+    fn narrow(self) -> Self::H;
+}
+
+/// Trait for floats half the bit width of another float.
+pub trait HFloat: Float {
+    /// Float that is double the bit width of the float this trait is implemented for.
+    type D: DFloat<H = Self>;
+
+    /// Widen the float type.
+    fn widen(self) -> Self::D;
+}
+
+macro_rules! impl_d_float {
+    ($($X:ident $D:ident),*) => {
+        $(
+            impl DFloat for $D {
+                type H = $X;
+
+                fn narrow(self) -> Self::H {
+                    self as $X
+                }
+            }
+        )*
+    };
+}
+
+macro_rules! impl_h_float {
+    ($($H:ident $X:ident),*) => {
+        $(
+            impl HFloat for $H {
+                type D = $X;
+
+                fn widen(self) -> Self::D {
+                    self as $X
+                }
+            }
+        )*
+    };
+}
+
+impl_d_float!(f32 f64);
+#[cfg(f16_enabled)]
+impl_d_float!(f16 f32);
+#[cfg(f128_enabled)]
+impl_d_float!(f64 f128);
+
+impl_h_float!(f32 f64);
+#[cfg(f16_enabled)]
+impl_h_float!(f16 f32);
+#[cfg(f128_enabled)]
+impl_h_float!(f64 f128);
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/src/math/support/mod.rs b/src/math/support/mod.rs
index d3c932b97..9eebd4403 100644
--- a/src/math/support/mod.rs
+++ b/src/math/support/mod.rs
@@ -5,7 +5,8 @@ mod float_traits;
 pub mod hex_float;
 mod int_traits;
 
-pub use float_traits::{Float, IntTy};
+#[allow(unused_imports)]
+pub use float_traits::{DFloat, Float, HFloat, IntTy};
 pub(crate) use float_traits::{f32_from_bits, f64_from_bits};
 #[cfg(f16_enabled)]
 #[allow(unused_imports)]

From 020db46099e3b397d47a773d7d27b525187c4345 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 7 Feb 2025 21:25:03 +0000
Subject: [PATCH 222/279] Uncomment some hex float tests that should work now

---
 src/math/support/hex_float.rs | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/src/math/support/hex_float.rs b/src/math/support/hex_float.rs
index ebc4f7c64..99ad8bec3 100644
--- a/src/math/support/hex_float.rs
+++ b/src/math/support/hex_float.rs
@@ -1,7 +1,5 @@
 //! Utilities for working with hex float formats.
 
-#![allow(dead_code)] // FIXME: remove once this gets used
-
 use core::fmt;
 
 use super::{Float, f32_from_bits, f64_from_bits};
@@ -13,6 +11,7 @@ pub const fn hf16(s: &str) -> f16 {
 }
 
 /// Construct a 32-bit float from hex float representation (C-style)
+#[allow(unused)]
 pub const fn hf32(s: &str) -> f32 {
     f32_from_bits(parse_any(s, 32, 23) as u32)
 }
@@ -548,14 +547,12 @@ mod parse_tests {
 
     #[test]
     fn test_macros() {
-        // FIXME(msrv): enable once parsing works
-        // #[cfg(f16_enabled)]
-        // assert_eq!(hf16!("0x1.ffp+8").to_bits(), 0x5ffc_u16);
+        #[cfg(f16_enabled)]
+        assert_eq!(hf16!("0x1.ffp+8").to_bits(), 0x5ffc_u16);
         assert_eq!(hf32!("0x1.ffep+8").to_bits(), 0x43fff000_u32);
         assert_eq!(hf64!("0x1.ffep+8").to_bits(), 0x407ffe0000000000_u64);
-        // FIXME(msrv): enable once parsing works
-        // #[cfg(f128_enabled)]
-        // assert_eq!(hf128!("0x1.ffep+8").to_bits(), 0x4007ffe0000000000000000000000000_u128);
+        #[cfg(f128_enabled)]
+        assert_eq!(hf128!("0x1.ffep+8").to_bits(), 0x4007ffe0000000000000000000000000_u128);
     }
 }
 

From e66ec88df8325fbe151939c4dc0a9f7c25759fdf Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 7 Feb 2025 22:28:19 +0000
Subject: [PATCH 223/279] Work arouind iai-callgrind apt failures

Usually `cargo binstall iai-callgrind-runner` handles apt dependencies.
However, the following has been happening:

    Err:11 mirror+file:/etc/apt/apt-mirrors.txt noble-updates/main amd64 libc6-dbg amd64 2.39-0ubuntu8.3
      404  Not Found [IP: 40.81.13.82 80]
    E: Failed to fetch mirror+file:/etc/apt/apt-mirrors.txt/pool/main/g/glibc/libc6-dbg_2.39-0ubuntu8.3_amd64.deb  404  Not Found [IP: 40.81.13.82 80]
    Fetched 19.8 MB in 6s (3138 kB/s)
    E: Unable to fetch some archives, maybe run apt-get update or try with --fix-missing?

Installing the dependencies manually seems to resolve the issue.
---
 .github/workflows/main.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index de131639b..265702965 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -158,6 +158,8 @@ jobs:
 
     - name: Set up dependencies
       run: |
+        sudo apt update
+        sudo apt install -y valgrind gdb libc6-dbg # Needed for iai-callgrind
         rustup update "$BENCHMARK_RUSTC" --no-self-update
         rustup default "$BENCHMARK_RUSTC"
         # Install the version of iai-callgrind-runner that is specified in Cargo.toml

From 68bfe1d0d62e342bcead759a9400c02e9da3d6fd Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sat, 25 Jan 2025 00:29:04 +0000
Subject: [PATCH 224/279] Add an enum representation of rounding mode

We only round using nearest, but some incoming code has more handling of
rounding modes that would be nice to `match` on. Rather than checking
integer values, add an enum representation.
---
 src/math/fenv.rs | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/src/math/fenv.rs b/src/math/fenv.rs
index c91272e82..328c9f346 100644
--- a/src/math/fenv.rs
+++ b/src/math/fenv.rs
@@ -5,6 +5,9 @@ pub(crate) const FE_UNDERFLOW: i32 = 0;
 pub(crate) const FE_INEXACT: i32 = 0;
 
 pub(crate) const FE_TONEAREST: i32 = 0;
+pub(crate) const FE_DOWNWARD: i32 = 1;
+pub(crate) const FE_UPWARD: i32 = 2;
+pub(crate) const FE_TOWARDZERO: i32 = 3;
 
 #[inline]
 pub(crate) fn feclearexcept(_mask: i32) -> i32 {
@@ -25,3 +28,22 @@ pub(crate) fn fetestexcept(_mask: i32) -> i32 {
 pub(crate) fn fegetround() -> i32 {
     FE_TONEAREST
 }
+
+#[derive(Clone, Copy, Debug, PartialEq)]
+pub(crate) enum Rounding {
+    Nearest = FE_TONEAREST as isize,
+    Downward = FE_DOWNWARD as isize,
+    Upward = FE_UPWARD as isize,
+    ToZero = FE_TOWARDZERO as isize,
+}
+
+impl Rounding {
+    pub(crate) fn get() -> Self {
+        match fegetround() {
+            x if x == FE_DOWNWARD => Self::Downward,
+            x if x == FE_UPWARD => Self::Upward,
+            x if x == FE_TOWARDZERO => Self::ToZero,
+            _ => Self::Nearest,
+        }
+    }
+}

From b19cde93614fca99be9bf33441dc6e139d4beb60 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 25 Oct 2024 03:56:09 -0500
Subject: [PATCH 225/279] Port the CORE-MATH version of `cbrt`

Replace our current implementation with one that is correctly rounded.

Source: https://gitlab.inria.fr/core-math/core-math/-/blob/81d447bb1c46592291bec3476bc24fa2c2688c67/src/binary64/cbrt/cbrt.c
---
 src/math/cbrt.rs | 319 ++++++++++++++++++++++++++++++++---------------
 1 file changed, 216 insertions(+), 103 deletions(-)

diff --git a/src/math/cbrt.rs b/src/math/cbrt.rs
index b4e77eaa2..fbf81f77d 100644
--- a/src/math/cbrt.rs
+++ b/src/math/cbrt.rs
@@ -1,113 +1,226 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/s_cbrt.c */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- *
- * Optimized by Bruce D. Evans.
+/* SPDX-License-Identifier: MIT */
+/* origin: core-math/src/binary64/cbrt/cbrt.c
+ * Copyright (c) 2021-2022 Alexei Sibidanov.
+ * Ported to Rust in 2025 by Trevor Gross.
  */
-/* cbrt(x)
- * Return cube root of x
- */
-
-use core::f64;
 
-const B1: u32 = 715094163; /* B1 = (1023-1023/3-0.03306235651)*2**20 */
-const B2: u32 = 696219795; /* B2 = (1023-1023/3-54/3-0.03306235651)*2**20 */
+use super::Float;
+use super::fenv::Rounding;
+use super::support::cold_path;
 
-/* |1/cbrt(x) - p(x)| < 2**-23.5 (~[-7.93e-8, 7.929e-8]). */
-const P0: f64 = 1.87595182427177009643; /* 0x3ffe03e6, 0x0f61e692 */
-const P1: f64 = -1.88497979543377169875; /* 0xbffe28e0, 0x92f02420 */
-const P2: f64 = 1.621429720105354466140; /* 0x3ff9f160, 0x4a49d6c2 */
-const P3: f64 = -0.758397934778766047437; /* 0xbfe844cb, 0xbee751d9 */
-const P4: f64 = 0.145996192886612446982; /* 0x3fc2b000, 0xd4e4edd7 */
-
-// Cube root (f64)
-///
-/// Computes the cube root of the argument.
+/// Compute the cube root of the argument.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn cbrt(x: f64) -> f64 {
-    let x1p54 = f64::from_bits(0x4350000000000000); // 0x1p54 === 2 ^ 54
-
-    let mut ui: u64 = x.to_bits();
-    let mut r: f64;
-    let s: f64;
-    let mut t: f64;
-    let w: f64;
-    let mut hx: u32 = (ui >> 32) as u32 & 0x7fffffff;
-
-    if hx >= 0x7ff00000 {
-        /* cbrt(NaN,INF) is itself */
-        return x + x;
+    const ESCALE: [f64; 3] = [
+        1.0,
+        hf64!("0x1.428a2f98d728bp+0"), /* 2^(1/3) */
+        hf64!("0x1.965fea53d6e3dp+0"), /* 2^(2/3) */
+    ];
+
+    /* the polynomial c0+c1*x+c2*x^2+c3*x^3 approximates x^(1/3) on [1,2]
+    with maximal error < 9.2e-5 (attained at x=2) */
+    const C: [f64; 4] = [
+        hf64!("0x1.1b0babccfef9cp-1"),
+        hf64!("0x1.2c9a3e94d1da5p-1"),
+        hf64!("-0x1.4dc30b1a1ddbap-3"),
+        hf64!("0x1.7a8d3e4ec9b07p-6"),
+    ];
+
+    let u0: f64 = hf64!("0x1.5555555555555p-2");
+    let u1: f64 = hf64!("0x1.c71c71c71c71cp-3");
+
+    let rsc = [1.0, -1.0, 0.5, -0.5, 0.25, -0.25];
+
+    let off = [hf64!("0x1p-53"), 0.0, 0.0, 0.0];
+
+    let rm = Rounding::get();
+
+    /* rm=0 for rounding to nearest, and other values for directed roundings */
+    let hx: u64 = x.to_bits();
+    let mut mant: u64 = hx & f64::SIG_MASK;
+    let sign: u64 = hx >> 63;
+
+    let mut e: u32 = (hx >> f64::SIG_BITS) as u32 & f64::EXP_SAT;
+
+    if ((e + 1) & f64::EXP_SAT) < 2 {
+        cold_path();
+
+        let ix: u64 = hx & !f64::SIGN_MASK;
+
+        /* 0, inf, nan: we return x + x instead of simply x,
+        to that for x a signaling NaN, it correctly triggers
+        the invalid exception. */
+        if e == f64::EXP_SAT || ix == 0 {
+            return x + x;
+        }
+
+        let nz = ix.leading_zeros() - 11; /* subnormal */
+        mant <<= nz;
+        mant &= f64::SIG_MASK;
+        e = e.wrapping_sub(nz - 1);
+    }
+
+    e = e.wrapping_add(3072);
+    let cvt1: u64 = mant | (0x3ffu64 << 52);
+    let mut cvt5: u64 = cvt1;
+
+    let et: u32 = e / 3;
+    let it: u32 = e % 3;
+
+    /* 2^(3k+it) <= x < 2^(3k+it+1), with 0 <= it <= 3 */
+    cvt5 += u64::from(it) << f64::SIG_BITS;
+    cvt5 |= sign << 63;
+    let zz: f64 = f64::from_bits(cvt5);
+
+    /* cbrt(x) = cbrt(zz)*2^(et-1365) where 1 <= zz < 8 */
+    let mut isc: u64 = ESCALE[it as usize].to_bits(); // todo: index
+    isc |= sign << 63;
+    let cvt2: u64 = isc;
+    let z: f64 = f64::from_bits(cvt1);
+
+    /* cbrt(zz) = cbrt(z)*isc, where isc encodes 1, 2^(1/3) or 2^(2/3),
+    and 1 <= z < 2 */
+    let r: f64 = 1.0 / z;
+    let rr: f64 = r * rsc[((it as usize) << 1) | sign as usize];
+    let z2: f64 = z * z;
+    let c0: f64 = C[0] + z * C[1];
+    let c2: f64 = C[2] + z * C[3];
+    let mut y: f64 = c0 + z2 * c2;
+    let mut y2: f64 = y * y;
+
+    /* y is an approximation of z^(1/3) */
+    let mut h: f64 = y2 * (y * r) - 1.0;
+
+    /* h determines the error between y and z^(1/3) */
+    y -= (h * y) * (u0 - u1 * h);
+
+    /* The correction y -= (h*y)*(u0 - u1*h) corresponds to a cubic variant
+    of Newton's method, with the function f(y) = 1-z/y^3. */
+    y *= f64::from_bits(cvt2);
+
+    /* Now y is an approximation of zz^(1/3),
+     * and rr an approximation of 1/zz. We now perform another iteration of
+     * Newton-Raphson, this time with a linear approximation only. */
+    y2 = y * y;
+    let mut y2l: f64 = fmaf64(y, y, -y2);
+
+    /* y2 + y2l = y^2 exactly */
+    let mut y3: f64 = y2 * y;
+    let mut y3l: f64 = fmaf64(y, y2, -y3) + y * y2l;
+
+    /* y3 + y3l approximates y^3 with about 106 bits of accuracy */
+    h = ((y3 - zz) + y3l) * rr;
+    let mut dy: f64 = h * (y * u0);
+
+    /* the approximation of zz^(1/3) is y - dy */
+    let mut y1: f64 = y - dy;
+    dy = (y - y1) - dy;
+
+    /* the approximation of zz^(1/3) is now y1 + dy, where |dy| < 1/2 ulp(y)
+     * (for rounding to nearest) */
+    let mut ady: f64 = dy.abs();
+
+    /* For directed roundings, ady0 is tiny when dy is tiny, or ady0 is near
+     * from ulp(1);
+     * for rounding to nearest, ady0 is tiny when dy is near from 1/2 ulp(1),
+     * or from 3/2 ulp(1). */
+    let mut ady0: f64 = (ady - off[rm as usize]).abs();
+    let mut ady1: f64 = (ady - (hf64!("0x1p-52") + off[rm as usize])).abs();
+
+    if ady0 < hf64!("0x1p-75") || ady1 < hf64!("0x1p-75") {
+        cold_path();
+
+        y2 = y1 * y1;
+        y2l = fmaf64(y1, y1, -y2);
+        y3 = y2 * y1;
+        y3l = fmaf64(y1, y2, -y3) + y1 * y2l;
+        h = ((y3 - zz) + y3l) * rr;
+        dy = h * (y1 * u0);
+        y = y1 - dy;
+        dy = (y1 - y) - dy;
+        y1 = y;
+        ady = dy.abs();
+        ady0 = (ady - off[rm as usize]).abs();
+        ady1 = (ady - (hf64!("0x1p-52") + off[rm as usize])).abs();
+
+        if ady0 < hf64!("0x1p-98") || ady1 < hf64!("0x1p-98") {
+            cold_path();
+            let azz: f64 = zz.abs();
+
+            // ~ 0x1.79d15d0e8d59b80000000000000ffc3dp+0
+            if azz == hf64!("0x1.9b78223aa307cp+1") {
+                y1 = hf64!("0x1.79d15d0e8d59cp+0").copysign(zz);
+            }
+
+            // ~ 0x1.de87aa837820e80000000000001c0f08p+0
+            if azz == hf64!("0x1.a202bfc89ddffp+2") {
+                y1 = hf64!("0x1.de87aa837820fp+0").copysign(zz);
+            }
+
+            if rm != Rounding::Nearest {
+                let wlist = [
+                    (hf64!("0x1.3a9ccd7f022dbp+0"), hf64!("0x1.1236160ba9b93p+0")), // ~ 0x1.1236160ba9b930000000000001e7e8fap+0
+                    (hf64!("0x1.7845d2faac6fep+0"), hf64!("0x1.23115e657e49cp+0")), // ~ 0x1.23115e657e49c0000000000001d7a799p+0
+                    (hf64!("0x1.d1ef81cbbbe71p+0"), hf64!("0x1.388fb44cdcf5ap+0")), // ~ 0x1.388fb44cdcf5a0000000000002202c55p+0
+                    (hf64!("0x1.0a2014f62987cp+1"), hf64!("0x1.46bcbf47dc1e8p+0")), // ~ 0x1.46bcbf47dc1e8000000000000303aa2dp+0
+                    (hf64!("0x1.fe18a044a5501p+1"), hf64!("0x1.95decfec9c904p+0")), // ~ 0x1.95decfec9c9040000000000000159e8ep+0
+                    (hf64!("0x1.a6bb8c803147bp+2"), hf64!("0x1.e05335a6401dep+0")), // ~ 0x1.e05335a6401de00000000000027ca017p+0
+                    (hf64!("0x1.ac8538a031cbdp+2"), hf64!("0x1.e281d87098de8p+0")), // ~ 0x1.e281d87098de80000000000000ee9314p+0
+                ];
+
+                for (a, b) in wlist {
+                    if azz == a {
+                        let tmp = if rm as u64 + sign == 2 { hf64!("0x1p-52") } else { 0.0 };
+                        y1 = (b + tmp).copysign(zz);
+                    }
+                }
+            }
+        }
+    }
+
+    let mut cvt3: u64 = y1.to_bits();
+    cvt3 = cvt3.wrapping_add(((et.wrapping_sub(342).wrapping_sub(1023)) as u64) << 52);
+    let m0: u64 = cvt3 << 30;
+    let m1 = m0 >> 63;
+
+    if (m0 ^ m1) <= (1u64 << 30) {
+        cold_path();
+
+        let mut cvt4: u64 = y1.to_bits();
+        cvt4 = (cvt4 + (164 << 15)) & 0xffffffffffff0000u64;
+
+        if ((f64::from_bits(cvt4) - y1) - dy).abs() < hf64!("0x1p-60") || (zz).abs() == 1.0 {
+            cvt3 = (cvt3 + (1u64 << 15)) & 0xffffffffffff0000u64;
+        }
     }
 
-    /*
-     * Rough cbrt to 5 bits:
-     *    cbrt(2**e*(1+m) ~= 2**(e/3)*(1+(e%3+m)/3)
-     * where e is integral and >= 0, m is real and in [0, 1), and "/" and
-     * "%" are integer division and modulus with rounding towards minus
-     * infinity.  The RHS is always >= the LHS and has a maximum relative
-     * error of about 1 in 16.  Adding a bias of -0.03306235651 to the
-     * (e%3+m)/3 term reduces the error to about 1 in 32. With the IEEE
-     * floating point representation, for finite positive normal values,
-     * ordinary integer divison of the value in bits magically gives
-     * almost exactly the RHS of the above provided we first subtract the
-     * exponent bias (1023 for doubles) and later add it back.  We do the
-     * subtraction virtually to keep e >= 0 so that ordinary integer
-     * division rounds towards minus infinity; this is also efficient.
-     */
-    if hx < 0x00100000 {
-        /* zero or subnormal? */
-        ui = (x * x1p54).to_bits();
-        hx = (ui >> 32) as u32 & 0x7fffffff;
-        if hx == 0 {
-            return x; /* cbrt(0) is itself */
+    f64::from_bits(cvt3)
+}
+
+fn fmaf64(x: f64, y: f64, z: f64) -> f64 {
+    #[cfg(intrinsics_enabled)]
+    {
+        return unsafe { core::intrinsics::fmaf64(x, y, z) };
+    }
+
+    #[cfg(not(intrinsics_enabled))]
+    {
+        return super::fma(x, y, z);
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn spot_checks() {
+        if !cfg!(x86_no_sse) {
+            // Exposes a rounding mode problem. Ignored on i586 because of inaccurate FMA.
+            assert_biteq!(
+                cbrt(f64::from_bits(0xf7f792b28f600000)),
+                f64::from_bits(0xd29ce68655d962f3)
+            );
         }
-        hx = hx / 3 + B2;
-    } else {
-        hx = hx / 3 + B1;
     }
-    ui &= 1 << 63;
-    ui |= (hx as u64) << 32;
-    t = f64::from_bits(ui);
-
-    /*
-     * New cbrt to 23 bits:
-     *    cbrt(x) = t*cbrt(x/t**3) ~= t*P(t**3/x)
-     * where P(r) is a polynomial of degree 4 that approximates 1/cbrt(r)
-     * to within 2**-23.5 when |r - 1| < 1/10.  The rough approximation
-     * has produced t such than |t/cbrt(x) - 1| ~< 1/32, and cubing this
-     * gives us bounds for r = t**3/x.
-     *
-     * Try to optimize for parallel evaluation as in __tanf.c.
-     */
-    r = (t * t) * (t / x);
-    t = t * ((P0 + r * (P1 + r * P2)) + ((r * r) * r) * (P3 + r * P4));
-
-    /*
-     * Round t away from zero to 23 bits (sloppily except for ensuring that
-     * the result is larger in magnitude than cbrt(x) but not much more than
-     * 2 23-bit ulps larger).  With rounding towards zero, the error bound
-     * would be ~5/6 instead of ~4/6.  With a maximum error of 2 23-bit ulps
-     * in the rounded t, the infinite-precision error in the Newton
-     * approximation barely affects third digit in the final error
-     * 0.667; the error in the rounded t can be up to about 3 23-bit ulps
-     * before the final error is larger than 0.667 ulps.
-     */
-    ui = t.to_bits();
-    ui = (ui + 0x80000000) & 0xffffffffc0000000;
-    t = f64::from_bits(ui);
-
-    /* one step Newton iteration to 53 bits with error < 0.667 ulps */
-    s = t * t; /* t*t is exact */
-    r = x / s; /* error <= 0.5 ulps; |r| < |t| */
-    w = t + t; /* t+t is exact */
-    r = (r - t) / (w + r); /* r-t is exact; w+r ~= 3*t */
-    t = t + t * r; /* error <= 0.5 + 0.5/3 + epsilon */
-    t
 }

From f069b541782a1ead595bba96a274310f7567af7c Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sat, 25 Jan 2025 00:31:49 +0000
Subject: [PATCH 226/279] Decrease the allowed error for `cbrt`

With the correctly rounded implementation, we can reduce the ULP
requirement for `cbrt` to zero. There is still an override required for
`i586` because of the imprecise FMA.
---
 crates/libm-test/src/precision.rs | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/crates/libm-test/src/precision.rs b/crates/libm-test/src/precision.rs
index 20aa96b6a..a85996539 100644
--- a/crates/libm-test/src/precision.rs
+++ b/crates/libm-test/src/precision.rs
@@ -41,7 +41,7 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 {
         | Bn::Trunc => 0,
 
         // Operations that aren't required to be exact, but our implementations are.
-        Bn::Cbrt if ctx.fn_ident != Id::Cbrt => 0,
+        Bn::Cbrt => 0,
 
         // Bessel functions have large inaccuracies.
         Bn::J0 | Bn::J1 | Bn::Y0 | Bn::Y1 | Bn::Jn | Bn::Yn => 8_000_000,
@@ -54,7 +54,6 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 {
         Bn::Atan => 1,
         Bn::Atan2 => 2,
         Bn::Atanh => 2,
-        Bn::Cbrt => 1,
         Bn::Cos => 1,
         Bn::Cosh => 1,
         Bn::Erf => 1,
@@ -92,6 +91,7 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 {
         }
 
         match ctx.fn_ident {
+            Id::Cbrt => ulp = 2,
             // FIXME(#401): musl has an incorrect result here.
             Id::Fdim => ulp = 2,
             Id::Sincosf => ulp = 500,
@@ -119,6 +119,7 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 {
 
             Id::Asinh => ulp = 3,
             Id::Asinhf => ulp = 3,
+            Id::Cbrt => ulp = 1,
             Id::Exp10 | Id::Exp10f => ulp = 1_000_000,
             Id::Exp2 | Id::Exp2f => ulp = 10_000_000,
             Id::Log1p | Id::Log1pf => ulp = 2,

From 555a29c32d2d8fd8b57603abdf1d76ec9b8a0042 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sat, 8 Feb 2025 07:06:59 +0000
Subject: [PATCH 227/279] Add simple icount benchmarks for `u256` operations

---
 crates/libm-test/benches/icount.rs | 101 +++++++++++++++++++++++++++++
 src/math/support/big.rs            |   2 +-
 src/math/support/mod.rs            |   2 +
 3 files changed, 104 insertions(+), 1 deletion(-)

diff --git a/crates/libm-test/benches/icount.rs b/crates/libm-test/benches/icount.rs
index c41cef24e..232a3de38 100644
--- a/crates/libm-test/benches/icount.rs
+++ b/crates/libm-test/benches/icount.rs
@@ -1,8 +1,10 @@
 //! Benchmarks that use `iai-cachegrind` to be reasonably CI-stable.
 
 use std::hint::black_box;
+use std::ops::Shr;
 
 use iai_callgrind::{library_benchmark, library_benchmark_group, main};
+use libm::support::{HInt, u256};
 use libm_test::gen::spaced;
 use libm_test::{CheckBasis, CheckCtx, GeneratorKind, MathOp, OpRustArgs, TupleCall, op};
 
@@ -51,8 +53,107 @@ libm_macros::for_each_function! {
     callback: icount_benches,
 }
 
+fn setup_u128_mul() -> Vec<(u128, u128)> {
+    let step = u128::MAX / 300;
+    let mut x = 0u128;
+    let mut y = 0u128;
+    let mut v = Vec::new();
+
+    loop {
+        'inner: loop {
+            match y.checked_add(step) {
+                Some(new) => y = new,
+                None => break 'inner,
+            }
+
+            v.push((x, y))
+        }
+
+        match x.checked_add(step) {
+            Some(new) => x = new,
+            None => break,
+        }
+    }
+
+    v
+}
+
+/*
+fn setup_u256_add() -> Vec<(u256, u256)> {
+    let mut v = Vec::new();
+    for (x, y) in setup_u128_mul() {
+        // square the u128 inputs to cover most of the u256 range
+        v.push((x.widen_mul(x), y.widen_mul(y)));
+    }
+    // Doesn't get covered by `u128:MAX^2`
+    v.push((u256::MAX, u256::MAX));
+    v
+}
+*/
+
+fn setup_u256_shift() -> Vec<(u256, u32)> {
+    let mut v = Vec::new();
+
+    for (x, _) in setup_u128_mul() {
+        let x2 = x.widen_mul(x);
+        for y in 0u32..256 {
+            v.push((x2, y));
+        }
+    }
+
+    v
+}
+
+#[library_benchmark]
+#[bench::linspace(setup_u128_mul())]
+fn icount_bench_u128_widen_mul(cases: Vec<(u128, u128)>) {
+    let f = black_box(u128::zero_widen_mul);
+    for (x, y) in cases.iter().copied() {
+        f(x, y);
+    }
+}
+
+library_benchmark_group!(
+    name = icount_bench_u128_widen_mul_group;
+    benchmarks = icount_bench_u128_widen_mul
+);
+
+/* Not yet implemented
+#[library_benchmark]
+#[bench::linspace(setup_u256_add())]
+fn icount_bench_u256_add(cases: Vec<(u256, u256)>) {
+    let f = black_box(u256::add);
+    for (x, y) in cases.iter().copied() {
+        f(x, y);
+    }
+}
+
+library_benchmark_group!(
+    name = icount_bench_u256_add_group;
+    benchmarks = icount_bench_u256_add
+);
+*/
+
+#[library_benchmark]
+#[bench::linspace(setup_u256_shift())]
+fn icount_bench_u256_shr(cases: Vec<(u256, u32)>) {
+    let f = black_box(u256::shr);
+    for (x, y) in cases.iter().copied() {
+        f(x, y);
+    }
+}
+
+library_benchmark_group!(
+    name = icount_bench_u256_shr_group;
+    benchmarks = icount_bench_u256_shr
+);
+
 main!(
     library_benchmark_groups =
+    // u256-related benchmarks
+    icount_bench_u128_widen_mul_group,
+    // icount_bench_u256_add_group,
+    icount_bench_u256_shr_group,
     // verify-apilist-start
     // verify-sorted-start
     icount_bench_acos_group,
diff --git a/src/math/support/big.rs b/src/math/support/big.rs
index bf47d2001..7a437b67a 100644
--- a/src/math/support/big.rs
+++ b/src/math/support/big.rs
@@ -20,7 +20,7 @@ const U128_LO_MASK: u128 = u64::MAX as u128;
 pub struct u256(pub [u64; 4]);
 
 impl u256 {
-    #[cfg(test)]
+    #[allow(unused)]
     pub const MAX: Self = Self([u64::MAX, u64::MAX, u64::MAX, u64::MAX]);
 
     /// Reinterpret as a signed integer
diff --git a/src/math/support/mod.rs b/src/math/support/mod.rs
index 9eebd4403..28e9fd413 100644
--- a/src/math/support/mod.rs
+++ b/src/math/support/mod.rs
@@ -5,6 +5,8 @@ mod float_traits;
 pub mod hex_float;
 mod int_traits;
 
+#[allow(unused_imports)]
+pub use big::{i256, u256};
 #[allow(unused_imports)]
 pub use float_traits::{DFloat, Float, HFloat, IntTy};
 pub(crate) use float_traits::{f32_from_bits, f64_from_bits};

From d202e8f17f777c2874ba2de986f5ad911aa573d2 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sat, 8 Feb 2025 09:39:05 +0000
Subject: [PATCH 228/279] Change how operators are `black_box`ed

For some reason, the upcoming limb changes in [1] seem to ignore the
black boxing when applied to the operator function. Changing to instead
black box the inputs appears to fix this.

[1]: https://github.com/rust-lang/libm/pull/503
---
 crates/libm-test/benches/icount.rs | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/crates/libm-test/benches/icount.rs b/crates/libm-test/benches/icount.rs
index 232a3de38..9fac52e0b 100644
--- a/crates/libm-test/benches/icount.rs
+++ b/crates/libm-test/benches/icount.rs
@@ -1,7 +1,6 @@
 //! Benchmarks that use `iai-cachegrind` to be reasonably CI-stable.
 
 use std::hint::black_box;
-use std::ops::Shr;
 
 use iai_callgrind::{library_benchmark, library_benchmark_group, main};
 use libm::support::{HInt, u256};
@@ -107,9 +106,8 @@ fn setup_u256_shift() -> Vec<(u256, u32)> {
 #[library_benchmark]
 #[bench::linspace(setup_u128_mul())]
 fn icount_bench_u128_widen_mul(cases: Vec<(u128, u128)>) {
-    let f = black_box(u128::zero_widen_mul);
     for (x, y) in cases.iter().copied() {
-        f(x, y);
+        black_box(black_box(x).zero_widen_mul(black_box(y)));
     }
 }
 
@@ -122,9 +120,8 @@ library_benchmark_group!(
 #[library_benchmark]
 #[bench::linspace(setup_u256_add())]
 fn icount_bench_u256_add(cases: Vec<(u256, u256)>) {
-    let f = black_box(u256::add);
     for (x, y) in cases.iter().copied() {
-        f(x, y);
+        black_box(black_box(x) + black_box(y));
     }
 }
 
@@ -137,9 +134,8 @@ library_benchmark_group!(
 #[library_benchmark]
 #[bench::linspace(setup_u256_shift())]
 fn icount_bench_u256_shr(cases: Vec<(u256, u32)>) {
-    let f = black_box(u256::shr);
     for (x, y) in cases.iter().copied() {
-        f(x, y);
+        black_box(black_box(x) >> black_box(y));
     }
 }
 

From 22c83fe3c93c77812d0e60acd785f0f81f077d22 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sat, 8 Feb 2025 09:48:14 +0000
Subject: [PATCH 229/279] Replace an `assert!` with `debug_assert!` in
 `u256::shr`

The implementation came from the `compiler_builtins` port but this
should be weakened to match other integer types.
---
 src/math/support/big.rs       |  5 ++++-
 src/math/support/big/tests.rs | 19 +++++++++++++++++++
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/src/math/support/big.rs b/src/math/support/big.rs
index 7a437b67a..017e9455e 100644
--- a/src/math/support/big.rs
+++ b/src/math/support/big.rs
@@ -109,7 +109,10 @@ impl ops::Shr<u32> for u256 {
     type Output = Self;
 
     fn shr(self, rhs: u32) -> Self::Output {
-        assert!(rhs < Self::BITS, "attempted to shift right with overflow");
+        debug_assert!(rhs < Self::BITS, "attempted to shift right with overflow");
+        if rhs >= Self::BITS {
+            return Self::ZERO;
+        }
 
         if rhs == 0 {
             return self;
diff --git a/src/math/support/big/tests.rs b/src/math/support/big/tests.rs
index f95f82973..815a62dfe 100644
--- a/src/math/support/big/tests.rs
+++ b/src/math/support/big/tests.rs
@@ -108,3 +108,22 @@ fn shr_u128() {
     }
     assert!(errors.is_empty());
 }
+
+#[test]
+#[should_panic]
+#[cfg(debug_assertions)]
+// FIXME(ppc): ppc64le seems to have issues with `should_panic` tests.
+#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
+fn shr_u256_overflow() {
+    // Like regular shr, panic on overflow with debug assertions
+    let _ = u256::MAX >> 256;
+}
+
+#[test]
+#[cfg(not(debug_assertions))]
+fn shr_u256_overflow() {
+    // No panic without debug assertions
+    assert_eq!(u256::MAX >> 256, u256::ZERO);
+    assert_eq!(u256::MAX >> 257, u256::ZERO);
+    assert_eq!(u256::MAX >> u32::MAX, u256::ZERO);
+}

From 995bc5a62043b9d9a6c6bffefc10e6c5b77cbb15 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 10 Feb 2025 02:05:41 +0000
Subject: [PATCH 230/279] Increase the tolerance for `jn` and `yn`

These still fail random tests, e.g.:

    called `Result::unwrap()` on an `Err` value: jn

    Caused by:
        0:
               input:    (1068, -16013.98381387313)
               as hex:   (, -0x1.f46fded9ced39p+13)
               as bits:  (0x0000042c, 0xc0cf46fded9ced39)
               expected: 6.7603314308122506e-6  0x1.c5ad9c102d413p-18 0x3edc5ad9c102d413
               actual:   6.7603314308006335e-6  0x1.c5ad9c1029e80p-18 0x3edc5ad9c1029e80
        1: ulp 13715 > 4000

    Caused by:
        0:
               input:    (195, 42147.94)
               as hex:   (, 0x1.4947e2p+15)
               as bits:  (0x000000c3, 0x4724a3f1)
               expected: -2.13669e-7            -0x1.cad9c6p-23 0xb4656ce3
               actual:   -2.1376937e-7          -0x1.cb10f4p-23 0xb465887a
        1: ulp 7063 > 4000

    Caused by:
        0:
               input:    (194, 740.1916)
               as hex:   (, 0x1.721886p+9)
               as bits:  (0x000000c2, 0x44390c43)
               expected: 1.212096e-6            0x1.455e9ap-20 0x35a2af4d
               actual:   1.2172386e-6           0x1.46c000p-20 0x35a36000
        1: ulp 45235 > 10000

Increase allowed precision to avoid spurious failures.
---
 crates/libm-test/src/precision.rs | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/crates/libm-test/src/precision.rs b/crates/libm-test/src/precision.rs
index a85996539..2f55ad22e 100644
--- a/crates/libm-test/src/precision.rs
+++ b/crates/libm-test/src/precision.rs
@@ -523,18 +523,7 @@ fn int_float_common<F1: Float, F2: Float>(
         && actual == F2::ZERO
         && expected == F2::ZERO
     {
-        return XFAIL("mpfr b");
-    }
-
-    // Our bessel functions blow up with large N values
-    if ctx.basis == Musl && (ctx.base_name == BaseName::Jn || ctx.base_name == BaseName::Yn) {
-        if input.0 > 4000 {
-            return XFAIL_NOCHECK;
-        } else if input.0 > 2000 {
-            return CheckAction::AssertWithUlp(20_000);
-        } else if input.0 > 1000 {
-            return CheckAction::AssertWithUlp(4_000);
-        }
+        return XFAIL("we disagree with MPFR on the sign of zero");
     }
 
     // Values near infinity sometimes get cut off for us. `ynf(681, 509.90924) = -inf` but should
@@ -549,6 +538,19 @@ fn int_float_common<F1: Float, F2: Float>(
         return XFAIL_NOCHECK;
     }
 
+    // Our bessel functions blow up with large N values
+    if ctx.basis == Musl && (ctx.base_name == BaseName::Jn || ctx.base_name == BaseName::Yn) {
+        if cfg!(x86_no_sse) {
+            // Precision is especially bad on i586, not worth checking.
+            return XFAIL_NOCHECK;
+        }
+
+        if input.0 > 4000 {
+            return XFAIL_NOCHECK;
+        } else if input.0 > 100 {
+            return CheckAction::AssertWithUlp(1_000_000);
+        }
+    }
     DEFAULT
 }
 

From 0d9ab5ccd841bd4357a6f8184544cffe453ccb36 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sun, 9 Feb 2025 22:40:20 +0000
Subject: [PATCH 231/279] ci: Pin the nightly toolchain for i686-pc-windows-gnu

Pin i686-pc-windows-gnu to nightly-2025-02-07 until [1] is resolved.

[1]: https://github.com/rust-lang/rust/issues/136795
---
 .github/workflows/main.yaml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index 265702965..f066f4a8c 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -68,7 +68,8 @@ jobs:
           os: windows-2025
         - target: i686-pc-windows-gnu
           os: windows-2025
-          channel: nightly-i686-gnu
+          # FIXME: pinned due to https://github.com/rust-lang/rust/issues/136795
+          channel: nightly-2025-02-07-i686-gnu
         - target: x86_64-pc-windows-gnu
           os: windows-2025
           channel: nightly-x86_64-gnu

From 6fab367310eb085fa1d9fbe49634567e48346131 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sat, 8 Feb 2025 04:09:59 +0000
Subject: [PATCH 232/279] Implement `u256` with two `u128`s rather than `u64`

This produces better assembly, e.g. on aarch64:

            .globl  libm::u128_wmul
            .p2align        2
    libm::u128_wmul:
    Lfunc_begin124:
            .cfi_startproc
            mul x9, x2, x0
            umulh x10, x2, x0
            umulh x11, x3, x0
            mul x12, x3, x0
            umulh x13, x2, x1
            mul x14, x2, x1
            umulh x15, x3, x1
            mul x16, x3, x1
            adds x10, x10, x14
            cinc x13, x13, hs
            adds x13, x13, x16
            cinc x14, x15, hs
            adds x10, x10, x12
            cinc x11, x11, hs
            adds x11, x13, x11
            stp x9, x10, [x8]
            cinc x9, x14, hs
            stp x11, x9, [x8, #16]
            ret

The original was ~70 instructions so the improvement is significant.
With these changes, the result is reasonably close to what LLVM
generates using `u256` operands [1].

[1]: https://llvm.godbolt.org/z/re1aGdaqY
---
 crates/libm-test/benches/icount.rs |   6 +-
 crates/libm-test/src/gen/random.rs |   2 +-
 crates/libm-test/src/lib.rs        |   5 +-
 crates/libm-test/src/run_cfg.rs    |  30 ++--
 crates/libm-test/tests/u256.rs     | 147 +++++++++++++++++++
 src/math/support/big.rs            | 217 ++++++++++-------------------
 src/math/support/big/tests.rs      |  79 ++++++-----
 7 files changed, 298 insertions(+), 188 deletions(-)
 create mode 100644 crates/libm-test/tests/u256.rs

diff --git a/crates/libm-test/benches/icount.rs b/crates/libm-test/benches/icount.rs
index 9fac52e0b..be85dd567 100644
--- a/crates/libm-test/benches/icount.rs
+++ b/crates/libm-test/benches/icount.rs
@@ -77,7 +77,6 @@ fn setup_u128_mul() -> Vec<(u128, u128)> {
     v
 }
 
-/*
 fn setup_u256_add() -> Vec<(u256, u256)> {
     let mut v = Vec::new();
     for (x, y) in setup_u128_mul() {
@@ -88,7 +87,6 @@ fn setup_u256_add() -> Vec<(u256, u256)> {
     v.push((u256::MAX, u256::MAX));
     v
 }
-*/
 
 fn setup_u256_shift() -> Vec<(u256, u32)> {
     let mut v = Vec::new();
@@ -116,7 +114,6 @@ library_benchmark_group!(
     benchmarks = icount_bench_u128_widen_mul
 );
 
-/* Not yet implemented
 #[library_benchmark]
 #[bench::linspace(setup_u256_add())]
 fn icount_bench_u256_add(cases: Vec<(u256, u256)>) {
@@ -129,7 +126,6 @@ library_benchmark_group!(
     name = icount_bench_u256_add_group;
     benchmarks = icount_bench_u256_add
 );
-*/
 
 #[library_benchmark]
 #[bench::linspace(setup_u256_shift())]
@@ -148,7 +144,7 @@ main!(
     library_benchmark_groups =
     // u256-related benchmarks
     icount_bench_u128_widen_mul_group,
-    // icount_bench_u256_add_group,
+    icount_bench_u256_add_group,
     icount_bench_u256_shr_group,
     // verify-apilist-start
     // verify-sorted-start
diff --git a/crates/libm-test/src/gen/random.rs b/crates/libm-test/src/gen/random.rs
index 5b127f38d..c2cd172d1 100644
--- a/crates/libm-test/src/gen/random.rs
+++ b/crates/libm-test/src/gen/random.rs
@@ -14,7 +14,7 @@ use crate::run_cfg::{int_range, iteration_count};
 
 pub(crate) const SEED_ENV: &str = "LIBM_SEED";
 
-pub(crate) static SEED: LazyLock<[u8; 32]> = LazyLock::new(|| {
+pub static SEED: LazyLock<[u8; 32]> = LazyLock::new(|| {
     let s = env::var(SEED_ENV).unwrap_or_else(|_| {
         let mut rng = rand::thread_rng();
         (0..32).map(|_| rng.sample(Alphanumeric) as char).collect()
diff --git a/crates/libm-test/src/lib.rs b/crates/libm-test/src/lib.rs
index d2fef2325..824f09a33 100644
--- a/crates/libm-test/src/lib.rs
+++ b/crates/libm-test/src/lib.rs
@@ -29,7 +29,10 @@ pub use op::{
 };
 pub use precision::{MaybeOverride, SpecialCase, default_ulp};
 use run_cfg::extensive_max_iterations;
-pub use run_cfg::{CheckBasis, CheckCtx, EXTENSIVE_ENV, GeneratorKind, skip_extensive_test};
+pub use run_cfg::{
+    CheckBasis, CheckCtx, EXTENSIVE_ENV, GeneratorKind, bigint_fuzz_iteration_count,
+    skip_extensive_test,
+};
 pub use test_traits::{CheckOutput, Hex, TupleCall};
 
 /// Result type for tests is usually from `anyhow`. Most times there is no success value to
diff --git a/crates/libm-test/src/run_cfg.rs b/crates/libm-test/src/run_cfg.rs
index 4dd43bdf3..6b2689976 100644
--- a/crates/libm-test/src/run_cfg.rs
+++ b/crates/libm-test/src/run_cfg.rs
@@ -158,14 +158,6 @@ impl TestEnv {
         let op = id.math_op();
 
         let will_run_mp = cfg!(feature = "build-mpfr");
-
-        // Tests are pretty slow on non-64-bit targets, x86 MacOS, and targets that run in QEMU. Start
-        // with a reduced number on these platforms.
-        let slow_on_ci = crate::emulated()
-            || usize::BITS < 64
-            || cfg!(all(target_arch = "x86_64", target_vendor = "apple"));
-        let slow_platform = slow_on_ci && crate::ci();
-
         let large_float_ty = match op.float_ty {
             FloatTy::F16 | FloatTy::F32 => false,
             FloatTy::F64 | FloatTy::F128 => true,
@@ -176,7 +168,7 @@ impl TestEnv {
         let input_count = op.rust_sig.args.len();
 
         Self {
-            slow_platform,
+            slow_platform: slow_platform(),
             large_float_ty,
             should_run_extensive: will_run_extensive,
             mp_tests_enabled: will_run_mp,
@@ -185,6 +177,17 @@ impl TestEnv {
     }
 }
 
+/// Tests are pretty slow on non-64-bit targets, x86 MacOS, and targets that run in QEMU. Start
+/// with a reduced number on these platforms.
+fn slow_platform() -> bool {
+    let slow_on_ci = crate::emulated()
+        || usize::BITS < 64
+        || cfg!(all(target_arch = "x86_64", target_vendor = "apple"));
+
+    // If not running in CI, there is no need to reduce iteration count.
+    slow_on_ci && crate::ci()
+}
+
 /// The number of iterations to run for a given test.
 pub fn iteration_count(ctx: &CheckCtx, argnum: usize) -> u64 {
     let t_env = TestEnv::from_env(ctx);
@@ -351,3 +354,12 @@ pub fn skip_extensive_test(ctx: &CheckCtx) -> bool {
     let t_env = TestEnv::from_env(ctx);
     !t_env.should_run_extensive
 }
+
+/// The number of iterations to run for `u256` fuzz tests.
+pub fn bigint_fuzz_iteration_count() -> u64 {
+    if !cfg!(optimizations_enabled) {
+        return 1000;
+    }
+
+    if slow_platform() { 100_000 } else { 5_000_000 }
+}
diff --git a/crates/libm-test/tests/u256.rs b/crates/libm-test/tests/u256.rs
new file mode 100644
index 000000000..4174820c0
--- /dev/null
+++ b/crates/libm-test/tests/u256.rs
@@ -0,0 +1,147 @@
+//! Test the u256 implementation. the ops already get exercised reasonably well through the `f128`
+//! routines, so this only does a few million fuzz iterations against GMP.
+
+#![cfg(feature = "build-mpfr")]
+
+use std::sync::LazyLock;
+
+use libm::support::{HInt, u256};
+type BigInt = rug::Integer;
+
+use libm_test::bigint_fuzz_iteration_count;
+use libm_test::gen::random::SEED;
+use rand::{Rng, SeedableRng};
+use rand_chacha::ChaCha8Rng;
+use rug::Assign;
+use rug::integer::Order;
+use rug::ops::NotAssign;
+
+static BIGINT_U256_MAX: LazyLock<BigInt> =
+    LazyLock::new(|| BigInt::from_digits(&[u128::MAX, u128::MAX], Order::Lsf));
+
+/// Copied from the test module.
+fn hexu(v: u256) -> String {
+    format!("0x{:032x}{:032x}", v.hi, v.lo)
+}
+
+fn random_u256(rng: &mut ChaCha8Rng) -> u256 {
+    let lo: u128 = rng.gen();
+    let hi: u128 = rng.gen();
+    u256 { lo, hi }
+}
+
+fn assign_bigint(bx: &mut BigInt, x: u256) {
+    bx.assign_digits(&[x.lo, x.hi], Order::Lsf);
+}
+
+fn from_bigint(bx: &mut BigInt) -> u256 {
+    // Truncate so the result fits into `[u128; 2]`. This makes all ops overflowing.
+    *bx &= &*BIGINT_U256_MAX;
+    let mut bres = [0u128, 0];
+    bx.write_digits(&mut bres, Order::Lsf);
+    bx.assign(0);
+    u256 { lo: bres[0], hi: bres[1] }
+}
+
+fn check_one(
+    x: impl FnOnce() -> String,
+    y: impl FnOnce() -> Option<String>,
+    actual: u256,
+    expected: &mut BigInt,
+) {
+    let expected = from_bigint(expected);
+    if actual != expected {
+        let xmsg = x();
+        let ymsg = y().map(|y| format!("y:        {y}\n")).unwrap_or_default();
+        panic!(
+            "Results do not match\n\
+            input:    {xmsg}\n\
+            {ymsg}\
+            actual:   {}\n\
+            expected: {}\
+            ",
+            hexu(actual),
+            hexu(expected),
+        )
+    }
+}
+
+#[test]
+fn mp_u256_bitor() {
+    let mut rng = ChaCha8Rng::from_seed(*SEED);
+    let mut bx = BigInt::new();
+    let mut by = BigInt::new();
+
+    for _ in 0..bigint_fuzz_iteration_count() {
+        let x = random_u256(&mut rng);
+        let y = random_u256(&mut rng);
+        assign_bigint(&mut bx, x);
+        assign_bigint(&mut by, y);
+        let actual = x | y;
+        bx |= &by;
+        check_one(|| hexu(x), || Some(hexu(y)), actual, &mut bx);
+    }
+}
+
+#[test]
+fn mp_u256_not() {
+    let mut rng = ChaCha8Rng::from_seed(*SEED);
+    let mut bx = BigInt::new();
+
+    for _ in 0..bigint_fuzz_iteration_count() {
+        let x = random_u256(&mut rng);
+        assign_bigint(&mut bx, x);
+        let actual = !x;
+        bx.not_assign();
+        check_one(|| hexu(x), || None, actual, &mut bx);
+    }
+}
+
+#[test]
+fn mp_u256_add() {
+    let mut rng = ChaCha8Rng::from_seed(*SEED);
+    let mut bx = BigInt::new();
+    let mut by = BigInt::new();
+
+    for _ in 0..bigint_fuzz_iteration_count() {
+        let x = random_u256(&mut rng);
+        let y = random_u256(&mut rng);
+        assign_bigint(&mut bx, x);
+        assign_bigint(&mut by, y);
+        let actual = x + y;
+        bx += &by;
+        check_one(|| hexu(x), || Some(hexu(y)), actual, &mut bx);
+    }
+}
+
+#[test]
+fn mp_u256_shr() {
+    let mut rng = ChaCha8Rng::from_seed(*SEED);
+    let mut bx = BigInt::new();
+
+    for _ in 0..bigint_fuzz_iteration_count() {
+        let x = random_u256(&mut rng);
+        let shift: u32 = rng.gen_range(0..255);
+        assign_bigint(&mut bx, x);
+        let actual = x >> shift;
+        bx >>= shift;
+        check_one(|| hexu(x), || Some(shift.to_string()), actual, &mut bx);
+    }
+}
+
+#[test]
+fn mp_u256_widen_mul() {
+    let mut rng = ChaCha8Rng::from_seed(*SEED);
+    let mut bx = BigInt::new();
+    let mut by = BigInt::new();
+
+    for _ in 0..bigint_fuzz_iteration_count() {
+        let x: u128 = rng.gen();
+        let y: u128 = rng.gen();
+        bx.assign(x);
+        by.assign(y);
+        let actual = x.widen_mul(y);
+        bx *= &by;
+        check_one(|| format!("{x:#034x}"), || Some(format!("{y:#034x}")), actual, &mut bx);
+    }
+}
diff --git a/src/math/support/big.rs b/src/math/support/big.rs
index 017e9455e..eae08238e 100644
--- a/src/math/support/big.rs
+++ b/src/math/support/big.rs
@@ -7,40 +7,39 @@ use core::ops;
 
 use super::{DInt, HInt, Int, MinInt};
 
-const WORD_LO_MASK: u64 = 0x00000000ffffffff;
-const WORD_HI_MASK: u64 = 0xffffffff00000000;
-const WORD_FULL_MASK: u64 = 0xffffffffffffffff;
 const U128_LO_MASK: u128 = u64::MAX as u128;
 
-/// A 256-bit unsigned integer represented as 4 64-bit limbs.
-///
-/// Each limb is a native-endian number, but the array is little-limb-endian.
+/// A 256-bit unsigned integer represented as two 128-bit native-endian limbs.
 #[allow(non_camel_case_types)]
 #[derive(Clone, Copy, Debug, PartialEq, PartialOrd)]
-pub struct u256(pub [u64; 4]);
+pub struct u256 {
+    pub lo: u128,
+    pub hi: u128,
+}
 
 impl u256 {
-    #[allow(unused)]
-    pub const MAX: Self = Self([u64::MAX, u64::MAX, u64::MAX, u64::MAX]);
+    #[cfg(any(test, feature = "unstable-public-internals"))]
+    pub const MAX: Self = Self { lo: u128::MAX, hi: u128::MAX };
 
     /// Reinterpret as a signed integer
     pub fn signed(self) -> i256 {
-        i256(self.0)
+        i256 { lo: self.lo, hi: self.hi }
     }
 }
 
-/// A 256-bit signed integer represented as 4 64-bit limbs.
-///
-/// Each limb is a native-endian number, but the array is little-limb-endian.
+/// A 256-bit signed integer represented as two 128-bit native-endian limbs.
 #[allow(non_camel_case_types)]
 #[derive(Clone, Copy, Debug, PartialEq, PartialOrd)]
-pub struct i256(pub [u64; 4]);
+pub struct i256 {
+    pub lo: u128,
+    pub hi: u128,
+}
 
 impl i256 {
     /// Reinterpret as an unsigned integer
-    #[cfg(test)]
+    #[cfg(any(test, feature = "unstable-public-internals"))]
     pub fn unsigned(self) -> u256 {
-        u256(self.0)
+        u256 { lo: self.lo, hi: self.hi }
     }
 }
 
@@ -51,10 +50,10 @@ impl MinInt for u256 {
 
     const SIGNED: bool = false;
     const BITS: u32 = 256;
-    const ZERO: Self = Self([0u64; 4]);
-    const ONE: Self = Self([1, 0, 0, 0]);
-    const MIN: Self = Self([0u64; 4]);
-    const MAX: Self = Self([u64::MAX; 4]);
+    const ZERO: Self = Self { lo: 0, hi: 0 };
+    const ONE: Self = Self { lo: 1, hi: 0 };
+    const MIN: Self = Self { lo: 0, hi: 0 };
+    const MAX: Self = Self { lo: u128::MAX, hi: u128::MAX };
 }
 
 impl MinInt for i256 {
@@ -64,10 +63,10 @@ impl MinInt for i256 {
 
     const SIGNED: bool = false;
     const BITS: u32 = 256;
-    const ZERO: Self = Self([0u64; 4]);
-    const ONE: Self = Self([1, 0, 0, 0]);
-    const MIN: Self = Self([0, 0, 0, 1 << 63]);
-    const MAX: Self = Self([u64::MAX, u64::MAX, u64::MAX, u64::MAX << 1]);
+    const ZERO: Self = Self { lo: 0, hi: 0 };
+    const ONE: Self = Self { lo: 1, hi: 0 };
+    const MIN: Self = Self { lo: 0, hi: 1 << 127 };
+    const MAX: Self = Self { lo: u128::MAX, hi: u128::MAX << 1 };
 }
 
 macro_rules! impl_common {
@@ -76,10 +75,8 @@ macro_rules! impl_common {
             type Output = Self;
 
             fn bitor(mut self, rhs: Self) -> Self::Output {
-                self.0[0] |= rhs.0[0];
-                self.0[1] |= rhs.0[1];
-                self.0[2] |= rhs.0[2];
-                self.0[3] |= rhs.0[3];
+                self.lo |= rhs.lo;
+                self.hi |= rhs.hi;
                 self
             }
         }
@@ -87,8 +84,10 @@ macro_rules! impl_common {
         impl ops::Not for $ty {
             type Output = Self;
 
-            fn not(self) -> Self::Output {
-                Self([!self.0[0], !self.0[1], !self.0[2], !self.0[3]])
+            fn not(mut self) -> Self::Output {
+                self.lo = !self.lo;
+                self.hi = !self.hi;
+                self
             }
         }
 
@@ -105,10 +104,21 @@ macro_rules! impl_common {
 impl_common!(i256);
 impl_common!(u256);
 
+impl ops::Add<Self> for u256 {
+    type Output = Self;
+
+    fn add(self, rhs: Self) -> Self::Output {
+        let (lo, carry) = self.lo.overflowing_add(rhs.lo);
+        let hi = self.hi.wrapping_add(carry as u128).wrapping_add(rhs.hi);
+
+        Self { lo, hi }
+    }
+}
+
 impl ops::Shr<u32> for u256 {
     type Output = Self;
 
-    fn shr(self, rhs: u32) -> Self::Output {
+    fn shr(mut self, rhs: u32) -> Self::Output {
         debug_assert!(rhs < Self::BITS, "attempted to shift right with overflow");
         if rhs >= Self::BITS {
             return Self::ZERO;
@@ -118,57 +128,28 @@ impl ops::Shr<u32> for u256 {
             return self;
         }
 
-        let mut ret = self;
-        let byte_shift = rhs / 64;
-        let bit_shift = rhs % 64;
-
-        for idx in 0..4 {
-            let base_idx = idx + byte_shift as usize;
-
-            // FIXME(msrv): could be let...else.
-            let base = match ret.0.get(base_idx) {
-                Some(v) => v,
-                None => {
-                    ret.0[idx] = 0;
-                    continue;
-                }
-            };
-
-            let mut new_val = base >> bit_shift;
-
-            if let Some(new) = ret.0.get(base_idx + 1) {
-                new_val |= new.overflowing_shl(64 - bit_shift).0;
-            }
+        if rhs < 128 {
+            self.lo >>= rhs;
+            self.lo |= self.hi << (128 - rhs);
+        } else {
+            self.lo = self.hi >> (rhs - 128);
+        }
 
-            ret.0[idx] = new_val;
+        if rhs < 128 {
+            self.hi >>= rhs;
+        } else {
+            self.hi = 0;
         }
 
-        ret
+        self
     }
 }
 
-macro_rules! word {
-    (1, $val:expr) => {
-        (($val >> (32 * 3)) & Self::from(WORD_LO_MASK)) as u64
-    };
-    (2, $val:expr) => {
-        (($val >> (32 * 2)) & Self::from(WORD_LO_MASK)) as u64
-    };
-    (3, $val:expr) => {
-        (($val >> (32 * 1)) & Self::from(WORD_LO_MASK)) as u64
-    };
-    (4, $val:expr) => {
-        (($val >> (32 * 0)) & Self::from(WORD_LO_MASK)) as u64
-    };
-}
-
 impl HInt for u128 {
     type D = u256;
 
     fn widen(self) -> Self::D {
-        let w0 = self & u128::from(u64::MAX);
-        let w1 = (self >> u64::BITS) & u128::from(u64::MAX);
-        u256([w0 as u64, w1 as u64, 0, 0])
+        u256 { lo: self, hi: 0 }
     }
 
     fn zero_widen(self) -> Self::D {
@@ -176,57 +157,24 @@ impl HInt for u128 {
     }
 
     fn zero_widen_mul(self, rhs: Self) -> Self::D {
-        let product11: u64 = word!(1, self) * word!(1, rhs);
-        let product12: u64 = word!(1, self) * word!(2, rhs);
-        let product13: u64 = word!(1, self) * word!(3, rhs);
-        let product14: u64 = word!(1, self) * word!(4, rhs);
-        let product21: u64 = word!(2, self) * word!(1, rhs);
-        let product22: u64 = word!(2, self) * word!(2, rhs);
-        let product23: u64 = word!(2, self) * word!(3, rhs);
-        let product24: u64 = word!(2, self) * word!(4, rhs);
-        let product31: u64 = word!(3, self) * word!(1, rhs);
-        let product32: u64 = word!(3, self) * word!(2, rhs);
-        let product33: u64 = word!(3, self) * word!(3, rhs);
-        let product34: u64 = word!(3, self) * word!(4, rhs);
-        let product41: u64 = word!(4, self) * word!(1, rhs);
-        let product42: u64 = word!(4, self) * word!(2, rhs);
-        let product43: u64 = word!(4, self) * word!(3, rhs);
-        let product44: u64 = word!(4, self) * word!(4, rhs);
-
-        let sum0: u128 = u128::from(product44);
-        let sum1: u128 = u128::from(product34) + u128::from(product43);
-        let sum2: u128 = u128::from(product24) + u128::from(product33) + u128::from(product42);
-        let sum3: u128 = u128::from(product14)
-            + u128::from(product23)
-            + u128::from(product32)
-            + u128::from(product41);
-        let sum4: u128 = u128::from(product13) + u128::from(product22) + u128::from(product31);
-        let sum5: u128 = u128::from(product12) + u128::from(product21);
-        let sum6: u128 = u128::from(product11);
-
-        let r0: u128 =
-            (sum0 & u128::from(WORD_FULL_MASK)) + ((sum1 & u128::from(WORD_LO_MASK)) << 32);
-        let r1: u128 = (sum0 >> 64)
-            + ((sum1 >> 32) & u128::from(WORD_FULL_MASK))
-            + (sum2 & u128::from(WORD_FULL_MASK))
-            + ((sum3 << 32) & u128::from(WORD_HI_MASK));
-
-        let (lo, carry) = r0.overflowing_add(r1 << 64);
-        let hi = (r1 >> 64)
-            + (sum1 >> 96)
-            + (sum2 >> 64)
-            + (sum3 >> 32)
-            + sum4
-            + (sum5 << 32)
-            + (sum6 << 64)
-            + u128::from(carry);
-
-        u256([
-            (lo & U128_LO_MASK) as u64,
-            ((lo >> 64) & U128_LO_MASK) as u64,
-            (hi & U128_LO_MASK) as u64,
-            ((hi >> 64) & U128_LO_MASK) as u64,
-        ])
+        let l0 = self & U128_LO_MASK;
+        let l1 = rhs & U128_LO_MASK;
+        let h0 = self >> 64;
+        let h1 = rhs >> 64;
+
+        let p_ll: u128 = l0.overflowing_mul(l1).0;
+        let p_lh: u128 = l0.overflowing_mul(h1).0;
+        let p_hl: u128 = h0.overflowing_mul(l1).0;
+        let p_hh: u128 = h0.overflowing_mul(h1).0;
+
+        let s0 = p_hl + (p_ll >> 64);
+        let s1 = (p_ll & U128_LO_MASK) + (s0 << 64);
+        let s2 = p_lh + (s1 >> 64);
+
+        let lo = (p_ll & U128_LO_MASK) + (s2 << 64);
+        let hi = p_hh + (s0 >> 64) + (s2 >> 64);
+
+        u256 { lo, hi }
     }
 
     fn widen_mul(self, rhs: Self) -> Self::D {
@@ -244,8 +192,7 @@ impl HInt for i128 {
     fn widen(self) -> Self::D {
         let mut ret = self.unsigned().zero_widen().signed();
         if self.is_negative() {
-            ret.0[2] = u64::MAX;
-            ret.0[3] = u64::MAX;
+            ret.hi = u128::MAX;
         }
         ret
     }
@@ -271,17 +218,11 @@ impl DInt for u256 {
     type H = u128;
 
     fn lo(self) -> Self::H {
-        let mut tmp = [0u8; 16];
-        tmp[..8].copy_from_slice(&self.0[0].to_le_bytes());
-        tmp[8..].copy_from_slice(&self.0[1].to_le_bytes());
-        u128::from_le_bytes(tmp)
+        self.lo
     }
 
     fn hi(self) -> Self::H {
-        let mut tmp = [0u8; 16];
-        tmp[..8].copy_from_slice(&self.0[2].to_le_bytes());
-        tmp[8..].copy_from_slice(&self.0[3].to_le_bytes());
-        u128::from_le_bytes(tmp)
+        self.hi
     }
 }
 
@@ -289,16 +230,10 @@ impl DInt for i256 {
     type H = i128;
 
     fn lo(self) -> Self::H {
-        let mut tmp = [0u8; 16];
-        tmp[..8].copy_from_slice(&self.0[0].to_le_bytes());
-        tmp[8..].copy_from_slice(&self.0[1].to_le_bytes());
-        i128::from_le_bytes(tmp)
+        self.lo as i128
     }
 
     fn hi(self) -> Self::H {
-        let mut tmp = [0u8; 16];
-        tmp[..8].copy_from_slice(&self.0[2].to_le_bytes());
-        tmp[8..].copy_from_slice(&self.0[3].to_le_bytes());
-        i128::from_le_bytes(tmp)
+        self.hi as i128
     }
 }
diff --git a/src/math/support/big/tests.rs b/src/math/support/big/tests.rs
index 815a62dfe..6d06c700a 100644
--- a/src/math/support/big/tests.rs
+++ b/src/math/support/big/tests.rs
@@ -9,33 +9,30 @@ const LOHI_SPLIT: u128 = 0xaaaaaaaaaaaaaaaaffffffffffffffff;
 
 /// Print a `u256` as hex since we can't add format implementations
 fn hexu(v: u256) -> String {
-    format!("0x{:016x}{:016x}{:016x}{:016x}", v.0[3], v.0[2], v.0[1], v.0[0])
+    format!("0x{:032x}{:032x}", v.hi, v.lo)
 }
 
 #[test]
 fn widen_u128() {
-    assert_eq!(u128::MAX.widen(), u256([u64::MAX, u64::MAX, 0, 0]));
-    assert_eq!(LOHI_SPLIT.widen(), u256([u64::MAX, 0xaaaaaaaaaaaaaaaa, 0, 0]));
+    assert_eq!(u128::MAX.widen(), u256 { lo: u128::MAX, hi: 0 });
+    assert_eq!(LOHI_SPLIT.widen(), u256 { lo: LOHI_SPLIT, hi: 0 });
 }
 
 #[test]
 fn widen_i128() {
     assert_eq!((-1i128).widen(), u256::MAX.signed());
-    assert_eq!(
-        (LOHI_SPLIT as i128).widen(),
-        i256([u64::MAX, 0xaaaaaaaaaaaaaaaa, u64::MAX, u64::MAX])
-    );
+    assert_eq!((LOHI_SPLIT as i128).widen(), i256 { lo: LOHI_SPLIT, hi: u128::MAX });
     assert_eq!((-1i128).zero_widen().unsigned(), (u128::MAX).widen());
 }
 
 #[test]
 fn widen_mul_u128() {
     let tests = [
-        (u128::MAX / 2, 2_u128, u256([u64::MAX - 1, u64::MAX, 0, 0])),
-        (u128::MAX, 2_u128, u256([u64::MAX - 1, u64::MAX, 1, 0])),
-        (u128::MAX, u128::MAX, u256([1, 0, u64::MAX - 1, u64::MAX])),
-        (u128::MIN, u128::MIN, u256::ZERO),
-        (1234, 0, u256::ZERO),
+        (u128::MAX / 2, 2_u128, u256 { lo: u128::MAX - 1, hi: 0 }),
+        (u128::MAX, 2_u128, u256 { lo: u128::MAX - 1, hi: 1 }),
+        (u128::MAX, u128::MAX, u256 { lo: 1, hi: u128::MAX - 1 }),
+        (0, 0, u256::ZERO),
+        (1234u128, 0, u256::ZERO),
         (0, 1234, u256::ZERO),
     ];
 
@@ -50,20 +47,27 @@ fn widen_mul_u128() {
     }
 
     for (i, a, b, exp, res) in &errors {
-        eprintln!("FAILURE ({i}): {a:#034x} * {b:#034x} = {} got {}", hexu(*exp), hexu(*res));
+        eprintln!(
+            "\
+            FAILURE ({i}): {a:#034x} * {b:#034x}\n\
+            expected: {}\n\
+            got:      {}\
+            ",
+            hexu(*exp),
+            hexu(*res)
+        );
     }
     assert!(errors.is_empty());
 }
 
 #[test]
-fn not_u128() {
+fn not_u256() {
     assert_eq!(!u256::ZERO, u256::MAX);
 }
 
 #[test]
-fn shr_u128() {
+fn shr_u256() {
     let only_low = [1, u16::MAX.into(), u32::MAX.into(), u64::MAX.into(), u128::MAX];
-
     let mut errors = Vec::new();
 
     for a in only_low {
@@ -80,20 +84,24 @@ fn shr_u128() {
     }
 
     let check = [
-        (u256::MAX, 1, u256([u64::MAX, u64::MAX, u64::MAX, u64::MAX >> 1])),
-        (u256::MAX, 5, u256([u64::MAX, u64::MAX, u64::MAX, u64::MAX >> 5])),
-        (u256::MAX, 63, u256([u64::MAX, u64::MAX, u64::MAX, 1])),
-        (u256::MAX, 64, u256([u64::MAX, u64::MAX, u64::MAX, 0])),
-        (u256::MAX, 65, u256([u64::MAX, u64::MAX, u64::MAX >> 1, 0])),
-        (u256::MAX, 127, u256([u64::MAX, u64::MAX, 1, 0])),
-        (u256::MAX, 128, u256([u64::MAX, u64::MAX, 0, 0])),
-        (u256::MAX, 129, u256([u64::MAX, u64::MAX >> 1, 0, 0])),
-        (u256::MAX, 191, u256([u64::MAX, 1, 0, 0])),
-        (u256::MAX, 192, u256([u64::MAX, 0, 0, 0])),
-        (u256::MAX, 193, u256([u64::MAX >> 1, 0, 0, 0])),
-        (u256::MAX, 191, u256([u64::MAX, 1, 0, 0])),
-        (u256::MAX, 254, u256([0b11, 0, 0, 0])),
-        (u256::MAX, 255, u256([1, 0, 0, 0])),
+        (u256::MAX, 1, u256 { lo: u128::MAX, hi: u128::MAX >> 1 }),
+        (u256::MAX, 5, u256 { lo: u128::MAX, hi: u128::MAX >> 5 }),
+        (u256::MAX, 63, u256 { lo: u128::MAX, hi: u64::MAX as u128 | (1 << 64) }),
+        (u256::MAX, 64, u256 { lo: u128::MAX, hi: u64::MAX as u128 }),
+        (u256::MAX, 65, u256 { lo: u128::MAX, hi: (u64::MAX >> 1) as u128 }),
+        (u256::MAX, 127, u256 { lo: u128::MAX, hi: 1 }),
+        (u256::MAX, 128, u256 { lo: u128::MAX, hi: 0 }),
+        (u256::MAX, 129, u256 { lo: u128::MAX >> 1, hi: 0 }),
+        (u256::MAX, 191, u256 { lo: u64::MAX as u128 | 1 << 64, hi: 0 }),
+        (u256::MAX, 192, u256 { lo: u64::MAX as u128, hi: 0 }),
+        (u256::MAX, 193, u256 { lo: u64::MAX as u128 >> 1, hi: 0 }),
+        (u256::MAX, 254, u256 { lo: 0b11, hi: 0 }),
+        (u256::MAX, 255, u256 { lo: 1, hi: 0 }),
+        (
+            u256 { hi: LOHI_SPLIT, lo: 0 },
+            64,
+            u256 { lo: 0xffffffffffffffff0000000000000000, hi: 0xaaaaaaaaaaaaaaaa },
+        ),
     ];
 
     for (input, shift, expected) in check {
@@ -104,7 +112,16 @@ fn shr_u128() {
     }
 
     for (a, b, res, expected) in &errors {
-        eprintln!("FAILURE: {} >> {b} = {} got {}", hexu(*a), hexu(*expected), hexu(*res),);
+        eprintln!(
+            "\
+            FAILURE:  {} >> {b}\n\
+            expected: {}\n\
+            got:      {}\
+            ",
+            hexu(*a),
+            hexu(*expected),
+            hexu(*res)
+        );
     }
     assert!(errors.is_empty());
 }

From 36aaf4a4d541cf98be8815c9fffc67024a80a779 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 10 Feb 2025 08:17:57 +0000
Subject: [PATCH 233/279] Introduce a trait constant for the minimum positive
 normal value

---
 crates/libm-test/src/f8_impl.rs  | 1 +
 src/math/support/float_traits.rs | 9 +++++++++
 2 files changed, 10 insertions(+)

diff --git a/crates/libm-test/src/f8_impl.rs b/crates/libm-test/src/f8_impl.rs
index 5dce9be18..56ea0b729 100644
--- a/crates/libm-test/src/f8_impl.rs
+++ b/crates/libm-test/src/f8_impl.rs
@@ -32,6 +32,7 @@ impl Float for f8 {
     const INFINITY: Self = Self(0b0_1111_000);
     const NEG_INFINITY: Self = Self(0b1_1111_000);
     const NAN: Self = Self(0b0_1111_100);
+    const MIN_POSITIVE_NORMAL: Self = Self(1 << Self::SIG_BITS);
     // FIXME: incorrect values
     const EPSILON: Self = Self::ZERO;
     const PI: Self = Self::ZERO;
diff --git a/src/math/support/float_traits.rs b/src/math/support/float_traits.rs
index ee83c793d..42ce31484 100644
--- a/src/math/support/float_traits.rs
+++ b/src/math/support/float_traits.rs
@@ -41,6 +41,8 @@ pub trait Float:
     const NEG_PI: Self;
     const FRAC_PI_2: Self;
 
+    const MIN_POSITIVE_NORMAL: Self;
+
     /// The bitwidth of the float type
     const BITS: u32;
 
@@ -200,6 +202,9 @@ macro_rules! float_impl {
             const MIN: Self = $from_bits(Self::Int::MAX & !(1 << Self::SIG_BITS));
             const EPSILON: Self = <$ty>::EPSILON;
 
+            // Exponent is a 1 in the LSB
+            const MIN_POSITIVE_NORMAL: Self = $from_bits(1 << Self::SIG_BITS);
+
             const PI: Self = core::$ty::consts::PI;
             const NEG_PI: Self = -Self::PI;
             const FRAC_PI_2: Self = core::$ty::consts::FRAC_PI_2;
@@ -358,6 +363,7 @@ mod tests {
         // results for zero and subnormals.
         assert_eq!(f16::ZERO.exp_unbiased(), -15);
         assert_eq!(f16::from_bits(0x1).exp_unbiased(), -15);
+        assert_eq!(f16::MIN_POSITIVE, f16::MIN_POSITIVE_NORMAL);
 
         // `from_parts`
         assert_biteq!(f16::from_parts(true, f16::EXP_BIAS, 0), -1.0f16);
@@ -383,6 +389,7 @@ mod tests {
         // results for zero and subnormals.
         assert_eq!(f32::ZERO.exp_unbiased(), -127);
         assert_eq!(f32::from_bits(0x1).exp_unbiased(), -127);
+        assert_eq!(f32::MIN_POSITIVE, f32::MIN_POSITIVE_NORMAL);
 
         // `from_parts`
         assert_biteq!(f32::from_parts(true, f32::EXP_BIAS, 0), -1.0f32);
@@ -409,6 +416,7 @@ mod tests {
         // results for zero and subnormals.
         assert_eq!(f64::ZERO.exp_unbiased(), -1023);
         assert_eq!(f64::from_bits(0x1).exp_unbiased(), -1023);
+        assert_eq!(f64::MIN_POSITIVE, f64::MIN_POSITIVE_NORMAL);
 
         // `from_parts`
         assert_biteq!(f64::from_parts(true, f64::EXP_BIAS, 0), -1.0f64);
@@ -436,6 +444,7 @@ mod tests {
         // results for zero and subnormals.
         assert_eq!(f128::ZERO.exp_unbiased(), -16383);
         assert_eq!(f128::from_bits(0x1).exp_unbiased(), -16383);
+        assert_eq!(f128::MIN_POSITIVE, f128::MIN_POSITIVE_NORMAL);
 
         // `from_parts`
         assert_biteq!(f128::from_parts(true, f128::EXP_BIAS, 0), -1.0f128);

From a8fdead5618d9f84fc1f0c2afca38b60e89f1a61 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 10 Feb 2025 09:17:54 +0000
Subject: [PATCH 234/279] Migrate away from nonfunctional `fenv` stubs

Many routines have some form of handling for rounding mode and floating
point exceptions, which are implemented via a combination of stubs and
`force_eval!` use. This is suboptimal, however, because:

1. Rust does not interact with the floating point environment, so most
   of this code does nothing.
2. The parts of the code that are not dead are not testable.
3. `force_eval!` blocks optimizations, which is unnecessary because we
   do not rely on its side effects.

We cannot ensure correct rounding and exception handling in all cases
without some form of arithmetic operations that are aware of this
behavior. However, the cases where rounding mode is explicitly handled
or exceptions are explicitly raised are testable. Make this possible
here for functions that depend on `math::fenv` by moving the
implementation to a nonpublic function that takes a `Round` and returns
a `Status`.

Link: https://github.com/rust-lang/libm/issues/480
---
 src/math/cbrt.rs         |  25 ++++----
 src/math/fenv.rs         |  49 ---------------
 src/math/generic/fma.rs  | 133 +++++++++++++++++++++------------------
 src/math/generic/sqrt.rs |  48 ++++++++++----
 src/math/mod.rs          |   1 -
 src/math/support/env.rs  | 118 ++++++++++++++++++++++++++++++++++
 src/math/support/mod.rs  |   7 +--
 7 files changed, 240 insertions(+), 141 deletions(-)
 delete mode 100644 src/math/fenv.rs
 create mode 100644 src/math/support/env.rs

diff --git a/src/math/cbrt.rs b/src/math/cbrt.rs
index fbf81f77d..8560d37ab 100644
--- a/src/math/cbrt.rs
+++ b/src/math/cbrt.rs
@@ -5,12 +5,15 @@
  */
 
 use super::Float;
-use super::fenv::Rounding;
-use super::support::cold_path;
+use super::support::{FpResult, Round, cold_path};
 
 /// Compute the cube root of the argument.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn cbrt(x: f64) -> f64 {
+    cbrt_round(x, Round::Nearest).val
+}
+
+pub fn cbrt_round(x: f64, round: Round) -> FpResult<f64> {
     const ESCALE: [f64; 3] = [
         1.0,
         hf64!("0x1.428a2f98d728bp+0"), /* 2^(1/3) */
@@ -33,8 +36,6 @@ pub fn cbrt(x: f64) -> f64 {
 
     let off = [hf64!("0x1p-53"), 0.0, 0.0, 0.0];
 
-    let rm = Rounding::get();
-
     /* rm=0 for rounding to nearest, and other values for directed roundings */
     let hx: u64 = x.to_bits();
     let mut mant: u64 = hx & f64::SIG_MASK;
@@ -51,7 +52,7 @@ pub fn cbrt(x: f64) -> f64 {
         to that for x a signaling NaN, it correctly triggers
         the invalid exception. */
         if e == f64::EXP_SAT || ix == 0 {
-            return x + x;
+            return FpResult::ok(x + x);
         }
 
         let nz = ix.leading_zeros() - 11; /* subnormal */
@@ -124,8 +125,8 @@ pub fn cbrt(x: f64) -> f64 {
      * from ulp(1);
      * for rounding to nearest, ady0 is tiny when dy is near from 1/2 ulp(1),
      * or from 3/2 ulp(1). */
-    let mut ady0: f64 = (ady - off[rm as usize]).abs();
-    let mut ady1: f64 = (ady - (hf64!("0x1p-52") + off[rm as usize])).abs();
+    let mut ady0: f64 = (ady - off[round as usize]).abs();
+    let mut ady1: f64 = (ady - (hf64!("0x1p-52") + off[round as usize])).abs();
 
     if ady0 < hf64!("0x1p-75") || ady1 < hf64!("0x1p-75") {
         cold_path();
@@ -140,8 +141,8 @@ pub fn cbrt(x: f64) -> f64 {
         dy = (y1 - y) - dy;
         y1 = y;
         ady = dy.abs();
-        ady0 = (ady - off[rm as usize]).abs();
-        ady1 = (ady - (hf64!("0x1p-52") + off[rm as usize])).abs();
+        ady0 = (ady - off[round as usize]).abs();
+        ady1 = (ady - (hf64!("0x1p-52") + off[round as usize])).abs();
 
         if ady0 < hf64!("0x1p-98") || ady1 < hf64!("0x1p-98") {
             cold_path();
@@ -157,7 +158,7 @@ pub fn cbrt(x: f64) -> f64 {
                 y1 = hf64!("0x1.de87aa837820fp+0").copysign(zz);
             }
 
-            if rm != Rounding::Nearest {
+            if round != Round::Nearest {
                 let wlist = [
                     (hf64!("0x1.3a9ccd7f022dbp+0"), hf64!("0x1.1236160ba9b93p+0")), // ~ 0x1.1236160ba9b930000000000001e7e8fap+0
                     (hf64!("0x1.7845d2faac6fep+0"), hf64!("0x1.23115e657e49cp+0")), // ~ 0x1.23115e657e49c0000000000001d7a799p+0
@@ -170,7 +171,7 @@ pub fn cbrt(x: f64) -> f64 {
 
                 for (a, b) in wlist {
                     if azz == a {
-                        let tmp = if rm as u64 + sign == 2 { hf64!("0x1p-52") } else { 0.0 };
+                        let tmp = if round as u64 + sign == 2 { hf64!("0x1p-52") } else { 0.0 };
                         y1 = (b + tmp).copysign(zz);
                     }
                 }
@@ -194,7 +195,7 @@ pub fn cbrt(x: f64) -> f64 {
         }
     }
 
-    f64::from_bits(cvt3)
+    FpResult::ok(f64::from_bits(cvt3))
 }
 
 fn fmaf64(x: f64, y: f64, z: f64) -> f64 {
diff --git a/src/math/fenv.rs b/src/math/fenv.rs
deleted file mode 100644
index 328c9f346..000000000
--- a/src/math/fenv.rs
+++ /dev/null
@@ -1,49 +0,0 @@
-// src: musl/src/fenv/fenv.c
-/* Dummy functions for archs lacking fenv implementation */
-
-pub(crate) const FE_UNDERFLOW: i32 = 0;
-pub(crate) const FE_INEXACT: i32 = 0;
-
-pub(crate) const FE_TONEAREST: i32 = 0;
-pub(crate) const FE_DOWNWARD: i32 = 1;
-pub(crate) const FE_UPWARD: i32 = 2;
-pub(crate) const FE_TOWARDZERO: i32 = 3;
-
-#[inline]
-pub(crate) fn feclearexcept(_mask: i32) -> i32 {
-    0
-}
-
-#[inline]
-pub(crate) fn feraiseexcept(_mask: i32) -> i32 {
-    0
-}
-
-#[inline]
-pub(crate) fn fetestexcept(_mask: i32) -> i32 {
-    0
-}
-
-#[inline]
-pub(crate) fn fegetround() -> i32 {
-    FE_TONEAREST
-}
-
-#[derive(Clone, Copy, Debug, PartialEq)]
-pub(crate) enum Rounding {
-    Nearest = FE_TONEAREST as isize,
-    Downward = FE_DOWNWARD as isize,
-    Upward = FE_UPWARD as isize,
-    ToZero = FE_TOWARDZERO as isize,
-}
-
-impl Rounding {
-    pub(crate) fn get() -> Self {
-        match fegetround() {
-            x if x == FE_DOWNWARD => Self::Downward,
-            x if x == FE_UPWARD => Self::Upward,
-            x if x == FE_TOWARDZERO => Self::ToZero,
-            _ => Self::Nearest,
-        }
-    }
-}
diff --git a/src/math/generic/fma.rs b/src/math/generic/fma.rs
index a40d7aaaf..821aee090 100644
--- a/src/math/generic/fma.rs
+++ b/src/math/generic/fma.rs
@@ -1,12 +1,7 @@
 /* SPDX-License-Identifier: MIT */
 /* origin: musl src/math/{fma,fmaf}.c. Ported to generic Rust algorithm in 2025, TG. */
 
-use core::{f32, f64};
-
-use super::super::fenv::{
-    FE_INEXACT, FE_TONEAREST, FE_UNDERFLOW, feclearexcept, fegetround, feraiseexcept, fetestexcept,
-};
-use super::super::support::{DInt, HInt, IntTy};
+use super::super::support::{DInt, FpResult, HInt, IntTy, Round, Status};
 use super::super::{CastFrom, CastInto, DFloat, Float, HFloat, Int, MinInt};
 
 /// Fused multiply-add that works when there is not a larger float size available. Currently this
@@ -14,7 +9,18 @@ use super::super::{CastFrom, CastInto, DFloat, Float, HFloat, Int, MinInt};
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn fma<F>(x: F, y: F, z: F) -> F
 where
-    F: Float + FmaHelper,
+    F: Float,
+    F: CastFrom<F::SignedInt>,
+    F: CastFrom<i8>,
+    F::Int: HInt,
+    u32: CastInto<F::Int>,
+{
+    fma_round(x, y, z, Round::Nearest).val
+}
+
+pub fn fma_round<F>(x: F, y: F, z: F, _round: Round) -> FpResult<F>
+where
+    F: Float,
     F: CastFrom<F::SignedInt>,
     F: CastFrom<i8>,
     F::Int: HInt,
@@ -30,16 +36,16 @@ where
 
     if nx.is_zero_nan_inf() || ny.is_zero_nan_inf() {
         // Value will overflow, defer to non-fused operations.
-        return x * y + z;
+        return FpResult::ok(x * y + z);
     }
 
     if nz.is_zero_nan_inf() {
         if nz.is_zero() {
             // Empty add component means we only need to multiply.
-            return x * y;
+            return FpResult::ok(x * y);
         }
         // `z` is NaN or infinity, which sets the result.
-        return z;
+        return FpResult::ok(z);
     }
 
     // multiply: r = x * y
@@ -147,7 +153,7 @@ where
         }
     } else {
         // exact +/- 0.0
-        return x * y + z;
+        return FpResult::ok(x * y + z);
     }
 
     e -= d;
@@ -168,6 +174,8 @@ where
     // Unbiased exponent for the maximum value of `r`
     let max_pow = F::BITS - 1 + F::EXP_BIAS;
 
+    let mut status = Status::OK;
+
     if e < -(max_pow as i32 - 2) {
         // Result is subnormal before rounding
         if e == -(max_pow as i32 - 1) {
@@ -178,7 +186,9 @@ where
 
             if r == c {
                 // Min normal after rounding,
-                return r.raise_underflow_as_min_positive();
+                status.set_underflow(true);
+                r = F::MIN_POSITIVE_NORMAL.copysign(r);
+                return FpResult::new(r, status);
             }
 
             if (rhi << (F::SIG_BITS + 1)) != zero {
@@ -195,7 +205,7 @@ where
 
                 // Remove the top bit
                 r = F::cast_from(2i8) * r - c;
-                r += r.raise_underflow_ret_zero();
+                status.set_underflow(true);
             }
         } else {
             // Only round once when scaled
@@ -212,12 +222,22 @@ where
     }
 
     // Use our exponent to scale the final value.
-    super::scalbn(r, e)
+    FpResult::new(super::scalbn(r, e), status)
 }
 
 /// Fma implementation when a hardware-backed larger float type is available. For `f32` and `f64`,
 /// `f64` has enough precision to represent the `f32` in its entirety, except for double rounding.
 pub fn fma_wide<F, B>(x: F, y: F, z: F) -> F
+where
+    F: Float + HFloat<D = B>,
+    B: Float + DFloat<H = F>,
+    B::Int: CastInto<i32>,
+    i32: CastFrom<i32>,
+{
+    fma_wide_round(x, y, z, Round::Nearest).val
+}
+
+pub fn fma_wide_round<F, B>(x: F, y: F, z: F, round: Round) -> FpResult<F>
 where
     F: Float + HFloat<D = B>,
     B: Float + DFloat<H = F>,
@@ -244,24 +264,26 @@ where
         // Or the result is exact
         || (result - xy == zb && result - zb == xy)
         // Or the mode is something other than round to nearest
-        || fegetround() != FE_TONEAREST
+        || round != Round::Nearest
     {
         let min_inexact_exp = (B::EXP_BIAS as i32 + F::EXP_MIN_SUBNORM) as u32;
         let max_inexact_exp = (B::EXP_BIAS as i32 + F::EXP_MIN) as u32;
 
-        if (min_inexact_exp..max_inexact_exp).contains(&re) && fetestexcept(FE_INEXACT) != 0 {
-            feclearexcept(FE_INEXACT);
-            // prevent `xy + vz` from being CSE'd with `xy + z` above
-            let vz: F = force_eval!(z);
-            result = xy + vz.widen();
-            if fetestexcept(FE_INEXACT) != 0 {
-                feraiseexcept(FE_UNDERFLOW);
+        let mut status = Status::OK;
+
+        if (min_inexact_exp..max_inexact_exp).contains(&re) && status.inexact() {
+            // This branch is never hit; requires previous operations to set a status
+            status.set_inexact(false);
+
+            result = xy + z.widen();
+            if status.inexact() {
+                status.set_underflow(true);
             } else {
-                feraiseexcept(FE_INEXACT);
+                status.set_inexact(true);
             }
         }
 
-        return result.narrow();
+        return FpResult { val: result.narrow(), status };
     }
 
     let neg = ui >> (B::BITS - 1) != IntTy::<B>::ZERO;
@@ -272,7 +294,7 @@ where
         ui -= one;
     }
 
-    B::from_bits(ui).narrow()
+    FpResult::ok(B::from_bits(ui).narrow())
 }
 
 /// Representation of `F` that has handled subnormals.
@@ -337,49 +359,13 @@ impl<F: Float> Norm<F> {
     }
 }
 
-/// Type-specific helpers that are not needed outside of fma.
-pub trait FmaHelper {
-    /// Raise underflow and return the minimum positive normal value with the sign of `self`.
-    fn raise_underflow_as_min_positive(self) -> Self;
-    /// Raise underflow and return zero.
-    fn raise_underflow_ret_zero(self) -> Self;
-}
-
-impl FmaHelper for f64 {
-    fn raise_underflow_as_min_positive(self) -> Self {
-        /* min normal after rounding, underflow depends
-         * on arch behaviour which can be imitated by
-         * a double to float conversion */
-        let fltmin: f32 = (hf64!("0x0.ffffff8p-63") * f32::MIN_POSITIVE as f64 * self) as f32;
-        f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * fltmin as f64
-    }
-
-    fn raise_underflow_ret_zero(self) -> Self {
-        /* raise underflow portably, such that it
-         * cannot be optimized away */
-        let tiny: f64 = f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * self;
-        (tiny * tiny) * (self - self)
-    }
-}
-
-#[cfg(f128_enabled)]
-impl FmaHelper for f128 {
-    fn raise_underflow_as_min_positive(self) -> Self {
-        f128::MIN_POSITIVE.copysign(self)
-    }
-
-    fn raise_underflow_ret_zero(self) -> Self {
-        f128::ZERO
-    }
-}
-
 #[cfg(test)]
 mod tests {
     use super::*;
 
     fn spec_test<F>()
     where
-        F: Float + FmaHelper,
+        F: Float,
         F: CastFrom<F::SignedInt>,
         F: CastFrom<i8>,
         F::Int: HInt,
@@ -401,6 +387,29 @@ mod tests {
     #[test]
     fn spec_test_f64() {
         spec_test::<f64>();
+
+        let expect_underflow = [
+            (
+                hf64!("0x1.0p-1070"),
+                hf64!("0x1.0p-1070"),
+                hf64!("0x1.ffffffffffffp-1023"),
+                hf64!("0x0.ffffffffffff8p-1022"),
+            ),
+            (
+                // FIXME: we raise underflow but this should only be inexact (based on C and
+                // `rustc_apfloat`).
+                hf64!("0x1.0p-1070"),
+                hf64!("0x1.0p-1070"),
+                hf64!("-0x1.0p-1022"),
+                hf64!("-0x1.0p-1022"),
+            ),
+        ];
+
+        for (x, y, z, res) in expect_underflow {
+            let FpResult { val, status } = fma_round(x, y, z, Round::Nearest);
+            assert_biteq!(val, res);
+            assert_eq!(status, Status::UNDERFLOW);
+        }
     }
 
     #[test]
diff --git a/src/math/generic/sqrt.rs b/src/math/generic/sqrt.rs
index 90d6c01e9..fdd612493 100644
--- a/src/math/generic/sqrt.rs
+++ b/src/math/generic/sqrt.rs
@@ -41,10 +41,23 @@
 //! Goldschmidt has the advantage over Newton-Raphson that `sqrt(x)` and `1/sqrt(x)` are
 //! computed at the same time, i.e. there is no need to calculate `1/sqrt(x)` and invert it.
 
-use super::super::support::{IntTy, cold_path, raise_invalid};
+use super::super::support::{FpResult, IntTy, Round, Status, cold_path};
 use super::super::{CastFrom, CastInto, DInt, Float, HInt, Int, MinInt};
 
 pub fn sqrt<F>(x: F) -> F
+where
+    F: Float + SqrtHelper,
+    F::Int: HInt,
+    F::Int: From<u8>,
+    F::Int: From<F::ISet2>,
+    F::Int: CastInto<F::ISet1>,
+    F::Int: CastInto<F::ISet2>,
+    u32: CastInto<F::Int>,
+{
+    sqrt_round(x, Round::Nearest).val
+}
+
+pub fn sqrt_round<F>(x: F, _round: Round) -> FpResult<F>
 where
     F: Float + SqrtHelper,
     F::Int: HInt,
@@ -78,17 +91,17 @@ where
 
         // +/-0
         if ix << 1 == zero {
-            return x;
+            return FpResult::ok(x);
         }
 
         // Positive infinity
         if ix == F::EXP_MASK {
-            return x;
+            return FpResult::ok(x);
         }
 
         // NaN or negative
         if ix > F::EXP_MASK {
-            return raise_invalid(x);
+            return FpResult::new(F::NAN, Status::INVALID);
         }
 
         // Normalize subnormals by multiplying by 1.0 << SIG_BITS (e.g. 0x1p52 for doubles).
@@ -215,7 +228,7 @@ where
         y = y + t;
     }
 
-    y
+    FpResult::ok(y)
 }
 
 /// Multiply at the wider integer size, returning the high half.
@@ -329,7 +342,7 @@ impl SqrtHelper for f128 {
 
 /// A U0.16 representation of `1/sqrt(x)`.
 ///
-// / The index is a 7-bit number consisting of a single exponent bit and 6 bits of significand.
+/// The index is a 7-bit number consisting of a single exponent bit and 6 bits of significand.
 #[rustfmt::skip]
 static RSQRT_TAB: [u16; 128] = [
     0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
@@ -354,7 +367,7 @@ static RSQRT_TAB: [u16; 128] = [
 mod tests {
     use super::*;
 
-    /// Test against edge cases from https://en.cppreference.com/w/cpp/numeric/math/sqrt
+    /// Test behavior specified in IEEE 754 `squareRoot`.
     fn spec_test<F>()
     where
         F: Float + SqrtHelper,
@@ -365,11 +378,22 @@ mod tests {
         F::Int: CastInto<F::ISet2>,
         u32: CastInto<F::Int>,
     {
-        // Not Asserted: FE_INVALID exception is raised if argument is negative.
-        assert!(sqrt(F::NEG_ONE).is_nan());
-        assert!(sqrt(F::NAN).is_nan());
-        for f in [F::ZERO, F::NEG_ZERO, F::INFINITY].iter().copied() {
-            assert_biteq!(sqrt(f), f);
+        // Values that should return a NaN and raise invalid
+        let nan = [F::NEG_INFINITY, F::NEG_ONE, F::NAN, F::MIN];
+
+        // Values that return unaltered
+        let roundtrip = [F::ZERO, F::NEG_ZERO, F::INFINITY];
+
+        for x in nan {
+            let FpResult { val, status } = sqrt_round(x, Round::Nearest);
+            assert!(val.is_nan());
+            assert!(status == Status::INVALID);
+        }
+
+        for x in roundtrip {
+            let FpResult { val, status } = sqrt_round(x, Round::Nearest);
+            assert_biteq!(val, x);
+            assert!(status == Status::OK);
         }
     }
 
diff --git a/src/math/mod.rs b/src/math/mod.rs
index e32045021..ae4a278f2 100644
--- a/src/math/mod.rs
+++ b/src/math/mod.rs
@@ -94,7 +94,6 @@ cfg_if! {
 // Private modules
 mod arch;
 mod expo2;
-mod fenv;
 mod k_cos;
 mod k_cosf;
 mod k_expo2;
diff --git a/src/math/support/env.rs b/src/math/support/env.rs
new file mode 100644
index 000000000..7244381da
--- /dev/null
+++ b/src/math/support/env.rs
@@ -0,0 +1,118 @@
+//! Support for rounding directions and status flags as specified by IEEE 754.
+//!
+//! Rust does not support the floating point environment so rounding mode is passed as an argument
+//! and status flags are returned as part of the result. There is currently not much support for
+//! this; most existing ports from musl use a form of `force_eval!` to raise exceptions, but this
+//! has no side effects in Rust. Further, correct behavior relies on elementary operations making
+//! use of the correct rounding and raising relevant exceptions, which is not the case for Rust.
+//!
+//! This module exists so no functionality is lost when porting algorithms that respect floating
+//! point environment, and so that some functionality may be tested (that which does not rely on
+//! side effects from elementary operations). Full support would require wrappers around basic
+//! operations, but there is no plan to add this at the current time.
+
+/// A value combined with a floating point status.
+pub struct FpResult<T> {
+    pub val: T,
+    #[cfg_attr(not(feature = "unstable-public-internals"), allow(dead_code))]
+    pub status: Status,
+}
+
+impl<T> FpResult<T> {
+    pub fn new(val: T, status: Status) -> Self {
+        Self { val, status }
+    }
+
+    /// Return `val` with `Status::OK`.
+    pub fn ok(val: T) -> Self {
+        Self { val, status: Status::OK }
+    }
+}
+
+/// IEEE 754 rounding mode, excluding the optional `roundTiesToAway` version of nearest.
+///
+/// Integer representation comes from what CORE-MATH uses for indexing.
+#[cfg_attr(not(feature = "unstable-public-internals"), allow(dead_code))]
+#[derive(Clone, Copy, Debug, PartialEq)]
+pub enum Round {
+    /// IEEE 754 nearest, `roundTiesToEven`.
+    Nearest = 0,
+    /// IEEE 754 `roundTowardNegative`.
+    Negative = 1,
+    /// IEEE 754 `roundTowardPositive`.
+    Positive = 2,
+    /// IEEE 754 `roundTowardZero`.
+    Zero = 3,
+}
+
+/// IEEE 754 exception status flags.
+#[derive(Clone, Copy, Debug, PartialEq)]
+pub struct Status(u8);
+
+impl Status {
+    /// Default status indicating no errors.
+    pub const OK: Self = Self(0);
+
+    /// No definable result.
+    ///
+    /// Includes:
+    /// - Any ops on sNaN, with a few exceptions.
+    /// - `0 * inf`, `inf * 0`.
+    /// - `fma(0, inf, c)` or `fma(inf, 0, c)`, possibly excluding `c = qNaN`.
+    /// - `+inf + -inf` and similar (includes subtraction and fma).
+    /// - `0.0 / 0.0`, `inf / inf`
+    /// - `remainder(x, y)` if `y == 0.0` or `x == inf`, and neither is NaN.
+    /// - `sqrt(x)` with `x < 0.0`.
+    pub const INVALID: Self = Self(1);
+
+    /// Division by zero.
+    ///
+    /// The default result for division is +/-inf based on operand sign. For `logB`, the default
+    /// result is -inf.
+    /// `x / y` when `x != 0.0` and `y == 0.0`,
+
+    #[cfg_attr(not(feature = "unstable-public-internals"), allow(dead_code))]
+    pub const DIVIDE_BY_ZERO: Self = Self(1 << 2);
+
+    /// The result exceeds the maximum finite value.
+    ///
+    /// The default result depends on rounding mode. `Nearest*` rounds to +/- infinity, sign based
+    /// on the intermediate result. `Zero` rounds to the signed maximum finite. `Positive` and
+    /// `Negative` round to signed maximum finite in one direction, signed infinity in the other.
+    #[cfg_attr(not(feature = "unstable-public-internals"), allow(dead_code))]
+    pub const OVERFLOW: Self = Self(1 << 3);
+
+    /// The result is subnormal and lost precision.
+    pub const UNDERFLOW: Self = Self(1 << 4);
+
+    /// The finite-precision result does not match that of infinite precision, and the reason
+    /// is not represented by one of the other flags.
+    pub const INEXACT: Self = Self(1 << 5);
+
+    /// True if `UNDERFLOW` is set.
+    #[cfg_attr(not(feature = "unstable-public-internals"), allow(dead_code))]
+    pub fn underflow(self) -> bool {
+        self.0 & Self::UNDERFLOW.0 != 0
+    }
+
+    pub fn set_underflow(&mut self, val: bool) {
+        self.set_flag(val, Self::UNDERFLOW);
+    }
+
+    /// True if `INEXACT` is set.
+    pub fn inexact(self) -> bool {
+        self.0 & Self::INEXACT.0 != 0
+    }
+
+    pub fn set_inexact(&mut self, val: bool) {
+        self.set_flag(val, Self::INEXACT);
+    }
+
+    fn set_flag(&mut self, val: bool, mask: Self) {
+        if val {
+            self.0 |= mask.0;
+        } else {
+            self.0 &= !mask.0;
+        }
+    }
+}
diff --git a/src/math/support/mod.rs b/src/math/support/mod.rs
index 28e9fd413..ee3f2bbdf 100644
--- a/src/math/support/mod.rs
+++ b/src/math/support/mod.rs
@@ -1,12 +1,14 @@
 #[macro_use]
 pub mod macros;
 mod big;
+mod env;
 mod float_traits;
 pub mod hex_float;
 mod int_traits;
 
 #[allow(unused_imports)]
 pub use big::{i256, u256};
+pub use env::{FpResult, Round, Status};
 #[allow(unused_imports)]
 pub use float_traits::{DFloat, Float, HFloat, IntTy};
 pub(crate) use float_traits::{f32_from_bits, f64_from_bits};
@@ -25,8 +27,3 @@ pub fn cold_path() {
     #[cfg(intrinsics_enabled)]
     core::intrinsics::cold_path();
 }
-
-/// Return `x`, first raising `FE_INVALID`.
-pub fn raise_invalid<F: Float>(x: F) -> F {
-    (x - x) / (x - x)
-}

From e8fbb05fd4631da644cbe6d61e1d1235a17b5137 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 10 Feb 2025 12:01:16 +0000
Subject: [PATCH 235/279] Eliminate the use of `force_eval!` in `ceil`,
 `floor`, and `trunc`

---
 src/math/generic/ceil.rs  | 91 ++++++++++++++++++++++++++++++++-------
 src/math/generic/floor.rs | 77 +++++++++++++++++++++++++--------
 src/math/generic/trunc.rs | 89 +++++++++++++++++++++++++++++++++++---
 3 files changed, 220 insertions(+), 37 deletions(-)

diff --git a/src/math/generic/ceil.rs b/src/math/generic/ceil.rs
index 971a4d3d8..bf7e1d8e2 100644
--- a/src/math/generic/ceil.rs
+++ b/src/math/generic/ceil.rs
@@ -7,9 +7,14 @@
 //! performance seems to be better (based on icount) and it does not seem to experience rounding
 //! errors on i386.
 
+use super::super::support::{FpResult, Status};
 use super::super::{Float, Int, IntTy, MinInt};
 
 pub fn ceil<F: Float>(x: F) -> F {
+    ceil_status(x).val
+}
+
+pub fn ceil_status<F: Float>(x: F) -> FpResult<F> {
     let zero = IntTy::<F>::ZERO;
 
     let mut ix = x.to_bits();
@@ -17,20 +22,20 @@ pub fn ceil<F: Float>(x: F) -> F {
 
     // If the represented value has no fractional part, no truncation is needed.
     if e >= F::SIG_BITS as i32 {
-        return x;
+        return FpResult::ok(x);
     }
 
-    if e >= 0 {
+    let status;
+    let res = if e >= 0 {
         // |x| >= 1.0
-
         let m = F::SIG_MASK >> e.unsigned();
         if (ix & m) == zero {
             // Portion to be masked is already zero; no adjustment needed.
-            return x;
+            return FpResult::ok(x);
         }
 
         // Otherwise, raise an inexact exception.
-        force_eval!(x + F::MAX);
+        status = Status::INEXACT;
 
         if x.is_sign_positive() {
             ix += m;
@@ -40,7 +45,11 @@ pub fn ceil<F: Float>(x: F) -> F {
         F::from_bits(ix)
     } else {
         // |x| < 1.0, raise an inexact exception since truncation will happen (unless x == 0).
-        force_eval!(x + F::MAX);
+        if ix & F::SIG_MASK == F::Int::ZERO {
+            status = Status::OK;
+        } else {
+            status = Status::INEXACT;
+        }
 
         if x.is_sign_negative() {
             // -1.0 < x <= -0.0; rounding up goes toward -0.0.
@@ -52,18 +61,30 @@ pub fn ceil<F: Float>(x: F) -> F {
             // +0.0 remains unchanged
             x
         }
-    }
+    };
+
+    FpResult::new(res, status)
 }
 
 #[cfg(test)]
 mod tests {
     use super::*;
+    use crate::support::Hexf;
 
     /// Test against https://en.cppreference.com/w/cpp/numeric/math/ceil
-    fn spec_test<F: Float>() {
-        // Not Asserted: that the current rounding mode has no effect.
-        for f in [F::ZERO, F::NEG_ZERO, F::INFINITY, F::NEG_INFINITY].iter().copied() {
-            assert_biteq!(ceil(f), f);
+    fn spec_test<F: Float>(cases: &[(F, F, Status)]) {
+        let roundtrip = [F::ZERO, F::ONE, F::NEG_ONE, F::NEG_ZERO, F::INFINITY, F::NEG_INFINITY];
+
+        for x in roundtrip {
+            let FpResult { val, status } = ceil_status(x);
+            assert_biteq!(val, x, "{}", Hexf(x));
+            assert_eq!(status, Status::OK, "{}", Hexf(x));
+        }
+
+        for &(x, res, res_stat) in cases {
+            let FpResult { val, status } = ceil_status(x);
+            assert_biteq!(val, res, "{}", Hexf(x));
+            assert_eq!(status, res_stat, "{}", Hexf(x));
         }
     }
 
@@ -72,7 +93,17 @@ mod tests {
     #[test]
     #[cfg(f16_enabled)]
     fn spec_tests_f16() {
-        spec_test::<f16>();
+        let cases = [
+            (0.1, 1.0, Status::INEXACT),
+            (-0.1, -0.0, Status::INEXACT),
+            (0.9, 1.0, Status::INEXACT),
+            (-0.9, -0.0, Status::INEXACT),
+            (1.1, 2.0, Status::INEXACT),
+            (-1.1, -1.0, Status::INEXACT),
+            (1.9, 2.0, Status::INEXACT),
+            (-1.9, -1.0, Status::INEXACT),
+        ];
+        spec_test::<f16>(&cases);
     }
 
     #[test]
@@ -83,7 +114,17 @@ mod tests {
 
     #[test]
     fn spec_tests_f32() {
-        spec_test::<f32>();
+        let cases = [
+            (0.1, 1.0, Status::INEXACT),
+            (-0.1, -0.0, Status::INEXACT),
+            (0.9, 1.0, Status::INEXACT),
+            (-0.9, -0.0, Status::INEXACT),
+            (1.1, 2.0, Status::INEXACT),
+            (-1.1, -1.0, Status::INEXACT),
+            (1.9, 2.0, Status::INEXACT),
+            (-1.9, -1.0, Status::INEXACT),
+        ];
+        spec_test::<f32>(&cases);
     }
 
     #[test]
@@ -94,12 +135,32 @@ mod tests {
 
     #[test]
     fn spec_tests_f64() {
-        spec_test::<f64>();
+        let cases = [
+            (0.1, 1.0, Status::INEXACT),
+            (-0.1, -0.0, Status::INEXACT),
+            (0.9, 1.0, Status::INEXACT),
+            (-0.9, -0.0, Status::INEXACT),
+            (1.1, 2.0, Status::INEXACT),
+            (-1.1, -1.0, Status::INEXACT),
+            (1.9, 2.0, Status::INEXACT),
+            (-1.9, -1.0, Status::INEXACT),
+        ];
+        spec_test::<f64>(&cases);
     }
 
     #[test]
     #[cfg(f128_enabled)]
     fn spec_tests_f128() {
-        spec_test::<f128>();
+        let cases = [
+            (0.1, 1.0, Status::INEXACT),
+            (-0.1, -0.0, Status::INEXACT),
+            (0.9, 1.0, Status::INEXACT),
+            (-0.9, -0.0, Status::INEXACT),
+            (1.1, 2.0, Status::INEXACT),
+            (-1.1, -1.0, Status::INEXACT),
+            (1.9, 2.0, Status::INEXACT),
+            (-1.9, -1.0, Status::INEXACT),
+        ];
+        spec_test::<f128>(&cases);
     }
 }
diff --git a/src/math/generic/floor.rs b/src/math/generic/floor.rs
index 6754c08f8..779955164 100644
--- a/src/math/generic/floor.rs
+++ b/src/math/generic/floor.rs
@@ -7,9 +7,14 @@
 //! performance seems to be better (based on icount) and it does not seem to experience rounding
 //! errors on i386.
 
+use super::super::support::{FpResult, Status};
 use super::super::{Float, Int, IntTy, MinInt};
 
 pub fn floor<F: Float>(x: F) -> F {
+    floor_status(x).val
+}
+
+pub fn floor_status<F: Float>(x: F) -> FpResult<F> {
     let zero = IntTy::<F>::ZERO;
 
     let mut ix = x.to_bits();
@@ -17,20 +22,20 @@ pub fn floor<F: Float>(x: F) -> F {
 
     // If the represented value has no fractional part, no truncation is needed.
     if e >= F::SIG_BITS as i32 {
-        return x;
+        return FpResult::ok(x);
     }
 
-    if e >= 0 {
+    let status;
+    let res = if e >= 0 {
         // |x| >= 1.0
-
         let m = F::SIG_MASK >> e.unsigned();
         if ix & m == zero {
             // Portion to be masked is already zero; no adjustment needed.
-            return x;
+            return FpResult::ok(x);
         }
 
         // Otherwise, raise an inexact exception.
-        force_eval!(x + F::MAX);
+        status = Status::INEXACT;
 
         if x.is_sign_negative() {
             ix += m;
@@ -39,8 +44,12 @@ pub fn floor<F: Float>(x: F) -> F {
         ix &= !m;
         F::from_bits(ix)
     } else {
-        // |x| < 1.0, raise an inexact exception since truncation will happen (unless x == 0).
-        force_eval!(x + F::MAX);
+        // |x| < 1.0, raise an inexact exception since truncation will happen.
+        if ix & F::SIG_MASK == F::Int::ZERO {
+            status = Status::OK;
+        } else {
+            status = Status::INEXACT;
+        }
 
         if x.is_sign_positive() {
             // 0.0 <= x < 1.0; rounding down goes toward +0.0.
@@ -52,27 +61,40 @@ pub fn floor<F: Float>(x: F) -> F {
             // -0.0 remains unchanged
             x
         }
-    }
+    };
+
+    FpResult::new(res, status)
 }
 
 #[cfg(test)]
 mod tests {
     use super::*;
+    use crate::support::Hexf;
 
     /// Test against https://en.cppreference.com/w/cpp/numeric/math/floor
-    fn spec_test<F: Float>() {
-        // Not Asserted: that the current rounding mode has no effect.
-        for f in [F::ZERO, F::NEG_ZERO, F::INFINITY, F::NEG_INFINITY].iter().copied() {
-            assert_biteq!(floor(f), f);
+    fn spec_test<F: Float>(cases: &[(F, F, Status)]) {
+        let roundtrip = [F::ZERO, F::ONE, F::NEG_ONE, F::NEG_ZERO, F::INFINITY, F::NEG_INFINITY];
+
+        for x in roundtrip {
+            let FpResult { val, status } = floor_status(x);
+            assert_biteq!(val, x, "{}", Hexf(x));
+            assert_eq!(status, Status::OK, "{}", Hexf(x));
+        }
+
+        for &(x, res, res_stat) in cases {
+            let FpResult { val, status } = floor_status(x);
+            assert_biteq!(val, res, "{}", Hexf(x));
+            assert_eq!(status, res_stat, "{}", Hexf(x));
         }
     }
 
-    /* Skipping f16 / f128 "sanity_check"s due to rejected literal lexing at MSRV */
+    /* Skipping f16 / f128 "sanity_check"s and spec cases due to rejected literal lexing at MSRV */
 
     #[test]
     #[cfg(f16_enabled)]
     fn spec_tests_f16() {
-        spec_test::<f16>();
+        let cases = [];
+        spec_test::<f16>(&cases);
     }
 
     #[test]
@@ -84,7 +106,17 @@ mod tests {
 
     #[test]
     fn spec_tests_f32() {
-        spec_test::<f32>();
+        let cases = [
+            (0.1, 0.0, Status::INEXACT),
+            (-0.1, -1.0, Status::INEXACT),
+            (0.9, 0.0, Status::INEXACT),
+            (-0.9, -1.0, Status::INEXACT),
+            (1.1, 1.0, Status::INEXACT),
+            (-1.1, -2.0, Status::INEXACT),
+            (1.9, 1.0, Status::INEXACT),
+            (-1.9, -2.0, Status::INEXACT),
+        ];
+        spec_test::<f32>(&cases);
     }
 
     #[test]
@@ -95,12 +127,23 @@ mod tests {
 
     #[test]
     fn spec_tests_f64() {
-        spec_test::<f64>();
+        let cases = [
+            (0.1, 0.0, Status::INEXACT),
+            (-0.1, -1.0, Status::INEXACT),
+            (0.9, 0.0, Status::INEXACT),
+            (-0.9, -1.0, Status::INEXACT),
+            (1.1, 1.0, Status::INEXACT),
+            (-1.1, -2.0, Status::INEXACT),
+            (1.9, 1.0, Status::INEXACT),
+            (-1.9, -2.0, Status::INEXACT),
+        ];
+        spec_test::<f64>(&cases);
     }
 
     #[test]
     #[cfg(f128_enabled)]
     fn spec_tests_f128() {
-        spec_test::<f128>();
+        let cases = [];
+        spec_test::<f128>(&cases);
     }
 }
diff --git a/src/math/generic/trunc.rs b/src/math/generic/trunc.rs
index ca5f1bdd6..0fb3fa5ad 100644
--- a/src/math/generic/trunc.rs
+++ b/src/math/generic/trunc.rs
@@ -1,15 +1,20 @@
 /* SPDX-License-Identifier: MIT
  * origin: musl src/math/trunc.c */
 
+use super::super::support::{FpResult, Status};
 use super::super::{Float, Int, IntTy, MinInt};
 
 pub fn trunc<F: Float>(x: F) -> F {
+    trunc_status(x).val
+}
+
+pub fn trunc_status<F: Float>(x: F) -> FpResult<F> {
     let mut xi: F::Int = x.to_bits();
     let e: i32 = x.exp_unbiased();
 
     // C1: The represented value has no fractional part, so no truncation is needed
     if e >= F::SIG_BITS as i32 {
-        return x;
+        return FpResult::ok(x);
     }
 
     let mask = if e < 0 {
@@ -23,22 +28,68 @@ pub fn trunc<F: Float>(x: F) -> F {
 
     // C4: If the to-be-masked-out portion is already zero, we have an exact result
     if (xi & !mask) == IntTy::<F>::ZERO {
-        return x;
+        return FpResult::ok(x);
     }
 
     // C5: Otherwise the result is inexact and we will truncate. Raise `FE_INEXACT`, mask the
     // result, and return.
-    force_eval!(x + F::MAX);
+
+    let status = if xi & F::SIG_MASK == F::Int::ZERO { Status::OK } else { Status::INEXACT };
     xi &= mask;
-    F::from_bits(xi)
+    FpResult::new(F::from_bits(xi), status)
 }
 
 #[cfg(test)]
 mod tests {
     use super::*;
+    use crate::support::Hexf;
+
+    fn spec_test<F: Float>(cases: &[(F, F, Status)]) {
+        let roundtrip = [F::ZERO, F::ONE, F::NEG_ONE, F::NEG_ZERO, F::INFINITY, F::NEG_INFINITY];
+
+        for x in roundtrip {
+            let FpResult { val, status } = trunc_status(x);
+            assert_biteq!(val, x, "{}", Hexf(x));
+            assert_eq!(status, Status::OK, "{}", Hexf(x));
+        }
+
+        for &(x, res, res_stat) in cases {
+            let FpResult { val, status } = trunc_status(x);
+            assert_biteq!(val, res, "{}", Hexf(x));
+            assert_eq!(status, res_stat, "{}", Hexf(x));
+        }
+    }
+
+    /* Skipping f16 / f128 "sanity_check"s and spec cases due to rejected literal lexing at MSRV */
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn spec_tests_f16() {
+        let cases = [];
+        spec_test::<f16>(&cases);
+    }
+
+    #[test]
+    fn sanity_check_f32() {
+        assert_eq!(trunc(0.5f32), 0.0);
+        assert_eq!(trunc(1.1f32), 1.0);
+        assert_eq!(trunc(2.9f32), 2.0);
+    }
 
     #[test]
-    fn sanity_check() {
+    fn spec_tests_f32() {
+        let cases = [
+            (0.1, 0.0, Status::INEXACT),
+            (-0.1, -0.0, Status::INEXACT),
+            (0.9, 0.0, Status::INEXACT),
+            (-0.9, -0.0, Status::INEXACT),
+            (1.1, 1.0, Status::INEXACT),
+            (-1.1, -1.0, Status::INEXACT),
+            (1.9, 1.0, Status::INEXACT),
+            (-1.9, -1.0, Status::INEXACT),
+        ];
+        spec_test::<f32>(&cases);
+
         assert_biteq!(trunc(1.1f32), 1.0);
         assert_biteq!(trunc(1.1f64), 1.0);
 
@@ -54,4 +105,32 @@ mod tests {
         assert_biteq!(trunc(hf32!("-0x1p-1")), -0.0);
         assert_biteq!(trunc(hf64!("-0x1p-1")), -0.0);
     }
+
+    #[test]
+    fn sanity_check_f64() {
+        assert_eq!(trunc(1.1f64), 1.0);
+        assert_eq!(trunc(2.9f64), 2.0);
+    }
+
+    #[test]
+    fn spec_tests_f64() {
+        let cases = [
+            (0.1, 0.0, Status::INEXACT),
+            (-0.1, -0.0, Status::INEXACT),
+            (0.9, 0.0, Status::INEXACT),
+            (-0.9, -0.0, Status::INEXACT),
+            (1.1, 1.0, Status::INEXACT),
+            (-1.1, -1.0, Status::INEXACT),
+            (1.9, 1.0, Status::INEXACT),
+            (-1.9, -1.0, Status::INEXACT),
+        ];
+        spec_test::<f64>(&cases);
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn spec_tests_f128() {
+        let cases = [];
+        spec_test::<f128>(&cases);
+    }
 }

From 90872c1beef8a12874260db4c112bddcd7a574c8 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 10 Feb 2025 05:26:50 +0000
Subject: [PATCH 236/279] Small refactor of bigint tests

Print errors immediately rather than deferring to the end, so any debug
output shows up immediately before the relevant failed test.
---
 src/math/support/big/tests.rs | 65 ++++++++++++++++++-----------------
 1 file changed, 34 insertions(+), 31 deletions(-)

diff --git a/src/math/support/big/tests.rs b/src/math/support/big/tests.rs
index 6d06c700a..2c71191ba 100644
--- a/src/math/support/big/tests.rs
+++ b/src/math/support/big/tests.rs
@@ -1,6 +1,5 @@
 extern crate std;
 use std::string::String;
-use std::vec::Vec;
 use std::{eprintln, format};
 
 use super::{HInt, MinInt, i256, u256};
@@ -36,28 +35,30 @@ fn widen_mul_u128() {
         (0, 1234, u256::ZERO),
     ];
 
-    let mut errors = Vec::new();
-    for (i, (a, b, exp)) in tests.iter().copied().enumerate() {
-        let res = a.widen_mul(b);
-        let res_z = a.zero_widen_mul(b);
-        assert_eq!(res, res_z);
-        if res != exp {
-            errors.push((i, a, b, exp, res));
-        }
-    }
-
-    for (i, a, b, exp, res) in &errors {
+    let mut has_errors = false;
+    let mut add_error = |i, a, b, expected, actual| {
+        has_errors = true;
         eprintln!(
             "\
             FAILURE ({i}): {a:#034x} * {b:#034x}\n\
             expected: {}\n\
             got:      {}\
             ",
-            hexu(*exp),
-            hexu(*res)
+            hexu(expected),
+            hexu(actual)
         );
+    };
+
+    for (i, (a, b, exp)) in tests.iter().copied().enumerate() {
+        let res = a.widen_mul(b);
+        let res_z = a.zero_widen_mul(b);
+        assert_eq!(res, res_z);
+        if res != exp {
+            add_error(i, a, b, exp, res);
+        }
     }
-    assert!(errors.is_empty());
+
+    assert!(!has_errors);
 }
 
 #[test]
@@ -68,7 +69,21 @@ fn not_u256() {
 #[test]
 fn shr_u256() {
     let only_low = [1, u16::MAX.into(), u32::MAX.into(), u64::MAX.into(), u128::MAX];
-    let mut errors = Vec::new();
+    let mut has_errors = false;
+
+    let mut add_error = |a, b, expected, actual| {
+        has_errors = true;
+        eprintln!(
+            "\
+            FAILURE:  {} >> {b}\n\
+            expected: {}\n\
+            actual:   {}\
+            ",
+            hexu(a),
+            hexu(expected),
+            hexu(actual),
+        );
+    };
 
     for a in only_low {
         for perturb in 0..10 {
@@ -77,7 +92,7 @@ fn shr_u256() {
                 let res = a.widen() >> shift;
                 let expected = (a >> shift).widen();
                 if res != expected {
-                    errors.push((a.widen(), shift, res, expected));
+                    add_error(a.widen(), shift, expected, res);
                 }
             }
         }
@@ -107,23 +122,11 @@ fn shr_u256() {
     for (input, shift, expected) in check {
         let res = input >> shift;
         if res != expected {
-            errors.push((input, shift, res, expected));
+            add_error(input, shift, expected, res);
         }
     }
 
-    for (a, b, res, expected) in &errors {
-        eprintln!(
-            "\
-            FAILURE:  {} >> {b}\n\
-            expected: {}\n\
-            got:      {}\
-            ",
-            hexu(*a),
-            hexu(*expected),
-            hexu(*res)
-        );
-    }
-    assert!(errors.is_empty());
+    assert!(!has_errors);
 }
 
 #[test]

From 2d3bd6cbf487a1f3939bf7deef10e98468e26d2a Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 10 Feb 2025 19:56:52 +0000
Subject: [PATCH 237/279] Combine `fmin{,f,f16,f128}` and `fmax{,f,f16,128}`
 into a single file

These don't have much content since they now use the generic
implementation. There will be more similar functions in the near future
(fminimum, fmaximum, fminimum_num, fmaximum_num); start the pattern of
combining similar functions now so we don't have to eventually maintain
similar docs across 24 different files.
---
 etc/function-definitions.json | 16 +++++------
 src/math/fmax.rs              |  5 ----
 src/math/fmaxf.rs             |  5 ----
 src/math/fmaxf128.rs          |  5 ----
 src/math/fmaxf16.rs           |  5 ----
 src/math/fmin.rs              |  5 ----
 src/math/fmin_fmax.rs         | 51 +++++++++++++++++++++++++++++++++++
 src/math/fminf.rs             |  5 ----
 src/math/fminf128.rs          |  5 ----
 src/math/fminf16.rs           |  5 ----
 src/math/mod.rs               | 22 +++++----------
 11 files changed, 65 insertions(+), 64 deletions(-)
 delete mode 100644 src/math/fmax.rs
 delete mode 100644 src/math/fmaxf.rs
 delete mode 100644 src/math/fmaxf128.rs
 delete mode 100644 src/math/fmaxf16.rs
 delete mode 100644 src/math/fmin.rs
 create mode 100644 src/math/fmin_fmax.rs
 delete mode 100644 src/math/fminf.rs
 delete mode 100644 src/math/fminf128.rs
 delete mode 100644 src/math/fminf16.rs

diff --git a/etc/function-definitions.json b/etc/function-definitions.json
index 5742ed585..d3e51f29a 100644
--- a/etc/function-definitions.json
+++ b/etc/function-definitions.json
@@ -365,56 +365,56 @@
     },
     "fmax": {
         "sources": [
-            "src/math/fmax.rs",
+            "src/math/fmin_fmax.rs",
             "src/math/generic/fmax.rs"
         ],
         "type": "f64"
     },
     "fmaxf": {
         "sources": [
-            "src/math/fmaxf.rs",
+            "src/math/fmin_fmax.rs",
             "src/math/generic/fmax.rs"
         ],
         "type": "f32"
     },
     "fmaxf128": {
         "sources": [
-            "src/math/fmaxf128.rs",
+            "src/math/fmin_fmax.rs",
             "src/math/generic/fmax.rs"
         ],
         "type": "f128"
     },
     "fmaxf16": {
         "sources": [
-            "src/math/fmaxf16.rs",
+            "src/math/fmin_fmax.rs",
             "src/math/generic/fmax.rs"
         ],
         "type": "f16"
     },
     "fmin": {
         "sources": [
-            "src/math/fmin.rs",
+            "src/math/fmin_fmax.rs",
             "src/math/generic/fmin.rs"
         ],
         "type": "f64"
     },
     "fminf": {
         "sources": [
-            "src/math/fminf.rs",
+            "src/math/fmin_fmax.rs",
             "src/math/generic/fmin.rs"
         ],
         "type": "f32"
     },
     "fminf128": {
         "sources": [
-            "src/math/fminf128.rs",
+            "src/math/fmin_fmax.rs",
             "src/math/generic/fmin.rs"
         ],
         "type": "f128"
     },
     "fminf16": {
         "sources": [
-            "src/math/fminf16.rs",
+            "src/math/fmin_fmax.rs",
             "src/math/generic/fmin.rs"
         ],
         "type": "f16"
diff --git a/src/math/fmax.rs b/src/math/fmax.rs
deleted file mode 100644
index d5d9b513b..000000000
--- a/src/math/fmax.rs
+++ /dev/null
@@ -1,5 +0,0 @@
-/// Return the greater of two arguments or, if either argument is NaN, the other argument.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn fmax(x: f64, y: f64) -> f64 {
-    super::generic::fmax(x, y)
-}
diff --git a/src/math/fmaxf.rs b/src/math/fmaxf.rs
deleted file mode 100644
index 3197d5cf2..000000000
--- a/src/math/fmaxf.rs
+++ /dev/null
@@ -1,5 +0,0 @@
-/// Return the greater of two arguments or, if either argument is NaN, the other argument.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn fmaxf(x: f32, y: f32) -> f32 {
-    super::generic::fmax(x, y)
-}
diff --git a/src/math/fmaxf128.rs b/src/math/fmaxf128.rs
deleted file mode 100644
index bace9ab53..000000000
--- a/src/math/fmaxf128.rs
+++ /dev/null
@@ -1,5 +0,0 @@
-/// Return the greater of two arguments or, if either argument is NaN, the other argument.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn fmaxf128(x: f128, y: f128) -> f128 {
-    super::generic::fmax(x, y)
-}
diff --git a/src/math/fmaxf16.rs b/src/math/fmaxf16.rs
deleted file mode 100644
index fea15be8f..000000000
--- a/src/math/fmaxf16.rs
+++ /dev/null
@@ -1,5 +0,0 @@
-/// Return the greater of two arguments or, if either argument is NaN, the other argument.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn fmaxf16(x: f16, y: f16) -> f16 {
-    super::generic::fmax(x, y)
-}
diff --git a/src/math/fmin.rs b/src/math/fmin.rs
deleted file mode 100644
index df8ff7c32..000000000
--- a/src/math/fmin.rs
+++ /dev/null
@@ -1,5 +0,0 @@
-/// Return the lesser of two arguments or, if either argument is NaN, the other argument.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn fmin(x: f64, y: f64) -> f64 {
-    super::generic::fmin(x, y)
-}
diff --git a/src/math/fmin_fmax.rs b/src/math/fmin_fmax.rs
new file mode 100644
index 000000000..97912e758
--- /dev/null
+++ b/src/math/fmin_fmax.rs
@@ -0,0 +1,51 @@
+/// Return the lesser of two arguments or, if either argument is NaN, the other argument.
+#[cfg(f16_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fminf16(x: f16, y: f16) -> f16 {
+    super::generic::fmin(x, y)
+}
+
+/// Return the lesser of two arguments or, if either argument is NaN, the other argument.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fminf(x: f32, y: f32) -> f32 {
+    super::generic::fmin(x, y)
+}
+
+/// Return the lesser of two arguments or, if either argument is NaN, the other argument.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmin(x: f64, y: f64) -> f64 {
+    super::generic::fmin(x, y)
+}
+
+/// Return the lesser of two arguments or, if either argument is NaN, the other argument.
+#[cfg(f128_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fminf128(x: f128, y: f128) -> f128 {
+    super::generic::fmin(x, y)
+}
+
+/// Return the greater of two arguments or, if either argument is NaN, the other argument.
+#[cfg(f16_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmaxf16(x: f16, y: f16) -> f16 {
+    super::generic::fmax(x, y)
+}
+
+/// Return the greater of two arguments or, if either argument is NaN, the other argument.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmaxf(x: f32, y: f32) -> f32 {
+    super::generic::fmax(x, y)
+}
+
+/// Return the greater of two arguments or, if either argument is NaN, the other argument.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmax(x: f64, y: f64) -> f64 {
+    super::generic::fmax(x, y)
+}
+
+/// Return the greater of two arguments or, if either argument is NaN, the other argument.
+#[cfg(f128_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmaxf128(x: f128, y: f128) -> f128 {
+    super::generic::fmax(x, y)
+}
diff --git a/src/math/fminf.rs b/src/math/fminf.rs
deleted file mode 100644
index b2cdfe89d..000000000
--- a/src/math/fminf.rs
+++ /dev/null
@@ -1,5 +0,0 @@
-/// Return the lesser of two arguments or, if either argument is NaN, the other argument.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn fminf(x: f32, y: f32) -> f32 {
-    super::generic::fmin(x, y)
-}
diff --git a/src/math/fminf128.rs b/src/math/fminf128.rs
deleted file mode 100644
index a9224c22a..000000000
--- a/src/math/fminf128.rs
+++ /dev/null
@@ -1,5 +0,0 @@
-/// Return the lesser of two arguments or, if either argument is NaN, the other argument.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn fminf128(x: f128, y: f128) -> f128 {
-    super::generic::fmin(x, y)
-}
diff --git a/src/math/fminf16.rs b/src/math/fminf16.rs
deleted file mode 100644
index 6d936be34..000000000
--- a/src/math/fminf16.rs
+++ /dev/null
@@ -1,5 +0,0 @@
-/// Return the lesser of two arguments or, if either argument is NaN, the other argument.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn fminf16(x: f16, y: f16) -> f16 {
-    super::generic::fmin(x, y)
-}
diff --git a/src/math/mod.rs b/src/math/mod.rs
index ae4a278f2..ba0b933f1 100644
--- a/src/math/mod.rs
+++ b/src/math/mod.rs
@@ -165,10 +165,7 @@ mod floor;
 mod floorf;
 mod fma;
 mod fmaf;
-mod fmax;
-mod fmaxf;
-mod fmin;
-mod fminf;
+mod fmin_fmax;
 mod fmod;
 mod fmodf;
 mod frexp;
@@ -273,10 +270,7 @@ pub use self::floor::floor;
 pub use self::floorf::floorf;
 pub use self::fma::fma;
 pub use self::fmaf::fmaf;
-pub use self::fmax::fmax;
-pub use self::fmaxf::fmaxf;
-pub use self::fmin::fmin;
-pub use self::fminf::fminf;
+pub use self::fmin_fmax::{fmax, fmaxf, fmin, fminf};
 pub use self::fmod::fmod;
 pub use self::fmodf::fmodf;
 pub use self::frexp::frexp;
@@ -346,8 +340,6 @@ cfg_if! {
         mod fabsf16;
         mod fdimf16;
         mod floorf16;
-        mod fmaxf16;
-        mod fminf16;
         mod fmodf16;
         mod ldexpf16;
         mod rintf16;
@@ -363,8 +355,8 @@ cfg_if! {
         pub use self::fabsf16::fabsf16;
         pub use self::fdimf16::fdimf16;
         pub use self::floorf16::floorf16;
-        pub use self::fmaxf16::fmaxf16;
-        pub use self::fminf16::fminf16;
+        pub use self::fmin_fmax::fmaxf16;
+        pub use self::fmin_fmax::fminf16;
         pub use self::fmodf16::fmodf16;
         pub use self::ldexpf16::ldexpf16;
         pub use self::rintf16::rintf16;
@@ -385,8 +377,6 @@ cfg_if! {
         mod fdimf128;
         mod floorf128;
         mod fmaf128;
-        mod fmaxf128;
-        mod fminf128;
         mod fmodf128;
         mod ldexpf128;
         mod rintf128;
@@ -403,8 +393,8 @@ cfg_if! {
         pub use self::fdimf128::fdimf128;
         pub use self::floorf128::floorf128;
         pub use self::fmaf128::fmaf128;
-        pub use self::fmaxf128::fmaxf128;
-        pub use self::fminf128::fminf128;
+        pub use self::fmin_fmax::fmaxf128;
+        pub use self::fmin_fmax::fminf128;
         pub use self::fmodf128::fmodf128;
         pub use self::ldexpf128::ldexpf128;
         pub use self::rintf128::rintf128;

From e21748d01f6fdd70dbb5704c555c13394f5c1a6d Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 10 Feb 2025 19:43:49 +0000
Subject: [PATCH 238/279] Add `fminimum`, `fmaximum`, `fminimum_num`, and
 `fmaximum_num`

These functions represent new operations from IEEE 754-2019. Introduce
them for all float sizes.
---
 crates/libm-macros/src/shared.rs             |  32 +++++-
 crates/libm-test/benches/icount.rs           |  16 +++
 crates/libm-test/benches/random.rs           |  16 +++
 crates/libm-test/src/domain.rs               |   4 +
 crates/libm-test/src/gen/case_list.rs        |  80 ++++++++++++-
 crates/libm-test/src/mpfloat.rs              |  54 ++++++++-
 crates/libm-test/src/precision.rs            |   4 +
 crates/libm-test/tests/compare_built_musl.rs |  16 +++
 crates/util/src/main.rs                      |  16 +++
 etc/function-definitions.json                | 112 +++++++++++++++++++
 etc/function-list.txt                        |  16 +++
 src/libm_helper.rs                           |  18 ++-
 src/math/fmin_fmax.rs                        |  24 ++++
 src/math/fminimum_fmaximum.rs                |  67 +++++++++++
 src/math/fminimum_fmaximum_num.rs            |  67 +++++++++++
 src/math/generic/fmax.rs                     |  77 +++++++++++--
 src/math/generic/fmaximum.rs                 |  78 +++++++++++++
 src/math/generic/fmaximum_num.rs             |  77 +++++++++++++
 src/math/generic/fmin.rs                     |  77 +++++++++++--
 src/math/generic/fminimum.rs                 |  78 +++++++++++++
 src/math/generic/fminimum_num.rs             |  77 +++++++++++++
 src/math/generic/mod.rs                      |   8 ++
 src/math/mod.rs                              |  14 ++-
 23 files changed, 997 insertions(+), 31 deletions(-)
 create mode 100644 src/math/fminimum_fmaximum.rs
 create mode 100644 src/math/fminimum_fmaximum_num.rs
 create mode 100644 src/math/generic/fmaximum.rs
 create mode 100644 src/math/generic/fmaximum_num.rs
 create mode 100644 src/math/generic/fminimum.rs
 create mode 100644 src/math/generic/fminimum_num.rs

diff --git a/crates/libm-macros/src/shared.rs b/crates/libm-macros/src/shared.rs
index 48d19c50d..cb5a1d187 100644
--- a/crates/libm-macros/src/shared.rs
+++ b/crates/libm-macros/src/shared.rs
@@ -47,7 +47,17 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
         FloatTy::F16,
         Signature { args: &[Ty::F16, Ty::F16], returns: &[Ty::F16] },
         None,
-        &["copysignf16", "fdimf16", "fmaxf16", "fminf16", "fmodf16"],
+        &[
+            "copysignf16",
+            "fdimf16",
+            "fmaxf16",
+            "fmaximum_numf16",
+            "fmaximumf16",
+            "fminf16",
+            "fminimum_numf16",
+            "fminimumf16",
+            "fmodf16",
+        ],
     ),
     (
         // `(f32, f32) -> f32`
@@ -59,7 +69,11 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
             "copysignf",
             "fdimf",
             "fmaxf",
+            "fmaximum_numf",
+            "fmaximumf",
             "fminf",
+            "fminimum_numf",
+            "fminimumf",
             "fmodf",
             "hypotf",
             "nextafterf",
@@ -77,7 +91,11 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
             "copysign",
             "fdim",
             "fmax",
+            "fmaximum",
+            "fmaximum_num",
             "fmin",
+            "fminimum",
+            "fminimum_num",
             "fmod",
             "hypot",
             "nextafter",
@@ -90,7 +108,17 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
         FloatTy::F128,
         Signature { args: &[Ty::F128, Ty::F128], returns: &[Ty::F128] },
         None,
-        &["copysignf128", "fdimf128", "fmaxf128", "fminf128", "fmodf128"],
+        &[
+            "copysignf128",
+            "fdimf128",
+            "fmaxf128",
+            "fmaximum_numf128",
+            "fmaximumf128",
+            "fminf128",
+            "fminimum_numf128",
+            "fminimumf128",
+            "fmodf128",
+        ],
     ),
     (
         // `(f32, f32, f32) -> f32`
diff --git a/crates/libm-test/benches/icount.rs b/crates/libm-test/benches/icount.rs
index be85dd567..e28f4973c 100644
--- a/crates/libm-test/benches/icount.rs
+++ b/crates/libm-test/benches/icount.rs
@@ -207,10 +207,26 @@ main!(
     icount_bench_fmaxf128_group,
     icount_bench_fmaxf16_group,
     icount_bench_fmaxf_group,
+    icount_bench_fmaximum_group,
+    icount_bench_fmaximum_num_group,
+    icount_bench_fmaximum_numf128_group,
+    icount_bench_fmaximum_numf16_group,
+    icount_bench_fmaximum_numf_group,
+    icount_bench_fmaximumf128_group,
+    icount_bench_fmaximumf16_group,
+    icount_bench_fmaximumf_group,
     icount_bench_fmin_group,
     icount_bench_fminf128_group,
     icount_bench_fminf16_group,
     icount_bench_fminf_group,
+    icount_bench_fminimum_group,
+    icount_bench_fminimum_num_group,
+    icount_bench_fminimum_numf128_group,
+    icount_bench_fminimum_numf16_group,
+    icount_bench_fminimum_numf_group,
+    icount_bench_fminimumf128_group,
+    icount_bench_fminimumf16_group,
+    icount_bench_fminimumf_group,
     icount_bench_fmod_group,
     icount_bench_fmodf128_group,
     icount_bench_fmodf16_group,
diff --git a/crates/libm-test/benches/random.rs b/crates/libm-test/benches/random.rs
index 6e8a33479..6f6b05d95 100644
--- a/crates/libm-test/benches/random.rs
+++ b/crates/libm-test/benches/random.rs
@@ -130,8 +130,24 @@ libm_macros::for_each_function! {
         | fmaf128
         | fmaxf128
         | fmaxf16
+        | fmaximum
+        | fmaximum_num
+        | fmaximum_numf
+        | fmaximum_numf128
+        | fmaximum_numf16
+        | fmaximumf
+        | fmaximumf128
+        | fmaximumf16
         | fminf128
         | fminf16
+        | fminimum
+        | fminimum_num
+        | fminimum_numf
+        | fminimum_numf128
+        | fminimum_numf16
+        | fminimumf
+        | fminimumf128
+        | fminimumf16
         | fmodf128
         | fmodf16
         | ldexpf128
diff --git a/crates/libm-test/src/domain.rs b/crates/libm-test/src/domain.rs
index 5d650c00a..c662e95b4 100644
--- a/crates/libm-test/src/domain.rs
+++ b/crates/libm-test/src/domain.rs
@@ -221,7 +221,11 @@ pub fn get_domain<F: Float, I: Int>(
         BaseName::Floor => &EitherPrim::UNBOUNDED1[..],
         BaseName::Fma => &EitherPrim::UNBOUNDED3[..],
         BaseName::Fmax => &EitherPrim::UNBOUNDED2[..],
+        BaseName::Fmaximum => &EitherPrim::UNBOUNDED2[..],
+        BaseName::FmaximumNum => &EitherPrim::UNBOUNDED2[..],
         BaseName::Fmin => &EitherPrim::UNBOUNDED2[..],
+        BaseName::Fminimum => &EitherPrim::UNBOUNDED2[..],
+        BaseName::FminimumNum => &EitherPrim::UNBOUNDED2[..],
         BaseName::Fmod => &EitherPrim::UNBOUNDED2[..],
         BaseName::Hypot => &EitherPrim::UNBOUNDED2[..],
         BaseName::Ilogb => &EitherPrim::UNBOUNDED1[..],
diff --git a/crates/libm-test/src/gen/case_list.rs b/crates/libm-test/src/gen/case_list.rs
index 23226d5c2..49e731b88 100644
--- a/crates/libm-test/src/gen/case_list.rs
+++ b/crates/libm-test/src/gen/case_list.rs
@@ -293,7 +293,8 @@ fn fmaf128_cases() -> Vec<TestCase<op::fmaf128::Routine>> {
     v
 }
 
-fn fmax_cases() -> Vec<TestCase<op::fmax::Routine>> {
+#[cfg(f16_enabled)]
+fn fmaxf16_cases() -> Vec<TestCase<op::fmaxf16::Routine>> {
     vec![]
 }
 
@@ -301,17 +302,53 @@ fn fmaxf_cases() -> Vec<TestCase<op::fmaxf::Routine>> {
     vec![]
 }
 
+fn fmax_cases() -> Vec<TestCase<op::fmax::Routine>> {
+    vec![]
+}
+
 #[cfg(f128_enabled)]
 fn fmaxf128_cases() -> Vec<TestCase<op::fmaxf128::Routine>> {
     vec![]
 }
 
 #[cfg(f16_enabled)]
-fn fmaxf16_cases() -> Vec<TestCase<op::fmaxf16::Routine>> {
+fn fmaximumf16_cases() -> Vec<TestCase<op::fmaximumf16::Routine>> {
     vec![]
 }
 
-fn fmin_cases() -> Vec<TestCase<op::fmin::Routine>> {
+fn fmaximumf_cases() -> Vec<TestCase<op::fmaximumf::Routine>> {
+    vec![]
+}
+
+fn fmaximum_cases() -> Vec<TestCase<op::fmaximum::Routine>> {
+    vec![]
+}
+
+#[cfg(f128_enabled)]
+fn fmaximumf128_cases() -> Vec<TestCase<op::fmaximumf128::Routine>> {
+    vec![]
+}
+
+#[cfg(f16_enabled)]
+fn fmaximum_numf16_cases() -> Vec<TestCase<op::fmaximum_numf16::Routine>> {
+    vec![]
+}
+
+fn fmaximum_numf_cases() -> Vec<TestCase<op::fmaximum_numf::Routine>> {
+    vec![]
+}
+
+fn fmaximum_num_cases() -> Vec<TestCase<op::fmaximum_num::Routine>> {
+    vec![]
+}
+
+#[cfg(f128_enabled)]
+fn fmaximum_numf128_cases() -> Vec<TestCase<op::fmaximum_numf128::Routine>> {
+    vec![]
+}
+
+#[cfg(f16_enabled)]
+fn fminf16_cases() -> Vec<TestCase<op::fminf16::Routine>> {
     vec![]
 }
 
@@ -319,13 +356,48 @@ fn fminf_cases() -> Vec<TestCase<op::fminf::Routine>> {
     vec![]
 }
 
+fn fmin_cases() -> Vec<TestCase<op::fmin::Routine>> {
+    vec![]
+}
+
 #[cfg(f128_enabled)]
 fn fminf128_cases() -> Vec<TestCase<op::fminf128::Routine>> {
     vec![]
 }
 
 #[cfg(f16_enabled)]
-fn fminf16_cases() -> Vec<TestCase<op::fminf16::Routine>> {
+fn fminimumf16_cases() -> Vec<TestCase<op::fminimumf16::Routine>> {
+    vec![]
+}
+
+fn fminimumf_cases() -> Vec<TestCase<op::fminimumf::Routine>> {
+    vec![]
+}
+
+fn fminimum_cases() -> Vec<TestCase<op::fminimum::Routine>> {
+    vec![]
+}
+
+#[cfg(f128_enabled)]
+fn fminimumf128_cases() -> Vec<TestCase<op::fminimumf128::Routine>> {
+    vec![]
+}
+
+#[cfg(f16_enabled)]
+fn fminimum_numf16_cases() -> Vec<TestCase<op::fminimum_numf16::Routine>> {
+    vec![]
+}
+
+fn fminimum_numf_cases() -> Vec<TestCase<op::fminimum_numf::Routine>> {
+    vec![]
+}
+
+fn fminimum_num_cases() -> Vec<TestCase<op::fminimum_num::Routine>> {
+    vec![]
+}
+
+#[cfg(f128_enabled)]
+fn fminimum_numf128_cases() -> Vec<TestCase<op::fminimum_numf128::Routine>> {
     vec![]
 }
 
diff --git a/crates/libm-test/src/mpfloat.rs b/crates/libm-test/src/mpfloat.rs
index f4a9ff7ff..63cdebe4e 100644
--- a/crates/libm-test/src/mpfloat.rs
+++ b/crates/libm-test/src/mpfloat.rs
@@ -148,6 +148,14 @@ libm_macros::for_each_function! {
         floorf,
         floorf128,
         floorf16,
+        fmaximum,
+        fmaximumf,
+        fmaximumf128,
+        fmaximumf16,
+        fminimum,
+        fminimumf,
+        fminimumf128,
+        fminimumf16,
         fmod,
         fmodf,
         fmodf128,
@@ -197,8 +205,10 @@ libm_macros::for_each_function! {
         fabs | fabsf => abs,
         fdim | fdimf | fdimf16 | fdimf128  => positive_diff,
         fma | fmaf | fmaf128 => mul_add,
-        fmax | fmaxf | fmaxf16 | fmaxf128 => max,
-        fmin | fminf | fminf16 | fminf128 => min,
+        fmax | fmaxf | fmaxf16 | fmaxf128 |
+        fmaximum_num | fmaximum_numf | fmaximum_numf16 | fmaximum_numf128 => max,
+        fmin | fminf | fminf16 | fminf128 |
+        fminimum_num | fminimum_numf | fminimum_numf16 | fminimum_numf128 => min,
         lgamma | lgammaf => ln_gamma,
         log | logf => ln,
         log1p | log1pf => ln_1p,
@@ -446,6 +456,46 @@ macro_rules! impl_op_for_ty_all {
                 }
             }
 
+            impl MpOp for crate::op::[< fmaximum $suffix >]::Routine {
+                type MpTy = (MpFloat, MpFloat);
+
+                fn new_mp() -> Self::MpTy {
+                    (new_mpfloat::<Self::FTy>(), new_mpfloat::<Self::FTy>())
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.0.assign(input.0);
+                    this.1.assign(input.1);
+                    let ord = if this.0.is_nan() || this.1.is_nan() {
+                        this.0.assign($fty::NAN);
+                        Ordering::Equal
+                    } else {
+                        this.0.max_round(&this.1, Nearest)
+                    };
+                    prep_retval::<Self::RustRet>(&mut this.0, ord)
+                }
+            }
+
+            impl MpOp for crate::op::[< fminimum $suffix >]::Routine {
+                type MpTy = (MpFloat, MpFloat);
+
+                fn new_mp() -> Self::MpTy {
+                    (new_mpfloat::<Self::FTy>(), new_mpfloat::<Self::FTy>())
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.0.assign(input.0);
+                    this.1.assign(input.1);
+                    let ord = if this.0.is_nan() || this.1.is_nan() {
+                        this.0.assign($fty::NAN);
+                        Ordering::Equal
+                    } else {
+                        this.0.min_round(&this.1, Nearest)
+                    };
+                    prep_retval::<Self::RustRet>(&mut this.0, ord)
+                }
+            }
+
             // `ldexp` and `scalbn` are the same for binary floating point, so just forward all
             // methods.
             impl MpOp for crate::op::[<ldexp $suffix>]::Routine {
diff --git a/crates/libm-test/src/precision.rs b/crates/libm-test/src/precision.rs
index 2f55ad22e..1d916e572 100644
--- a/crates/libm-test/src/precision.rs
+++ b/crates/libm-test/src/precision.rs
@@ -25,7 +25,11 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 {
         | Bn::Floor
         | Bn::Fma
         | Bn::Fmax
+        | Bn::Fmaximum
+        | Bn::FmaximumNum
         | Bn::Fmin
+        | Bn::Fminimum
+        | Bn::FminimumNum
         | Bn::Fmod
         | Bn::Frexp
         | Bn::Ilogb
diff --git a/crates/libm-test/tests/compare_built_musl.rs b/crates/libm-test/tests/compare_built_musl.rs
index 7fa77e832..ffd7f1f60 100644
--- a/crates/libm-test/tests/compare_built_musl.rs
+++ b/crates/libm-test/tests/compare_built_musl.rs
@@ -102,8 +102,24 @@ libm_macros::for_each_function! {
         fmaf128,
         fmaxf128,
         fmaxf16,
+        fmaximum,
+        fmaximum_num,
+        fmaximum_numf,
+        fmaximum_numf128,
+        fmaximum_numf16,
+        fmaximumf,
+        fmaximumf128,
+        fmaximumf16,
         fminf128,
         fminf16,
+        fminimum,
+        fminimum_num,
+        fminimum_numf,
+        fminimum_numf128,
+        fminimum_numf16,
+        fminimumf,
+        fminimumf128,
+        fminimumf16,
         fmodf128,
         fmodf16,
         ldexpf128,
diff --git a/crates/util/src/main.rs b/crates/util/src/main.rs
index 0f845a1c4..a519713c0 100644
--- a/crates/util/src/main.rs
+++ b/crates/util/src/main.rs
@@ -99,8 +99,24 @@ fn do_eval(basis: &str, op: &str, inputs: &[&str]) {
             | fmaf128
             | fmaxf128
             | fmaxf16
+            | fmaximum
+            | fmaximum_num
+            | fmaximum_numf
+            | fmaximum_numf128
+            | fmaximum_numf16
+            | fmaximumf
+            | fmaximumf128
+            | fmaximumf16
             | fminf128
             | fminf16
+            | fminimum
+            | fminimum_num
+            | fminimum_numf
+            | fminimum_numf128
+            | fminimum_numf16
+            | fminimumf
+            | fminimumf128
+            | fminimumf16
             | fmodf128
             | fmodf16
             | ldexpf128
diff --git a/etc/function-definitions.json b/etc/function-definitions.json
index d3e51f29a..008a47df2 100644
--- a/etc/function-definitions.json
+++ b/etc/function-definitions.json
@@ -391,6 +391,62 @@
         ],
         "type": "f16"
     },
+    "fmaximum": {
+        "sources": [
+            "src/math/fminimum_fmaximum.rs",
+            "src/math/generic/fmaximum.rs"
+        ],
+        "type": "f64"
+    },
+    "fmaximum_num": {
+        "sources": [
+            "src/math/fminimum_fmaximum_num.rs",
+            "src/math/generic/fmaximum_num.rs"
+        ],
+        "type": "f64"
+    },
+    "fmaximum_numf": {
+        "sources": [
+            "src/math/fminimum_fmaximum_num.rs",
+            "src/math/generic/fmaximum_num.rs"
+        ],
+        "type": "f32"
+    },
+    "fmaximum_numf128": {
+        "sources": [
+            "src/math/fminimum_fmaximum_num.rs",
+            "src/math/generic/fmaximum_num.rs"
+        ],
+        "type": "f128"
+    },
+    "fmaximum_numf16": {
+        "sources": [
+            "src/math/fminimum_fmaximum_num.rs",
+            "src/math/generic/fmaximum_num.rs"
+        ],
+        "type": "f16"
+    },
+    "fmaximumf": {
+        "sources": [
+            "src/math/fminimum_fmaximum.rs",
+            "src/math/generic/fmaximum.rs"
+        ],
+        "type": "f32"
+    },
+    "fmaximumf128": {
+        "sources": [
+            "src/math/fminimum_fmaximum.rs",
+            "src/math/generic/fmaximum.rs"
+        ],
+        "type": "f128"
+    },
+    "fmaximumf16": {
+        "sources": [
+            "src/math/fminimum_fmaximum.rs",
+            "src/math/generic/fmaximum.rs"
+        ],
+        "type": "f16"
+    },
     "fmin": {
         "sources": [
             "src/math/fmin_fmax.rs",
@@ -419,6 +475,62 @@
         ],
         "type": "f16"
     },
+    "fminimum": {
+        "sources": [
+            "src/math/fminimum_fmaximum.rs",
+            "src/math/generic/fminimum.rs"
+        ],
+        "type": "f64"
+    },
+    "fminimum_num": {
+        "sources": [
+            "src/math/fminimum_fmaximum_num.rs",
+            "src/math/generic/fminimum_num.rs"
+        ],
+        "type": "f64"
+    },
+    "fminimum_numf": {
+        "sources": [
+            "src/math/fminimum_fmaximum_num.rs",
+            "src/math/generic/fminimum_num.rs"
+        ],
+        "type": "f32"
+    },
+    "fminimum_numf128": {
+        "sources": [
+            "src/math/fminimum_fmaximum_num.rs",
+            "src/math/generic/fminimum_num.rs"
+        ],
+        "type": "f128"
+    },
+    "fminimum_numf16": {
+        "sources": [
+            "src/math/fminimum_fmaximum_num.rs",
+            "src/math/generic/fminimum_num.rs"
+        ],
+        "type": "f16"
+    },
+    "fminimumf": {
+        "sources": [
+            "src/math/fminimum_fmaximum.rs",
+            "src/math/generic/fminimum.rs"
+        ],
+        "type": "f32"
+    },
+    "fminimumf128": {
+        "sources": [
+            "src/math/fminimum_fmaximum.rs",
+            "src/math/generic/fminimum.rs"
+        ],
+        "type": "f128"
+    },
+    "fminimumf16": {
+        "sources": [
+            "src/math/fminimum_fmaximum.rs",
+            "src/math/generic/fminimum.rs"
+        ],
+        "type": "f16"
+    },
     "fmod": {
         "sources": [
             "src/math/fmod.rs",
diff --git a/etc/function-list.txt b/etc/function-list.txt
index 1c9c5e3bc..90ca8f34e 100644
--- a/etc/function-list.txt
+++ b/etc/function-list.txt
@@ -58,10 +58,26 @@ fmax
 fmaxf
 fmaxf128
 fmaxf16
+fmaximum
+fmaximum_num
+fmaximum_numf
+fmaximum_numf128
+fmaximum_numf16
+fmaximumf
+fmaximumf128
+fmaximumf16
 fmin
 fminf
 fminf128
 fminf16
+fminimum
+fminimum_num
+fminimum_numf
+fminimum_numf128
+fminimum_numf16
+fminimumf
+fminimumf128
+fminimumf16
 fmod
 fmodf
 fmodf128
diff --git a/src/libm_helper.rs b/src/libm_helper.rs
index 68f1fb362..489dbc0d4 100644
--- a/src/libm_helper.rs
+++ b/src/libm_helper.rs
@@ -137,7 +137,15 @@ libm_helper! {
         (fn floor(x: f64) -> (f64);                 => floor);
         (fn fma(x: f64, y: f64, z: f64) -> (f64);   => fma);
         (fn fmax(x: f64, y: f64) -> (f64);          => fmax);
+        (fn fmaximum(x: f64, y: f64) -> (f64);      => fmaximum);
+        (fn fmaximum_num(x: f64, y: f64) -> (f64);  => fmaximum_num);
+        (fn fmaximum_numf(x: f32, y: f32) -> (f32); => fmaximum_numf);
+        (fn fmaximumf(x: f32, y: f32) -> (f32);     => fmaximumf);
         (fn fmin(x: f64, y: f64) -> (f64);          => fmin);
+        (fn fminimum(x: f64, y: f64) -> (f64);      => fminimum);
+        (fn fminimum_num(x: f64, y: f64) -> (f64);  => fminimum_num);
+        (fn fminimum_numf(x: f32, y: f32) -> (f32); => fminimum_numf);
+        (fn fminimumf(x: f32, y: f32) -> (f32);     => fminimumf);
         (fn fmod(x: f64, y: f64) -> (f64);          => fmod);
         (fn frexp(x: f64) -> (f64, i32);            => frexp);
         (fn hypot(x: f64, y: f64) -> (f64);         => hypot);
@@ -186,7 +194,11 @@ libm_helper! {
         (fn fdim(x: f16, y: f16) -> (f16);          => fdimf16);
         (fn floorf(x: f16) -> (f16);                => floorf16);
         (fn fmaxf(x: f16, y: f16) -> (f16);         => fmaxf16);
+        (fn fmaximum_numf16(x: f16, y: f16) -> (f16);   => fmaximum_numf16);
+        (fn fmaximumf16(x: f16, y: f16) -> (f16);   => fmaximumf16);
         (fn fminf(x: f16, y: f16) -> (f16);         => fminf16);
+        (fn fminimum_numf16(x: f16, y: f16) -> (f16);   => fminimum_numf16);
+        (fn fminimumf16(x: f16, y: f16) -> (f16);   => fminimumf16);
         (fn fmodf(x: f16, y: f16) -> (f16);         => fmodf16);
         (fn ldexpf16(x: f16, n: i32) -> (f16);      => ldexpf16);
         (fn rintf(x: f16) -> (f16);                 => rintf16);
@@ -208,9 +220,13 @@ libm_helper! {
         (fn fabs(x: f128) -> (f128);                => fabsf128);
         (fn fdim(x: f128, y: f128) -> (f128);       => fdimf128);
         (fn floor(x: f128) -> (f128);               => floorf128);
-        (fn fmaf128(x: f128, y: f128, z: f128) -> (f128); => fmaf128);
+        (fn fmaf128(x: f128, y: f128, z: f128) -> (f128);  => fmaf128);
         (fn fmax(x: f128, y: f128) -> (f128);       => fmaxf128);
+        (fn fmaximum_numf128(x: f128, y: f128) -> (f128);  => fmaximum_numf128);
+        (fn fmaximumf128(x: f128, y: f128) -> (f128);      => fmaximumf128);
         (fn fmin(x: f128, y: f128) -> (f128);       => fminf128);
+        (fn fminimum_numf128(x: f128, y: f128) -> (f128);  => fminimum_numf128);
+        (fn fminimumf128(x: f128, y: f128) -> (f128);      => fminimumf128);
         (fn fmod(x: f128, y: f128) -> (f128);       => fmodf128);
         (fn ldexpf128(x: f128, n: i32) -> (f128);   => ldexpf128);
         (fn rint(x: f128) -> (f128);                => rintf128);
diff --git a/src/math/fmin_fmax.rs b/src/math/fmin_fmax.rs
index 97912e758..4f9136dbb 100644
--- a/src/math/fmin_fmax.rs
+++ b/src/math/fmin_fmax.rs
@@ -1,4 +1,7 @@
 /// Return the lesser of two arguments or, if either argument is NaN, the other argument.
+///
+/// This coincides with IEEE 754-2011 `minNum`. The result disregards signed zero (meaning if
+/// the inputs are -0.0 and +0.0, either may be returned).
 #[cfg(f16_enabled)]
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn fminf16(x: f16, y: f16) -> f16 {
@@ -6,18 +9,27 @@ pub fn fminf16(x: f16, y: f16) -> f16 {
 }
 
 /// Return the lesser of two arguments or, if either argument is NaN, the other argument.
+///
+/// This coincides with IEEE 754-2011 `minNum`. The result disregards signed zero (meaning if
+/// the inputs are -0.0 and +0.0, either may be returned).
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn fminf(x: f32, y: f32) -> f32 {
     super::generic::fmin(x, y)
 }
 
 /// Return the lesser of two arguments or, if either argument is NaN, the other argument.
+///
+/// This coincides with IEEE 754-2011 `minNum`. The result disregards signed zero (meaning if
+/// the inputs are -0.0 and +0.0, either may be returned).
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn fmin(x: f64, y: f64) -> f64 {
     super::generic::fmin(x, y)
 }
 
 /// Return the lesser of two arguments or, if either argument is NaN, the other argument.
+///
+/// This coincides with IEEE 754-2011 `minNum`. The result disregards signed zero (meaning if
+/// the inputs are -0.0 and +0.0, either may be returned).
 #[cfg(f128_enabled)]
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn fminf128(x: f128, y: f128) -> f128 {
@@ -25,6 +37,9 @@ pub fn fminf128(x: f128, y: f128) -> f128 {
 }
 
 /// Return the greater of two arguments or, if either argument is NaN, the other argument.
+///
+/// This coincides with IEEE 754-2011 `maxNum`. The result disregards signed zero (meaning if
+/// the inputs are -0.0 and +0.0, either may be returned).
 #[cfg(f16_enabled)]
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn fmaxf16(x: f16, y: f16) -> f16 {
@@ -32,18 +47,27 @@ pub fn fmaxf16(x: f16, y: f16) -> f16 {
 }
 
 /// Return the greater of two arguments or, if either argument is NaN, the other argument.
+///
+/// This coincides with IEEE 754-2011 `maxNum`. The result disregards signed zero (meaning if
+/// the inputs are -0.0 and +0.0, either may be returned).
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn fmaxf(x: f32, y: f32) -> f32 {
     super::generic::fmax(x, y)
 }
 
 /// Return the greater of two arguments or, if either argument is NaN, the other argument.
+///
+/// This coincides with IEEE 754-2011 `maxNum`. The result disregards signed zero (meaning if
+/// the inputs are -0.0 and +0.0, either may be returned).
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn fmax(x: f64, y: f64) -> f64 {
     super::generic::fmax(x, y)
 }
 
 /// Return the greater of two arguments or, if either argument is NaN, the other argument.
+///
+/// This coincides with IEEE 754-2011 `maxNum`. The result disregards signed zero (meaning if
+/// the inputs are -0.0 and +0.0, either may be returned).
 #[cfg(f128_enabled)]
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn fmaxf128(x: f128, y: f128) -> f128 {
diff --git a/src/math/fminimum_fmaximum.rs b/src/math/fminimum_fmaximum.rs
new file mode 100644
index 000000000..fd3c5ed10
--- /dev/null
+++ b/src/math/fminimum_fmaximum.rs
@@ -0,0 +1,67 @@
+/// Return the lesser of two arguments or, if either argument is NaN, the other argument.
+///
+/// This coincides with IEEE 754-2019 `minimum`. The result orders -0.0 < 0.0.
+#[cfg(f16_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fminimumf16(x: f16, y: f16) -> f16 {
+    super::generic::fminimum(x, y)
+}
+
+/// Return the lesser of two arguments or, if either argument is NaN, the other argument.
+///
+/// This coincides with IEEE 754-2019 `minimum`. The result orders -0.0 < 0.0.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fminimum(x: f64, y: f64) -> f64 {
+    super::generic::fminimum(x, y)
+}
+
+/// Return the lesser of two arguments or, if either argument is NaN, the other argument.
+///
+/// This coincides with IEEE 754-2019 `minimum`. The result orders -0.0 < 0.0.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fminimumf(x: f32, y: f32) -> f32 {
+    super::generic::fminimum(x, y)
+}
+
+/// Return the lesser of two arguments or, if either argument is NaN, the other argument.
+///
+/// This coincides with IEEE 754-2019 `minimum`. The result orders -0.0 < 0.0.
+#[cfg(f128_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fminimumf128(x: f128, y: f128) -> f128 {
+    super::generic::fminimum(x, y)
+}
+
+/// Return the greater of two arguments or, if either argument is NaN, the other argument.
+///
+/// This coincides with IEEE 754-2019 `maximum`. The result orders -0.0 < 0.0.
+#[cfg(f16_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmaximumf16(x: f16, y: f16) -> f16 {
+    super::generic::fmaximum(x, y)
+}
+
+/// Return the greater of two arguments or, if either argument is NaN, the other argument.
+///
+/// This coincides with IEEE 754-2019 `maximum`. The result orders -0.0 < 0.0.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmaximumf(x: f32, y: f32) -> f32 {
+    super::generic::fmaximum(x, y)
+}
+
+/// Return the greater of two arguments or, if either argument is NaN, the other argument.
+///
+/// This coincides with IEEE 754-2019 `maximum`. The result orders -0.0 < 0.0.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmaximum(x: f64, y: f64) -> f64 {
+    super::generic::fmaximum(x, y)
+}
+
+/// Return the greater of two arguments or, if either argument is NaN, the other argument.
+///
+/// This coincides with IEEE 754-2019 `maximum`. The result orders -0.0 < 0.0.
+#[cfg(f128_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmaximumf128(x: f128, y: f128) -> f128 {
+    super::generic::fmaximum(x, y)
+}
diff --git a/src/math/fminimum_fmaximum_num.rs b/src/math/fminimum_fmaximum_num.rs
new file mode 100644
index 000000000..640ddfc9b
--- /dev/null
+++ b/src/math/fminimum_fmaximum_num.rs
@@ -0,0 +1,67 @@
+/// Return the lesser of two arguments or, if either argument is NaN, NaN.
+///
+/// This coincides with IEEE 754-2019 `minimumNumber`. The result orders -0.0 < 0.0.
+#[cfg(f16_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fminimum_numf16(x: f16, y: f16) -> f16 {
+    super::generic::fminimum_num(x, y)
+}
+
+/// Return the lesser of two arguments or, if either argument is NaN, NaN.
+///
+/// This coincides with IEEE 754-2019 `minimumNumber`. The result orders -0.0 < 0.0.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fminimum_numf(x: f32, y: f32) -> f32 {
+    super::generic::fminimum_num(x, y)
+}
+
+/// Return the lesser of two arguments or, if either argument is NaN, NaN.
+///
+/// This coincides with IEEE 754-2019 `minimumNumber`. The result orders -0.0 < 0.0.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fminimum_num(x: f64, y: f64) -> f64 {
+    super::generic::fminimum_num(x, y)
+}
+
+/// Return the lesser of two arguments or, if either argument is NaN, NaN.
+///
+/// This coincides with IEEE 754-2019 `minimumNumber`. The result orders -0.0 < 0.0.
+#[cfg(f128_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fminimum_numf128(x: f128, y: f128) -> f128 {
+    super::generic::fminimum_num(x, y)
+}
+
+/// Return the greater of two arguments or, if either argument is NaN, NaN.
+///
+/// This coincides with IEEE 754-2019 `maximumNumber`. The result orders -0.0 < 0.0.
+#[cfg(f16_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmaximum_numf16(x: f16, y: f16) -> f16 {
+    super::generic::fmaximum_num(x, y)
+}
+
+/// Return the greater of two arguments or, if either argument is NaN, NaN.
+///
+/// This coincides with IEEE 754-2019 `maximumNumber`. The result orders -0.0 < 0.0.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmaximum_numf(x: f32, y: f32) -> f32 {
+    super::generic::fmaximum_num(x, y)
+}
+
+/// Return the greater of two arguments or, if either argument is NaN, NaN.
+///
+/// This coincides with IEEE 754-2019 `maximumNumber`. The result orders -0.0 < 0.0.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmaximum_num(x: f64, y: f64) -> f64 {
+    super::generic::fmaximum_num(x, y)
+}
+
+/// Return the greater of two arguments or, if either argument is NaN, NaN.
+///
+/// This coincides with IEEE 754-2019 `maximumNumber`. The result orders -0.0 < 0.0.
+#[cfg(f128_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmaximum_numf128(x: f128, y: f128) -> f128 {
+    super::generic::fmaximum_num(x, y)
+}
diff --git a/src/math/generic/fmax.rs b/src/math/generic/fmax.rs
index 97803052b..32613a46b 100644
--- a/src/math/generic/fmax.rs
+++ b/src/math/generic/fmax.rs
@@ -1,14 +1,73 @@
+/* SPDX-License-Identifier: MIT OR Apache-2.0 */
+//! IEEE 754-2011 `maxNum`. This has been superseded by IEEE 754-2019 `maximumNumber`.
+//!
+//! Per the spec, returns the canonicalized result of:
+//! - `x` if `x > y`
+//! - `y` if `y > x`
+//! - The other number if one is NaN
+//! - Otherwise, either `x` or `y`, canonicalized
+//! - -0.0 and +0.0 may be disregarded (unlike newer operations)
+//!
+//! Excluded from our implementation is sNaN handling.
+//!
+//! More on the differences: [link].
+//!
+//! [link]: https://grouper.ieee.org/groups/msc/ANSI_IEEE-Std-754-2019/background/minNum_maxNum_Removal_Demotion_v3.pdf
+
 use super::super::Float;
 
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn fmax<F: Float>(x: F, y: F) -> F {
-    // IEEE754 says: maxNum(x, y) is the canonicalized number y if x < y, x if y < x, the
-    // canonicalized number if one operand is a number and the other a quiet NaN. Otherwise it
-    // is either x or y, canonicalized (this means results might differ among implementations).
-    // When either x or y is a signalingNaN, then the result is according to 6.2.
-    //
-    // Since we do not support sNaN in Rust yet, we do not need to handle them.
-    // FIXME(nagisa): due to https://bugs.llvm.org/show_bug.cgi?id=33303 we canonicalize by
-    // multiplying by 1.0. Should switch to the `canonicalize` when it works.
-    (if x.is_nan() || x < y { y } else { x }) * F::ONE
+    let res = if x.is_nan() || x < y { y } else { x };
+    // Canonicalize
+    res * F::ONE
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::support::{Hexf, Int};
+
+    fn spec_test<F: Float>() {
+        let cases = [
+            (F::ZERO, F::ZERO, F::ZERO),
+            (F::ONE, F::ONE, F::ONE),
+            (F::ZERO, F::ONE, F::ONE),
+            (F::ONE, F::ZERO, F::ONE),
+            (F::ZERO, F::NEG_ONE, F::ZERO),
+            (F::NEG_ONE, F::ZERO, F::ZERO),
+            (F::INFINITY, F::ZERO, F::INFINITY),
+            (F::NEG_INFINITY, F::ZERO, F::ZERO),
+            (F::NAN, F::ZERO, F::ZERO),
+            (F::ZERO, F::NAN, F::ZERO),
+            (F::NAN, F::NAN, F::NAN),
+        ];
+
+        for (x, y, res) in cases {
+            let val = fmax(x, y);
+            assert_biteq!(val, res, "fmax({}, {})", Hexf(x), Hexf(y));
+        }
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn spec_tests_f16() {
+        spec_test::<f16>();
+    }
+
+    #[test]
+    fn spec_tests_f32() {
+        spec_test::<f32>();
+    }
+
+    #[test]
+    fn spec_tests_f64() {
+        spec_test::<f64>();
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn spec_tests_f128() {
+        spec_test::<f128>();
+    }
 }
diff --git a/src/math/generic/fmaximum.rs b/src/math/generic/fmaximum.rs
new file mode 100644
index 000000000..5f653ce94
--- /dev/null
+++ b/src/math/generic/fmaximum.rs
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: MIT OR Apache-2.0 */
+//! IEEE 754-2019 `maximum`.
+//!
+//! Per the spec, returns the canonicalized result of:
+//! - `x` if `x > y`
+//! - `y` if `y > x`
+//! - qNaN if either operation is NaN
+//! - Logic following +0.0 > -0.0
+//!
+//! Excluded from our implementation is sNaN handling.
+
+use super::super::Float;
+
+pub fn fmaximum<F: Float>(x: F, y: F) -> F {
+    let res = if x.is_nan() {
+        x
+    } else if y.is_nan() {
+        y
+    } else if x > y || (y.to_bits() == F::NEG_ZERO.to_bits() && x.is_sign_positive()) {
+        x
+    } else {
+        y
+    };
+
+    // Canonicalize
+    res * F::ONE
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::support::{Hexf, Int};
+
+    fn spec_test<F: Float>() {
+        let cases = [
+            (F::ZERO, F::ZERO, F::ZERO),
+            (F::ONE, F::ONE, F::ONE),
+            (F::ZERO, F::ONE, F::ONE),
+            (F::ONE, F::ZERO, F::ONE),
+            (F::ZERO, F::NEG_ONE, F::ZERO),
+            (F::NEG_ONE, F::ZERO, F::ZERO),
+            (F::INFINITY, F::ZERO, F::INFINITY),
+            (F::NEG_INFINITY, F::ZERO, F::ZERO),
+            (F::NAN, F::ZERO, F::NAN),
+            (F::ZERO, F::NAN, F::NAN),
+            (F::NAN, F::NAN, F::NAN),
+            (F::ZERO, F::NEG_ZERO, F::ZERO),
+            (F::NEG_ZERO, F::ZERO, F::ZERO),
+        ];
+
+        for (x, y, res) in cases {
+            let val = fmaximum(x, y);
+            assert_biteq!(val, res, "fmaximum({}, {})", Hexf(x), Hexf(y));
+        }
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn spec_tests_f16() {
+        spec_test::<f16>();
+    }
+
+    #[test]
+    fn spec_tests_f32() {
+        spec_test::<f32>();
+    }
+
+    #[test]
+    fn spec_tests_f64() {
+        spec_test::<f64>();
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn spec_tests_f128() {
+        spec_test::<f128>();
+    }
+}
diff --git a/src/math/generic/fmaximum_num.rs b/src/math/generic/fmaximum_num.rs
new file mode 100644
index 000000000..224660123
--- /dev/null
+++ b/src/math/generic/fmaximum_num.rs
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: MIT OR Apache-2.0 */
+//! IEEE 754-2019 `maximumNumber`.
+//!
+//! Per the spec, returns:
+//! - `x` if `x > y`
+//! - `y` if `y > x`
+//! - Non-NaN if one operand is NaN
+//! - Logic following +0.0 > -0.0
+//! - Either `x` or `y` if `x == y` and the signs are the same
+//! - qNaN if either operand is a NaN
+//!
+//! Excluded from our implementation is sNaN handling.
+
+use super::super::Float;
+
+pub fn fmaximum_num<F: Float>(x: F, y: F) -> F {
+    let res =
+        if x.is_nan() || x < y || (x.to_bits() == F::NEG_ZERO.to_bits() && y.is_sign_positive()) {
+            y
+        } else {
+            x
+        };
+
+    // Canonicalize
+    res * F::ONE
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::support::{Hexf, Int};
+
+    fn spec_test<F: Float>() {
+        let cases = [
+            (F::ZERO, F::ZERO, F::ZERO),
+            (F::ONE, F::ONE, F::ONE),
+            (F::ZERO, F::ONE, F::ONE),
+            (F::ONE, F::ZERO, F::ONE),
+            (F::ZERO, F::NEG_ONE, F::ZERO),
+            (F::NEG_ONE, F::ZERO, F::ZERO),
+            (F::INFINITY, F::ZERO, F::INFINITY),
+            (F::NEG_INFINITY, F::ZERO, F::ZERO),
+            (F::NAN, F::ZERO, F::ZERO),
+            (F::ZERO, F::NAN, F::ZERO),
+            (F::NAN, F::NAN, F::NAN),
+            (F::ZERO, F::NEG_ZERO, F::ZERO),
+            (F::NEG_ZERO, F::ZERO, F::ZERO),
+        ];
+
+        for (x, y, res) in cases {
+            let val = fmaximum_num(x, y);
+            assert_biteq!(val, res, "fmaximum_num({}, {})", Hexf(x), Hexf(y));
+        }
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn spec_tests_f16() {
+        spec_test::<f16>();
+    }
+
+    #[test]
+    fn spec_tests_f32() {
+        spec_test::<f32>();
+    }
+
+    #[test]
+    fn spec_tests_f64() {
+        spec_test::<f64>();
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn spec_tests_f128() {
+        spec_test::<f128>();
+    }
+}
diff --git a/src/math/generic/fmin.rs b/src/math/generic/fmin.rs
index 697f72004..5cc33e904 100644
--- a/src/math/generic/fmin.rs
+++ b/src/math/generic/fmin.rs
@@ -1,13 +1,72 @@
+/* SPDX-License-Identifier: MIT OR Apache-2.0 */
+//! IEEE 754-2008 `minNum`. This has been superseded by IEEE 754-2019 `minimumNumber`.
+//!
+//! Per the spec, returns the canonicalized result of:
+//! - `x` if `x < y`
+//! - `y` if `y < x`
+//! - The other number if one is NaN
+//! - Otherwise, either `x` or `y`, canonicalized
+//! - -0.0 and +0.0 may be disregarded (unlike newer operations)
+//!
+//! Excluded from our implementation is sNaN handling.
+//!
+//! More on the differences: [link].
+//!
+//! [link]: https://grouper.ieee.org/groups/msc/ANSI_IEEE-Std-754-2019/background/minNum_maxNum_Removal_Demotion_v3.pdf
+
 use super::super::Float;
 
 pub fn fmin<F: Float>(x: F, y: F) -> F {
-    // IEEE754 says: minNum(x, y) is the canonicalized number x if x < y, y if y < x, the
-    // canonicalized number if one operand is a number and the other a quiet NaN. Otherwise it
-    // is either x or y, canonicalized (this means results might differ among implementations).
-    // When either x or y is a signalingNaN, then the result is according to 6.2.
-    //
-    // Since we do not support sNaN in Rust yet, we do not need to handle them.
-    // FIXME(nagisa): due to https://bugs.llvm.org/show_bug.cgi?id=33303 we canonicalize by
-    // multiplying by 1.0. Should switch to the `canonicalize` when it works.
-    (if y.is_nan() || x < y { x } else { y }) * F::ONE
+    let res = if y.is_nan() || x < y { x } else { y };
+    // Canonicalize
+    res * F::ONE
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::support::{Hexf, Int};
+
+    fn spec_test<F: Float>() {
+        let cases = [
+            (F::ZERO, F::ZERO, F::ZERO),
+            (F::ONE, F::ONE, F::ONE),
+            (F::ZERO, F::ONE, F::ZERO),
+            (F::ONE, F::ZERO, F::ZERO),
+            (F::ZERO, F::NEG_ONE, F::NEG_ONE),
+            (F::NEG_ONE, F::ZERO, F::NEG_ONE),
+            (F::INFINITY, F::ZERO, F::ZERO),
+            (F::NEG_INFINITY, F::ZERO, F::NEG_INFINITY),
+            (F::NAN, F::ZERO, F::ZERO),
+            (F::ZERO, F::NAN, F::ZERO),
+            (F::NAN, F::NAN, F::NAN),
+        ];
+
+        for (x, y, res) in cases {
+            let val = fmin(x, y);
+            assert_biteq!(val, res, "fmin({}, {})", Hexf(x), Hexf(y));
+        }
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn spec_tests_f16() {
+        spec_test::<f16>();
+    }
+
+    #[test]
+    fn spec_tests_f32() {
+        spec_test::<f32>();
+    }
+
+    #[test]
+    fn spec_tests_f64() {
+        spec_test::<f64>();
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn spec_tests_f128() {
+        spec_test::<f128>();
+    }
 }
diff --git a/src/math/generic/fminimum.rs b/src/math/generic/fminimum.rs
new file mode 100644
index 000000000..f566d9631
--- /dev/null
+++ b/src/math/generic/fminimum.rs
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: MIT OR Apache-2.0 */
+//! IEEE 754-2019 `minimum`.
+//!
+//! Per the spec, returns the canonicalized result of:
+//! - `x` if `x < y`
+//! - `y` if `y < x`
+//! - qNaN if either operation is NaN
+//! - Logic following +0.0 > -0.0
+//!
+//! Excluded from our implementation is sNaN handling.
+
+use super::super::Float;
+
+pub fn fminimum<F: Float>(x: F, y: F) -> F {
+    let res = if x.is_nan() {
+        x
+    } else if y.is_nan() {
+        y
+    } else if x < y || (x.to_bits() == F::NEG_ZERO.to_bits() && y.is_sign_positive()) {
+        x
+    } else {
+        y
+    };
+
+    // Canonicalize
+    res * F::ONE
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::support::{Hexf, Int};
+
+    fn spec_test<F: Float>() {
+        let cases = [
+            (F::ZERO, F::ZERO, F::ZERO),
+            (F::ONE, F::ONE, F::ONE),
+            (F::ZERO, F::ONE, F::ZERO),
+            (F::ONE, F::ZERO, F::ZERO),
+            (F::ZERO, F::NEG_ONE, F::NEG_ONE),
+            (F::NEG_ONE, F::ZERO, F::NEG_ONE),
+            (F::INFINITY, F::ZERO, F::ZERO),
+            (F::NEG_INFINITY, F::ZERO, F::NEG_INFINITY),
+            (F::NAN, F::ZERO, F::NAN),
+            (F::ZERO, F::NAN, F::NAN),
+            (F::NAN, F::NAN, F::NAN),
+            (F::ZERO, F::NEG_ZERO, F::NEG_ZERO),
+            (F::NEG_ZERO, F::ZERO, F::NEG_ZERO),
+        ];
+
+        for (x, y, res) in cases {
+            let val = fminimum(x, y);
+            assert_biteq!(val, res, "fminimum({}, {})", Hexf(x), Hexf(y));
+        }
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn spec_tests_f16() {
+        spec_test::<f16>();
+    }
+
+    #[test]
+    fn spec_tests_f32() {
+        spec_test::<f32>();
+    }
+
+    #[test]
+    fn spec_tests_f64() {
+        spec_test::<f64>();
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn spec_tests_f128() {
+        spec_test::<f128>();
+    }
+}
diff --git a/src/math/generic/fminimum_num.rs b/src/math/generic/fminimum_num.rs
new file mode 100644
index 000000000..e58a585c3
--- /dev/null
+++ b/src/math/generic/fminimum_num.rs
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: MIT OR Apache-2.0 */
+//! IEEE 754-2019 `minimum`.
+//!
+//! Per the spec, returns:
+//! - `x` if `x < y`
+//! - `y` if `y < x`
+//! - Non-NaN if one operand is NaN
+//! - Logic following +0.0 > -0.0
+//! - Either `x` or `y` if `x == y` and the signs are the same
+//! - qNaN if either operand is a NaN
+//!
+//! Excluded from our implementation is sNaN handling.
+
+use super::super::Float;
+
+pub fn fminimum_num<F: Float>(x: F, y: F) -> F {
+    let res =
+        if y.is_nan() || x < y || (x.to_bits() == F::NEG_ZERO.to_bits() && y.is_sign_positive()) {
+            x
+        } else {
+            y
+        };
+
+    // Canonicalize
+    res * F::ONE
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::support::{Hexf, Int};
+
+    fn spec_test<F: Float>() {
+        let cases = [
+            (F::ZERO, F::ZERO, F::ZERO),
+            (F::ONE, F::ONE, F::ONE),
+            (F::ZERO, F::ONE, F::ZERO),
+            (F::ONE, F::ZERO, F::ZERO),
+            (F::ZERO, F::NEG_ONE, F::NEG_ONE),
+            (F::NEG_ONE, F::ZERO, F::NEG_ONE),
+            (F::INFINITY, F::ZERO, F::ZERO),
+            (F::NEG_INFINITY, F::ZERO, F::NEG_INFINITY),
+            (F::NAN, F::ZERO, F::ZERO),
+            (F::ZERO, F::NAN, F::ZERO),
+            (F::NAN, F::NAN, F::NAN),
+            (F::ZERO, F::NEG_ZERO, F::NEG_ZERO),
+            (F::NEG_ZERO, F::ZERO, F::NEG_ZERO),
+        ];
+
+        for (x, y, res) in cases {
+            let val = fminimum_num(x, y);
+            assert_biteq!(val, res, "fminimum_num({}, {})", Hexf(x), Hexf(y));
+        }
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn spec_tests_f16() {
+        spec_test::<f16>();
+    }
+
+    #[test]
+    fn spec_tests_f32() {
+        spec_test::<f32>();
+    }
+
+    #[test]
+    fn spec_tests_f64() {
+        spec_test::<f64>();
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn spec_tests_f128() {
+        spec_test::<f128>();
+    }
+}
diff --git a/src/math/generic/mod.rs b/src/math/generic/mod.rs
index b34d3dfae..092f9317b 100644
--- a/src/math/generic/mod.rs
+++ b/src/math/generic/mod.rs
@@ -5,7 +5,11 @@ mod fdim;
 mod floor;
 mod fma;
 mod fmax;
+mod fmaximum;
+mod fmaximum_num;
 mod fmin;
+mod fminimum;
+mod fminimum_num;
 mod fmod;
 mod rint;
 mod round;
@@ -20,7 +24,11 @@ pub use fdim::fdim;
 pub use floor::floor;
 pub use fma::{fma, fma_wide};
 pub use fmax::fmax;
+pub use fmaximum::fmaximum;
+pub use fmaximum_num::fmaximum_num;
 pub use fmin::fmin;
+pub use fminimum::fminimum;
+pub use fminimum_num::fminimum_num;
 pub use fmod::fmod;
 pub use rint::rint;
 pub use round::round;
diff --git a/src/math/mod.rs b/src/math/mod.rs
index ba0b933f1..4e75292a6 100644
--- a/src/math/mod.rs
+++ b/src/math/mod.rs
@@ -166,6 +166,8 @@ mod floorf;
 mod fma;
 mod fmaf;
 mod fmin_fmax;
+mod fminimum_fmaximum;
+mod fminimum_fmaximum_num;
 mod fmod;
 mod fmodf;
 mod frexp;
@@ -271,6 +273,8 @@ pub use self::floorf::floorf;
 pub use self::fma::fma;
 pub use self::fmaf::fmaf;
 pub use self::fmin_fmax::{fmax, fmaxf, fmin, fminf};
+pub use self::fminimum_fmaximum::{fmaximum, fmaximumf, fminimum, fminimumf};
+pub use self::fminimum_fmaximum_num::{fmaximum_num, fmaximum_numf, fminimum_num, fminimum_numf};
 pub use self::fmod::fmod;
 pub use self::fmodf::fmodf;
 pub use self::frexp::frexp;
@@ -355,8 +359,9 @@ cfg_if! {
         pub use self::fabsf16::fabsf16;
         pub use self::fdimf16::fdimf16;
         pub use self::floorf16::floorf16;
-        pub use self::fmin_fmax::fmaxf16;
-        pub use self::fmin_fmax::fminf16;
+        pub use self::fmin_fmax::{fmaxf16, fminf16};
+        pub use self::fminimum_fmaximum::{fmaximumf16, fminimumf16};
+        pub use self::fminimum_fmaximum_num::{fmaximum_numf16, fminimum_numf16};
         pub use self::fmodf16::fmodf16;
         pub use self::ldexpf16::ldexpf16;
         pub use self::rintf16::rintf16;
@@ -393,8 +398,9 @@ cfg_if! {
         pub use self::fdimf128::fdimf128;
         pub use self::floorf128::floorf128;
         pub use self::fmaf128::fmaf128;
-        pub use self::fmin_fmax::fmaxf128;
-        pub use self::fmin_fmax::fminf128;
+        pub use self::fmin_fmax::{fmaxf128, fminf128};
+        pub use self::fminimum_fmaximum::{fmaximumf128, fminimumf128};
+        pub use self::fminimum_fmaximum_num::{fmaximum_numf128, fminimum_numf128};
         pub use self::fmodf128::fmodf128;
         pub use self::ldexpf128::ldexpf128;
         pub use self::rintf128::rintf128;

From a65eb91fa20725cb71b759d1e254369b47585945 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 10 Feb 2025 21:42:26 +0000
Subject: [PATCH 239/279] Increase allowed offset from infinity for ynf

Failed with

    called `Result::unwrap()` on an `Err` value: ynf

    Caused by:
        0:
               input:    (223, 116.89665)
               as hex:   (, 0x1.d3962cp+6)
               as bits:  (0x000000df, 0x42e9cb16)
               expected: -3.1836905e38          -0x1.df074cp+127 0xff6f83a6
               actual:   -inf                   -inf 0xff800000
        1: mismatched infinities
---
 crates/libm-test/src/precision.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/libm-test/src/precision.rs b/crates/libm-test/src/precision.rs
index 1d916e572..8b0892546 100644
--- a/crates/libm-test/src/precision.rs
+++ b/crates/libm-test/src/precision.rs
@@ -537,7 +537,7 @@ fn int_float_common<F1: Float, F2: Float>(
         && !expected.is_infinite()
         && actual.is_infinite()
         && (expected.abs().to_bits().abs_diff(actual.abs().to_bits())
-            < F2::Int::cast_from(1_000_000u32))
+            < F2::Int::cast_from(10_000_000u32))
     {
         return XFAIL_NOCHECK;
     }

From 913256dfb226adf5de06aac885a1e5d1b6b94484 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Tue, 11 Feb 2025 02:17:06 +0000
Subject: [PATCH 240/279] Fix parsing of negative hex float literals in util

---
 crates/util/src/main.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/util/src/main.rs b/crates/util/src/main.rs
index a519713c0..710adbb17 100644
--- a/crates/util/src/main.rs
+++ b/crates/util/src/main.rs
@@ -274,7 +274,7 @@ fn parse<T: FromStr + FromStrRadix>(input: &[&str], idx: usize) -> T {
 
     let msg = || format!("invalid {} input '{s}'", type_name::<T>());
 
-    if s.starts_with("0x") {
+    if s.starts_with("0x") || s.starts_with("-0x") {
         return T::from_str_radix(s, 16).unwrap_or_else(|_| panic!("{}", msg()));
     }
 

From c2bf9537cb61dc9237386403e9ae51499d2ff7f2 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Tue, 11 Feb 2025 00:17:32 +0000
Subject: [PATCH 241/279] Add `roundeven{,f,f16,f128}`

C23 specifies a new set of `roundeven` functions that round to the
nearest integral, with ties to even. It does not raise any floating
point exceptions.

This behavior is similar to two other functions:

1. `rint`, which rounds to the nearest integer respecting rounding mode
   and possibly raising exceptions.
2. `nearbyint`, which is identical to `rint` except it may not raise
   exceptions.

Technically `rint`, `nearbyint`, and `roundeven` all behave the same in
Rust because we assume default floating point environment. The backends
are allowed to lower to `roundeven`, however, so we should provide it in
case the fallback is needed.

Add the `roundeven` family here and convert `rint` to a function that
takes a rounding mode. This currently has no effect.
---
 crates/libm-macros/src/shared.rs             | 104 ++++++++++++++++--
 crates/libm-test/benches/icount.rs           |   4 +
 crates/libm-test/benches/random.rs           |   4 +
 crates/libm-test/src/domain.rs               |   1 +
 crates/libm-test/src/gen/case_list.rs        |  39 ++++++-
 crates/libm-test/src/mpfloat.rs              |   8 ++
 crates/libm-test/src/precision.rs            |   3 +-
 crates/libm-test/tests/compare_built_musl.rs |   4 +
 crates/util/src/main.rs                      |   4 +
 etc/function-definitions.json                |  34 ++++--
 etc/function-list.txt                        |   4 +
 src/libm_helper.rs                           |  42 ++++----
 src/math/generic/mod.rs                      |   2 +-
 src/math/generic/rint.rs                     | 105 ++++++++++++++-----
 src/math/mod.rs                              |  14 +--
 src/math/rint.rs                             |  33 +++++-
 src/math/rintf.rs                            |  14 ---
 src/math/rintf128.rs                         |   5 -
 src/math/rintf16.rs                          |   5 -
 src/math/roundeven.rs                        |  35 +++++++
 20 files changed, 363 insertions(+), 101 deletions(-)
 delete mode 100644 src/math/rintf.rs
 delete mode 100644 src/math/rintf128.rs
 delete mode 100644 src/math/rintf16.rs
 create mode 100644 src/math/roundeven.rs

diff --git a/crates/libm-macros/src/shared.rs b/crates/libm-macros/src/shared.rs
index cb5a1d187..5e58220eb 100644
--- a/crates/libm-macros/src/shared.rs
+++ b/crates/libm-macros/src/shared.rs
@@ -9,7 +9,16 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
         FloatTy::F16,
         Signature { args: &[Ty::F16], returns: &[Ty::F16] },
         None,
-        &["ceilf16", "fabsf16", "floorf16", "rintf16", "roundf16", "sqrtf16", "truncf16"],
+        &[
+            "ceilf16",
+            "fabsf16",
+            "floorf16",
+            "rintf16",
+            "roundevenf16",
+            "roundf16",
+            "sqrtf16",
+            "truncf16",
+        ],
     ),
     (
         // `fn(f32) -> f32`
@@ -17,10 +26,43 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
         Signature { args: &[Ty::F32], returns: &[Ty::F32] },
         None,
         &[
-            "acosf", "acoshf", "asinf", "asinhf", "atanf", "atanhf", "cbrtf", "ceilf", "cosf",
-            "coshf", "erfcf", "erff", "exp10f", "exp2f", "expf", "expm1f", "fabsf", "floorf",
-            "j0f", "j1f", "lgammaf", "log10f", "log1pf", "log2f", "logf", "rintf", "roundf",
-            "sinf", "sinhf", "sqrtf", "tanf", "tanhf", "tgammaf", "truncf", "y0f", "y1f",
+            "acosf",
+            "acoshf",
+            "asinf",
+            "asinhf",
+            "atanf",
+            "atanhf",
+            "cbrtf",
+            "ceilf",
+            "cosf",
+            "coshf",
+            "erfcf",
+            "erff",
+            "exp10f",
+            "exp2f",
+            "expf",
+            "expm1f",
+            "fabsf",
+            "floorf",
+            "j0f",
+            "j1f",
+            "lgammaf",
+            "log10f",
+            "log1pf",
+            "log2f",
+            "logf",
+            "rintf",
+            "roundevenf",
+            "roundf",
+            "sinf",
+            "sinhf",
+            "sqrtf",
+            "tanf",
+            "tanhf",
+            "tgammaf",
+            "truncf",
+            "y0f",
+            "y1f",
         ],
     ),
     (
@@ -29,10 +71,43 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
         Signature { args: &[Ty::F64], returns: &[Ty::F64] },
         None,
         &[
-            "acos", "acosh", "asin", "asinh", "atan", "atanh", "cbrt", "ceil", "cos", "cosh",
-            "erf", "erfc", "exp", "exp10", "exp2", "expm1", "fabs", "floor", "j0", "j1", "lgamma",
-            "log", "log10", "log1p", "log2", "rint", "round", "sin", "sinh", "sqrt", "tan", "tanh",
-            "tgamma", "trunc", "y0", "y1",
+            "acos",
+            "acosh",
+            "asin",
+            "asinh",
+            "atan",
+            "atanh",
+            "cbrt",
+            "ceil",
+            "cos",
+            "cosh",
+            "erf",
+            "erfc",
+            "exp",
+            "exp10",
+            "exp2",
+            "expm1",
+            "fabs",
+            "floor",
+            "j0",
+            "j1",
+            "lgamma",
+            "log",
+            "log10",
+            "log1p",
+            "log2",
+            "rint",
+            "round",
+            "roundeven",
+            "sin",
+            "sinh",
+            "sqrt",
+            "tan",
+            "tanh",
+            "tgamma",
+            "trunc",
+            "y0",
+            "y1",
         ],
     ),
     (
@@ -40,7 +115,16 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
         FloatTy::F128,
         Signature { args: &[Ty::F128], returns: &[Ty::F128] },
         None,
-        &["ceilf128", "fabsf128", "floorf128", "rintf128", "roundf128", "sqrtf128", "truncf128"],
+        &[
+            "ceilf128",
+            "fabsf128",
+            "floorf128",
+            "rintf128",
+            "roundevenf128",
+            "roundf128",
+            "sqrtf128",
+            "truncf128",
+        ],
     ),
     (
         // `(f16, f16) -> f16`
diff --git a/crates/libm-test/benches/icount.rs b/crates/libm-test/benches/icount.rs
index e28f4973c..4a10ec383 100644
--- a/crates/libm-test/benches/icount.rs
+++ b/crates/libm-test/benches/icount.rs
@@ -274,6 +274,10 @@ main!(
     icount_bench_rintf16_group,
     icount_bench_rintf_group,
     icount_bench_round_group,
+    icount_bench_roundeven_group,
+    icount_bench_roundevenf128_group,
+    icount_bench_roundevenf16_group,
+    icount_bench_roundevenf_group,
     icount_bench_roundf128_group,
     icount_bench_roundf16_group,
     icount_bench_roundf_group,
diff --git a/crates/libm-test/benches/random.rs b/crates/libm-test/benches/random.rs
index 6f6b05d95..17e4e0d55 100644
--- a/crates/libm-test/benches/random.rs
+++ b/crates/libm-test/benches/random.rs
@@ -154,6 +154,10 @@ libm_macros::for_each_function! {
         | ldexpf16
         | rintf128
         | rintf16
+        | roundeven
+        | roundevenf
+        | roundevenf128
+        | roundevenf16
         | roundf128
         | roundf16
         | scalbnf128
diff --git a/crates/libm-test/src/domain.rs b/crates/libm-test/src/domain.rs
index c662e95b4..41e948461 100644
--- a/crates/libm-test/src/domain.rs
+++ b/crates/libm-test/src/domain.rs
@@ -246,6 +246,7 @@ pub fn get_domain<F: Float, I: Int>(
         BaseName::Remquo => &EitherPrim::UNBOUNDED2[..],
         BaseName::Rint => &EitherPrim::UNBOUNDED1[..],
         BaseName::Round => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Roundeven => &EitherPrim::UNBOUNDED1[..],
         BaseName::Scalbn => &EitherPrim::UNBOUNDED_F_I[..],
         BaseName::Sin => &EitherPrim::TRIG[..],
         BaseName::Sincos => &EitherPrim::TRIG[..],
diff --git a/crates/libm-test/src/gen/case_list.rs b/crates/libm-test/src/gen/case_list.rs
index 49e731b88..8c7a735fa 100644
--- a/crates/libm-test/src/gen/case_list.rs
+++ b/crates/libm-test/src/gen/case_list.rs
@@ -6,6 +6,7 @@
 //!
 //! This is useful for adding regression tests or expected failures.
 
+use libm::hf64;
 #[cfg(f128_enabled)]
 use libm::hf128;
 
@@ -574,7 +575,15 @@ fn remquof_cases() -> Vec<TestCase<op::remquof::Routine>> {
 }
 
 fn rint_cases() -> Vec<TestCase<op::rint::Routine>> {
-    vec![]
+    let mut v = vec![];
+    TestCase::append_pairs(
+        &mut v,
+        &[
+            // Failure on i586
+            ((hf64!("-0x1.e3f13ff995ffcp+38"),), Some(hf64!("-0x1.e3f13ff994000p+38"))),
+        ],
+    );
+    v
 }
 
 fn rintf_cases() -> Vec<TestCase<op::rintf::Routine>> {
@@ -591,6 +600,11 @@ fn rintf16_cases() -> Vec<TestCase<op::rintf16::Routine>> {
     vec![]
 }
 
+#[cfg(f16_enabled)]
+fn roundf16_cases() -> Vec<TestCase<op::roundf16::Routine>> {
+    vec![]
+}
+
 fn round_cases() -> Vec<TestCase<op::round::Routine>> {
     vec![]
 }
@@ -605,7 +619,28 @@ fn roundf128_cases() -> Vec<TestCase<op::roundf128::Routine>> {
 }
 
 #[cfg(f16_enabled)]
-fn roundf16_cases() -> Vec<TestCase<op::roundf16::Routine>> {
+fn roundevenf16_cases() -> Vec<TestCase<op::roundevenf16::Routine>> {
+    vec![]
+}
+
+fn roundeven_cases() -> Vec<TestCase<op::roundeven::Routine>> {
+    let mut v = vec![];
+    TestCase::append_pairs(
+        &mut v,
+        &[
+            // Failure on i586
+            ((hf64!("-0x1.e3f13ff995ffcp+38"),), Some(hf64!("-0x1.e3f13ff994000p+38"))),
+        ],
+    );
+    v
+}
+
+fn roundevenf_cases() -> Vec<TestCase<op::roundevenf::Routine>> {
+    vec![]
+}
+
+#[cfg(f128_enabled)]
+fn roundevenf128_cases() -> Vec<TestCase<op::roundevenf128::Routine>> {
     vec![]
 }
 
diff --git a/crates/libm-test/src/mpfloat.rs b/crates/libm-test/src/mpfloat.rs
index 63cdebe4e..9b51dc605 100644
--- a/crates/libm-test/src/mpfloat.rs
+++ b/crates/libm-test/src/mpfloat.rs
@@ -184,6 +184,10 @@ libm_macros::for_each_function! {
         rintf128,
         rintf16,
         round,
+        roundeven,
+        roundevenf,
+        roundevenf128,
+        roundevenf16,
         roundf,
         roundf128,
         roundf16,
@@ -253,6 +257,8 @@ impl_no_round! {
     rint => round_even_mut; // FIXME: respect rounding mode
     rintf => round_even_mut; // FIXME: respect rounding mode
     round => round_mut;
+    roundeven => round_even_mut;
+    roundevenf => round_even_mut;
     roundf => round_mut;
     trunc => trunc_mut;
     truncf => trunc_mut;
@@ -265,6 +271,7 @@ impl_no_round! {
     floorf16 => floor_mut;
     rintf16 => round_even_mut; // FIXME: respect rounding mode
     roundf16 => round_mut;
+    roundevenf16 => round_even_mut;
     truncf16 => trunc_mut;
 }
 
@@ -275,6 +282,7 @@ impl_no_round! {
     floorf128 => floor_mut;
     rintf128 => round_even_mut; // FIXME: respect rounding mode
     roundf128 => round_mut;
+    roundevenf128 => round_even_mut;
     truncf128 => trunc_mut;
 }
 
diff --git a/crates/libm-test/src/precision.rs b/crates/libm-test/src/precision.rs
index 8b0892546..8916b43ab 100644
--- a/crates/libm-test/src/precision.rs
+++ b/crates/libm-test/src/precision.rs
@@ -40,6 +40,7 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 {
         | Bn::Remquo
         | Bn::Rint
         | Bn::Round
+        | Bn::Roundeven
         | Bn::Scalbn
         | Bn::Sqrt
         | Bn::Trunc => 0,
@@ -282,7 +283,7 @@ impl MaybeOverride<(f64,)> for SpecialCase {
         }
 
         if cfg!(x86_no_sse)
-            && ctx.base_name == BaseName::Rint
+            && (ctx.base_name == BaseName::Rint || ctx.base_name == BaseName::Roundeven)
             && (expected - actual).abs() <= F::ONE
             && (expected - actual).abs() > F::ZERO
         {
diff --git a/crates/libm-test/tests/compare_built_musl.rs b/crates/libm-test/tests/compare_built_musl.rs
index ffd7f1f60..2b16b9aa0 100644
--- a/crates/libm-test/tests/compare_built_musl.rs
+++ b/crates/libm-test/tests/compare_built_musl.rs
@@ -126,6 +126,10 @@ libm_macros::for_each_function! {
         ldexpf16,
         rintf128,
         rintf16,
+        roundeven,
+        roundevenf,
+        roundevenf128,
+        roundevenf16,
         roundf128,
         roundf16,
         scalbnf128,
diff --git a/crates/util/src/main.rs b/crates/util/src/main.rs
index 710adbb17..130ac4531 100644
--- a/crates/util/src/main.rs
+++ b/crates/util/src/main.rs
@@ -123,6 +123,10 @@ fn do_eval(basis: &str, op: &str, inputs: &[&str]) {
             | ldexpf16
             | rintf128
             | rintf16
+            | roundeven
+            | roundevenf
+            | roundevenf128
+            | roundevenf16
             | roundf128
             | roundf16
             | scalbnf128
diff --git a/etc/function-definitions.json b/etc/function-definitions.json
index 008a47df2..a47aaad57 100644
--- a/etc/function-definitions.json
+++ b/etc/function-definitions.json
@@ -791,7 +791,6 @@
         "sources": [
             "src/math/arch/aarch64.rs",
             "src/math/arch/wasm32.rs",
-            "src/math/generic/rint.rs",
             "src/math/rint.rs"
         ],
         "type": "f64"
@@ -800,22 +799,19 @@
         "sources": [
             "src/math/arch/aarch64.rs",
             "src/math/arch/wasm32.rs",
-            "src/math/generic/rint.rs",
-            "src/math/rintf.rs"
+            "src/math/rint.rs"
         ],
         "type": "f32"
     },
     "rintf128": {
         "sources": [
-            "src/math/generic/rint.rs",
-            "src/math/rintf128.rs"
+            "src/math/rint.rs"
         ],
         "type": "f128"
     },
     "rintf16": {
         "sources": [
-            "src/math/generic/rint.rs",
-            "src/math/rintf16.rs"
+            "src/math/rint.rs"
         ],
         "type": "f16"
     },
@@ -826,6 +822,30 @@
         ],
         "type": "f64"
     },
+    "roundeven": {
+        "sources": [
+            "src/math/roundeven.rs"
+        ],
+        "type": "f64"
+    },
+    "roundevenf": {
+        "sources": [
+            "src/math/roundeven.rs"
+        ],
+        "type": "f32"
+    },
+    "roundevenf128": {
+        "sources": [
+            "src/math/roundeven.rs"
+        ],
+        "type": "f128"
+    },
+    "roundevenf16": {
+        "sources": [
+            "src/math/roundeven.rs"
+        ],
+        "type": "f16"
+    },
     "roundf": {
         "sources": [
             "src/math/generic/round.rs",
diff --git a/etc/function-list.txt b/etc/function-list.txt
index 90ca8f34e..1f226c8c0 100644
--- a/etc/function-list.txt
+++ b/etc/function-list.txt
@@ -125,6 +125,10 @@ rintf
 rintf128
 rintf16
 round
+roundeven
+roundevenf
+roundevenf128
+roundevenf16
 roundf
 roundf128
 roundf16
diff --git a/src/libm_helper.rs b/src/libm_helper.rs
index 489dbc0d4..dfa1ff77b 100644
--- a/src/libm_helper.rs
+++ b/src/libm_helper.rs
@@ -94,6 +94,7 @@ libm_helper! {
         (fn remquo(x: f32, y: f32) -> (f32, i32);   => remquof);
         (fn rint(x: f32) -> (f32);                  => rintf);
         (fn round(x: f32) -> (f32);                 => roundf);
+        (fn roundeven(x: f32) -> (f32);             => roundevenf);
         (fn scalbn(x: f32, n: i32) -> (f32);        => scalbnf);
         (fn sin(x: f32) -> (f32);                   => sinf);
         (fn sincos(x: f32) -> (f32, f32);           => sincosf);
@@ -167,6 +168,7 @@ libm_helper! {
         (fn remquo(x: f64, y: f64) -> (f64, i32);   => remquo);
         (fn rint(x: f64) -> (f64);                  => rint);
         (fn round(x: f64) -> (f64);                 => round);
+        (fn roundevem(x: f64) -> (f64);             => roundeven);
         (fn scalbn(x: f64, n: i32) -> (f64);        => scalbn);
         (fn sin(x: f64) -> (f64);                   => sin);
         (fn sincos(x: f64) -> (f64, f64);           => sincos);
@@ -188,22 +190,23 @@ libm_helper! {
     f16,
     funcs: {
         // verify-sorted-start
-        (fn ceilf(x: f16) -> (f16);                 => ceilf16);
+        (fn ceil(x: f16) -> (f16);                  => ceilf16);
         (fn copysign(x: f16, y: f16) -> (f16);      => copysignf16);
         (fn fabs(x: f16) -> (f16);                  => fabsf16);
         (fn fdim(x: f16, y: f16) -> (f16);          => fdimf16);
-        (fn floorf(x: f16) -> (f16);                => floorf16);
-        (fn fmaxf(x: f16, y: f16) -> (f16);         => fmaxf16);
-        (fn fmaximum_numf16(x: f16, y: f16) -> (f16);   => fmaximum_numf16);
+        (fn floor(x: f16) -> (f16);                 => floorf16);
+        (fn fmax(x: f16, y: f16) -> (f16);          => fmaxf16);
+        (fn fmaximum_num(x: f16, y: f16) -> (f16);  => fmaximum_numf16);
         (fn fmaximumf16(x: f16, y: f16) -> (f16);   => fmaximumf16);
-        (fn fminf(x: f16, y: f16) -> (f16);         => fminf16);
-        (fn fminimum_numf16(x: f16, y: f16) -> (f16);   => fminimum_numf16);
-        (fn fminimumf16(x: f16, y: f16) -> (f16);   => fminimumf16);
-        (fn fmodf(x: f16, y: f16) -> (f16);         => fmodf16);
-        (fn ldexpf16(x: f16, n: i32) -> (f16);      => ldexpf16);
-        (fn rintf(x: f16) -> (f16);                 => rintf16);
-        (fn roundf(x: f16) -> (f16);                => roundf16);
-        (fn scalbnf16(x: f16, n: i32) -> (f16);     => ldexpf16);
+        (fn fmin(x: f16, y: f16) -> (f16);          => fminf16);
+        (fn fminimum(x: f16, y: f16) -> (f16);      => fminimumf16);
+        (fn fminimum_num(x: f16, y: f16) -> (f16);  => fminimum_numf16);
+        (fn fmod(x: f16, y: f16) -> (f16);          => fmodf16);
+        (fn ldexp(x: f16, n: i32) -> (f16);         => ldexpf16);
+        (fn rint(x: f16) -> (f16);                  => rintf16);
+        (fn round(x: f16) -> (f16);                 => roundf16);
+        (fn roundeven(x: f16) -> (f16);             => roundevenf16);
+        (fn scalbn(x: f16, n: i32) -> (f16);        => scalbnf16);
         (fn sqrtf(x: f16) -> (f16);                 => sqrtf16);
         (fn truncf(x: f16) -> (f16);                => truncf16);
         // verify-sorted-end
@@ -220,18 +223,19 @@ libm_helper! {
         (fn fabs(x: f128) -> (f128);                => fabsf128);
         (fn fdim(x: f128, y: f128) -> (f128);       => fdimf128);
         (fn floor(x: f128) -> (f128);               => floorf128);
-        (fn fmaf128(x: f128, y: f128, z: f128) -> (f128);  => fmaf128);
+        (fn fma(x: f128, y: f128, z: f128) -> (f128); => fmaf128);
         (fn fmax(x: f128, y: f128) -> (f128);       => fmaxf128);
-        (fn fmaximum_numf128(x: f128, y: f128) -> (f128);  => fmaximum_numf128);
-        (fn fmaximumf128(x: f128, y: f128) -> (f128);      => fmaximumf128);
+        (fn fmaximum(x: f128, y: f128) -> (f128);      => fmaximumf128);
+        (fn fmaximum_num(x: f128, y: f128) -> (f128);  => fmaximum_numf128);
         (fn fmin(x: f128, y: f128) -> (f128);       => fminf128);
-        (fn fminimum_numf128(x: f128, y: f128) -> (f128);  => fminimum_numf128);
-        (fn fminimumf128(x: f128, y: f128) -> (f128);      => fminimumf128);
+        (fn fminimum(x: f128, y: f128) -> (f128);      => fminimumf128);
+        (fn fminimum_num(x: f128, y: f128) -> (f128);  => fminimum_numf128);
         (fn fmod(x: f128, y: f128) -> (f128);       => fmodf128);
-        (fn ldexpf128(x: f128, n: i32) -> (f128);   => ldexpf128);
+        (fn ldexp(x: f128, n: i32) -> (f128);       => ldexpf128);
         (fn rint(x: f128) -> (f128);                => rintf128);
         (fn round(x: f128) -> (f128);               => roundf128);
-        (fn scalbnf128(x: f128, n: i32) -> (f128);  => ldexpf128);
+        (fn roundeven(x: f128) -> (f128);           => roundevenf128);
+        (fn scalbn(x: f128, n: i32) -> (f128);      => scalbnf128);
         (fn sqrt(x: f128) -> (f128);                => sqrtf128);
         (fn trunc(x: f128) -> (f128);               => truncf128);
         // verify-sorted-end
diff --git a/src/math/generic/mod.rs b/src/math/generic/mod.rs
index 092f9317b..f224eba73 100644
--- a/src/math/generic/mod.rs
+++ b/src/math/generic/mod.rs
@@ -30,7 +30,7 @@ pub use fmin::fmin;
 pub use fminimum::fminimum;
 pub use fminimum_num::fminimum_num;
 pub use fmod::fmod;
-pub use rint::rint;
+pub use rint::rint_round;
 pub use round::round;
 pub use scalbn::scalbn;
 pub use sqrt::sqrt;
diff --git a/src/math/generic/rint.rs b/src/math/generic/rint.rs
index 80ba1faac..04e8f332f 100644
--- a/src/math/generic/rint.rs
+++ b/src/math/generic/rint.rs
@@ -2,27 +2,31 @@
 /* origin: musl src/math/rint.c */
 
 use super::super::Float;
+use super::super::support::{FpResult, Round};
 
-pub fn rint<F: Float>(x: F) -> F {
+/// IEEE 754-2019 `roundToIntegralExact`, which respects rounding mode and raises inexact if
+/// applicable.
+pub fn rint_round<F: Float>(x: F, _round: Round) -> FpResult<F> {
     let toint = F::ONE / F::EPSILON;
     let e = x.exp();
     let positive = x.is_sign_positive();
 
     // On i386 `force_eval!` must be used to force rounding via storage to memory. Otherwise,
     // the excess precission from x87 would cause an incorrect final result.
-    let use_force = cfg!(x86_no_sse) && F::BITS == 32 || F::BITS == 64;
+    let force = |x| {
+        if cfg!(x86_no_sse) && (F::BITS == 32 || F::BITS == 64) { force_eval!(x) } else { x }
+    };
 
-    if e >= F::EXP_BIAS + F::SIG_BITS {
+    let res = if e >= F::EXP_BIAS + F::SIG_BITS {
         // No fractional part; exact result can be returned.
         x
     } else {
-        // Apply a net-zero adjustment that nudges `y` in the direction of the rounding mode.
+        // Apply a net-zero adjustment that nudges `y` in the direction of the rounding mode. For
+        // Rust this is always nearest, but ideally it would take `round` into account.
         let y = if positive {
-            let tmp = if use_force { force_eval!(x) } else { x } + toint;
-            (if use_force { force_eval!(tmp) } else { tmp } - toint)
+            force(force(x) + toint) - toint
         } else {
-            let tmp = if use_force { force_eval!(x) } else { x } - toint;
-            (if use_force { force_eval!(tmp) } else { tmp } + toint)
+            force(force(x) - toint) + toint
         };
 
         if y == F::ZERO {
@@ -31,42 +35,85 @@ pub fn rint<F: Float>(x: F) -> F {
         } else {
             y
         }
-    }
+    };
+
+    FpResult::ok(res)
 }
 
 #[cfg(test)]
 mod tests {
     use super::*;
+    use crate::support::{Hexf, Int, Status};
+
+    fn spec_test<F: Float>(cases: &[(F, F, Status)]) {
+        let roundtrip = [F::ZERO, F::ONE, F::NEG_ONE, F::NEG_ZERO, F::INFINITY, F::NEG_INFINITY];
+
+        for x in roundtrip {
+            let FpResult { val, status } = rint_round(x, Round::Nearest);
+            assert_biteq!(val, x, "rint_round({})", Hexf(x));
+            assert_eq!(status, Status::OK, "{}", Hexf(x));
+        }
+
+        for &(x, res, res_stat) in cases {
+            let FpResult { val, status } = rint_round(x, Round::Nearest);
+            assert_biteq!(val, res, "rint_round({})", Hexf(x));
+            assert_eq!(status, res_stat, "{}", Hexf(x));
+        }
+    }
 
     #[test]
-    fn zeroes_f32() {
-        assert_biteq!(rint(0.0_f32), 0.0_f32);
-        assert_biteq!(rint(-0.0_f32), -0.0_f32);
+    #[cfg(f16_enabled)]
+    fn spec_tests_f16() {
+        let cases = [];
+        spec_test::<f16>(&cases);
     }
 
     #[test]
-    fn sanity_check_f32() {
-        assert_biteq!(rint(-1.0_f32), -1.0);
-        assert_biteq!(rint(2.8_f32), 3.0);
-        assert_biteq!(rint(-0.5_f32), -0.0);
-        assert_biteq!(rint(0.5_f32), 0.0);
-        assert_biteq!(rint(-1.5_f32), -2.0);
-        assert_biteq!(rint(1.5_f32), 2.0);
+    fn spec_tests_f32() {
+        let cases = [
+            (0.1, 0.0, Status::OK),
+            (-0.1, -0.0, Status::OK),
+            (0.5, 0.0, Status::OK),
+            (-0.5, -0.0, Status::OK),
+            (0.9, 1.0, Status::OK),
+            (-0.9, -1.0, Status::OK),
+            (1.1, 1.0, Status::OK),
+            (-1.1, -1.0, Status::OK),
+            (1.5, 2.0, Status::OK),
+            (-1.5, -2.0, Status::OK),
+            (1.9, 2.0, Status::OK),
+            (-1.9, -2.0, Status::OK),
+            (2.8, 3.0, Status::OK),
+            (-2.8, -3.0, Status::OK),
+        ];
+        spec_test::<f32>(&cases);
     }
 
     #[test]
-    fn zeroes_f64() {
-        assert_biteq!(rint(0.0_f64), 0.0_f64);
-        assert_biteq!(rint(-0.0_f64), -0.0_f64);
+    fn spec_tests_f64() {
+        let cases = [
+            (0.1, 0.0, Status::OK),
+            (-0.1, -0.0, Status::OK),
+            (0.5, 0.0, Status::OK),
+            (-0.5, -0.0, Status::OK),
+            (0.9, 1.0, Status::OK),
+            (-0.9, -1.0, Status::OK),
+            (1.1, 1.0, Status::OK),
+            (-1.1, -1.0, Status::OK),
+            (1.5, 2.0, Status::OK),
+            (-1.5, -2.0, Status::OK),
+            (1.9, 2.0, Status::OK),
+            (-1.9, -2.0, Status::OK),
+            (2.8, 3.0, Status::OK),
+            (-2.8, -3.0, Status::OK),
+        ];
+        spec_test::<f64>(&cases);
     }
 
     #[test]
-    fn sanity_check_f64() {
-        assert_biteq!(rint(-1.0_f64), -1.0);
-        assert_biteq!(rint(2.8_f64), 3.0);
-        assert_biteq!(rint(-0.5_f64), -0.0);
-        assert_biteq!(rint(0.5_f64), 0.0);
-        assert_biteq!(rint(-1.5_f64), -2.0);
-        assert_biteq!(rint(1.5_f64), 2.0);
+    #[cfg(f128_enabled)]
+    fn spec_tests_f128() {
+        let cases = [];
+        spec_test::<f128>(&cases);
     }
 }
diff --git a/src/math/mod.rs b/src/math/mod.rs
index 4e75292a6..e58d79adc 100644
--- a/src/math/mod.rs
+++ b/src/math/mod.rs
@@ -207,8 +207,8 @@ mod remainderf;
 mod remquo;
 mod remquof;
 mod rint;
-mod rintf;
 mod round;
+mod roundeven;
 mod roundf;
 mod scalbn;
 mod scalbnf;
@@ -313,9 +313,9 @@ pub use self::remainder::remainder;
 pub use self::remainderf::remainderf;
 pub use self::remquo::remquo;
 pub use self::remquof::remquof;
-pub use self::rint::rint;
-pub use self::rintf::rintf;
+pub use self::rint::{rint, rintf};
 pub use self::round::round;
+pub use self::roundeven::{roundeven, roundevenf};
 pub use self::roundf::roundf;
 pub use self::scalbn::scalbn;
 pub use self::scalbnf::scalbnf;
@@ -346,7 +346,6 @@ cfg_if! {
         mod floorf16;
         mod fmodf16;
         mod ldexpf16;
-        mod rintf16;
         mod roundf16;
         mod scalbnf16;
         mod sqrtf16;
@@ -364,7 +363,8 @@ cfg_if! {
         pub use self::fminimum_fmaximum_num::{fmaximum_numf16, fminimum_numf16};
         pub use self::fmodf16::fmodf16;
         pub use self::ldexpf16::ldexpf16;
-        pub use self::rintf16::rintf16;
+        pub use self::rint::rintf16;
+        pub use self::roundeven::roundevenf16;
         pub use self::roundf16::roundf16;
         pub use self::scalbnf16::scalbnf16;
         pub use self::sqrtf16::sqrtf16;
@@ -384,7 +384,6 @@ cfg_if! {
         mod fmaf128;
         mod fmodf128;
         mod ldexpf128;
-        mod rintf128;
         mod roundf128;
         mod scalbnf128;
         mod sqrtf128;
@@ -403,7 +402,8 @@ cfg_if! {
         pub use self::fminimum_fmaximum_num::{fmaximum_numf128, fminimum_numf128};
         pub use self::fmodf128::fmodf128;
         pub use self::ldexpf128::ldexpf128;
-        pub use self::rintf128::rintf128;
+        pub use self::rint::rintf128;
+        pub use self::roundeven::roundevenf128;
         pub use self::roundf128::roundf128;
         pub use self::scalbnf128::scalbnf128;
         pub use self::sqrtf128::sqrtf128;
diff --git a/src/math/rint.rs b/src/math/rint.rs
index f409ec282..8a5cbeab4 100644
--- a/src/math/rint.rs
+++ b/src/math/rint.rs
@@ -1,3 +1,27 @@
+use super::support::Round;
+
+/// Round `x` to the nearest integer, breaking ties toward even.
+#[cfg(f16_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn rintf16(x: f16) -> f16 {
+    super::generic::rint_round(x, Round::Nearest).val
+}
+
+/// Round `x` to the nearest integer, breaking ties toward even.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn rintf(x: f32) -> f32 {
+    select_implementation! {
+        name: rintf,
+        use_arch: any(
+            all(target_arch = "wasm32", intrinsics_enabled),
+            all(target_arch = "aarch64", target_feature = "neon", target_endian = "little"),
+        ),
+        args: x,
+    }
+
+    super::generic::rint_round(x, Round::Nearest).val
+}
+
 /// Round `x` to the nearest integer, breaking ties toward even.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn rint(x: f64) -> f64 {
@@ -10,5 +34,12 @@ pub fn rint(x: f64) -> f64 {
         args: x,
     }
 
-    super::generic::rint(x)
+    super::generic::rint_round(x, Round::Nearest).val
+}
+
+/// Round `x` to the nearest integer, breaking ties toward even.
+#[cfg(f128_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn rintf128(x: f128) -> f128 {
+    super::generic::rint_round(x, Round::Nearest).val
 }
diff --git a/src/math/rintf.rs b/src/math/rintf.rs
deleted file mode 100644
index 5e9f5f718..000000000
--- a/src/math/rintf.rs
+++ /dev/null
@@ -1,14 +0,0 @@
-/// Round `x` to the nearest integer, breaking ties toward even.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn rintf(x: f32) -> f32 {
-    select_implementation! {
-        name: rintf,
-        use_arch: any(
-            all(target_arch = "wasm32", intrinsics_enabled),
-            all(target_arch = "aarch64", target_feature = "neon", target_endian = "little"),
-        ),
-        args: x,
-    }
-
-    super::generic::rint(x)
-}
diff --git a/src/math/rintf128.rs b/src/math/rintf128.rs
deleted file mode 100644
index 6b16fcd84..000000000
--- a/src/math/rintf128.rs
+++ /dev/null
@@ -1,5 +0,0 @@
-/// Round `x` to the nearest integer, breaking ties toward even.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn rintf128(x: f128) -> f128 {
-    super::generic::rint(x)
-}
diff --git a/src/math/rintf16.rs b/src/math/rintf16.rs
deleted file mode 100644
index 84d792561..000000000
--- a/src/math/rintf16.rs
+++ /dev/null
@@ -1,5 +0,0 @@
-/// Round `x` to the nearest integer, breaking ties toward even.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn rintf16(x: f16) -> f16 {
-    super::generic::rint(x)
-}
diff --git a/src/math/roundeven.rs b/src/math/roundeven.rs
new file mode 100644
index 000000000..ec1738285
--- /dev/null
+++ b/src/math/roundeven.rs
@@ -0,0 +1,35 @@
+use super::support::{Float, Round};
+
+/// Round `x` to the nearest integer, breaking ties toward even. This is IEEE 754
+/// `roundToIntegralTiesToEven`.
+#[cfg(f16_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn roundevenf16(x: f16) -> f16 {
+    roundeven_impl(x)
+}
+
+/// Round `x` to the nearest integer, breaking ties toward even. This is IEEE 754
+/// `roundToIntegralTiesToEven`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn roundevenf(x: f32) -> f32 {
+    roundeven_impl(x)
+}
+
+/// Round `x` to the nearest integer, breaking ties toward even. This is IEEE 754
+/// `roundToIntegralTiesToEven`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn roundeven(x: f64) -> f64 {
+    roundeven_impl(x)
+}
+
+/// Round `x` to the nearest integer, breaking ties toward even. This is IEEE 754
+/// `roundToIntegralTiesToEven`.
+#[cfg(f128_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn roundevenf128(x: f128) -> f128 {
+    roundeven_impl(x)
+}
+
+pub fn roundeven_impl<F: Float>(x: F) -> F {
+    super::generic::rint_round(x, Round::Nearest).val
+}

From 8f2a9ae6abc7cd4bd53533c06133f2ba6a778b58 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Tue, 11 Feb 2025 07:45:14 +0000
Subject: [PATCH 242/279] Check exact values for specified cases

Inputs in `case_list` shouldn't hit xfails or increased ULP tolerance.
Ensure that overrides are skipped when testing against MPFR or a
specified value and that NaNs, if any, are checked bitwise.
---
 crates/libm-test/src/gen/case_list.rs | 10 ++++++++--
 crates/libm-test/src/test_traits.rs   | 14 +++++++++++++-
 2 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/crates/libm-test/src/gen/case_list.rs b/crates/libm-test/src/gen/case_list.rs
index 8c7a735fa..7cb9897d8 100644
--- a/crates/libm-test/src/gen/case_list.rs
+++ b/crates/libm-test/src/gen/case_list.rs
@@ -579,8 +579,11 @@ fn rint_cases() -> Vec<TestCase<op::rint::Routine>> {
     TestCase::append_pairs(
         &mut v,
         &[
-            // Failure on i586
+            // Known failure on i586
+            #[cfg(not(x86_no_sse))]
             ((hf64!("-0x1.e3f13ff995ffcp+38"),), Some(hf64!("-0x1.e3f13ff994000p+38"))),
+            #[cfg(x86_no_sse)]
+            ((hf64!("-0x1.e3f13ff995ffcp+38"),), Some(hf64!("-0x1.e3f13ff998000p+38"))),
         ],
     );
     v
@@ -628,8 +631,11 @@ fn roundeven_cases() -> Vec<TestCase<op::roundeven::Routine>> {
     TestCase::append_pairs(
         &mut v,
         &[
-            // Failure on i586
+            // Known failure on i586
+            #[cfg(not(x86_no_sse))]
             ((hf64!("-0x1.e3f13ff995ffcp+38"),), Some(hf64!("-0x1.e3f13ff994000p+38"))),
+            #[cfg(x86_no_sse)]
+            ((hf64!("-0x1.e3f13ff995ffcp+38"),), Some(hf64!("-0x1.e3f13ff998000p+38"))),
         ],
     );
     v
diff --git a/crates/libm-test/src/test_traits.rs b/crates/libm-test/src/test_traits.rs
index 1bd5bce16..bba1fca64 100644
--- a/crates/libm-test/src/test_traits.rs
+++ b/crates/libm-test/src/test_traits.rs
@@ -12,7 +12,9 @@ use anyhow::{Context, anyhow, bail, ensure};
 use libm::support::Hexf;
 
 use crate::precision::CheckAction;
-use crate::{CheckCtx, Float, Int, MaybeOverride, SpecialCase, TestResult};
+use crate::{
+    CheckBasis, CheckCtx, Float, GeneratorKind, Int, MaybeOverride, SpecialCase, TestResult,
+};
 
 /// Trait for calling a function with a tuple as arguments.
 ///
@@ -207,6 +209,8 @@ where
     SpecialCase: MaybeOverride<Input>,
 {
     let (result, xfail_msg) = match SpecialCase::check_int(input, actual, expected, ctx) {
+        // `require_biteq` forbids overrides.
+        _ if ctx.gen_kind == GeneratorKind::List => (actual == expected, None),
         CheckAction::AssertSuccess => (actual == expected, None),
         CheckAction::AssertFailure(msg) => (actual != expected, Some(msg)),
         CheckAction::Custom(res) => return res,
@@ -291,7 +295,12 @@ where
     let mut inner = || -> TestResult {
         let mut allowed_ulp = ctx.ulp;
 
+        // Forbid overrides if the items came from an explicit list, as long as we are checking
+        // against either MPFR or the result itself.
+        let require_biteq = ctx.gen_kind == GeneratorKind::List && ctx.basis != CheckBasis::Musl;
+
         match SpecialCase::check_float(input, actual, expected, ctx) {
+            _ if require_biteq => (),
             CheckAction::AssertSuccess => (),
             CheckAction::AssertFailure(msg) => assert_failure_msg = Some(msg),
             CheckAction::Custom(res) => return res,
@@ -301,6 +310,9 @@ where
 
         // Check when both are NaNs
         if actual.is_nan() && expected.is_nan() {
+            if require_biteq && ctx.basis == CheckBasis::None {
+                ensure!(actual.to_bits() == expected.to_bits(), "mismatched NaN bitpatterns");
+            }
             // By default, NaNs have nothing special to check.
             return Ok(());
         } else if actual.is_nan() || expected.is_nan() {

From 6f0aae813c4138db05cfd29f8313d6a5066959a8 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Tue, 11 Feb 2025 15:40:17 +0000
Subject: [PATCH 243/279] Rename `Float::exp` to `Float::ex`

Our function to get the exponent conflicts with the inherent `exp`
function for `e^x`. Rename `exp` to `ex` to avoid confusion and usage
problems.
---
 etc/function-definitions.json    | 3 +--
 src/math/generic/fma.rs          | 6 +++---
 src/math/generic/fmod.rs         | 4 ++--
 src/math/generic/rint.rs         | 2 +-
 src/math/generic/sqrt.rs         | 2 +-
 src/math/support/float_traits.rs | 4 ++--
 6 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/etc/function-definitions.json b/etc/function-definitions.json
index a47aaad57..63d9927ad 100644
--- a/etc/function-definitions.json
+++ b/etc/function-definitions.json
@@ -206,8 +206,7 @@
     },
     "exp": {
         "sources": [
-            "src/math/exp.rs",
-            "src/math/support/float_traits.rs"
+            "src/math/exp.rs"
         ],
         "type": "f64"
     },
diff --git a/src/math/generic/fma.rs b/src/math/generic/fma.rs
index 821aee090..cb1061cc3 100644
--- a/src/math/generic/fma.rs
+++ b/src/math/generic/fma.rs
@@ -249,7 +249,7 @@ where
     let xy: B = x.widen() * y.widen();
     let mut result: B = xy + z.widen();
     let mut ui: B::Int = result.to_bits();
-    let re = result.exp();
+    let re = result.ex();
     let zb: B = z.widen();
 
     let prec_diff = B::SIG_BITS - F::SIG_BITS;
@@ -318,7 +318,7 @@ impl<F: Float> Norm<F> {
 
     fn from_float(x: F) -> Self {
         let mut ix = x.to_bits();
-        let mut e = x.exp() as i32;
+        let mut e = x.ex() as i32;
         let neg = x.is_sign_negative();
         if e == 0 {
             // Normalize subnormals by multiplication
@@ -326,7 +326,7 @@ impl<F: Float> Norm<F> {
             let scale_f = F::from_parts(false, scale_i + F::EXP_BIAS, F::Int::ZERO);
             let scaled = x * scale_f;
             ix = scaled.to_bits();
-            e = scaled.exp() as i32;
+            e = scaled.ex() as i32;
             e = if e == 0 {
                 // If the exponent is still zero, the input was zero. Artifically set this value
                 // such that the final `e` will exceed `ZERO_INF_NAN`.
diff --git a/src/math/generic/fmod.rs b/src/math/generic/fmod.rs
index ca1cda383..c74b593d5 100644
--- a/src/math/generic/fmod.rs
+++ b/src/math/generic/fmod.rs
@@ -9,8 +9,8 @@ pub fn fmod<F: Float>(x: F, y: F) -> F {
     let one = F::Int::ONE;
     let mut ix = x.to_bits();
     let mut iy = y.to_bits();
-    let mut ex = x.exp().signed();
-    let mut ey = y.exp().signed();
+    let mut ex = x.ex().signed();
+    let mut ey = y.ex().signed();
     let sx = ix & F::SIGN_MASK;
 
     if iy << 1 == zero || y.is_nan() || ex == F::EXP_SAT as i32 {
diff --git a/src/math/generic/rint.rs b/src/math/generic/rint.rs
index 04e8f332f..2f8b2b365 100644
--- a/src/math/generic/rint.rs
+++ b/src/math/generic/rint.rs
@@ -8,7 +8,7 @@ use super::super::support::{FpResult, Round};
 /// applicable.
 pub fn rint_round<F: Float>(x: F, _round: Round) -> FpResult<F> {
     let toint = F::ONE / F::EPSILON;
-    let e = x.exp();
+    let e = x.ex();
     let positive = x.is_sign_positive();
 
     // On i386 `force_eval!` must be used to force rounding via storage to memory. Otherwise,
diff --git a/src/math/generic/sqrt.rs b/src/math/generic/sqrt.rs
index fdd612493..5918025bc 100644
--- a/src/math/generic/sqrt.rs
+++ b/src/math/generic/sqrt.rs
@@ -109,7 +109,7 @@ where
         ix = scaled.to_bits();
         match top {
             Exp::Shifted(ref mut v) => {
-                *v = scaled.exp();
+                *v = scaled.ex();
                 *v = (*v).wrapping_sub(F::SIG_BITS);
             }
             Exp::NoShift(()) => {
diff --git a/src/math/support/float_traits.rs b/src/math/support/float_traits.rs
index 42ce31484..534ca9a07 100644
--- a/src/math/support/float_traits.rs
+++ b/src/math/support/float_traits.rs
@@ -128,13 +128,13 @@ pub trait Float:
     }
 
     /// Returns the exponent, not adjusting for bias, not accounting for subnormals or zero.
-    fn exp(self) -> u32 {
+    fn ex(self) -> u32 {
         u32::cast_from(self.to_bits() >> Self::SIG_BITS) & Self::EXP_SAT
     }
 
     /// Extract the exponent and adjust it for bias, not accounting for subnormals or zero.
     fn exp_unbiased(self) -> i32 {
-        self.exp().signed() - (Self::EXP_BIAS as i32)
+        self.ex().signed() - (Self::EXP_BIAS as i32)
     }
 
     /// Returns the significand with no implicit bit (or the "fractional" part)

From a1950c7cee9a8aad9ff45586ecf0188557b48700 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 12 Feb 2025 03:48:20 +0000
Subject: [PATCH 244/279] Add a way to print inputs on failure

When there is a panic in an extensive test, tracing down where it came
from can be difficult since no information is provides (messeges are
e.g. "attempted to subtract with overflow"). Resolve this by calling the
functions within `panic::catch_unwind`, printing the input, and
continuing.
---
 crates/libm-test/src/op.rs                   |  6 ++++--
 crates/libm-test/src/test_traits.rs          | 19 ++++++++++++++++++-
 crates/libm-test/tests/compare_built_musl.rs |  2 +-
 crates/libm-test/tests/multiprecision.rs     |  2 +-
 crates/libm-test/tests/standalone.rs         |  2 +-
 crates/libm-test/tests/z_extensive/run.rs    |  2 +-
 crates/util/src/main.rs                      |  2 +-
 7 files changed, 27 insertions(+), 8 deletions(-)

diff --git a/crates/libm-test/src/op.rs b/crates/libm-test/src/op.rs
index 239c9a3e1..47d72ae58 100644
--- a/crates/libm-test/src/op.rs
+++ b/crates/libm-test/src/op.rs
@@ -14,6 +14,7 @@
 //!   level. `Op` is also used as the name for generic parameters since it is terse.
 
 use std::fmt;
+use std::panic::{RefUnwindSafe, UnwindSafe};
 
 pub use shared::{ALL_OPERATIONS, FloatTy, MathOpInfo, Ty};
 
@@ -64,7 +65,7 @@ pub trait MathOp {
     type CRet;
 
     /// The signature of the Rust function as a `fn(...) -> ...` type.
-    type RustFn: Copy;
+    type RustFn: Copy + UnwindSafe;
 
     /// Arguments passed to the Rust library function as a tuple.
     ///
@@ -72,7 +73,8 @@ pub trait MathOp {
     /// to the Rust function.
     type RustArgs: Copy
         + TupleCall<Self::RustFn, Output = Self::RustRet>
-        + TupleCall<Self::CFn, Output = Self::RustRet>;
+        + TupleCall<Self::CFn, Output = Self::RustRet>
+        + RefUnwindSafe;
 
     /// Type returned from the Rust function.
     type RustRet: CheckOutput<Self::RustArgs>;
diff --git a/crates/libm-test/src/test_traits.rs b/crates/libm-test/src/test_traits.rs
index bba1fca64..c560dade8 100644
--- a/crates/libm-test/src/test_traits.rs
+++ b/crates/libm-test/src/test_traits.rs
@@ -6,7 +6,8 @@
 //! - `CheckOutput`: implemented on anything that is an output type for validation against an
 //!   expected value.
 
-use std::fmt;
+use std::panic::{RefUnwindSafe, UnwindSafe};
+use std::{fmt, panic};
 
 use anyhow::{Context, anyhow, bail, ensure};
 use libm::support::Hexf;
@@ -23,6 +24,22 @@ use crate::{
 pub trait TupleCall<Func>: fmt::Debug {
     type Output;
     fn call(self, f: Func) -> Self::Output;
+
+    /// Intercept panics and print the input to stderr before continuing.
+    fn call_intercept_panics(self, f: Func) -> Self::Output
+    where
+        Self: RefUnwindSafe + Copy,
+        Func: UnwindSafe,
+    {
+        let res = panic::catch_unwind(|| self.call(f));
+        match res {
+            Ok(v) => v,
+            Err(e) => {
+                eprintln!("panic with the following input: {self:?}");
+                panic::resume_unwind(e)
+            }
+        }
+    }
 }
 
 /// A trait to implement on any output type so we can verify it in a generic way.
diff --git a/crates/libm-test/tests/compare_built_musl.rs b/crates/libm-test/tests/compare_built_musl.rs
index 2b16b9aa0..897dfc26e 100644
--- a/crates/libm-test/tests/compare_built_musl.rs
+++ b/crates/libm-test/tests/compare_built_musl.rs
@@ -21,7 +21,7 @@ fn musl_runner<Op: MathOp>(
 ) {
     for input in cases {
         let musl_res = input.call(musl_fn);
-        let crate_res = input.call(Op::ROUTINE);
+        let crate_res = input.call_intercept_panics(Op::ROUTINE);
 
         crate_res.validate(musl_res, input, ctx).unwrap();
     }
diff --git a/crates/libm-test/tests/multiprecision.rs b/crates/libm-test/tests/multiprecision.rs
index fd1f11610..0ab4b64da 100644
--- a/crates/libm-test/tests/multiprecision.rs
+++ b/crates/libm-test/tests/multiprecision.rs
@@ -12,7 +12,7 @@ fn mp_runner<Op: MathOp + MpOp>(ctx: &CheckCtx, cases: impl Iterator<Item = Op::
     let mut mp_vals = Op::new_mp();
     for input in cases {
         let mp_res = Op::run(&mut mp_vals, input);
-        let crate_res = input.call(Op::ROUTINE);
+        let crate_res = input.call_intercept_panics(Op::ROUTINE);
 
         crate_res.validate(mp_res, input, ctx).unwrap();
     }
diff --git a/crates/libm-test/tests/standalone.rs b/crates/libm-test/tests/standalone.rs
index d6417acac..7d694843e 100644
--- a/crates/libm-test/tests/standalone.rs
+++ b/crates/libm-test/tests/standalone.rs
@@ -10,7 +10,7 @@ fn standalone_runner<Op: MathOp>(
     cases: impl Iterator<Item = (Op::RustArgs, Op::RustRet)>,
 ) {
     for (input, expected) in cases {
-        let crate_res = input.call(Op::ROUTINE);
+        let crate_res = input.call_intercept_panics(Op::ROUTINE);
         crate_res.validate(expected, input, ctx).unwrap();
     }
 }
diff --git a/crates/libm-test/tests/z_extensive/run.rs b/crates/libm-test/tests/z_extensive/run.rs
index a323c9110..786546a9d 100644
--- a/crates/libm-test/tests/z_extensive/run.rs
+++ b/crates/libm-test/tests/z_extensive/run.rs
@@ -113,7 +113,7 @@ where
         for input in input_vec {
             // Test the input.
             let mp_res = Op::run(mp_vals, input);
-            let crate_res = input.call(Op::ROUTINE);
+            let crate_res = input.call_intercept_panics(Op::ROUTINE);
             crate_res.validate(mp_res, input, ctx)?;
 
             let completed = completed.fetch_add(1, Ordering::Relaxed) + 1;
diff --git a/crates/util/src/main.rs b/crates/util/src/main.rs
index 130ac4531..ef70ec903 100644
--- a/crates/util/src/main.rs
+++ b/crates/util/src/main.rs
@@ -59,7 +59,7 @@ macro_rules! handle_call {
             let libm_fn: <Op as MathOp>::RustFn = libm::$fn_name;
 
             let output = match $basis {
-                "libm" => input.call(libm_fn),
+                "libm" => input.call_intercept_panics(libm_fn),
                 #[cfg(feature = "build-musl")]
                 "musl" => {
                     let musl_fn: <Op as MathOp>::CFn =

From 4fbd9b7e5993cbf6a9aab7937d6d5be595b54572 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 12 Feb 2025 09:25:16 +0000
Subject: [PATCH 245/279] Scale test iteration count at a later point

Currently the argument multiplier and large float multiplier happen
before selecting count based on generator. However, this means that
bivariate and trivariate functions don't get scaled at all (except for
the special cased fma).

Move this scaling to a later point.
---
 crates/libm-test/src/run_cfg.rs | 37 +++++++++++++++++++--------------
 1 file changed, 21 insertions(+), 16 deletions(-)

diff --git a/crates/libm-test/src/run_cfg.rs b/crates/libm-test/src/run_cfg.rs
index 6b2689976..8e4fff53c 100644
--- a/crates/libm-test/src/run_cfg.rs
+++ b/crates/libm-test/src/run_cfg.rs
@@ -23,8 +23,8 @@ static EXTENSIVE_ITER_OVERRIDE: LazyLock<Option<u64>> = LazyLock::new(|| {
 ///
 /// Contains the itentifier+generator combo to match on, plus the factor to reduce by.
 const EXTEMELY_SLOW_TESTS: &[(Identifier, GeneratorKind, u64)] = &[
-    (Identifier::Fmodf128, GeneratorKind::QuickSpaced, 40),
-    (Identifier::Fmodf128, GeneratorKind::Extensive, 40),
+    (Identifier::Fmodf128, GeneratorKind::QuickSpaced, 50),
+    (Identifier::Fmodf128, GeneratorKind::Extensive, 50),
 ];
 
 /// Maximum number of iterations to run for a single routine.
@@ -200,15 +200,6 @@ pub fn iteration_count(ctx: &CheckCtx, argnum: usize) -> u64 {
         domain_iter_count = 100_000;
     }
 
-    // Larger float types get more iterations.
-    if t_env.large_float_ty {
-        domain_iter_count *= 4;
-    }
-
-    // Functions with more arguments get more iterations.
-    let arg_multiplier = 1 << (t_env.input_count - 1);
-    domain_iter_count *= arg_multiplier;
-
     // If we will be running tests against MPFR, we don't need to test as much against musl.
     // However, there are some platforms where we have to test against musl since MPFR can't be
     // built.
@@ -228,6 +219,25 @@ pub fn iteration_count(ctx: &CheckCtx, argnum: usize) -> u64 {
         }
     };
 
+    // Larger float types get more iterations.
+    if t_env.large_float_ty && ctx.gen_kind != GeneratorKind::Extensive {
+        if ctx.gen_kind == GeneratorKind::Extensive {
+            // Extensive already has a pretty high test count.
+            total_iterations *= 2;
+        } else {
+            total_iterations *= 4;
+        }
+    }
+
+    // Functions with more arguments get more iterations.
+    let arg_multiplier = 1 << (t_env.input_count - 1);
+    total_iterations *= arg_multiplier;
+
+    // FMA has a huge domain but is reasonably fast to run, so increase another 1.5x.
+    if ctx.base_name == BaseName::Fma {
+        total_iterations = 3 * total_iterations / 2;
+    }
+
     // Some tests are significantly slower than others and need to be further reduced.
     if let Some((_id, _gen, scale)) = EXTEMELY_SLOW_TESTS
         .iter()
@@ -239,11 +249,6 @@ pub fn iteration_count(ctx: &CheckCtx, argnum: usize) -> u64 {
         }
     }
 
-    // FMA has a huge domain but is reasonably fast to run, so increase iterations.
-    if ctx.base_name == BaseName::Fma {
-        total_iterations *= 4;
-    }
-
     if cfg!(optimizations_enabled) {
         // Always run at least 10,000 tests.
         total_iterations = total_iterations.max(10_000);

From c0485a39ed72a4f8563d887f9b9be5945aecfb3a Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 12 Feb 2025 10:12:24 +0000
Subject: [PATCH 246/279] fma refactor 1/3: remove math/fma.rs

Done in stages so git tracks the moved file correctly.
---
 src/math/fma.rs | 40 ----------------------------------------
 1 file changed, 40 deletions(-)
 delete mode 100644 src/math/fma.rs

diff --git a/src/math/fma.rs b/src/math/fma.rs
deleted file mode 100644
index 69cc3eb67..000000000
--- a/src/math/fma.rs
+++ /dev/null
@@ -1,40 +0,0 @@
-/// Fused multiply add (f64)
-///
-/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn fma(x: f64, y: f64, z: f64) -> f64 {
-    return super::generic::fma(x, y, z);
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    #[test]
-    fn fma_segfault() {
-        // These two inputs cause fma to segfault on release due to overflow:
-        assert_eq!(
-            fma(
-                -0.0000000000000002220446049250313,
-                -0.0000000000000002220446049250313,
-                -0.0000000000000002220446049250313
-            ),
-            -0.00000000000000022204460492503126,
-        );
-
-        let result = fma(-0.992, -0.992, -0.992);
-        //force rounding to storage format on x87 to prevent superious errors.
-        #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
-        let result = force_eval!(result);
-        assert_eq!(result, -0.007936000000000007,);
-    }
-
-    #[test]
-    fn fma_sbb() {
-        assert_eq!(fma(-(1.0 - f64::EPSILON), f64::MIN, f64::MIN), -3991680619069439e277);
-    }
-
-    #[test]
-    fn fma_underflow() {
-        assert_eq!(fma(1.1102230246251565e-16, -9.812526705433188e-305, 1.0894e-320), 0.0,);
-    }
-}

From 2bafb583b580f7e12cb0d19019f2876135e2aa31 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 12 Feb 2025 10:12:24 +0000
Subject: [PATCH 247/279] fma refactor 2/3: move math/generic/fma.rs to
 math/fma.rs

Done in stages so git tracks the moved file correctly.
---
 src/math/{generic => }/fma.rs | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename src/math/{generic => }/fma.rs (100%)

diff --git a/src/math/generic/fma.rs b/src/math/fma.rs
similarity index 100%
rename from src/math/generic/fma.rs
rename to src/math/fma.rs

From 4f9dddb891fa910fc3b7599bf1e59732ea6d81e4 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 12 Feb 2025 09:55:04 +0000
Subject: [PATCH 248/279] fma refactor 3/3: combine `fma` public API with its
 implementation

Similar to other recent changes, just put public API in the same file as
its generic implementation. To keep things slightly cleaner, split the
default implementation from the `_wide` implementation.

Also introduces a stub `fmaf16`.
---
 etc/function-definitions.json |   9 +--
 src/math/fma.rs               | 140 +++++++++++++---------------------
 src/math/fma_wide.rs          |  97 +++++++++++++++++++++++
 src/math/fmaf.rs              |  21 -----
 src/math/fmaf128.rs           |   7 --
 src/math/generic/mod.rs       |   2 -
 src/math/mod.rs               |  10 ++-
 7 files changed, 161 insertions(+), 125 deletions(-)
 create mode 100644 src/math/fma_wide.rs
 delete mode 100644 src/math/fmaf.rs
 delete mode 100644 src/math/fmaf128.rs

diff --git a/etc/function-definitions.json b/etc/function-definitions.json
index 63d9927ad..a966852b1 100644
--- a/etc/function-definitions.json
+++ b/etc/function-definitions.json
@@ -343,22 +343,19 @@
     },
     "fma": {
         "sources": [
-            "src/math/fma.rs",
-            "src/math/generic/fma.rs"
+            "src/math/fma.rs"
         ],
         "type": "f64"
     },
     "fmaf": {
         "sources": [
-            "src/math/fmaf.rs",
-            "src/math/generic/fma.rs"
+            "src/math/fma_wide.rs"
         ],
         "type": "f32"
     },
     "fmaf128": {
         "sources": [
-            "src/math/fmaf128.rs",
-            "src/math/generic/fma.rs"
+            "src/math/fma.rs"
         ],
         "type": "f128"
     },
diff --git a/src/math/fma.rs b/src/math/fma.rs
index cb1061cc3..a54984c93 100644
--- a/src/math/fma.rs
+++ b/src/math/fma.rs
@@ -1,23 +1,28 @@
 /* SPDX-License-Identifier: MIT */
-/* origin: musl src/math/{fma,fmaf}.c. Ported to generic Rust algorithm in 2025, TG. */
+/* origin: musl src/math/fma.c. Ported to generic Rust algorithm in 2025, TG. */
 
 use super::super::support::{DInt, FpResult, HInt, IntTy, Round, Status};
-use super::super::{CastFrom, CastInto, DFloat, Float, HFloat, Int, MinInt};
+use super::{CastFrom, CastInto, Float, Int, MinInt};
 
-/// Fused multiply-add that works when there is not a larger float size available. Currently this
-/// is still specialized only for `f64`. Computes `(x * y) + z`.
+/// Fused multiply add (f64)
+///
+/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision).
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn fma<F>(x: F, y: F, z: F) -> F
-where
-    F: Float,
-    F: CastFrom<F::SignedInt>,
-    F: CastFrom<i8>,
-    F::Int: HInt,
-    u32: CastInto<F::Int>,
-{
+pub fn fma(x: f64, y: f64, z: f64) -> f64 {
+    fma_round(x, y, z, Round::Nearest).val
+}
+
+/// Fused multiply add (f128)
+///
+/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision).
+#[cfg(f128_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmaf128(x: f128, y: f128, z: f128) -> f128 {
     fma_round(x, y, z, Round::Nearest).val
 }
 
+/// Fused multiply-add that works when there is not a larger float size available. Computes
+/// `(x * y) + z`.
 pub fn fma_round<F>(x: F, y: F, z: F, _round: Round) -> FpResult<F>
 where
     F: Float,
@@ -222,79 +227,7 @@ where
     }
 
     // Use our exponent to scale the final value.
-    FpResult::new(super::scalbn(r, e), status)
-}
-
-/// Fma implementation when a hardware-backed larger float type is available. For `f32` and `f64`,
-/// `f64` has enough precision to represent the `f32` in its entirety, except for double rounding.
-pub fn fma_wide<F, B>(x: F, y: F, z: F) -> F
-where
-    F: Float + HFloat<D = B>,
-    B: Float + DFloat<H = F>,
-    B::Int: CastInto<i32>,
-    i32: CastFrom<i32>,
-{
-    fma_wide_round(x, y, z, Round::Nearest).val
-}
-
-pub fn fma_wide_round<F, B>(x: F, y: F, z: F, round: Round) -> FpResult<F>
-where
-    F: Float + HFloat<D = B>,
-    B: Float + DFloat<H = F>,
-    B::Int: CastInto<i32>,
-    i32: CastFrom<i32>,
-{
-    let one = IntTy::<B>::ONE;
-
-    let xy: B = x.widen() * y.widen();
-    let mut result: B = xy + z.widen();
-    let mut ui: B::Int = result.to_bits();
-    let re = result.ex();
-    let zb: B = z.widen();
-
-    let prec_diff = B::SIG_BITS - F::SIG_BITS;
-    let excess_prec = ui & ((one << prec_diff) - one);
-    let halfway = one << (prec_diff - 1);
-
-    // Common case: the larger precision is fine if...
-    // This is not a halfway case
-    if excess_prec != halfway
-        // Or the result is NaN
-        || re == B::EXP_SAT
-        // Or the result is exact
-        || (result - xy == zb && result - zb == xy)
-        // Or the mode is something other than round to nearest
-        || round != Round::Nearest
-    {
-        let min_inexact_exp = (B::EXP_BIAS as i32 + F::EXP_MIN_SUBNORM) as u32;
-        let max_inexact_exp = (B::EXP_BIAS as i32 + F::EXP_MIN) as u32;
-
-        let mut status = Status::OK;
-
-        if (min_inexact_exp..max_inexact_exp).contains(&re) && status.inexact() {
-            // This branch is never hit; requires previous operations to set a status
-            status.set_inexact(false);
-
-            result = xy + z.widen();
-            if status.inexact() {
-                status.set_underflow(true);
-            } else {
-                status.set_inexact(true);
-            }
-        }
-
-        return FpResult { val: result.narrow(), status };
-    }
-
-    let neg = ui >> (B::BITS - 1) != IntTy::<B>::ZERO;
-    let err = if neg == (zb > xy) { xy - result + zb } else { zb - result + xy };
-    if neg == (err < B::ZERO) {
-        ui += one;
-    } else {
-        ui -= one;
-    }
-
-    FpResult::ok(B::from_bits(ui).narrow())
+    FpResult::new(super::generic::scalbn(r, e), status)
 }
 
 /// Representation of `F` that has handled subnormals.
@@ -363,6 +296,7 @@ impl<F: Float> Norm<F> {
 mod tests {
     use super::*;
 
+    /// Test the generic `fma_round` algorithm for a given float.
     fn spec_test<F>()
     where
         F: Float,
@@ -375,6 +309,8 @@ mod tests {
         let y = F::from_bits(F::Int::ONE);
         let z = F::ZERO;
 
+        let fma = |x, y, z| fma_round(x, y, z, Round::Nearest).val;
+
         // 754-2020 says "When the exact result of (a × b) + c is non-zero yet the result of
         // fusedMultiplyAdd is zero because of rounding, the zero result takes the sign of the
         // exact result"
@@ -384,6 +320,11 @@ mod tests {
         assert_biteq!(fma(-x, -y, z), F::ZERO);
     }
 
+    #[test]
+    fn spec_test_f32() {
+        spec_test::<f32>();
+    }
+
     #[test]
     fn spec_test_f64() {
         spec_test::<f64>();
@@ -417,4 +358,33 @@ mod tests {
     fn spec_test_f128() {
         spec_test::<f128>();
     }
+
+    #[test]
+    fn fma_segfault() {
+        // These two inputs cause fma to segfault on release due to overflow:
+        assert_eq!(
+            fma(
+                -0.0000000000000002220446049250313,
+                -0.0000000000000002220446049250313,
+                -0.0000000000000002220446049250313
+            ),
+            -0.00000000000000022204460492503126,
+        );
+
+        let result = fma(-0.992, -0.992, -0.992);
+        //force rounding to storage format on x87 to prevent superious errors.
+        #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
+        let result = force_eval!(result);
+        assert_eq!(result, -0.007936000000000007,);
+    }
+
+    #[test]
+    fn fma_sbb() {
+        assert_eq!(fma(-(1.0 - f64::EPSILON), f64::MIN, f64::MIN), -3991680619069439e277);
+    }
+
+    #[test]
+    fn fma_underflow() {
+        assert_eq!(fma(1.1102230246251565e-16, -9.812526705433188e-305, 1.0894e-320), 0.0,);
+    }
 }
diff --git a/src/math/fma_wide.rs b/src/math/fma_wide.rs
new file mode 100644
index 000000000..a8c1a5488
--- /dev/null
+++ b/src/math/fma_wide.rs
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: MIT */
+/* origin: musl src/math/fmaf.c. Ported to generic Rust algorithm in 2025, TG. */
+
+use super::super::support::{FpResult, IntTy, Round, Status};
+use super::{CastFrom, CastInto, DFloat, Float, HFloat, MinInt};
+
+// Placeholder so we can have `fmaf16` in the `Float` trait.
+#[allow(unused)]
+#[cfg(f16_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub(crate) fn fmaf16(_x: f16, _y: f16, _z: f16) -> f16 {
+    unimplemented!()
+}
+
+/// Floating multiply add (f32)
+///
+/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmaf(x: f32, y: f32, z: f32) -> f32 {
+    fma_wide_round(x, y, z, Round::Nearest).val
+}
+
+/// Fma implementation when a hardware-backed larger float type is available. For `f32` and `f64`,
+/// `f64` has enough precision to represent the `f32` in its entirety, except for double rounding.
+pub fn fma_wide_round<F, B>(x: F, y: F, z: F, round: Round) -> FpResult<F>
+where
+    F: Float + HFloat<D = B>,
+    B: Float + DFloat<H = F>,
+    B::Int: CastInto<i32>,
+    i32: CastFrom<i32>,
+{
+    let one = IntTy::<B>::ONE;
+
+    let xy: B = x.widen() * y.widen();
+    let mut result: B = xy + z.widen();
+    let mut ui: B::Int = result.to_bits();
+    let re = result.ex();
+    let zb: B = z.widen();
+
+    let prec_diff = B::SIG_BITS - F::SIG_BITS;
+    let excess_prec = ui & ((one << prec_diff) - one);
+    let halfway = one << (prec_diff - 1);
+
+    // Common case: the larger precision is fine if...
+    // This is not a halfway case
+    if excess_prec != halfway
+        // Or the result is NaN
+        || re == B::EXP_SAT
+        // Or the result is exact
+        || (result - xy == zb && result - zb == xy)
+        // Or the mode is something other than round to nearest
+        || round != Round::Nearest
+    {
+        let min_inexact_exp = (B::EXP_BIAS as i32 + F::EXP_MIN_SUBNORM) as u32;
+        let max_inexact_exp = (B::EXP_BIAS as i32 + F::EXP_MIN) as u32;
+
+        let mut status = Status::OK;
+
+        if (min_inexact_exp..max_inexact_exp).contains(&re) && status.inexact() {
+            // This branch is never hit; requires previous operations to set a status
+            status.set_inexact(false);
+
+            result = xy + z.widen();
+            if status.inexact() {
+                status.set_underflow(true);
+            } else {
+                status.set_inexact(true);
+            }
+        }
+
+        return FpResult { val: result.narrow(), status };
+    }
+
+    let neg = ui >> (B::BITS - 1) != IntTy::<B>::ZERO;
+    let err = if neg == (zb > xy) { xy - result + zb } else { zb - result + xy };
+    if neg == (err < B::ZERO) {
+        ui += one;
+    } else {
+        ui -= one;
+    }
+
+    FpResult::ok(B::from_bits(ui).narrow())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn issue_263() {
+        let a = f32::from_bits(1266679807);
+        let b = f32::from_bits(1300234242);
+        let c = f32::from_bits(1115553792);
+        let expected = f32::from_bits(1501560833);
+        assert_eq!(fmaf(a, b, c), expected);
+    }
+}
diff --git a/src/math/fmaf.rs b/src/math/fmaf.rs
deleted file mode 100644
index 40d7f40d6..000000000
--- a/src/math/fmaf.rs
+++ /dev/null
@@ -1,21 +0,0 @@
-/// Floating multiply add (f32)
-///
-/// Computes `(x*y)+z`, rounded as one ternary operation:
-/// Computes the value (as if) to infinite precision and rounds once to the result format,
-/// according to the rounding mode characterized by the value of FLT_ROUNDS.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn fmaf(x: f32, y: f32, z: f32) -> f32 {
-    super::generic::fma_wide(x, y, z)
-}
-
-#[cfg(test)]
-mod tests {
-    #[test]
-    fn issue_263() {
-        let a = f32::from_bits(1266679807);
-        let b = f32::from_bits(1300234242);
-        let c = f32::from_bits(1115553792);
-        let expected = f32::from_bits(1501560833);
-        assert_eq!(super::fmaf(a, b, c), expected);
-    }
-}
diff --git a/src/math/fmaf128.rs b/src/math/fmaf128.rs
deleted file mode 100644
index 50f7360de..000000000
--- a/src/math/fmaf128.rs
+++ /dev/null
@@ -1,7 +0,0 @@
-/// Fused multiply add (f128)
-///
-/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn fmaf128(x: f128, y: f128, z: f128) -> f128 {
-    return super::generic::fma(x, y, z);
-}
diff --git a/src/math/generic/mod.rs b/src/math/generic/mod.rs
index f224eba73..9be185f80 100644
--- a/src/math/generic/mod.rs
+++ b/src/math/generic/mod.rs
@@ -3,7 +3,6 @@ mod copysign;
 mod fabs;
 mod fdim;
 mod floor;
-mod fma;
 mod fmax;
 mod fmaximum;
 mod fmaximum_num;
@@ -22,7 +21,6 @@ pub use copysign::copysign;
 pub use fabs::fabs;
 pub use fdim::fdim;
 pub use floor::floor;
-pub use fma::{fma, fma_wide};
 pub use fmax::fmax;
 pub use fmaximum::fmaximum;
 pub use fmaximum_num::fmaximum_num;
diff --git a/src/math/mod.rs b/src/math/mod.rs
index e58d79adc..5fc8fa0b3 100644
--- a/src/math/mod.rs
+++ b/src/math/mod.rs
@@ -164,7 +164,7 @@ mod fdimf;
 mod floor;
 mod floorf;
 mod fma;
-mod fmaf;
+mod fma_wide;
 mod fmin_fmax;
 mod fminimum_fmaximum;
 mod fminimum_fmaximum_num;
@@ -271,7 +271,7 @@ pub use self::fdimf::fdimf;
 pub use self::floor::floor;
 pub use self::floorf::floorf;
 pub use self::fma::fma;
-pub use self::fmaf::fmaf;
+pub use self::fma_wide::fmaf;
 pub use self::fmin_fmax::{fmax, fmaxf, fmin, fminf};
 pub use self::fminimum_fmaximum::{fmaximum, fmaximumf, fminimum, fminimumf};
 pub use self::fminimum_fmaximum_num::{fmaximum_num, fmaximum_numf, fminimum_num, fminimum_numf};
@@ -370,6 +370,9 @@ cfg_if! {
         pub use self::sqrtf16::sqrtf16;
         pub use self::truncf16::truncf16;
         // verify-sorted-end
+
+        #[allow(unused_imports)]
+        pub(crate) use self::fma_wide::fmaf16;
     }
 }
 
@@ -381,7 +384,6 @@ cfg_if! {
         mod fabsf128;
         mod fdimf128;
         mod floorf128;
-        mod fmaf128;
         mod fmodf128;
         mod ldexpf128;
         mod roundf128;
@@ -396,7 +398,7 @@ cfg_if! {
         pub use self::fabsf128::fabsf128;
         pub use self::fdimf128::fdimf128;
         pub use self::floorf128::floorf128;
-        pub use self::fmaf128::fmaf128;
+        pub use self::fma::fmaf128;
         pub use self::fmin_fmax::{fmaxf128, fminf128};
         pub use self::fminimum_fmaximum::{fmaximumf128, fminimumf128};
         pub use self::fminimum_fmaximum_num::{fmaximum_numf128, fminimum_numf128};

From bfbeb10fbccc86e363f0edcb9db6ff85271aee64 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 12 Feb 2025 10:16:48 +0000
Subject: [PATCH 249/279] Make `fma` a trait method on `Float`

---
 crates/libm-test/src/f8_impl.rs  |  4 ++++
 etc/function-definitions.json    |  3 +--
 etc/update-api-list.py           |  2 +-
 src/math/cbrt.rs                 | 20 ++++----------------
 src/math/support/float_traits.rs | 26 ++++++++++++++++++++------
 5 files changed, 30 insertions(+), 25 deletions(-)

diff --git a/crates/libm-test/src/f8_impl.rs b/crates/libm-test/src/f8_impl.rs
index 56ea0b729..0683d8392 100644
--- a/crates/libm-test/src/f8_impl.rs
+++ b/crates/libm-test/src/f8_impl.rs
@@ -78,6 +78,10 @@ impl Float for f8 {
         libm::generic::copysign(self, other)
     }
 
+    fn fma(self, _y: Self, _z: Self) -> Self {
+        unimplemented!()
+    }
+
     fn normalize(_significand: Self::Int) -> (i32, Self::Int) {
         unimplemented!()
     }
diff --git a/etc/function-definitions.json b/etc/function-definitions.json
index a966852b1..64a775ba9 100644
--- a/etc/function-definitions.json
+++ b/etc/function-definitions.json
@@ -130,8 +130,7 @@
     "copysign": {
         "sources": [
             "src/math/copysign.rs",
-            "src/math/generic/copysign.rs",
-            "src/math/support/float_traits.rs"
+            "src/math/generic/copysign.rs"
         ],
         "type": "f64"
     },
diff --git a/etc/update-api-list.py b/etc/update-api-list.py
index c0b6e41d3..67d1b0508 100755
--- a/etc/update-api-list.py
+++ b/etc/update-api-list.py
@@ -24,7 +24,7 @@
 DIRECTORIES = [".github", "ci", "crates", "etc", "src"]
 
 # These files do not trigger a retest.
-IGNORED_SOURCES = ["src/libm_helper.rs"]
+IGNORED_SOURCES = ["src/libm_helper.rs", "src/math/support/float_traits.rs"]
 
 IndexTy: TypeAlias = dict[str, dict[str, Any]]
 """Type of the `index` item in rustdoc's JSON output"""
diff --git a/src/math/cbrt.rs b/src/math/cbrt.rs
index 8560d37ab..9d3311cd6 100644
--- a/src/math/cbrt.rs
+++ b/src/math/cbrt.rs
@@ -103,11 +103,11 @@ pub fn cbrt_round(x: f64, round: Round) -> FpResult<f64> {
      * and rr an approximation of 1/zz. We now perform another iteration of
      * Newton-Raphson, this time with a linear approximation only. */
     y2 = y * y;
-    let mut y2l: f64 = fmaf64(y, y, -y2);
+    let mut y2l: f64 = y.fma(y, -y2);
 
     /* y2 + y2l = y^2 exactly */
     let mut y3: f64 = y2 * y;
-    let mut y3l: f64 = fmaf64(y, y2, -y3) + y * y2l;
+    let mut y3l: f64 = y.fma(y2, -y3) + y * y2l;
 
     /* y3 + y3l approximates y^3 with about 106 bits of accuracy */
     h = ((y3 - zz) + y3l) * rr;
@@ -132,9 +132,9 @@ pub fn cbrt_round(x: f64, round: Round) -> FpResult<f64> {
         cold_path();
 
         y2 = y1 * y1;
-        y2l = fmaf64(y1, y1, -y2);
+        y2l = y1.fma(y1, -y2);
         y3 = y2 * y1;
-        y3l = fmaf64(y1, y2, -y3) + y1 * y2l;
+        y3l = y1.fma(y2, -y3) + y1 * y2l;
         h = ((y3 - zz) + y3l) * rr;
         dy = h * (y1 * u0);
         y = y1 - dy;
@@ -198,18 +198,6 @@ pub fn cbrt_round(x: f64, round: Round) -> FpResult<f64> {
     FpResult::ok(f64::from_bits(cvt3))
 }
 
-fn fmaf64(x: f64, y: f64, z: f64) -> f64 {
-    #[cfg(intrinsics_enabled)]
-    {
-        return unsafe { core::intrinsics::fmaf64(x, y, z) };
-    }
-
-    #[cfg(not(intrinsics_enabled))]
-    {
-        return super::fma(x, y, z);
-    }
-}
-
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/src/math/support/float_traits.rs b/src/math/support/float_traits.rs
index 534ca9a07..96c209c85 100644
--- a/src/math/support/float_traits.rs
+++ b/src/math/support/float_traits.rs
@@ -160,9 +160,11 @@ pub trait Float:
     fn abs(self) -> Self;
 
     /// Returns a number composed of the magnitude of self and the sign of sign.
-    #[allow(dead_code)]
     fn copysign(self, other: Self) -> Self;
 
+    /// Fused multiply add, rounding once.
+    fn fma(self, y: Self, z: Self) -> Self;
+
     /// Returns (normalized exponent, normalized significand)
     #[allow(dead_code)]
     fn normalize(significand: Self::Int) -> (i32, Self::Int);
@@ -184,7 +186,9 @@ macro_rules! float_impl {
         $sity:ident,
         $bits:expr,
         $significand_bits:expr,
-        $from_bits:path
+        $from_bits:path,
+        $fma_fn:ident,
+        $fma_intrinsic:ident
     ) => {
         impl Float for $ty {
             type Int = $ity;
@@ -252,6 +256,16 @@ macro_rules! float_impl {
                     }
                 }
             }
+            fn fma(self, y: Self, z: Self) -> Self {
+                cfg_if! {
+                    // fma is not yet available in `core`
+                    if #[cfg(intrinsics_enabled)] {
+                        unsafe{ core::intrinsics::$fma_intrinsic(self, y, z) }
+                    } else {
+                        super::super::$fma_fn(self, y, z)
+                    }
+                }
+            }
             fn normalize(significand: Self::Int) -> (i32, Self::Int) {
                 let shift = significand.leading_zeros().wrapping_sub(Self::EXP_BITS);
                 (1i32.wrapping_sub(shift as i32), significand << shift as Self::Int)
@@ -261,11 +275,11 @@ macro_rules! float_impl {
 }
 
 #[cfg(f16_enabled)]
-float_impl!(f16, u16, i16, 16, 10, f16::from_bits);
-float_impl!(f32, u32, i32, 32, 23, f32_from_bits);
-float_impl!(f64, u64, i64, 64, 52, f64_from_bits);
+float_impl!(f16, u16, i16, 16, 10, f16::from_bits, fmaf16, fmaf16);
+float_impl!(f32, u32, i32, 32, 23, f32_from_bits, fmaf, fmaf32);
+float_impl!(f64, u64, i64, 64, 52, f64_from_bits, fma, fmaf64);
 #[cfg(f128_enabled)]
-float_impl!(f128, u128, i128, 128, 112, f128::from_bits);
+float_impl!(f128, u128, i128, 128, 112, f128::from_bits, fmaf128, fmaf128);
 
 /* FIXME(msrv): vendor some things that are not const stable at our MSRV */
 

From 5275b537d26f088a7e620010b54f5a5e359d03b8 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 12 Feb 2025 21:06:15 +0000
Subject: [PATCH 250/279] Use `git ls-files` rather than manually globbing for
 tidy

This avoids matching build directories, ignored files, and submodules.
---
 etc/update-api-list.py | 54 ++++++++++++++++++++----------------------
 1 file changed, 26 insertions(+), 28 deletions(-)

diff --git a/etc/update-api-list.py b/etc/update-api-list.py
index 67d1b0508..b4ce2c453 100755
--- a/etc/update-api-list.py
+++ b/etc/update-api-list.py
@@ -12,7 +12,7 @@
 import subprocess as sp
 import sys
 from dataclasses import dataclass
-from glob import glob, iglob
+from glob import glob
 from pathlib import Path
 from typing import Any, Callable, TypeAlias
 
@@ -20,9 +20,6 @@
 ETC_DIR = SELF_PATH.parent
 ROOT_DIR = ETC_DIR.parent
 
-# Loose approximation of what gets checked in to git, without needing `git ls-files`.
-DIRECTORIES = [".github", "ci", "crates", "etc", "src"]
-
 # These files do not trigger a retest.
 IGNORED_SOURCES = ["src/libm_helper.rs", "src/math/support/float_traits.rs"]
 
@@ -190,30 +187,31 @@ def tidy_lists(self) -> None:
         """In each file, check annotations indicating blocks of code should be sorted or should
         include all public API.
         """
-        for dirname in DIRECTORIES:
-            dir = ROOT_DIR.joinpath(dirname)
-            for fname in iglob("**", root_dir=dir, recursive=True):
-                fpath = dir.joinpath(fname)
-                if fpath.is_dir() or fpath == SELF_PATH:
-                    continue
-
-                lines = fpath.read_text().splitlines()
-
-                validate_delimited_block(
-                    fpath,
-                    lines,
-                    "verify-sorted-start",
-                    "verify-sorted-end",
-                    ensure_sorted,
-                )
-
-                validate_delimited_block(
-                    fpath,
-                    lines,
-                    "verify-apilist-start",
-                    "verify-apilist-end",
-                    lambda p, n, lines: self.ensure_contains_api(p, n, lines),
-                )
+
+        flist = sp.check_output(["git", "ls-files"], cwd=ROOT_DIR, text=True)
+
+        for path in flist.splitlines():
+            fpath = ROOT_DIR.joinpath(path)
+            if fpath.is_dir() or fpath == SELF_PATH:
+                continue
+
+            lines = fpath.read_text().splitlines()
+
+            validate_delimited_block(
+                fpath,
+                lines,
+                "verify-sorted-start",
+                "verify-sorted-end",
+                ensure_sorted,
+            )
+
+            validate_delimited_block(
+                fpath,
+                lines,
+                "verify-apilist-start",
+                "verify-apilist-end",
+                lambda p, n, lines: self.ensure_contains_api(p, n, lines),
+            )
 
     def ensure_contains_api(self, fpath: Path, line_num: int, lines: list[str]):
         """Given a list of strings, ensure that each public function we have is named

From f0e10b661e3adebb8acfe2c3229e9cc6ea1e3ae8 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 24 Feb 2025 01:23:37 +0000
Subject: [PATCH 251/279] ci: Pin the nightly toolchain for aarch64 jobs

Pin aarch64-unknown-linux-gnu and aarch64-apple-darwin to
nightly-2025-02-07 until [1] makes it to a Rust nightly.

[1]: https://github.com/llvm/llvm-project/issues/127804
---
 .github/workflows/main.yaml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index f066f4a8c..e86f936f7 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -22,8 +22,12 @@ jobs:
         include:
         - target: aarch64-apple-darwin
           os: macos-15
+          # FIXME: pinned due to https://github.com/llvm/llvm-project/issues/127804
+          channel: nightly-2025-02-07
         - target: aarch64-unknown-linux-gnu
           os: ubuntu-24.04-arm
+          # FIXME: pinned due to https://github.com/llvm/llvm-project/issues/127804
+          channel: nightly-2025-02-07
         - target: aarch64-pc-windows-msvc
           os: windows-2025
           build_only: 1 # Can't run on x86 hosts

From 9a60739f6407e54f9899c43233bec368c0e97de7 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 24 Feb 2025 00:01:04 +0000
Subject: [PATCH 252/279] Make the compiler-builtins test more accurately
 mirror compiler-builtins

In `compiler-builtins`, `libm` is contained within a `math` module. The
smoke test in this repo has a slightly different layout so some things
were passing that shouldn't be.

Change module layouts in `compiler-builtins-smoke-test` to match
`compiler-builtins` and update a few instances of broken paths.
---
 .../compiler-builtins-smoke-test/src/lib.rs   | 185 +-----------------
 .../compiler-builtins-smoke-test/src/math.rs  | 182 +++++++++++++++++
 src/math/fma.rs                               |   2 +-
 src/math/fma_wide.rs                          |   2 +-
 4 files changed, 186 insertions(+), 185 deletions(-)
 create mode 100644 crates/compiler-builtins-smoke-test/src/math.rs

diff --git a/crates/compiler-builtins-smoke-test/src/lib.rs b/crates/compiler-builtins-smoke-test/src/lib.rs
index 77a4666a1..f9e6e75a8 100644
--- a/crates/compiler-builtins-smoke-test/src/lib.rs
+++ b/crates/compiler-builtins-smoke-test/src/lib.rs
@@ -10,187 +10,6 @@
 #![allow(internal_features)]
 #![no_std]
 
-#[allow(dead_code)]
-#[allow(clippy::all)] // We don't get `libm`'s list of `allow`s, so just ignore Clippy.
-#[path = "../../../src/math/mod.rs"]
-pub mod libm;
-
-use core::ffi::c_int;
-
+mod math;
 // Required for macro paths.
-use libm::support;
-
-/// Mark functions `#[no_mangle]` and with the C ABI.
-macro_rules! no_mangle {
-    ($( $name:ident( $($tt:tt)+ ) -> $ret:ty; )+) => {
-        $( no_mangle!(@inner $name( $($tt)+ ) -> $ret); )+
-    };
-
-    // Handle simple functions with single return types
-    (@inner $name:ident( $($arg:ident: $aty:ty),+ ) -> $ret:ty) => {
-        #[no_mangle]
-        extern "C" fn $name($($arg: $aty),+) -> $ret {
-            libm::$name($($arg),+)
-        }
-    };
-
-
-    // Functions with `&mut` return values need to be handled differently, use `|` to
-    // separate inputs vs. outputs.
-    (
-        @inner $name:ident( $($arg:ident: $aty:ty),+ | $($rarg:ident: $rty:ty),+) -> $ret:ty
-    ) => {
-        #[no_mangle]
-        extern "C" fn $name($($arg: $aty,)+ $($rarg: $rty),+) -> $ret {
-            let ret;
-            (ret, $(*$rarg),+) = libm::$name($($arg),+);
-            ret
-        }
-    };
-}
-
-no_mangle! {
-    frexp(x: f64 | y: &mut c_int) -> f64;
-    frexpf(x: f32 | y: &mut c_int) -> f32;
-    acos(x: f64) -> f64;
-    acosf(x: f32) -> f32;
-    acosh(x: f64) -> f64;
-    acoshf(x: f32) -> f32;
-    asin(x: f64) -> f64;
-    asinf(x: f32) -> f32;
-    asinh(x: f64) -> f64;
-    asinhf(x: f32) -> f32;
-    atan(x: f64) -> f64;
-    atan2(x: f64, y: f64) -> f64;
-    atan2f(x: f32, y: f32) -> f32;
-    atanf(x: f32) -> f32;
-    atanh(x: f64) -> f64;
-    atanhf(x: f32) -> f32;
-    cbrt(x: f64) -> f64;
-    cbrtf(x: f32) -> f32;
-    ceil(x: f64) -> f64;
-    ceilf(x: f32) -> f32;
-    ceilf128(x: f128) -> f128;
-    ceilf16(x: f16) -> f16;
-    copysign(x: f64, y: f64) -> f64;
-    copysignf(x: f32, y: f32) -> f32;
-    copysignf128(x: f128, y: f128) -> f128;
-    copysignf16(x: f16, y: f16) -> f16;
-    cos(x: f64) -> f64;
-    cosf(x: f32) -> f32;
-    cosh(x: f64) -> f64;
-    coshf(x: f32) -> f32;
-    erf(x: f64) -> f64;
-    erfc(x: f64) -> f64;
-    erfcf(x: f32) -> f32;
-    erff(x: f32) -> f32;
-    exp(x: f64) -> f64;
-    exp10(x: f64) -> f64;
-    exp10f(x: f32) -> f32;
-    exp2(x: f64) -> f64;
-    exp2f(x: f32) -> f32;
-    expf(x: f32) -> f32;
-    expm1(x: f64) -> f64;
-    expm1f(x: f32) -> f32;
-    fabs(x: f64) -> f64;
-    fabsf(x: f32) -> f32;
-    fabsf128(x: f128) -> f128;
-    fabsf16(x: f16) -> f16;
-    fdim(x: f64, y: f64) -> f64;
-    fdimf(x: f32, y: f32) -> f32;
-    fdimf128(x: f128, y: f128) -> f128;
-    fdimf16(x: f16, y: f16) -> f16;
-    floor(x: f64) -> f64;
-    floorf(x: f32) -> f32;
-    floorf128(x: f128) -> f128;
-    floorf16(x: f16) -> f16;
-    fma(x: f64, y: f64, z: f64) -> f64;
-    fmaf(x: f32, y: f32, z: f32) -> f32;
-    fmax(x: f64, y: f64) -> f64;
-    fmaxf(x: f32, y: f32) -> f32;
-    fmin(x: f64, y: f64) -> f64;
-    fminf(x: f32, y: f32) -> f32;
-    fmod(x: f64, y: f64) -> f64;
-    fmodf(x: f32, y: f32) -> f32;
-    hypot(x: f64, y: f64) -> f64;
-    hypotf(x: f32, y: f32) -> f32;
-    ilogb(x: f64) -> c_int;
-    ilogbf(x: f32) -> c_int;
-    j0(x: f64) -> f64;
-    j0f(x: f32) -> f32;
-    j1(x: f64) -> f64;
-    j1f(x: f32) -> f32;
-    jn(x: c_int, y: f64) -> f64;
-    jnf(x: c_int, y: f32) -> f32;
-    ldexp(x: f64, y: c_int) -> f64;
-    ldexpf(x: f32, y: c_int) -> f32;
-    lgamma(x: f64) -> f64;
-    lgamma_r(x: f64 | r: &mut c_int) -> f64;
-    lgammaf(x: f32) -> f32;
-    lgammaf_r(x: f32 | r: &mut c_int) -> f32;
-    log(x: f64) -> f64;
-    log10(x: f64) -> f64;
-    log10f(x: f32) -> f32;
-    log1p(x: f64) -> f64;
-    log1pf(x: f32) -> f32;
-    log2(x: f64) -> f64;
-    log2f(x: f32) -> f32;
-    logf(x: f32) -> f32;
-    modf(x: f64 | r: &mut f64) -> f64;
-    modff(x: f32 | r: &mut f32) -> f32;
-    nextafter(x: f64, y: f64) -> f64;
-    nextafterf(x: f32, y: f32) -> f32;
-    pow(x: f64, y: f64) -> f64;
-    powf(x: f32, y: f32) -> f32;
-    remainder(x: f64, y: f64) -> f64;
-    remainderf(x: f32, y: f32) -> f32;
-    remquo(x: f64, y: f64 | q: &mut c_int) -> f64;
-    remquof(x: f32, y: f32 | q: &mut c_int) -> f32;
-    rint(x: f64) -> f64;
-    rintf(x: f32) -> f32;
-    rintf128(x: f128) -> f128;
-    rintf16(x: f16) -> f16;
-    round(x: f64) -> f64;
-    roundf(x: f32) -> f32;
-    scalbn(x: f64, y: c_int) -> f64;
-    scalbnf(x: f32, y: c_int) -> f32;
-    sin(x: f64) -> f64;
-    sinf(x: f32) -> f32;
-    sinh(x: f64) -> f64;
-    sinhf(x: f32) -> f32;
-    sqrt(x: f64) -> f64;
-    sqrtf(x: f32) -> f32;
-    tan(x: f64) -> f64;
-    tanf(x: f32) -> f32;
-    tanh(x: f64) -> f64;
-    tanhf(x: f32) -> f32;
-    tgamma(x: f64) -> f64;
-    tgammaf(x: f32) -> f32;
-    trunc(x: f64) -> f64;
-    truncf(x: f32) -> f32;
-    truncf128(x: f128) -> f128;
-    truncf16(x: f16) -> f16;
-    y0(x: f64) -> f64;
-    y0f(x: f32) -> f32;
-    y1(x: f64) -> f64;
-    y1f(x: f32) -> f32;
-    yn(x: c_int, y: f64) -> f64;
-    ynf(x: c_int, y: f32) -> f32;
-}
-
-/* sincos has no direct return type, not worth handling in the macro */
-
-#[no_mangle]
-extern "C" fn sincos(x: f64, s: &mut f64, c: &mut f64) {
-    (*s, *c) = libm::sincos(x);
-}
-
-#[no_mangle]
-extern "C" fn sincosf(x: f32, s: &mut f32, c: &mut f32) {
-    (*s, *c) = libm::sincosf(x);
-}
-
-#[panic_handler]
-fn panic(_info: &core::panic::PanicInfo) -> ! {
-    loop {}
-}
+use math::libm::support;
diff --git a/crates/compiler-builtins-smoke-test/src/math.rs b/crates/compiler-builtins-smoke-test/src/math.rs
new file mode 100644
index 000000000..7e0146998
--- /dev/null
+++ b/crates/compiler-builtins-smoke-test/src/math.rs
@@ -0,0 +1,182 @@
+use core::ffi::c_int;
+
+#[allow(dead_code)]
+#[allow(clippy::all)] // We don't get `libm`'s list of `allow`s, so just ignore Clippy.
+#[allow(unused_imports)]
+#[path = "../../../src/math/mod.rs"]
+pub mod libm;
+
+/// Mark functions `#[no_mangle]` and with the C ABI.
+macro_rules! no_mangle {
+    ($( $name:ident( $($tt:tt)+ ) -> $ret:ty; )+) => {
+        $( no_mangle!(@inner $name( $($tt)+ ) -> $ret); )+
+    };
+
+    // Handle simple functions with single return types
+    (@inner $name:ident( $($arg:ident: $aty:ty),+ ) -> $ret:ty) => {
+        #[no_mangle]
+        extern "C" fn $name($($arg: $aty),+) -> $ret {
+            libm::$name($($arg),+)
+        }
+    };
+
+
+    // Functions with `&mut` return values need to be handled differently, use `|` to
+    // separate inputs vs. outputs.
+    (
+        @inner $name:ident( $($arg:ident: $aty:ty),+ | $($rarg:ident: $rty:ty),+) -> $ret:ty
+    ) => {
+        #[no_mangle]
+        extern "C" fn $name($($arg: $aty,)+ $($rarg: $rty),+) -> $ret {
+            let ret;
+            (ret, $(*$rarg),+) = libm::$name($($arg),+);
+            ret
+        }
+    };
+}
+
+no_mangle! {
+    frexp(x: f64 | y: &mut c_int) -> f64;
+    frexpf(x: f32 | y: &mut c_int) -> f32;
+    acos(x: f64) -> f64;
+    acosf(x: f32) -> f32;
+    acosh(x: f64) -> f64;
+    acoshf(x: f32) -> f32;
+    asin(x: f64) -> f64;
+    asinf(x: f32) -> f32;
+    asinh(x: f64) -> f64;
+    asinhf(x: f32) -> f32;
+    atan(x: f64) -> f64;
+    atan2(x: f64, y: f64) -> f64;
+    atan2f(x: f32, y: f32) -> f32;
+    atanf(x: f32) -> f32;
+    atanh(x: f64) -> f64;
+    atanhf(x: f32) -> f32;
+    cbrt(x: f64) -> f64;
+    cbrtf(x: f32) -> f32;
+    ceil(x: f64) -> f64;
+    ceilf(x: f32) -> f32;
+    ceilf128(x: f128) -> f128;
+    ceilf16(x: f16) -> f16;
+    copysign(x: f64, y: f64) -> f64;
+    copysignf(x: f32, y: f32) -> f32;
+    copysignf128(x: f128, y: f128) -> f128;
+    copysignf16(x: f16, y: f16) -> f16;
+    cos(x: f64) -> f64;
+    cosf(x: f32) -> f32;
+    cosh(x: f64) -> f64;
+    coshf(x: f32) -> f32;
+    erf(x: f64) -> f64;
+    erfc(x: f64) -> f64;
+    erfcf(x: f32) -> f32;
+    erff(x: f32) -> f32;
+    exp(x: f64) -> f64;
+    exp10(x: f64) -> f64;
+    exp10f(x: f32) -> f32;
+    exp2(x: f64) -> f64;
+    exp2f(x: f32) -> f32;
+    expf(x: f32) -> f32;
+    expm1(x: f64) -> f64;
+    expm1f(x: f32) -> f32;
+    fabs(x: f64) -> f64;
+    fabsf(x: f32) -> f32;
+    fabsf128(x: f128) -> f128;
+    fabsf16(x: f16) -> f16;
+    fdim(x: f64, y: f64) -> f64;
+    fdimf(x: f32, y: f32) -> f32;
+    fdimf128(x: f128, y: f128) -> f128;
+    fdimf16(x: f16, y: f16) -> f16;
+    floor(x: f64) -> f64;
+    floorf(x: f32) -> f32;
+    floorf128(x: f128) -> f128;
+    floorf16(x: f16) -> f16;
+    fma(x: f64, y: f64, z: f64) -> f64;
+    fmaf(x: f32, y: f32, z: f32) -> f32;
+    fmax(x: f64, y: f64) -> f64;
+    fmaxf(x: f32, y: f32) -> f32;
+    fmin(x: f64, y: f64) -> f64;
+    fminf(x: f32, y: f32) -> f32;
+    fmod(x: f64, y: f64) -> f64;
+    fmodf(x: f32, y: f32) -> f32;
+    hypot(x: f64, y: f64) -> f64;
+    hypotf(x: f32, y: f32) -> f32;
+    ilogb(x: f64) -> c_int;
+    ilogbf(x: f32) -> c_int;
+    j0(x: f64) -> f64;
+    j0f(x: f32) -> f32;
+    j1(x: f64) -> f64;
+    j1f(x: f32) -> f32;
+    jn(x: c_int, y: f64) -> f64;
+    jnf(x: c_int, y: f32) -> f32;
+    ldexp(x: f64, y: c_int) -> f64;
+    ldexpf(x: f32, y: c_int) -> f32;
+    lgamma(x: f64) -> f64;
+    lgamma_r(x: f64 | r: &mut c_int) -> f64;
+    lgammaf(x: f32) -> f32;
+    lgammaf_r(x: f32 | r: &mut c_int) -> f32;
+    log(x: f64) -> f64;
+    log10(x: f64) -> f64;
+    log10f(x: f32) -> f32;
+    log1p(x: f64) -> f64;
+    log1pf(x: f32) -> f32;
+    log2(x: f64) -> f64;
+    log2f(x: f32) -> f32;
+    logf(x: f32) -> f32;
+    modf(x: f64 | r: &mut f64) -> f64;
+    modff(x: f32 | r: &mut f32) -> f32;
+    nextafter(x: f64, y: f64) -> f64;
+    nextafterf(x: f32, y: f32) -> f32;
+    pow(x: f64, y: f64) -> f64;
+    powf(x: f32, y: f32) -> f32;
+    remainder(x: f64, y: f64) -> f64;
+    remainderf(x: f32, y: f32) -> f32;
+    remquo(x: f64, y: f64 | q: &mut c_int) -> f64;
+    remquof(x: f32, y: f32 | q: &mut c_int) -> f32;
+    rint(x: f64) -> f64;
+    rintf(x: f32) -> f32;
+    rintf128(x: f128) -> f128;
+    rintf16(x: f16) -> f16;
+    round(x: f64) -> f64;
+    roundf(x: f32) -> f32;
+    scalbn(x: f64, y: c_int) -> f64;
+    scalbnf(x: f32, y: c_int) -> f32;
+    sin(x: f64) -> f64;
+    sinf(x: f32) -> f32;
+    sinh(x: f64) -> f64;
+    sinhf(x: f32) -> f32;
+    sqrt(x: f64) -> f64;
+    sqrtf(x: f32) -> f32;
+    tan(x: f64) -> f64;
+    tanf(x: f32) -> f32;
+    tanh(x: f64) -> f64;
+    tanhf(x: f32) -> f32;
+    tgamma(x: f64) -> f64;
+    tgammaf(x: f32) -> f32;
+    trunc(x: f64) -> f64;
+    truncf(x: f32) -> f32;
+    truncf128(x: f128) -> f128;
+    truncf16(x: f16) -> f16;
+    y0(x: f64) -> f64;
+    y0f(x: f32) -> f32;
+    y1(x: f64) -> f64;
+    y1f(x: f32) -> f32;
+    yn(x: c_int, y: f64) -> f64;
+    ynf(x: c_int, y: f32) -> f32;
+}
+
+/* sincos has no direct return type, not worth handling in the macro */
+
+#[no_mangle]
+extern "C" fn sincos(x: f64, s: &mut f64, c: &mut f64) {
+    (*s, *c) = libm::sincos(x);
+}
+
+#[no_mangle]
+extern "C" fn sincosf(x: f32, s: &mut f32, c: &mut f32) {
+    (*s, *c) = libm::sincosf(x);
+}
+
+#[panic_handler]
+fn panic(_info: &core::panic::PanicInfo) -> ! {
+    loop {}
+}
diff --git a/src/math/fma.rs b/src/math/fma.rs
index a54984c93..049f573cc 100644
--- a/src/math/fma.rs
+++ b/src/math/fma.rs
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: MIT */
 /* origin: musl src/math/fma.c. Ported to generic Rust algorithm in 2025, TG. */
 
-use super::super::support::{DInt, FpResult, HInt, IntTy, Round, Status};
+use super::support::{DInt, FpResult, HInt, IntTy, Round, Status};
 use super::{CastFrom, CastInto, Float, Int, MinInt};
 
 /// Fused multiply add (f64)
diff --git a/src/math/fma_wide.rs b/src/math/fma_wide.rs
index a8c1a5488..d0cf33baf 100644
--- a/src/math/fma_wide.rs
+++ b/src/math/fma_wide.rs
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: MIT */
 /* origin: musl src/math/fmaf.c. Ported to generic Rust algorithm in 2025, TG. */
 
-use super::super::support::{FpResult, IntTy, Round, Status};
+use super::support::{FpResult, IntTy, Round, Status};
 use super::{CastFrom, CastInto, DFloat, Float, HFloat, MinInt};
 
 // Placeholder so we can have `fmaf16` in the `Float` trait.

From 8dbc1405149d1eb4413b1965f9f5326f27366d0c Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 24 Feb 2025 04:06:07 +0000
Subject: [PATCH 253/279] Resolve monomorphization errors in
 `compiler-builtins`

`compiler-builtins` is not allowed to call anything from `core`;
however, there are a couple of cases where we do so in `libm` for debug
output. Gate relevant locations behind the `compiler-builtins` Cargo
feature.
---
 Cargo.toml                                     |  7 +++++++
 crates/compiler-builtins-smoke-test/Cargo.toml |  1 +
 crates/compiler-builtins-smoke-test/src/lib.rs |  2 ++
 src/math/support/hex_float.rs                  | 16 ++++++++++++++--
 src/math/support/int_traits.rs                 |  1 +
 5 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index f24f4423c..eb133dada 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -61,6 +61,13 @@ exclude = [
 [dev-dependencies]
 no-panic = "0.1.33"
 
+
+[lints.rust]
+unexpected_cfgs = { level = "warn", check-cfg = [
+  # compiler-builtins sets this feature, but we use it in `libm`
+  'cfg(feature, values("compiler-builtins"))',
+] }
+
 # The default release profile is unchanged.
 
 # Release mode with debug assertions
diff --git a/crates/compiler-builtins-smoke-test/Cargo.toml b/crates/compiler-builtins-smoke-test/Cargo.toml
index 24b33645e..6ef905ea7 100644
--- a/crates/compiler-builtins-smoke-test/Cargo.toml
+++ b/crates/compiler-builtins-smoke-test/Cargo.toml
@@ -22,6 +22,7 @@ unexpected_cfgs = { level = "warn", check-cfg = [
   "cfg(arch_enabled)",
   "cfg(assert_no_panic)",
   "cfg(intrinsics_enabled)",
+  'cfg(feature, values("compiler-builtins"))',
   'cfg(feature, values("force-soft-floats"))',
   'cfg(feature, values("unstable"))',
   'cfg(feature, values("unstable-intrinsics"))',
diff --git a/crates/compiler-builtins-smoke-test/src/lib.rs b/crates/compiler-builtins-smoke-test/src/lib.rs
index f9e6e75a8..e70f6d9e0 100644
--- a/crates/compiler-builtins-smoke-test/src/lib.rs
+++ b/crates/compiler-builtins-smoke-test/src/lib.rs
@@ -4,7 +4,9 @@
 //! Additionally, it provides a `#[no_mangle]` C API that can be easier to inspect than the
 //! default `.rlib`.
 
+#![compiler_builtins]
 #![feature(core_intrinsics)]
+#![feature(compiler_builtins)]
 #![feature(f16)]
 #![feature(f128)]
 #![allow(internal_features)]
diff --git a/src/math/support/hex_float.rs b/src/math/support/hex_float.rs
index 99ad8bec3..2155d5c58 100644
--- a/src/math/support/hex_float.rs
+++ b/src/math/support/hex_float.rs
@@ -246,7 +246,13 @@ fn fmt_any_hex<F: Float>(x: &F, f: &mut fmt::Formatter<'_>) -> fmt::Result {
 
 impl<F: Float> fmt::LowerHex for Hexf<F> {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        fmt_any_hex(&self.0, f)
+        cfg_if! {
+            if #[cfg(feature = "compiler-builtins")] {
+                unreachable!()
+            } else {
+                fmt_any_hex(&self.0, f)
+            }
+        }
     }
 }
 
@@ -264,7 +270,13 @@ impl<F: Float> fmt::LowerHex for Hexf<(F, i32)> {
 
 impl fmt::LowerHex for Hexf<i32> {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        fmt::LowerHex::fmt(&self.0, f)
+        cfg_if! {
+            if #[cfg(feature = "compiler-builtins")] {
+                unreachable!()
+            } else {
+                fmt::LowerHex::fmt(&self.0, f)
+            }
+        }
     }
 }
 
diff --git a/src/math/support/int_traits.rs b/src/math/support/int_traits.rs
index d34797764..f19c86835 100644
--- a/src/math/support/int_traits.rs
+++ b/src/math/support/int_traits.rs
@@ -418,6 +418,7 @@ macro_rules! cast_into_float {
     ($ty:ty; $($into:ty),*) => {$(
         impl CastInto<$into> for $ty {
             fn cast(self) -> $into {
+                #[cfg(not(feature = "compiler-builtins"))]
                 debug_assert_eq!(self as $into as $ty, self, "inexact float cast");
                 self as $into
             }

From aa6791f18618b55107ead3c654d3698503be7cb3 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 24 Feb 2025 04:55:38 +0000
Subject: [PATCH 254/279] Ignore unused variables when `compiler-builtins` is
 set

---
 crates/compiler-builtins-smoke-test/Cargo.toml | 6 +++---
 src/math/support/hex_float.rs                  | 6 ++++--
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/crates/compiler-builtins-smoke-test/Cargo.toml b/crates/compiler-builtins-smoke-test/Cargo.toml
index 6ef905ea7..38a511669 100644
--- a/crates/compiler-builtins-smoke-test/Cargo.toml
+++ b/crates/compiler-builtins-smoke-test/Cargo.toml
@@ -11,18 +11,18 @@ test = false
 bench = false
 
 [features]
-default = ["arch", "unstable-float"]
+default = ["arch", "compiler-builtins", "unstable-float"]
 
 # Copied from `libm`'s root `Cargo.toml`'
-unstable-float = []
 arch = []
+compiler-builtins = []
+unstable-float = []
 
 [lints.rust]
 unexpected_cfgs = { level = "warn", check-cfg = [
   "cfg(arch_enabled)",
   "cfg(assert_no_panic)",
   "cfg(intrinsics_enabled)",
-  'cfg(feature, values("compiler-builtins"))',
   'cfg(feature, values("force-soft-floats"))',
   'cfg(feature, values("unstable"))',
   'cfg(feature, values("unstable-intrinsics"))',
diff --git a/src/math/support/hex_float.rs b/src/math/support/hex_float.rs
index 2155d5c58..0ecf61695 100644
--- a/src/math/support/hex_float.rs
+++ b/src/math/support/hex_float.rs
@@ -248,7 +248,8 @@ impl<F: Float> fmt::LowerHex for Hexf<F> {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         cfg_if! {
             if #[cfg(feature = "compiler-builtins")] {
-                unreachable!()
+                let _ = f;
+                unimplemented!()
             } else {
                 fmt_any_hex(&self.0, f)
             }
@@ -272,7 +273,8 @@ impl fmt::LowerHex for Hexf<i32> {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         cfg_if! {
             if #[cfg(feature = "compiler-builtins")] {
-                unreachable!()
+                let _ = f;
+                unimplemented!()
             } else {
                 fmt::LowerHex::fmt(&self.0, f)
             }

From cb14a239d5f91f7cf18fedf4640794eea6bd0fda Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 24 Feb 2025 05:45:41 +0000
Subject: [PATCH 255/279] Configure out remaining formatting when
 `compiler-builtins` is set

These are still causing errors in the compiler-builtins CI.
---
 src/math/support/hex_float.rs | 42 +++++++++++++++++++++++++++++++----
 1 file changed, 38 insertions(+), 4 deletions(-)

diff --git a/src/math/support/hex_float.rs b/src/math/support/hex_float.rs
index 0ecf61695..be7d7607f 100644
--- a/src/math/support/hex_float.rs
+++ b/src/math/support/hex_float.rs
@@ -211,6 +211,7 @@ const fn u128_ilog2(v: u128) -> u32 {
 pub struct Hexf<F>(pub F);
 
 // Adapted from https://github.com/ericseppanen/hexfloat2/blob/a5c27932f0ff/src/format.rs
+#[cfg(not(feature = "compiler-builtins"))]
 fn fmt_any_hex<F: Float>(x: &F, f: &mut fmt::Formatter<'_>) -> fmt::Result {
     if x.is_sign_negative() {
         write!(f, "-")?;
@@ -244,6 +245,11 @@ fn fmt_any_hex<F: Float>(x: &F, f: &mut fmt::Formatter<'_>) -> fmt::Result {
     write!(f, "0x{leading}{sig:0mwidth$x}p{exponent:+}")
 }
 
+#[cfg(feature = "compiler-builtins")]
+fn fmt_any_hex<F: Float>(_x: &F, _f: &mut fmt::Formatter<'_>) -> fmt::Result {
+    unimplemented!()
+}
+
 impl<F: Float> fmt::LowerHex for Hexf<F> {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         cfg_if! {
@@ -259,13 +265,27 @@ impl<F: Float> fmt::LowerHex for Hexf<F> {
 
 impl<F: Float> fmt::LowerHex for Hexf<(F, F)> {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(f, "({:x}, {:x})", Hexf(self.0.0), Hexf(self.0.1))
+        cfg_if! {
+            if #[cfg(feature = "compiler-builtins")] {
+                let _ = f;
+                unimplemented!()
+            } else {
+                write!(f, "({:x}, {:x})", Hexf(self.0.0), Hexf(self.0.1))
+            }
+        }
     }
 }
 
 impl<F: Float> fmt::LowerHex for Hexf<(F, i32)> {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(f, "({:x}, {:x})", Hexf(self.0.0), Hexf(self.0.1))
+        cfg_if! {
+            if #[cfg(feature = "compiler-builtins")] {
+                let _ = f;
+                unimplemented!()
+            } else {
+                write!(f, "({:x}, {:x})", Hexf(self.0.0), Hexf(self.0.1))
+            }
+        }
     }
 }
 
@@ -287,7 +307,14 @@ where
     Hexf<T>: fmt::LowerHex,
 {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        fmt::LowerHex::fmt(self, f)
+        cfg_if! {
+            if #[cfg(feature = "compiler-builtins")] {
+                let _ = f;
+                unimplemented!()
+            } else {
+                fmt::LowerHex::fmt(self, f)
+            }
+        }
     }
 }
 
@@ -296,7 +323,14 @@ where
     Hexf<T>: fmt::LowerHex,
 {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        fmt::LowerHex::fmt(self, f)
+        cfg_if! {
+            if #[cfg(feature = "compiler-builtins")] {
+                let _ = f;
+                unimplemented!()
+            } else {
+                fmt::LowerHex::fmt(self, f)
+            }
+        }
     }
 }
 

From 69219c491ee9f05761d2068fd6d4c7c0de6faa3a Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 24 Feb 2025 06:52:48 +0000
Subject: [PATCH 256/279] Gate another assertion behind `compiler-builtins`

This is causing link errors on Windows.
---
 src/math/support/int_traits.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/math/support/int_traits.rs b/src/math/support/int_traits.rs
index f19c86835..491adb1f2 100644
--- a/src/math/support/int_traits.rs
+++ b/src/math/support/int_traits.rs
@@ -394,6 +394,7 @@ macro_rules! cast_into {
             fn cast(self) -> $into {
                 // All we can really do to enforce casting rules is check the rules when in
                 // debug mode.
+                #[cfg(not(feature = "compiler-builtins"))]
                 debug_assert!(<$into>::try_from(self).is_ok(), "failed cast from {self}");
                 self as $into
             }

From c9672e5a1a75bfa82981b6240b7bc3ed3524b8b3 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Tue, 18 Mar 2025 23:51:19 +0000
Subject: [PATCH 257/279] Upgrade all dependencies to the latest

This is mostly done to get the latest version of `rand`, which includes
some breaking changes.
---
 Cargo.toml                         |  3 +--
 crates/libm-macros/Cargo.toml      |  6 +++---
 crates/libm-test/Cargo.toml        | 14 +++++---------
 crates/libm-test/src/gen/random.rs | 10 +++++-----
 crates/libm-test/tests/u256.rs     | 10 +++++-----
 crates/musl-math-sys/Cargo.toml    |  2 +-
 6 files changed, 20 insertions(+), 25 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index eb133dada..e0aeb07d5 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -59,8 +59,7 @@ exclude = [
 ]
 
 [dev-dependencies]
-no-panic = "0.1.33"
-
+no-panic = "0.1.35"
 
 [lints.rust]
 unexpected_cfgs = { level = "warn", check-cfg = [
diff --git a/crates/libm-macros/Cargo.toml b/crates/libm-macros/Cargo.toml
index f0de0e176..314f4ae37 100644
--- a/crates/libm-macros/Cargo.toml
+++ b/crates/libm-macros/Cargo.toml
@@ -9,9 +9,9 @@ proc-macro = true
 
 [dependencies]
 heck = "0.5.0"
-proc-macro2 = "1.0.93"
-quote = "1.0.38"
-syn = { version = "2.0.96", features = ["full", "extra-traits", "visit-mut"] }
+proc-macro2 = "1.0.94"
+quote = "1.0.40"
+syn = { version = "2.0.100", features = ["full", "extra-traits", "visit-mut"] }
 
 [lints.rust]
 # Values used during testing
diff --git a/crates/libm-test/Cargo.toml b/crates/libm-test/Cargo.toml
index dcbddb667..98da73cea 100644
--- a/crates/libm-test/Cargo.toml
+++ b/crates/libm-test/Cargo.toml
@@ -27,26 +27,22 @@ icount = ["dep:iai-callgrind"]
 short-benchmarks = []
 
 [dependencies]
-anyhow = "1.0.95"
+anyhow = "1.0.97"
 # This is not directly used but is required so we can enable `gmp-mpfr-sys/force-cross`.
 gmp-mpfr-sys = { version = "1.6.4", optional = true, default-features = false }
 iai-callgrind = { version = "0.14.0", optional = true }
-indicatif = { version = "0.17.9", default-features = false }
+indicatif = { version = "0.17.11", default-features = false }
 libm = { path = "../..", features = ["unstable-public-internals"] }
 libm-macros = { path = "../libm-macros" }
 musl-math-sys = { path = "../musl-math-sys", optional = true }
 paste = "1.0.15"
-rand = "0.8.5"
-rand_chacha = "0.3.1"
+rand = "0.9.0"
+rand_chacha = "0.9.0"
 rayon = "1.10.0"
 rug = { version = "1.27.0", optional = true, default-features = false, features = ["float", "integer", "std"] }
 
-[target.'cfg(target_family = "wasm")'.dependencies]
-# Enable randomness on WASM
-getrandom = { version = "0.2", features = ["js"] }
-
 [build-dependencies]
-rand = { version = "0.8.5", optional = true }
+rand = { version = "0.9.0", optional = true }
 
 [dev-dependencies]
 criterion = { version = "0.5.1", default-features = false, features = ["cargo_bench_support"] }
diff --git a/crates/libm-test/src/gen/random.rs b/crates/libm-test/src/gen/random.rs
index c2cd172d1..e8a7ee905 100644
--- a/crates/libm-test/src/gen/random.rs
+++ b/crates/libm-test/src/gen/random.rs
@@ -3,7 +3,7 @@ use std::ops::RangeInclusive;
 use std::sync::LazyLock;
 
 use libm::support::Float;
-use rand::distributions::{Alphanumeric, Standard};
+use rand::distr::{Alphanumeric, StandardUniform};
 use rand::prelude::Distribution;
 use rand::{Rng, SeedableRng};
 use rand_chacha::ChaCha8Rng;
@@ -16,7 +16,7 @@ pub(crate) const SEED_ENV: &str = "LIBM_SEED";
 
 pub static SEED: LazyLock<[u8; 32]> = LazyLock::new(|| {
     let s = env::var(SEED_ENV).unwrap_or_else(|_| {
-        let mut rng = rand::thread_rng();
+        let mut rng = rand::rng();
         (0..32).map(|_| rng.sample(Alphanumeric) as char).collect()
     });
 
@@ -33,19 +33,19 @@ pub trait RandomInput: Sized {
 /// Generate a sequence of deterministically random floats.
 fn random_floats<F: Float>(count: u64) -> impl Iterator<Item = F>
 where
-    Standard: Distribution<F::Int>,
+    StandardUniform: Distribution<F::Int>,
 {
     let mut rng = ChaCha8Rng::from_seed(*SEED);
 
     // Generate integers to get a full range of bitpatterns (including NaNs), then convert back
     // to the float type.
-    (0..count).map(move |_| F::from_bits(rng.gen::<F::Int>()))
+    (0..count).map(move |_| F::from_bits(rng.random::<F::Int>()))
 }
 
 /// Generate a sequence of deterministically random `i32`s within a specified range.
 fn random_ints(count: u64, range: RangeInclusive<i32>) -> impl Iterator<Item = i32> {
     let mut rng = ChaCha8Rng::from_seed(*SEED);
-    (0..count).map(move |_| rng.gen_range::<i32, _>(range.clone()))
+    (0..count).map(move |_| rng.random_range::<i32, _>(range.clone()))
 }
 
 macro_rules! impl_random_input {
diff --git a/crates/libm-test/tests/u256.rs b/crates/libm-test/tests/u256.rs
index 4174820c0..460353424 100644
--- a/crates/libm-test/tests/u256.rs
+++ b/crates/libm-test/tests/u256.rs
@@ -25,8 +25,8 @@ fn hexu(v: u256) -> String {
 }
 
 fn random_u256(rng: &mut ChaCha8Rng) -> u256 {
-    let lo: u128 = rng.gen();
-    let hi: u128 = rng.gen();
+    let lo: u128 = rng.random();
+    let hi: u128 = rng.random();
     u256 { lo, hi }
 }
 
@@ -121,7 +121,7 @@ fn mp_u256_shr() {
 
     for _ in 0..bigint_fuzz_iteration_count() {
         let x = random_u256(&mut rng);
-        let shift: u32 = rng.gen_range(0..255);
+        let shift: u32 = rng.random_range(0..255);
         assign_bigint(&mut bx, x);
         let actual = x >> shift;
         bx >>= shift;
@@ -136,8 +136,8 @@ fn mp_u256_widen_mul() {
     let mut by = BigInt::new();
 
     for _ in 0..bigint_fuzz_iteration_count() {
-        let x: u128 = rng.gen();
-        let y: u128 = rng.gen();
+        let x: u128 = rng.random();
+        let y: u128 = rng.random();
         bx.assign(x);
         by.assign(y);
         let actual = x.widen_mul(y);
diff --git a/crates/musl-math-sys/Cargo.toml b/crates/musl-math-sys/Cargo.toml
index cde78fd3c..34682b74c 100644
--- a/crates/musl-math-sys/Cargo.toml
+++ b/crates/musl-math-sys/Cargo.toml
@@ -10,4 +10,4 @@ publish = false
 libm = { path = "../../" }
 
 [build-dependencies]
-cc = "1.2.10"
+cc = "1.2.16"

From 63d069ae1f5a8e1d6182119dbb45f93cb5de7baf Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 9 Apr 2025 01:56:14 +0000
Subject: [PATCH 258/279] Replace calls to `core::arch` intrinsics with
 assembly

Some backends may replace calls to `core::arch` with multiple calls to
`sqrt` [1], which becomes recursive. Help mitigate this by replacing the
call with assembly.

Results in the same assembly as the current implementation when built
with optimizations.

[1]: https://github.com/rust-lang/compiler-builtins/issues/649
---
 src/math/arch/i686.rs | 35 ++++++++++++++++++++---------------
 1 file changed, 20 insertions(+), 15 deletions(-)

diff --git a/src/math/arch/i686.rs b/src/math/arch/i686.rs
index ad54d8b61..3e1d19bfa 100644
--- a/src/math/arch/i686.rs
+++ b/src/math/arch/i686.rs
@@ -1,22 +1,27 @@
 //! Architecture-specific support for x86-32 and x86-64 with SSE2
 
-#[cfg(target_arch = "x86")]
-use core::arch::x86::*;
-#[cfg(target_arch = "x86_64")]
-use core::arch::x86_64::*;
-
-pub fn sqrtf(x: f32) -> f32 {
+pub fn sqrtf(mut x: f32) -> f32 {
+    // SAFETY: `sqrtss` is part of `sse2`, which this module is gated behind. It has no memory
+    // access or side effects.
     unsafe {
-        let m = _mm_set_ss(x);
-        let m_sqrt = _mm_sqrt_ss(m);
-        _mm_cvtss_f32(m_sqrt)
-    }
+        core::arch::asm!(
+            "sqrtss {x}, {x}",
+            x = inout(xmm_reg) x,
+            options(nostack, nomem, pure),
+        )
+    };
+    x
 }
 
-pub fn sqrt(x: f64) -> f64 {
+pub fn sqrt(mut x: f64) -> f64 {
+    // SAFETY: `sqrtsd` is part of `sse2`, which this module is gated behind. It has no memory
+    // access or side effects.
     unsafe {
-        let m = _mm_set_sd(x);
-        let m_sqrt = _mm_sqrt_pd(m);
-        _mm_cvtsd_f64(m_sqrt)
-    }
+        core::arch::asm!(
+            "sqrtsd {x}, {x}",
+            x = inout(xmm_reg) x,
+            options(nostack, nomem, pure),
+        )
+    };
+    x
 }

From 4eb670555c26e5a4195e5447f14c7ab98ff157a8 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 9 Apr 2025 02:22:15 +0000
Subject: [PATCH 259/279] Resolve small errors identified by recent clippy

---
 crates/libm-test/src/precision.rs | 1 +
 src/math/support/env.rs           | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/libm-test/src/precision.rs b/crates/libm-test/src/precision.rs
index 8916b43ab..f5fb5f670 100644
--- a/crates/libm-test/src/precision.rs
+++ b/crates/libm-test/src/precision.rs
@@ -13,6 +13,7 @@ use crate::{BaseName, CheckBasis, CheckCtx, Float, Identifier, Int, TestResult};
 pub struct SpecialCase;
 
 /// ULP allowed to differ from the results returned by a test basis.
+#[allow(clippy::single_match)]
 pub fn default_ulp(ctx: &CheckCtx) -> u32 {
     // ULP compared to the infinite (MPFR) result.
     let mut ulp = match ctx.base_name {
diff --git a/src/math/support/env.rs b/src/math/support/env.rs
index 7244381da..c05890d98 100644
--- a/src/math/support/env.rs
+++ b/src/math/support/env.rs
@@ -70,7 +70,6 @@ impl Status {
     /// The default result for division is +/-inf based on operand sign. For `logB`, the default
     /// result is -inf.
     /// `x / y` when `x != 0.0` and `y == 0.0`,
-
     #[cfg_attr(not(feature = "unstable-public-internals"), allow(dead_code))]
     pub const DIVIDE_BY_ZERO: Self = Self(1 << 2);
 

From 96d1400326f47381858f8149451a2b2fd8de2ea4 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 23 Jan 2025 01:46:24 +0000
Subject: [PATCH 260/279] Add assembly version of simple operations on aarch64

Replace `core::arch` versions of the following with handwritten
assembly, which avoids recursion issues (cg_gcc using `rint` as a
fallback) as well as problems with `aarch64be`.

* `rint`
* `rintf`

Additionally, add assembly versions of the following:

* `fma`
* `fmaf`
* `sqrt`
* `sqrtf`

If the `fp16` target feature is available, which implies `neon`, also
include the following:

* `rintf16`
* `sqrtf16`

`sqrt` is added to match the implementation for `x86`. `fma` is included
since it is used by many other routines.

There are a handful of other operations that have assembly
implementations. They are omitted here because we should have basic
float math routines available in `core` in the near future, which will
allow us to defer to LLVM for assembly lowering rather than implementing
these ourselves.
---
 etc/function-definitions.json |   6 ++
 src/math/arch/aarch64.rs      | 126 ++++++++++++++++++++++++++++------
 src/math/arch/mod.rs          |  21 ++++--
 src/math/fma.rs               |   6 ++
 src/math/fma_wide.rs          |   6 ++
 src/math/rint.rs              |  10 ++-
 src/math/sqrt.rs              |   1 +
 src/math/sqrtf.rs             |   1 +
 src/math/sqrtf16.rs           |   6 ++
 9 files changed, 155 insertions(+), 28 deletions(-)

diff --git a/etc/function-definitions.json b/etc/function-definitions.json
index 64a775ba9..bca58402f 100644
--- a/etc/function-definitions.json
+++ b/etc/function-definitions.json
@@ -342,12 +342,14 @@
     },
     "fma": {
         "sources": [
+            "src/math/arch/aarch64.rs",
             "src/math/fma.rs"
         ],
         "type": "f64"
     },
     "fmaf": {
         "sources": [
+            "src/math/arch/aarch64.rs",
             "src/math/fma_wide.rs"
         ],
         "type": "f32"
@@ -806,6 +808,7 @@
     },
     "rintf16": {
         "sources": [
+            "src/math/arch/aarch64.rs",
             "src/math/rint.rs"
         ],
         "type": "f16"
@@ -928,6 +931,7 @@
     },
     "sqrt": {
         "sources": [
+            "src/math/arch/aarch64.rs",
             "src/math/arch/i686.rs",
             "src/math/arch/wasm32.rs",
             "src/math/generic/sqrt.rs",
@@ -937,6 +941,7 @@
     },
     "sqrtf": {
         "sources": [
+            "src/math/arch/aarch64.rs",
             "src/math/arch/i686.rs",
             "src/math/arch/wasm32.rs",
             "src/math/generic/sqrt.rs",
@@ -953,6 +958,7 @@
     },
     "sqrtf16": {
         "sources": [
+            "src/math/arch/aarch64.rs",
             "src/math/generic/sqrt.rs",
             "src/math/sqrtf16.rs"
         ],
diff --git a/src/math/arch/aarch64.rs b/src/math/arch/aarch64.rs
index 374ec11bf..020bb731c 100644
--- a/src/math/arch/aarch64.rs
+++ b/src/math/arch/aarch64.rs
@@ -1,33 +1,115 @@
-use core::arch::aarch64::{
-    float32x2_t, float64x1_t, vdup_n_f32, vdup_n_f64, vget_lane_f32, vget_lane_f64, vrndn_f32,
-    vrndn_f64,
-};
+//! Architecture-specific support for aarch64 with neon.
 
-pub fn rint(x: f64) -> f64 {
-    // SAFETY: only requires target_feature=neon, ensured by `cfg_if` in parent module.
-    let x_vec: float64x1_t = unsafe { vdup_n_f64(x) };
+use core::arch::asm;
 
-    // SAFETY: only requires target_feature=neon, ensured by `cfg_if` in parent module.
-    let result_vec: float64x1_t = unsafe { vrndn_f64(x_vec) };
+pub fn fma(mut x: f64, y: f64, z: f64) -> f64 {
+    // SAFETY: `fmadd` is available with neon and has no side effects.
+    unsafe {
+        asm!(
+            "fmadd {x:d}, {x:d}, {y:d}, {z:d}",
+            x = inout(vreg) x,
+            y = in(vreg) y,
+            z = in(vreg) z,
+            options(nomem, nostack, pure)
+        );
+    }
+    x
+}
 
-    // SAFETY: only requires target_feature=neon, ensured by `cfg_if` in parent module.
-    let result: f64 = unsafe { vget_lane_f64::<0>(result_vec) };
+pub fn fmaf(mut x: f32, y: f32, z: f32) -> f32 {
+    // SAFETY: `fmadd` is available with neon and has no side effects.
+    unsafe {
+        asm!(
+            "fmadd {x:s}, {x:s}, {y:s}, {z:s}",
+            x = inout(vreg) x,
+            y = in(vreg) y,
+            z = in(vreg) z,
+            options(nomem, nostack, pure)
+        );
+    }
+    x
+}
 
-    result
+pub fn rint(mut x: f64) -> f64 {
+    // SAFETY: `frintn` is available with neon and has no side effects.
+    //
+    // `frintn` is always round-to-nearest which does not match the C specification, but Rust does
+    // not support rounding modes.
+    unsafe {
+        asm!(
+            "frintn {x:d}, {x:d}",
+            x = inout(vreg) x,
+            options(nomem, nostack, pure)
+        );
+    }
+    x
 }
 
-pub fn rintf(x: f32) -> f32 {
-    // There's a scalar form of this instruction (FRINTN) but core::arch doesn't expose it, so we
-    // have to use the vector form and drop the other lanes afterwards.
+pub fn rintf(mut x: f32) -> f32 {
+    // SAFETY: `frintn` is available with neon and has no side effects.
+    //
+    // `frintn` is always round-to-nearest which does not match the C specification, but Rust does
+    // not support rounding modes.
+    unsafe {
+        asm!(
+            "frintn {x:s}, {x:s}",
+            x = inout(vreg) x,
+            options(nomem, nostack, pure)
+        );
+    }
+    x
+}
 
-    // SAFETY: only requires target_feature=neon, ensured by `cfg_if` in parent module.
-    let x_vec: float32x2_t = unsafe { vdup_n_f32(x) };
+#[cfg(all(f16_enabled, target_feature = "fp16"))]
+pub fn rintf16(mut x: f16) -> f16 {
+    // SAFETY: `frintn` is available for `f16` with `fp16` (implies `neon`) and has no side effects.
+    //
+    // `frintn` is always round-to-nearest which does not match the C specification, but Rust does
+    // not support rounding modes.
+    unsafe {
+        asm!(
+            "frintn {x:h}, {x:h}",
+            x = inout(vreg) x,
+            options(nomem, nostack, pure)
+        );
+    }
+    x
+}
 
-    // SAFETY: only requires target_feature=neon, ensured by `cfg_if` in parent module.
-    let result_vec: float32x2_t = unsafe { vrndn_f32(x_vec) };
+pub fn sqrt(mut x: f64) -> f64 {
+    // SAFETY: `fsqrt` is available with neon and has no side effects.
+    unsafe {
+        asm!(
+            "fsqrt {x:d}, {x:d}",
+            x = inout(vreg) x,
+            options(nomem, nostack, pure)
+        );
+    }
+    x
+}
 
-    // SAFETY: only requires target_feature=neon, ensured by `cfg_if` in parent module.
-    let result: f32 = unsafe { vget_lane_f32::<0>(result_vec) };
+pub fn sqrtf(mut x: f32) -> f32 {
+    // SAFETY: `fsqrt` is available with neon and has no side effects.
+    unsafe {
+        asm!(
+            "fsqrt {x:s}, {x:s}",
+            x = inout(vreg) x,
+            options(nomem, nostack, pure)
+        );
+    }
+    x
+}
 
-    result
+#[cfg(all(f16_enabled, target_feature = "fp16"))]
+pub fn sqrtf16(mut x: f16) -> f16 {
+    // SAFETY: `fsqrt` is available for `f16` with `fp16` (implies `neon`) and has no
+    // side effects.
+    unsafe {
+        asm!(
+            "fsqrt {x:h}, {x:h}",
+            x = inout(vreg) x,
+            options(nomem, nostack, pure)
+        );
+    }
+    x
 }
diff --git a/src/math/arch/mod.rs b/src/math/arch/mod.rs
index 091d7650a..d9f2aad66 100644
--- a/src/math/arch/mod.rs
+++ b/src/math/arch/mod.rs
@@ -18,12 +18,25 @@ cfg_if! {
         mod i686;
         pub use i686::{sqrt, sqrtf};
     } else if #[cfg(all(
-        target_arch = "aarch64", // TODO: also arm64ec?
-        target_feature = "neon",
-        target_endian = "little", // see https://github.com/rust-lang/stdarch/issues/1484
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_feature = "neon"
     ))] {
         mod aarch64;
-        pub use aarch64::{rint, rintf};
+
+        pub use aarch64::{
+            fma,
+            fmaf,
+            rint,
+            rintf,
+            sqrt,
+            sqrtf,
+        };
+
+        #[cfg(all(f16_enabled, target_feature = "fp16"))]
+        pub use aarch64::{
+            rintf16,
+            sqrtf16,
+        };
     }
 }
 
diff --git a/src/math/fma.rs b/src/math/fma.rs
index 049f573cc..789b0836a 100644
--- a/src/math/fma.rs
+++ b/src/math/fma.rs
@@ -9,6 +9,12 @@ use super::{CastFrom, CastInto, Float, Int, MinInt};
 /// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision).
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn fma(x: f64, y: f64, z: f64) -> f64 {
+    select_implementation! {
+        name: fma,
+        use_arch: all(target_arch = "aarch64", target_feature = "neon"),
+        args: x, y, z,
+    }
+
     fma_round(x, y, z, Round::Nearest).val
 }
 
diff --git a/src/math/fma_wide.rs b/src/math/fma_wide.rs
index d0cf33baf..8e908a14f 100644
--- a/src/math/fma_wide.rs
+++ b/src/math/fma_wide.rs
@@ -17,6 +17,12 @@ pub(crate) fn fmaf16(_x: f16, _y: f16, _z: f16) -> f16 {
 /// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision).
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn fmaf(x: f32, y: f32, z: f32) -> f32 {
+    select_implementation! {
+        name: fmaf,
+        use_arch: all(target_arch = "aarch64", target_feature = "neon"),
+        args: x, y, z,
+    }
+
     fma_wide_round(x, y, z, Round::Nearest).val
 }
 
diff --git a/src/math/rint.rs b/src/math/rint.rs
index 8a5cbeab4..e1c32c943 100644
--- a/src/math/rint.rs
+++ b/src/math/rint.rs
@@ -4,6 +4,12 @@ use super::support::Round;
 #[cfg(f16_enabled)]
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn rintf16(x: f16) -> f16 {
+    select_implementation! {
+        name: rintf16,
+        use_arch: all(target_arch = "aarch64", target_feature = "fp16"),
+        args: x,
+    }
+
     super::generic::rint_round(x, Round::Nearest).val
 }
 
@@ -13,8 +19,8 @@ pub fn rintf(x: f32) -> f32 {
     select_implementation! {
         name: rintf,
         use_arch: any(
+            all(target_arch = "aarch64", target_feature = "neon"),
             all(target_arch = "wasm32", intrinsics_enabled),
-            all(target_arch = "aarch64", target_feature = "neon", target_endian = "little"),
         ),
         args: x,
     }
@@ -28,8 +34,8 @@ pub fn rint(x: f64) -> f64 {
     select_implementation! {
         name: rint,
         use_arch: any(
+            all(target_arch = "aarch64", target_feature = "neon"),
             all(target_arch = "wasm32", intrinsics_enabled),
-            all(target_arch = "aarch64", target_feature = "neon", target_endian = "little"),
         ),
         args: x,
     }
diff --git a/src/math/sqrt.rs b/src/math/sqrt.rs
index 0e1d0cd2c..2bfc42bcf 100644
--- a/src/math/sqrt.rs
+++ b/src/math/sqrt.rs
@@ -4,6 +4,7 @@ pub fn sqrt(x: f64) -> f64 {
     select_implementation! {
         name: sqrt,
         use_arch: any(
+            all(target_arch = "aarch64", target_feature = "neon"),
             all(target_arch = "wasm32", intrinsics_enabled),
             target_feature = "sse2"
         ),
diff --git a/src/math/sqrtf.rs b/src/math/sqrtf.rs
index 2e69a4b66..c28a705e3 100644
--- a/src/math/sqrtf.rs
+++ b/src/math/sqrtf.rs
@@ -4,6 +4,7 @@ pub fn sqrtf(x: f32) -> f32 {
     select_implementation! {
         name: sqrtf,
         use_arch: any(
+            all(target_arch = "aarch64", target_feature = "neon"),
             all(target_arch = "wasm32", intrinsics_enabled),
             target_feature = "sse2"
         ),
diff --git a/src/math/sqrtf16.rs b/src/math/sqrtf16.rs
index 549bf902c..7bedb7f8b 100644
--- a/src/math/sqrtf16.rs
+++ b/src/math/sqrtf16.rs
@@ -1,5 +1,11 @@
 /// The square root of `x` (f16).
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn sqrtf16(x: f16) -> f16 {
+    select_implementation! {
+        name: sqrtf16,
+        use_arch: all(target_arch = "aarch64", target_feature = "fp16"),
+        args: x,
+    }
+
     return super::generic::sqrt(x);
 }

From 3020d64a688d6da460963c848227076bf47b329e Mon Sep 17 00:00:00 2001
From: quaternic <57393910+quaternic@users.noreply.github.com>
Date: Tue, 15 Apr 2025 03:46:12 +0300
Subject: [PATCH 261/279] Implement rounding for the hex float parsing and
 prepare to improve error handling

Parsing errors are now bubbled up part of the way, but that needs some
more work.

Rounding should be correct, and the `Status` returned by `parse_any`
should have the correct bits set. These are used for the current (unchanged)
behavior of the surface level functions like `hf64`: panic on invalid inputs, or
values that aren't exactly representable.
---
 crates/libm-test/src/f8_impl.rs |   5 +-
 src/math/support/env.rs         |  16 +-
 src/math/support/hex_float.rs   | 495 +++++++++++++++++++++++++-------
 3 files changed, 405 insertions(+), 111 deletions(-)

diff --git a/crates/libm-test/src/f8_impl.rs b/crates/libm-test/src/f8_impl.rs
index 0683d8392..6772e092c 100644
--- a/crates/libm-test/src/f8_impl.rs
+++ b/crates/libm-test/src/f8_impl.rs
@@ -3,8 +3,6 @@
 use std::cmp::{self, Ordering};
 use std::{fmt, ops};
 
-use libm::support::hex_float::parse_any;
-
 use crate::Float;
 
 /// Sometimes verifying float logic is easiest when all values can quickly be checked exhaustively
@@ -499,5 +497,6 @@ impl fmt::LowerHex for f8 {
 }
 
 pub const fn hf8(s: &str) -> f8 {
-    f8(parse_any(s, 8, 3) as u8)
+    let Ok(bits) = libm::support::hex_float::parse_hex_exact(s, 8, 3) else { panic!() };
+    f8(bits as u8)
 }
diff --git a/src/math/support/env.rs b/src/math/support/env.rs
index c05890d98..796309372 100644
--- a/src/math/support/env.rs
+++ b/src/math/support/env.rs
@@ -46,7 +46,7 @@ pub enum Round {
 }
 
 /// IEEE 754 exception status flags.
-#[derive(Clone, Copy, Debug, PartialEq)]
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
 pub struct Status(u8);
 
 impl Status {
@@ -90,16 +90,22 @@ impl Status {
 
     /// True if `UNDERFLOW` is set.
     #[cfg_attr(not(feature = "unstable-public-internals"), allow(dead_code))]
-    pub fn underflow(self) -> bool {
+    pub const fn underflow(self) -> bool {
         self.0 & Self::UNDERFLOW.0 != 0
     }
 
+    /// True if `OVERFLOW` is set.
+    #[cfg_attr(not(feature = "unstable-public-internals"), allow(dead_code))]
+    pub const fn overflow(self) -> bool {
+        self.0 & Self::OVERFLOW.0 != 0
+    }
+
     pub fn set_underflow(&mut self, val: bool) {
         self.set_flag(val, Self::UNDERFLOW);
     }
 
     /// True if `INEXACT` is set.
-    pub fn inexact(self) -> bool {
+    pub const fn inexact(self) -> bool {
         self.0 & Self::INEXACT.0 != 0
     }
 
@@ -114,4 +120,8 @@ impl Status {
             self.0 &= !mask.0;
         }
     }
+
+    pub(crate) const fn with(self, rhs: Self) -> Self {
+        Self(self.0 | rhs.0)
+    }
 }
diff --git a/src/math/support/hex_float.rs b/src/math/support/hex_float.rs
index be7d7607f..819e2f56e 100644
--- a/src/math/support/hex_float.rs
+++ b/src/math/support/hex_float.rs
@@ -2,149 +2,260 @@
 
 use core::fmt;
 
-use super::{Float, f32_from_bits, f64_from_bits};
+use super::{Float, Round, Status, f32_from_bits, f64_from_bits};
 
 /// Construct a 16-bit float from hex float representation (C-style)
 #[cfg(f16_enabled)]
 pub const fn hf16(s: &str) -> f16 {
-    f16::from_bits(parse_any(s, 16, 10) as u16)
+    match parse_hex_exact(s, 16, 10) {
+        Ok(bits) => f16::from_bits(bits as u16),
+        Err(HexFloatParseError(s)) => panic!("{}", s),
+    }
 }
 
 /// Construct a 32-bit float from hex float representation (C-style)
 #[allow(unused)]
 pub const fn hf32(s: &str) -> f32 {
-    f32_from_bits(parse_any(s, 32, 23) as u32)
+    match parse_hex_exact(s, 32, 23) {
+        Ok(bits) => f32_from_bits(bits as u32),
+        Err(HexFloatParseError(s)) => panic!("{}", s),
+    }
 }
 
 /// Construct a 64-bit float from hex float representation (C-style)
 pub const fn hf64(s: &str) -> f64 {
-    f64_from_bits(parse_any(s, 64, 52) as u64)
+    match parse_hex_exact(s, 64, 52) {
+        Ok(bits) => f64_from_bits(bits as u64),
+        Err(HexFloatParseError(s)) => panic!("{}", s),
+    }
 }
 
 /// Construct a 128-bit float from hex float representation (C-style)
 #[cfg(f128_enabled)]
 pub const fn hf128(s: &str) -> f128 {
-    f128::from_bits(parse_any(s, 128, 112))
+    match parse_hex_exact(s, 128, 112) {
+        Ok(bits) => f128::from_bits(bits),
+        Err(HexFloatParseError(s)) => panic!("{}", s),
+    }
+}
+#[derive(Copy, Clone, Debug)]
+pub struct HexFloatParseError(&'static str);
+
+/// Parses any float to its bitwise representation, returning an error if it cannot be represented exactly
+pub const fn parse_hex_exact(
+    s: &str,
+    bits: u32,
+    sig_bits: u32,
+) -> Result<u128, HexFloatParseError> {
+    match parse_any(s, bits, sig_bits, Round::Nearest) {
+        Err(e) => Err(e),
+        Ok((bits, Status::OK)) => Ok(bits),
+        Ok((_, status)) if status.overflow() => Err(HexFloatParseError("the value is too huge")),
+        Ok((_, status)) if status.underflow() => Err(HexFloatParseError("the value is too tiny")),
+        Ok((_, status)) if status.inexact() => Err(HexFloatParseError("the value is too precise")),
+        Ok(_) => unreachable!(),
+    }
 }
 
 /// Parse any float from hex to its bitwise representation.
-///
-/// `nan_repr` is passed rather than constructed so the platform-specific NaN is returned.
-pub const fn parse_any(s: &str, bits: u32, sig_bits: u32) -> u128 {
+pub const fn parse_any(
+    s: &str,
+    bits: u32,
+    sig_bits: u32,
+    round: Round,
+) -> Result<(u128, Status), HexFloatParseError> {
+    let mut b = s.as_bytes();
+
+    if sig_bits > 119 || bits > 128 || bits < sig_bits + 3 || bits > sig_bits + 30 {
+        return Err(HexFloatParseError("unsupported target float configuration"));
+    }
+
+    let neg = matches!(b, [b'-', ..]);
+    if let &[b'-' | b'+', ref rest @ ..] = b {
+        b = rest;
+    }
+
+    let sign_bit = 1 << (bits - 1);
+    let quiet_bit = 1 << (sig_bits - 1);
+    let nan = sign_bit - quiet_bit;
+    let inf = nan - quiet_bit;
+
+    let (mut x, status) = match *b {
+        [b'i' | b'I', b'n' | b'N', b'f' | b'F'] => (inf, Status::OK),
+        [b'n' | b'N', b'a' | b'A', b'n' | b'N'] => (nan, Status::OK),
+        [b'0', b'x' | b'X', ref rest @ ..] => {
+            let round = match (neg, round) {
+                // parse("-x", Round::Positive) == -parse("x", Round::Negative)
+                (true, Round::Positive) => Round::Negative,
+                (true, Round::Negative) => Round::Positive,
+                // rounding toward nearest or zero are symmetric
+                (true, Round::Nearest | Round::Zero) | (false, _) => round,
+            };
+            match parse_finite(rest, bits, sig_bits, round) {
+                Err(e) => return Err(e),
+                Ok(res) => res,
+            }
+        }
+        _ => return Err(HexFloatParseError("no hex indicator")),
+    };
+
+    if neg {
+        x ^= sign_bit;
+    }
+
+    Ok((x, status))
+}
+
+const fn parse_finite(
+    b: &[u8],
+    bits: u32,
+    sig_bits: u32,
+    rounding_mode: Round,
+) -> Result<(u128, Status), HexFloatParseError> {
     let exp_bits: u32 = bits - sig_bits - 1;
     let max_msb: i32 = (1 << (exp_bits - 1)) - 1;
     // The exponent of one ULP in the subnormals
     let min_lsb: i32 = 1 - max_msb - sig_bits as i32;
 
-    let exp_mask = ((1 << exp_bits) - 1) << sig_bits;
+    let (mut sig, mut exp) = match parse_hex(b) {
+        Err(e) => return Err(e),
+        Ok(Parsed { sig: 0, .. }) => return Ok((0, Status::OK)),
+        Ok(Parsed { sig, exp }) => (sig, exp),
+    };
+
+    let mut round_bits = u128_ilog2(sig) as i32 - sig_bits as i32;
+
+    // Round at least up to min_lsb
+    if exp < min_lsb - round_bits {
+        round_bits = min_lsb - exp;
+    }
+
+    let mut status = Status::OK;
 
-    let (neg, mut sig, exp) = match parse_hex(s.as_bytes()) {
-        Parsed::Finite { neg, sig: 0, .. } => return (neg as u128) << (bits - 1),
-        Parsed::Finite { neg, sig, exp } => (neg, sig, exp),
-        Parsed::Infinite { neg } => return ((neg as u128) << (bits - 1)) | exp_mask,
-        Parsed::Nan { neg } => {
-            return ((neg as u128) << (bits - 1)) | exp_mask | (1 << (sig_bits - 1));
+    exp += round_bits;
+
+    if round_bits > 0 {
+        // first, prepare for rounding exactly two bits
+        if round_bits == 1 {
+            sig <<= 1;
+        } else if round_bits > 2 {
+            sig = shr_odd_rounding(sig, (round_bits - 2) as u32);
         }
-    };
 
-    // exponents of the least and most significant bits in the value
-    let lsb = sig.trailing_zeros() as i32;
-    let msb = u128_ilog2(sig) as i32;
-    let sig_bits = sig_bits as i32;
+        if sig & 0b11 != 0 {
+            status = Status::INEXACT;
+        }
 
-    assert!(msb - lsb <= sig_bits, "the value is too precise");
-    assert!(msb + exp <= max_msb, "the value is too huge");
-    assert!(lsb + exp >= min_lsb, "the value is too tiny");
+        sig = shr2_round(sig, rounding_mode);
+    } else if round_bits < 0 {
+        sig <<= -round_bits;
+    }
 
     // The parsed value is X = sig * 2^exp
     // Expressed as a multiple U of the smallest subnormal value:
     // X = U * 2^min_lsb, so U = sig * 2^(exp-min_lsb)
-    let mut uexp = exp - min_lsb;
+    let uexp = (exp - min_lsb) as u128;
+    let uexp = uexp << sig_bits;
 
-    let shift = if uexp + msb >= sig_bits {
-        // normal, shift msb to position sig_bits
-        sig_bits - msb
-    } else {
-        // subnormal, shift so that uexp becomes 0
-        uexp
+    // Note that it is possible for the exponent bits to equal 2 here
+    // if the value rounded up, but that means the mantissa is all zeroes
+    // so the value is still correct
+    debug_assert!(sig <= 2 << sig_bits);
+
+    let inf = ((1 << exp_bits) - 1) << sig_bits;
+
+    let bits = match sig.checked_add(uexp) {
+        Some(bits) if bits < inf => {
+            // inexact subnormal or zero?
+            if status.inexact() && bits < (1 << sig_bits) {
+                status = status.with(Status::UNDERFLOW);
+            }
+            bits
+        }
+        _ => {
+            // overflow to infinity
+            status = status.with(Status::OVERFLOW).with(Status::INEXACT);
+            match rounding_mode {
+                Round::Positive | Round::Nearest => inf,
+                Round::Negative | Round::Zero => inf - 1,
+            }
+        }
     };
+    Ok((bits, status))
+}
 
-    if shift >= 0 {
-        sig <<= shift;
+/// Shift right, rounding all inexact divisions to the nearest odd number
+/// E.g. (0 >> 4) -> 0, (1..=31 >> 4) -> 1, (32 >> 4) -> 2, ...
+///
+/// Useful for reducing a number before rounding the last two bits, since
+/// the result of the final rounding is preserved for all rounding modes.
+const fn shr_odd_rounding(x: u128, k: u32) -> u128 {
+    if k < 128 {
+        let inexact = x.trailing_zeros() < k;
+        (x >> k) | (inexact as u128)
     } else {
-        sig >>= -shift;
+        (x != 0) as u128
     }
-    uexp -= shift;
-
-    // the most significant bit is like having 1 in the exponent bits
-    // add any leftover exponent to that
-    assert!(uexp >= 0 && uexp < (1 << exp_bits) - 2);
-    sig += (uexp as u128) << sig_bits;
+}
 
-    // finally, set the sign bit if necessary
-    sig | ((neg as u128) << (bits - 1))
+/// Divide by 4, rounding with the given mode
+const fn shr2_round(mut x: u128, round: Round) -> u128 {
+    let t = (x as u32) & 0b111;
+    x >>= 2;
+    match round {
+        // Look-up-table on the last three bits for when to round up
+        Round::Nearest => x + ((0b11001000_u8 >> t) & 1) as u128,
+
+        Round::Negative => x,
+        Round::Zero => x,
+        Round::Positive => x + (t & 0b11 != 0) as u128,
+    }
 }
 
-/// A parsed floating point number.
-enum Parsed {
-    /// Absolute value sig * 2^e
-    Finite {
-        neg: bool,
-        sig: u128,
-        exp: i32,
-    },
-    Infinite {
-        neg: bool,
-    },
-    Nan {
-        neg: bool,
-    },
+/// A parsed finite and unsigned floating point number.
+struct Parsed {
+    /// Absolute value sig * 2^exp
+    sig: u128,
+    exp: i32,
 }
 
 /// Parse a hexadecimal float x
-const fn parse_hex(mut b: &[u8]) -> Parsed {
-    let mut neg = false;
+const fn parse_hex(mut b: &[u8]) -> Result<Parsed, HexFloatParseError> {
     let mut sig: u128 = 0;
     let mut exp: i32 = 0;
 
-    if let &[c @ (b'-' | b'+'), ref rest @ ..] = b {
-        b = rest;
-        neg = c == b'-';
-    }
-
-    match *b {
-        [b'i' | b'I', b'n' | b'N', b'f' | b'F'] => return Parsed::Infinite { neg },
-        [b'n' | b'N', b'a' | b'A', b'n' | b'N'] => return Parsed::Nan { neg },
-        _ => (),
-    }
-
-    if let &[b'0', b'x' | b'X', ref rest @ ..] = b {
-        b = rest;
-    } else {
-        panic!("no hex indicator");
-    }
-
     let mut seen_point = false;
     let mut some_digits = false;
+    let mut inexact = false;
 
     while let &[c, ref rest @ ..] = b {
         b = rest;
 
         match c {
             b'.' => {
-                assert!(!seen_point);
+                if seen_point {
+                    return Err(HexFloatParseError("unexpected '.' parsing fractional digits"));
+                }
                 seen_point = true;
                 continue;
             }
             b'p' | b'P' => break,
             c => {
-                let digit = hex_digit(c);
+                let digit = match hex_digit(c) {
+                    Some(d) => d,
+                    None => return Err(HexFloatParseError("expected hexadecimal digit")),
+                };
                 some_digits = true;
-                let of;
-                (sig, of) = sig.overflowing_mul(16);
-                assert!(!of, "too many digits");
-                sig |= digit as u128;
-                // up until the fractional point, the value grows
+
+                if (sig >> 124) == 0 {
+                    sig <<= 4;
+                    sig |= digit as u128;
+                } else {
+                    // FIXME: it is technically possible for exp to overflow if parsing a string with >500M digits
+                    exp += 4;
+                    inexact |= digit != 0;
+                }
+                // Up until the fractional point, the value grows
                 // with more digits, but after it the exponent is
                 // compensated to match.
                 if seen_point {
@@ -153,49 +264,79 @@ const fn parse_hex(mut b: &[u8]) -> Parsed {
             }
         }
     }
-    assert!(some_digits, "at least one digit is required");
+    // If we've set inexact, the exact value has more than 125
+    // significant bits, and lies somewhere between sig and sig + 1.
+    // Because we'll round off at least two of the trailing bits,
+    // setting the last bit gives correct rounding for inexact values.
+    sig |= inexact as u128;
+
+    if !some_digits {
+        return Err(HexFloatParseError("at least one digit is required"));
+    };
+
     some_digits = false;
 
-    let mut negate_exp = false;
-    if let &[c @ (b'-' | b'+'), ref rest @ ..] = b {
+    let negate_exp = matches!(b, [b'-', ..]);
+    if let &[b'-' | b'+', ref rest @ ..] = b {
         b = rest;
-        negate_exp = c == b'-';
     }
 
-    let mut pexp: i32 = 0;
+    let mut pexp: u32 = 0;
     while let &[c, ref rest @ ..] = b {
         b = rest;
-        let digit = dec_digit(c);
+        let digit = match dec_digit(c) {
+            Some(d) => d,
+            None => return Err(HexFloatParseError("expected decimal digit")),
+        };
         some_digits = true;
-        let of;
-        (pexp, of) = pexp.overflowing_mul(10);
-        assert!(!of, "too many exponent digits");
-        pexp += digit as i32;
+        pexp = pexp.saturating_mul(10);
+        pexp += digit as u32;
     }
-    assert!(some_digits, "at least one exponent digit is required");
 
+    if !some_digits {
+        return Err(HexFloatParseError("at least one exponent digit is required"));
+    };
+
+    {
+        let e;
+        if negate_exp {
+            e = (exp as i64) - (pexp as i64);
+        } else {
+            e = (exp as i64) + (pexp as i64);
+        };
+
+        exp = if e < i32::MIN as i64 {
+            i32::MIN
+        } else if e > i32::MAX as i64 {
+            i32::MAX
+        } else {
+            e as i32
+        };
+    }
+    /* FIXME(msrv): once MSRV >= 1.66, replace the above workaround block with:
     if negate_exp {
-        exp -= pexp;
+        exp = exp.saturating_sub_unsigned(pexp);
     } else {
-        exp += pexp;
-    }
+        exp = exp.saturating_add_unsigned(pexp);
+    };
+    */
 
-    Parsed::Finite { neg, sig, exp }
+    Ok(Parsed { sig, exp })
 }
 
-const fn dec_digit(c: u8) -> u8 {
+const fn dec_digit(c: u8) -> Option<u8> {
     match c {
-        b'0'..=b'9' => c - b'0',
-        _ => panic!("bad char"),
+        b'0'..=b'9' => Some(c - b'0'),
+        _ => None,
     }
 }
 
-const fn hex_digit(c: u8) -> u8 {
+const fn hex_digit(c: u8) -> Option<u8> {
     match c {
-        b'0'..=b'9' => c - b'0',
-        b'a'..=b'f' => c - b'a' + 10,
-        b'A'..=b'F' => c - b'A' + 10,
-        _ => panic!("bad char"),
+        b'0'..=b'9' => Some(c - b'0'),
+        b'a'..=b'f' => Some(c - b'a' + 10),
+        b'A'..=b'F' => Some(c - b'A' + 10),
+        _ => None,
     }
 }
 
@@ -341,6 +482,61 @@ mod parse_tests {
 
     use super::*;
 
+    #[cfg(f16_enabled)]
+    fn rounding_properties(s: &str) -> Result<(), HexFloatParseError> {
+        let (xd, s0) = parse_any(s, 16, 10, Round::Negative)?;
+        let (xu, s1) = parse_any(s, 16, 10, Round::Positive)?;
+        let (xz, s2) = parse_any(s, 16, 10, Round::Zero)?;
+        let (xn, s3) = parse_any(s, 16, 10, Round::Nearest)?;
+
+        // FIXME: A value between the least normal and largest subnormal
+        // could have underflow status depend on rounding mode.
+
+        if let Status::OK = s0 {
+            // an exact result is the same for all rounding modes
+            assert_eq!(s0, s1);
+            assert_eq!(s0, s2);
+            assert_eq!(s0, s3);
+
+            assert_eq!(xd, xu);
+            assert_eq!(xd, xz);
+            assert_eq!(xd, xn);
+        } else {
+            assert!([s0, s1, s2, s3].into_iter().all(Status::inexact));
+
+            let xd = f16::from_bits(xd as u16);
+            let xu = f16::from_bits(xu as u16);
+            let xz = f16::from_bits(xz as u16);
+            let xn = f16::from_bits(xn as u16);
+
+            assert_biteq!(xd.next_up(), xu, "s={s}, xd={xd:?}, xu={xu:?}");
+
+            let signs = [xd, xu, xz, xn].map(f16::is_sign_negative);
+
+            if signs == [true; 4] {
+                assert_biteq!(xz, xu);
+            } else {
+                assert_eq!(signs, [false; 4]);
+                assert_biteq!(xz, xd);
+            }
+
+            if xn.to_bits() != xd.to_bits() {
+                assert_biteq!(xn, xu);
+            }
+        }
+        Ok(())
+    }
+    #[test]
+    #[cfg(f16_enabled)]
+    fn test_rounding() {
+        let n = 1_i32 << 14;
+        for i in -n..n {
+            let u = i.rotate_right(11) as u32;
+            let s = format!("{}", Hexf(f32::from_bits(u)));
+            assert!(rounding_properties(&s).is_ok());
+        }
+    }
+
     #[test]
     fn test_parse_any() {
         for k in -149..=127 {
@@ -397,6 +593,48 @@ mod parse_tests {
         }
     }
 
+    // FIXME: this test is causing failures that are likely UB on various platforms
+    #[cfg(all(target_arch = "x86_64", target_os = "linux"))]
+    #[test]
+    #[cfg(f128_enabled)]
+    fn rounding() {
+        let pi = std::f128::consts::PI;
+        let s = format!("{}", Hexf(pi));
+
+        for k in 0..=111 {
+            let (bits, status) = parse_any(&s, 128 - k, 112 - k, Round::Nearest).unwrap();
+            let scale = (1u128 << (112 - k - 1)) as f128;
+            let expected = (pi * scale).round_ties_even() / scale;
+            assert_eq!(bits << k, expected.to_bits(), "k = {k}, s = {s}");
+            assert_eq!(expected != pi, status.inexact());
+        }
+    }
+    #[test]
+    fn rounding_extreme_underflow() {
+        for k in 1..1000 {
+            let s = format!("0x1p{}", -149 - k);
+            let Ok((bits, status)) = parse_any(&s, 32, 23, Round::Nearest) else { unreachable!() };
+            assert_eq!(bits, 0, "{s} should round to zero, got bits={bits}");
+            assert!(status.underflow(), "should indicate underflow when parsing {s}");
+            assert!(status.inexact(), "should indicate inexact when parsing {s}");
+        }
+    }
+    #[test]
+    fn long_tail() {
+        for k in 1..1000 {
+            let s = format!("0x1.{}p0", "0".repeat(k));
+            let Ok(bits) = parse_hex_exact(&s, 32, 23) else { panic!("parsing {s} failed") };
+            assert_eq!(f32::from_bits(bits as u32), 1.0);
+
+            let s = format!("0x1.{}1p0", "0".repeat(k));
+            let Ok((bits, status)) = parse_any(&s, 32, 23, Round::Nearest) else { unreachable!() };
+            if status.inexact() {
+                assert!(1.0 == f32::from_bits(bits as u32));
+            } else {
+                assert!(1.0 < f32::from_bits(bits as u32));
+            }
+        }
+    }
     // HACK(msrv): 1.63 rejects unknown width float literals at an AST level, so use a macro to
     // hide them from the AST.
     #[cfg(f16_enabled)]
@@ -434,6 +672,7 @@ mod parse_tests {
                 ];
                 for (s, exp) in checks {
                     println!("parsing {s}");
+                    assert!(rounding_properties(s).is_ok());
                     let act = hf16(s).to_bits();
                     assert_eq!(
                         act, exp,
@@ -749,7 +988,13 @@ mod tests_panicking {
             #[test]
             #[should_panic(expected = "the value is too precise")]
             fn test_f128_extra_precision() {
-                // One bit more than the above.
+                // Just below the maximum finite.
+                hf128("0x1.fffffffffffffffffffffffffffe8p+16383");
+            }
+            #[test]
+            #[should_panic(expected = "the value is too huge")]
+            fn test_f128_extra_precision_overflow() {
+                // One bit more than the above. Should overflow.
                 hf128("0x1.ffffffffffffffffffffffffffff8p+16383");
             }
 
@@ -822,6 +1067,46 @@ mod print_tests {
         }
     }
 
+    #[test]
+    #[cfg(f16_enabled)]
+    fn test_f16_to_f32() {
+        use std::format;
+        // Exhaustively check that these are equivalent for all `f16`:
+        //  - `f16 -> f32`
+        //  - `f16 -> str -> f32`
+        //  - `f16 -> f32 -> str -> f32`
+        //  - `f16 -> f32 -> str -> f16 -> f32`
+        for x in 0..=u16::MAX {
+            let f16 = f16::from_bits(x);
+            let s16 = format!("{}", Hexf(f16));
+            let f32 = f16 as f32;
+            let s32 = format!("{}", Hexf(f32));
+
+            let a = hf32(&s16);
+            let b = hf32(&s32);
+            let c = hf16(&s32);
+
+            if f32.is_nan() && a.is_nan() && b.is_nan() && c.is_nan() {
+                continue;
+            }
+
+            assert_eq!(
+                f32.to_bits(),
+                a.to_bits(),
+                "{f16:?} : f16 formatted as {s16} which parsed as {a:?} : f16"
+            );
+            assert_eq!(
+                f32.to_bits(),
+                b.to_bits(),
+                "{f32:?} : f32 formatted as {s32} which parsed as {b:?} : f32"
+            );
+            assert_eq!(
+                f32.to_bits(),
+                (c as f32).to_bits(),
+                "{f32:?} : f32 formatted as {s32} which parsed as {c:?} : f16"
+            );
+        }
+    }
     #[test]
     fn spot_checks() {
         assert_eq!(Hexf(f32::MAX).to_string(), "0x1.fffffep+127");

From e64f55eab9b7ffb326f1c8bfedb1061de10036ff Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Tue, 15 Apr 2025 04:20:17 +0000
Subject: [PATCH 262/279] fmod: Add regression tests for subnormal issue

From discussion at [1] our loop count calculation is incorrect, causing
an issue with subnormal numbers. Add test cases for known failures.

[1]: https://github.com/rust-lang/libm/pull/469#discussion_r2012473920
---
 crates/libm-test/src/gen/case_list.rs | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/crates/libm-test/src/gen/case_list.rs b/crates/libm-test/src/gen/case_list.rs
index 7cb9897d8..e3628d51c 100644
--- a/crates/libm-test/src/gen/case_list.rs
+++ b/crates/libm-test/src/gen/case_list.rs
@@ -403,11 +403,33 @@ fn fminimum_numf128_cases() -> Vec<TestCase<op::fminimum_numf128::Routine>> {
 }
 
 fn fmod_cases() -> Vec<TestCase<op::fmod::Routine>> {
-    vec![]
+    let mut v = vec![];
+    TestCase::append_pairs(
+        &mut v,
+        &[
+            // Previous failure with incorrect loop iteration
+            // <https://github.com/rust-lang/libm/pull/469#discussion_r2022337272>
+            ((2.1, 3.123e-320), Some(2.0696e-320)),
+            ((2.1, 2.253547e-318), Some(1.772535e-318)),
+        ],
+    );
+    v
 }
 
 fn fmodf_cases() -> Vec<TestCase<op::fmodf::Routine>> {
-    vec![]
+    let mut v = vec![];
+    TestCase::append_pairs(
+        &mut v,
+        &[
+            // Previous failure with incorrect loop iteration
+            // <https://github.com/rust-lang/libm/pull/469#discussion_r2022337272>
+            ((2.1, 8.858e-42), Some(8.085e-42)),
+            ((2.1, 6.39164e-40), Some(6.1636e-40)),
+            ((5.5, 6.39164e-40), Some(4.77036e-40)),
+            ((-151.189, 6.39164e-40), Some(-5.64734e-40)),
+        ],
+    );
+    v
 }
 
 #[cfg(f128_enabled)]

From 56bb84c7ea1d080677205c79995d536aecc36b00 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 16 Apr 2025 18:43:25 +0000
Subject: [PATCH 263/279] fmod: Correct the normalization of subnormals

Discussed at [1], there was an off-by-one mistake when converting from
the loop routine to using `leading_zeros` for normalization.

Currently, using `EXP_BITS` has the effect that `ix` after the branch
has its MSB _one bit to the left_ of the implicit bit's position,
whereas a shift by `EXP_BITS + 1` ensures that the MSB is exactly at the
implicit bit's position, matching what is done for normals (where the
implicit bit is set to be explicit). This doesn't seem to have any
effect in our implementation since the failing test cases from [1]
appear to still have correct results.

Since the result of using `EXP_BITS + 1` is more consistent with what is
done for normals, apply this here.

[1]: https://github.com/rust-lang/libm/pull/469#discussion_r2012473920
---
 src/math/generic/fmod.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/math/generic/fmod.rs b/src/math/generic/fmod.rs
index c74b593d5..cd23350ea 100644
--- a/src/math/generic/fmod.rs
+++ b/src/math/generic/fmod.rs
@@ -26,7 +26,7 @@ pub fn fmod<F: Float>(x: F, y: F) -> F {
 
     /* normalize x and y */
     if ex == 0 {
-        let i = ix << F::EXP_BITS;
+        let i = ix << (F::EXP_BITS + 1);
         ex -= i.leading_zeros() as i32;
         ix <<= -ex + 1;
     } else {
@@ -35,7 +35,7 @@ pub fn fmod<F: Float>(x: F, y: F) -> F {
     }
 
     if ey == 0 {
-        let i = iy << F::EXP_BITS;
+        let i = iy << (F::EXP_BITS + 1);
         ey -= i.leading_zeros() as i32;
         iy <<= -ey + 1;
     } else {

From b6d3b0e25c588627ccf69b7197048fca9f2ebed0 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 18 Apr 2025 00:18:12 +0000
Subject: [PATCH 264/279] Add `NEG_NAN` to `Float`

Introduce a constant representing NaN with a negative sign bit for use
with testing. There isn't really any guarantee that `F::NAN` is positive
but in practice it always is, which is good enough for testing purposes.
---
 crates/libm-test/src/f8_impl.rs  |  1 +
 src/math/support/float_traits.rs | 25 +++++++++++++++++++++----
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/crates/libm-test/src/f8_impl.rs b/crates/libm-test/src/f8_impl.rs
index 6772e092c..ddb7bf90e 100644
--- a/crates/libm-test/src/f8_impl.rs
+++ b/crates/libm-test/src/f8_impl.rs
@@ -30,6 +30,7 @@ impl Float for f8 {
     const INFINITY: Self = Self(0b0_1111_000);
     const NEG_INFINITY: Self = Self(0b1_1111_000);
     const NAN: Self = Self(0b0_1111_100);
+    const NEG_NAN: Self = Self(0b1_1111_100);
     const MIN_POSITIVE_NORMAL: Self = Self(1 << Self::SIG_BITS);
     // FIXME: incorrect values
     const EPSILON: Self = Self::ZERO;
diff --git a/src/math/support/float_traits.rs b/src/math/support/float_traits.rs
index 96c209c85..fac104832 100644
--- a/src/math/support/float_traits.rs
+++ b/src/math/support/float_traits.rs
@@ -34,6 +34,7 @@ pub trait Float:
     const INFINITY: Self;
     const NEG_INFINITY: Self;
     const NAN: Self;
+    const NEG_NAN: Self;
     const MAX: Self;
     const MIN: Self;
     const EPSILON: Self;
@@ -187,6 +188,7 @@ macro_rules! float_impl {
         $bits:expr,
         $significand_bits:expr,
         $from_bits:path,
+        $to_bits:path,
         $fma_fn:ident,
         $fma_intrinsic:ident
     ) => {
@@ -201,6 +203,9 @@ macro_rules! float_impl {
             const INFINITY: Self = Self::INFINITY;
             const NEG_INFINITY: Self = Self::NEG_INFINITY;
             const NAN: Self = Self::NAN;
+            // NAN isn't guaranteed to be positive but it usually is. We only use this for
+            // tests.
+            const NEG_NAN: Self = $from_bits($to_bits(Self::NAN) | Self::SIGN_MASK);
             const MAX: Self = -Self::MIN;
             // Sign bit set, saturated mantissa, saturated exponent with last bit zeroed
             const MIN: Self = $from_bits(Self::Int::MAX & !(1 << Self::SIG_BITS));
@@ -275,11 +280,11 @@ macro_rules! float_impl {
 }
 
 #[cfg(f16_enabled)]
-float_impl!(f16, u16, i16, 16, 10, f16::from_bits, fmaf16, fmaf16);
-float_impl!(f32, u32, i32, 32, 23, f32_from_bits, fmaf, fmaf32);
-float_impl!(f64, u64, i64, 64, 52, f64_from_bits, fma, fmaf64);
+float_impl!(f16, u16, i16, 16, 10, f16::from_bits, f16::to_bits, fmaf16, fmaf16);
+float_impl!(f32, u32, i32, 32, 23, f32_from_bits, f32_to_bits, fmaf, fmaf32);
+float_impl!(f64, u64, i64, 64, 52, f64_from_bits, f64_to_bits, fma, fmaf64);
 #[cfg(f128_enabled)]
-float_impl!(f128, u128, i128, 128, 112, f128::from_bits, fmaf128, fmaf128);
+float_impl!(f128, u128, i128, 128, 112, f128::from_bits, f128::to_bits, fmaf128, fmaf128);
 
 /* FIXME(msrv): vendor some things that are not const stable at our MSRV */
 
@@ -289,12 +294,24 @@ pub const fn f32_from_bits(bits: u32) -> f32 {
     unsafe { mem::transmute::<u32, f32>(bits) }
 }
 
+/// `f32::to_bits`
+pub const fn f32_to_bits(x: f32) -> u32 {
+    // SAFETY: POD cast with no preconditions
+    unsafe { mem::transmute::<f32, u32>(x) }
+}
+
 /// `f64::from_bits`
 pub const fn f64_from_bits(bits: u64) -> f64 {
     // SAFETY: POD cast with no preconditions
     unsafe { mem::transmute::<u64, f64>(bits) }
 }
 
+/// `f64::to_bits`
+pub const fn f64_to_bits(x: f64) -> u64 {
+    // SAFETY: POD cast with no preconditions
+    unsafe { mem::transmute::<f64, u64>(x) }
+}
+
 /// Trait for floats twice the bit width of another integer.
 pub trait DFloat: Float {
     /// Float that is half the bit width of the floatthis trait is implemented for.

From 9ec0b96d0f78ef61f35c601d5835b42c7c8949ca Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 18 Apr 2025 00:22:38 +0000
Subject: [PATCH 265/279] Make `assert_biteq!` not rely on having `Int` in
 scope

---
 src/math/generic/fmax.rs         | 2 +-
 src/math/generic/fmaximum.rs     | 2 +-
 src/math/generic/fmaximum_num.rs | 2 +-
 src/math/generic/fmin.rs         | 2 +-
 src/math/generic/fminimum.rs     | 2 +-
 src/math/generic/fminimum_num.rs | 2 +-
 src/math/generic/rint.rs         | 2 +-
 src/math/generic/scalbn.rs       | 1 -
 src/math/support/macros.rs       | 3 ++-
 9 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/math/generic/fmax.rs b/src/math/generic/fmax.rs
index 32613a46b..039ffce9f 100644
--- a/src/math/generic/fmax.rs
+++ b/src/math/generic/fmax.rs
@@ -26,7 +26,7 @@ pub fn fmax<F: Float>(x: F, y: F) -> F {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::support::{Hexf, Int};
+    use crate::support::Hexf;
 
     fn spec_test<F: Float>() {
         let cases = [
diff --git a/src/math/generic/fmaximum.rs b/src/math/generic/fmaximum.rs
index 5f653ce94..b0fde88e8 100644
--- a/src/math/generic/fmaximum.rs
+++ b/src/math/generic/fmaximum.rs
@@ -29,7 +29,7 @@ pub fn fmaximum<F: Float>(x: F, y: F) -> F {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::support::{Hexf, Int};
+    use crate::support::Hexf;
 
     fn spec_test<F: Float>() {
         let cases = [
diff --git a/src/math/generic/fmaximum_num.rs b/src/math/generic/fmaximum_num.rs
index 224660123..68b03109d 100644
--- a/src/math/generic/fmaximum_num.rs
+++ b/src/math/generic/fmaximum_num.rs
@@ -28,7 +28,7 @@ pub fn fmaximum_num<F: Float>(x: F, y: F) -> F {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::support::{Hexf, Int};
+    use crate::support::Hexf;
 
     fn spec_test<F: Float>() {
         let cases = [
diff --git a/src/math/generic/fmin.rs b/src/math/generic/fmin.rs
index 5cc33e904..2aa7f6af7 100644
--- a/src/math/generic/fmin.rs
+++ b/src/math/generic/fmin.rs
@@ -25,7 +25,7 @@ pub fn fmin<F: Float>(x: F, y: F) -> F {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::support::{Hexf, Int};
+    use crate::support::Hexf;
 
     fn spec_test<F: Float>() {
         let cases = [
diff --git a/src/math/generic/fminimum.rs b/src/math/generic/fminimum.rs
index f566d9631..e01c88646 100644
--- a/src/math/generic/fminimum.rs
+++ b/src/math/generic/fminimum.rs
@@ -29,7 +29,7 @@ pub fn fminimum<F: Float>(x: F, y: F) -> F {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::support::{Hexf, Int};
+    use crate::support::Hexf;
 
     fn spec_test<F: Float>() {
         let cases = [
diff --git a/src/math/generic/fminimum_num.rs b/src/math/generic/fminimum_num.rs
index e58a585c3..3e97b893b 100644
--- a/src/math/generic/fminimum_num.rs
+++ b/src/math/generic/fminimum_num.rs
@@ -28,7 +28,7 @@ pub fn fminimum_num<F: Float>(x: F, y: F) -> F {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::support::{Hexf, Int};
+    use crate::support::Hexf;
 
     fn spec_test<F: Float>() {
         let cases = [
diff --git a/src/math/generic/rint.rs b/src/math/generic/rint.rs
index 2f8b2b365..45d2f3138 100644
--- a/src/math/generic/rint.rs
+++ b/src/math/generic/rint.rs
@@ -43,7 +43,7 @@ pub fn rint_round<F: Float>(x: F, _round: Round) -> FpResult<F> {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::support::{Hexf, Int, Status};
+    use crate::support::{Hexf, Status};
 
     fn spec_test<F: Float>(cases: &[(F, F, Status)]) {
         let roundtrip = [F::ZERO, F::ONE, F::NEG_ONE, F::NEG_ZERO, F::INFINITY, F::NEG_INFINITY];
diff --git a/src/math/generic/scalbn.rs b/src/math/generic/scalbn.rs
index 5ba7f2ab2..aaa243933 100644
--- a/src/math/generic/scalbn.rs
+++ b/src/math/generic/scalbn.rs
@@ -121,7 +121,6 @@ where
 
 #[cfg(test)]
 mod tests {
-    use super::super::super::Int;
     use super::*;
 
     // Tests against N3220
diff --git a/src/math/support/macros.rs b/src/math/support/macros.rs
index c80e77511..0b72db0e4 100644
--- a/src/math/support/macros.rs
+++ b/src/math/support/macros.rs
@@ -137,9 +137,10 @@ macro_rules! hf128 {
 #[cfg(test)]
 macro_rules! assert_biteq {
     ($left:expr, $right:expr, $($tt:tt)*) => {{
+        use $crate::support::Int;
         let l = $left;
         let r = $right;
-        let bits = (l.to_bits() - l.to_bits()).leading_zeros(); // hack to get the width from the value
+        let bits = Int::leading_zeros(l.to_bits() - l.to_bits()); // hack to get the width from the value
         assert!(
             l.biteq(r),
             "{}\nl: {l:?} ({lb:#0width$x})\nr: {r:?} ({rb:#0width$x})",

From 5c0e7b401b259aad36b7cfd835cfb7d0b7573f8d Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 18 Apr 2025 00:22:21 +0000
Subject: [PATCH 266/279] Combine the source files for more generic
 implementations

Splitting into different source files by float size doesn't have any
benefit when the only content is a small function that forwards to the
generic implementation. Combine the source files for all width versions
of:

* ceil
* copysign
* fabs
* fdim
* floor
* fmaximum
* fmaximum_num
* fminimum
* fminimum_num
* ldexp
* scalbn
* sqrt
* truc

fmod is excluded to avoid conflicts with an open PR.

As part of this change move unit tests out of the generic module,
instead testing the type-specific functions (e.g. `ceilf16` rather than
`ceil::<f16>()`). This ensures that unit tests are validating whatever
we expose, such as arch-specific implementations via
`select_implementation!`, which would otherwise be skipped. (They are
still covered by integration tests).
---
 etc/function-definitions.json     |  60 ++++++++---------
 src/math/acosf.rs                 |   2 +-
 src/math/asinf.rs                 |   4 +-
 src/math/ceil.rs                  |  32 ++++++++++
 src/math/ceilf.rs                 |  13 ----
 src/math/ceilf128.rs              |   7 --
 src/math/ceilf16.rs               |   7 --
 src/math/copysign.rs              |  80 +++++++++++++++++++++++
 src/math/copysignf.rs             |   8 ---
 src/math/copysignf128.rs          |   8 ---
 src/math/copysignf16.rs           |   8 ---
 src/math/fabs.rs                  | 103 ++++++++++++++++++++++++++----
 src/math/fabsf.rs                 |  39 -----------
 src/math/fabsf128.rs              |  31 ---------
 src/math/fabsf16.rs               |  31 ---------
 src/math/fdim.rs                  |  41 ++++++++++++
 src/math/fdimf.rs                 |  12 ----
 src/math/fdimf128.rs              |  12 ----
 src/math/fdimf16.rs               |  12 ----
 src/math/floor.rs                 |  32 ++++++++++
 src/math/floorf.rs                |  13 ----
 src/math/floorf128.rs             |   7 --
 src/math/floorf16.rs              |   7 --
 src/math/fmin_fmax.rs             |  92 ++++++++++++++++++++++++++
 src/math/fminimum_fmaximum.rs     |  96 ++++++++++++++++++++++++++++
 src/math/fminimum_fmaximum_num.rs |  96 ++++++++++++++++++++++++++++
 src/math/generic/fmax.rs          |  49 --------------
 src/math/generic/fmaximum.rs      |  51 ---------------
 src/math/generic/fmaximum_num.rs  |  51 ---------------
 src/math/generic/fmin.rs          |  49 --------------
 src/math/generic/fminimum.rs      |  51 ---------------
 src/math/generic/fminimum_num.rs  |  51 ---------------
 src/math/generic/scalbn.rs        |  65 -------------------
 src/math/ldexp.rs                 |  17 +++++
 src/math/ldexpf.rs                |   4 --
 src/math/ldexpf128.rs             |   4 --
 src/math/ldexpf16.rs              |   4 --
 src/math/mod.rs                   | 100 +++++++++--------------------
 src/math/round.rs                 |  20 ++++++
 src/math/roundf.rs                |   5 --
 src/math/roundf128.rs             |   5 --
 src/math/roundf16.rs              |   5 --
 src/math/scalbn.rs                |  83 ++++++++++++++++++++++++
 src/math/scalbnf.rs               |   4 --
 src/math/scalbnf128.rs            |   4 --
 src/math/scalbnf16.rs             |   4 --
 src/math/sqrt.rs                  |  36 +++++++++++
 src/math/sqrtf.rs                 |  15 -----
 src/math/sqrtf128.rs              |   5 --
 src/math/sqrtf16.rs               |  11 ----
 src/math/trunc.rs                 |  40 ++++++++++++
 src/math/truncf.rs                |  23 -------
 src/math/truncf128.rs             |   7 --
 src/math/truncf16.rs              |   7 --
 54 files changed, 819 insertions(+), 804 deletions(-)
 delete mode 100644 src/math/ceilf.rs
 delete mode 100644 src/math/ceilf128.rs
 delete mode 100644 src/math/ceilf16.rs
 delete mode 100644 src/math/copysignf.rs
 delete mode 100644 src/math/copysignf128.rs
 delete mode 100644 src/math/copysignf16.rs
 delete mode 100644 src/math/fabsf.rs
 delete mode 100644 src/math/fabsf128.rs
 delete mode 100644 src/math/fabsf16.rs
 delete mode 100644 src/math/fdimf.rs
 delete mode 100644 src/math/fdimf128.rs
 delete mode 100644 src/math/fdimf16.rs
 delete mode 100644 src/math/floorf.rs
 delete mode 100644 src/math/floorf128.rs
 delete mode 100644 src/math/floorf16.rs
 delete mode 100644 src/math/ldexpf.rs
 delete mode 100644 src/math/ldexpf128.rs
 delete mode 100644 src/math/ldexpf16.rs
 delete mode 100644 src/math/roundf.rs
 delete mode 100644 src/math/roundf128.rs
 delete mode 100644 src/math/roundf16.rs
 delete mode 100644 src/math/scalbnf.rs
 delete mode 100644 src/math/scalbnf128.rs
 delete mode 100644 src/math/scalbnf16.rs
 delete mode 100644 src/math/sqrtf.rs
 delete mode 100644 src/math/sqrtf128.rs
 delete mode 100644 src/math/sqrtf16.rs
 delete mode 100644 src/math/truncf.rs
 delete mode 100644 src/math/truncf128.rs
 delete mode 100644 src/math/truncf16.rs

diff --git a/etc/function-definitions.json b/etc/function-definitions.json
index bca58402f..801e74b22 100644
--- a/etc/function-definitions.json
+++ b/etc/function-definitions.json
@@ -108,21 +108,21 @@
     "ceilf": {
         "sources": [
             "src/math/arch/wasm32.rs",
-            "src/math/ceilf.rs",
+            "src/math/ceil.rs",
             "src/math/generic/ceil.rs"
         ],
         "type": "f32"
     },
     "ceilf128": {
         "sources": [
-            "src/math/ceilf128.rs",
+            "src/math/ceil.rs",
             "src/math/generic/ceil.rs"
         ],
         "type": "f128"
     },
     "ceilf16": {
         "sources": [
-            "src/math/ceilf16.rs",
+            "src/math/ceil.rs",
             "src/math/generic/ceil.rs"
         ],
         "type": "f16"
@@ -136,21 +136,21 @@
     },
     "copysignf": {
         "sources": [
-            "src/math/copysignf.rs",
+            "src/math/copysign.rs",
             "src/math/generic/copysign.rs"
         ],
         "type": "f32"
     },
     "copysignf128": {
         "sources": [
-            "src/math/copysignf128.rs",
+            "src/math/copysign.rs",
             "src/math/generic/copysign.rs"
         ],
         "type": "f128"
     },
     "copysignf16": {
         "sources": [
-            "src/math/copysignf16.rs",
+            "src/math/copysign.rs",
             "src/math/generic/copysign.rs"
         ],
         "type": "f16"
@@ -262,21 +262,21 @@
     "fabsf": {
         "sources": [
             "src/math/arch/wasm32.rs",
-            "src/math/fabsf.rs",
+            "src/math/fabs.rs",
             "src/math/generic/fabs.rs"
         ],
         "type": "f32"
     },
     "fabsf128": {
         "sources": [
-            "src/math/fabsf128.rs",
+            "src/math/fabs.rs",
             "src/math/generic/fabs.rs"
         ],
         "type": "f128"
     },
     "fabsf16": {
         "sources": [
-            "src/math/fabsf16.rs",
+            "src/math/fabs.rs",
             "src/math/generic/fabs.rs"
         ],
         "type": "f16"
@@ -290,21 +290,21 @@
     },
     "fdimf": {
         "sources": [
-            "src/math/fdimf.rs",
+            "src/math/fdim.rs",
             "src/math/generic/fdim.rs"
         ],
         "type": "f32"
     },
     "fdimf128": {
         "sources": [
-            "src/math/fdimf128.rs",
+            "src/math/fdim.rs",
             "src/math/generic/fdim.rs"
         ],
         "type": "f128"
     },
     "fdimf16": {
         "sources": [
-            "src/math/fdimf16.rs",
+            "src/math/fdim.rs",
             "src/math/generic/fdim.rs"
         ],
         "type": "f16"
@@ -321,21 +321,21 @@
     "floorf": {
         "sources": [
             "src/math/arch/wasm32.rs",
-            "src/math/floorf.rs",
+            "src/math/floor.rs",
             "src/math/generic/floor.rs"
         ],
         "type": "f32"
     },
     "floorf128": {
         "sources": [
-            "src/math/floorf128.rs",
+            "src/math/floor.rs",
             "src/math/generic/floor.rs"
         ],
         "type": "f128"
     },
     "floorf16": {
         "sources": [
-            "src/math/floorf16.rs",
+            "src/math/floor.rs",
             "src/math/generic/floor.rs"
         ],
         "type": "f16"
@@ -636,19 +636,19 @@
     },
     "ldexpf": {
         "sources": [
-            "src/math/ldexpf.rs"
+            "src/math/ldexp.rs"
         ],
         "type": "f32"
     },
     "ldexpf128": {
         "sources": [
-            "src/math/ldexpf128.rs"
+            "src/math/ldexp.rs"
         ],
         "type": "f128"
     },
     "ldexpf16": {
         "sources": [
-            "src/math/ldexpf16.rs"
+            "src/math/ldexp.rs"
         ],
         "type": "f16"
     },
@@ -847,21 +847,21 @@
     "roundf": {
         "sources": [
             "src/math/generic/round.rs",
-            "src/math/roundf.rs"
+            "src/math/round.rs"
         ],
         "type": "f32"
     },
     "roundf128": {
         "sources": [
             "src/math/generic/round.rs",
-            "src/math/roundf128.rs"
+            "src/math/round.rs"
         ],
         "type": "f128"
     },
     "roundf16": {
         "sources": [
             "src/math/generic/round.rs",
-            "src/math/roundf16.rs"
+            "src/math/round.rs"
         ],
         "type": "f16"
     },
@@ -875,21 +875,21 @@
     "scalbnf": {
         "sources": [
             "src/math/generic/scalbn.rs",
-            "src/math/scalbnf.rs"
+            "src/math/scalbn.rs"
         ],
         "type": "f32"
     },
     "scalbnf128": {
         "sources": [
             "src/math/generic/scalbn.rs",
-            "src/math/scalbnf128.rs"
+            "src/math/scalbn.rs"
         ],
         "type": "f128"
     },
     "scalbnf16": {
         "sources": [
             "src/math/generic/scalbn.rs",
-            "src/math/scalbnf16.rs"
+            "src/math/scalbn.rs"
         ],
         "type": "f16"
     },
@@ -945,14 +945,14 @@
             "src/math/arch/i686.rs",
             "src/math/arch/wasm32.rs",
             "src/math/generic/sqrt.rs",
-            "src/math/sqrtf.rs"
+            "src/math/sqrt.rs"
         ],
         "type": "f32"
     },
     "sqrtf128": {
         "sources": [
             "src/math/generic/sqrt.rs",
-            "src/math/sqrtf128.rs"
+            "src/math/sqrt.rs"
         ],
         "type": "f128"
     },
@@ -960,7 +960,7 @@
         "sources": [
             "src/math/arch/aarch64.rs",
             "src/math/generic/sqrt.rs",
-            "src/math/sqrtf16.rs"
+            "src/math/sqrt.rs"
         ],
         "type": "f16"
     },
@@ -1012,21 +1012,21 @@
         "sources": [
             "src/math/arch/wasm32.rs",
             "src/math/generic/trunc.rs",
-            "src/math/truncf.rs"
+            "src/math/trunc.rs"
         ],
         "type": "f32"
     },
     "truncf128": {
         "sources": [
             "src/math/generic/trunc.rs",
-            "src/math/truncf128.rs"
+            "src/math/trunc.rs"
         ],
         "type": "f128"
     },
     "truncf16": {
         "sources": [
             "src/math/generic/trunc.rs",
-            "src/math/truncf16.rs"
+            "src/math/trunc.rs"
         ],
         "type": "f16"
     },
diff --git a/src/math/acosf.rs b/src/math/acosf.rs
index 1a60479e3..dd88eea5b 100644
--- a/src/math/acosf.rs
+++ b/src/math/acosf.rs
@@ -13,7 +13,7 @@
  * ====================================================
  */
 
-use super::sqrtf::sqrtf;
+use super::sqrt::sqrtf;
 
 const PIO2_HI: f32 = 1.5707962513e+00; /* 0x3fc90fda */
 const PIO2_LO: f32 = 7.5497894159e-08; /* 0x33a22168 */
diff --git a/src/math/asinf.rs b/src/math/asinf.rs
index 0ea49c076..ed6855567 100644
--- a/src/math/asinf.rs
+++ b/src/math/asinf.rs
@@ -13,8 +13,8 @@
  * ====================================================
  */
 
-use super::fabsf::fabsf;
 use super::sqrt::sqrt;
+use super::support::Float;
 
 const PIO2: f64 = 1.570796326794896558e+00;
 
@@ -61,7 +61,7 @@ pub fn asinf(mut x: f32) -> f32 {
     }
 
     /* 1 > |x| >= 0.5 */
-    let z = (1. - fabsf(x)) * 0.5;
+    let z = (1. - Float::abs(x)) * 0.5;
     let s = sqrt(z as f64);
     x = (PIO2 - 2. * (s + s * (r(z) as f64))) as f32;
     if (hx >> 31) != 0 { -x } else { x }
diff --git a/src/math/ceil.rs b/src/math/ceil.rs
index 535f434ac..4e1035457 100644
--- a/src/math/ceil.rs
+++ b/src/math/ceil.rs
@@ -1,3 +1,26 @@
+/// Ceil (f16)
+///
+/// Finds the nearest integer greater than or equal to `x`.
+#[cfg(f16_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn ceilf16(x: f16) -> f16 {
+    super::generic::ceil(x)
+}
+
+/// Ceil (f32)
+///
+/// Finds the nearest integer greater than or equal to `x`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn ceilf(x: f32) -> f32 {
+    select_implementation! {
+        name: ceilf,
+        use_arch: all(target_arch = "wasm32", intrinsics_enabled),
+        args: x,
+    }
+
+    super::generic::ceil(x)
+}
+
 /// Ceil (f64)
 ///
 /// Finds the nearest integer greater than or equal to `x`.
@@ -12,3 +35,12 @@ pub fn ceil(x: f64) -> f64 {
 
     super::generic::ceil(x)
 }
+
+/// Ceil (f128)
+///
+/// Finds the nearest integer greater than or equal to `x`.
+#[cfg(f128_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn ceilf128(x: f128) -> f128 {
+    super::generic::ceil(x)
+}
diff --git a/src/math/ceilf.rs b/src/math/ceilf.rs
deleted file mode 100644
index 66d44189c..000000000
--- a/src/math/ceilf.rs
+++ /dev/null
@@ -1,13 +0,0 @@
-/// Ceil (f32)
-///
-/// Finds the nearest integer greater than or equal to `x`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn ceilf(x: f32) -> f32 {
-    select_implementation! {
-        name: ceilf,
-        use_arch: all(target_arch = "wasm32", intrinsics_enabled),
-        args: x,
-    }
-
-    super::generic::ceil(x)
-}
diff --git a/src/math/ceilf128.rs b/src/math/ceilf128.rs
deleted file mode 100644
index 89980858e..000000000
--- a/src/math/ceilf128.rs
+++ /dev/null
@@ -1,7 +0,0 @@
-/// Ceil (f128)
-///
-/// Finds the nearest integer greater than or equal to `x`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn ceilf128(x: f128) -> f128 {
-    super::generic::ceil(x)
-}
diff --git a/src/math/ceilf16.rs b/src/math/ceilf16.rs
deleted file mode 100644
index 2af67eff0..000000000
--- a/src/math/ceilf16.rs
+++ /dev/null
@@ -1,7 +0,0 @@
-/// Ceil (f16)
-///
-/// Finds the nearest integer greater than or equal to `x`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn ceilf16(x: f16) -> f16 {
-    super::generic::ceil(x)
-}
diff --git a/src/math/copysign.rs b/src/math/copysign.rs
index 552bf3975..d2a86e7fd 100644
--- a/src/math/copysign.rs
+++ b/src/math/copysign.rs
@@ -1,3 +1,22 @@
+/// Sign of Y, magnitude of X (f16)
+///
+/// Constructs a number with the magnitude (absolute value) of its
+/// first argument, `x`, and the sign of its second argument, `y`.
+#[cfg(f16_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn copysignf16(x: f16, y: f16) -> f16 {
+    super::generic::copysign(x, y)
+}
+
+/// Sign of Y, magnitude of X (f32)
+///
+/// Constructs a number with the magnitude (absolute value) of its
+/// first argument, `x`, and the sign of its second argument, `y`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn copysignf(x: f32, y: f32) -> f32 {
+    super::generic::copysign(x, y)
+}
+
 /// Sign of Y, magnitude of X (f64)
 ///
 /// Constructs a number with the magnitude (absolute value) of its
@@ -6,3 +25,64 @@
 pub fn copysign(x: f64, y: f64) -> f64 {
     super::generic::copysign(x, y)
 }
+
+/// Sign of Y, magnitude of X (f128)
+///
+/// Constructs a number with the magnitude (absolute value) of its
+/// first argument, `x`, and the sign of its second argument, `y`.
+#[cfg(f128_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn copysignf128(x: f128, y: f128) -> f128 {
+    super::generic::copysign(x, y)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::support::Float;
+
+    fn spec_test<F: Float>(f: impl Fn(F, F) -> F) {
+        assert_biteq!(f(F::ZERO, F::ZERO), F::ZERO);
+        assert_biteq!(f(F::NEG_ZERO, F::ZERO), F::ZERO);
+        assert_biteq!(f(F::ZERO, F::NEG_ZERO), F::NEG_ZERO);
+        assert_biteq!(f(F::NEG_ZERO, F::NEG_ZERO), F::NEG_ZERO);
+
+        assert_biteq!(f(F::ONE, F::ONE), F::ONE);
+        assert_biteq!(f(F::NEG_ONE, F::ONE), F::ONE);
+        assert_biteq!(f(F::ONE, F::NEG_ONE), F::NEG_ONE);
+        assert_biteq!(f(F::NEG_ONE, F::NEG_ONE), F::NEG_ONE);
+
+        assert_biteq!(f(F::INFINITY, F::INFINITY), F::INFINITY);
+        assert_biteq!(f(F::NEG_INFINITY, F::INFINITY), F::INFINITY);
+        assert_biteq!(f(F::INFINITY, F::NEG_INFINITY), F::NEG_INFINITY);
+        assert_biteq!(f(F::NEG_INFINITY, F::NEG_INFINITY), F::NEG_INFINITY);
+
+        // Not required but we expect it
+        assert_biteq!(f(F::NAN, F::NAN), F::NAN);
+        assert_biteq!(f(F::NEG_NAN, F::NAN), F::NAN);
+        assert_biteq!(f(F::NAN, F::NEG_NAN), F::NEG_NAN);
+        assert_biteq!(f(F::NEG_NAN, F::NEG_NAN), F::NEG_NAN);
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn spec_tests_f16() {
+        spec_test::<f16>(copysignf16);
+    }
+
+    #[test]
+    fn spec_tests_f32() {
+        spec_test::<f32>(copysignf);
+    }
+
+    #[test]
+    fn spec_tests_f64() {
+        spec_test::<f64>(copysign);
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn spec_tests_f128() {
+        spec_test::<f128>(copysignf128);
+    }
+}
diff --git a/src/math/copysignf.rs b/src/math/copysignf.rs
deleted file mode 100644
index 8b9bed4c0..000000000
--- a/src/math/copysignf.rs
+++ /dev/null
@@ -1,8 +0,0 @@
-/// Sign of Y, magnitude of X (f32)
-///
-/// Constructs a number with the magnitude (absolute value) of its
-/// first argument, `x`, and the sign of its second argument, `y`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn copysignf(x: f32, y: f32) -> f32 {
-    super::generic::copysign(x, y)
-}
diff --git a/src/math/copysignf128.rs b/src/math/copysignf128.rs
deleted file mode 100644
index 7bd81d42b..000000000
--- a/src/math/copysignf128.rs
+++ /dev/null
@@ -1,8 +0,0 @@
-/// Sign of Y, magnitude of X (f128)
-///
-/// Constructs a number with the magnitude (absolute value) of its
-/// first argument, `x`, and the sign of its second argument, `y`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn copysignf128(x: f128, y: f128) -> f128 {
-    super::generic::copysign(x, y)
-}
diff --git a/src/math/copysignf16.rs b/src/math/copysignf16.rs
deleted file mode 100644
index 820658686..000000000
--- a/src/math/copysignf16.rs
+++ /dev/null
@@ -1,8 +0,0 @@
-/// Sign of Y, magnitude of X (f16)
-///
-/// Constructs a number with the magnitude (absolute value) of its
-/// first argument, `x`, and the sign of its second argument, `y`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn copysignf16(x: f16, y: f16) -> f16 {
-    super::generic::copysign(x, y)
-}
diff --git a/src/math/fabs.rs b/src/math/fabs.rs
index 22867fab0..0050a309f 100644
--- a/src/math/fabs.rs
+++ b/src/math/fabs.rs
@@ -1,3 +1,28 @@
+/// Absolute value (magnitude) (f16)
+///
+/// Calculates the absolute value (magnitude) of the argument `x`,
+/// by direct manipulation of the bit representation of `x`.
+#[cfg(f16_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fabsf16(x: f16) -> f16 {
+    super::generic::fabs(x)
+}
+
+/// Absolute value (magnitude) (f32)
+///
+/// Calculates the absolute value (magnitude) of the argument `x`,
+/// by direct manipulation of the bit representation of `x`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fabsf(x: f32) -> f32 {
+    select_implementation! {
+        name: fabsf,
+        use_arch: all(target_arch = "wasm32", intrinsics_enabled),
+        args: x,
+    }
+
+    super::generic::fabs(x)
+}
+
 /// Absolute value (magnitude) (f64)
 ///
 /// Calculates the absolute value (magnitude) of the argument `x`,
@@ -13,25 +38,79 @@ pub fn fabs(x: f64) -> f64 {
     super::generic::fabs(x)
 }
 
+/// Absolute value (magnitude) (f128)
+///
+/// Calculates the absolute value (magnitude) of the argument `x`,
+/// by direct manipulation of the bit representation of `x`.
+#[cfg(f128_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fabsf128(x: f128) -> f128 {
+    super::generic::fabs(x)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
+    use crate::support::Float;
+
+    /// Based on https://en.cppreference.com/w/cpp/numeric/math/fabs
+    fn spec_test<F: Float>(f: impl Fn(F) -> F) {
+        assert_biteq!(f(F::ZERO), F::ZERO);
+        assert_biteq!(f(F::NEG_ZERO), F::ZERO);
+        assert_biteq!(f(F::INFINITY), F::INFINITY);
+        assert_biteq!(f(F::NEG_INFINITY), F::INFINITY);
+        assert!(f(F::NAN).is_nan());
+
+        // Not spec rewquired but we expect it
+        assert!(f(F::NAN).is_sign_positive());
+        assert!(f(F::from_bits(F::NAN.to_bits() | F::SIGN_MASK)).is_sign_positive());
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn sanity_check_f16() {
+        assert_eq!(fabsf16(-1.0f16), 1.0);
+        assert_eq!(fabsf16(2.8f16), 2.8);
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn spec_tests_f16() {
+        spec_test::<f16>(fabsf16);
+    }
+
+    #[test]
+    fn sanity_check_f32() {
+        assert_eq!(fabsf(-1.0f32), 1.0);
+        assert_eq!(fabsf(2.8f32), 2.8);
+    }
+
+    #[test]
+    fn spec_tests_f32() {
+        spec_test::<f32>(fabsf);
+    }
+
+    #[test]
+    fn sanity_check_f64() {
+        assert_eq!(fabs(-1.0f64), 1.0);
+        assert_eq!(fabs(2.8f64), 2.8);
+    }
+
+    #[test]
+    fn spec_tests_f64() {
+        spec_test::<f64>(fabs);
+    }
 
     #[test]
-    fn sanity_check() {
-        assert_eq!(fabs(-1.0), 1.0);
-        assert_eq!(fabs(2.8), 2.8);
+    #[cfg(f128_enabled)]
+    fn sanity_check_f128() {
+        assert_eq!(fabsf128(-1.0f128), 1.0);
+        assert_eq!(fabsf128(2.8f128), 2.8);
     }
 
-    /// The spec: https://en.cppreference.com/w/cpp/numeric/math/fabs
     #[test]
-    fn spec_tests() {
-        assert!(fabs(f64::NAN).is_nan());
-        for f in [0.0, -0.0].iter().copied() {
-            assert_eq!(fabs(f), 0.0);
-        }
-        for f in [f64::INFINITY, f64::NEG_INFINITY].iter().copied() {
-            assert_eq!(fabs(f), f64::INFINITY);
-        }
+    #[cfg(f128_enabled)]
+    fn spec_tests_f128() {
+        spec_test::<f128>(fabsf128);
     }
 }
diff --git a/src/math/fabsf.rs b/src/math/fabsf.rs
deleted file mode 100644
index e5820a26c..000000000
--- a/src/math/fabsf.rs
+++ /dev/null
@@ -1,39 +0,0 @@
-/// Absolute value (magnitude) (f32)
-///
-/// Calculates the absolute value (magnitude) of the argument `x`,
-/// by direct manipulation of the bit representation of `x`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn fabsf(x: f32) -> f32 {
-    select_implementation! {
-        name: fabsf,
-        use_arch: all(target_arch = "wasm32", intrinsics_enabled),
-        args: x,
-    }
-
-    super::generic::fabs(x)
-}
-
-// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520
-#[cfg(not(target_arch = "powerpc64"))]
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn sanity_check() {
-        assert_eq!(fabsf(-1.0), 1.0);
-        assert_eq!(fabsf(2.8), 2.8);
-    }
-
-    /// The spec: https://en.cppreference.com/w/cpp/numeric/math/fabs
-    #[test]
-    fn spec_tests() {
-        assert!(fabsf(f32::NAN).is_nan());
-        for f in [0.0, -0.0].iter().copied() {
-            assert_eq!(fabsf(f), 0.0);
-        }
-        for f in [f32::INFINITY, f32::NEG_INFINITY].iter().copied() {
-            assert_eq!(fabsf(f), f32::INFINITY);
-        }
-    }
-}
diff --git a/src/math/fabsf128.rs b/src/math/fabsf128.rs
deleted file mode 100644
index 46429ca49..000000000
--- a/src/math/fabsf128.rs
+++ /dev/null
@@ -1,31 +0,0 @@
-/// Absolute value (magnitude) (f128)
-///
-/// Calculates the absolute value (magnitude) of the argument `x`,
-/// by direct manipulation of the bit representation of `x`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn fabsf128(x: f128) -> f128 {
-    super::generic::fabs(x)
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn sanity_check() {
-        assert_eq!(fabsf128(-1.0), 1.0);
-        assert_eq!(fabsf128(2.8), 2.8);
-    }
-
-    /// The spec: https://en.cppreference.com/w/cpp/numeric/math/fabs
-    #[test]
-    fn spec_tests() {
-        assert!(fabsf128(f128::NAN).is_nan());
-        for f in [0.0, -0.0].iter().copied() {
-            assert_eq!(fabsf128(f), 0.0);
-        }
-        for f in [f128::INFINITY, f128::NEG_INFINITY].iter().copied() {
-            assert_eq!(fabsf128(f), f128::INFINITY);
-        }
-    }
-}
diff --git a/src/math/fabsf16.rs b/src/math/fabsf16.rs
deleted file mode 100644
index eee42ac6a..000000000
--- a/src/math/fabsf16.rs
+++ /dev/null
@@ -1,31 +0,0 @@
-/// Absolute value (magnitude) (f16)
-///
-/// Calculates the absolute value (magnitude) of the argument `x`,
-/// by direct manipulation of the bit representation of `x`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn fabsf16(x: f16) -> f16 {
-    super::generic::fabs(x)
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn sanity_check() {
-        assert_eq!(fabsf16(-1.0), 1.0);
-        assert_eq!(fabsf16(2.8), 2.8);
-    }
-
-    /// The spec: https://en.cppreference.com/w/cpp/numeric/math/fabs
-    #[test]
-    fn spec_tests() {
-        assert!(fabsf16(f16::NAN).is_nan());
-        for f in [0.0, -0.0].iter().copied() {
-            assert_eq!(fabsf16(f), 0.0);
-        }
-        for f in [f16::INFINITY, f16::NEG_INFINITY].iter().copied() {
-            assert_eq!(fabsf16(f), f16::INFINITY);
-        }
-    }
-}
diff --git a/src/math/fdim.rs b/src/math/fdim.rs
index 10ffa2881..082c5478b 100644
--- a/src/math/fdim.rs
+++ b/src/math/fdim.rs
@@ -1,3 +1,30 @@
+/// Positive difference (f16)
+///
+/// Determines the positive difference between arguments, returning:
+/// * x - y if x > y, or
+/// * +0    if x <= y, or
+/// * NAN   if either argument is NAN.
+///
+/// A range error may occur.
+#[cfg(f16_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fdimf16(x: f16, y: f16) -> f16 {
+    super::generic::fdim(x, y)
+}
+
+/// Positive difference (f32)
+///
+/// Determines the positive difference between arguments, returning:
+/// * x - y if x > y, or
+/// * +0    if x <= y, or
+/// * NAN   if either argument is NAN.
+///
+/// A range error may occur.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fdimf(x: f32, y: f32) -> f32 {
+    super::generic::fdim(x, y)
+}
+
 /// Positive difference (f64)
 ///
 /// Determines the positive difference between arguments, returning:
@@ -10,3 +37,17 @@
 pub fn fdim(x: f64, y: f64) -> f64 {
     super::generic::fdim(x, y)
 }
+
+/// Positive difference (f128)
+///
+/// Determines the positive difference between arguments, returning:
+/// * x - y if x > y, or
+/// * +0    if x <= y, or
+/// * NAN   if either argument is NAN.
+///
+/// A range error may occur.
+#[cfg(f128_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fdimf128(x: f128, y: f128) -> f128 {
+    super::generic::fdim(x, y)
+}
diff --git a/src/math/fdimf.rs b/src/math/fdimf.rs
deleted file mode 100644
index 367ef517c..000000000
--- a/src/math/fdimf.rs
+++ /dev/null
@@ -1,12 +0,0 @@
-/// Positive difference (f32)
-///
-/// Determines the positive difference between arguments, returning:
-/// * x - y if x > y, or
-/// * +0    if x <= y, or
-/// * NAN   if either argument is NAN.
-///
-/// A range error may occur.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn fdimf(x: f32, y: f32) -> f32 {
-    super::generic::fdim(x, y)
-}
diff --git a/src/math/fdimf128.rs b/src/math/fdimf128.rs
deleted file mode 100644
index 6f3d1d0ff..000000000
--- a/src/math/fdimf128.rs
+++ /dev/null
@@ -1,12 +0,0 @@
-/// Positive difference (f128)
-///
-/// Determines the positive difference between arguments, returning:
-/// * x - y if x > y, or
-/// * +0    if x <= y, or
-/// * NAN   if either argument is NAN.
-///
-/// A range error may occur.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn fdimf128(x: f128, y: f128) -> f128 {
-    super::generic::fdim(x, y)
-}
diff --git a/src/math/fdimf16.rs b/src/math/fdimf16.rs
deleted file mode 100644
index 37bd68858..000000000
--- a/src/math/fdimf16.rs
+++ /dev/null
@@ -1,12 +0,0 @@
-/// Positive difference (f16)
-///
-/// Determines the positive difference between arguments, returning:
-/// * x - y if x > y, or
-/// * +0    if x <= y, or
-/// * NAN   if either argument is NAN.
-///
-/// A range error may occur.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn fdimf16(x: f16, y: f16) -> f16 {
-    super::generic::fdim(x, y)
-}
diff --git a/src/math/floor.rs b/src/math/floor.rs
index b4f02abc4..3c5eab101 100644
--- a/src/math/floor.rs
+++ b/src/math/floor.rs
@@ -1,3 +1,12 @@
+/// Floor (f16)
+///
+/// Finds the nearest integer less than or equal to `x`.
+#[cfg(f16_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn floorf16(x: f16) -> f16 {
+    return super::generic::floor(x);
+}
+
 /// Floor (f64)
 ///
 /// Finds the nearest integer less than or equal to `x`.
@@ -12,3 +21,26 @@ pub fn floor(x: f64) -> f64 {
 
     return super::generic::floor(x);
 }
+
+/// Floor (f32)
+///
+/// Finds the nearest integer less than or equal to `x`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn floorf(x: f32) -> f32 {
+    select_implementation! {
+        name: floorf,
+        use_arch: all(target_arch = "wasm32", intrinsics_enabled),
+        args: x,
+    }
+
+    return super::generic::floor(x);
+}
+
+/// Floor (f128)
+///
+/// Finds the nearest integer less than or equal to `x`.
+#[cfg(f128_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn floorf128(x: f128) -> f128 {
+    return super::generic::floor(x);
+}
diff --git a/src/math/floorf.rs b/src/math/floorf.rs
deleted file mode 100644
index 16957b7f3..000000000
--- a/src/math/floorf.rs
+++ /dev/null
@@ -1,13 +0,0 @@
-/// Floor (f32)
-///
-/// Finds the nearest integer less than or equal to `x`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn floorf(x: f32) -> f32 {
-    select_implementation! {
-        name: floorf,
-        use_arch: all(target_arch = "wasm32", intrinsics_enabled),
-        args: x,
-    }
-
-    return super::generic::floor(x);
-}
diff --git a/src/math/floorf128.rs b/src/math/floorf128.rs
deleted file mode 100644
index 9a9fe4151..000000000
--- a/src/math/floorf128.rs
+++ /dev/null
@@ -1,7 +0,0 @@
-/// Floor (f128)
-///
-/// Finds the nearest integer less than or equal to `x`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn floorf128(x: f128) -> f128 {
-    return super::generic::floor(x);
-}
diff --git a/src/math/floorf16.rs b/src/math/floorf16.rs
deleted file mode 100644
index f9b868e04..000000000
--- a/src/math/floorf16.rs
+++ /dev/null
@@ -1,7 +0,0 @@
-/// Floor (f16)
-///
-/// Finds the nearest integer less than or equal to `x`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn floorf16(x: f16) -> f16 {
-    return super::generic::floor(x);
-}
diff --git a/src/math/fmin_fmax.rs b/src/math/fmin_fmax.rs
index 4f9136dbb..2947b783e 100644
--- a/src/math/fmin_fmax.rs
+++ b/src/math/fmin_fmax.rs
@@ -73,3 +73,95 @@ pub fn fmax(x: f64, y: f64) -> f64 {
 pub fn fmaxf128(x: f128, y: f128) -> f128 {
     super::generic::fmax(x, y)
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::support::{Float, Hexf};
+
+    fn fmin_spec_test<F: Float>(f: impl Fn(F, F) -> F) {
+        let cases = [
+            (F::ZERO, F::ZERO, F::ZERO),
+            (F::ONE, F::ONE, F::ONE),
+            (F::ZERO, F::ONE, F::ZERO),
+            (F::ONE, F::ZERO, F::ZERO),
+            (F::ZERO, F::NEG_ONE, F::NEG_ONE),
+            (F::NEG_ONE, F::ZERO, F::NEG_ONE),
+            (F::INFINITY, F::ZERO, F::ZERO),
+            (F::NEG_INFINITY, F::ZERO, F::NEG_INFINITY),
+            (F::NAN, F::ZERO, F::ZERO),
+            (F::ZERO, F::NAN, F::ZERO),
+            (F::NAN, F::NAN, F::NAN),
+        ];
+
+        for (x, y, res) in cases {
+            let val = f(x, y);
+            assert_biteq!(val, res, "fmin({}, {})", Hexf(x), Hexf(y));
+        }
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn fmin_spec_tests_f16() {
+        fmin_spec_test::<f16>(fminf16);
+    }
+
+    #[test]
+    fn fmin_spec_tests_f32() {
+        fmin_spec_test::<f32>(fminf);
+    }
+
+    #[test]
+    fn fmin_spec_tests_f64() {
+        fmin_spec_test::<f64>(fmin);
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn fmin_spec_tests_f128() {
+        fmin_spec_test::<f128>(fminf128);
+    }
+
+    fn fmax_spec_test<F: Float>(f: impl Fn(F, F) -> F) {
+        let cases = [
+            (F::ZERO, F::ZERO, F::ZERO),
+            (F::ONE, F::ONE, F::ONE),
+            (F::ZERO, F::ONE, F::ONE),
+            (F::ONE, F::ZERO, F::ONE),
+            (F::ZERO, F::NEG_ONE, F::ZERO),
+            (F::NEG_ONE, F::ZERO, F::ZERO),
+            (F::INFINITY, F::ZERO, F::INFINITY),
+            (F::NEG_INFINITY, F::ZERO, F::ZERO),
+            (F::NAN, F::ZERO, F::ZERO),
+            (F::ZERO, F::NAN, F::ZERO),
+            (F::NAN, F::NAN, F::NAN),
+        ];
+
+        for (x, y, res) in cases {
+            let val = f(x, y);
+            assert_biteq!(val, res, "fmax({}, {})", Hexf(x), Hexf(y));
+        }
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn fmax_spec_tests_f16() {
+        fmax_spec_test::<f16>(fmaxf16);
+    }
+
+    #[test]
+    fn fmax_spec_tests_f32() {
+        fmax_spec_test::<f32>(fmaxf);
+    }
+
+    #[test]
+    fn fmax_spec_tests_f64() {
+        fmax_spec_test::<f64>(fmax);
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn fmax_spec_tests_f128() {
+        fmax_spec_test::<f128>(fmaxf128);
+    }
+}
diff --git a/src/math/fminimum_fmaximum.rs b/src/math/fminimum_fmaximum.rs
index fd3c5ed10..b7999e273 100644
--- a/src/math/fminimum_fmaximum.rs
+++ b/src/math/fminimum_fmaximum.rs
@@ -65,3 +65,99 @@ pub fn fmaximum(x: f64, y: f64) -> f64 {
 pub fn fmaximumf128(x: f128, y: f128) -> f128 {
     super::generic::fmaximum(x, y)
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::support::{Float, Hexf};
+
+    fn fminimum_spec_test<F: Float>(f: impl Fn(F, F) -> F) {
+        let cases = [
+            (F::ZERO, F::ZERO, F::ZERO),
+            (F::ONE, F::ONE, F::ONE),
+            (F::ZERO, F::ONE, F::ZERO),
+            (F::ONE, F::ZERO, F::ZERO),
+            (F::ZERO, F::NEG_ONE, F::NEG_ONE),
+            (F::NEG_ONE, F::ZERO, F::NEG_ONE),
+            (F::INFINITY, F::ZERO, F::ZERO),
+            (F::NEG_INFINITY, F::ZERO, F::NEG_INFINITY),
+            (F::NAN, F::ZERO, F::NAN),
+            (F::ZERO, F::NAN, F::NAN),
+            (F::NAN, F::NAN, F::NAN),
+            (F::ZERO, F::NEG_ZERO, F::NEG_ZERO),
+            (F::NEG_ZERO, F::ZERO, F::NEG_ZERO),
+        ];
+
+        for (x, y, res) in cases {
+            let val = f(x, y);
+            assert_biteq!(val, res, "fminimum({}, {})", Hexf(x), Hexf(y));
+        }
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn fminimum_spec_tests_f16() {
+        fminimum_spec_test::<f16>(fminimumf16);
+    }
+
+    #[test]
+    fn fminimum_spec_tests_f32() {
+        fminimum_spec_test::<f32>(fminimumf);
+    }
+
+    #[test]
+    fn fminimum_spec_tests_f64() {
+        fminimum_spec_test::<f64>(fminimum);
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn fminimum_spec_tests_f128() {
+        fminimum_spec_test::<f128>(fminimumf128);
+    }
+
+    fn fmaximum_spec_test<F: Float>(f: impl Fn(F, F) -> F) {
+        let cases = [
+            (F::ZERO, F::ZERO, F::ZERO),
+            (F::ONE, F::ONE, F::ONE),
+            (F::ZERO, F::ONE, F::ONE),
+            (F::ONE, F::ZERO, F::ONE),
+            (F::ZERO, F::NEG_ONE, F::ZERO),
+            (F::NEG_ONE, F::ZERO, F::ZERO),
+            (F::INFINITY, F::ZERO, F::INFINITY),
+            (F::NEG_INFINITY, F::ZERO, F::ZERO),
+            (F::NAN, F::ZERO, F::NAN),
+            (F::ZERO, F::NAN, F::NAN),
+            (F::NAN, F::NAN, F::NAN),
+            (F::ZERO, F::NEG_ZERO, F::ZERO),
+            (F::NEG_ZERO, F::ZERO, F::ZERO),
+        ];
+
+        for (x, y, res) in cases {
+            let val = f(x, y);
+            assert_biteq!(val, res, "fmaximum({}, {})", Hexf(x), Hexf(y));
+        }
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn fmaximum_spec_tests_f16() {
+        fmaximum_spec_test::<f16>(fmaximumf16);
+    }
+
+    #[test]
+    fn fmaximum_spec_tests_f32() {
+        fmaximum_spec_test::<f32>(fmaximumf);
+    }
+
+    #[test]
+    fn fmaximum_spec_tests_f64() {
+        fmaximum_spec_test::<f64>(fmaximum);
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn fmaximum_spec_tests_f128() {
+        fmaximum_spec_test::<f128>(fmaximumf128);
+    }
+}
diff --git a/src/math/fminimum_fmaximum_num.rs b/src/math/fminimum_fmaximum_num.rs
index 640ddfc9b..180d21f72 100644
--- a/src/math/fminimum_fmaximum_num.rs
+++ b/src/math/fminimum_fmaximum_num.rs
@@ -65,3 +65,99 @@ pub fn fmaximum_num(x: f64, y: f64) -> f64 {
 pub fn fmaximum_numf128(x: f128, y: f128) -> f128 {
     super::generic::fmaximum_num(x, y)
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::support::{Float, Hexf};
+
+    fn fminimum_num_spec_test<F: Float>(f: impl Fn(F, F) -> F) {
+        let cases = [
+            (F::ZERO, F::ZERO, F::ZERO),
+            (F::ONE, F::ONE, F::ONE),
+            (F::ZERO, F::ONE, F::ZERO),
+            (F::ONE, F::ZERO, F::ZERO),
+            (F::ZERO, F::NEG_ONE, F::NEG_ONE),
+            (F::NEG_ONE, F::ZERO, F::NEG_ONE),
+            (F::INFINITY, F::ZERO, F::ZERO),
+            (F::NEG_INFINITY, F::ZERO, F::NEG_INFINITY),
+            (F::NAN, F::ZERO, F::ZERO),
+            (F::ZERO, F::NAN, F::ZERO),
+            (F::NAN, F::NAN, F::NAN),
+            (F::ZERO, F::NEG_ZERO, F::NEG_ZERO),
+            (F::NEG_ZERO, F::ZERO, F::NEG_ZERO),
+        ];
+
+        for (x, y, res) in cases {
+            let val = f(x, y);
+            assert_biteq!(val, res, "fminimum_num({}, {})", Hexf(x), Hexf(y));
+        }
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn fminimum_num_spec_tests_f16() {
+        fminimum_num_spec_test::<f16>(fminimum_numf16);
+    }
+
+    #[test]
+    fn fminimum_num_spec_tests_f32() {
+        fminimum_num_spec_test::<f32>(fminimum_numf);
+    }
+
+    #[test]
+    fn fminimum_num_spec_tests_f64() {
+        fminimum_num_spec_test::<f64>(fminimum_num);
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn fminimum_num_spec_tests_f128() {
+        fminimum_num_spec_test::<f128>(fminimum_numf128);
+    }
+
+    fn fmaximum_num_spec_test<F: Float>(f: impl Fn(F, F) -> F) {
+        let cases = [
+            (F::ZERO, F::ZERO, F::ZERO),
+            (F::ONE, F::ONE, F::ONE),
+            (F::ZERO, F::ONE, F::ONE),
+            (F::ONE, F::ZERO, F::ONE),
+            (F::ZERO, F::NEG_ONE, F::ZERO),
+            (F::NEG_ONE, F::ZERO, F::ZERO),
+            (F::INFINITY, F::ZERO, F::INFINITY),
+            (F::NEG_INFINITY, F::ZERO, F::ZERO),
+            (F::NAN, F::ZERO, F::ZERO),
+            (F::ZERO, F::NAN, F::ZERO),
+            (F::NAN, F::NAN, F::NAN),
+            (F::ZERO, F::NEG_ZERO, F::ZERO),
+            (F::NEG_ZERO, F::ZERO, F::ZERO),
+        ];
+
+        for (x, y, res) in cases {
+            let val = f(x, y);
+            assert_biteq!(val, res, "fmaximum_num({}, {})", Hexf(x), Hexf(y));
+        }
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn fmaximum_num_spec_tests_f16() {
+        fmaximum_num_spec_test::<f16>(fmaximum_numf16);
+    }
+
+    #[test]
+    fn fmaximum_num_spec_tests_f32() {
+        fmaximum_num_spec_test::<f32>(fmaximum_numf);
+    }
+
+    #[test]
+    fn fmaximum_num_spec_tests_f64() {
+        fmaximum_num_spec_test::<f64>(fmaximum_num);
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn fmaximum_num_spec_tests_f128() {
+        fmaximum_num_spec_test::<f128>(fmaximum_numf128);
+    }
+}
diff --git a/src/math/generic/fmax.rs b/src/math/generic/fmax.rs
index 039ffce9f..29a031100 100644
--- a/src/math/generic/fmax.rs
+++ b/src/math/generic/fmax.rs
@@ -22,52 +22,3 @@ pub fn fmax<F: Float>(x: F, y: F) -> F {
     // Canonicalize
     res * F::ONE
 }
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::support::Hexf;
-
-    fn spec_test<F: Float>() {
-        let cases = [
-            (F::ZERO, F::ZERO, F::ZERO),
-            (F::ONE, F::ONE, F::ONE),
-            (F::ZERO, F::ONE, F::ONE),
-            (F::ONE, F::ZERO, F::ONE),
-            (F::ZERO, F::NEG_ONE, F::ZERO),
-            (F::NEG_ONE, F::ZERO, F::ZERO),
-            (F::INFINITY, F::ZERO, F::INFINITY),
-            (F::NEG_INFINITY, F::ZERO, F::ZERO),
-            (F::NAN, F::ZERO, F::ZERO),
-            (F::ZERO, F::NAN, F::ZERO),
-            (F::NAN, F::NAN, F::NAN),
-        ];
-
-        for (x, y, res) in cases {
-            let val = fmax(x, y);
-            assert_biteq!(val, res, "fmax({}, {})", Hexf(x), Hexf(y));
-        }
-    }
-
-    #[test]
-    #[cfg(f16_enabled)]
-    fn spec_tests_f16() {
-        spec_test::<f16>();
-    }
-
-    #[test]
-    fn spec_tests_f32() {
-        spec_test::<f32>();
-    }
-
-    #[test]
-    fn spec_tests_f64() {
-        spec_test::<f64>();
-    }
-
-    #[test]
-    #[cfg(f128_enabled)]
-    fn spec_tests_f128() {
-        spec_test::<f128>();
-    }
-}
diff --git a/src/math/generic/fmaximum.rs b/src/math/generic/fmaximum.rs
index b0fde88e8..9e8d1739f 100644
--- a/src/math/generic/fmaximum.rs
+++ b/src/math/generic/fmaximum.rs
@@ -25,54 +25,3 @@ pub fn fmaximum<F: Float>(x: F, y: F) -> F {
     // Canonicalize
     res * F::ONE
 }
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::support::Hexf;
-
-    fn spec_test<F: Float>() {
-        let cases = [
-            (F::ZERO, F::ZERO, F::ZERO),
-            (F::ONE, F::ONE, F::ONE),
-            (F::ZERO, F::ONE, F::ONE),
-            (F::ONE, F::ZERO, F::ONE),
-            (F::ZERO, F::NEG_ONE, F::ZERO),
-            (F::NEG_ONE, F::ZERO, F::ZERO),
-            (F::INFINITY, F::ZERO, F::INFINITY),
-            (F::NEG_INFINITY, F::ZERO, F::ZERO),
-            (F::NAN, F::ZERO, F::NAN),
-            (F::ZERO, F::NAN, F::NAN),
-            (F::NAN, F::NAN, F::NAN),
-            (F::ZERO, F::NEG_ZERO, F::ZERO),
-            (F::NEG_ZERO, F::ZERO, F::ZERO),
-        ];
-
-        for (x, y, res) in cases {
-            let val = fmaximum(x, y);
-            assert_biteq!(val, res, "fmaximum({}, {})", Hexf(x), Hexf(y));
-        }
-    }
-
-    #[test]
-    #[cfg(f16_enabled)]
-    fn spec_tests_f16() {
-        spec_test::<f16>();
-    }
-
-    #[test]
-    fn spec_tests_f32() {
-        spec_test::<f32>();
-    }
-
-    #[test]
-    fn spec_tests_f64() {
-        spec_test::<f64>();
-    }
-
-    #[test]
-    #[cfg(f128_enabled)]
-    fn spec_tests_f128() {
-        spec_test::<f128>();
-    }
-}
diff --git a/src/math/generic/fmaximum_num.rs b/src/math/generic/fmaximum_num.rs
index 68b03109d..756ef5d9f 100644
--- a/src/math/generic/fmaximum_num.rs
+++ b/src/math/generic/fmaximum_num.rs
@@ -24,54 +24,3 @@ pub fn fmaximum_num<F: Float>(x: F, y: F) -> F {
     // Canonicalize
     res * F::ONE
 }
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::support::Hexf;
-
-    fn spec_test<F: Float>() {
-        let cases = [
-            (F::ZERO, F::ZERO, F::ZERO),
-            (F::ONE, F::ONE, F::ONE),
-            (F::ZERO, F::ONE, F::ONE),
-            (F::ONE, F::ZERO, F::ONE),
-            (F::ZERO, F::NEG_ONE, F::ZERO),
-            (F::NEG_ONE, F::ZERO, F::ZERO),
-            (F::INFINITY, F::ZERO, F::INFINITY),
-            (F::NEG_INFINITY, F::ZERO, F::ZERO),
-            (F::NAN, F::ZERO, F::ZERO),
-            (F::ZERO, F::NAN, F::ZERO),
-            (F::NAN, F::NAN, F::NAN),
-            (F::ZERO, F::NEG_ZERO, F::ZERO),
-            (F::NEG_ZERO, F::ZERO, F::ZERO),
-        ];
-
-        for (x, y, res) in cases {
-            let val = fmaximum_num(x, y);
-            assert_biteq!(val, res, "fmaximum_num({}, {})", Hexf(x), Hexf(y));
-        }
-    }
-
-    #[test]
-    #[cfg(f16_enabled)]
-    fn spec_tests_f16() {
-        spec_test::<f16>();
-    }
-
-    #[test]
-    fn spec_tests_f32() {
-        spec_test::<f32>();
-    }
-
-    #[test]
-    fn spec_tests_f64() {
-        spec_test::<f64>();
-    }
-
-    #[test]
-    #[cfg(f128_enabled)]
-    fn spec_tests_f128() {
-        spec_test::<f128>();
-    }
-}
diff --git a/src/math/generic/fmin.rs b/src/math/generic/fmin.rs
index 2aa7f6af7..69fbf85a1 100644
--- a/src/math/generic/fmin.rs
+++ b/src/math/generic/fmin.rs
@@ -21,52 +21,3 @@ pub fn fmin<F: Float>(x: F, y: F) -> F {
     // Canonicalize
     res * F::ONE
 }
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::support::Hexf;
-
-    fn spec_test<F: Float>() {
-        let cases = [
-            (F::ZERO, F::ZERO, F::ZERO),
-            (F::ONE, F::ONE, F::ONE),
-            (F::ZERO, F::ONE, F::ZERO),
-            (F::ONE, F::ZERO, F::ZERO),
-            (F::ZERO, F::NEG_ONE, F::NEG_ONE),
-            (F::NEG_ONE, F::ZERO, F::NEG_ONE),
-            (F::INFINITY, F::ZERO, F::ZERO),
-            (F::NEG_INFINITY, F::ZERO, F::NEG_INFINITY),
-            (F::NAN, F::ZERO, F::ZERO),
-            (F::ZERO, F::NAN, F::ZERO),
-            (F::NAN, F::NAN, F::NAN),
-        ];
-
-        for (x, y, res) in cases {
-            let val = fmin(x, y);
-            assert_biteq!(val, res, "fmin({}, {})", Hexf(x), Hexf(y));
-        }
-    }
-
-    #[test]
-    #[cfg(f16_enabled)]
-    fn spec_tests_f16() {
-        spec_test::<f16>();
-    }
-
-    #[test]
-    fn spec_tests_f32() {
-        spec_test::<f32>();
-    }
-
-    #[test]
-    fn spec_tests_f64() {
-        spec_test::<f64>();
-    }
-
-    #[test]
-    #[cfg(f128_enabled)]
-    fn spec_tests_f128() {
-        spec_test::<f128>();
-    }
-}
diff --git a/src/math/generic/fminimum.rs b/src/math/generic/fminimum.rs
index e01c88646..ee5493880 100644
--- a/src/math/generic/fminimum.rs
+++ b/src/math/generic/fminimum.rs
@@ -25,54 +25,3 @@ pub fn fminimum<F: Float>(x: F, y: F) -> F {
     // Canonicalize
     res * F::ONE
 }
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::support::Hexf;
-
-    fn spec_test<F: Float>() {
-        let cases = [
-            (F::ZERO, F::ZERO, F::ZERO),
-            (F::ONE, F::ONE, F::ONE),
-            (F::ZERO, F::ONE, F::ZERO),
-            (F::ONE, F::ZERO, F::ZERO),
-            (F::ZERO, F::NEG_ONE, F::NEG_ONE),
-            (F::NEG_ONE, F::ZERO, F::NEG_ONE),
-            (F::INFINITY, F::ZERO, F::ZERO),
-            (F::NEG_INFINITY, F::ZERO, F::NEG_INFINITY),
-            (F::NAN, F::ZERO, F::NAN),
-            (F::ZERO, F::NAN, F::NAN),
-            (F::NAN, F::NAN, F::NAN),
-            (F::ZERO, F::NEG_ZERO, F::NEG_ZERO),
-            (F::NEG_ZERO, F::ZERO, F::NEG_ZERO),
-        ];
-
-        for (x, y, res) in cases {
-            let val = fminimum(x, y);
-            assert_biteq!(val, res, "fminimum({}, {})", Hexf(x), Hexf(y));
-        }
-    }
-
-    #[test]
-    #[cfg(f16_enabled)]
-    fn spec_tests_f16() {
-        spec_test::<f16>();
-    }
-
-    #[test]
-    fn spec_tests_f32() {
-        spec_test::<f32>();
-    }
-
-    #[test]
-    fn spec_tests_f64() {
-        spec_test::<f64>();
-    }
-
-    #[test]
-    #[cfg(f128_enabled)]
-    fn spec_tests_f128() {
-        spec_test::<f128>();
-    }
-}
diff --git a/src/math/generic/fminimum_num.rs b/src/math/generic/fminimum_num.rs
index 3e97b893b..966618328 100644
--- a/src/math/generic/fminimum_num.rs
+++ b/src/math/generic/fminimum_num.rs
@@ -24,54 +24,3 @@ pub fn fminimum_num<F: Float>(x: F, y: F) -> F {
     // Canonicalize
     res * F::ONE
 }
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::support::Hexf;
-
-    fn spec_test<F: Float>() {
-        let cases = [
-            (F::ZERO, F::ZERO, F::ZERO),
-            (F::ONE, F::ONE, F::ONE),
-            (F::ZERO, F::ONE, F::ZERO),
-            (F::ONE, F::ZERO, F::ZERO),
-            (F::ZERO, F::NEG_ONE, F::NEG_ONE),
-            (F::NEG_ONE, F::ZERO, F::NEG_ONE),
-            (F::INFINITY, F::ZERO, F::ZERO),
-            (F::NEG_INFINITY, F::ZERO, F::NEG_INFINITY),
-            (F::NAN, F::ZERO, F::ZERO),
-            (F::ZERO, F::NAN, F::ZERO),
-            (F::NAN, F::NAN, F::NAN),
-            (F::ZERO, F::NEG_ZERO, F::NEG_ZERO),
-            (F::NEG_ZERO, F::ZERO, F::NEG_ZERO),
-        ];
-
-        for (x, y, res) in cases {
-            let val = fminimum_num(x, y);
-            assert_biteq!(val, res, "fminimum_num({}, {})", Hexf(x), Hexf(y));
-        }
-    }
-
-    #[test]
-    #[cfg(f16_enabled)]
-    fn spec_tests_f16() {
-        spec_test::<f16>();
-    }
-
-    #[test]
-    fn spec_tests_f32() {
-        spec_test::<f32>();
-    }
-
-    #[test]
-    fn spec_tests_f64() {
-        spec_test::<f64>();
-    }
-
-    #[test]
-    #[cfg(f128_enabled)]
-    fn spec_tests_f128() {
-        spec_test::<f128>();
-    }
-}
diff --git a/src/math/generic/scalbn.rs b/src/math/generic/scalbn.rs
index aaa243933..b2696e5cc 100644
--- a/src/math/generic/scalbn.rs
+++ b/src/math/generic/scalbn.rs
@@ -118,68 +118,3 @@ where
     let scale = F::from_parts(false, (F::EXP_BIAS as i32 + n) as u32, zero);
     x * scale
 }
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    // Tests against N3220
-    fn spec_test<F: Float>()
-    where
-        u32: CastInto<F::Int>,
-        F::Int: CastFrom<i32>,
-        F::Int: CastFrom<u32>,
-    {
-        // `scalbn(±0, n)` returns `±0`.
-        assert_biteq!(scalbn(F::NEG_ZERO, 10), F::NEG_ZERO);
-        assert_biteq!(scalbn(F::NEG_ZERO, 0), F::NEG_ZERO);
-        assert_biteq!(scalbn(F::NEG_ZERO, -10), F::NEG_ZERO);
-        assert_biteq!(scalbn(F::ZERO, 10), F::ZERO);
-        assert_biteq!(scalbn(F::ZERO, 0), F::ZERO);
-        assert_biteq!(scalbn(F::ZERO, -10), F::ZERO);
-
-        // `scalbn(x, 0)` returns `x`.
-        assert_biteq!(scalbn(F::MIN, 0), F::MIN);
-        assert_biteq!(scalbn(F::MAX, 0), F::MAX);
-        assert_biteq!(scalbn(F::INFINITY, 0), F::INFINITY);
-        assert_biteq!(scalbn(F::NEG_INFINITY, 0), F::NEG_INFINITY);
-        assert_biteq!(scalbn(F::ZERO, 0), F::ZERO);
-        assert_biteq!(scalbn(F::NEG_ZERO, 0), F::NEG_ZERO);
-
-        // `scalbn(±∞, n)` returns `±∞`.
-        assert_biteq!(scalbn(F::INFINITY, 10), F::INFINITY);
-        assert_biteq!(scalbn(F::INFINITY, -10), F::INFINITY);
-        assert_biteq!(scalbn(F::NEG_INFINITY, 10), F::NEG_INFINITY);
-        assert_biteq!(scalbn(F::NEG_INFINITY, -10), F::NEG_INFINITY);
-
-        // NaN should remain NaNs.
-        assert!(scalbn(F::NAN, 10).is_nan());
-        assert!(scalbn(F::NAN, 0).is_nan());
-        assert!(scalbn(F::NAN, -10).is_nan());
-        assert!(scalbn(-F::NAN, 10).is_nan());
-        assert!(scalbn(-F::NAN, 0).is_nan());
-        assert!(scalbn(-F::NAN, -10).is_nan());
-    }
-
-    #[test]
-    #[cfg(f16_enabled)]
-    fn spec_test_f16() {
-        spec_test::<f16>();
-    }
-
-    #[test]
-    fn spec_test_f32() {
-        spec_test::<f32>();
-    }
-
-    #[test]
-    fn spec_test_f64() {
-        spec_test::<f64>();
-    }
-
-    #[test]
-    #[cfg(f128_enabled)]
-    fn spec_test_f128() {
-        spec_test::<f128>();
-    }
-}
diff --git a/src/math/ldexp.rs b/src/math/ldexp.rs
index e46242e55..24899ba30 100644
--- a/src/math/ldexp.rs
+++ b/src/math/ldexp.rs
@@ -1,4 +1,21 @@
+#[cfg(f16_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn ldexpf16(x: f16, n: i32) -> f16 {
+    super::scalbnf16(x, n)
+}
+
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn ldexpf(x: f32, n: i32) -> f32 {
+    super::scalbnf(x, n)
+}
+
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn ldexp(x: f64, n: i32) -> f64 {
     super::scalbn(x, n)
 }
+
+#[cfg(f128_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn ldexpf128(x: f128, n: i32) -> f128 {
+    super::scalbnf128(x, n)
+}
diff --git a/src/math/ldexpf.rs b/src/math/ldexpf.rs
deleted file mode 100644
index 95b27fc49..000000000
--- a/src/math/ldexpf.rs
+++ /dev/null
@@ -1,4 +0,0 @@
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn ldexpf(x: f32, n: i32) -> f32 {
-    super::scalbnf(x, n)
-}
diff --git a/src/math/ldexpf128.rs b/src/math/ldexpf128.rs
deleted file mode 100644
index b35277d15..000000000
--- a/src/math/ldexpf128.rs
+++ /dev/null
@@ -1,4 +0,0 @@
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn ldexpf128(x: f128, n: i32) -> f128 {
-    super::scalbnf128(x, n)
-}
diff --git a/src/math/ldexpf16.rs b/src/math/ldexpf16.rs
deleted file mode 100644
index 8de6cffd6..000000000
--- a/src/math/ldexpf16.rs
+++ /dev/null
@@ -1,4 +0,0 @@
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn ldexpf16(x: f16, n: i32) -> f16 {
-    super::scalbnf16(x, n)
-}
diff --git a/src/math/mod.rs b/src/math/mod.rs
index 5fc8fa0b3..023cf67a3 100644
--- a/src/math/mod.rs
+++ b/src/math/mod.rs
@@ -140,9 +140,7 @@ mod atanhf;
 mod cbrt;
 mod cbrtf;
 mod ceil;
-mod ceilf;
 mod copysign;
-mod copysignf;
 mod cos;
 mod cosf;
 mod cosh;
@@ -158,11 +156,8 @@ mod expf;
 mod expm1;
 mod expm1f;
 mod fabs;
-mod fabsf;
 mod fdim;
-mod fdimf;
 mod floor;
-mod floorf;
 mod fma;
 mod fma_wide;
 mod fmin_fmax;
@@ -183,7 +178,6 @@ mod j1f;
 mod jn;
 mod jnf;
 mod ldexp;
-mod ldexpf;
 mod lgamma;
 mod lgamma_r;
 mod lgammaf;
@@ -209,9 +203,7 @@ mod remquof;
 mod rint;
 mod round;
 mod roundeven;
-mod roundf;
 mod scalbn;
-mod scalbnf;
 mod sin;
 mod sincos;
 mod sincosf;
@@ -219,7 +211,6 @@ mod sinf;
 mod sinh;
 mod sinhf;
 mod sqrt;
-mod sqrtf;
 mod tan;
 mod tanf;
 mod tanh;
@@ -227,7 +218,6 @@ mod tanhf;
 mod tgamma;
 mod tgammaf;
 mod trunc;
-mod truncf;
 
 // Use separated imports instead of {}-grouped imports for easier merging.
 pub use self::acos::acos;
@@ -246,10 +236,8 @@ pub use self::atanh::atanh;
 pub use self::atanhf::atanhf;
 pub use self::cbrt::cbrt;
 pub use self::cbrtf::cbrtf;
-pub use self::ceil::ceil;
-pub use self::ceilf::ceilf;
-pub use self::copysign::copysign;
-pub use self::copysignf::copysignf;
+pub use self::ceil::{ceil, ceilf};
+pub use self::copysign::{copysign, copysignf};
 pub use self::cos::cos;
 pub use self::cosf::cosf;
 pub use self::cosh::cosh;
@@ -264,12 +252,9 @@ pub use self::exp10f::exp10f;
 pub use self::expf::expf;
 pub use self::expm1::expm1;
 pub use self::expm1f::expm1f;
-pub use self::fabs::fabs;
-pub use self::fabsf::fabsf;
-pub use self::fdim::fdim;
-pub use self::fdimf::fdimf;
-pub use self::floor::floor;
-pub use self::floorf::floorf;
+pub use self::fabs::{fabs, fabsf};
+pub use self::fdim::{fdim, fdimf};
+pub use self::floor::{floor, floorf};
 pub use self::fma::fma;
 pub use self::fma_wide::fmaf;
 pub use self::fmin_fmax::{fmax, fmaxf, fmin, fminf};
@@ -289,8 +274,7 @@ pub use self::j1::{j1, y1};
 pub use self::j1f::{j1f, y1f};
 pub use self::jn::{jn, yn};
 pub use self::jnf::{jnf, ynf};
-pub use self::ldexp::ldexp;
-pub use self::ldexpf::ldexpf;
+pub use self::ldexp::{ldexp, ldexpf};
 pub use self::lgamma::lgamma;
 pub use self::lgamma_r::lgamma_r;
 pub use self::lgammaf::lgammaf;
@@ -314,61 +298,47 @@ pub use self::remainderf::remainderf;
 pub use self::remquo::remquo;
 pub use self::remquof::remquof;
 pub use self::rint::{rint, rintf};
-pub use self::round::round;
+pub use self::round::{round, roundf};
 pub use self::roundeven::{roundeven, roundevenf};
-pub use self::roundf::roundf;
-pub use self::scalbn::scalbn;
-pub use self::scalbnf::scalbnf;
+pub use self::scalbn::{scalbn, scalbnf};
 pub use self::sin::sin;
 pub use self::sincos::sincos;
 pub use self::sincosf::sincosf;
 pub use self::sinf::sinf;
 pub use self::sinh::sinh;
 pub use self::sinhf::sinhf;
-pub use self::sqrt::sqrt;
-pub use self::sqrtf::sqrtf;
+pub use self::sqrt::{sqrt, sqrtf};
 pub use self::tan::tan;
 pub use self::tanf::tanf;
 pub use self::tanh::tanh;
 pub use self::tanhf::tanhf;
 pub use self::tgamma::tgamma;
 pub use self::tgammaf::tgammaf;
-pub use self::trunc::trunc;
-pub use self::truncf::truncf;
+pub use self::trunc::{trunc, truncf};
 
 cfg_if! {
     if #[cfg(f16_enabled)] {
         // verify-sorted-start
-        mod ceilf16;
-        mod copysignf16;
-        mod fabsf16;
-        mod fdimf16;
-        mod floorf16;
         mod fmodf16;
-        mod ldexpf16;
-        mod roundf16;
-        mod scalbnf16;
-        mod sqrtf16;
-        mod truncf16;
         // verify-sorted-end
 
         // verify-sorted-start
-        pub use self::ceilf16::ceilf16;
-        pub use self::copysignf16::copysignf16;
-        pub use self::fabsf16::fabsf16;
-        pub use self::fdimf16::fdimf16;
-        pub use self::floorf16::floorf16;
+        pub use self::ceil::ceilf16;
+        pub use self::copysign::copysignf16;
+        pub use self::fabs::fabsf16;
+        pub use self::fdim::fdimf16;
+        pub use self::floor::floorf16;
         pub use self::fmin_fmax::{fmaxf16, fminf16};
         pub use self::fminimum_fmaximum::{fmaximumf16, fminimumf16};
         pub use self::fminimum_fmaximum_num::{fmaximum_numf16, fminimum_numf16};
         pub use self::fmodf16::fmodf16;
-        pub use self::ldexpf16::ldexpf16;
+        pub use self::ldexp::ldexpf16;
         pub use self::rint::rintf16;
+        pub use self::round::roundf16;
         pub use self::roundeven::roundevenf16;
-        pub use self::roundf16::roundf16;
-        pub use self::scalbnf16::scalbnf16;
-        pub use self::sqrtf16::sqrtf16;
-        pub use self::truncf16::truncf16;
+        pub use self::scalbn::scalbnf16;
+        pub use self::sqrt::sqrtf16;
+        pub use self::trunc::truncf16;
         // verify-sorted-end
 
         #[allow(unused_imports)]
@@ -379,37 +349,27 @@ cfg_if! {
 cfg_if! {
     if #[cfg(f128_enabled)] {
         // verify-sorted-start
-        mod ceilf128;
-        mod copysignf128;
-        mod fabsf128;
-        mod fdimf128;
-        mod floorf128;
         mod fmodf128;
-        mod ldexpf128;
-        mod roundf128;
-        mod scalbnf128;
-        mod sqrtf128;
-        mod truncf128;
         // verify-sorted-end
 
         // verify-sorted-start
-        pub use self::ceilf128::ceilf128;
-        pub use self::copysignf128::copysignf128;
-        pub use self::fabsf128::fabsf128;
-        pub use self::fdimf128::fdimf128;
-        pub use self::floorf128::floorf128;
+        pub use self::ceil::ceilf128;
+        pub use self::copysign::copysignf128;
+        pub use self::fabs::fabsf128;
+        pub use self::fdim::fdimf128;
+        pub use self::floor::floorf128;
         pub use self::fma::fmaf128;
         pub use self::fmin_fmax::{fmaxf128, fminf128};
         pub use self::fminimum_fmaximum::{fmaximumf128, fminimumf128};
         pub use self::fminimum_fmaximum_num::{fmaximum_numf128, fminimum_numf128};
         pub use self::fmodf128::fmodf128;
-        pub use self::ldexpf128::ldexpf128;
+        pub use self::ldexp::ldexpf128;
         pub use self::rint::rintf128;
+        pub use self::round::roundf128;
         pub use self::roundeven::roundevenf128;
-        pub use self::roundf128::roundf128;
-        pub use self::scalbnf128::scalbnf128;
-        pub use self::sqrtf128::sqrtf128;
-        pub use self::truncf128::truncf128;
+        pub use self::scalbn::scalbnf128;
+        pub use self::sqrt::sqrtf128;
+        pub use self::trunc::truncf128;
         // verify-sorted-end
     }
 }
diff --git a/src/math/round.rs b/src/math/round.rs
index 36e0eb1f2..6cd091cd7 100644
--- a/src/math/round.rs
+++ b/src/math/round.rs
@@ -1,5 +1,25 @@
+/// Round `x` to the nearest integer, breaking ties away from zero.
+#[cfg(f16_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn roundf16(x: f16) -> f16 {
+    super::generic::round(x)
+}
+
+/// Round `x` to the nearest integer, breaking ties away from zero.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn roundf(x: f32) -> f32 {
+    super::generic::round(x)
+}
+
 /// Round `x` to the nearest integer, breaking ties away from zero.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn round(x: f64) -> f64 {
     super::generic::round(x)
 }
+
+/// Round `x` to the nearest integer, breaking ties away from zero.
+#[cfg(f128_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn roundf128(x: f128) -> f128 {
+    super::generic::round(x)
+}
diff --git a/src/math/roundf.rs b/src/math/roundf.rs
deleted file mode 100644
index b5d7c9d69..000000000
--- a/src/math/roundf.rs
+++ /dev/null
@@ -1,5 +0,0 @@
-/// Round `x` to the nearest integer, breaking ties away from zero.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn roundf(x: f32) -> f32 {
-    super::generic::round(x)
-}
diff --git a/src/math/roundf128.rs b/src/math/roundf128.rs
deleted file mode 100644
index fc3164929..000000000
--- a/src/math/roundf128.rs
+++ /dev/null
@@ -1,5 +0,0 @@
-/// Round `x` to the nearest integer, breaking ties away from zero.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn roundf128(x: f128) -> f128 {
-    super::generic::round(x)
-}
diff --git a/src/math/roundf16.rs b/src/math/roundf16.rs
deleted file mode 100644
index 8b356eaab..000000000
--- a/src/math/roundf16.rs
+++ /dev/null
@@ -1,5 +0,0 @@
-/// Round `x` to the nearest integer, breaking ties away from zero.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn roundf16(x: f16) -> f16 {
-    super::generic::round(x)
-}
diff --git a/src/math/scalbn.rs b/src/math/scalbn.rs
index f809dad51..ed73c3f94 100644
--- a/src/math/scalbn.rs
+++ b/src/math/scalbn.rs
@@ -1,4 +1,87 @@
+#[cfg(f16_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn scalbnf16(x: f16, n: i32) -> f16 {
+    super::generic::scalbn(x, n)
+}
+
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn scalbnf(x: f32, n: i32) -> f32 {
+    super::generic::scalbn(x, n)
+}
+
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn scalbn(x: f64, n: i32) -> f64 {
     super::generic::scalbn(x, n)
 }
+
+#[cfg(f128_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn scalbnf128(x: f128, n: i32) -> f128 {
+    super::generic::scalbn(x, n)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::support::{CastFrom, CastInto, Float};
+
+    // Tests against N3220
+    fn spec_test<F: Float>(f: impl Fn(F, i32) -> F)
+    where
+        u32: CastInto<F::Int>,
+        F::Int: CastFrom<i32>,
+        F::Int: CastFrom<u32>,
+    {
+        // `scalbn(±0, n)` returns `±0`.
+        assert_biteq!(f(F::NEG_ZERO, 10), F::NEG_ZERO);
+        assert_biteq!(f(F::NEG_ZERO, 0), F::NEG_ZERO);
+        assert_biteq!(f(F::NEG_ZERO, -10), F::NEG_ZERO);
+        assert_biteq!(f(F::ZERO, 10), F::ZERO);
+        assert_biteq!(f(F::ZERO, 0), F::ZERO);
+        assert_biteq!(f(F::ZERO, -10), F::ZERO);
+
+        // `scalbn(x, 0)` returns `x`.
+        assert_biteq!(f(F::MIN, 0), F::MIN);
+        assert_biteq!(f(F::MAX, 0), F::MAX);
+        assert_biteq!(f(F::INFINITY, 0), F::INFINITY);
+        assert_biteq!(f(F::NEG_INFINITY, 0), F::NEG_INFINITY);
+        assert_biteq!(f(F::ZERO, 0), F::ZERO);
+        assert_biteq!(f(F::NEG_ZERO, 0), F::NEG_ZERO);
+
+        // `scalbn(±∞, n)` returns `±∞`.
+        assert_biteq!(f(F::INFINITY, 10), F::INFINITY);
+        assert_biteq!(f(F::INFINITY, -10), F::INFINITY);
+        assert_biteq!(f(F::NEG_INFINITY, 10), F::NEG_INFINITY);
+        assert_biteq!(f(F::NEG_INFINITY, -10), F::NEG_INFINITY);
+
+        // NaN should remain NaNs.
+        assert!(f(F::NAN, 10).is_nan());
+        assert!(f(F::NAN, 0).is_nan());
+        assert!(f(F::NAN, -10).is_nan());
+        assert!(f(-F::NAN, 10).is_nan());
+        assert!(f(-F::NAN, 0).is_nan());
+        assert!(f(-F::NAN, -10).is_nan());
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn spec_test_f16() {
+        spec_test::<f16>(scalbnf16);
+    }
+
+    #[test]
+    fn spec_test_f32() {
+        spec_test::<f32>(scalbnf);
+    }
+
+    #[test]
+    fn spec_test_f64() {
+        spec_test::<f64>(scalbn);
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn spec_test_f128() {
+        spec_test::<f128>(scalbnf128);
+    }
+}
diff --git a/src/math/scalbnf.rs b/src/math/scalbnf.rs
deleted file mode 100644
index 57e7ba76f..000000000
--- a/src/math/scalbnf.rs
+++ /dev/null
@@ -1,4 +0,0 @@
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn scalbnf(x: f32, n: i32) -> f32 {
-    super::generic::scalbn(x, n)
-}
diff --git a/src/math/scalbnf128.rs b/src/math/scalbnf128.rs
deleted file mode 100644
index c1d2b4855..000000000
--- a/src/math/scalbnf128.rs
+++ /dev/null
@@ -1,4 +0,0 @@
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn scalbnf128(x: f128, n: i32) -> f128 {
-    super::generic::scalbn(x, n)
-}
diff --git a/src/math/scalbnf16.rs b/src/math/scalbnf16.rs
deleted file mode 100644
index 2209e1a17..000000000
--- a/src/math/scalbnf16.rs
+++ /dev/null
@@ -1,4 +0,0 @@
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn scalbnf16(x: f16, n: i32) -> f16 {
-    super::generic::scalbn(x, n)
-}
diff --git a/src/math/sqrt.rs b/src/math/sqrt.rs
index 2bfc42bcf..76bc240cf 100644
--- a/src/math/sqrt.rs
+++ b/src/math/sqrt.rs
@@ -1,3 +1,32 @@
+/// The square root of `x` (f16).
+#[cfg(f16_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn sqrtf16(x: f16) -> f16 {
+    select_implementation! {
+        name: sqrtf16,
+        use_arch: all(target_arch = "aarch64", target_feature = "fp16"),
+        args: x,
+    }
+
+    return super::generic::sqrt(x);
+}
+
+/// The square root of `x` (f32).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn sqrtf(x: f32) -> f32 {
+    select_implementation! {
+        name: sqrtf,
+        use_arch: any(
+            all(target_arch = "aarch64", target_feature = "neon"),
+            all(target_arch = "wasm32", intrinsics_enabled),
+            target_feature = "sse2"
+        ),
+        args: x,
+    }
+
+    super::generic::sqrt(x)
+}
+
 /// The square root of `x` (f64).
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn sqrt(x: f64) -> f64 {
@@ -13,3 +42,10 @@ pub fn sqrt(x: f64) -> f64 {
 
     super::generic::sqrt(x)
 }
+
+/// The square root of `x` (f128).
+#[cfg(f128_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn sqrtf128(x: f128) -> f128 {
+    return super::generic::sqrt(x);
+}
diff --git a/src/math/sqrtf.rs b/src/math/sqrtf.rs
deleted file mode 100644
index c28a705e3..000000000
--- a/src/math/sqrtf.rs
+++ /dev/null
@@ -1,15 +0,0 @@
-/// The square root of `x` (f32).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn sqrtf(x: f32) -> f32 {
-    select_implementation! {
-        name: sqrtf,
-        use_arch: any(
-            all(target_arch = "aarch64", target_feature = "neon"),
-            all(target_arch = "wasm32", intrinsics_enabled),
-            target_feature = "sse2"
-        ),
-        args: x,
-    }
-
-    super::generic::sqrt(x)
-}
diff --git a/src/math/sqrtf128.rs b/src/math/sqrtf128.rs
deleted file mode 100644
index eaef6ae0c..000000000
--- a/src/math/sqrtf128.rs
+++ /dev/null
@@ -1,5 +0,0 @@
-/// The square root of `x` (f128).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn sqrtf128(x: f128) -> f128 {
-    return super::generic::sqrt(x);
-}
diff --git a/src/math/sqrtf16.rs b/src/math/sqrtf16.rs
deleted file mode 100644
index 7bedb7f8b..000000000
--- a/src/math/sqrtf16.rs
+++ /dev/null
@@ -1,11 +0,0 @@
-/// The square root of `x` (f16).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn sqrtf16(x: f16) -> f16 {
-    select_implementation! {
-        name: sqrtf16,
-        use_arch: all(target_arch = "aarch64", target_feature = "fp16"),
-        args: x,
-    }
-
-    return super::generic::sqrt(x);
-}
diff --git a/src/math/trunc.rs b/src/math/trunc.rs
index 2cc8aaa7e..fa50d55e1 100644
--- a/src/math/trunc.rs
+++ b/src/math/trunc.rs
@@ -1,3 +1,26 @@
+/// Rounds the number toward 0 to the closest integral value (f16).
+///
+/// This effectively removes the decimal part of the number, leaving the integral part.
+#[cfg(f16_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn truncf16(x: f16) -> f16 {
+    super::generic::trunc(x)
+}
+
+/// Rounds the number toward 0 to the closest integral value (f32).
+///
+/// This effectively removes the decimal part of the number, leaving the integral part.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn truncf(x: f32) -> f32 {
+    select_implementation! {
+        name: truncf,
+        use_arch: all(target_arch = "wasm32", intrinsics_enabled),
+        args: x,
+    }
+
+    super::generic::trunc(x)
+}
+
 /// Rounds the number toward 0 to the closest integral value (f64).
 ///
 /// This effectively removes the decimal part of the number, leaving the integral part.
@@ -11,3 +34,20 @@ pub fn trunc(x: f64) -> f64 {
 
     super::generic::trunc(x)
 }
+
+/// Rounds the number toward 0 to the closest integral value (f128).
+///
+/// This effectively removes the decimal part of the number, leaving the integral part.
+#[cfg(f128_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn truncf128(x: f128) -> f128 {
+    super::generic::trunc(x)
+}
+
+#[cfg(test)]
+mod tests {
+    #[test]
+    fn sanity_check() {
+        assert_eq!(super::truncf(1.1), 1.0);
+    }
+}
diff --git a/src/math/truncf.rs b/src/math/truncf.rs
deleted file mode 100644
index 14533a267..000000000
--- a/src/math/truncf.rs
+++ /dev/null
@@ -1,23 +0,0 @@
-/// Rounds the number toward 0 to the closest integral value (f32).
-///
-/// This effectively removes the decimal part of the number, leaving the integral part.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn truncf(x: f32) -> f32 {
-    select_implementation! {
-        name: truncf,
-        use_arch: all(target_arch = "wasm32", intrinsics_enabled),
-        args: x,
-    }
-
-    super::generic::trunc(x)
-}
-
-// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520
-#[cfg(not(target_arch = "powerpc64"))]
-#[cfg(test)]
-mod tests {
-    #[test]
-    fn sanity_check() {
-        assert_eq!(super::truncf(1.1), 1.0);
-    }
-}
diff --git a/src/math/truncf128.rs b/src/math/truncf128.rs
deleted file mode 100644
index 9dccc0d0e..000000000
--- a/src/math/truncf128.rs
+++ /dev/null
@@ -1,7 +0,0 @@
-/// Rounds the number toward 0 to the closest integral value (f128).
-///
-/// This effectively removes the decimal part of the number, leaving the integral part.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn truncf128(x: f128) -> f128 {
-    super::generic::trunc(x)
-}
diff --git a/src/math/truncf16.rs b/src/math/truncf16.rs
deleted file mode 100644
index d7c3d225c..000000000
--- a/src/math/truncf16.rs
+++ /dev/null
@@ -1,7 +0,0 @@
-/// Rounds the number toward 0 to the closest integral value (f16).
-///
-/// This effectively removes the decimal part of the number, leaving the integral part.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn truncf16(x: f16) -> f16 {
-    super::generic::trunc(x)
-}

From 279725db749d3c57125ef86c1c05c1d0bccb7280 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 18 Apr 2025 03:22:13 +0000
Subject: [PATCH 267/279] ci: Cancel jobs when a new push happens

Jobs should just cancel automatically, it isn't ideal that extensive
jobs can continue running for multiple hours after code has been
updated. Use a solution from [1] to do this.

[1]: https://stackoverflow.com/a/72408109/5380651
---
 .github/workflows/main.yaml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index e86f936f7..93c56c9d4 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -5,6 +5,11 @@ on:
       - master
   pull_request:
 
+concurrency:
+  # Make sure that new pushes cancel running jobs
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
 env:
   CARGO_TERM_COLOR: always
   RUSTDOCFLAGS: -Dwarnings

From e075c728ad5590ecd4cda2f437d39d9fb1fa34cd Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 18 Apr 2025 01:58:05 +0000
Subject: [PATCH 268/279] ci: Allow skipping extensive tests with `ci:
 skip-extensive`

Sometimes we do refactoring that moves things around and triggers an
extensive test, even though the implementation didn't change. There
isn't any need to run full extensive CI in these cases, so add a way to
skip it from the PR message.
---
 .github/workflows/main.yaml | 15 ++++---
 ci/ci-util.py               | 88 ++++++++++++++++++++++++++-----------
 2 files changed, 73 insertions(+), 30 deletions(-)

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index 93c56c9d4..2b2891ab2 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -239,6 +239,9 @@ jobs:
     name: Calculate job matrix
     runs-on: ubuntu-24.04
     timeout-minutes: 10
+    env:
+      GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      PR_NUMBER: ${{ github.event.pull_request.number }}
     outputs:
       matrix: ${{ steps.script.outputs.matrix }}
     steps:
@@ -267,7 +270,7 @@ jobs:
         # this is not currently possible https://github.com/actions/runner/issues/1985.
         include: ${{ fromJSON(needs.calculate_extensive_matrix.outputs.matrix).matrix }}
     env:
-      CHANGED: ${{ matrix.changed }}
+      TO_TEST: ${{ matrix.to_test }}
     steps:
       - uses: actions/checkout@v4
         with:
@@ -279,16 +282,18 @@ jobs:
       - uses: Swatinem/rust-cache@v2
       - name: Run extensive tests
         run: |
-          echo "Changed: '$CHANGED'"
-          if [ -z "$CHANGED" ]; then
+          echo "Tests to run: '$TO_TEST'"
+          if [ -z "$TO_TEST" ]; then
             echo "No tests to run, exiting."
             exit
           fi
 
+          set -x
+
           # Run the non-extensive tests first to catch any easy failures
-          cargo t --profile release-checked -- "$CHANGED"
+          cargo t --profile release-checked -- "$TO_TEST"
 
-          LIBM_EXTENSIVE_TESTS="$CHANGED" cargo t \
+          LIBM_EXTENSIVE_TESTS="$TO_TEST" cargo t \
             --features build-mpfr,unstable,force-soft-floats \
             --profile release-checked \
             -- extensive
diff --git a/ci/ci-util.py b/ci/ci-util.py
index 7464fd425..8b07dde31 100755
--- a/ci/ci-util.py
+++ b/ci/ci-util.py
@@ -6,6 +6,7 @@
 """
 
 import json
+import os
 import subprocess as sp
 import sys
 from dataclasses import dataclass
@@ -13,7 +14,7 @@
 from inspect import cleandoc
 from os import getenv
 from pathlib import Path
-from typing import TypedDict
+from typing import TypedDict, Self
 
 USAGE = cleandoc(
     """
@@ -51,6 +52,8 @@
 ARTIFACT_GLOB = "baseline-icount*"
 # Place this in a PR body to skip regression checks (must be at the start of a line).
 REGRESSION_DIRECTIVE = "ci: allow-regressions"
+# Place this in a PR body to skip extensive tests
+SKIP_EXTENSIVE_DIRECTIVE = "ci: skip-extensive"
 
 # Don't run exhaustive tests if these files change, even if they contaiin a function
 # definition.
@@ -68,6 +71,39 @@ def eprint(*args, **kwargs):
     print(*args, file=sys.stderr, **kwargs)
 
 
+@dataclass
+class PrInfo:
+    """GitHub response for PR query"""
+
+    body: str
+    commits: list[str]
+    created_at: str
+    number: int
+
+    @classmethod
+    def load(cls, pr_number: int | str) -> Self:
+        """For a given PR number, query the body and commit list"""
+        pr_info = sp.check_output(
+            [
+                "gh",
+                "pr",
+                "view",
+                str(pr_number),
+                "--json=number,commits,body,createdAt",
+                # Flatten the commit list to only hashes, change a key to snake naming
+                "--jq=.commits |= map(.oid) | .created_at = .createdAt | del(.createdAt)",
+            ],
+            text=True,
+        )
+        eprint("PR info:", json.dumps(pr_info, indent=4))
+        return cls(**json.loads(pr_info))
+
+    def contains_directive(self, directive: str) -> bool:
+        """Return true if the provided directive is on a line in the PR body"""
+        lines = self.body.splitlines()
+        return any(line.startswith(directive) for line in lines)
+
+
 class FunctionDef(TypedDict):
     """Type for an entry in `function-definitions.json`"""
 
@@ -149,7 +185,7 @@ def changed_routines(self) -> dict[str, list[str]]:
                 eprint(f"changed files for {name}: {changed}")
                 routines.add(name)
 
-        ret = {}
+        ret: dict[str, list[str]] = {}
         for r in sorted(routines):
             ret.setdefault(self.defs[r]["type"], []).append(r)
 
@@ -159,13 +195,27 @@ def make_workflow_output(self) -> str:
         """Create a JSON object a list items for each type's changed files, if any
         did change, and the routines that were affected by the change.
         """
+
+        pr_number = os.environ.get("PR_NUMBER")
+        skip_tests = False
+
+        if pr_number is not None:
+            pr = PrInfo.load(pr_number)
+            skip_tests = pr.contains_directive(SKIP_EXTENSIVE_DIRECTIVE)
+
+            if skip_tests:
+                eprint("Skipping all extensive tests")
+
         changed = self.changed_routines()
         ret = []
         for ty in TYPES:
             ty_changed = changed.get(ty, [])
+            changed_str = ",".join(ty_changed)
+
             item = {
                 "ty": ty,
-                "changed": ",".join(ty_changed),
+                "changed": changed_str,
+                "to_test": "" if skip_tests else changed_str,
             }
             ret.append(item)
         output = json.dumps({"matrix": ret}, separators=(",", ":"))
@@ -266,13 +316,13 @@ def check_iai_regressions(args: list[str]):
     found.
     """
 
-    iai_home = "iai-home"
-    pr_number = False
+    iai_home_str = "iai-home"
+    pr_number = None
 
     while len(args) > 0:
         match args:
             case ["--home", home, *rest]:
-                iai_home = home
+                iai_home_str = home
                 args = rest
             case ["--allow-pr-override", pr_num, *rest]:
                 pr_number = pr_num
@@ -281,10 +331,10 @@ def check_iai_regressions(args: list[str]):
                 eprint(USAGE)
                 exit(1)
 
-    iai_home = Path(iai_home)
+    iai_home = Path(iai_home_str)
 
     found_summaries = False
-    regressions = []
+    regressions: list[dict] = []
     for summary_path in iglob("**/summary.json", root_dir=iai_home, recursive=True):
         found_summaries = True
         with open(iai_home / summary_path, "r") as f:
@@ -292,7 +342,9 @@ def check_iai_regressions(args: list[str]):
 
         summary_regs = []
         run = summary["callgrind_summary"]["callgrind_run"]
-        name_entry = {"name": f"{summary["function_name"]}.{summary["id"]}"}
+        fname = summary["function_name"]
+        id = summary["id"]
+        name_entry = {"name": f"{fname}.{id}"}
 
         for segment in run["segments"]:
             summary_regs.extend(segment["regressions"])
@@ -312,22 +364,8 @@ def check_iai_regressions(args: list[str]):
     eprint("Found regressions:", json.dumps(regressions, indent=4))
 
     if pr_number is not None:
-        pr_info = sp.check_output(
-            [
-                "gh",
-                "pr",
-                "view",
-                str(pr_number),
-                "--json=number,commits,body,createdAt",
-                "--jq=.commits |= map(.oid)",
-            ],
-            text=True,
-        )
-        pr = json.loads(pr_info)
-        eprint("PR info:", json.dumps(pr, indent=4))
-
-        lines = pr["body"].splitlines()
-        if any(line.startswith(REGRESSION_DIRECTIVE) for line in lines):
+        pr = PrInfo.load(pr_number)
+        if pr.contains_directive(REGRESSION_DIRECTIVE):
             eprint("PR allows regressions, returning")
             return
 

From 992c2718300c4be286bd85a6a9039792493b7ce5 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 18 Apr 2025 02:20:21 +0000
Subject: [PATCH 269/279] ci: Require `ci: allow-many-extensive` if a threshold
 is exceeded

Error out when too many extensive tests would be run unless `ci:
allow-many-extensive` is in the PR description. This allows us to set a
much higher CI timeout with less risk that a 4+ hour job gets started by
accident.
---
 ci/ci-util.py | 28 +++++++++++++++++++++++++---
 1 file changed, 25 insertions(+), 3 deletions(-)

diff --git a/ci/ci-util.py b/ci/ci-util.py
index 8b07dde31..aae791d0f 100755
--- a/ci/ci-util.py
+++ b/ci/ci-util.py
@@ -54,6 +54,11 @@
 REGRESSION_DIRECTIVE = "ci: allow-regressions"
 # Place this in a PR body to skip extensive tests
 SKIP_EXTENSIVE_DIRECTIVE = "ci: skip-extensive"
+# Place this in a PR body to allow running a large number of extensive tests. If not
+# set, this script will error out if a threshold is exceeded in order to avoid
+# accidentally spending huge amounts of CI time.
+ALLOW_MANY_EXTENSIVE_DIRECTIVE = "ci: allow-many-extensive"
+MANY_EXTENSIVE_THRESHOLD = 20
 
 # Don't run exhaustive tests if these files change, even if they contaiin a function
 # definition.
@@ -198,28 +203,45 @@ def make_workflow_output(self) -> str:
 
         pr_number = os.environ.get("PR_NUMBER")
         skip_tests = False
+        error_on_many_tests = False
 
         if pr_number is not None:
             pr = PrInfo.load(pr_number)
             skip_tests = pr.contains_directive(SKIP_EXTENSIVE_DIRECTIVE)
+            error_on_many_tests = not pr.contains_directive(
+                ALLOW_MANY_EXTENSIVE_DIRECTIVE
+            )
 
             if skip_tests:
                 eprint("Skipping all extensive tests")
 
         changed = self.changed_routines()
         ret = []
+        total_to_test = 0
+
         for ty in TYPES:
             ty_changed = changed.get(ty, [])
-            changed_str = ",".join(ty_changed)
+            ty_to_test = [] if skip_tests else ty_changed
+            total_to_test += len(ty_to_test)
 
             item = {
                 "ty": ty,
-                "changed": changed_str,
-                "to_test": "" if skip_tests else changed_str,
+                "changed": ",".join(ty_changed),
+                "to_test": ",".join(ty_to_test),
             }
+
             ret.append(item)
         output = json.dumps({"matrix": ret}, separators=(",", ":"))
         eprint(f"output: {output}")
+        eprint(f"total extensive tests: {total_to_test}")
+
+        if error_on_many_tests and total_to_test > MANY_EXTENSIVE_THRESHOLD:
+            eprint(
+                f"More than {MANY_EXTENSIVE_THRESHOLD} tests would be run; add"
+                f" `{ALLOW_MANY_EXTENSIVE_DIRECTIVE}` to the PR body if this is intentional"
+            )
+            exit(1)
+
         return output
 
 

From 932898e0c18ad63bf209d8ddda2417a5196f15da Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 18 Apr 2025 01:24:38 +0000
Subject: [PATCH 270/279] ci: Increase the timeout for extensive tests

The reorganization PR has caused this to fail once before because every
file shows up as changed. Increase the timeout so this doesn't happen.

We now cancel the job if too many extensive tests are run unless `ci:
allow-many-extensive` is in the PR description, so this helps prevent
the limit being hit by accident.
---
 .github/workflows/main.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index 2b2891ab2..c925e63aa 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -261,7 +261,7 @@ jobs:
       - clippy
       - calculate_extensive_matrix
     runs-on: ubuntu-24.04
-    timeout-minutes: 180
+    timeout-minutes: 240 # 4 hours
     strategy:
       matrix:
         # Use the output from `calculate_extensive_matrix` to calculate the matrix

From 38bbb3be2cd9cbdfd3d0aa656f8301474b31c1ac Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 18 Apr 2025 06:15:10 +0000
Subject: [PATCH 271/279] Ensure configure.rs changes trigger rebuilds

---
 crates/compiler-builtins-smoke-test/build.rs | 1 +
 crates/libm-test/build.rs                    | 1 +
 crates/util/build.rs                         | 1 +
 3 files changed, 3 insertions(+)

diff --git a/crates/compiler-builtins-smoke-test/build.rs b/crates/compiler-builtins-smoke-test/build.rs
index 401b7e1eb..4ee5ab585 100644
--- a/crates/compiler-builtins-smoke-test/build.rs
+++ b/crates/compiler-builtins-smoke-test/build.rs
@@ -2,6 +2,7 @@
 mod configure;
 
 fn main() {
+    println!("cargo:rerun-if-changed=../../configure.rs");
     let cfg = configure::Config::from_env();
     configure::emit_libm_config(&cfg);
 }
diff --git a/crates/libm-test/build.rs b/crates/libm-test/build.rs
index 134fb11ce..d2d0df9cb 100644
--- a/crates/libm-test/build.rs
+++ b/crates/libm-test/build.rs
@@ -3,6 +3,7 @@ mod configure;
 use configure::Config;
 
 fn main() {
+    println!("cargo:rerun-if-changed=../../configure.rs");
     let cfg = Config::from_env();
     configure::emit_test_config(&cfg);
 }
diff --git a/crates/util/build.rs b/crates/util/build.rs
index 0745ef3dd..b6cceb5f1 100644
--- a/crates/util/build.rs
+++ b/crates/util/build.rs
@@ -4,6 +4,7 @@
 mod configure;
 
 fn main() {
+    println!("cargo:rerun-if-changed=../../configure.rs");
     let cfg = configure::Config::from_env();
     configure::emit_libm_config(&cfg);
 }

From 523f1517c446e61f6736fe7b87672fceda4715a4 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 18 Apr 2025 06:43:19 +0000
Subject: [PATCH 272/279] ci: Account for `PR_NUMBER` being set to an empty
 string

This is the case for CI after merge that is no longer associated with a
pull request.
---
 ci/ci-util.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/ci-util.py b/ci/ci-util.py
index aae791d0f..7468fd690 100755
--- a/ci/ci-util.py
+++ b/ci/ci-util.py
@@ -205,7 +205,7 @@ def make_workflow_output(self) -> str:
         skip_tests = False
         error_on_many_tests = False
 
-        if pr_number is not None:
+        if pr_number is not None and len(pr_number) > 0:
             pr = PrInfo.load(pr_number)
             skip_tests = pr.contains_directive(SKIP_EXTENSIVE_DIRECTIVE)
             error_on_many_tests = not pr.contains_directive(

From 7689428642e5035621e98f0cdda3f8852590309c Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 18 Apr 2025 07:05:13 +0000
Subject: [PATCH 273/279] Ensure all public functions are marked `no_panic`

Fixes: https://github.com/rust-lang/libm/issues/201
---
 src/math/frexpf.rs | 1 +
 src/math/j0.rs     | 2 ++
 src/math/j0f.rs    | 2 ++
 src/math/j1.rs     | 2 ++
 src/math/j1f.rs    | 2 ++
 src/math/jn.rs     | 2 ++
 src/math/jnf.rs    | 2 ++
 src/math/modf.rs   | 1 +
 src/math/modff.rs  | 1 +
 9 files changed, 15 insertions(+)

diff --git a/src/math/frexpf.rs b/src/math/frexpf.rs
index 2919c0ab0..0ec91c2d3 100644
--- a/src/math/frexpf.rs
+++ b/src/math/frexpf.rs
@@ -1,3 +1,4 @@
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn frexpf(x: f32) -> (f32, i32) {
     let mut y = x.to_bits();
     let ee: i32 = ((y >> 23) & 0xff) as i32;
diff --git a/src/math/j0.rs b/src/math/j0.rs
index 5e5e839f8..99d656f0d 100644
--- a/src/math/j0.rs
+++ b/src/math/j0.rs
@@ -110,6 +110,7 @@ const S03: f64 = 5.13546550207318111446e-07; /* 0x3EA13B54, 0xCE84D5A9 */
 const S04: f64 = 1.16614003333790000205e-09; /* 0x3E1408BC, 0xF4745D8F */
 
 /// Zeroth order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f64).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn j0(mut x: f64) -> f64 {
     let z: f64;
     let r: f64;
@@ -164,6 +165,7 @@ const V03: f64 = 2.59150851840457805467e-07; /* 0x3E91642D, 0x7FF202FD */
 const V04: f64 = 4.41110311332675467403e-10; /* 0x3DFE5018, 0x3BD6D9EF */
 
 /// Zeroth order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f64).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn y0(x: f64) -> f64 {
     let z: f64;
     let u: f64;
diff --git a/src/math/j0f.rs b/src/math/j0f.rs
index afb6ee9ba..25e5b325c 100644
--- a/src/math/j0f.rs
+++ b/src/math/j0f.rs
@@ -63,6 +63,7 @@ const S03: f32 = 5.1354652442e-07; /* 0x3509daa6 */
 const S04: f32 = 1.1661400734e-09; /* 0x30a045e8 */
 
 /// Zeroth order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f32).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn j0f(mut x: f32) -> f32 {
     let z: f32;
     let r: f32;
@@ -109,6 +110,7 @@ const V03: f32 = 2.5915085189e-07; /* 0x348b216c */
 const V04: f32 = 4.4111031494e-10; /* 0x2ff280c2 */
 
 /// Zeroth order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f32).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn y0f(x: f32) -> f32 {
     let z: f32;
     let u: f32;
diff --git a/src/math/j1.rs b/src/math/j1.rs
index 578ae59d3..9b604d9e4 100644
--- a/src/math/j1.rs
+++ b/src/math/j1.rs
@@ -114,6 +114,7 @@ const S04: f64 = 5.04636257076217042715e-09; /* 0x3E35AC88, 0xC97DFF2C */
 const S05: f64 = 1.23542274426137913908e-11; /* 0x3DAB2ACF, 0xCFB97ED8 */
 
 /// First order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f64).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn j1(x: f64) -> f64 {
     let mut z: f64;
     let r: f64;
@@ -160,6 +161,7 @@ const V0: [f64; 5] = [
 ];
 
 /// First order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f64).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn y1(x: f64) -> f64 {
     let z: f64;
     let u: f64;
diff --git a/src/math/j1f.rs b/src/math/j1f.rs
index 02a3efd24..a47472401 100644
--- a/src/math/j1f.rs
+++ b/src/math/j1f.rs
@@ -64,6 +64,7 @@ const S04: f32 = 5.0463624390e-09; /* 0x31ad6446 */
 const S05: f32 = 1.2354227016e-11; /* 0x2d59567e */
 
 /// First order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f32).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn j1f(x: f32) -> f32 {
     let mut z: f32;
     let r: f32;
@@ -109,6 +110,7 @@ const V0: [f32; 5] = [
 ];
 
 /// First order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f32).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn y1f(x: f32) -> f32 {
     let z: f32;
     let u: f32;
diff --git a/src/math/jn.rs b/src/math/jn.rs
index d228781d1..31f8d9c53 100644
--- a/src/math/jn.rs
+++ b/src/math/jn.rs
@@ -39,6 +39,7 @@ use super::{cos, fabs, get_high_word, get_low_word, j0, j1, log, sin, sqrt, y0,
 const INVSQRTPI: f64 = 5.64189583547756279280e-01; /* 0x3FE20DD7, 0x50429B6D */
 
 /// Integer order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f64).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn jn(n: i32, mut x: f64) -> f64 {
     let mut ix: u32;
     let lx: u32;
@@ -248,6 +249,7 @@ pub fn jn(n: i32, mut x: f64) -> f64 {
 }
 
 /// Integer order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f64).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn yn(n: i32, x: f64) -> f64 {
     let mut ix: u32;
     let lx: u32;
diff --git a/src/math/jnf.rs b/src/math/jnf.rs
index 754f8f33b..52cf7d8a8 100644
--- a/src/math/jnf.rs
+++ b/src/math/jnf.rs
@@ -16,6 +16,7 @@
 use super::{fabsf, j0f, j1f, logf, y0f, y1f};
 
 /// Integer order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f32).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn jnf(n: i32, mut x: f32) -> f32 {
     let mut ix: u32;
     let mut nm1: i32;
@@ -191,6 +192,7 @@ pub fn jnf(n: i32, mut x: f32) -> f32 {
 }
 
 /// Integer order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f32).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn ynf(n: i32, x: f32) -> f32 {
     let mut ix: u32;
     let mut ib: u32;
diff --git a/src/math/modf.rs b/src/math/modf.rs
index e29e80ccf..6541862cd 100644
--- a/src/math/modf.rs
+++ b/src/math/modf.rs
@@ -1,3 +1,4 @@
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn modf(x: f64) -> (f64, f64) {
     let rv2: f64;
     let mut u = x.to_bits();
diff --git a/src/math/modff.rs b/src/math/modff.rs
index fac60abaa..90c6bca7d 100644
--- a/src/math/modff.rs
+++ b/src/math/modff.rs
@@ -1,3 +1,4 @@
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn modff(x: f32) -> (f32, f32) {
     let rv2: f32;
     let mut u: u32 = x.to_bits();

From 1c64b1657e5eb4284735fe4027c1942ead07776e Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 18 Apr 2025 07:15:26 +0000
Subject: [PATCH 274/279] Combine the source files for `fmod`

Since `fmod` is generic, there isn't any need to have the small wrappers
in separate files. Most operations was done in [1] but `fmod` was
omitted until now.

[1]: https://github.com/rust-lang/libm/pull/537
---
 etc/function-definitions.json |  6 +++---
 src/math/fmod.rs              | 20 ++++++++++++++++++++
 src/math/fmodf.rs             |  5 -----
 src/math/fmodf128.rs          |  5 -----
 src/math/fmodf16.rs           |  5 -----
 src/math/frexp.rs             |  1 +
 src/math/mod.rs               | 16 +++-------------
 7 files changed, 27 insertions(+), 31 deletions(-)
 delete mode 100644 src/math/fmodf.rs
 delete mode 100644 src/math/fmodf128.rs
 delete mode 100644 src/math/fmodf16.rs

diff --git a/etc/function-definitions.json b/etc/function-definitions.json
index 801e74b22..ead1f807f 100644
--- a/etc/function-definitions.json
+++ b/etc/function-definitions.json
@@ -537,21 +537,21 @@
     },
     "fmodf": {
         "sources": [
-            "src/math/fmodf.rs",
+            "src/math/fmod.rs",
             "src/math/generic/fmod.rs"
         ],
         "type": "f32"
     },
     "fmodf128": {
         "sources": [
-            "src/math/fmodf128.rs",
+            "src/math/fmod.rs",
             "src/math/generic/fmod.rs"
         ],
         "type": "f128"
     },
     "fmodf16": {
         "sources": [
-            "src/math/fmodf16.rs",
+            "src/math/fmod.rs",
             "src/math/generic/fmod.rs"
         ],
         "type": "f16"
diff --git a/src/math/fmod.rs b/src/math/fmod.rs
index d9786b53d..c4752b925 100644
--- a/src/math/fmod.rs
+++ b/src/math/fmod.rs
@@ -1,5 +1,25 @@
+/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`.
+#[cfg(f16_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmodf16(x: f16, y: f16) -> f16 {
+    super::generic::fmod(x, y)
+}
+
+/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmodf(x: f32, y: f32) -> f32 {
+    super::generic::fmod(x, y)
+}
+
 /// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn fmod(x: f64, y: f64) -> f64 {
     super::generic::fmod(x, y)
 }
+
+/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`.
+#[cfg(f128_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmodf128(x: f128, y: f128) -> f128 {
+    super::generic::fmod(x, y)
+}
diff --git a/src/math/fmodf.rs b/src/math/fmodf.rs
deleted file mode 100644
index 4e95696e2..000000000
--- a/src/math/fmodf.rs
+++ /dev/null
@@ -1,5 +0,0 @@
-/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn fmodf(x: f32, y: f32) -> f32 {
-    super::generic::fmod(x, y)
-}
diff --git a/src/math/fmodf128.rs b/src/math/fmodf128.rs
deleted file mode 100644
index ff0e0493e..000000000
--- a/src/math/fmodf128.rs
+++ /dev/null
@@ -1,5 +0,0 @@
-/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn fmodf128(x: f128, y: f128) -> f128 {
-    super::generic::fmod(x, y)
-}
diff --git a/src/math/fmodf16.rs b/src/math/fmodf16.rs
deleted file mode 100644
index 11972a7de..000000000
--- a/src/math/fmodf16.rs
+++ /dev/null
@@ -1,5 +0,0 @@
-/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn fmodf16(x: f16, y: f16) -> f16 {
-    super::generic::fmod(x, y)
-}
diff --git a/src/math/frexp.rs b/src/math/frexp.rs
index badad786a..de7a64fda 100644
--- a/src/math/frexp.rs
+++ b/src/math/frexp.rs
@@ -1,3 +1,4 @@
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn frexp(x: f64) -> (f64, i32) {
     let mut y = x.to_bits();
     let ee = ((y >> 52) & 0x7ff) as i32;
diff --git a/src/math/mod.rs b/src/math/mod.rs
index 023cf67a3..949c18b40 100644
--- a/src/math/mod.rs
+++ b/src/math/mod.rs
@@ -164,7 +164,6 @@ mod fmin_fmax;
 mod fminimum_fmaximum;
 mod fminimum_fmaximum_num;
 mod fmod;
-mod fmodf;
 mod frexp;
 mod frexpf;
 mod hypot;
@@ -260,8 +259,7 @@ pub use self::fma_wide::fmaf;
 pub use self::fmin_fmax::{fmax, fmaxf, fmin, fminf};
 pub use self::fminimum_fmaximum::{fmaximum, fmaximumf, fminimum, fminimumf};
 pub use self::fminimum_fmaximum_num::{fmaximum_num, fmaximum_numf, fminimum_num, fminimum_numf};
-pub use self::fmod::fmod;
-pub use self::fmodf::fmodf;
+pub use self::fmod::{fmod, fmodf};
 pub use self::frexp::frexp;
 pub use self::frexpf::frexpf;
 pub use self::hypot::hypot;
@@ -318,10 +316,6 @@ pub use self::trunc::{trunc, truncf};
 
 cfg_if! {
     if #[cfg(f16_enabled)] {
-        // verify-sorted-start
-        mod fmodf16;
-        // verify-sorted-end
-
         // verify-sorted-start
         pub use self::ceil::ceilf16;
         pub use self::copysign::copysignf16;
@@ -331,7 +325,7 @@ cfg_if! {
         pub use self::fmin_fmax::{fmaxf16, fminf16};
         pub use self::fminimum_fmaximum::{fmaximumf16, fminimumf16};
         pub use self::fminimum_fmaximum_num::{fmaximum_numf16, fminimum_numf16};
-        pub use self::fmodf16::fmodf16;
+        pub use self::fmod::fmodf16;
         pub use self::ldexp::ldexpf16;
         pub use self::rint::rintf16;
         pub use self::round::roundf16;
@@ -348,10 +342,6 @@ cfg_if! {
 
 cfg_if! {
     if #[cfg(f128_enabled)] {
-        // verify-sorted-start
-        mod fmodf128;
-        // verify-sorted-end
-
         // verify-sorted-start
         pub use self::ceil::ceilf128;
         pub use self::copysign::copysignf128;
@@ -362,7 +352,7 @@ cfg_if! {
         pub use self::fmin_fmax::{fmaxf128, fminf128};
         pub use self::fminimum_fmaximum::{fmaximumf128, fminimumf128};
         pub use self::fminimum_fmaximum_num::{fmaximum_numf128, fminimum_numf128};
-        pub use self::fmodf128::fmodf128;
+        pub use self::fmod::fmodf128;
         pub use self::ldexp::ldexpf128;
         pub use self::rint::rintf128;
         pub use self::round::roundf128;

From 9b25961648f3694f509a4872066d9aa2aacb0fd9 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 18 Apr 2025 19:19:24 +0000
Subject: [PATCH 275/279] Mark generic functions `#[inline]`

Benchmarks for [1] seemed to indicate that repository organization for
some reason had an effect on performance, even though the exact same
rustc commands were running (though some with a different order). After
investigating more, it appears that dependencies may have an affect on
inlining thresholds for generic functions.

It is surprising that this happens, we more or less expect that public
functions will be standalone but everything they call will be inlined.
To help ensure this, mark all generic functions `#[inline]` if they
should be merged into the public function.

Zulip discussion at [2].

[1]: https://github.com/rust-lang/libm/pull/533
[2]: https://rust-lang.zulipchat.com/#narrow/channel/182449-t-compiler.2Fhelp/topic/Dependencies.20affecting.20codegen/with/513079387
---
 src/math/fma.rs                  | 1 +
 src/math/fma_wide.rs             | 1 +
 src/math/generic/ceil.rs         | 2 ++
 src/math/generic/copysign.rs     | 1 +
 src/math/generic/fabs.rs         | 1 +
 src/math/generic/fdim.rs         | 1 +
 src/math/generic/floor.rs        | 2 ++
 src/math/generic/fmax.rs         | 2 +-
 src/math/generic/fmaximum.rs     | 1 +
 src/math/generic/fmaximum_num.rs | 1 +
 src/math/generic/fmin.rs         | 1 +
 src/math/generic/fminimum.rs     | 1 +
 src/math/generic/fminimum_num.rs | 1 +
 src/math/generic/fmod.rs         | 2 +-
 src/math/generic/mod.rs          | 3 +++
 src/math/generic/rint.rs         | 1 +
 src/math/generic/round.rs        | 1 +
 src/math/generic/scalbn.rs       | 1 +
 src/math/generic/sqrt.rs         | 2 ++
 src/math/generic/trunc.rs        | 2 ++
 src/math/roundeven.rs            | 1 +
 21 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/src/math/fma.rs b/src/math/fma.rs
index 789b0836a..e0b3347ac 100644
--- a/src/math/fma.rs
+++ b/src/math/fma.rs
@@ -29,6 +29,7 @@ pub fn fmaf128(x: f128, y: f128, z: f128) -> f128 {
 
 /// Fused multiply-add that works when there is not a larger float size available. Computes
 /// `(x * y) + z`.
+#[inline]
 pub fn fma_round<F>(x: F, y: F, z: F, _round: Round) -> FpResult<F>
 where
     F: Float,
diff --git a/src/math/fma_wide.rs b/src/math/fma_wide.rs
index 8e908a14f..08b78b022 100644
--- a/src/math/fma_wide.rs
+++ b/src/math/fma_wide.rs
@@ -28,6 +28,7 @@ pub fn fmaf(x: f32, y: f32, z: f32) -> f32 {
 
 /// Fma implementation when a hardware-backed larger float type is available. For `f32` and `f64`,
 /// `f64` has enough precision to represent the `f32` in its entirety, except for double rounding.
+#[inline]
 pub fn fma_wide_round<F, B>(x: F, y: F, z: F, round: Round) -> FpResult<F>
 where
     F: Float + HFloat<D = B>,
diff --git a/src/math/generic/ceil.rs b/src/math/generic/ceil.rs
index bf7e1d8e2..5c5bb4763 100644
--- a/src/math/generic/ceil.rs
+++ b/src/math/generic/ceil.rs
@@ -10,10 +10,12 @@
 use super::super::support::{FpResult, Status};
 use super::super::{Float, Int, IntTy, MinInt};
 
+#[inline]
 pub fn ceil<F: Float>(x: F) -> F {
     ceil_status(x).val
 }
 
+#[inline]
 pub fn ceil_status<F: Float>(x: F) -> FpResult<F> {
     let zero = IntTy::<F>::ZERO;
 
diff --git a/src/math/generic/copysign.rs b/src/math/generic/copysign.rs
index 04864a359..a61af22f0 100644
--- a/src/math/generic/copysign.rs
+++ b/src/math/generic/copysign.rs
@@ -1,6 +1,7 @@
 use super::super::Float;
 
 /// Copy the sign of `y` to `x`.
+#[inline]
 pub fn copysign<F: Float>(x: F, y: F) -> F {
     let mut ux = x.to_bits();
     let uy = y.to_bits();
diff --git a/src/math/generic/fabs.rs b/src/math/generic/fabs.rs
index 75b473107..0fa0edf9b 100644
--- a/src/math/generic/fabs.rs
+++ b/src/math/generic/fabs.rs
@@ -1,6 +1,7 @@
 use super::super::Float;
 
 /// Absolute value.
+#[inline]
 pub fn fabs<F: Float>(x: F) -> F {
     let abs_mask = !F::SIGN_MASK;
     F::from_bits(x.to_bits() & abs_mask)
diff --git a/src/math/generic/fdim.rs b/src/math/generic/fdim.rs
index bf971cd7d..a63007b19 100644
--- a/src/math/generic/fdim.rs
+++ b/src/math/generic/fdim.rs
@@ -1,5 +1,6 @@
 use super::super::Float;
 
+#[inline]
 pub fn fdim<F: Float>(x: F, y: F) -> F {
     if x <= y { F::ZERO } else { x - y }
 }
diff --git a/src/math/generic/floor.rs b/src/math/generic/floor.rs
index 779955164..243804625 100644
--- a/src/math/generic/floor.rs
+++ b/src/math/generic/floor.rs
@@ -10,10 +10,12 @@
 use super::super::support::{FpResult, Status};
 use super::super::{Float, Int, IntTy, MinInt};
 
+#[inline]
 pub fn floor<F: Float>(x: F) -> F {
     floor_status(x).val
 }
 
+#[inline]
 pub fn floor_status<F: Float>(x: F) -> FpResult<F> {
     let zero = IntTy::<F>::ZERO;
 
diff --git a/src/math/generic/fmax.rs b/src/math/generic/fmax.rs
index 29a031100..bf3f847e8 100644
--- a/src/math/generic/fmax.rs
+++ b/src/math/generic/fmax.rs
@@ -16,7 +16,7 @@
 
 use super::super::Float;
 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[inline]
 pub fn fmax<F: Float>(x: F, y: F) -> F {
     let res = if x.is_nan() || x < y { y } else { x };
     // Canonicalize
diff --git a/src/math/generic/fmaximum.rs b/src/math/generic/fmaximum.rs
index 9e8d1739f..387055af2 100644
--- a/src/math/generic/fmaximum.rs
+++ b/src/math/generic/fmaximum.rs
@@ -11,6 +11,7 @@
 
 use super::super::Float;
 
+#[inline]
 pub fn fmaximum<F: Float>(x: F, y: F) -> F {
     let res = if x.is_nan() {
         x
diff --git a/src/math/generic/fmaximum_num.rs b/src/math/generic/fmaximum_num.rs
index 756ef5d9f..f7efdde80 100644
--- a/src/math/generic/fmaximum_num.rs
+++ b/src/math/generic/fmaximum_num.rs
@@ -13,6 +13,7 @@
 
 use super::super::Float;
 
+#[inline]
 pub fn fmaximum_num<F: Float>(x: F, y: F) -> F {
     let res =
         if x.is_nan() || x < y || (x.to_bits() == F::NEG_ZERO.to_bits() && y.is_sign_positive()) {
diff --git a/src/math/generic/fmin.rs b/src/math/generic/fmin.rs
index 69fbf85a1..cd3caeee4 100644
--- a/src/math/generic/fmin.rs
+++ b/src/math/generic/fmin.rs
@@ -16,6 +16,7 @@
 
 use super::super::Float;
 
+#[inline]
 pub fn fmin<F: Float>(x: F, y: F) -> F {
     let res = if y.is_nan() || x < y { x } else { y };
     // Canonicalize
diff --git a/src/math/generic/fminimum.rs b/src/math/generic/fminimum.rs
index ee5493880..4ddb36455 100644
--- a/src/math/generic/fminimum.rs
+++ b/src/math/generic/fminimum.rs
@@ -11,6 +11,7 @@
 
 use super::super::Float;
 
+#[inline]
 pub fn fminimum<F: Float>(x: F, y: F) -> F {
     let res = if x.is_nan() {
         x
diff --git a/src/math/generic/fminimum_num.rs b/src/math/generic/fminimum_num.rs
index 966618328..441c204a9 100644
--- a/src/math/generic/fminimum_num.rs
+++ b/src/math/generic/fminimum_num.rs
@@ -13,6 +13,7 @@
 
 use super::super::Float;
 
+#[inline]
 pub fn fminimum_num<F: Float>(x: F, y: F) -> F {
     let res =
         if y.is_nan() || x < y || (x.to_bits() == F::NEG_ZERO.to_bits() && y.is_sign_positive()) {
diff --git a/src/math/generic/fmod.rs b/src/math/generic/fmod.rs
index cd23350ea..6414bbd25 100644
--- a/src/math/generic/fmod.rs
+++ b/src/math/generic/fmod.rs
@@ -3,7 +3,7 @@
 
 use super::super::{CastFrom, Float, Int, MinInt};
 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[inline]
 pub fn fmod<F: Float>(x: F, y: F) -> F {
     let zero = F::Int::ZERO;
     let one = F::Int::ONE;
diff --git a/src/math/generic/mod.rs b/src/math/generic/mod.rs
index 9be185f80..35846351a 100644
--- a/src/math/generic/mod.rs
+++ b/src/math/generic/mod.rs
@@ -1,3 +1,6 @@
+// Note: generic functions are marked `#[inline]` because, even though generic functions are
+// typically inlined, this does not seem to always be the case.
+
 mod ceil;
 mod copysign;
 mod fabs;
diff --git a/src/math/generic/rint.rs b/src/math/generic/rint.rs
index 45d2f3138..9cdeb1185 100644
--- a/src/math/generic/rint.rs
+++ b/src/math/generic/rint.rs
@@ -6,6 +6,7 @@ use super::super::support::{FpResult, Round};
 
 /// IEEE 754-2019 `roundToIntegralExact`, which respects rounding mode and raises inexact if
 /// applicable.
+#[inline]
 pub fn rint_round<F: Float>(x: F, _round: Round) -> FpResult<F> {
     let toint = F::ONE / F::EPSILON;
     let e = x.ex();
diff --git a/src/math/generic/round.rs b/src/math/generic/round.rs
index 8b5138188..01314ac70 100644
--- a/src/math/generic/round.rs
+++ b/src/math/generic/round.rs
@@ -1,6 +1,7 @@
 use super::super::{Float, MinInt};
 use super::{copysign, trunc};
 
+#[inline]
 pub fn round<F: Float>(x: F) -> F {
     let f0p5 = F::from_parts(false, F::EXP_BIAS - 1, F::Int::ZERO); // 0.5
     let f0p25 = F::from_parts(false, F::EXP_BIAS - 2, F::Int::ZERO); // 0.25
diff --git a/src/math/generic/scalbn.rs b/src/math/generic/scalbn.rs
index b2696e5cc..a45db1b4a 100644
--- a/src/math/generic/scalbn.rs
+++ b/src/math/generic/scalbn.rs
@@ -16,6 +16,7 @@ use super::super::{CastFrom, CastInto, Float, IntTy, MinInt};
 /// >
 /// > If the calculation does not overflow or underflow, the returned value is exact and
 /// > independent of the current rounding direction mode.
+#[inline]
 pub fn scalbn<F: Float>(mut x: F, mut n: i32) -> F
 where
     u32: CastInto<F::Int>,
diff --git a/src/math/generic/sqrt.rs b/src/math/generic/sqrt.rs
index 5918025bc..ec9ff22df 100644
--- a/src/math/generic/sqrt.rs
+++ b/src/math/generic/sqrt.rs
@@ -44,6 +44,7 @@
 use super::super::support::{FpResult, IntTy, Round, Status, cold_path};
 use super::super::{CastFrom, CastInto, DInt, Float, HInt, Int, MinInt};
 
+#[inline]
 pub fn sqrt<F>(x: F) -> F
 where
     F: Float + SqrtHelper,
@@ -57,6 +58,7 @@ where
     sqrt_round(x, Round::Nearest).val
 }
 
+#[inline]
 pub fn sqrt_round<F>(x: F, _round: Round) -> FpResult<F>
 where
     F: Float + SqrtHelper,
diff --git a/src/math/generic/trunc.rs b/src/math/generic/trunc.rs
index 0fb3fa5ad..25414ecf4 100644
--- a/src/math/generic/trunc.rs
+++ b/src/math/generic/trunc.rs
@@ -4,10 +4,12 @@
 use super::super::support::{FpResult, Status};
 use super::super::{Float, Int, IntTy, MinInt};
 
+#[inline]
 pub fn trunc<F: Float>(x: F) -> F {
     trunc_status(x).val
 }
 
+#[inline]
 pub fn trunc_status<F: Float>(x: F) -> FpResult<F> {
     let mut xi: F::Int = x.to_bits();
     let e: i32 = x.exp_unbiased();
diff --git a/src/math/roundeven.rs b/src/math/roundeven.rs
index ec1738285..6e621d762 100644
--- a/src/math/roundeven.rs
+++ b/src/math/roundeven.rs
@@ -30,6 +30,7 @@ pub fn roundevenf128(x: f128) -> f128 {
     roundeven_impl(x)
 }
 
+#[inline]
 pub fn roundeven_impl<F: Float>(x: F) -> F {
     super::generic::rint_round(x, Round::Nearest).val
 }

From 5ba41f051e4c4c70e960bbbc5c181c414f6442c2 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 9 Apr 2025 01:21:33 +0000
Subject: [PATCH 276/279] refactor: Move the `libm` crate to a subdirectory

In preparation for switching to a virtual manifest, move the `libm`
crate into a subdirectory and update paths to match.

Updating `Cargo.toml` is done in the next commit so git tracks the moved
file correctly.
---
 .github/workflows/main.yaml                   |  2 +-
 ci/ci-util.py                                 |  6 +--
 crates/compiler-builtins-smoke-test/build.rs  |  4 +-
 .../compiler-builtins-smoke-test/src/math.rs  |  2 +-
 crates/libm-test/Cargo.toml                   |  2 +-
 crates/libm-test/build.rs                     |  4 +-
 crates/musl-math-sys/Cargo.toml               |  2 +-
 crates/util/Cargo.toml                        |  2 +-
 crates/util/build.rs                          |  4 +-
 etc/update-api-list.py                        | 18 ++++-----
 Cargo.toml => libm/Cargo.toml                 |  0
 build.rs => libm/build.rs                     |  1 +
 configure.rs => libm/configure.rs             |  0
 {src => libm/src}/lib.rs                      |  0
 {src => libm/src}/libm_helper.rs              |  0
 {src => libm/src}/math/acos.rs                |  0
 {src => libm/src}/math/acosf.rs               |  0
 {src => libm/src}/math/acosh.rs               |  0
 {src => libm/src}/math/acoshf.rs              |  0
 {src => libm/src}/math/arch/aarch64.rs        |  0
 {src => libm/src}/math/arch/i586.rs           |  0
 {src => libm/src}/math/arch/i686.rs           |  0
 {src => libm/src}/math/arch/mod.rs            |  0
 {src => libm/src}/math/arch/wasm32.rs         |  0
 {src => libm/src}/math/asin.rs                |  0
 {src => libm/src}/math/asinf.rs               |  0
 {src => libm/src}/math/asinh.rs               |  0
 {src => libm/src}/math/asinhf.rs              |  0
 {src => libm/src}/math/atan.rs                |  0
 {src => libm/src}/math/atan2.rs               |  0
 {src => libm/src}/math/atan2f.rs              |  0
 {src => libm/src}/math/atanf.rs               |  0
 {src => libm/src}/math/atanh.rs               |  0
 {src => libm/src}/math/atanhf.rs              |  0
 {src => libm/src}/math/cbrt.rs                |  0
 {src => libm/src}/math/cbrtf.rs               |  0
 {src => libm/src}/math/ceil.rs                |  0
 {src => libm/src}/math/copysign.rs            |  0
 libm/src/math/copysignf.rs                    |  8 ++++
 libm/src/math/copysignf128.rs                 |  8 ++++
 libm/src/math/copysignf16.rs                  |  8 ++++
 {src => libm/src}/math/cos.rs                 |  0
 {src => libm/src}/math/cosf.rs                |  0
 {src => libm/src}/math/cosh.rs                |  0
 {src => libm/src}/math/coshf.rs               |  0
 {src => libm/src}/math/erf.rs                 |  0
 {src => libm/src}/math/erff.rs                |  0
 {src => libm/src}/math/exp.rs                 |  0
 {src => libm/src}/math/exp10.rs               |  0
 {src => libm/src}/math/exp10f.rs              |  0
 {src => libm/src}/math/exp2.rs                |  0
 {src => libm/src}/math/exp2f.rs               |  0
 {src => libm/src}/math/expf.rs                |  0
 {src => libm/src}/math/expm1.rs               |  0
 {src => libm/src}/math/expm1f.rs              |  0
 {src => libm/src}/math/expo2.rs               |  0
 {src => libm/src}/math/fabs.rs                |  0
 libm/src/math/fabsf.rs                        | 39 +++++++++++++++++++
 libm/src/math/fabsf128.rs                     | 31 +++++++++++++++
 libm/src/math/fabsf16.rs                      | 31 +++++++++++++++
 {src => libm/src}/math/fdim.rs                |  0
 libm/src/math/fdimf.rs                        | 12 ++++++
 libm/src/math/fdimf128.rs                     | 12 ++++++
 libm/src/math/fdimf16.rs                      | 12 ++++++
 {src => libm/src}/math/floor.rs               |  0
 libm/src/math/floorf.rs                       | 13 +++++++
 libm/src/math/floorf128.rs                    |  7 ++++
 libm/src/math/floorf16.rs                     |  7 ++++
 {src => libm/src}/math/fma.rs                 |  0
 {src => libm/src}/math/fma_wide.rs            |  0
 {src => libm/src}/math/fmin_fmax.rs           |  0
 {src => libm/src}/math/fminimum_fmaximum.rs   |  0
 .../src}/math/fminimum_fmaximum_num.rs        |  0
 {src => libm/src}/math/fmod.rs                |  0
 libm/src/math/fmodf.rs                        |  5 +++
 libm/src/math/fmodf128.rs                     |  5 +++
 libm/src/math/fmodf16.rs                      |  5 +++
 {src => libm/src}/math/frexp.rs               |  0
 {src => libm/src}/math/frexpf.rs              |  0
 {src => libm/src}/math/generic/ceil.rs        |  0
 {src => libm/src}/math/generic/copysign.rs    |  0
 {src => libm/src}/math/generic/fabs.rs        |  0
 {src => libm/src}/math/generic/fdim.rs        |  0
 {src => libm/src}/math/generic/floor.rs       |  0
 {src => libm/src}/math/generic/fmax.rs        |  0
 {src => libm/src}/math/generic/fmaximum.rs    |  0
 .../src}/math/generic/fmaximum_num.rs         |  0
 {src => libm/src}/math/generic/fmin.rs        |  0
 {src => libm/src}/math/generic/fminimum.rs    |  0
 .../src}/math/generic/fminimum_num.rs         |  0
 {src => libm/src}/math/generic/fmod.rs        |  0
 {src => libm/src}/math/generic/mod.rs         |  0
 {src => libm/src}/math/generic/rint.rs        |  0
 {src => libm/src}/math/generic/round.rs       |  0
 {src => libm/src}/math/generic/scalbn.rs      |  0
 {src => libm/src}/math/generic/sqrt.rs        |  0
 {src => libm/src}/math/generic/trunc.rs       |  0
 {src => libm/src}/math/hypot.rs               |  0
 {src => libm/src}/math/hypotf.rs              |  0
 {src => libm/src}/math/ilogb.rs               |  0
 {src => libm/src}/math/ilogbf.rs              |  0
 {src => libm/src}/math/j0.rs                  |  0
 {src => libm/src}/math/j0f.rs                 |  0
 {src => libm/src}/math/j1.rs                  |  0
 {src => libm/src}/math/j1f.rs                 |  0
 {src => libm/src}/math/jn.rs                  |  0
 {src => libm/src}/math/jnf.rs                 |  0
 {src => libm/src}/math/k_cos.rs               |  0
 {src => libm/src}/math/k_cosf.rs              |  0
 {src => libm/src}/math/k_expo2.rs             |  0
 {src => libm/src}/math/k_expo2f.rs            |  0
 {src => libm/src}/math/k_sin.rs               |  0
 {src => libm/src}/math/k_sinf.rs              |  0
 {src => libm/src}/math/k_tan.rs               |  0
 {src => libm/src}/math/k_tanf.rs              |  0
 {src => libm/src}/math/ldexp.rs               |  0
 libm/src/math/ldexpf.rs                       |  4 ++
 libm/src/math/ldexpf128.rs                    |  4 ++
 libm/src/math/ldexpf16.rs                     |  4 ++
 {src => libm/src}/math/lgamma.rs              |  0
 {src => libm/src}/math/lgamma_r.rs            |  0
 {src => libm/src}/math/lgammaf.rs             |  0
 {src => libm/src}/math/lgammaf_r.rs           |  0
 {src => libm/src}/math/log.rs                 |  0
 {src => libm/src}/math/log10.rs               |  0
 {src => libm/src}/math/log10f.rs              |  0
 {src => libm/src}/math/log1p.rs               |  0
 {src => libm/src}/math/log1pf.rs              |  0
 {src => libm/src}/math/log2.rs                |  0
 {src => libm/src}/math/log2f.rs               |  0
 {src => libm/src}/math/logf.rs                |  0
 {src => libm/src}/math/mod.rs                 |  0
 {src => libm/src}/math/modf.rs                |  0
 {src => libm/src}/math/modff.rs               |  0
 {src => libm/src}/math/nextafter.rs           |  0
 {src => libm/src}/math/nextafterf.rs          |  0
 {src => libm/src}/math/pow.rs                 |  0
 {src => libm/src}/math/powf.rs                |  0
 {src => libm/src}/math/rem_pio2.rs            |  0
 {src => libm/src}/math/rem_pio2_large.rs      |  0
 {src => libm/src}/math/rem_pio2f.rs           |  0
 {src => libm/src}/math/remainder.rs           |  0
 {src => libm/src}/math/remainderf.rs          |  0
 {src => libm/src}/math/remquo.rs              |  0
 {src => libm/src}/math/remquof.rs             |  0
 {src => libm/src}/math/rint.rs                |  0
 {src => libm/src}/math/round.rs               |  0
 {src => libm/src}/math/roundeven.rs           |  0
 libm/src/math/roundf.rs                       |  5 +++
 libm/src/math/roundf128.rs                    |  5 +++
 libm/src/math/roundf16.rs                     |  5 +++
 {src => libm/src}/math/scalbn.rs              |  0
 libm/src/math/scalbnf.rs                      |  4 ++
 libm/src/math/scalbnf128.rs                   |  4 ++
 libm/src/math/scalbnf16.rs                    |  4 ++
 {src => libm/src}/math/sin.rs                 |  0
 {src => libm/src}/math/sincos.rs              |  0
 {src => libm/src}/math/sincosf.rs             |  0
 {src => libm/src}/math/sinf.rs                |  0
 {src => libm/src}/math/sinh.rs                |  0
 {src => libm/src}/math/sinhf.rs               |  0
 {src => libm/src}/math/sqrt.rs                |  0
 libm/src/math/sqrtf.rs                        | 15 +++++++
 libm/src/math/sqrtf128.rs                     |  5 +++
 libm/src/math/sqrtf16.rs                      | 11 ++++++
 {src => libm/src}/math/support/big.rs         |  0
 {src => libm/src}/math/support/big/tests.rs   |  0
 {src => libm/src}/math/support/env.rs         |  0
 .../src}/math/support/float_traits.rs         |  0
 {src => libm/src}/math/support/hex_float.rs   |  0
 {src => libm/src}/math/support/int_traits.rs  |  0
 {src => libm/src}/math/support/macros.rs      |  0
 {src => libm/src}/math/support/mod.rs         |  0
 {src => libm/src}/math/tan.rs                 |  0
 {src => libm/src}/math/tanf.rs                |  0
 {src => libm/src}/math/tanh.rs                |  0
 {src => libm/src}/math/tanhf.rs               |  0
 {src => libm/src}/math/tgamma.rs              |  0
 {src => libm/src}/math/tgammaf.rs             |  0
 {src => libm/src}/math/trunc.rs               |  0
 libm/src/math/truncf.rs                       | 23 +++++++++++
 libm/src/math/truncf128.rs                    |  7 ++++
 libm/src/math/truncf16.rs                     |  7 ++++
 183 files changed, 334 insertions(+), 23 deletions(-)
 rename Cargo.toml => libm/Cargo.toml (100%)
 rename build.rs => libm/build.rs (88%)
 rename configure.rs => libm/configure.rs (100%)
 rename {src => libm/src}/lib.rs (100%)
 rename {src => libm/src}/libm_helper.rs (100%)
 rename {src => libm/src}/math/acos.rs (100%)
 rename {src => libm/src}/math/acosf.rs (100%)
 rename {src => libm/src}/math/acosh.rs (100%)
 rename {src => libm/src}/math/acoshf.rs (100%)
 rename {src => libm/src}/math/arch/aarch64.rs (100%)
 rename {src => libm/src}/math/arch/i586.rs (100%)
 rename {src => libm/src}/math/arch/i686.rs (100%)
 rename {src => libm/src}/math/arch/mod.rs (100%)
 rename {src => libm/src}/math/arch/wasm32.rs (100%)
 rename {src => libm/src}/math/asin.rs (100%)
 rename {src => libm/src}/math/asinf.rs (100%)
 rename {src => libm/src}/math/asinh.rs (100%)
 rename {src => libm/src}/math/asinhf.rs (100%)
 rename {src => libm/src}/math/atan.rs (100%)
 rename {src => libm/src}/math/atan2.rs (100%)
 rename {src => libm/src}/math/atan2f.rs (100%)
 rename {src => libm/src}/math/atanf.rs (100%)
 rename {src => libm/src}/math/atanh.rs (100%)
 rename {src => libm/src}/math/atanhf.rs (100%)
 rename {src => libm/src}/math/cbrt.rs (100%)
 rename {src => libm/src}/math/cbrtf.rs (100%)
 rename {src => libm/src}/math/ceil.rs (100%)
 rename {src => libm/src}/math/copysign.rs (100%)
 create mode 100644 libm/src/math/copysignf.rs
 create mode 100644 libm/src/math/copysignf128.rs
 create mode 100644 libm/src/math/copysignf16.rs
 rename {src => libm/src}/math/cos.rs (100%)
 rename {src => libm/src}/math/cosf.rs (100%)
 rename {src => libm/src}/math/cosh.rs (100%)
 rename {src => libm/src}/math/coshf.rs (100%)
 rename {src => libm/src}/math/erf.rs (100%)
 rename {src => libm/src}/math/erff.rs (100%)
 rename {src => libm/src}/math/exp.rs (100%)
 rename {src => libm/src}/math/exp10.rs (100%)
 rename {src => libm/src}/math/exp10f.rs (100%)
 rename {src => libm/src}/math/exp2.rs (100%)
 rename {src => libm/src}/math/exp2f.rs (100%)
 rename {src => libm/src}/math/expf.rs (100%)
 rename {src => libm/src}/math/expm1.rs (100%)
 rename {src => libm/src}/math/expm1f.rs (100%)
 rename {src => libm/src}/math/expo2.rs (100%)
 rename {src => libm/src}/math/fabs.rs (100%)
 create mode 100644 libm/src/math/fabsf.rs
 create mode 100644 libm/src/math/fabsf128.rs
 create mode 100644 libm/src/math/fabsf16.rs
 rename {src => libm/src}/math/fdim.rs (100%)
 create mode 100644 libm/src/math/fdimf.rs
 create mode 100644 libm/src/math/fdimf128.rs
 create mode 100644 libm/src/math/fdimf16.rs
 rename {src => libm/src}/math/floor.rs (100%)
 create mode 100644 libm/src/math/floorf.rs
 create mode 100644 libm/src/math/floorf128.rs
 create mode 100644 libm/src/math/floorf16.rs
 rename {src => libm/src}/math/fma.rs (100%)
 rename {src => libm/src}/math/fma_wide.rs (100%)
 rename {src => libm/src}/math/fmin_fmax.rs (100%)
 rename {src => libm/src}/math/fminimum_fmaximum.rs (100%)
 rename {src => libm/src}/math/fminimum_fmaximum_num.rs (100%)
 rename {src => libm/src}/math/fmod.rs (100%)
 create mode 100644 libm/src/math/fmodf.rs
 create mode 100644 libm/src/math/fmodf128.rs
 create mode 100644 libm/src/math/fmodf16.rs
 rename {src => libm/src}/math/frexp.rs (100%)
 rename {src => libm/src}/math/frexpf.rs (100%)
 rename {src => libm/src}/math/generic/ceil.rs (100%)
 rename {src => libm/src}/math/generic/copysign.rs (100%)
 rename {src => libm/src}/math/generic/fabs.rs (100%)
 rename {src => libm/src}/math/generic/fdim.rs (100%)
 rename {src => libm/src}/math/generic/floor.rs (100%)
 rename {src => libm/src}/math/generic/fmax.rs (100%)
 rename {src => libm/src}/math/generic/fmaximum.rs (100%)
 rename {src => libm/src}/math/generic/fmaximum_num.rs (100%)
 rename {src => libm/src}/math/generic/fmin.rs (100%)
 rename {src => libm/src}/math/generic/fminimum.rs (100%)
 rename {src => libm/src}/math/generic/fminimum_num.rs (100%)
 rename {src => libm/src}/math/generic/fmod.rs (100%)
 rename {src => libm/src}/math/generic/mod.rs (100%)
 rename {src => libm/src}/math/generic/rint.rs (100%)
 rename {src => libm/src}/math/generic/round.rs (100%)
 rename {src => libm/src}/math/generic/scalbn.rs (100%)
 rename {src => libm/src}/math/generic/sqrt.rs (100%)
 rename {src => libm/src}/math/generic/trunc.rs (100%)
 rename {src => libm/src}/math/hypot.rs (100%)
 rename {src => libm/src}/math/hypotf.rs (100%)
 rename {src => libm/src}/math/ilogb.rs (100%)
 rename {src => libm/src}/math/ilogbf.rs (100%)
 rename {src => libm/src}/math/j0.rs (100%)
 rename {src => libm/src}/math/j0f.rs (100%)
 rename {src => libm/src}/math/j1.rs (100%)
 rename {src => libm/src}/math/j1f.rs (100%)
 rename {src => libm/src}/math/jn.rs (100%)
 rename {src => libm/src}/math/jnf.rs (100%)
 rename {src => libm/src}/math/k_cos.rs (100%)
 rename {src => libm/src}/math/k_cosf.rs (100%)
 rename {src => libm/src}/math/k_expo2.rs (100%)
 rename {src => libm/src}/math/k_expo2f.rs (100%)
 rename {src => libm/src}/math/k_sin.rs (100%)
 rename {src => libm/src}/math/k_sinf.rs (100%)
 rename {src => libm/src}/math/k_tan.rs (100%)
 rename {src => libm/src}/math/k_tanf.rs (100%)
 rename {src => libm/src}/math/ldexp.rs (100%)
 create mode 100644 libm/src/math/ldexpf.rs
 create mode 100644 libm/src/math/ldexpf128.rs
 create mode 100644 libm/src/math/ldexpf16.rs
 rename {src => libm/src}/math/lgamma.rs (100%)
 rename {src => libm/src}/math/lgamma_r.rs (100%)
 rename {src => libm/src}/math/lgammaf.rs (100%)
 rename {src => libm/src}/math/lgammaf_r.rs (100%)
 rename {src => libm/src}/math/log.rs (100%)
 rename {src => libm/src}/math/log10.rs (100%)
 rename {src => libm/src}/math/log10f.rs (100%)
 rename {src => libm/src}/math/log1p.rs (100%)
 rename {src => libm/src}/math/log1pf.rs (100%)
 rename {src => libm/src}/math/log2.rs (100%)
 rename {src => libm/src}/math/log2f.rs (100%)
 rename {src => libm/src}/math/logf.rs (100%)
 rename {src => libm/src}/math/mod.rs (100%)
 rename {src => libm/src}/math/modf.rs (100%)
 rename {src => libm/src}/math/modff.rs (100%)
 rename {src => libm/src}/math/nextafter.rs (100%)
 rename {src => libm/src}/math/nextafterf.rs (100%)
 rename {src => libm/src}/math/pow.rs (100%)
 rename {src => libm/src}/math/powf.rs (100%)
 rename {src => libm/src}/math/rem_pio2.rs (100%)
 rename {src => libm/src}/math/rem_pio2_large.rs (100%)
 rename {src => libm/src}/math/rem_pio2f.rs (100%)
 rename {src => libm/src}/math/remainder.rs (100%)
 rename {src => libm/src}/math/remainderf.rs (100%)
 rename {src => libm/src}/math/remquo.rs (100%)
 rename {src => libm/src}/math/remquof.rs (100%)
 rename {src => libm/src}/math/rint.rs (100%)
 rename {src => libm/src}/math/round.rs (100%)
 rename {src => libm/src}/math/roundeven.rs (100%)
 create mode 100644 libm/src/math/roundf.rs
 create mode 100644 libm/src/math/roundf128.rs
 create mode 100644 libm/src/math/roundf16.rs
 rename {src => libm/src}/math/scalbn.rs (100%)
 create mode 100644 libm/src/math/scalbnf.rs
 create mode 100644 libm/src/math/scalbnf128.rs
 create mode 100644 libm/src/math/scalbnf16.rs
 rename {src => libm/src}/math/sin.rs (100%)
 rename {src => libm/src}/math/sincos.rs (100%)
 rename {src => libm/src}/math/sincosf.rs (100%)
 rename {src => libm/src}/math/sinf.rs (100%)
 rename {src => libm/src}/math/sinh.rs (100%)
 rename {src => libm/src}/math/sinhf.rs (100%)
 rename {src => libm/src}/math/sqrt.rs (100%)
 create mode 100644 libm/src/math/sqrtf.rs
 create mode 100644 libm/src/math/sqrtf128.rs
 create mode 100644 libm/src/math/sqrtf16.rs
 rename {src => libm/src}/math/support/big.rs (100%)
 rename {src => libm/src}/math/support/big/tests.rs (100%)
 rename {src => libm/src}/math/support/env.rs (100%)
 rename {src => libm/src}/math/support/float_traits.rs (100%)
 rename {src => libm/src}/math/support/hex_float.rs (100%)
 rename {src => libm/src}/math/support/int_traits.rs (100%)
 rename {src => libm/src}/math/support/macros.rs (100%)
 rename {src => libm/src}/math/support/mod.rs (100%)
 rename {src => libm/src}/math/tan.rs (100%)
 rename {src => libm/src}/math/tanf.rs (100%)
 rename {src => libm/src}/math/tanh.rs (100%)
 rename {src => libm/src}/math/tanhf.rs (100%)
 rename {src => libm/src}/math/tgamma.rs (100%)
 rename {src => libm/src}/math/tgammaf.rs (100%)
 rename {src => libm/src}/math/trunc.rs (100%)
 create mode 100644 libm/src/math/truncf.rs
 create mode 100644 libm/src/math/truncf128.rs
 create mode 100644 libm/src/math/truncf16.rs

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index c925e63aa..a717c3ea8 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -213,7 +213,7 @@ jobs:
     steps:
     - uses: actions/checkout@master
     - run: |
-        msrv="$(perl -ne 'print if s/rust-version\s*=\s*"(.*)"/\1/g' Cargo.toml)"
+        msrv="$(perl -ne 'print if s/rust-version\s*=\s*"(.*)"/\1/g' libm/Cargo.toml)"
         echo "MSRV: $msrv"
         echo "MSRV=$msrv" >> "$GITHUB_ENV"
     - name: Install Rust
diff --git a/ci/ci-util.py b/ci/ci-util.py
index 7468fd690..ed63d6dee 100755
--- a/ci/ci-util.py
+++ b/ci/ci-util.py
@@ -63,9 +63,9 @@
 # Don't run exhaustive tests if these files change, even if they contaiin a function
 # definition.
 IGNORE_FILES = [
-    "src/math/support/",
-    "src/libm_helper.rs",
-    "src/math/arch/intrinsics.rs",
+    "libm/src/math/support/",
+    "libm/src/libm_helper.rs",
+    "libm/src/math/arch/intrinsics.rs",
 ]
 
 TYPES = ["f16", "f32", "f64", "f128"]
diff --git a/crates/compiler-builtins-smoke-test/build.rs b/crates/compiler-builtins-smoke-test/build.rs
index 4ee5ab585..ef8d613c9 100644
--- a/crates/compiler-builtins-smoke-test/build.rs
+++ b/crates/compiler-builtins-smoke-test/build.rs
@@ -1,8 +1,8 @@
-#[path = "../../configure.rs"]
+#[path = "../../libm/configure.rs"]
 mod configure;
 
 fn main() {
-    println!("cargo:rerun-if-changed=../../configure.rs");
+    println!("cargo:rerun-if-changed=../../libm/configure.rs");
     let cfg = configure::Config::from_env();
     configure::emit_libm_config(&cfg);
 }
diff --git a/crates/compiler-builtins-smoke-test/src/math.rs b/crates/compiler-builtins-smoke-test/src/math.rs
index 7e0146998..f17fc1231 100644
--- a/crates/compiler-builtins-smoke-test/src/math.rs
+++ b/crates/compiler-builtins-smoke-test/src/math.rs
@@ -3,7 +3,7 @@ use core::ffi::c_int;
 #[allow(dead_code)]
 #[allow(clippy::all)] // We don't get `libm`'s list of `allow`s, so just ignore Clippy.
 #[allow(unused_imports)]
-#[path = "../../../src/math/mod.rs"]
+#[path = "../../../libm/src/math/mod.rs"]
 pub mod libm;
 
 /// Mark functions `#[no_mangle]` and with the C ABI.
diff --git a/crates/libm-test/Cargo.toml b/crates/libm-test/Cargo.toml
index 98da73cea..1bcc163ed 100644
--- a/crates/libm-test/Cargo.toml
+++ b/crates/libm-test/Cargo.toml
@@ -32,7 +32,7 @@ anyhow = "1.0.97"
 gmp-mpfr-sys = { version = "1.6.4", optional = true, default-features = false }
 iai-callgrind = { version = "0.14.0", optional = true }
 indicatif = { version = "0.17.11", default-features = false }
-libm = { path = "../..", features = ["unstable-public-internals"] }
+libm = { path = "../../libm", features = ["unstable-public-internals"] }
 libm-macros = { path = "../libm-macros" }
 musl-math-sys = { path = "../musl-math-sys", optional = true }
 paste = "1.0.15"
diff --git a/crates/libm-test/build.rs b/crates/libm-test/build.rs
index d2d0df9cb..f75e3dda5 100644
--- a/crates/libm-test/build.rs
+++ b/crates/libm-test/build.rs
@@ -1,9 +1,9 @@
-#[path = "../../configure.rs"]
+#[path = "../../libm/configure.rs"]
 mod configure;
 use configure::Config;
 
 fn main() {
-    println!("cargo:rerun-if-changed=../../configure.rs");
+    println!("cargo:rerun-if-changed=../../libm/configure.rs");
     let cfg = Config::from_env();
     configure::emit_test_config(&cfg);
 }
diff --git a/crates/musl-math-sys/Cargo.toml b/crates/musl-math-sys/Cargo.toml
index 34682b74c..ad73578d8 100644
--- a/crates/musl-math-sys/Cargo.toml
+++ b/crates/musl-math-sys/Cargo.toml
@@ -7,7 +7,7 @@ publish = false
 [dependencies]
 
 [dev-dependencies]
-libm = { path = "../../" }
+libm = { path = "../../libm" }
 
 [build-dependencies]
 cc = "1.2.16"
diff --git a/crates/util/Cargo.toml b/crates/util/Cargo.toml
index 8005459db..94c7f1033 100644
--- a/crates/util/Cargo.toml
+++ b/crates/util/Cargo.toml
@@ -11,7 +11,7 @@ build-mpfr = ["libm-test/build-mpfr", "dep:rug"]
 unstable-float = ["libm/unstable-float", "libm-test/unstable-float", "rug?/nightly-float"]
 
 [dependencies]
-libm = { path = "../..", default-features = false }
+libm = { path = "../../libm", default-features = false }
 libm-macros = { path = "../libm-macros" }
 libm-test = { path = "../libm-test", default-features = false }
 musl-math-sys = { path = "../musl-math-sys", optional = true }
diff --git a/crates/util/build.rs b/crates/util/build.rs
index b6cceb5f1..a1be41275 100644
--- a/crates/util/build.rs
+++ b/crates/util/build.rs
@@ -1,10 +1,10 @@
 #![allow(unexpected_cfgs)]
 
-#[path = "../../configure.rs"]
+#[path = "../../libm/configure.rs"]
 mod configure;
 
 fn main() {
-    println!("cargo:rerun-if-changed=../../configure.rs");
+    println!("cargo:rerun-if-changed=../../libm/configure.rs");
     let cfg = configure::Config::from_env();
     configure::emit_libm_config(&cfg);
 }
diff --git a/etc/update-api-list.py b/etc/update-api-list.py
index b4ce2c453..950824fc4 100755
--- a/etc/update-api-list.py
+++ b/etc/update-api-list.py
@@ -18,7 +18,7 @@
 
 SELF_PATH = Path(__file__)
 ETC_DIR = SELF_PATH.parent
-ROOT_DIR = ETC_DIR.parent
+LIBM_DIR = ETC_DIR.parent.joinpath("libm")
 
 # These files do not trigger a retest.
 IGNORED_SOURCES = ["src/libm_helper.rs", "src/math/support/float_traits.rs"]
@@ -75,7 +75,7 @@ def get_rustdoc_json() -> dict[Any, Any]:
                 "-Zunstable-options",
                 "-o-",
             ],
-            cwd=ROOT_DIR,
+            cwd=LIBM_DIR,
             text=True,
         )
         j = json.loads(j)
@@ -121,8 +121,8 @@ def _init_defs(self, index: IndexTy) -> None:
 
         # A lot of the `arch` module is often configured out so doesn't show up in docs. Use
         # string matching as a fallback.
-        for fname in glob("src/math/arch/**.rs", root_dir=ROOT_DIR):
-            contents = (ROOT_DIR.joinpath(fname)).read_text()
+        for fname in glob("src/math/arch/**.rs", root_dir=LIBM_DIR):
+            contents = (LIBM_DIR.joinpath(fname)).read_text()
 
             for name in self.public_functions:
                 if f"fn {name}" in contents:
@@ -188,10 +188,10 @@ def tidy_lists(self) -> None:
         include all public API.
         """
 
-        flist = sp.check_output(["git", "ls-files"], cwd=ROOT_DIR, text=True)
+        flist = sp.check_output(["git", "ls-files"], cwd=LIBM_DIR, text=True)
 
         for path in flist.splitlines():
-            fpath = ROOT_DIR.joinpath(path)
+            fpath = LIBM_DIR.joinpath(path)
             if fpath.is_dir() or fpath == SELF_PATH:
                 continue
 
@@ -229,7 +229,7 @@ def ensure_contains_api(self, fpath: Path, line_num: int, lines: list[str]):
         if len(not_found) == 0:
             return
 
-        relpath = fpath.relative_to(ROOT_DIR)
+        relpath = fpath.relative_to(LIBM_DIR)
         eprint(f"functions not found at {relpath}:{line_num}: {not_found}")
         exit(1)
 
@@ -244,7 +244,7 @@ def validate_delimited_block(
     """Identify blocks of code wrapped within `start` and `end`, collect their contents
     to a list of strings, and call `validate` for each of those lists.
     """
-    relpath = fpath.relative_to(ROOT_DIR)
+    relpath = fpath.relative_to(LIBM_DIR)
     block_lines = []
     block_start_line: None | int = None
     for line_num, line in enumerate(lines):
@@ -274,7 +274,7 @@ def validate_delimited_block(
 
 def ensure_sorted(fpath: Path, block_start_line: int, lines: list[str]) -> None:
     """Ensure that a list of lines is sorted, otherwise print a diff and exit."""
-    relpath = fpath.relative_to(ROOT_DIR)
+    relpath = fpath.relative_to(LIBM_DIR)
     diff_and_exit(
         "\n".join(lines),
         "\n".join(sorted(lines)),
diff --git a/Cargo.toml b/libm/Cargo.toml
similarity index 100%
rename from Cargo.toml
rename to libm/Cargo.toml
diff --git a/build.rs b/libm/build.rs
similarity index 88%
rename from build.rs
rename to libm/build.rs
index 7042b54d7..07d08ed43 100644
--- a/build.rs
+++ b/libm/build.rs
@@ -6,6 +6,7 @@ fn main() {
     let cfg = configure::Config::from_env();
 
     println!("cargo:rerun-if-changed=build.rs");
+    println!("cargo:rerun-if-changed=configure.rs");
     println!("cargo:rustc-check-cfg=cfg(assert_no_panic)");
 
     // If set, enable `no-panic`. Requires LTO (`release-opt` profile).
diff --git a/configure.rs b/libm/configure.rs
similarity index 100%
rename from configure.rs
rename to libm/configure.rs
diff --git a/src/lib.rs b/libm/src/lib.rs
similarity index 100%
rename from src/lib.rs
rename to libm/src/lib.rs
diff --git a/src/libm_helper.rs b/libm/src/libm_helper.rs
similarity index 100%
rename from src/libm_helper.rs
rename to libm/src/libm_helper.rs
diff --git a/src/math/acos.rs b/libm/src/math/acos.rs
similarity index 100%
rename from src/math/acos.rs
rename to libm/src/math/acos.rs
diff --git a/src/math/acosf.rs b/libm/src/math/acosf.rs
similarity index 100%
rename from src/math/acosf.rs
rename to libm/src/math/acosf.rs
diff --git a/src/math/acosh.rs b/libm/src/math/acosh.rs
similarity index 100%
rename from src/math/acosh.rs
rename to libm/src/math/acosh.rs
diff --git a/src/math/acoshf.rs b/libm/src/math/acoshf.rs
similarity index 100%
rename from src/math/acoshf.rs
rename to libm/src/math/acoshf.rs
diff --git a/src/math/arch/aarch64.rs b/libm/src/math/arch/aarch64.rs
similarity index 100%
rename from src/math/arch/aarch64.rs
rename to libm/src/math/arch/aarch64.rs
diff --git a/src/math/arch/i586.rs b/libm/src/math/arch/i586.rs
similarity index 100%
rename from src/math/arch/i586.rs
rename to libm/src/math/arch/i586.rs
diff --git a/src/math/arch/i686.rs b/libm/src/math/arch/i686.rs
similarity index 100%
rename from src/math/arch/i686.rs
rename to libm/src/math/arch/i686.rs
diff --git a/src/math/arch/mod.rs b/libm/src/math/arch/mod.rs
similarity index 100%
rename from src/math/arch/mod.rs
rename to libm/src/math/arch/mod.rs
diff --git a/src/math/arch/wasm32.rs b/libm/src/math/arch/wasm32.rs
similarity index 100%
rename from src/math/arch/wasm32.rs
rename to libm/src/math/arch/wasm32.rs
diff --git a/src/math/asin.rs b/libm/src/math/asin.rs
similarity index 100%
rename from src/math/asin.rs
rename to libm/src/math/asin.rs
diff --git a/src/math/asinf.rs b/libm/src/math/asinf.rs
similarity index 100%
rename from src/math/asinf.rs
rename to libm/src/math/asinf.rs
diff --git a/src/math/asinh.rs b/libm/src/math/asinh.rs
similarity index 100%
rename from src/math/asinh.rs
rename to libm/src/math/asinh.rs
diff --git a/src/math/asinhf.rs b/libm/src/math/asinhf.rs
similarity index 100%
rename from src/math/asinhf.rs
rename to libm/src/math/asinhf.rs
diff --git a/src/math/atan.rs b/libm/src/math/atan.rs
similarity index 100%
rename from src/math/atan.rs
rename to libm/src/math/atan.rs
diff --git a/src/math/atan2.rs b/libm/src/math/atan2.rs
similarity index 100%
rename from src/math/atan2.rs
rename to libm/src/math/atan2.rs
diff --git a/src/math/atan2f.rs b/libm/src/math/atan2f.rs
similarity index 100%
rename from src/math/atan2f.rs
rename to libm/src/math/atan2f.rs
diff --git a/src/math/atanf.rs b/libm/src/math/atanf.rs
similarity index 100%
rename from src/math/atanf.rs
rename to libm/src/math/atanf.rs
diff --git a/src/math/atanh.rs b/libm/src/math/atanh.rs
similarity index 100%
rename from src/math/atanh.rs
rename to libm/src/math/atanh.rs
diff --git a/src/math/atanhf.rs b/libm/src/math/atanhf.rs
similarity index 100%
rename from src/math/atanhf.rs
rename to libm/src/math/atanhf.rs
diff --git a/src/math/cbrt.rs b/libm/src/math/cbrt.rs
similarity index 100%
rename from src/math/cbrt.rs
rename to libm/src/math/cbrt.rs
diff --git a/src/math/cbrtf.rs b/libm/src/math/cbrtf.rs
similarity index 100%
rename from src/math/cbrtf.rs
rename to libm/src/math/cbrtf.rs
diff --git a/src/math/ceil.rs b/libm/src/math/ceil.rs
similarity index 100%
rename from src/math/ceil.rs
rename to libm/src/math/ceil.rs
diff --git a/src/math/copysign.rs b/libm/src/math/copysign.rs
similarity index 100%
rename from src/math/copysign.rs
rename to libm/src/math/copysign.rs
diff --git a/libm/src/math/copysignf.rs b/libm/src/math/copysignf.rs
new file mode 100644
index 000000000..8b9bed4c0
--- /dev/null
+++ b/libm/src/math/copysignf.rs
@@ -0,0 +1,8 @@
+/// Sign of Y, magnitude of X (f32)
+///
+/// Constructs a number with the magnitude (absolute value) of its
+/// first argument, `x`, and the sign of its second argument, `y`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn copysignf(x: f32, y: f32) -> f32 {
+    super::generic::copysign(x, y)
+}
diff --git a/libm/src/math/copysignf128.rs b/libm/src/math/copysignf128.rs
new file mode 100644
index 000000000..7bd81d42b
--- /dev/null
+++ b/libm/src/math/copysignf128.rs
@@ -0,0 +1,8 @@
+/// Sign of Y, magnitude of X (f128)
+///
+/// Constructs a number with the magnitude (absolute value) of its
+/// first argument, `x`, and the sign of its second argument, `y`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn copysignf128(x: f128, y: f128) -> f128 {
+    super::generic::copysign(x, y)
+}
diff --git a/libm/src/math/copysignf16.rs b/libm/src/math/copysignf16.rs
new file mode 100644
index 000000000..820658686
--- /dev/null
+++ b/libm/src/math/copysignf16.rs
@@ -0,0 +1,8 @@
+/// Sign of Y, magnitude of X (f16)
+///
+/// Constructs a number with the magnitude (absolute value) of its
+/// first argument, `x`, and the sign of its second argument, `y`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn copysignf16(x: f16, y: f16) -> f16 {
+    super::generic::copysign(x, y)
+}
diff --git a/src/math/cos.rs b/libm/src/math/cos.rs
similarity index 100%
rename from src/math/cos.rs
rename to libm/src/math/cos.rs
diff --git a/src/math/cosf.rs b/libm/src/math/cosf.rs
similarity index 100%
rename from src/math/cosf.rs
rename to libm/src/math/cosf.rs
diff --git a/src/math/cosh.rs b/libm/src/math/cosh.rs
similarity index 100%
rename from src/math/cosh.rs
rename to libm/src/math/cosh.rs
diff --git a/src/math/coshf.rs b/libm/src/math/coshf.rs
similarity index 100%
rename from src/math/coshf.rs
rename to libm/src/math/coshf.rs
diff --git a/src/math/erf.rs b/libm/src/math/erf.rs
similarity index 100%
rename from src/math/erf.rs
rename to libm/src/math/erf.rs
diff --git a/src/math/erff.rs b/libm/src/math/erff.rs
similarity index 100%
rename from src/math/erff.rs
rename to libm/src/math/erff.rs
diff --git a/src/math/exp.rs b/libm/src/math/exp.rs
similarity index 100%
rename from src/math/exp.rs
rename to libm/src/math/exp.rs
diff --git a/src/math/exp10.rs b/libm/src/math/exp10.rs
similarity index 100%
rename from src/math/exp10.rs
rename to libm/src/math/exp10.rs
diff --git a/src/math/exp10f.rs b/libm/src/math/exp10f.rs
similarity index 100%
rename from src/math/exp10f.rs
rename to libm/src/math/exp10f.rs
diff --git a/src/math/exp2.rs b/libm/src/math/exp2.rs
similarity index 100%
rename from src/math/exp2.rs
rename to libm/src/math/exp2.rs
diff --git a/src/math/exp2f.rs b/libm/src/math/exp2f.rs
similarity index 100%
rename from src/math/exp2f.rs
rename to libm/src/math/exp2f.rs
diff --git a/src/math/expf.rs b/libm/src/math/expf.rs
similarity index 100%
rename from src/math/expf.rs
rename to libm/src/math/expf.rs
diff --git a/src/math/expm1.rs b/libm/src/math/expm1.rs
similarity index 100%
rename from src/math/expm1.rs
rename to libm/src/math/expm1.rs
diff --git a/src/math/expm1f.rs b/libm/src/math/expm1f.rs
similarity index 100%
rename from src/math/expm1f.rs
rename to libm/src/math/expm1f.rs
diff --git a/src/math/expo2.rs b/libm/src/math/expo2.rs
similarity index 100%
rename from src/math/expo2.rs
rename to libm/src/math/expo2.rs
diff --git a/src/math/fabs.rs b/libm/src/math/fabs.rs
similarity index 100%
rename from src/math/fabs.rs
rename to libm/src/math/fabs.rs
diff --git a/libm/src/math/fabsf.rs b/libm/src/math/fabsf.rs
new file mode 100644
index 000000000..e5820a26c
--- /dev/null
+++ b/libm/src/math/fabsf.rs
@@ -0,0 +1,39 @@
+/// Absolute value (magnitude) (f32)
+///
+/// Calculates the absolute value (magnitude) of the argument `x`,
+/// by direct manipulation of the bit representation of `x`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fabsf(x: f32) -> f32 {
+    select_implementation! {
+        name: fabsf,
+        use_arch: all(target_arch = "wasm32", intrinsics_enabled),
+        args: x,
+    }
+
+    super::generic::fabs(x)
+}
+
+// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520
+#[cfg(not(target_arch = "powerpc64"))]
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn sanity_check() {
+        assert_eq!(fabsf(-1.0), 1.0);
+        assert_eq!(fabsf(2.8), 2.8);
+    }
+
+    /// The spec: https://en.cppreference.com/w/cpp/numeric/math/fabs
+    #[test]
+    fn spec_tests() {
+        assert!(fabsf(f32::NAN).is_nan());
+        for f in [0.0, -0.0].iter().copied() {
+            assert_eq!(fabsf(f), 0.0);
+        }
+        for f in [f32::INFINITY, f32::NEG_INFINITY].iter().copied() {
+            assert_eq!(fabsf(f), f32::INFINITY);
+        }
+    }
+}
diff --git a/libm/src/math/fabsf128.rs b/libm/src/math/fabsf128.rs
new file mode 100644
index 000000000..46429ca49
--- /dev/null
+++ b/libm/src/math/fabsf128.rs
@@ -0,0 +1,31 @@
+/// Absolute value (magnitude) (f128)
+///
+/// Calculates the absolute value (magnitude) of the argument `x`,
+/// by direct manipulation of the bit representation of `x`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fabsf128(x: f128) -> f128 {
+    super::generic::fabs(x)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn sanity_check() {
+        assert_eq!(fabsf128(-1.0), 1.0);
+        assert_eq!(fabsf128(2.8), 2.8);
+    }
+
+    /// The spec: https://en.cppreference.com/w/cpp/numeric/math/fabs
+    #[test]
+    fn spec_tests() {
+        assert!(fabsf128(f128::NAN).is_nan());
+        for f in [0.0, -0.0].iter().copied() {
+            assert_eq!(fabsf128(f), 0.0);
+        }
+        for f in [f128::INFINITY, f128::NEG_INFINITY].iter().copied() {
+            assert_eq!(fabsf128(f), f128::INFINITY);
+        }
+    }
+}
diff --git a/libm/src/math/fabsf16.rs b/libm/src/math/fabsf16.rs
new file mode 100644
index 000000000..eee42ac6a
--- /dev/null
+++ b/libm/src/math/fabsf16.rs
@@ -0,0 +1,31 @@
+/// Absolute value (magnitude) (f16)
+///
+/// Calculates the absolute value (magnitude) of the argument `x`,
+/// by direct manipulation of the bit representation of `x`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fabsf16(x: f16) -> f16 {
+    super::generic::fabs(x)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn sanity_check() {
+        assert_eq!(fabsf16(-1.0), 1.0);
+        assert_eq!(fabsf16(2.8), 2.8);
+    }
+
+    /// The spec: https://en.cppreference.com/w/cpp/numeric/math/fabs
+    #[test]
+    fn spec_tests() {
+        assert!(fabsf16(f16::NAN).is_nan());
+        for f in [0.0, -0.0].iter().copied() {
+            assert_eq!(fabsf16(f), 0.0);
+        }
+        for f in [f16::INFINITY, f16::NEG_INFINITY].iter().copied() {
+            assert_eq!(fabsf16(f), f16::INFINITY);
+        }
+    }
+}
diff --git a/src/math/fdim.rs b/libm/src/math/fdim.rs
similarity index 100%
rename from src/math/fdim.rs
rename to libm/src/math/fdim.rs
diff --git a/libm/src/math/fdimf.rs b/libm/src/math/fdimf.rs
new file mode 100644
index 000000000..367ef517c
--- /dev/null
+++ b/libm/src/math/fdimf.rs
@@ -0,0 +1,12 @@
+/// Positive difference (f32)
+///
+/// Determines the positive difference between arguments, returning:
+/// * x - y if x > y, or
+/// * +0    if x <= y, or
+/// * NAN   if either argument is NAN.
+///
+/// A range error may occur.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fdimf(x: f32, y: f32) -> f32 {
+    super::generic::fdim(x, y)
+}
diff --git a/libm/src/math/fdimf128.rs b/libm/src/math/fdimf128.rs
new file mode 100644
index 000000000..6f3d1d0ff
--- /dev/null
+++ b/libm/src/math/fdimf128.rs
@@ -0,0 +1,12 @@
+/// Positive difference (f128)
+///
+/// Determines the positive difference between arguments, returning:
+/// * x - y if x > y, or
+/// * +0    if x <= y, or
+/// * NAN   if either argument is NAN.
+///
+/// A range error may occur.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fdimf128(x: f128, y: f128) -> f128 {
+    super::generic::fdim(x, y)
+}
diff --git a/libm/src/math/fdimf16.rs b/libm/src/math/fdimf16.rs
new file mode 100644
index 000000000..37bd68858
--- /dev/null
+++ b/libm/src/math/fdimf16.rs
@@ -0,0 +1,12 @@
+/// Positive difference (f16)
+///
+/// Determines the positive difference between arguments, returning:
+/// * x - y if x > y, or
+/// * +0    if x <= y, or
+/// * NAN   if either argument is NAN.
+///
+/// A range error may occur.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fdimf16(x: f16, y: f16) -> f16 {
+    super::generic::fdim(x, y)
+}
diff --git a/src/math/floor.rs b/libm/src/math/floor.rs
similarity index 100%
rename from src/math/floor.rs
rename to libm/src/math/floor.rs
diff --git a/libm/src/math/floorf.rs b/libm/src/math/floorf.rs
new file mode 100644
index 000000000..16957b7f3
--- /dev/null
+++ b/libm/src/math/floorf.rs
@@ -0,0 +1,13 @@
+/// Floor (f32)
+///
+/// Finds the nearest integer less than or equal to `x`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn floorf(x: f32) -> f32 {
+    select_implementation! {
+        name: floorf,
+        use_arch: all(target_arch = "wasm32", intrinsics_enabled),
+        args: x,
+    }
+
+    return super::generic::floor(x);
+}
diff --git a/libm/src/math/floorf128.rs b/libm/src/math/floorf128.rs
new file mode 100644
index 000000000..9a9fe4151
--- /dev/null
+++ b/libm/src/math/floorf128.rs
@@ -0,0 +1,7 @@
+/// Floor (f128)
+///
+/// Finds the nearest integer less than or equal to `x`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn floorf128(x: f128) -> f128 {
+    return super::generic::floor(x);
+}
diff --git a/libm/src/math/floorf16.rs b/libm/src/math/floorf16.rs
new file mode 100644
index 000000000..f9b868e04
--- /dev/null
+++ b/libm/src/math/floorf16.rs
@@ -0,0 +1,7 @@
+/// Floor (f16)
+///
+/// Finds the nearest integer less than or equal to `x`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn floorf16(x: f16) -> f16 {
+    return super::generic::floor(x);
+}
diff --git a/src/math/fma.rs b/libm/src/math/fma.rs
similarity index 100%
rename from src/math/fma.rs
rename to libm/src/math/fma.rs
diff --git a/src/math/fma_wide.rs b/libm/src/math/fma_wide.rs
similarity index 100%
rename from src/math/fma_wide.rs
rename to libm/src/math/fma_wide.rs
diff --git a/src/math/fmin_fmax.rs b/libm/src/math/fmin_fmax.rs
similarity index 100%
rename from src/math/fmin_fmax.rs
rename to libm/src/math/fmin_fmax.rs
diff --git a/src/math/fminimum_fmaximum.rs b/libm/src/math/fminimum_fmaximum.rs
similarity index 100%
rename from src/math/fminimum_fmaximum.rs
rename to libm/src/math/fminimum_fmaximum.rs
diff --git a/src/math/fminimum_fmaximum_num.rs b/libm/src/math/fminimum_fmaximum_num.rs
similarity index 100%
rename from src/math/fminimum_fmaximum_num.rs
rename to libm/src/math/fminimum_fmaximum_num.rs
diff --git a/src/math/fmod.rs b/libm/src/math/fmod.rs
similarity index 100%
rename from src/math/fmod.rs
rename to libm/src/math/fmod.rs
diff --git a/libm/src/math/fmodf.rs b/libm/src/math/fmodf.rs
new file mode 100644
index 000000000..4e95696e2
--- /dev/null
+++ b/libm/src/math/fmodf.rs
@@ -0,0 +1,5 @@
+/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmodf(x: f32, y: f32) -> f32 {
+    super::generic::fmod(x, y)
+}
diff --git a/libm/src/math/fmodf128.rs b/libm/src/math/fmodf128.rs
new file mode 100644
index 000000000..ff0e0493e
--- /dev/null
+++ b/libm/src/math/fmodf128.rs
@@ -0,0 +1,5 @@
+/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmodf128(x: f128, y: f128) -> f128 {
+    super::generic::fmod(x, y)
+}
diff --git a/libm/src/math/fmodf16.rs b/libm/src/math/fmodf16.rs
new file mode 100644
index 000000000..11972a7de
--- /dev/null
+++ b/libm/src/math/fmodf16.rs
@@ -0,0 +1,5 @@
+/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmodf16(x: f16, y: f16) -> f16 {
+    super::generic::fmod(x, y)
+}
diff --git a/src/math/frexp.rs b/libm/src/math/frexp.rs
similarity index 100%
rename from src/math/frexp.rs
rename to libm/src/math/frexp.rs
diff --git a/src/math/frexpf.rs b/libm/src/math/frexpf.rs
similarity index 100%
rename from src/math/frexpf.rs
rename to libm/src/math/frexpf.rs
diff --git a/src/math/generic/ceil.rs b/libm/src/math/generic/ceil.rs
similarity index 100%
rename from src/math/generic/ceil.rs
rename to libm/src/math/generic/ceil.rs
diff --git a/src/math/generic/copysign.rs b/libm/src/math/generic/copysign.rs
similarity index 100%
rename from src/math/generic/copysign.rs
rename to libm/src/math/generic/copysign.rs
diff --git a/src/math/generic/fabs.rs b/libm/src/math/generic/fabs.rs
similarity index 100%
rename from src/math/generic/fabs.rs
rename to libm/src/math/generic/fabs.rs
diff --git a/src/math/generic/fdim.rs b/libm/src/math/generic/fdim.rs
similarity index 100%
rename from src/math/generic/fdim.rs
rename to libm/src/math/generic/fdim.rs
diff --git a/src/math/generic/floor.rs b/libm/src/math/generic/floor.rs
similarity index 100%
rename from src/math/generic/floor.rs
rename to libm/src/math/generic/floor.rs
diff --git a/src/math/generic/fmax.rs b/libm/src/math/generic/fmax.rs
similarity index 100%
rename from src/math/generic/fmax.rs
rename to libm/src/math/generic/fmax.rs
diff --git a/src/math/generic/fmaximum.rs b/libm/src/math/generic/fmaximum.rs
similarity index 100%
rename from src/math/generic/fmaximum.rs
rename to libm/src/math/generic/fmaximum.rs
diff --git a/src/math/generic/fmaximum_num.rs b/libm/src/math/generic/fmaximum_num.rs
similarity index 100%
rename from src/math/generic/fmaximum_num.rs
rename to libm/src/math/generic/fmaximum_num.rs
diff --git a/src/math/generic/fmin.rs b/libm/src/math/generic/fmin.rs
similarity index 100%
rename from src/math/generic/fmin.rs
rename to libm/src/math/generic/fmin.rs
diff --git a/src/math/generic/fminimum.rs b/libm/src/math/generic/fminimum.rs
similarity index 100%
rename from src/math/generic/fminimum.rs
rename to libm/src/math/generic/fminimum.rs
diff --git a/src/math/generic/fminimum_num.rs b/libm/src/math/generic/fminimum_num.rs
similarity index 100%
rename from src/math/generic/fminimum_num.rs
rename to libm/src/math/generic/fminimum_num.rs
diff --git a/src/math/generic/fmod.rs b/libm/src/math/generic/fmod.rs
similarity index 100%
rename from src/math/generic/fmod.rs
rename to libm/src/math/generic/fmod.rs
diff --git a/src/math/generic/mod.rs b/libm/src/math/generic/mod.rs
similarity index 100%
rename from src/math/generic/mod.rs
rename to libm/src/math/generic/mod.rs
diff --git a/src/math/generic/rint.rs b/libm/src/math/generic/rint.rs
similarity index 100%
rename from src/math/generic/rint.rs
rename to libm/src/math/generic/rint.rs
diff --git a/src/math/generic/round.rs b/libm/src/math/generic/round.rs
similarity index 100%
rename from src/math/generic/round.rs
rename to libm/src/math/generic/round.rs
diff --git a/src/math/generic/scalbn.rs b/libm/src/math/generic/scalbn.rs
similarity index 100%
rename from src/math/generic/scalbn.rs
rename to libm/src/math/generic/scalbn.rs
diff --git a/src/math/generic/sqrt.rs b/libm/src/math/generic/sqrt.rs
similarity index 100%
rename from src/math/generic/sqrt.rs
rename to libm/src/math/generic/sqrt.rs
diff --git a/src/math/generic/trunc.rs b/libm/src/math/generic/trunc.rs
similarity index 100%
rename from src/math/generic/trunc.rs
rename to libm/src/math/generic/trunc.rs
diff --git a/src/math/hypot.rs b/libm/src/math/hypot.rs
similarity index 100%
rename from src/math/hypot.rs
rename to libm/src/math/hypot.rs
diff --git a/src/math/hypotf.rs b/libm/src/math/hypotf.rs
similarity index 100%
rename from src/math/hypotf.rs
rename to libm/src/math/hypotf.rs
diff --git a/src/math/ilogb.rs b/libm/src/math/ilogb.rs
similarity index 100%
rename from src/math/ilogb.rs
rename to libm/src/math/ilogb.rs
diff --git a/src/math/ilogbf.rs b/libm/src/math/ilogbf.rs
similarity index 100%
rename from src/math/ilogbf.rs
rename to libm/src/math/ilogbf.rs
diff --git a/src/math/j0.rs b/libm/src/math/j0.rs
similarity index 100%
rename from src/math/j0.rs
rename to libm/src/math/j0.rs
diff --git a/src/math/j0f.rs b/libm/src/math/j0f.rs
similarity index 100%
rename from src/math/j0f.rs
rename to libm/src/math/j0f.rs
diff --git a/src/math/j1.rs b/libm/src/math/j1.rs
similarity index 100%
rename from src/math/j1.rs
rename to libm/src/math/j1.rs
diff --git a/src/math/j1f.rs b/libm/src/math/j1f.rs
similarity index 100%
rename from src/math/j1f.rs
rename to libm/src/math/j1f.rs
diff --git a/src/math/jn.rs b/libm/src/math/jn.rs
similarity index 100%
rename from src/math/jn.rs
rename to libm/src/math/jn.rs
diff --git a/src/math/jnf.rs b/libm/src/math/jnf.rs
similarity index 100%
rename from src/math/jnf.rs
rename to libm/src/math/jnf.rs
diff --git a/src/math/k_cos.rs b/libm/src/math/k_cos.rs
similarity index 100%
rename from src/math/k_cos.rs
rename to libm/src/math/k_cos.rs
diff --git a/src/math/k_cosf.rs b/libm/src/math/k_cosf.rs
similarity index 100%
rename from src/math/k_cosf.rs
rename to libm/src/math/k_cosf.rs
diff --git a/src/math/k_expo2.rs b/libm/src/math/k_expo2.rs
similarity index 100%
rename from src/math/k_expo2.rs
rename to libm/src/math/k_expo2.rs
diff --git a/src/math/k_expo2f.rs b/libm/src/math/k_expo2f.rs
similarity index 100%
rename from src/math/k_expo2f.rs
rename to libm/src/math/k_expo2f.rs
diff --git a/src/math/k_sin.rs b/libm/src/math/k_sin.rs
similarity index 100%
rename from src/math/k_sin.rs
rename to libm/src/math/k_sin.rs
diff --git a/src/math/k_sinf.rs b/libm/src/math/k_sinf.rs
similarity index 100%
rename from src/math/k_sinf.rs
rename to libm/src/math/k_sinf.rs
diff --git a/src/math/k_tan.rs b/libm/src/math/k_tan.rs
similarity index 100%
rename from src/math/k_tan.rs
rename to libm/src/math/k_tan.rs
diff --git a/src/math/k_tanf.rs b/libm/src/math/k_tanf.rs
similarity index 100%
rename from src/math/k_tanf.rs
rename to libm/src/math/k_tanf.rs
diff --git a/src/math/ldexp.rs b/libm/src/math/ldexp.rs
similarity index 100%
rename from src/math/ldexp.rs
rename to libm/src/math/ldexp.rs
diff --git a/libm/src/math/ldexpf.rs b/libm/src/math/ldexpf.rs
new file mode 100644
index 000000000..95b27fc49
--- /dev/null
+++ b/libm/src/math/ldexpf.rs
@@ -0,0 +1,4 @@
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn ldexpf(x: f32, n: i32) -> f32 {
+    super::scalbnf(x, n)
+}
diff --git a/libm/src/math/ldexpf128.rs b/libm/src/math/ldexpf128.rs
new file mode 100644
index 000000000..b35277d15
--- /dev/null
+++ b/libm/src/math/ldexpf128.rs
@@ -0,0 +1,4 @@
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn ldexpf128(x: f128, n: i32) -> f128 {
+    super::scalbnf128(x, n)
+}
diff --git a/libm/src/math/ldexpf16.rs b/libm/src/math/ldexpf16.rs
new file mode 100644
index 000000000..8de6cffd6
--- /dev/null
+++ b/libm/src/math/ldexpf16.rs
@@ -0,0 +1,4 @@
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn ldexpf16(x: f16, n: i32) -> f16 {
+    super::scalbnf16(x, n)
+}
diff --git a/src/math/lgamma.rs b/libm/src/math/lgamma.rs
similarity index 100%
rename from src/math/lgamma.rs
rename to libm/src/math/lgamma.rs
diff --git a/src/math/lgamma_r.rs b/libm/src/math/lgamma_r.rs
similarity index 100%
rename from src/math/lgamma_r.rs
rename to libm/src/math/lgamma_r.rs
diff --git a/src/math/lgammaf.rs b/libm/src/math/lgammaf.rs
similarity index 100%
rename from src/math/lgammaf.rs
rename to libm/src/math/lgammaf.rs
diff --git a/src/math/lgammaf_r.rs b/libm/src/math/lgammaf_r.rs
similarity index 100%
rename from src/math/lgammaf_r.rs
rename to libm/src/math/lgammaf_r.rs
diff --git a/src/math/log.rs b/libm/src/math/log.rs
similarity index 100%
rename from src/math/log.rs
rename to libm/src/math/log.rs
diff --git a/src/math/log10.rs b/libm/src/math/log10.rs
similarity index 100%
rename from src/math/log10.rs
rename to libm/src/math/log10.rs
diff --git a/src/math/log10f.rs b/libm/src/math/log10f.rs
similarity index 100%
rename from src/math/log10f.rs
rename to libm/src/math/log10f.rs
diff --git a/src/math/log1p.rs b/libm/src/math/log1p.rs
similarity index 100%
rename from src/math/log1p.rs
rename to libm/src/math/log1p.rs
diff --git a/src/math/log1pf.rs b/libm/src/math/log1pf.rs
similarity index 100%
rename from src/math/log1pf.rs
rename to libm/src/math/log1pf.rs
diff --git a/src/math/log2.rs b/libm/src/math/log2.rs
similarity index 100%
rename from src/math/log2.rs
rename to libm/src/math/log2.rs
diff --git a/src/math/log2f.rs b/libm/src/math/log2f.rs
similarity index 100%
rename from src/math/log2f.rs
rename to libm/src/math/log2f.rs
diff --git a/src/math/logf.rs b/libm/src/math/logf.rs
similarity index 100%
rename from src/math/logf.rs
rename to libm/src/math/logf.rs
diff --git a/src/math/mod.rs b/libm/src/math/mod.rs
similarity index 100%
rename from src/math/mod.rs
rename to libm/src/math/mod.rs
diff --git a/src/math/modf.rs b/libm/src/math/modf.rs
similarity index 100%
rename from src/math/modf.rs
rename to libm/src/math/modf.rs
diff --git a/src/math/modff.rs b/libm/src/math/modff.rs
similarity index 100%
rename from src/math/modff.rs
rename to libm/src/math/modff.rs
diff --git a/src/math/nextafter.rs b/libm/src/math/nextafter.rs
similarity index 100%
rename from src/math/nextafter.rs
rename to libm/src/math/nextafter.rs
diff --git a/src/math/nextafterf.rs b/libm/src/math/nextafterf.rs
similarity index 100%
rename from src/math/nextafterf.rs
rename to libm/src/math/nextafterf.rs
diff --git a/src/math/pow.rs b/libm/src/math/pow.rs
similarity index 100%
rename from src/math/pow.rs
rename to libm/src/math/pow.rs
diff --git a/src/math/powf.rs b/libm/src/math/powf.rs
similarity index 100%
rename from src/math/powf.rs
rename to libm/src/math/powf.rs
diff --git a/src/math/rem_pio2.rs b/libm/src/math/rem_pio2.rs
similarity index 100%
rename from src/math/rem_pio2.rs
rename to libm/src/math/rem_pio2.rs
diff --git a/src/math/rem_pio2_large.rs b/libm/src/math/rem_pio2_large.rs
similarity index 100%
rename from src/math/rem_pio2_large.rs
rename to libm/src/math/rem_pio2_large.rs
diff --git a/src/math/rem_pio2f.rs b/libm/src/math/rem_pio2f.rs
similarity index 100%
rename from src/math/rem_pio2f.rs
rename to libm/src/math/rem_pio2f.rs
diff --git a/src/math/remainder.rs b/libm/src/math/remainder.rs
similarity index 100%
rename from src/math/remainder.rs
rename to libm/src/math/remainder.rs
diff --git a/src/math/remainderf.rs b/libm/src/math/remainderf.rs
similarity index 100%
rename from src/math/remainderf.rs
rename to libm/src/math/remainderf.rs
diff --git a/src/math/remquo.rs b/libm/src/math/remquo.rs
similarity index 100%
rename from src/math/remquo.rs
rename to libm/src/math/remquo.rs
diff --git a/src/math/remquof.rs b/libm/src/math/remquof.rs
similarity index 100%
rename from src/math/remquof.rs
rename to libm/src/math/remquof.rs
diff --git a/src/math/rint.rs b/libm/src/math/rint.rs
similarity index 100%
rename from src/math/rint.rs
rename to libm/src/math/rint.rs
diff --git a/src/math/round.rs b/libm/src/math/round.rs
similarity index 100%
rename from src/math/round.rs
rename to libm/src/math/round.rs
diff --git a/src/math/roundeven.rs b/libm/src/math/roundeven.rs
similarity index 100%
rename from src/math/roundeven.rs
rename to libm/src/math/roundeven.rs
diff --git a/libm/src/math/roundf.rs b/libm/src/math/roundf.rs
new file mode 100644
index 000000000..b5d7c9d69
--- /dev/null
+++ b/libm/src/math/roundf.rs
@@ -0,0 +1,5 @@
+/// Round `x` to the nearest integer, breaking ties away from zero.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn roundf(x: f32) -> f32 {
+    super::generic::round(x)
+}
diff --git a/libm/src/math/roundf128.rs b/libm/src/math/roundf128.rs
new file mode 100644
index 000000000..fc3164929
--- /dev/null
+++ b/libm/src/math/roundf128.rs
@@ -0,0 +1,5 @@
+/// Round `x` to the nearest integer, breaking ties away from zero.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn roundf128(x: f128) -> f128 {
+    super::generic::round(x)
+}
diff --git a/libm/src/math/roundf16.rs b/libm/src/math/roundf16.rs
new file mode 100644
index 000000000..8b356eaab
--- /dev/null
+++ b/libm/src/math/roundf16.rs
@@ -0,0 +1,5 @@
+/// Round `x` to the nearest integer, breaking ties away from zero.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn roundf16(x: f16) -> f16 {
+    super::generic::round(x)
+}
diff --git a/src/math/scalbn.rs b/libm/src/math/scalbn.rs
similarity index 100%
rename from src/math/scalbn.rs
rename to libm/src/math/scalbn.rs
diff --git a/libm/src/math/scalbnf.rs b/libm/src/math/scalbnf.rs
new file mode 100644
index 000000000..57e7ba76f
--- /dev/null
+++ b/libm/src/math/scalbnf.rs
@@ -0,0 +1,4 @@
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn scalbnf(x: f32, n: i32) -> f32 {
+    super::generic::scalbn(x, n)
+}
diff --git a/libm/src/math/scalbnf128.rs b/libm/src/math/scalbnf128.rs
new file mode 100644
index 000000000..c1d2b4855
--- /dev/null
+++ b/libm/src/math/scalbnf128.rs
@@ -0,0 +1,4 @@
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn scalbnf128(x: f128, n: i32) -> f128 {
+    super::generic::scalbn(x, n)
+}
diff --git a/libm/src/math/scalbnf16.rs b/libm/src/math/scalbnf16.rs
new file mode 100644
index 000000000..2209e1a17
--- /dev/null
+++ b/libm/src/math/scalbnf16.rs
@@ -0,0 +1,4 @@
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn scalbnf16(x: f16, n: i32) -> f16 {
+    super::generic::scalbn(x, n)
+}
diff --git a/src/math/sin.rs b/libm/src/math/sin.rs
similarity index 100%
rename from src/math/sin.rs
rename to libm/src/math/sin.rs
diff --git a/src/math/sincos.rs b/libm/src/math/sincos.rs
similarity index 100%
rename from src/math/sincos.rs
rename to libm/src/math/sincos.rs
diff --git a/src/math/sincosf.rs b/libm/src/math/sincosf.rs
similarity index 100%
rename from src/math/sincosf.rs
rename to libm/src/math/sincosf.rs
diff --git a/src/math/sinf.rs b/libm/src/math/sinf.rs
similarity index 100%
rename from src/math/sinf.rs
rename to libm/src/math/sinf.rs
diff --git a/src/math/sinh.rs b/libm/src/math/sinh.rs
similarity index 100%
rename from src/math/sinh.rs
rename to libm/src/math/sinh.rs
diff --git a/src/math/sinhf.rs b/libm/src/math/sinhf.rs
similarity index 100%
rename from src/math/sinhf.rs
rename to libm/src/math/sinhf.rs
diff --git a/src/math/sqrt.rs b/libm/src/math/sqrt.rs
similarity index 100%
rename from src/math/sqrt.rs
rename to libm/src/math/sqrt.rs
diff --git a/libm/src/math/sqrtf.rs b/libm/src/math/sqrtf.rs
new file mode 100644
index 000000000..c28a705e3
--- /dev/null
+++ b/libm/src/math/sqrtf.rs
@@ -0,0 +1,15 @@
+/// The square root of `x` (f32).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn sqrtf(x: f32) -> f32 {
+    select_implementation! {
+        name: sqrtf,
+        use_arch: any(
+            all(target_arch = "aarch64", target_feature = "neon"),
+            all(target_arch = "wasm32", intrinsics_enabled),
+            target_feature = "sse2"
+        ),
+        args: x,
+    }
+
+    super::generic::sqrt(x)
+}
diff --git a/libm/src/math/sqrtf128.rs b/libm/src/math/sqrtf128.rs
new file mode 100644
index 000000000..eaef6ae0c
--- /dev/null
+++ b/libm/src/math/sqrtf128.rs
@@ -0,0 +1,5 @@
+/// The square root of `x` (f128).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn sqrtf128(x: f128) -> f128 {
+    return super::generic::sqrt(x);
+}
diff --git a/libm/src/math/sqrtf16.rs b/libm/src/math/sqrtf16.rs
new file mode 100644
index 000000000..7bedb7f8b
--- /dev/null
+++ b/libm/src/math/sqrtf16.rs
@@ -0,0 +1,11 @@
+/// The square root of `x` (f16).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn sqrtf16(x: f16) -> f16 {
+    select_implementation! {
+        name: sqrtf16,
+        use_arch: all(target_arch = "aarch64", target_feature = "fp16"),
+        args: x,
+    }
+
+    return super::generic::sqrt(x);
+}
diff --git a/src/math/support/big.rs b/libm/src/math/support/big.rs
similarity index 100%
rename from src/math/support/big.rs
rename to libm/src/math/support/big.rs
diff --git a/src/math/support/big/tests.rs b/libm/src/math/support/big/tests.rs
similarity index 100%
rename from src/math/support/big/tests.rs
rename to libm/src/math/support/big/tests.rs
diff --git a/src/math/support/env.rs b/libm/src/math/support/env.rs
similarity index 100%
rename from src/math/support/env.rs
rename to libm/src/math/support/env.rs
diff --git a/src/math/support/float_traits.rs b/libm/src/math/support/float_traits.rs
similarity index 100%
rename from src/math/support/float_traits.rs
rename to libm/src/math/support/float_traits.rs
diff --git a/src/math/support/hex_float.rs b/libm/src/math/support/hex_float.rs
similarity index 100%
rename from src/math/support/hex_float.rs
rename to libm/src/math/support/hex_float.rs
diff --git a/src/math/support/int_traits.rs b/libm/src/math/support/int_traits.rs
similarity index 100%
rename from src/math/support/int_traits.rs
rename to libm/src/math/support/int_traits.rs
diff --git a/src/math/support/macros.rs b/libm/src/math/support/macros.rs
similarity index 100%
rename from src/math/support/macros.rs
rename to libm/src/math/support/macros.rs
diff --git a/src/math/support/mod.rs b/libm/src/math/support/mod.rs
similarity index 100%
rename from src/math/support/mod.rs
rename to libm/src/math/support/mod.rs
diff --git a/src/math/tan.rs b/libm/src/math/tan.rs
similarity index 100%
rename from src/math/tan.rs
rename to libm/src/math/tan.rs
diff --git a/src/math/tanf.rs b/libm/src/math/tanf.rs
similarity index 100%
rename from src/math/tanf.rs
rename to libm/src/math/tanf.rs
diff --git a/src/math/tanh.rs b/libm/src/math/tanh.rs
similarity index 100%
rename from src/math/tanh.rs
rename to libm/src/math/tanh.rs
diff --git a/src/math/tanhf.rs b/libm/src/math/tanhf.rs
similarity index 100%
rename from src/math/tanhf.rs
rename to libm/src/math/tanhf.rs
diff --git a/src/math/tgamma.rs b/libm/src/math/tgamma.rs
similarity index 100%
rename from src/math/tgamma.rs
rename to libm/src/math/tgamma.rs
diff --git a/src/math/tgammaf.rs b/libm/src/math/tgammaf.rs
similarity index 100%
rename from src/math/tgammaf.rs
rename to libm/src/math/tgammaf.rs
diff --git a/src/math/trunc.rs b/libm/src/math/trunc.rs
similarity index 100%
rename from src/math/trunc.rs
rename to libm/src/math/trunc.rs
diff --git a/libm/src/math/truncf.rs b/libm/src/math/truncf.rs
new file mode 100644
index 000000000..14533a267
--- /dev/null
+++ b/libm/src/math/truncf.rs
@@ -0,0 +1,23 @@
+/// Rounds the number toward 0 to the closest integral value (f32).
+///
+/// This effectively removes the decimal part of the number, leaving the integral part.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn truncf(x: f32) -> f32 {
+    select_implementation! {
+        name: truncf,
+        use_arch: all(target_arch = "wasm32", intrinsics_enabled),
+        args: x,
+    }
+
+    super::generic::trunc(x)
+}
+
+// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520
+#[cfg(not(target_arch = "powerpc64"))]
+#[cfg(test)]
+mod tests {
+    #[test]
+    fn sanity_check() {
+        assert_eq!(super::truncf(1.1), 1.0);
+    }
+}
diff --git a/libm/src/math/truncf128.rs b/libm/src/math/truncf128.rs
new file mode 100644
index 000000000..9dccc0d0e
--- /dev/null
+++ b/libm/src/math/truncf128.rs
@@ -0,0 +1,7 @@
+/// Rounds the number toward 0 to the closest integral value (f128).
+///
+/// This effectively removes the decimal part of the number, leaving the integral part.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn truncf128(x: f128) -> f128 {
+    super::generic::trunc(x)
+}
diff --git a/libm/src/math/truncf16.rs b/libm/src/math/truncf16.rs
new file mode 100644
index 000000000..d7c3d225c
--- /dev/null
+++ b/libm/src/math/truncf16.rs
@@ -0,0 +1,7 @@
+/// Rounds the number toward 0 to the closest integral value (f16).
+///
+/// This effectively removes the decimal part of the number, leaving the integral part.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn truncf16(x: f16) -> f16 {
+    super::generic::trunc(x)
+}

From 9064743be858ef4077ca9f729ad5431c416b61ce Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 9 Apr 2025 01:21:44 +0000
Subject: [PATCH 277/279] refactor: Introduce a virtual manifest

Move the workspace configuration to a virtual manifest. This
reorganization makes a more clear separation between package contents
and support files that don't get distributed. It will also make it
easier to merge this repository with `compiler-builtins` which is
planned (builtins had a similar update done in [1]).

LICENSE.txt and README.md are symlinkedinto the new directory to ensure
they get included in the package.

[1]: https://github.com/rust-lang/compiler-builtins/pull/702
---
 Cargo.toml                    |  37 +++
 ci/ci-util.py                 |   2 +-
 etc/function-definitions.json | 502 +++++++++++++++++-----------------
 etc/update-api-list.py        |  26 +-
 libm/Cargo.toml               |  38 ---
 libm/LICENSE.txt              |   1 +
 libm/README.md                |   1 +
 7 files changed, 305 insertions(+), 302 deletions(-)
 create mode 100644 Cargo.toml
 create mode 120000 libm/LICENSE.txt
 create mode 120000 libm/README.md

diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 000000000..268b6fb0e
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,37 @@
+[workspace]
+resolver = "2"
+members = [
+  "libm",
+  "crates/libm-macros",
+  "crates/libm-test",
+  "crates/musl-math-sys",
+  "crates/util",
+]
+default-members = [
+  "libm",
+  "crates/libm-macros",
+  "crates/libm-test"
+]
+exclude = [
+  # Requires `panic = abort` so can't be a member of the workspace
+  "crates/compiler-builtins-smoke-test",
+]
+
+# The default release profile is unchanged.
+
+# Release mode with debug assertions
+[profile.release-checked]
+inherits = "release"
+debug-assertions = true
+overflow-checks = true
+
+# Release with maximum optimizations, which is very slow to build. This is also
+# what is needed to check `no-panic`.
+[profile.release-opt]
+inherits = "release"
+codegen-units = 1
+lto = "fat"
+
+[profile.bench]
+# Required for iai-callgrind
+debug = true
diff --git a/ci/ci-util.py b/ci/ci-util.py
index ed63d6dee..d9e402d6b 100755
--- a/ci/ci-util.py
+++ b/ci/ci-util.py
@@ -25,7 +25,7 @@
     COMMAND:
         generate-matrix
             Calculate a matrix of which functions had source change, print that as
-             a JSON object.
+            a JSON object.
 
         locate-baseline [--download] [--extract]
             Locate the most recent benchmark baseline available in CI and, if flags
diff --git a/etc/function-definitions.json b/etc/function-definitions.json
index ead1f807f..3e33343c4 100644
--- a/etc/function-definitions.json
+++ b/etc/function-definitions.json
@@ -2,1067 +2,1067 @@
     "__comment": "Autogenerated by update-api-list.py. List of files that define a function with a given name. This file is checked in to make it obvious if refactoring breaks things",
     "acos": {
         "sources": [
-            "src/math/acos.rs"
+            "libm/src/math/acos.rs"
         ],
         "type": "f64"
     },
     "acosf": {
         "sources": [
-            "src/math/acosf.rs"
+            "libm/src/math/acosf.rs"
         ],
         "type": "f32"
     },
     "acosh": {
         "sources": [
-            "src/math/acosh.rs"
+            "libm/src/math/acosh.rs"
         ],
         "type": "f64"
     },
     "acoshf": {
         "sources": [
-            "src/math/acoshf.rs"
+            "libm/src/math/acoshf.rs"
         ],
         "type": "f32"
     },
     "asin": {
         "sources": [
-            "src/math/asin.rs"
+            "libm/src/math/asin.rs"
         ],
         "type": "f64"
     },
     "asinf": {
         "sources": [
-            "src/math/asinf.rs"
+            "libm/src/math/asinf.rs"
         ],
         "type": "f32"
     },
     "asinh": {
         "sources": [
-            "src/math/asinh.rs"
+            "libm/src/math/asinh.rs"
         ],
         "type": "f64"
     },
     "asinhf": {
         "sources": [
-            "src/math/asinhf.rs"
+            "libm/src/math/asinhf.rs"
         ],
         "type": "f32"
     },
     "atan": {
         "sources": [
-            "src/math/atan.rs"
+            "libm/src/math/atan.rs"
         ],
         "type": "f64"
     },
     "atan2": {
         "sources": [
-            "src/math/atan2.rs"
+            "libm/src/math/atan2.rs"
         ],
         "type": "f64"
     },
     "atan2f": {
         "sources": [
-            "src/math/atan2f.rs"
+            "libm/src/math/atan2f.rs"
         ],
         "type": "f32"
     },
     "atanf": {
         "sources": [
-            "src/math/atanf.rs"
+            "libm/src/math/atanf.rs"
         ],
         "type": "f32"
     },
     "atanh": {
         "sources": [
-            "src/math/atanh.rs"
+            "libm/src/math/atanh.rs"
         ],
         "type": "f64"
     },
     "atanhf": {
         "sources": [
-            "src/math/atanhf.rs"
+            "libm/src/math/atanhf.rs"
         ],
         "type": "f32"
     },
     "cbrt": {
         "sources": [
-            "src/math/cbrt.rs"
+            "libm/src/math/cbrt.rs"
         ],
         "type": "f64"
     },
     "cbrtf": {
         "sources": [
-            "src/math/cbrtf.rs"
+            "libm/src/math/cbrtf.rs"
         ],
         "type": "f32"
     },
     "ceil": {
         "sources": [
-            "src/math/arch/i586.rs",
-            "src/math/arch/wasm32.rs",
-            "src/math/ceil.rs",
-            "src/math/generic/ceil.rs"
+            "libm/src/math/arch/i586.rs",
+            "libm/src/math/arch/wasm32.rs",
+            "libm/src/math/ceil.rs",
+            "libm/src/math/generic/ceil.rs"
         ],
         "type": "f64"
     },
     "ceilf": {
         "sources": [
-            "src/math/arch/wasm32.rs",
-            "src/math/ceil.rs",
-            "src/math/generic/ceil.rs"
+            "libm/src/math/arch/wasm32.rs",
+            "libm/src/math/ceil.rs",
+            "libm/src/math/generic/ceil.rs"
         ],
         "type": "f32"
     },
     "ceilf128": {
         "sources": [
-            "src/math/ceil.rs",
-            "src/math/generic/ceil.rs"
+            "libm/src/math/ceil.rs",
+            "libm/src/math/generic/ceil.rs"
         ],
         "type": "f128"
     },
     "ceilf16": {
         "sources": [
-            "src/math/ceil.rs",
-            "src/math/generic/ceil.rs"
+            "libm/src/math/ceil.rs",
+            "libm/src/math/generic/ceil.rs"
         ],
         "type": "f16"
     },
     "copysign": {
         "sources": [
-            "src/math/copysign.rs",
-            "src/math/generic/copysign.rs"
+            "libm/src/math/copysign.rs",
+            "libm/src/math/generic/copysign.rs"
         ],
         "type": "f64"
     },
     "copysignf": {
         "sources": [
-            "src/math/copysign.rs",
-            "src/math/generic/copysign.rs"
+            "libm/src/math/copysign.rs",
+            "libm/src/math/generic/copysign.rs"
         ],
         "type": "f32"
     },
     "copysignf128": {
         "sources": [
-            "src/math/copysign.rs",
-            "src/math/generic/copysign.rs"
+            "libm/src/math/copysign.rs",
+            "libm/src/math/generic/copysign.rs"
         ],
         "type": "f128"
     },
     "copysignf16": {
         "sources": [
-            "src/math/copysign.rs",
-            "src/math/generic/copysign.rs"
+            "libm/src/math/copysign.rs",
+            "libm/src/math/generic/copysign.rs"
         ],
         "type": "f16"
     },
     "cos": {
         "sources": [
-            "src/math/cos.rs"
+            "libm/src/math/cos.rs"
         ],
         "type": "f64"
     },
     "cosf": {
         "sources": [
-            "src/math/cosf.rs"
+            "libm/src/math/cosf.rs"
         ],
         "type": "f32"
     },
     "cosh": {
         "sources": [
-            "src/math/cosh.rs"
+            "libm/src/math/cosh.rs"
         ],
         "type": "f64"
     },
     "coshf": {
         "sources": [
-            "src/math/coshf.rs"
+            "libm/src/math/coshf.rs"
         ],
         "type": "f32"
     },
     "erf": {
         "sources": [
-            "src/math/erf.rs"
+            "libm/src/math/erf.rs"
         ],
         "type": "f64"
     },
     "erfc": {
         "sources": [
-            "src/math/erf.rs"
+            "libm/src/math/erf.rs"
         ],
         "type": "f64"
     },
     "erfcf": {
         "sources": [
-            "src/math/erff.rs"
+            "libm/src/math/erff.rs"
         ],
         "type": "f32"
     },
     "erff": {
         "sources": [
-            "src/math/erff.rs"
+            "libm/src/math/erff.rs"
         ],
         "type": "f32"
     },
     "exp": {
         "sources": [
-            "src/math/exp.rs"
+            "libm/src/math/exp.rs"
         ],
         "type": "f64"
     },
     "exp10": {
         "sources": [
-            "src/math/exp10.rs"
+            "libm/src/math/exp10.rs"
         ],
         "type": "f64"
     },
     "exp10f": {
         "sources": [
-            "src/math/exp10f.rs"
+            "libm/src/math/exp10f.rs"
         ],
         "type": "f32"
     },
     "exp2": {
         "sources": [
-            "src/math/exp2.rs"
+            "libm/src/math/exp2.rs"
         ],
         "type": "f64"
     },
     "exp2f": {
         "sources": [
-            "src/math/exp2f.rs"
+            "libm/src/math/exp2f.rs"
         ],
         "type": "f32"
     },
     "expf": {
         "sources": [
-            "src/math/expf.rs"
+            "libm/src/math/expf.rs"
         ],
         "type": "f32"
     },
     "expm1": {
         "sources": [
-            "src/math/expm1.rs"
+            "libm/src/math/expm1.rs"
         ],
         "type": "f64"
     },
     "expm1f": {
         "sources": [
-            "src/math/expm1f.rs"
+            "libm/src/math/expm1f.rs"
         ],
         "type": "f32"
     },
     "fabs": {
         "sources": [
-            "src/math/arch/wasm32.rs",
-            "src/math/fabs.rs",
-            "src/math/generic/fabs.rs"
+            "libm/src/math/arch/wasm32.rs",
+            "libm/src/math/fabs.rs",
+            "libm/src/math/generic/fabs.rs"
         ],
         "type": "f64"
     },
     "fabsf": {
         "sources": [
-            "src/math/arch/wasm32.rs",
-            "src/math/fabs.rs",
-            "src/math/generic/fabs.rs"
+            "libm/src/math/arch/wasm32.rs",
+            "libm/src/math/fabs.rs",
+            "libm/src/math/generic/fabs.rs"
         ],
         "type": "f32"
     },
     "fabsf128": {
         "sources": [
-            "src/math/fabs.rs",
-            "src/math/generic/fabs.rs"
+            "libm/src/math/fabs.rs",
+            "libm/src/math/generic/fabs.rs"
         ],
         "type": "f128"
     },
     "fabsf16": {
         "sources": [
-            "src/math/fabs.rs",
-            "src/math/generic/fabs.rs"
+            "libm/src/math/fabs.rs",
+            "libm/src/math/generic/fabs.rs"
         ],
         "type": "f16"
     },
     "fdim": {
         "sources": [
-            "src/math/fdim.rs",
-            "src/math/generic/fdim.rs"
+            "libm/src/math/fdim.rs",
+            "libm/src/math/generic/fdim.rs"
         ],
         "type": "f64"
     },
     "fdimf": {
         "sources": [
-            "src/math/fdim.rs",
-            "src/math/generic/fdim.rs"
+            "libm/src/math/fdim.rs",
+            "libm/src/math/generic/fdim.rs"
         ],
         "type": "f32"
     },
     "fdimf128": {
         "sources": [
-            "src/math/fdim.rs",
-            "src/math/generic/fdim.rs"
+            "libm/src/math/fdim.rs",
+            "libm/src/math/generic/fdim.rs"
         ],
         "type": "f128"
     },
     "fdimf16": {
         "sources": [
-            "src/math/fdim.rs",
-            "src/math/generic/fdim.rs"
+            "libm/src/math/fdim.rs",
+            "libm/src/math/generic/fdim.rs"
         ],
         "type": "f16"
     },
     "floor": {
         "sources": [
-            "src/math/arch/i586.rs",
-            "src/math/arch/wasm32.rs",
-            "src/math/floor.rs",
-            "src/math/generic/floor.rs"
+            "libm/src/math/arch/i586.rs",
+            "libm/src/math/arch/wasm32.rs",
+            "libm/src/math/floor.rs",
+            "libm/src/math/generic/floor.rs"
         ],
         "type": "f64"
     },
     "floorf": {
         "sources": [
-            "src/math/arch/wasm32.rs",
-            "src/math/floor.rs",
-            "src/math/generic/floor.rs"
+            "libm/src/math/arch/wasm32.rs",
+            "libm/src/math/floor.rs",
+            "libm/src/math/generic/floor.rs"
         ],
         "type": "f32"
     },
     "floorf128": {
         "sources": [
-            "src/math/floor.rs",
-            "src/math/generic/floor.rs"
+            "libm/src/math/floor.rs",
+            "libm/src/math/generic/floor.rs"
         ],
         "type": "f128"
     },
     "floorf16": {
         "sources": [
-            "src/math/floor.rs",
-            "src/math/generic/floor.rs"
+            "libm/src/math/floor.rs",
+            "libm/src/math/generic/floor.rs"
         ],
         "type": "f16"
     },
     "fma": {
         "sources": [
-            "src/math/arch/aarch64.rs",
-            "src/math/fma.rs"
+            "libm/src/math/arch/aarch64.rs",
+            "libm/src/math/fma.rs"
         ],
         "type": "f64"
     },
     "fmaf": {
         "sources": [
-            "src/math/arch/aarch64.rs",
-            "src/math/fma_wide.rs"
+            "libm/src/math/arch/aarch64.rs",
+            "libm/src/math/fma_wide.rs"
         ],
         "type": "f32"
     },
     "fmaf128": {
         "sources": [
-            "src/math/fma.rs"
+            "libm/src/math/fma.rs"
         ],
         "type": "f128"
     },
     "fmax": {
         "sources": [
-            "src/math/fmin_fmax.rs",
-            "src/math/generic/fmax.rs"
+            "libm/src/math/fmin_fmax.rs",
+            "libm/src/math/generic/fmax.rs"
         ],
         "type": "f64"
     },
     "fmaxf": {
         "sources": [
-            "src/math/fmin_fmax.rs",
-            "src/math/generic/fmax.rs"
+            "libm/src/math/fmin_fmax.rs",
+            "libm/src/math/generic/fmax.rs"
         ],
         "type": "f32"
     },
     "fmaxf128": {
         "sources": [
-            "src/math/fmin_fmax.rs",
-            "src/math/generic/fmax.rs"
+            "libm/src/math/fmin_fmax.rs",
+            "libm/src/math/generic/fmax.rs"
         ],
         "type": "f128"
     },
     "fmaxf16": {
         "sources": [
-            "src/math/fmin_fmax.rs",
-            "src/math/generic/fmax.rs"
+            "libm/src/math/fmin_fmax.rs",
+            "libm/src/math/generic/fmax.rs"
         ],
         "type": "f16"
     },
     "fmaximum": {
         "sources": [
-            "src/math/fminimum_fmaximum.rs",
-            "src/math/generic/fmaximum.rs"
+            "libm/src/math/fminimum_fmaximum.rs",
+            "libm/src/math/generic/fmaximum.rs"
         ],
         "type": "f64"
     },
     "fmaximum_num": {
         "sources": [
-            "src/math/fminimum_fmaximum_num.rs",
-            "src/math/generic/fmaximum_num.rs"
+            "libm/src/math/fminimum_fmaximum_num.rs",
+            "libm/src/math/generic/fmaximum_num.rs"
         ],
         "type": "f64"
     },
     "fmaximum_numf": {
         "sources": [
-            "src/math/fminimum_fmaximum_num.rs",
-            "src/math/generic/fmaximum_num.rs"
+            "libm/src/math/fminimum_fmaximum_num.rs",
+            "libm/src/math/generic/fmaximum_num.rs"
         ],
         "type": "f32"
     },
     "fmaximum_numf128": {
         "sources": [
-            "src/math/fminimum_fmaximum_num.rs",
-            "src/math/generic/fmaximum_num.rs"
+            "libm/src/math/fminimum_fmaximum_num.rs",
+            "libm/src/math/generic/fmaximum_num.rs"
         ],
         "type": "f128"
     },
     "fmaximum_numf16": {
         "sources": [
-            "src/math/fminimum_fmaximum_num.rs",
-            "src/math/generic/fmaximum_num.rs"
+            "libm/src/math/fminimum_fmaximum_num.rs",
+            "libm/src/math/generic/fmaximum_num.rs"
         ],
         "type": "f16"
     },
     "fmaximumf": {
         "sources": [
-            "src/math/fminimum_fmaximum.rs",
-            "src/math/generic/fmaximum.rs"
+            "libm/src/math/fminimum_fmaximum.rs",
+            "libm/src/math/generic/fmaximum.rs"
         ],
         "type": "f32"
     },
     "fmaximumf128": {
         "sources": [
-            "src/math/fminimum_fmaximum.rs",
-            "src/math/generic/fmaximum.rs"
+            "libm/src/math/fminimum_fmaximum.rs",
+            "libm/src/math/generic/fmaximum.rs"
         ],
         "type": "f128"
     },
     "fmaximumf16": {
         "sources": [
-            "src/math/fminimum_fmaximum.rs",
-            "src/math/generic/fmaximum.rs"
+            "libm/src/math/fminimum_fmaximum.rs",
+            "libm/src/math/generic/fmaximum.rs"
         ],
         "type": "f16"
     },
     "fmin": {
         "sources": [
-            "src/math/fmin_fmax.rs",
-            "src/math/generic/fmin.rs"
+            "libm/src/math/fmin_fmax.rs",
+            "libm/src/math/generic/fmin.rs"
         ],
         "type": "f64"
     },
     "fminf": {
         "sources": [
-            "src/math/fmin_fmax.rs",
-            "src/math/generic/fmin.rs"
+            "libm/src/math/fmin_fmax.rs",
+            "libm/src/math/generic/fmin.rs"
         ],
         "type": "f32"
     },
     "fminf128": {
         "sources": [
-            "src/math/fmin_fmax.rs",
-            "src/math/generic/fmin.rs"
+            "libm/src/math/fmin_fmax.rs",
+            "libm/src/math/generic/fmin.rs"
         ],
         "type": "f128"
     },
     "fminf16": {
         "sources": [
-            "src/math/fmin_fmax.rs",
-            "src/math/generic/fmin.rs"
+            "libm/src/math/fmin_fmax.rs",
+            "libm/src/math/generic/fmin.rs"
         ],
         "type": "f16"
     },
     "fminimum": {
         "sources": [
-            "src/math/fminimum_fmaximum.rs",
-            "src/math/generic/fminimum.rs"
+            "libm/src/math/fminimum_fmaximum.rs",
+            "libm/src/math/generic/fminimum.rs"
         ],
         "type": "f64"
     },
     "fminimum_num": {
         "sources": [
-            "src/math/fminimum_fmaximum_num.rs",
-            "src/math/generic/fminimum_num.rs"
+            "libm/src/math/fminimum_fmaximum_num.rs",
+            "libm/src/math/generic/fminimum_num.rs"
         ],
         "type": "f64"
     },
     "fminimum_numf": {
         "sources": [
-            "src/math/fminimum_fmaximum_num.rs",
-            "src/math/generic/fminimum_num.rs"
+            "libm/src/math/fminimum_fmaximum_num.rs",
+            "libm/src/math/generic/fminimum_num.rs"
         ],
         "type": "f32"
     },
     "fminimum_numf128": {
         "sources": [
-            "src/math/fminimum_fmaximum_num.rs",
-            "src/math/generic/fminimum_num.rs"
+            "libm/src/math/fminimum_fmaximum_num.rs",
+            "libm/src/math/generic/fminimum_num.rs"
         ],
         "type": "f128"
     },
     "fminimum_numf16": {
         "sources": [
-            "src/math/fminimum_fmaximum_num.rs",
-            "src/math/generic/fminimum_num.rs"
+            "libm/src/math/fminimum_fmaximum_num.rs",
+            "libm/src/math/generic/fminimum_num.rs"
         ],
         "type": "f16"
     },
     "fminimumf": {
         "sources": [
-            "src/math/fminimum_fmaximum.rs",
-            "src/math/generic/fminimum.rs"
+            "libm/src/math/fminimum_fmaximum.rs",
+            "libm/src/math/generic/fminimum.rs"
         ],
         "type": "f32"
     },
     "fminimumf128": {
         "sources": [
-            "src/math/fminimum_fmaximum.rs",
-            "src/math/generic/fminimum.rs"
+            "libm/src/math/fminimum_fmaximum.rs",
+            "libm/src/math/generic/fminimum.rs"
         ],
         "type": "f128"
     },
     "fminimumf16": {
         "sources": [
-            "src/math/fminimum_fmaximum.rs",
-            "src/math/generic/fminimum.rs"
+            "libm/src/math/fminimum_fmaximum.rs",
+            "libm/src/math/generic/fminimum.rs"
         ],
         "type": "f16"
     },
     "fmod": {
         "sources": [
-            "src/math/fmod.rs",
-            "src/math/generic/fmod.rs"
+            "libm/src/math/fmod.rs",
+            "libm/src/math/generic/fmod.rs"
         ],
         "type": "f64"
     },
     "fmodf": {
         "sources": [
-            "src/math/fmod.rs",
-            "src/math/generic/fmod.rs"
+            "libm/src/math/fmod.rs",
+            "libm/src/math/generic/fmod.rs"
         ],
         "type": "f32"
     },
     "fmodf128": {
         "sources": [
-            "src/math/fmod.rs",
-            "src/math/generic/fmod.rs"
+            "libm/src/math/fmod.rs",
+            "libm/src/math/generic/fmod.rs"
         ],
         "type": "f128"
     },
     "fmodf16": {
         "sources": [
-            "src/math/fmod.rs",
-            "src/math/generic/fmod.rs"
+            "libm/src/math/fmod.rs",
+            "libm/src/math/generic/fmod.rs"
         ],
         "type": "f16"
     },
     "frexp": {
         "sources": [
-            "src/math/frexp.rs"
+            "libm/src/math/frexp.rs"
         ],
         "type": "f64"
     },
     "frexpf": {
         "sources": [
-            "src/math/frexpf.rs"
+            "libm/src/math/frexpf.rs"
         ],
         "type": "f32"
     },
     "hypot": {
         "sources": [
-            "src/math/hypot.rs"
+            "libm/src/math/hypot.rs"
         ],
         "type": "f64"
     },
     "hypotf": {
         "sources": [
-            "src/math/hypotf.rs"
+            "libm/src/math/hypotf.rs"
         ],
         "type": "f32"
     },
     "ilogb": {
         "sources": [
-            "src/math/ilogb.rs"
+            "libm/src/math/ilogb.rs"
         ],
         "type": "f64"
     },
     "ilogbf": {
         "sources": [
-            "src/math/ilogbf.rs"
+            "libm/src/math/ilogbf.rs"
         ],
         "type": "f32"
     },
     "j0": {
         "sources": [
-            "src/math/j0.rs"
+            "libm/src/math/j0.rs"
         ],
         "type": "f64"
     },
     "j0f": {
         "sources": [
-            "src/math/j0f.rs"
+            "libm/src/math/j0f.rs"
         ],
         "type": "f32"
     },
     "j1": {
         "sources": [
-            "src/math/j1.rs"
+            "libm/src/math/j1.rs"
         ],
         "type": "f64"
     },
     "j1f": {
         "sources": [
-            "src/math/j1f.rs"
+            "libm/src/math/j1f.rs"
         ],
         "type": "f32"
     },
     "jn": {
         "sources": [
-            "src/math/jn.rs"
+            "libm/src/math/jn.rs"
         ],
         "type": "f64"
     },
     "jnf": {
         "sources": [
-            "src/math/jnf.rs"
+            "libm/src/math/jnf.rs"
         ],
         "type": "f32"
     },
     "ldexp": {
         "sources": [
-            "src/math/ldexp.rs"
+            "libm/src/math/ldexp.rs"
         ],
         "type": "f64"
     },
     "ldexpf": {
         "sources": [
-            "src/math/ldexp.rs"
+            "libm/src/math/ldexp.rs"
         ],
         "type": "f32"
     },
     "ldexpf128": {
         "sources": [
-            "src/math/ldexp.rs"
+            "libm/src/math/ldexp.rs"
         ],
         "type": "f128"
     },
     "ldexpf16": {
         "sources": [
-            "src/math/ldexp.rs"
+            "libm/src/math/ldexp.rs"
         ],
         "type": "f16"
     },
     "lgamma": {
         "sources": [
-            "src/math/lgamma.rs"
+            "libm/src/math/lgamma.rs"
         ],
         "type": "f64"
     },
     "lgamma_r": {
         "sources": [
-            "src/math/lgamma_r.rs"
+            "libm/src/math/lgamma_r.rs"
         ],
         "type": "f64"
     },
     "lgammaf": {
         "sources": [
-            "src/math/lgammaf.rs"
+            "libm/src/math/lgammaf.rs"
         ],
         "type": "f32"
     },
     "lgammaf_r": {
         "sources": [
-            "src/math/lgammaf_r.rs"
+            "libm/src/math/lgammaf_r.rs"
         ],
         "type": "f32"
     },
     "log": {
         "sources": [
-            "src/math/log.rs"
+            "libm/src/math/log.rs"
         ],
         "type": "f64"
     },
     "log10": {
         "sources": [
-            "src/math/log10.rs"
+            "libm/src/math/log10.rs"
         ],
         "type": "f64"
     },
     "log10f": {
         "sources": [
-            "src/math/log10f.rs"
+            "libm/src/math/log10f.rs"
         ],
         "type": "f32"
     },
     "log1p": {
         "sources": [
-            "src/math/log1p.rs"
+            "libm/src/math/log1p.rs"
         ],
         "type": "f64"
     },
     "log1pf": {
         "sources": [
-            "src/math/log1pf.rs"
+            "libm/src/math/log1pf.rs"
         ],
         "type": "f32"
     },
     "log2": {
         "sources": [
-            "src/math/log2.rs"
+            "libm/src/math/log2.rs"
         ],
         "type": "f64"
     },
     "log2f": {
         "sources": [
-            "src/math/log2f.rs"
+            "libm/src/math/log2f.rs"
         ],
         "type": "f32"
     },
     "logf": {
         "sources": [
-            "src/math/logf.rs"
+            "libm/src/math/logf.rs"
         ],
         "type": "f32"
     },
     "modf": {
         "sources": [
-            "src/math/modf.rs"
+            "libm/src/math/modf.rs"
         ],
         "type": "f64"
     },
     "modff": {
         "sources": [
-            "src/math/modff.rs"
+            "libm/src/math/modff.rs"
         ],
         "type": "f32"
     },
     "nextafter": {
         "sources": [
-            "src/math/nextafter.rs"
+            "libm/src/math/nextafter.rs"
         ],
         "type": "f64"
     },
     "nextafterf": {
         "sources": [
-            "src/math/nextafterf.rs"
+            "libm/src/math/nextafterf.rs"
         ],
         "type": "f32"
     },
     "pow": {
         "sources": [
-            "src/math/pow.rs"
+            "libm/src/math/pow.rs"
         ],
         "type": "f64"
     },
     "powf": {
         "sources": [
-            "src/math/powf.rs"
+            "libm/src/math/powf.rs"
         ],
         "type": "f32"
     },
     "remainder": {
         "sources": [
-            "src/math/remainder.rs"
+            "libm/src/math/remainder.rs"
         ],
         "type": "f64"
     },
     "remainderf": {
         "sources": [
-            "src/math/remainderf.rs"
+            "libm/src/math/remainderf.rs"
         ],
         "type": "f32"
     },
     "remquo": {
         "sources": [
-            "src/math/remquo.rs"
+            "libm/src/math/remquo.rs"
         ],
         "type": "f64"
     },
     "remquof": {
         "sources": [
-            "src/math/remquof.rs"
+            "libm/src/math/remquof.rs"
         ],
         "type": "f32"
     },
     "rint": {
         "sources": [
-            "src/math/arch/aarch64.rs",
-            "src/math/arch/wasm32.rs",
-            "src/math/rint.rs"
+            "libm/src/math/arch/aarch64.rs",
+            "libm/src/math/arch/wasm32.rs",
+            "libm/src/math/rint.rs"
         ],
         "type": "f64"
     },
     "rintf": {
         "sources": [
-            "src/math/arch/aarch64.rs",
-            "src/math/arch/wasm32.rs",
-            "src/math/rint.rs"
+            "libm/src/math/arch/aarch64.rs",
+            "libm/src/math/arch/wasm32.rs",
+            "libm/src/math/rint.rs"
         ],
         "type": "f32"
     },
     "rintf128": {
         "sources": [
-            "src/math/rint.rs"
+            "libm/src/math/rint.rs"
         ],
         "type": "f128"
     },
     "rintf16": {
         "sources": [
-            "src/math/arch/aarch64.rs",
-            "src/math/rint.rs"
+            "libm/src/math/arch/aarch64.rs",
+            "libm/src/math/rint.rs"
         ],
         "type": "f16"
     },
     "round": {
         "sources": [
-            "src/math/generic/round.rs",
-            "src/math/round.rs"
+            "libm/src/math/generic/round.rs",
+            "libm/src/math/round.rs"
         ],
         "type": "f64"
     },
     "roundeven": {
         "sources": [
-            "src/math/roundeven.rs"
+            "libm/src/math/roundeven.rs"
         ],
         "type": "f64"
     },
     "roundevenf": {
         "sources": [
-            "src/math/roundeven.rs"
+            "libm/src/math/roundeven.rs"
         ],
         "type": "f32"
     },
     "roundevenf128": {
         "sources": [
-            "src/math/roundeven.rs"
+            "libm/src/math/roundeven.rs"
         ],
         "type": "f128"
     },
     "roundevenf16": {
         "sources": [
-            "src/math/roundeven.rs"
+            "libm/src/math/roundeven.rs"
         ],
         "type": "f16"
     },
     "roundf": {
         "sources": [
-            "src/math/generic/round.rs",
-            "src/math/round.rs"
+            "libm/src/math/generic/round.rs",
+            "libm/src/math/round.rs"
         ],
         "type": "f32"
     },
     "roundf128": {
         "sources": [
-            "src/math/generic/round.rs",
-            "src/math/round.rs"
+            "libm/src/math/generic/round.rs",
+            "libm/src/math/round.rs"
         ],
         "type": "f128"
     },
     "roundf16": {
         "sources": [
-            "src/math/generic/round.rs",
-            "src/math/round.rs"
+            "libm/src/math/generic/round.rs",
+            "libm/src/math/round.rs"
         ],
         "type": "f16"
     },
     "scalbn": {
         "sources": [
-            "src/math/generic/scalbn.rs",
-            "src/math/scalbn.rs"
+            "libm/src/math/generic/scalbn.rs",
+            "libm/src/math/scalbn.rs"
         ],
         "type": "f64"
     },
     "scalbnf": {
         "sources": [
-            "src/math/generic/scalbn.rs",
-            "src/math/scalbn.rs"
+            "libm/src/math/generic/scalbn.rs",
+            "libm/src/math/scalbn.rs"
         ],
         "type": "f32"
     },
     "scalbnf128": {
         "sources": [
-            "src/math/generic/scalbn.rs",
-            "src/math/scalbn.rs"
+            "libm/src/math/generic/scalbn.rs",
+            "libm/src/math/scalbn.rs"
         ],
         "type": "f128"
     },
     "scalbnf16": {
         "sources": [
-            "src/math/generic/scalbn.rs",
-            "src/math/scalbn.rs"
+            "libm/src/math/generic/scalbn.rs",
+            "libm/src/math/scalbn.rs"
         ],
         "type": "f16"
     },
     "sin": {
         "sources": [
-            "src/math/sin.rs"
+            "libm/src/math/sin.rs"
         ],
         "type": "f64"
     },
     "sincos": {
         "sources": [
-            "src/math/sincos.rs"
+            "libm/src/math/sincos.rs"
         ],
         "type": "f64"
     },
     "sincosf": {
         "sources": [
-            "src/math/sincosf.rs"
+            "libm/src/math/sincosf.rs"
         ],
         "type": "f32"
     },
     "sinf": {
         "sources": [
-            "src/math/sinf.rs"
+            "libm/src/math/sinf.rs"
         ],
         "type": "f32"
     },
     "sinh": {
         "sources": [
-            "src/math/sinh.rs"
+            "libm/src/math/sinh.rs"
         ],
         "type": "f64"
     },
     "sinhf": {
         "sources": [
-            "src/math/sinhf.rs"
+            "libm/src/math/sinhf.rs"
         ],
         "type": "f32"
     },
     "sqrt": {
         "sources": [
-            "src/math/arch/aarch64.rs",
-            "src/math/arch/i686.rs",
-            "src/math/arch/wasm32.rs",
-            "src/math/generic/sqrt.rs",
-            "src/math/sqrt.rs"
+            "libm/src/math/arch/aarch64.rs",
+            "libm/src/math/arch/i686.rs",
+            "libm/src/math/arch/wasm32.rs",
+            "libm/src/math/generic/sqrt.rs",
+            "libm/src/math/sqrt.rs"
         ],
         "type": "f64"
     },
     "sqrtf": {
         "sources": [
-            "src/math/arch/aarch64.rs",
-            "src/math/arch/i686.rs",
-            "src/math/arch/wasm32.rs",
-            "src/math/generic/sqrt.rs",
-            "src/math/sqrt.rs"
+            "libm/src/math/arch/aarch64.rs",
+            "libm/src/math/arch/i686.rs",
+            "libm/src/math/arch/wasm32.rs",
+            "libm/src/math/generic/sqrt.rs",
+            "libm/src/math/sqrt.rs"
         ],
         "type": "f32"
     },
     "sqrtf128": {
         "sources": [
-            "src/math/generic/sqrt.rs",
-            "src/math/sqrt.rs"
+            "libm/src/math/generic/sqrt.rs",
+            "libm/src/math/sqrt.rs"
         ],
         "type": "f128"
     },
     "sqrtf16": {
         "sources": [
-            "src/math/arch/aarch64.rs",
-            "src/math/generic/sqrt.rs",
-            "src/math/sqrt.rs"
+            "libm/src/math/arch/aarch64.rs",
+            "libm/src/math/generic/sqrt.rs",
+            "libm/src/math/sqrt.rs"
         ],
         "type": "f16"
     },
     "tan": {
         "sources": [
-            "src/math/tan.rs"
+            "libm/src/math/tan.rs"
         ],
         "type": "f64"
     },
     "tanf": {
         "sources": [
-            "src/math/tanf.rs"
+            "libm/src/math/tanf.rs"
         ],
         "type": "f32"
     },
     "tanh": {
         "sources": [
-            "src/math/tanh.rs"
+            "libm/src/math/tanh.rs"
         ],
         "type": "f64"
     },
     "tanhf": {
         "sources": [
-            "src/math/tanhf.rs"
+            "libm/src/math/tanhf.rs"
         ],
         "type": "f32"
     },
     "tgamma": {
         "sources": [
-            "src/math/tgamma.rs"
+            "libm/src/math/tgamma.rs"
         ],
         "type": "f64"
     },
     "tgammaf": {
         "sources": [
-            "src/math/tgammaf.rs"
+            "libm/src/math/tgammaf.rs"
         ],
         "type": "f32"
     },
     "trunc": {
         "sources": [
-            "src/math/arch/wasm32.rs",
-            "src/math/generic/trunc.rs",
-            "src/math/trunc.rs"
+            "libm/src/math/arch/wasm32.rs",
+            "libm/src/math/generic/trunc.rs",
+            "libm/src/math/trunc.rs"
         ],
         "type": "f64"
     },
     "truncf": {
         "sources": [
-            "src/math/arch/wasm32.rs",
-            "src/math/generic/trunc.rs",
-            "src/math/trunc.rs"
+            "libm/src/math/arch/wasm32.rs",
+            "libm/src/math/generic/trunc.rs",
+            "libm/src/math/trunc.rs"
         ],
         "type": "f32"
     },
     "truncf128": {
         "sources": [
-            "src/math/generic/trunc.rs",
-            "src/math/trunc.rs"
+            "libm/src/math/generic/trunc.rs",
+            "libm/src/math/trunc.rs"
         ],
         "type": "f128"
     },
     "truncf16": {
         "sources": [
-            "src/math/generic/trunc.rs",
-            "src/math/trunc.rs"
+            "libm/src/math/generic/trunc.rs",
+            "libm/src/math/trunc.rs"
         ],
         "type": "f16"
     },
     "y0": {
         "sources": [
-            "src/math/j0.rs"
+            "libm/src/math/j0.rs"
         ],
         "type": "f64"
     },
     "y0f": {
         "sources": [
-            "src/math/j0f.rs"
+            "libm/src/math/j0f.rs"
         ],
         "type": "f32"
     },
     "y1": {
         "sources": [
-            "src/math/j1.rs"
+            "libm/src/math/j1.rs"
         ],
         "type": "f64"
     },
     "y1f": {
         "sources": [
-            "src/math/j1f.rs"
+            "libm/src/math/j1f.rs"
         ],
         "type": "f32"
     },
     "yn": {
         "sources": [
-            "src/math/jn.rs"
+            "libm/src/math/jn.rs"
         ],
         "type": "f64"
     },
     "ynf": {
         "sources": [
-            "src/math/jnf.rs"
+            "libm/src/math/jnf.rs"
         ],
         "type": "f32"
     }
diff --git a/etc/update-api-list.py b/etc/update-api-list.py
index 950824fc4..0770a8b20 100755
--- a/etc/update-api-list.py
+++ b/etc/update-api-list.py
@@ -18,10 +18,10 @@
 
 SELF_PATH = Path(__file__)
 ETC_DIR = SELF_PATH.parent
-LIBM_DIR = ETC_DIR.parent.joinpath("libm")
+ROOT_DIR = ETC_DIR.parent
 
 # These files do not trigger a retest.
-IGNORED_SOURCES = ["src/libm_helper.rs", "src/math/support/float_traits.rs"]
+IGNORED_SOURCES = ["libm/src/libm_helper.rs", "libm/src/math/support/float_traits.rs"]
 
 IndexTy: TypeAlias = dict[str, dict[str, Any]]
 """Type of the `index` item in rustdoc's JSON output"""
@@ -66,7 +66,7 @@ def get_rustdoc_json() -> dict[Any, Any]:
         j = sp.check_output(
             [
                 "rustdoc",
-                "src/lib.rs",
+                "libm/src/lib.rs",
                 "--edition=2021",
                 "--document-private-items",
                 "--output-format=json",
@@ -75,7 +75,7 @@ def get_rustdoc_json() -> dict[Any, Any]:
                 "-Zunstable-options",
                 "-o-",
             ],
-            cwd=LIBM_DIR,
+            cwd=ROOT_DIR,
             text=True,
         )
         j = json.loads(j)
@@ -94,7 +94,9 @@ def _init_function_list(self, index: IndexTy) -> None:
         # Collect a list of source IDs for reexported items in `lib.rs` or `mod math`.
         use = (i for i in public if "use" in i["inner"])
         use = (
-            i for i in use if i["span"]["filename"] in ["src/math/mod.rs", "src/lib.rs"]
+            i
+            for i in use
+            if i["span"]["filename"] in ["libm/src/math/mod.rs", "libm/src/lib.rs"]
         )
         reexported_ids = [item["inner"]["use"]["id"] for item in use]
 
@@ -121,8 +123,8 @@ def _init_defs(self, index: IndexTy) -> None:
 
         # A lot of the `arch` module is often configured out so doesn't show up in docs. Use
         # string matching as a fallback.
-        for fname in glob("src/math/arch/**.rs", root_dir=LIBM_DIR):
-            contents = (LIBM_DIR.joinpath(fname)).read_text()
+        for fname in glob("libm/src/math/arch/**.rs", root_dir=ROOT_DIR):
+            contents = (ROOT_DIR.joinpath(fname)).read_text()
 
             for name in self.public_functions:
                 if f"fn {name}" in contents:
@@ -188,10 +190,10 @@ def tidy_lists(self) -> None:
         include all public API.
         """
 
-        flist = sp.check_output(["git", "ls-files"], cwd=LIBM_DIR, text=True)
+        flist = sp.check_output(["git", "ls-files"], cwd=ROOT_DIR, text=True)
 
         for path in flist.splitlines():
-            fpath = LIBM_DIR.joinpath(path)
+            fpath = ROOT_DIR.joinpath(path)
             if fpath.is_dir() or fpath == SELF_PATH:
                 continue
 
@@ -229,7 +231,7 @@ def ensure_contains_api(self, fpath: Path, line_num: int, lines: list[str]):
         if len(not_found) == 0:
             return
 
-        relpath = fpath.relative_to(LIBM_DIR)
+        relpath = fpath.relative_to(ROOT_DIR)
         eprint(f"functions not found at {relpath}:{line_num}: {not_found}")
         exit(1)
 
@@ -244,7 +246,7 @@ def validate_delimited_block(
     """Identify blocks of code wrapped within `start` and `end`, collect their contents
     to a list of strings, and call `validate` for each of those lists.
     """
-    relpath = fpath.relative_to(LIBM_DIR)
+    relpath = fpath.relative_to(ROOT_DIR)
     block_lines = []
     block_start_line: None | int = None
     for line_num, line in enumerate(lines):
@@ -274,7 +276,7 @@ def validate_delimited_block(
 
 def ensure_sorted(fpath: Path, block_start_line: int, lines: list[str]) -> None:
     """Ensure that a list of lines is sorted, otherwise print a diff and exit."""
-    relpath = fpath.relative_to(LIBM_DIR)
+    relpath = fpath.relative_to(ROOT_DIR)
     diff_and_exit(
         "\n".join(lines),
         "\n".join(sorted(lines)),
diff --git a/libm/Cargo.toml b/libm/Cargo.toml
index e0aeb07d5..44154c1a8 100644
--- a/libm/Cargo.toml
+++ b/libm/Cargo.toml
@@ -10,7 +10,6 @@ readme = "README.md"
 repository = "https://github.com/rust-lang/libm"
 version = "0.2.11"
 edition = "2021"
-exclude = ["/ci/", "/.github/workflows/"]
 rust-version = "1.63"
 
 [features]
@@ -40,24 +39,6 @@ unstable-float = []
 # hard float operations.
 force-soft-floats = []
 
-[workspace]
-resolver = "2"
-members = [
-  "crates/libm-macros",
-  "crates/libm-test",
-  "crates/musl-math-sys",
-  "crates/util",
-]
-default-members = [
-  ".",
-  "crates/libm-macros",
-  "crates/libm-test",
-]
-exclude = [
-  # Requires `panic = abort` so can't be a member of the workspace
-  "crates/compiler-builtins-smoke-test",
-]
-
 [dev-dependencies]
 no-panic = "0.1.35"
 
@@ -66,22 +47,3 @@ unexpected_cfgs = { level = "warn", check-cfg = [
   # compiler-builtins sets this feature, but we use it in `libm`
   'cfg(feature, values("compiler-builtins"))',
 ] }
-
-# The default release profile is unchanged.
-
-# Release mode with debug assertions
-[profile.release-checked]
-inherits = "release"
-debug-assertions = true
-overflow-checks = true
-
-# Release with maximum optimizations, which is very slow to build. This is also
-# what is needed to check `no-panic`.
-[profile.release-opt]
-inherits = "release"
-codegen-units = 1
-lto = "fat"
-
-[profile.bench]
-# Required for iai-callgrind
-debug = true
diff --git a/libm/LICENSE.txt b/libm/LICENSE.txt
new file mode 120000
index 000000000..4ab43736a
--- /dev/null
+++ b/libm/LICENSE.txt
@@ -0,0 +1 @@
+../LICENSE.txt
\ No newline at end of file
diff --git a/libm/README.md b/libm/README.md
new file mode 120000
index 000000000..32d46ee88
--- /dev/null
+++ b/libm/README.md
@@ -0,0 +1 @@
+../README.md
\ No newline at end of file

From c94017af75c3ec4616d5b7f9b6b1b3826b934469 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 19 Mar 2025 00:11:39 +0000
Subject: [PATCH 278/279] Migrate all crates except `libm` to edition 2024

Unfortunately this means we lose use of the convenient name `gen`, so
this includes a handful of renaming.

We can't increase the edition for `libm` yet due to MSRV, but we can
enable `unsafe_op_in_unsafe_fn` to help make that change smoother in the
future.
---
 .github/workflows/main.yaml                         | 13 ++++++++-----
 crates/compiler-builtins-smoke-test/src/math.rs     |  8 ++++----
 crates/libm-macros/Cargo.toml                       |  2 +-
 crates/libm-test/Cargo.toml                         |  2 +-
 crates/libm-test/benches/icount.rs                  |  2 +-
 crates/libm-test/benches/random.rs                  |  4 ++--
 crates/libm-test/examples/plot_domains.rs           |  8 ++++----
 crates/libm-test/src/{gen.rs => generate.rs}        |  0
 crates/libm-test/src/{gen => generate}/case_list.rs |  0
 .../libm-test/src/{gen => generate}/edge_cases.rs   |  2 +-
 crates/libm-test/src/{gen => generate}/random.rs    |  0
 crates/libm-test/src/{gen => generate}/spaced.rs    |  0
 crates/libm-test/src/lib.rs                         |  2 +-
 crates/libm-test/src/run_cfg.rs                     |  4 ++--
 crates/libm-test/tests/compare_built_musl.rs        |  2 +-
 crates/libm-test/tests/multiprecision.rs            |  2 +-
 crates/libm-test/tests/standalone.rs                |  2 +-
 crates/libm-test/tests/u256.rs                      |  2 +-
 crates/libm-test/tests/z_extensive/run.rs           |  2 +-
 crates/musl-math-sys/Cargo.toml                     |  2 +-
 crates/musl-math-sys/src/lib.rs                     |  2 +-
 crates/util/Cargo.toml                              |  2 +-
 libm/src/lib.rs                                     |  1 +
 23 files changed, 34 insertions(+), 30 deletions(-)
 rename crates/libm-test/src/{gen.rs => generate.rs} (100%)
 rename crates/libm-test/src/{gen => generate}/case_list.rs (100%)
 rename crates/libm-test/src/{gen => generate}/edge_cases.rs (99%)
 rename crates/libm-test/src/{gen => generate}/random.rs (100%)
 rename crates/libm-test/src/{gen => generate}/spaced.rs (100%)

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index a717c3ea8..5ce0dbc26 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -212,14 +212,17 @@ jobs:
       RUSTFLAGS: # No need to check warnings on old MSRV, unset `-Dwarnings`
     steps:
     - uses: actions/checkout@master
-    - run: |
+    - name: Install Rust
+      run: |
         msrv="$(perl -ne 'print if s/rust-version\s*=\s*"(.*)"/\1/g' libm/Cargo.toml)"
         echo "MSRV: $msrv"
-        echo "MSRV=$msrv" >> "$GITHUB_ENV"
-    - name: Install Rust
-      run: rustup update "$MSRV" --no-self-update && rustup default "$MSRV"
+        rustup update "$msrv" --no-self-update && rustup default "$msrv"
     - uses: Swatinem/rust-cache@v2
-    - run: cargo build -p libm
+    - run: |
+        # FIXME(msrv): Remove the workspace Cargo.toml so 1.63 cargo doesn't see
+        # `edition = "2024"` and get spooked.
+        rm Cargo.toml
+        cargo build --manifest-path libm/Cargo.toml
 
   rustfmt:
     name: Rustfmt
diff --git a/crates/compiler-builtins-smoke-test/src/math.rs b/crates/compiler-builtins-smoke-test/src/math.rs
index f17fc1231..58a5bfbb9 100644
--- a/crates/compiler-builtins-smoke-test/src/math.rs
+++ b/crates/compiler-builtins-smoke-test/src/math.rs
@@ -14,7 +14,7 @@ macro_rules! no_mangle {
 
     // Handle simple functions with single return types
     (@inner $name:ident( $($arg:ident: $aty:ty),+ ) -> $ret:ty) => {
-        #[no_mangle]
+        #[unsafe(no_mangle)]
         extern "C" fn $name($($arg: $aty),+) -> $ret {
             libm::$name($($arg),+)
         }
@@ -26,7 +26,7 @@ macro_rules! no_mangle {
     (
         @inner $name:ident( $($arg:ident: $aty:ty),+ | $($rarg:ident: $rty:ty),+) -> $ret:ty
     ) => {
-        #[no_mangle]
+        #[unsafe(no_mangle)]
         extern "C" fn $name($($arg: $aty,)+ $($rarg: $rty),+) -> $ret {
             let ret;
             (ret, $(*$rarg),+) = libm::$name($($arg),+);
@@ -166,12 +166,12 @@ no_mangle! {
 
 /* sincos has no direct return type, not worth handling in the macro */
 
-#[no_mangle]
+#[unsafe(no_mangle)]
 extern "C" fn sincos(x: f64, s: &mut f64, c: &mut f64) {
     (*s, *c) = libm::sincos(x);
 }
 
-#[no_mangle]
+#[unsafe(no_mangle)]
 extern "C" fn sincosf(x: f32, s: &mut f32, c: &mut f32) {
     (*s, *c) = libm::sincosf(x);
 }
diff --git a/crates/libm-macros/Cargo.toml b/crates/libm-macros/Cargo.toml
index 314f4ae37..50c869db7 100644
--- a/crates/libm-macros/Cargo.toml
+++ b/crates/libm-macros/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "libm-macros"
 version = "0.1.0"
-edition = "2021"
+edition = "2024"
 publish = false
 
 [lib]
diff --git a/crates/libm-test/Cargo.toml b/crates/libm-test/Cargo.toml
index 1bcc163ed..5d150b4ae 100644
--- a/crates/libm-test/Cargo.toml
+++ b/crates/libm-test/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "libm-test"
 version = "0.1.0"
-edition = "2021"
+edition = "2024"
 publish = false
 
 [features]
diff --git a/crates/libm-test/benches/icount.rs b/crates/libm-test/benches/icount.rs
index 4a10ec383..da8c6bfd1 100644
--- a/crates/libm-test/benches/icount.rs
+++ b/crates/libm-test/benches/icount.rs
@@ -4,7 +4,7 @@ use std::hint::black_box;
 
 use iai_callgrind::{library_benchmark, library_benchmark_group, main};
 use libm::support::{HInt, u256};
-use libm_test::gen::spaced;
+use libm_test::generate::spaced;
 use libm_test::{CheckBasis, CheckCtx, GeneratorKind, MathOp, OpRustArgs, TupleCall, op};
 
 const BENCH_ITER_ITEMS: u64 = 500;
diff --git a/crates/libm-test/benches/random.rs b/crates/libm-test/benches/random.rs
index 17e4e0d55..63d7e5c6d 100644
--- a/crates/libm-test/benches/random.rs
+++ b/crates/libm-test/benches/random.rs
@@ -2,8 +2,8 @@ use std::hint::black_box;
 use std::time::Duration;
 
 use criterion::{Criterion, criterion_main};
-use libm_test::gen::random;
-use libm_test::gen::random::RandomInput;
+use libm_test::generate::random;
+use libm_test::generate::random::RandomInput;
 use libm_test::{CheckBasis, CheckCtx, GeneratorKind, MathOp, TupleCall};
 
 /// Benchmark with this many items to get a variety
diff --git a/crates/libm-test/examples/plot_domains.rs b/crates/libm-test/examples/plot_domains.rs
index 441889c69..78524761e 100644
--- a/crates/libm-test/examples/plot_domains.rs
+++ b/crates/libm-test/examples/plot_domains.rs
@@ -12,8 +12,8 @@ use std::path::Path;
 use std::process::Command;
 use std::{env, fs};
 
-use libm_test::gen::spaced::SpacedInput;
-use libm_test::gen::{edge_cases, spaced};
+use libm_test::generate::spaced::SpacedInput;
+use libm_test::generate::{edge_cases, spaced};
 use libm_test::{CheckBasis, CheckCtx, GeneratorKind, MathOp, op};
 
 const JL_PLOT: &str = "examples/plot_file.jl";
@@ -73,7 +73,7 @@ fn plot_one_generator(
     ctx: &CheckCtx,
     gen_name: &str,
     config: &mut String,
-    gen: impl Iterator<Item = (f32,)>,
+    generator: impl Iterator<Item = (f32,)>,
 ) {
     let fn_name = ctx.base_name_str;
     let text_file = out_dir.join(format!("input-{fn_name}-{gen_name}.txt"));
@@ -82,7 +82,7 @@ fn plot_one_generator(
     let mut w = BufWriter::new(f);
     let mut count = 0u64;
 
-    for input in gen {
+    for input in generator {
         writeln!(w, "{:e}", input.0).unwrap();
         count += 1;
     }
diff --git a/crates/libm-test/src/gen.rs b/crates/libm-test/src/generate.rs
similarity index 100%
rename from crates/libm-test/src/gen.rs
rename to crates/libm-test/src/generate.rs
diff --git a/crates/libm-test/src/gen/case_list.rs b/crates/libm-test/src/generate/case_list.rs
similarity index 100%
rename from crates/libm-test/src/gen/case_list.rs
rename to crates/libm-test/src/generate/case_list.rs
diff --git a/crates/libm-test/src/gen/edge_cases.rs b/crates/libm-test/src/generate/edge_cases.rs
similarity index 99%
rename from crates/libm-test/src/gen/edge_cases.rs
rename to crates/libm-test/src/generate/edge_cases.rs
index 69b59a105..56cc9fa9a 100644
--- a/crates/libm-test/src/gen/edge_cases.rs
+++ b/crates/libm-test/src/generate/edge_cases.rs
@@ -3,7 +3,7 @@
 use libm::support::{CastInto, Float, Int, MinInt};
 
 use crate::domain::get_domain;
-use crate::gen::KnownSize;
+use crate::generate::KnownSize;
 use crate::op::OpITy;
 use crate::run_cfg::{check_near_count, check_point_count};
 use crate::{BaseName, CheckCtx, FloatExt, FloatTy, MathOp, test_log};
diff --git a/crates/libm-test/src/gen/random.rs b/crates/libm-test/src/generate/random.rs
similarity index 100%
rename from crates/libm-test/src/gen/random.rs
rename to crates/libm-test/src/generate/random.rs
diff --git a/crates/libm-test/src/gen/spaced.rs b/crates/libm-test/src/generate/spaced.rs
similarity index 100%
rename from crates/libm-test/src/gen/spaced.rs
rename to crates/libm-test/src/generate/spaced.rs
diff --git a/crates/libm-test/src/lib.rs b/crates/libm-test/src/lib.rs
index 824f09a33..485c01a47 100644
--- a/crates/libm-test/src/lib.rs
+++ b/crates/libm-test/src/lib.rs
@@ -4,7 +4,7 @@
 
 pub mod domain;
 mod f8_impl;
-pub mod gen;
+pub mod generate;
 #[cfg(feature = "build-mpfr")]
 pub mod mpfloat;
 mod num;
diff --git a/crates/libm-test/src/run_cfg.rs b/crates/libm-test/src/run_cfg.rs
index 8e4fff53c..b36164b00 100644
--- a/crates/libm-test/src/run_cfg.rs
+++ b/crates/libm-test/src/run_cfg.rs
@@ -4,7 +4,7 @@ use std::ops::RangeInclusive;
 use std::sync::LazyLock;
 use std::{env, str};
 
-use crate::gen::random::{SEED, SEED_ENV};
+use crate::generate::random::{SEED, SEED_ENV};
 use crate::{BaseName, FloatTy, Identifier, test_log};
 
 /// The environment variable indicating which extensive tests should be run.
@@ -241,7 +241,7 @@ pub fn iteration_count(ctx: &CheckCtx, argnum: usize) -> u64 {
     // Some tests are significantly slower than others and need to be further reduced.
     if let Some((_id, _gen, scale)) = EXTEMELY_SLOW_TESTS
         .iter()
-        .find(|(id, gen, _scale)| *id == ctx.fn_ident && *gen == ctx.gen_kind)
+        .find(|(id, generator, _scale)| *id == ctx.fn_ident && *generator == ctx.gen_kind)
     {
         // However, do not override if the extensive iteration count has been manually set.
         if !(ctx.gen_kind == GeneratorKind::Extensive && EXTENSIVE_ITER_OVERRIDE.is_some()) {
diff --git a/crates/libm-test/tests/compare_built_musl.rs b/crates/libm-test/tests/compare_built_musl.rs
index 897dfc26e..cbb4bd49b 100644
--- a/crates/libm-test/tests/compare_built_musl.rs
+++ b/crates/libm-test/tests/compare_built_musl.rs
@@ -9,7 +9,7 @@
 // There are some targets we can't build musl for
 #![cfg(feature = "build-musl")]
 
-use libm_test::gen::{case_list, edge_cases, random, spaced};
+use libm_test::generate::{case_list, edge_cases, random, spaced};
 use libm_test::{CheckBasis, CheckCtx, CheckOutput, GeneratorKind, MathOp, TupleCall};
 
 const BASIS: CheckBasis = CheckBasis::Musl;
diff --git a/crates/libm-test/tests/multiprecision.rs b/crates/libm-test/tests/multiprecision.rs
index 0ab4b64da..80b2c7868 100644
--- a/crates/libm-test/tests/multiprecision.rs
+++ b/crates/libm-test/tests/multiprecision.rs
@@ -2,7 +2,7 @@
 
 #![cfg(feature = "build-mpfr")]
 
-use libm_test::gen::{case_list, edge_cases, random, spaced};
+use libm_test::generate::{case_list, edge_cases, random, spaced};
 use libm_test::mpfloat::MpOp;
 use libm_test::{CheckBasis, CheckCtx, CheckOutput, GeneratorKind, MathOp, TupleCall};
 
diff --git a/crates/libm-test/tests/standalone.rs b/crates/libm-test/tests/standalone.rs
index 7d694843e..7b30a3b48 100644
--- a/crates/libm-test/tests/standalone.rs
+++ b/crates/libm-test/tests/standalone.rs
@@ -1,6 +1,6 @@
 //! Test cases that have both an input and an output, so do not require a basis.
 
-use libm_test::gen::case_list;
+use libm_test::generate::case_list;
 use libm_test::{CheckBasis, CheckCtx, CheckOutput, GeneratorKind, MathOp, TupleCall};
 
 const BASIS: CheckBasis = CheckBasis::None;
diff --git a/crates/libm-test/tests/u256.rs b/crates/libm-test/tests/u256.rs
index 460353424..4444036d0 100644
--- a/crates/libm-test/tests/u256.rs
+++ b/crates/libm-test/tests/u256.rs
@@ -9,7 +9,7 @@ use libm::support::{HInt, u256};
 type BigInt = rug::Integer;
 
 use libm_test::bigint_fuzz_iteration_count;
-use libm_test::gen::random::SEED;
+use libm_test::generate::random::SEED;
 use rand::{Rng, SeedableRng};
 use rand_chacha::ChaCha8Rng;
 use rug::Assign;
diff --git a/crates/libm-test/tests/z_extensive/run.rs b/crates/libm-test/tests/z_extensive/run.rs
index 786546a9d..b10c231d1 100644
--- a/crates/libm-test/tests/z_extensive/run.rs
+++ b/crates/libm-test/tests/z_extensive/run.rs
@@ -6,7 +6,7 @@ use std::sync::atomic::{AtomicU64, Ordering};
 use std::time::Duration;
 
 use indicatif::{ProgressBar, ProgressStyle};
-use libm_test::gen::spaced;
+use libm_test::generate::spaced;
 use libm_test::mpfloat::MpOp;
 use libm_test::{
     CheckBasis, CheckCtx, CheckOutput, GeneratorKind, MathOp, TestResult, TupleCall,
diff --git a/crates/musl-math-sys/Cargo.toml b/crates/musl-math-sys/Cargo.toml
index ad73578d8..9e866a970 100644
--- a/crates/musl-math-sys/Cargo.toml
+++ b/crates/musl-math-sys/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "musl-math-sys"
 version = "0.1.0"
-edition = "2021"
+edition = "2024"
 publish = false
 
 [dependencies]
diff --git a/crates/musl-math-sys/src/lib.rs b/crates/musl-math-sys/src/lib.rs
index 07277ef3e..6a4bf4859 100644
--- a/crates/musl-math-sys/src/lib.rs
+++ b/crates/musl-math-sys/src/lib.rs
@@ -10,7 +10,7 @@ macro_rules! functions {
         $( #[$meta:meta] )*
         $pfx_name:ident: $name:ident( $($arg:ident: $aty:ty),+ ) -> $rty:ty;
     )* ) => {
-        extern "C" {
+        unsafe extern "C" {
             $( fn $pfx_name( $($arg: $aty),+ ) -> $rty; )*
         }
 
diff --git a/crates/util/Cargo.toml b/crates/util/Cargo.toml
index 94c7f1033..4bcb97472 100644
--- a/crates/util/Cargo.toml
+++ b/crates/util/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "util"
 version = "0.1.0"
-edition = "2021"
+edition = "2024"
 publish = false
 
 [features]
diff --git a/libm/src/lib.rs b/libm/src/lib.rs
index b0e431211..7e56bd079 100644
--- a/libm/src/lib.rs
+++ b/libm/src/lib.rs
@@ -17,6 +17,7 @@
 #![allow(clippy::needless_return)]
 #![allow(clippy::unreadable_literal)]
 #![allow(clippy::zero_divided_by_zero)]
+#![forbid(unsafe_op_in_unsafe_fn)]
 
 mod libm_helper;
 mod math;

From 8f7436d260f000f054042545bb6e4c0d99fe35b2 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sun, 20 Apr 2025 04:43:58 +0000
Subject: [PATCH 279/279] Note that this crate has been moved

Since [1], `libm` is now part of the `compiler-builtins` repositoy, so
this repo will be archived. Update the README to reflect that.

[1]: https://github.com/rust-lang/compiler-builtins/pull/822
---
 README.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/README.md b/README.md
index 52d760a4f..c120a7588 100644
--- a/README.md
+++ b/README.md
@@ -2,6 +2,10 @@
 
 A port of [MUSL]'s libm to Rust.
 
+> [!NOTE]  
+> The `libm` crate has been merged into the `compiler-builtins` repository. Future
+> development work will take place there: https://github.com/rust-lang/compiler-builtins.
+
 [MUSL]: https://musl.libc.org/
 
 ## Goals