From 3739372d433f93ca0a4b291ff46a5438d9b28784 Mon Sep 17 00:00:00 2001 From: Devon Hollowood Date: Wed, 26 Oct 2022 23:40:30 -0700 Subject: [PATCH 1/3] Implement more accurate hypot() This uses the improved-accuracy hypotenuse algorithm from Borges 2019, see https://arxiv.org/abs/1904.09481. --- Lib/test/test_math.py | 2 -- stdlib/src/math.rs | 81 +++++++++++++++++++++++++++++++++++-------- 2 files changed, 66 insertions(+), 17 deletions(-) diff --git a/Lib/test/test_math.py b/Lib/test/test_math.py index 08d1b5fa02..0067e1c06b 100644 --- a/Lib/test/test_math.py +++ b/Lib/test/test_math.py @@ -833,8 +833,6 @@ def testHypot(self): @requires_IEEE_754 @unittest.skipIf(HAVE_DOUBLE_ROUNDING, "hypot() loses accuracy on machines with double rounding") - # TODO: RUSTPYTHON - @unittest.expectedFailure def testHypotAccuracy(self): # Verify improved accuracy in cases that were known to be inaccurate. # diff --git a/stdlib/src/math.rs b/stdlib/src/math.rs index 6147626b04..982148a8cb 100644 --- a/stdlib/src/math.rs +++ b/stdlib/src/math.rs @@ -7,6 +7,7 @@ mod math { function::{ArgIntoFloat, ArgIterable, Either, OptionalArg, PosArgs}, identifier, PyObject, PyObjectRef, PyRef, PyResult, VirtualMachine, }; + use itertools::Itertools; use num_bigint::BigInt; use num_rational::Ratio; use num_traits::{One, Signed, ToPrimitive, Zero}; @@ -283,24 +284,73 @@ mod math { if has_nan { return f64::NAN; } - vector_norm(&coordinates, max) + coordinates.sort_unstable_by(|x, y| x.total_cmp(y).reverse()); + vector_norm(&coordinates) } - fn vector_norm(v: &[f64], max: f64) -> f64 { - if max == 0.0 || v.len() <= 1 { + /// Implementation of accurate hypotenuse algorithm from Borges 2019. + /// See https://arxiv.org/abs/1904.09481. + /// This assumes that its arguments are positive finite and have been scaled to avoid overflow + /// and underflow. + fn accurate_hypot(max: f64, min: f64) -> f64 { + if min <= max * (f64::EPSILON / 2.0).sqrt() { return max; } - let mut csum = 1.0; - let mut frac = 0.0; - for &f in v { - let f = f / max; - let f = f * f; - let old = csum; - csum += f; - // this seemingly redundant operation is to reduce float rounding errors/inaccuracy - frac += (old - csum) + f; - } - max * f64::sqrt(csum - 1.0 + frac) + let hypot = max.mul_add(max, min * min).sqrt(); + let hypot_sq = hypot * hypot; + let max_sq = max * max; + let correction = (-min).mul_add(min, hypot_sq - max_sq) + hypot.mul_add(hypot, -hypot_sq) + - max.mul_add(max, -max_sq); + hypot - correction / (2.0 * hypot) + } + + /// Calculates the norm of the vector given by `v`. + /// `v` is assumed to be a list of non-negative finite floats, sorted in descending order. + fn vector_norm(v: &[f64]) -> f64 { + // Drop zeros from the vector. + let zero_count = v.iter().rev().cloned().take_while(|x| *x == 0.0).count(); + let v = &v[..v.len() - zero_count]; + if v.is_empty() { + return 0.0; + } + if v.len() == 1 { + return v[0]; + } + // Calculate scaling to avoid overflow / underflow. + let max = *v.first().unwrap(); + let min = *v.last().unwrap(); + let scale = if max > (f64::MAX / v.len() as f64).sqrt() { + max + } else if min < f64::MIN_POSITIVE.sqrt() { + // ^ This can be an `else if`, because if the max is near f64::MAX and the min is near + // f64::MIN_POSITIVE, then the min is relatively unimportant and will be effectively + // ignored. + min + } else { + 1.0 + }; + let mut norm = v + .iter() + .cloned() + .map(|x| x / scale) + .reduce(accurate_hypot) + .unwrap_or_default(); + if v.len() > 2 { + // For larger lists of numbers, we can accumulate a rounding error, so a correction is + // needed, similar to that in `accurate_hypot()`. + // First, we estimate [sum of squares - norm^2], then we add the first-order + // approximation of the square root of that to `norm`. + let correction = v + .iter() + .cloned() + .map(|x| x / scale) + .map(|x| x * x) + .chain(std::iter::once(-norm * norm)) + .tree_fold1(std::ops::Add::add) + .unwrap(); + norm = norm + correction / (2.0 * norm); + } + norm * scale } #[pyfunction] @@ -339,7 +389,8 @@ mod math { if has_nan { return Ok(f64::NAN); } - Ok(vector_norm(&diffs, max)) + diffs.sort_unstable_by(|x, y| x.total_cmp(y).reverse()); + Ok(vector_norm(&diffs)) } #[pyfunction] From fa50056fce127508b9aceb0cbbe64f610cbbf294 Mon Sep 17 00:00:00 2001 From: Devon Hollowood Date: Thu, 27 Oct 2022 18:20:50 -0700 Subject: [PATCH 2/3] Apply suggestions from code review Co-authored-by: Jeong YunWon <69878+youknowone@users.noreply.github.com> --- stdlib/src/math.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/stdlib/src/math.rs b/stdlib/src/math.rs index 982148a8cb..31571eb1a9 100644 --- a/stdlib/src/math.rs +++ b/stdlib/src/math.rs @@ -331,7 +331,7 @@ mod math { }; let mut norm = v .iter() - .cloned() + .copied() .map(|x| x / scale) .reduce(accurate_hypot) .unwrap_or_default(); @@ -342,9 +342,8 @@ mod math { // approximation of the square root of that to `norm`. let correction = v .iter() - .cloned() - .map(|x| x / scale) - .map(|x| x * x) + .copied() + .map(|x| (x / scale).pow(2)) .chain(std::iter::once(-norm * norm)) .tree_fold1(std::ops::Add::add) .unwrap(); From 96bf177accd485ca0c4579df9ffa9e469a66ee81 Mon Sep 17 00:00:00 2001 From: Devon Hollowood Date: Thu, 27 Oct 2022 18:27:06 -0700 Subject: [PATCH 3/3] Respond to reviewer comments --- stdlib/src/math.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/stdlib/src/math.rs b/stdlib/src/math.rs index 31571eb1a9..14483498df 100644 --- a/stdlib/src/math.rs +++ b/stdlib/src/math.rs @@ -343,10 +343,11 @@ mod math { let correction = v .iter() .copied() - .map(|x| (x / scale).pow(2)) + .map(|x| (x / scale).powi(2)) .chain(std::iter::once(-norm * norm)) + // Pairwise summation of floats gives less rounding error than a naive sum. .tree_fold1(std::ops::Add::add) - .unwrap(); + .expect("expected at least 1 element"); norm = norm + correction / (2.0 * norm); } norm * scale