Skip to content

Commit b139bcd

Browse files
authored
GH-100485: Tweaks to sumprod() (GH-100857)
1 parent 11f9932 commit b139bcd

File tree

3 files changed

+41
-25
lines changed

3 files changed

+41
-25
lines changed

Doc/whatsnew/3.12.rst

+6
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,12 @@ dis
262262
:data:`~dis.hasarg` collection instead.
263263
(Contributed by Irit Katriel in :gh:`94216`.)
264264

265+
math
266+
----
267+
268+
* Added :func:`math.sumprod` for computing a sum of products.
269+
(Contributed by Raymond Hettinger in :gh:`100485`.)
270+
265271
os
266272
--
267273

Lib/test/test_math.py

+1
Original file line numberDiff line numberDiff line change
@@ -1294,6 +1294,7 @@ def test_sumprod_accuracy(self):
12941294
self.assertEqual(sumprod([0.1] * 20, [True, False] * 10), 1.0)
12951295
self.assertEqual(sumprod([1.0, 10E100, 1.0, -10E100], [1.0]*4), 2.0)
12961296

1297+
@support.requires_resource('cpu')
12971298
def test_sumprod_stress(self):
12981299
sumprod = math.sumprod
12991300
product = itertools.product

Modules/mathmodule.c

+34-25
Original file line numberDiff line numberDiff line change
@@ -2832,7 +2832,7 @@ long_add_would_overflow(long a, long b)
28322832
}
28332833

28342834
/*
2835-
Double length extended precision floating point arithmetic
2835+
Double and triple length extended precision floating point arithmetic
28362836
based on ideas from three sources:
28372837
28382838
Improved Kahan–Babuška algorithm by Arnold Neumaier
@@ -2845,22 +2845,22 @@ based on ideas from three sources:
28452845
Ultimately Fast Accurate Summation by Siegfried M. Rump
28462846
https://www.tuhh.de/ti3/paper/rump/Ru08b.pdf
28472847
2848-
The double length routines allow for quite a bit of instruction
2849-
level parallelism. On a 3.22 Ghz Apple M1 Max, the incremental
2850-
cost of increasing the input vector size by one is 6.0 nsec.
2848+
Double length functions:
2849+
* dl_split() exact split of a C double into two half precision components.
2850+
* dl_mul() exact multiplication of two C doubles.
28512851
2852-
dl_zero() returns an extended precision zero
2853-
dl_split() exactly splits a double into two half precision components.
2854-
dl_add() performs compensated summation to keep a running total.
2855-
dl_mul() implements lossless multiplication of doubles.
2856-
dl_fma() implements an extended precision fused-multiply-add.
2857-
dl_to_d() converts from extended precision to double precision.
2852+
Triple length functions and constant:
2853+
* tl_zero is a triple length zero for starting or resetting an accumulation.
2854+
* tl_add() compensated addition of a C double to a triple length number.
2855+
* tl_fma() performs a triple length fused-multiply-add.
2856+
* tl_to_d() converts from triple length number back to a C double.
28582857
28592858
*/
28602859

28612860
typedef struct{ double hi; double lo; } DoubleLength;
2861+
typedef struct{ double hi; double lo; double tiny; } TripleLength;
28622862

2863-
static const DoubleLength dl_zero = {0.0, 0.0};
2863+
static const TripleLength tl_zero = {0.0, 0.0, 0.0};
28642864

28652865
static inline DoubleLength
28662866
twosum(double a, double b)
@@ -2874,11 +2874,20 @@ twosum(double a, double b)
28742874
return (DoubleLength) {s, t};
28752875
}
28762876

2877-
static inline DoubleLength
2878-
dl_add(DoubleLength total, double x)
2877+
static inline TripleLength
2878+
tl_add(TripleLength total, double x)
28792879
{
2880-
DoubleLength s = twosum(total.hi, x);
2881-
return (DoubleLength) {s.hi, total.lo + s.lo};
2880+
/* Input: x total.hi total.lo total.tiny
2881+
|--- twosum ---|
2882+
s.hi s.lo
2883+
|--- twosum ---|
2884+
t.hi t.lo
2885+
|--- single sum ---|
2886+
Output: s.hi t.hi tiny
2887+
*/
2888+
DoubleLength s = twosum(x, total.hi);
2889+
DoubleLength t = twosum(s.lo, total.lo);
2890+
return (TripleLength) {s.hi, t.hi, t.lo + total.tiny};
28822891
}
28832892

28842893
static inline DoubleLength
@@ -2902,18 +2911,18 @@ dl_mul(double x, double y)
29022911
return (DoubleLength) {z, zz};
29032912
}
29042913

2905-
static inline DoubleLength
2906-
dl_fma(DoubleLength total, double p, double q)
2914+
static inline TripleLength
2915+
tl_fma(TripleLength total, double p, double q)
29072916
{
29082917
DoubleLength product = dl_mul(p, q);
2909-
total = dl_add(total, product.hi);
2910-
return dl_add(total, product.lo);
2918+
total = tl_add(total, product.hi);
2919+
return tl_add(total, product.lo);
29112920
}
29122921

29132922
static inline double
2914-
dl_to_d(DoubleLength total)
2923+
tl_to_d(TripleLength total)
29152924
{
2916-
return total.hi + total.lo;
2925+
return total.tiny + total.lo + total.hi;
29172926
}
29182927

29192928
/*[clinic input]
@@ -2944,7 +2953,7 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q)
29442953
bool int_path_enabled = true, int_total_in_use = false;
29452954
bool flt_path_enabled = true, flt_total_in_use = false;
29462955
long int_total = 0;
2947-
DoubleLength flt_total = dl_zero;
2956+
TripleLength flt_total = tl_zero;
29482957

29492958
p_it = PyObject_GetIter(p);
29502959
if (p_it == NULL) {
@@ -3079,7 +3088,7 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q)
30793088
} else {
30803089
goto finalize_flt_path;
30813090
}
3082-
DoubleLength new_flt_total = dl_fma(flt_total, flt_p, flt_q);
3091+
TripleLength new_flt_total = tl_fma(flt_total, flt_p, flt_q);
30833092
if (isfinite(new_flt_total.hi)) {
30843093
flt_total = new_flt_total;
30853094
flt_total_in_use = true;
@@ -3093,7 +3102,7 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q)
30933102
// We're finished, overflowed, have a non-float, or got a non-finite value
30943103
flt_path_enabled = false;
30953104
if (flt_total_in_use) {
3096-
term_i = PyFloat_FromDouble(dl_to_d(flt_total));
3105+
term_i = PyFloat_FromDouble(tl_to_d(flt_total));
30973106
if (term_i == NULL) {
30983107
goto err_exit;
30993108
}
@@ -3104,7 +3113,7 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q)
31043113
Py_SETREF(total, new_total);
31053114
new_total = NULL;
31063115
Py_CLEAR(term_i);
3107-
flt_total = dl_zero;
3116+
flt_total = tl_zero;
31083117
flt_total_in_use = false;
31093118
}
31103119
}

0 commit comments

Comments
 (0)