diff --git a/Lib/test/test_math.py b/Lib/test/test_math.py index fcdec93484e000..b90409c70e1c2b 100644 --- a/Lib/test/test_math.py +++ b/Lib/test/test_math.py @@ -1379,7 +1379,7 @@ def test_sumprod_extended_precision_accuracy(self): from fractions import Fraction from itertools import starmap from collections import namedtuple - from math import log2, exp2, fabs + from math import log2, exp2, fabs, ulp from random import choices, uniform, shuffle from statistics import median @@ -1406,7 +1406,14 @@ def GenDot(n, c): and y is then constructed choosing xi randomly with decreasing exponent, and calculating yi such that some cancellation occurs. Finally, we permute the vectors x, y randomly and calculate the achieved condition number. + + In general the actual condition number is a little larger than the + anticipated one with quite some variation. However, this is unimportant + because in the following figures we plot the relative error against the + actually achieved condition number. + """ + # Test generator from: https://www.tuhh.de/ti3/paper/rump/OgRuOi05.pdf assert n >= 6 n2 = n // 2 @@ -1435,22 +1442,51 @@ def GenDot(n, c): return DotExample(x, y, DotExact(x, y), Condition(x, y)) - def RelativeError(res, ex): - x, y, target_sumprod, condition = ex - n = DotExact(list(x) + [-res], list(y) + [1]) - return fabs(n / target_sumprod) + def AbsoluteError(x, y, result): + return DotExact(list(x) + [-result], list(y) + [1]) - def Trial(dotfunc, c, n): - ex = GenDot(10, c) - res = dotfunc(ex.x, ex.y) - return RelativeError(res, ex) + def err_gamma(n): # Definition (2.4) + return n * ulp(1.0) / (1 - n * ulp(1.0)) - times = 1000 # Number of trials - n = 20 # Length of vectors - c = 1e30 # Target condition number + def dotk_bound(cond, n, K=3): # Proposition (5.11) + "Relative error bound for DotK" + return ulp(1.0) + 2 * err_gamma(4*n-2)**2 + 1/2 * err_gamma(4*n-2)**K * cond - relative_err = median(Trial(math.sumprod, c, n) for i in range(times)) - 
self.assertLess(relative_err, 1e-16) + # For a vector length of 20, the triple length sumprod calculation's + # theoretical error bound starts to degrade above a condition + # number of 1e25 where we get just under 52 bits of accuracy: + # + # >>> -log2(dotk_bound(1e25, n=20, K=3)) + # 51.84038876713239 + # >>> -log2(dotk_bound(1e26, n=20, K=3)) + # 50.8823973719395 + # >>> -log2(dotk_bound(1e27, n=20, K=3)) + # 48.3334013169794 + # + # The paper notes that the theoretical error bound is pessimistic + # by several orders of magnitude, so our test can use a higher + # condition number. (See Table 6.2) + # + # Here, we test a vector length of 20 and condition number of 1e28 + # for an absolute error difference within 1 ulp. This should be + # sufficiently loose to always pass, but tight enough to fail in case + # of an implementation error, an incorrect compiler optimization, + # or an inadequate libmath implementation of fma(). If the latter + # ends up being a problem, we can revise dl_mul() to use Dekker's + # mul12() algorithm which slows sumprod() by 20% but would not be + # dependent on libmath() meeting the C99 specification for fma(). + + vector_length = 20 + target_condition_number = 1e28 + for i in range(5_000): + # Generate examples centered around a quarter of the target condition + # number. Reject actual condition numbers higher than our target. + ex = GenDot(n=vector_length, c=target_condition_number / 4) + if ex.condition > target_condition_number: + continue + result = math.sumprod(ex.x, ex.y) + error = AbsoluteError(ex.x, ex.y, result) + self.assertLess(fabs(error), ulp(result), (ex, result, error)) def testModf(self): self.assertRaises(TypeError, math.modf)