137
137
from itertools import groupby , repeat
138
138
from bisect import bisect_left , bisect_right
139
139
from math import hypot , sqrt , fabs , exp , erf , tau , log , fsum
140
- from operator import itemgetter , mul
140
+ from operator import mul
141
141
from collections import Counter , namedtuple
142
142
143
143
_SQRT2 = sqrt (2.0 )
@@ -248,6 +248,28 @@ def _exact_ratio(x):
248
248
249
249
x is expected to be an int, Fraction, Decimal or float.
250
250
"""
251
+
252
+ # XXX We should revisit whether using fractions to accumulate exact
253
+ # ratios is the right way to go.
254
+
255
+ # The integer ratios for binary floats can have numerators or
256
+ # denominators with over 300 decimal digits. The problem is more
257
+ # acute with decimal floats where the the default decimal context
258
+ # supports a huge range of exponents from Emin=-999999 to
259
+ # Emax=999999. When expanded with as_integer_ratio(), numbers like
260
+ # Decimal('3.14E+5000') and Decimal('3.14E-5000') have large
261
+ # numerators or denominators that will slow computation.
262
+
263
+ # When the integer ratios are accumulated as fractions, the size
264
+ # grows to cover the full range from the smallest magnitude to the
265
+ # largest. For example, Fraction(3.14E+300) + Fraction(3.14E-300),
266
+ # has a 616 digit numerator. Likewise,
267
+ # Fraction(Decimal('3.14E+5000')) + Fraction(Decimal('3.14E-5000'))
268
+ # has 10,003 digit numerator.
269
+
270
+ # This doesn't seem to have been problem in practice, but it is a
271
+ # potential pitfall.
272
+
251
273
try :
252
274
return x .as_integer_ratio ()
253
275
except AttributeError :
@@ -305,28 +327,60 @@ def _fail_neg(values, errmsg='negative value'):
305
327
raise StatisticsError (errmsg )
306
328
yield x
307
329
308
- def _isqrt_frac_rto (n : int , m : int ) -> float :
330
+
331
+ def _integer_sqrt_of_frac_rto (n : int , m : int ) -> int :
309
332
"""Square root of n/m, rounded to the nearest integer using round-to-odd."""
310
333
# Reference: https://www.lri.fr/~melquion/doc/05-imacs17_1-expose.pdf
311
334
a = math .isqrt (n // m )
312
335
return a | (a * a * m != n )
313
336
314
- # For 53 bit precision floats, the _sqrt_frac() shift is 109.
315
- _sqrt_shift : int = 2 * sys .float_info .mant_dig + 3
316
337
317
- def _sqrt_frac (n : int , m : int ) -> float :
338
+ # For 53 bit precision floats, the bit width used in
339
+ # _float_sqrt_of_frac() is 109.
340
+ _sqrt_bit_width : int = 2 * sys .float_info .mant_dig + 3
341
+
342
+
343
+ def _float_sqrt_of_frac (n : int , m : int ) -> float :
318
344
"""Square root of n/m as a float, correctly rounded."""
319
345
# See principle and proof sketch at: https://bugs.python.org/msg407078
320
- q = (n .bit_length () - m .bit_length () - _sqrt_shift ) // 2
346
+ q = (n .bit_length () - m .bit_length () - _sqrt_bit_width ) // 2
321
347
if q >= 0 :
322
- numerator = _isqrt_frac_rto (n , m << 2 * q ) << q
348
+ numerator = _integer_sqrt_of_frac_rto (n , m << 2 * q ) << q
323
349
denominator = 1
324
350
else :
325
- numerator = _isqrt_frac_rto (n << - 2 * q , m )
351
+ numerator = _integer_sqrt_of_frac_rto (n << - 2 * q , m )
326
352
denominator = 1 << - q
327
353
return numerator / denominator # Convert to float
328
354
329
355
356
+ def _decimal_sqrt_of_frac (n : int , m : int ) -> Decimal :
357
+ """Square root of n/m as a Decimal, correctly rounded."""
358
+ # Premise: For decimal, computing (n/m).sqrt() can be off
359
+ # by 1 ulp from the correctly rounded result.
360
+ # Method: Check the result, moving up or down a step if needed.
361
+ if n <= 0 :
362
+ if not n :
363
+ return Decimal ('0.0' )
364
+ n , m = - n , - m
365
+
366
+ root = (Decimal (n ) / Decimal (m )).sqrt ()
367
+ nr , dr = root .as_integer_ratio ()
368
+
369
+ plus = root .next_plus ()
370
+ np , dp = plus .as_integer_ratio ()
371
+ # test: n / m > ((root + plus) / 2) ** 2
372
+ if 4 * n * (dr * dp )** 2 > m * (dr * np + dp * nr )** 2 :
373
+ return plus
374
+
375
+ minus = root .next_minus ()
376
+ nm , dm = minus .as_integer_ratio ()
377
+ # test: n / m < ((root + minus) / 2) ** 2
378
+ if 4 * n * (dr * dm )** 2 < m * (dr * nm + dm * nr )** 2 :
379
+ return minus
380
+
381
+ return root
382
+
383
+
330
384
# === Measures of central tendency (averages) ===
331
385
332
386
def mean (data ):
@@ -869,7 +923,7 @@ def stdev(data, xbar=None):
869
923
if hasattr (T , 'sqrt' ):
870
924
var = _convert (mss , T )
871
925
return var .sqrt ()
872
- return _sqrt_frac (mss .numerator , mss .denominator )
926
+ return _float_sqrt_of_frac (mss .numerator , mss .denominator )
873
927
874
928
875
929
def pstdev (data , mu = None ):
@@ -888,10 +942,9 @@ def pstdev(data, mu=None):
888
942
raise StatisticsError ('pstdev requires at least one data point' )
889
943
T , ss = _ss (data , mu )
890
944
mss = ss / n
891
- if hasattr (T , 'sqrt' ):
892
- var = _convert (mss , T )
893
- return var .sqrt ()
894
- return _sqrt_frac (mss .numerator , mss .denominator )
945
+ if issubclass (T , Decimal ):
946
+ return _decimal_sqrt_of_frac (mss .numerator , mss .denominator )
947
+ return _float_sqrt_of_frac (mss .numerator , mss .denominator )
895
948
896
949
897
950
# === Statistics for relations between two inputs ===
0 commit comments