From e4beac592dc06f428f03d929e27f926b1c23ed4b Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 8 May 2022 23:15:00 -0500 Subject: [PATCH 1/5] Simplify main code path. Inputs were already going to be converted to floats. --- Lib/statistics.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/Lib/statistics.py b/Lib/statistics.py index 54f4e132651897..3f1ab5d9071e65 100644 --- a/Lib/statistics.py +++ b/Lib/statistics.py @@ -674,11 +674,7 @@ def median_grouped(data, interval=1): # Interpolate the median using the formula found at: # https://www.cuemath.com/data/median-of-grouped-data/ - try: - L = x - interval / 2 # The lower limit of the median interval. - except TypeError: - # Coerce mixed types to float. - L = float(x) - float(interval) / 2 + L = float(x) - float(interval) / 2 cf = i # Cumulative frequency of the preceding interval f = j - i # Number of elements in the median internal return L + interval * (n / 2 - cf) / f From 2845755009aecb5b84d135cd8828693174daeb91 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Mon, 9 May 2022 00:53:03 -0500 Subject: [PATCH 2/5] Fix return type bug --- Lib/statistics.py | 22 ++++++++++++---------- Lib/test/test_statistics.py | 6 ++++++ 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/Lib/statistics.py b/Lib/statistics.py index 3f1ab5d9071e65..88d85c0be9603b 100644 --- a/Lib/statistics.py +++ b/Lib/statistics.py @@ -611,7 +611,7 @@ def median_high(data): return data[n // 2] -def median_grouped(data, interval=1): +def median_grouped(data, interval=1.0): """Estimates the median for numeric data binned around the midpoints of consecutive, fixed-width intervals. @@ -650,31 +650,33 @@ def median_grouped(data, interval=1): by exact multiples of *interval*. This is essential for getting a correct result. The function does not check this precondition. + Inputs may be any numeric type but will be coerced to floats. 
+ """ data = sorted(data) n = len(data) - if n == 0: + if not n: raise StatisticsError("no median for empty data") - elif n == 1: - return data[0] # Find the value at the midpoint. Remember this corresponds to the # midpoint of the class interval. x = data[n // 2] - # Generate a clear error message for non-numeric data - for obj in (x, interval): - if isinstance(obj, (str, bytes)): - raise TypeError(f'expected a number but got {obj!r}') - # Using O(log n) bisection, find where all the x values occur in the data. # All x will lie within data[i:j]. i = bisect_left(data, x) j = bisect_right(data, x, lo=i) + # Coerce to floats, raising TypeError if not possible + try: + interval = float(interval) + x = float(x) + except ValueError: + raise TypeError(f'Expected expressable as a float') + # Interpolate the median using the formula found at: # https://www.cuemath.com/data/median-of-grouped-data/ - L = float(x) - float(interval) / 2 + L = x - interval / 2.0 # Lower limit of the median interval cf = i # Cumulative frequency of the preceding interval f = j - i # Number of elements in the median internal return L + interval * (n / 2 - cf) / f diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py index ed6021d60bde75..6de98241c294d7 100644 --- a/Lib/test/test_statistics.py +++ b/Lib/test/test_statistics.py @@ -1742,6 +1742,12 @@ def test_repeated_single_value(self): data = [x]*count self.assertEqual(self.func(data), float(x)) + def test_single_value(self): + # Override method from AverageMixin. + # Average of a single value is the value as a float. + for x in (23, 42.5, 1.3e15, Fraction(15, 19), Decimal('0.28')): + self.assertEqual(self.func([x]), float(x)) + def test_odd_fractions(self): # Test median_grouped works with an odd number of Fractions. 
F = Fraction From 21125024c17d4cfb13c347f02e786bf326006a6c Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Mon, 9 May 2022 01:07:28 -0500 Subject: [PATCH 3/5] Tweak the wording a bit --- Lib/statistics.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Lib/statistics.py b/Lib/statistics.py index 88d85c0be9603b..2d831a164596db 100644 --- a/Lib/statistics.py +++ b/Lib/statistics.py @@ -650,7 +650,8 @@ def median_grouped(data, interval=1.0): by exact multiples of *interval*. This is essential for getting a correct result. The function does not check this precondition. - Inputs may be any numeric type but will be coerced to floats. + Inputs may be any numeric type but will be coerced to a float during + the interpolation step. """ data = sorted(data) @@ -667,12 +668,12 @@ def median_grouped(data, interval=1.0): i = bisect_left(data, x) j = bisect_right(data, x, lo=i) - # Coerce to floats, raising TypeError if not possible + # Coerce to floats, raising a TypeError if not possible try: interval = float(interval) x = float(x) except ValueError: - raise TypeError(f'Expected expressable as a float') + raise TypeError(f'Value cannot be converted to a float') # Interpolate the median using the formula found at: # https://www.cuemath.com/data/median-of-grouped-data/ From 4e21d53c4a18e0bea4adb8a2a847513db346c919 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Mon, 9 May 2022 01:20:04 -0500 Subject: [PATCH 4/5] Another wording tweak --- Lib/statistics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/statistics.py b/Lib/statistics.py index 2d831a164596db..2d66b0522f19d5 100644 --- a/Lib/statistics.py +++ b/Lib/statistics.py @@ -650,7 +650,7 @@ def median_grouped(data, interval=1.0): by exact multiples of *interval*. This is essential for getting a correct result. The function does not check this precondition. 
- Inputs may be any numeric type but will be coerced to a float during + Inputs may be any numeric type that can be coerced to a float during the interpolation step. """ From 90bde240799985d7c5279d6dd256c3358bbc4cb4 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Mon, 9 May 2022 01:27:37 -0500 Subject: [PATCH 5/5] Add blurb --- .../next/Library/2022-05-09-01-27-25.gh-issue-92531.vV7S_O.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2022-05-09-01-27-25.gh-issue-92531.vV7S_O.rst diff --git a/Misc/NEWS.d/next/Library/2022-05-09-01-27-25.gh-issue-92531.vV7S_O.rst b/Misc/NEWS.d/next/Library/2022-05-09-01-27-25.gh-issue-92531.vV7S_O.rst new file mode 100644 index 00000000000000..574fa6c4d97991 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-05-09-01-27-25.gh-issue-92531.vV7S_O.rst @@ -0,0 +1,3 @@ +The statistics.median_grouped() function now always returns a float. +Formerly, it did not convert the input type for sequences of length +one.