Remove median_absolute_error from METRICS_WITHOUT_SAMPLE_WEIGHT #30787

Open · wants to merge 13 commits into main
6 changes: 6 additions & 0 deletions doc/whats_new/upcoming_changes/sklearn.metrics/30787.fix.rst
@@ -0,0 +1,6 @@
- :func:`metrics.median_absolute_error` now uses `_averaged_weighted_percentile`
  instead of `_weighted_percentile` to calculate the median when `sample_weight`
  is not `None`. This is equivalent to using the "averaged_inverted_cdf" quantile
  method instead of "inverted_cdf", which gives results equivalent to
  `numpy.median` when equal weights are used.
  By :user:`Lucy Liu <lucyleeow>`
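
The following is a minimal NumPy sketch (not the scikit-learn internals; the
helper names are illustrative only) of why "averaged_inverted_cdf" reproduces
numpy.median under equal weights, while "inverted_cdf" picks the lower of the
two middle values for an even number of samples:

import numpy as np

def inverted_cdf_median(values, weights):
    # First value whose cumulative weight reaches 50% of the total weight.
    order = np.argsort(values)
    cdf = np.cumsum(weights[order])
    return values[order][np.searchsorted(cdf, 0.5 * cdf[-1])]

def averaged_inverted_cdf_median(values, weights):
    # Average the inverted CDF of the array and of its negation, mirroring
    # the idea behind _averaged_weighted_percentile.
    return 0.5 * (
        inverted_cdf_median(values, weights)
        - inverted_cdf_median(-values, weights)
    )

values = np.array([1.0, 2.0, 3.0, 4.0])
weights = np.ones_like(values)
print(inverted_cdf_median(values, weights))           # 2.0
print(averaged_inverted_cdf_median(values, weights))  # 2.5
print(np.median(values))                              # 2.5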
4 changes: 2 additions & 2 deletions sklearn/metrics/_regression.py
@@ -28,7 +28,7 @@
_xlogy as xlogy,
)
from ..utils._param_validation import Interval, StrOptions, validate_params
from ..utils.stats import _weighted_percentile
from ..utils.stats import _averaged_weighted_percentile, _weighted_percentile
from ..utils.validation import (
_check_sample_weight,
_num_samples,
@@ -923,7 +923,7 @@ def median_absolute_error(
if sample_weight is None:
output_errors = _median(xp.abs(y_pred - y_true), axis=0)
else:
output_errors = _weighted_percentile(
output_errors = _averaged_weighted_percentile(
xp.abs(y_pred - y_true), sample_weight=sample_weight
)
if isinstance(multioutput, str):
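A small sketch of the user-visible effect of the change above, assuming unit
weights; the "previously returned 2.0" note follows from the "inverted_cdf"
behavior described in the changelog entry:

import numpy as np
from sklearn.metrics import median_absolute_error

y_true = np.zeros(4)
y_pred = np.array([1.0, 2.0, 3.0, 4.0])
# Absolute errors are [1, 2, 3, 4]; their median is 2.5.
print(median_absolute_error(y_true, y_pred))  # 2.5 (unweighted, numpy.median)
print(median_absolute_error(y_true, y_pred, sample_weight=np.ones(4)))
# 2.5 with this fix; previously the weighted path returned 2.0, the
# "inverted_cdf" value.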
16 changes: 11 additions & 5 deletions sklearn/metrics/tests/test_common.py
@@ -555,7 +555,6 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs):

# No Sample weight support
METRICS_WITHOUT_SAMPLE_WEIGHT = {
"median_absolute_error",
"max_error",
"ovo_roc_auc",
"weighted_ovo_roc_auc",
@@ -1474,9 +1473,10 @@ def test_averaging_multilabel_all_ones(name):
check_averaging(name, y_true, y_true_binarize, y_pred, y_pred_binarize, y_score)


def check_sample_weight_invariance(name, metric, y1, y2):
def check_sample_weight_invariance(name, metric, y1, y2, sample_weight=None):
rng = np.random.RandomState(0)
sample_weight = rng.randint(1, 10, size=len(y1))
if sample_weight is None:
sample_weight = rng.randint(1, 10, size=len(y1))

# top_k_accuracy_score always lead to a perfect score for k > 1 in the
# binary case
@@ -1552,7 +1552,11 @@ def check_sample_weight_invariance(name, metric, y1, y2):
if not name.startswith("unnormalized"):
# check that the score is invariant under scaling of the weights by a
# common factor
for scaling in [2, 0.3]:
# Due to floating point instability in the cumulative sum used by
# `median_absolute_error` (not fixable by `stable_cumsum`), the score is
# not always invariant when the weights are scaled by a float.
scaling_values = [2] if name == "median_absolute_error" else [2, 0.3]
lucyleeow (Member, Author) commented on May 30, 2025:
This is a problem due to numerical instability in the cumulative sum (NOT fixed
by stable_cumsum) in _weighted_percentile. It is rare for this problem to
appear.

In this test, the final value in cumulative_sum was a small amount (within the
tolerance of stable_cumsum) higher than the 'actual' value, which resulted in
adjusted_percentile_rank being a small amount higher than the 'actual' value:

adjusted_percentile_rank was 17.400000000000002 when it should have been 17.4,
which just happens to be a value in weight_cdf. Thus when we do searchsorted

xp.searchsorted(
    weight_cdf[feature_idx, ...], adjusted_percentile_rank[feature_idx]
)

the index should be that of the 17.4 element in weight_cdf, but instead it is
the next index. stable_cumsum does not fix this particular problem: no matter
how close the adjusted_percentile_rank value is to the true value, if the true
value is itself present within weight_cdf, searchsorted will return the
adjacent index.

Note that I checked using numpy.quantile (with "inverted_cdf", which now
supports weights) and got the same test failure.

For completeness, I will reference the recent discussion regarding the use of
stable_cumsum (#29431 (comment)): it was decided that it is not required in
_weighted_percentile (cc @ogrisel). Further, numpy's quantile implementation
simply uses cumsum.
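
A minimal NumPy sketch of this edge case; the weight_cdf values below are
invented for illustration, only the 17.4 rank comes from the description above:

import numpy as np

# Hypothetical cumulative weights; 17.4 is present exactly.
weight_cdf = np.array([3.1, 10.2, 17.4, 25.0, 34.8])

print(np.searchsorted(weight_cdf, 17.4))                # 2 -> the 17.4 element
print(np.searchsorted(weight_cdf, 17.400000000000002))  # 3 -> the next element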

A maintainer (Member) commented:
> Note that I checked using numpy.quantile (with "inverted_cdf", which now
> supports weights) and got the same test failure.

Can you check the behavior of numpy.quantile with "averaged_inverted_cdf" and
_averaged_weighted_percentile on this kind of edge case?

A maintainer (Member) added:
EDIT: I am not sure if the comment above was written before the switch from _weighted_percentile to _averaged_weighted_percentile or not.

lucyleeow (Member, Author) commented on Jun 4, 2025:

Sorry, I should have clarified that the failure persists with _averaged_weighted_percentile. I too thought _averaged_weighted_percentile should fix the problem, so I did some more digging.

_averaged_weighted_percentile errors with:

E           Max absolute difference among violations: 0.00704464
E           Max relative difference among violations: 0.0095152
E            ACTUAL: array(0.733313)
E            DESIRED: array(0.740357)

_weighted_percentile errors with:

E           Max absolute difference among violations: 0.01408929
E           Max relative difference among violations: 0.01903039
E            ACTUAL: array(0.726268)
E            DESIRED: array(0.740357)

So _averaged_weighted_percentile halves the error. The reason is:

  • with (+)array, the cumulative sum has the instability problem (i.e.
    adjusted_percentile_rank is 17.400000000000002)
  • with -array, the cumulative sum does not have the instability problem (i.e.
    adjusted_percentile_rank is 17.4) 🙃

If the cumulative sum had the instability problem with both array and -array, I
think the problem would have been fixed.

numpy.quantile with weights only supports inverted_cdf (I double-checked in the
dev docs https://numpy.org/devdocs/reference/generated/numpy.quantile.html and
numpy PRs).
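
For reference, the averaging trick amounts to something like the following (a
paraphrase of _averaged_weighted_percentile from sklearn.utils.stats, not the
exact implementation): when only one of the two one-sided percentiles hits the
off-by-one index, its error contributes only half to the average, which matches
the halved error seen above.

from sklearn.utils.stats import _weighted_percentile

def averaged_weighted_percentile(array, sample_weight, percentile_rank=50):
    # Averages the "inverted CDF" percentile of the array with the mirrored
    # percentile of its negation; an off-by-one on a single side therefore
    # contributes only half of its error to the result.
    return (
        _weighted_percentile(array, sample_weight, percentile_rank)
        - _weighted_percentile(-array, sample_weight, 100 - percentile_rank)
    ) / 2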

for scaling in scaling_values:
assert_allclose(
weighted_score,
metric(y1, y2, sample_weight=sample_weight * scaling),
@@ -1584,8 +1588,10 @@ def test_regression_sample_weight_invariance(name):
# regression
y_true = random_state.random_sample(size=(n_samples,))
y_pred = random_state.random_sample(size=(n_samples,))
sample_weight = np.arange(len(y_true))
lucyleeow (Member, Author) commented:
Changed as suggested in #30787 (comment). This works great.

I've amended check_sample_weight_invariance to optionally take sample_weight,
as I didn't want to change the sample_weight for all tests that use
check_sample_weight_invariance. WDYT @glemaitre?

metric = ALL_METRICS[name]
check_sample_weight_invariance(name, metric, y_true, y_pred)

check_sample_weight_invariance(name, metric, y_true, y_pred, sample_weight)


@pytest.mark.parametrize(
4 changes: 3 additions & 1 deletion sklearn/utils/stats.py
@@ -101,10 +101,12 @@ def _weighted_percentile(array, sample_weight, percentile_rank=50, xp=None):
for feature_idx in range(weight_cdf.shape[0])
],
)
print(f"{adjusted_percentile_rank=} {adjusted_percentile_rank[0]}")
print(f"{weight_cdf=}")
# In rare cases, `percentile_indices` equals to `sorted_idx.shape[0]`
max_idx = sorted_idx.shape[0] - 1
percentile_indices = xp.clip(percentile_indices, 0, max_idx)

print(f"XXXX {percentile_indices=}")
col_indices = xp.arange(array.shape[1], device=device)
percentile_in_sorted = sorted_idx[percentile_indices, col_indices]
