diff --git a/doc/whats_new/upcoming_changes/sklearn.metrics/30787.fix.rst b/doc/whats_new/upcoming_changes/sklearn.metrics/30787.fix.rst
new file mode 100644
index 0000000000000..13edbdfc7874d
--- /dev/null
+++ b/doc/whats_new/upcoming_changes/sklearn.metrics/30787.fix.rst
@@ -0,0 +1,6 @@
+- :func:`metrics.median_absolute_error` now uses `_averaged_weighted_percentile`
+  instead of `_weighted_percentile` to calculate the median when `sample_weight`
+  is not `None`. This is equivalent to using the "averaged_inverted_cdf" instead
+  of the "inverted_cdf" quantile method, which gives results equivalent to
+  `numpy.median` when equal weights are used.
+  By :user:`Lucy Liu <lucyleeow>`
diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py
index e7435756c52b2..3e0148345ffa1 100644
--- a/sklearn/metrics/_regression.py
+++ b/sklearn/metrics/_regression.py
@@ -28,7 +28,7 @@
     _xlogy as xlogy,
 )
 from ..utils._param_validation import Interval, StrOptions, validate_params
-from ..utils.stats import _weighted_percentile
+from ..utils.stats import _averaged_weighted_percentile, _weighted_percentile
 from ..utils.validation import (
     _check_sample_weight,
     _num_samples,
@@ -923,7 +923,7 @@ def median_absolute_error(
     if sample_weight is None:
         output_errors = _median(xp.abs(y_pred - y_true), axis=0)
     else:
-        output_errors = _weighted_percentile(
+        output_errors = _averaged_weighted_percentile(
             xp.abs(y_pred - y_true), sample_weight=sample_weight
         )
     if isinstance(multioutput, str):
diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py
index 238ea821d8340..77e16c2da86c3 100644
--- a/sklearn/metrics/tests/test_common.py
+++ b/sklearn/metrics/tests/test_common.py
@@ -555,7 +555,6 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs):
 
 # No Sample weight support
 METRICS_WITHOUT_SAMPLE_WEIGHT = {
-    "median_absolute_error",
     "max_error",
     "ovo_roc_auc",
     "weighted_ovo_roc_auc",
@@ -1474,9 +1473,10 @@ def test_averaging_multilabel_all_ones(name):
     check_averaging(name, y_true, y_true_binarize, y_pred, y_pred_binarize, y_score)
 
 
-def check_sample_weight_invariance(name, metric, y1, y2):
+def check_sample_weight_invariance(name, metric, y1, y2, sample_weight=None):
     rng = np.random.RandomState(0)
-    sample_weight = rng.randint(1, 10, size=len(y1))
+    if sample_weight is None:
+        sample_weight = rng.randint(1, 10, size=len(y1))
 
     # top_k_accuracy_score always lead to a perfect score for k > 1 in the
     # binary case
@@ -1552,7 +1552,10 @@ def check_sample_weight_invariance(name, metric, y1, y2):
     if not name.startswith("unnormalized"):
         # check that the score is invariant under scaling of the weights by a
         # common factor
-        for scaling in [2, 0.3]:
+        # `median_absolute_error` is not always invariant under float scaling of
+        # the weights, due to floating point error in `cumulative_sum`.
+        scaling_values = [2] if name == "median_absolute_error" else [2, 0.3]
+        for scaling in scaling_values:
             assert_allclose(
                 weighted_score,
                 metric(y1, y2, sample_weight=sample_weight * scaling),
@@ -1584,8 +1587,10 @@ def test_regression_sample_weight_invariance(name):
     # regression
     y_true = random_state.random_sample(size=(n_samples,))
    y_pred = random_state.random_sample(size=(n_samples,))
+    sample_weight = np.arange(len(y_true))
     metric = ALL_METRICS[name]
-    check_sample_weight_invariance(name, metric, y_true, y_pred)
+
+    check_sample_weight_invariance(name, metric, y_true, y_pred, sample_weight)
 
 
 @pytest.mark.parametrize(
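
A minimal sketch (not part of the diff) of the equivalence claimed in the changelog entry: with equal weights, the "averaged_inverted_cdf" quantile method matches `numpy.median`, while "inverted_cdf" picks a single order statistic. This assumes NumPy >= 1.22, where `np.quantile` accepts these `method` names.

```python
import numpy as np

errors = np.array([1.0, 2.0, 3.0, 4.0])  # even sample count: two middle values

# numpy.median averages the two middle order statistics -> 2.5
print(np.median(errors))

# "inverted_cdf" (the method `_weighted_percentile` corresponds to, per the
# changelog) returns a single order statistic -> 2.0
print(np.quantile(errors, 0.5, method="inverted_cdf"))

# "averaged_inverted_cdf" (what `_averaged_weighted_percentile` corresponds to)
# averages the two candidates -> 2.5, matching numpy.median
print(np.quantile(errors, 0.5, method="averaged_inverted_cdf"))
```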
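
And a hedged usage sketch at the metric level: with this patch applied, unit sample weights reproduce the unweighted result, which relies on the median internally. The pre-patch value of 2.0 noted in the comment is inferred from the "inverted_cdf" behaviour shown above, not taken from the diff.

```python
import numpy as np
from sklearn.metrics import median_absolute_error

y_true = np.array([1.0, 2.0, 3.0, 4.0])
y_pred = np.zeros(4)  # absolute errors: [1, 2, 3, 4], median 2.5

unweighted = median_absolute_error(y_true, y_pred)  # uses the median -> 2.5
weighted = median_absolute_error(y_true, y_pred, sample_weight=np.ones(4))

# Before this change the weighted path gave 2.0; after it, both are 2.5.
assert np.isclose(unweighted, weighted)
```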