Median absolute error #3761

Merged 6 commits on Oct 12, 2014
1 change: 1 addition & 0 deletions doc/modules/classes.rst
@@ -770,6 +770,7 @@ details.
metrics.explained_variance_score
metrics.mean_absolute_error
metrics.mean_squared_error
metrics.median_absolute_error
metrics.r2_score

Multilabel ranking metrics
31 changes: 30 additions & 1 deletion doc/modules/model_evaluation.rst
@@ -72,6 +72,7 @@ Scoring Function
**Regression**
'mean_absolute_error' :func:`metrics.mean_absolute_error`
'mean_squared_error' :func:`metrics.mean_squared_error`
'median_absolute_error' :func:`metrics.median_absolute_error`
'r2' :func:`metrics.r2_score`
====================== ======================================= ==================================

@@ -83,7 +84,7 @@ Usage examples:
>>> model = svm.SVC()
>>> cross_validation.cross_val_score(model, X, y, scoring='wrong_choice')
Traceback (most recent call last):
ValueError: 'wrong_choice' is not a valid scoring value. Valid options are ['accuracy', 'adjusted_rand_score', 'average_precision', 'f1', 'log_loss', 'mean_absolute_error', 'mean_squared_error', 'precision', 'r2', 'recall', 'roc_auc']
ValueError: 'wrong_choice' is not a valid scoring value. Valid options are ['accuracy', 'adjusted_rand_score', 'average_precision', 'f1', 'log_loss', 'mean_absolute_error', 'mean_squared_error', 'median_absolute_error', 'precision', 'r2', 'recall', 'roc_auc']
>>> clf = svm.SVC(probability=True, random_state=0)
>>> cross_validation.cross_val_score(clf, X, y, scoring='log_loss') # doctest: +ELLIPSIS
array([-0.07..., -0.16..., -0.06...])
@@ -1087,6 +1088,34 @@ function::
for an example of mean squared error usage to
evaluate gradient boosting regression.

Median absolute error
---------------------

The :func:`median_absolute_error` is particularly interesting because it is
robust to outliers. The loss is calculated by taking the median of all absolute
differences between the target and the prediction.

If :math:`\hat{y}_i` is the predicted value of the :math:`i`-th sample
and :math:`y_i` is the corresponding true value, then the median absolute error
(MedAE) estimated over :math:`n_{\text{samples}}` is defined as

.. math::

\text{MedAE}(y, \hat{y}) = \text{median}(\mid y_1 - \hat{y}_1 \mid, \ldots, \mid y_n - \hat{y}_n \mid).

Here is a small example of usage of the :func:`median_absolute_error`
function::

>>> from sklearn.metrics import median_absolute_error
>>> y_true = [3, -0.5, 2, 7]
>>> y_pred = [2.5, 0.0, 2, 8]
>>> median_absolute_error(y_true, y_pred)
0.5
>>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
>>> y_pred = [[0, 2], [-1, 2], [8, -5]]
>>> median_absolute_error(y_true, y_pred)
1.0

R² score, the coefficient of determination
-------------------------------------------

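A quick illustrative sketch (not part of the changeset) of the robustness claim in the new documentation above: with a single grossly wrong prediction, the mean absolute error is pulled up by the outlier while the median absolute error barely moves::

    >>> from sklearn.metrics import mean_absolute_error, median_absolute_error
    >>> y_true = [3, -0.5, 2, 7, 4]
    >>> y_pred = [2.5, 0.0, 2, 8, 40]  # last prediction is a gross outlier
    >>> mean_absolute_error(y_true, y_pred)    # dominated by the outlier
    7.6
    >>> median_absolute_error(y_true, y_pred)  # essentially unchanged
    0.5
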
2 changes: 2 additions & 0 deletions sklearn/metrics/__init__.py
@@ -47,6 +47,7 @@
from .regression import explained_variance_score
from .regression import mean_absolute_error
from .regression import mean_squared_error
from .regression import median_absolute_error
from .regression import r2_score

from .scorer import make_scorer
@@ -81,6 +82,7 @@
'matthews_corrcoef',
'mean_absolute_error',
'mean_squared_error',
'median_absolute_error',
'mutual_info_score',
'normalized_mutual_info_score',
'pairwise_distances',
1 change: 1 addition & 0 deletions sklearn/metrics/metrics.py
@@ -29,6 +29,7 @@
from .regression import explained_variance_score
from .regression import mean_absolute_error
from .regression import mean_squared_error
from .regression import median_absolute_error
from .regression import r2_score

# Deprecated in 0.16
34 changes: 34 additions & 0 deletions sklearn/metrics/regression.py
@@ -27,6 +27,7 @@
__ALL__ = [
"mean_absolute_error",
"mean_squared_error",
"median_absolute_error",
"r2_score",
"explained_variance_score"
]
@@ -177,6 +178,39 @@ def mean_squared_error(y_true, y_pred, sample_weight=None):
weights=sample_weight)


def median_absolute_error(y_true, y_pred):
"""Median absolute error regression loss

Parameters
----------
y_true : array-like of shape = [n_samples] or [n_samples, n_outputs]
Ground truth (correct) target values.

y_pred : array-like of shape = [n_samples] or [n_samples, n_outputs]
Estimated target values.

Returns
-------
loss : float
A positive floating point value (the best value is 0.0).

Examples
--------
>>> from sklearn.metrics import median_absolute_error
>>> y_true = [3, -0.5, 2, 7]
>>> y_pred = [2.5, 0.0, 2, 8]
>>> median_absolute_error(y_true, y_pred)
0.5
>>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
>>> y_pred = [[0, 2], [-1, 2], [8, -5]]
>>> median_absolute_error(y_true, y_pred)
1.0

"""
y_type, y_true, y_pred = _check_reg_targets(y_true, y_pred)
return np.median(np.abs(y_pred - y_true))


def explained_variance_score(y_true, y_pred, sample_weight=None):
"""Explained variance regression score function

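One detail of the implementation above worth noting: ``np.median`` is called without an ``axis`` argument, so for multioutput input the absolute errors of all samples and outputs are pooled into a single flattened array before the median is taken. A plain-NumPy sketch (illustrative only) reproducing the docstring's multioutput example::

    >>> import numpy as np
    >>> y_true = np.array([[0.5, 1], [-1, 1], [7, -6]], dtype=float)
    >>> y_pred = np.array([[0, 2], [-1, 2], [8, -5]], dtype=float)
    >>> # absolute errors, flattened: 0.5, 1, 0, 1, 1, 1 -> median is 1.0
    >>> np.median(np.abs(y_pred - y_true))
    1.0
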
11 changes: 7 additions & 4 deletions sklearn/metrics/scorer.py
@@ -22,9 +22,9 @@

import numpy as np

from . import (r2_score, mean_absolute_error, mean_squared_error,
accuracy_score, f1_score, roc_auc_score,
average_precision_score,
from . import (r2_score, median_absolute_error, mean_absolute_error,
mean_squared_error, accuracy_score, f1_score,
roc_auc_score, average_precision_score,
precision_score, recall_score, log_loss)
from .cluster import adjusted_rand_score
from ..utils.multiclass import type_of_target
@@ -86,7 +86,7 @@ def __call__(self, estimator, X, y_true, sample_weight=None):
else:
return self._sign * self._score_func(y_true, y_pred,
**self._kwargs)


class _ProbaScorer(_BaseScorer):
def __call__(self, clf, X, y, sample_weight=None):
@@ -316,6 +316,8 @@ def make_scorer(score_func, greater_is_better=True, needs_proba=False,
greater_is_better=False)
mean_absolute_error_scorer = make_scorer(mean_absolute_error,
greater_is_better=False)
median_absolute_error_scorer = make_scorer(median_absolute_error,
greater_is_better=False)

# Standard Classification Scores
accuracy_scorer = make_scorer(accuracy_score)
@@ -337,6 +339,7 @@ def make_scorer(score_func, greater_is_better=True, needs_proba=False,
adjusted_rand_scorer = make_scorer(adjusted_rand_score)

SCORERS = dict(r2=r2_scorer,
median_absolute_error=median_absolute_error_scorer,
mean_absolute_error=mean_absolute_error_scorer,
mean_squared_error=mean_squared_error_scorer,
accuracy=accuracy_scorer, f1=f1_scorer, roc_auc=roc_auc_scorer,
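A hedged usage sketch for the scorer registered above; the toy data and estimator are illustrative, not part of the change. Because the scorer is built with ``greater_is_better=False``, cross-validation returns negated median absolute errors, so values closer to zero are better::

    >>> from sklearn.datasets import make_regression
    >>> from sklearn.linear_model import Ridge
    >>> from sklearn.cross_validation import cross_val_score  # sklearn.model_selection in later releases
    >>> X, y = make_regression(n_samples=200, n_features=10, noise=5.0, random_state=0)
    >>> scores = cross_val_score(Ridge(), X, y, scoring='median_absolute_error')
    >>> bool((scores <= 0).all())  # negated errors, hence non-positive
    True
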
9 changes: 7 additions & 2 deletions sklearn/metrics/tests/test_common.py
@@ -37,6 +37,7 @@
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import median_absolute_error
from sklearn.metrics import precision_score
from sklearn.metrics import r2_score
from sklearn.metrics import recall_score
@@ -87,6 +88,7 @@
REGRESSION_METRICS = {
"mean_absolute_error": mean_absolute_error,
"mean_squared_error": mean_squared_error,
"median_absolute_error": median_absolute_error,
"explained_variance_score": explained_variance_score,
"r2_score": r2_score,
}
@@ -278,7 +280,8 @@

# Regression metrics with "multioutput-continuous" format support
MULTIOUTPUT_METRICS = [
"mean_absolute_error", "mean_squared_error", "r2_score",
"mean_absolute_error", "mean_squared_error", "median_absolute_error",
"r2_score",
]

# Symmetric with respect to their input arguments y_true and y_pred
@@ -291,7 +294,8 @@

"f1_score", "weighted_f1_score", "micro_f1_score", "macro_f1_score",

"matthews_corrcoef_score", "mean_absolute_error", "mean_squared_error"
"matthews_corrcoef_score", "mean_absolute_error", "mean_squared_error",
"median_absolute_error"

]

@@ -321,6 +325,7 @@
"hamming_loss",
"hinge_loss",
"matthews_corrcoef_score",
"median_absolute_error",
]


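The addition to METRICS_WITHOUT_SAMPLE_WEIGHT above is consistent with the signature introduced in regression.py: the new function takes only ``y_true`` and ``y_pred``, so passing a ``sample_weight`` keyword raises a ``TypeError``. A tiny sketch, assuming the function as implemented in this PR::

    >>> from sklearn.metrics import median_absolute_error
    >>> median_absolute_error([1, 2], [1, 2], sample_weight=[1, 1])
    Traceback (most recent call last):
        ...
    TypeError: median_absolute_error() got an unexpected keyword argument 'sample_weight'
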
3 changes: 3 additions & 0 deletions sklearn/metrics/tests/test_regression.py
@@ -11,6 +11,7 @@
from sklearn.metrics import explained_variance_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import median_absolute_error
from sklearn.metrics import r2_score

from sklearn.metrics.regression import _check_reg_targets
@@ -22,6 +23,7 @@ def test_regression_metrics(n_samples=50):

assert_almost_equal(mean_squared_error(y_true, y_pred), 1.)
assert_almost_equal(mean_absolute_error(y_true, y_pred), 1.)
assert_almost_equal(median_absolute_error(y_true, y_pred), 1.)
assert_almost_equal(r2_score(y_true, y_pred), 0.995, 2)
assert_almost_equal(explained_variance_score(y_true, y_pred), 1.)

@@ -45,6 +47,7 @@ def test_multioutput_regression():
def test_regression_metrics_at_limits():
assert_almost_equal(mean_squared_error([0.], [0.]), 0.00, 2)
assert_almost_equal(mean_absolute_error([0.], [0.]), 0.00, 2)
assert_almost_equal(median_absolute_error([0.], [0.]), 0.00, 2)
assert_almost_equal(explained_variance_score([0.], [0.]), 1.00, 2)
assert_almost_equal(r2_score([0., 1], [0., 1]), 1.00, 2)

10 changes: 6 additions & 4 deletions sklearn/metrics/tests/test_score_objects.py
@@ -28,7 +28,8 @@
from sklearn.multiclass import OneVsRestClassifier


REGRESSION_SCORERS = ['r2', 'mean_absolute_error', 'mean_squared_error']
REGRESSION_SCORERS = ['r2', 'mean_absolute_error', 'mean_squared_error',
'median_absolute_error']
CLF_SCORERS = ['accuracy', 'f1', 'roc_auc', 'average_precision', 'precision',
'recall', 'log_loss',
'adjusted_rand_score' # not really, but works
@@ -273,9 +274,10 @@ def test_scorer_sample_weight():
"called with sample weights: {1} vs "
"{2}".format(name, weighted, unweighted))
assert_almost_equal(weighted, ignored,
err_msg="scorer {0} behaves differently when "
"ignoring samples and setting sample_weight to 0: "
"{1} vs {2}".format(name, weighted, ignored))
err_msg="scorer {0} behaves differently when "
"ignoring samples and setting sample_weight to"
" 0: {1} vs {2}".format(name, weighted,
ignored))

except TypeError as e:
assert_true("sample_weight" in str(e),