From a5545d3c7baef08e84d17af79e7d2895b517135a Mon Sep 17 00:00:00 2001 From: Emily Chen Date: Wed, 19 Jun 2024 15:28:17 +0200 Subject: [PATCH 01/15] array API support for mean_absolute_percentage_error --- doc/modules/array_api.rst | 1 + sklearn/metrics/_regression.py | 14 +++++++++----- sklearn/metrics/tests/test_common.py | 4 ++++ 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/doc/modules/array_api.rst b/doc/modules/array_api.rst index a51ee60e47e04..a7d57eeb469fd 100644 --- a/doc/modules/array_api.rst +++ b/doc/modules/array_api.rst @@ -117,6 +117,7 @@ Metrics - :func:`sklearn.metrics.d2_tweedie_score` - :func:`sklearn.metrics.max_error` - :func:`sklearn.metrics.mean_absolute_error` +- :func:`sklearn.metrics.mean_absolute_percentage_error` - :func:`sklearn.metrics.mean_gamma_deviance` - :func:`sklearn.metrics.mean_squared_error` - :func:`sklearn.metrics.mean_tweedie_deviance` diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py index 36a4638718118..cc24f44ac1ba0 100644 --- a/sklearn/metrics/_regression.py +++ b/sklearn/metrics/_regression.py @@ -395,21 +395,25 @@ def mean_absolute_percentage_error( >>> mean_absolute_percentage_error(y_true, y_pred) 112589990684262.48 """ + input_arrays = [y_true, y_pred, sample_weight, multioutput] + xp, _ = get_namespace(*input_arrays) + y_type, y_true, y_pred, multioutput = _check_reg_targets( y_true, y_pred, multioutput ) check_consistent_length(y_true, y_pred, sample_weight) - epsilon = np.finfo(np.float64).eps - mape = np.abs(y_pred - y_true) / np.maximum(np.abs(y_true), epsilon) - output_errors = np.average(mape, weights=sample_weight, axis=0) + epsilon = xp.asarray(xp.finfo(xp.float64).eps) + y_true_abs = xp.abs(y_true) + mape = xp.abs(y_pred - y_true) / xp.where(epsilon < y_true_abs, y_true_abs, epsilon) + output_errors = _average(mape, weights=sample_weight, axis=0) if isinstance(multioutput, str): if multioutput == "raw_values": return output_errors elif multioutput == 
"uniform_average": - # pass None as weights to np.average: uniform mean + # pass None as weights to _average: uniform mean multioutput = None - return np.average(output_errors, weights=multioutput) + return _average(output_errors, weights=multioutput) @validate_params( diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index 6110cbd3d1d13..272170c9c9eb2 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -2013,6 +2013,10 @@ def check_array_api_metric_pairwise(metric, array_namespace, device, dtype_name) additive_chi2_kernel: [check_array_api_metric_pairwise], mean_gamma_deviance: [check_array_api_regression_metric], max_error: [check_array_api_regression_metric], + mean_absolute_percentage_error: [ + check_array_api_regression_metric, + check_array_api_multioutput_regression_metric, + ], chi2_kernel: [check_array_api_metric_pairwise], } From 50f03d3583cf16de26c264593ec5f0ae93d5755d Mon Sep 17 00:00:00 2001 From: Emily Chen Date: Wed, 19 Jun 2024 15:36:17 +0200 Subject: [PATCH 02/15] update PR number --- doc/whats_new/v1.6.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/whats_new/v1.6.rst b/doc/whats_new/v1.6.rst index c98314d5ca1de..8922469eb1beb 100644 --- a/doc/whats_new/v1.6.rst +++ b/doc/whats_new/v1.6.rst @@ -37,6 +37,7 @@ See :ref:`array_api` for more details. 
- :func:`sklearn.metrics.max_error` :pr:`29212` by :user:`Edoardo Abati `; - :func:`sklearn.metrics.mean_absolute_error` :pr:`27736` by :user:`Edoardo Abati ` and :pr:`29143` by :user:`Tialo ` and :user:`Loïc Estève `; +- :func:`sklearn.metrics.mean_absolute_percentage_error` :pr:`29300` by :user:`Emily Chen ` - :func:`sklearn.metrics.mean_gamma_deviance` :pr:`29239` by :usser:`Emily Chen `; - :func:`sklearn.metrics.mean_squared_error` :pr:`29142` by :user:`Yaroslav Korobko `; - :func:`sklearn.metrics.mean_tweedie_deviance` :pr:`28106` by :user:`Thomas Li `; From 2919e995e98a9384c4e714f263b3cb74051911e0 Mon Sep 17 00:00:00 2001 From: Emily Chen Date: Wed, 19 Jun 2024 16:40:24 +0200 Subject: [PATCH 03/15] make the average case always return a float --- sklearn/metrics/_regression.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py index cc24f44ac1ba0..516d6e4469131 100644 --- a/sklearn/metrics/_regression.py +++ b/sklearn/metrics/_regression.py @@ -364,7 +364,7 @@ def mean_absolute_percentage_error( Returns ------- - loss : float or ndarray of floats + loss : float or array of floats If multioutput is 'raw_values', then mean absolute percentage error is returned for each output separately. 
If multioutput is 'uniform_average' or an ndarray of weights, then the @@ -413,7 +413,9 @@ def mean_absolute_percentage_error( # pass None as weights to _average: uniform mean multioutput = None - return _average(output_errors, weights=multioutput) + mean_absolute_percentage_error = _average(output_errors, weights=multioutput) + assert mean_absolute_percentage_error.shape == () + return float(mean_absolute_percentage_error) @validate_params( From 2273e58b492061b538a51c71b36a1a8dfd92a503 Mon Sep 17 00:00:00 2001 From: Emily Chen Date: Wed, 19 Jun 2024 17:58:44 +0200 Subject: [PATCH 04/15] addressing review comments --- sklearn/metrics/_regression.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py index 516d6e4469131..854cce6028bda 100644 --- a/sklearn/metrics/_regression.py +++ b/sklearn/metrics/_regression.py @@ -364,7 +364,7 @@ def mean_absolute_percentage_error( Returns ------- - loss : float or array of floats + loss : float or ndarray of floats If multioutput is 'raw_values', then mean absolute percentage error is returned for each output separately. If multioutput is 'uniform_average' or an ndarray of weights, then the From 230332efb09d7ae8d000084bf9b10d3c6f9a94a5 Mon Sep 17 00:00:00 2001 From: Emily Chen Date: Thu, 20 Jun 2024 14:49:29 +0200 Subject: [PATCH 05/15] fixing typo and bad merge --- doc/whats_new/v1.6.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v1.6.rst b/doc/whats_new/v1.6.rst index 8922469eb1beb..8e3fb8efe52a7 100644 --- a/doc/whats_new/v1.6.rst +++ b/doc/whats_new/v1.6.rst @@ -38,7 +38,7 @@ See :ref:`array_api` for more details. 
- :func:`sklearn.metrics.mean_absolute_error` :pr:`27736` by :user:`Edoardo Abati ` and :pr:`29143` by :user:`Tialo ` and :user:`Loïc Estève `; - :func:`sklearn.metrics.mean_absolute_percentage_error` :pr:`29300` by :user:`Emily Chen ` -- :func:`sklearn.metrics.mean_gamma_deviance` :pr:`29239` by :usser:`Emily Chen `; +- :func:`sklearn.metrics.mean_gamma_deviance` :pr:`29239` by :user:`Emily Chen `; - :func:`sklearn.metrics.mean_squared_error` :pr:`29142` by :user:`Yaroslav Korobko `; - :func:`sklearn.metrics.mean_tweedie_deviance` :pr:`28106` by :user:`Thomas Li `; - :func:`sklearn.metrics.pairwise.additive_chi2_kernel` :pr:`29144` by :user:`Yaroslav Korobko `; From 685b7c2c1acc3667beb10665dd978e0d32e57abf Mon Sep 17 00:00:00 2001 From: Emily Chen Date: Thu, 20 Jun 2024 15:20:08 +0200 Subject: [PATCH 06/15] fixing tests --- sklearn/metrics/_regression.py | 8 +++++--- sklearn/metrics/tests/test_common.py | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py index 854cce6028bda..982eb44903939 100644 --- a/sklearn/metrics/_regression.py +++ b/sklearn/metrics/_regression.py @@ -402,9 +402,11 @@ def mean_absolute_percentage_error( y_true, y_pred, multioutput ) check_consistent_length(y_true, y_pred, sample_weight) - epsilon = xp.asarray(xp.finfo(xp.float64).eps) - y_true_abs = xp.abs(y_true) - mape = xp.abs(y_pred - y_true) / xp.where(epsilon < y_true_abs, y_true_abs, epsilon) + epsilon = xp.asarray(xp.finfo(xp.float64).eps, dtype=xp.float64) + y_true_abs = xp.asarray(xp.abs(y_true), dtype=xp.float64) + mape = xp.asarray(xp.abs(y_pred - y_true), dtype=xp.float64) / xp.where( + epsilon < y_true_abs, y_true_abs, epsilon + ) output_errors = _average(mape, weights=sample_weight, axis=0) if isinstance(multioutput, str): if multioutput == "raw_values": diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index 272170c9c9eb2..0838688d1b9bf 100644 --- 
a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -2015,7 +2015,7 @@ def check_array_api_metric_pairwise(metric, array_namespace, device, dtype_name) max_error: [check_array_api_regression_metric], mean_absolute_percentage_error: [ check_array_api_regression_metric, - check_array_api_multioutput_regression_metric, + check_array_api_regression_metric_multioutput, ], chi2_kernel: [check_array_api_metric_pairwise], } From 852a53ee023598db84c4863351cd4196f514b6a5 Mon Sep 17 00:00:00 2001 From: Emily Chen Date: Fri, 21 Jun 2024 10:40:39 +0200 Subject: [PATCH 07/15] addressing review comments regarding mps float64 typecasting and typo --- doc/whats_new/v1.6.rst | 2 +- sklearn/metrics/_regression.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/whats_new/v1.6.rst b/doc/whats_new/v1.6.rst index 8e3fb8efe52a7..9619a2d983a54 100644 --- a/doc/whats_new/v1.6.rst +++ b/doc/whats_new/v1.6.rst @@ -37,7 +37,7 @@ See :ref:`array_api` for more details. 
- :func:`sklearn.metrics.max_error` :pr:`29212` by :user:`Edoardo Abati `; - :func:`sklearn.metrics.mean_absolute_error` :pr:`27736` by :user:`Edoardo Abati ` and :pr:`29143` by :user:`Tialo ` and :user:`Loïc Estève `; -- :func:`sklearn.metrics.mean_absolute_percentage_error` :pr:`29300` by :user:`Emily Chen ` +- :func:`sklearn.metrics.mean_absolute_percentage_error` :pr:`29300` by :user:`Emily Chen `; - :func:`sklearn.metrics.mean_gamma_deviance` :pr:`29239` by :user:`Emily Chen `; - :func:`sklearn.metrics.mean_squared_error` :pr:`29142` by :user:`Yaroslav Korobko `; - :func:`sklearn.metrics.mean_tweedie_deviance` :pr:`28106` by :user:`Thomas Li `; diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py index 982eb44903939..96a445ad8d17e 100644 --- a/sklearn/metrics/_regression.py +++ b/sklearn/metrics/_regression.py @@ -402,9 +402,9 @@ def mean_absolute_percentage_error( y_true, y_pred, multioutput ) check_consistent_length(y_true, y_pred, sample_weight) - epsilon = xp.asarray(xp.finfo(xp.float64).eps, dtype=xp.float64) - y_true_abs = xp.asarray(xp.abs(y_true), dtype=xp.float64) - mape = xp.asarray(xp.abs(y_pred - y_true), dtype=xp.float64) / xp.where( + epsilon = xp.asarray(xp.finfo(xp.float64).eps, dtype=xp.asarray(0.0).dtype) + y_true_abs = xp.asarray(xp.abs(y_true), dtype=xp.asarray(0.0).dtype) + mape = xp.asarray(xp.abs(y_pred - y_true), dtype=xp.asarray(0.0).dtype) / xp.where( epsilon < y_true_abs, y_true_abs, epsilon ) output_errors = _average(mape, weights=sample_weight, axis=0) From fc5e5961dd3192aa5fdfefad9e1ed4983c859746 Mon Sep 17 00:00:00 2001 From: Emily Chen Date: Wed, 19 Jun 2024 16:40:24 +0200 Subject: [PATCH 08/15] make the average case always return a float --- sklearn/metrics/_regression.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py index 96a445ad8d17e..9d75c6cb94f43 100644 --- a/sklearn/metrics/_regression.py +++ 
b/sklearn/metrics/_regression.py @@ -364,7 +364,7 @@ def mean_absolute_percentage_error( Returns ------- - loss : float or ndarray of floats + loss : float or array of floats If multioutput is 'raw_values', then mean absolute percentage error is returned for each output separately. If multioutput is 'uniform_average' or an ndarray of weights, then the From aeaec8f6016fd1eddc0b02bdb51aff0e83ae7091 Mon Sep 17 00:00:00 2001 From: Emily Chen Date: Wed, 19 Jun 2024 17:58:44 +0200 Subject: [PATCH 09/15] addressing review comments --- sklearn/metrics/_regression.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py index 9d75c6cb94f43..96a445ad8d17e 100644 --- a/sklearn/metrics/_regression.py +++ b/sklearn/metrics/_regression.py @@ -364,7 +364,7 @@ def mean_absolute_percentage_error( Returns ------- - loss : float or array of floats + loss : float or ndarray of floats If multioutput is 'raw_values', then mean absolute percentage error is returned for each output separately. If multioutput is 'uniform_average' or an ndarray of weights, then the From 0565a9fb36470db1e4dad001bdca97728888aa81 Mon Sep 17 00:00:00 2001 From: Emily Chen Date: Wed, 19 Jun 2024 16:40:24 +0200 Subject: [PATCH 10/15] make the average case always return a float --- sklearn/metrics/_regression.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py index 96a445ad8d17e..9d75c6cb94f43 100644 --- a/sklearn/metrics/_regression.py +++ b/sklearn/metrics/_regression.py @@ -364,7 +364,7 @@ def mean_absolute_percentage_error( Returns ------- - loss : float or ndarray of floats + loss : float or array of floats If multioutput is 'raw_values', then mean absolute percentage error is returned for each output separately. 
If multioutput is 'uniform_average' or an ndarray of weights, then the From 23c5de3f627488d18ecdd4e0feb32a43ea2c2606 Mon Sep 17 00:00:00 2001 From: Emily Chen Date: Wed, 19 Jun 2024 17:58:44 +0200 Subject: [PATCH 11/15] addressing review comments --- sklearn/metrics/_regression.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py index 9d75c6cb94f43..96a445ad8d17e 100644 --- a/sklearn/metrics/_regression.py +++ b/sklearn/metrics/_regression.py @@ -364,7 +364,7 @@ def mean_absolute_percentage_error( Returns ------- - loss : float or array of floats + loss : float or ndarray of floats If multioutput is 'raw_values', then mean absolute percentage error is returned for each output separately. If multioutput is 'uniform_average' or an ndarray of weights, then the From 4d1208063d7203b833784e1d0de471d9886efd28 Mon Sep 17 00:00:00 2001 From: Emily Chen Date: Wed, 19 Jun 2024 16:40:24 +0200 Subject: [PATCH 12/15] make the average case always return a float --- sklearn/metrics/_regression.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py index 96a445ad8d17e..9d75c6cb94f43 100644 --- a/sklearn/metrics/_regression.py +++ b/sklearn/metrics/_regression.py @@ -364,7 +364,7 @@ def mean_absolute_percentage_error( Returns ------- - loss : float or ndarray of floats + loss : float or array of floats If multioutput is 'raw_values', then mean absolute percentage error is returned for each output separately. 
If multioutput is 'uniform_average' or an ndarray of weights, then the From ff9b82c1c1913be9c14010b66f969477e0c5e13d Mon Sep 17 00:00:00 2001 From: Emily Chen Date: Wed, 19 Jun 2024 17:58:44 +0200 Subject: [PATCH 13/15] addressing review comments --- sklearn/metrics/_regression.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py index 9d75c6cb94f43..96a445ad8d17e 100644 --- a/sklearn/metrics/_regression.py +++ b/sklearn/metrics/_regression.py @@ -364,7 +364,7 @@ def mean_absolute_percentage_error( Returns ------- - loss : float or array of floats + loss : float or ndarray of floats If multioutput is 'raw_values', then mean absolute percentage error is returned for each output separately. If multioutput is 'uniform_average' or an ndarray of weights, then the From ddebb21dfc497f0ec86464de1b31d46c2d6038f6 Mon Sep 17 00:00:00 2001 From: Emily Chen Date: Mon, 8 Jul 2024 13:59:02 +0200 Subject: [PATCH 14/15] fixing bad push --- sklearn/metrics/_regression.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py index 96a445ad8d17e..14672953ec2a0 100644 --- a/sklearn/metrics/_regression.py +++ b/sklearn/metrics/_regression.py @@ -397,14 +397,15 @@ def mean_absolute_percentage_error( """ input_arrays = [y_true, y_pred, sample_weight, multioutput] xp, _ = get_namespace(*input_arrays) + dtype = _find_matching_floating_dtype(y_true, y_pred, sample_weight, xp=xp) y_type, y_true, y_pred, multioutput = _check_reg_targets( y_true, y_pred, multioutput ) check_consistent_length(y_true, y_pred, sample_weight) - epsilon = xp.asarray(xp.finfo(xp.float64).eps, dtype=xp.asarray(0.0).dtype) - y_true_abs = xp.asarray(xp.abs(y_true), dtype=xp.asarray(0.0).dtype) - mape = xp.asarray(xp.abs(y_pred - y_true), dtype=xp.asarray(0.0).dtype) / xp.where( + epsilon = xp.asarray(xp.finfo(xp.float64).eps, dtype=dtype) + y_true_abs = 
xp.asarray(xp.abs(y_true), dtype=dtype) + mape = xp.asarray(xp.abs(y_pred - y_true), dtype=dtype) / xp.where( epsilon < y_true_abs, y_true_abs, epsilon ) output_errors = _average(mape, weights=sample_weight, axis=0) From c191a60433d09a97221bf876cee9c7c673c1bd8a Mon Sep 17 00:00:00 2001 From: Emily Chen Date: Fri, 12 Jul 2024 14:38:37 +0200 Subject: [PATCH 15/15] address review comments --- sklearn/metrics/_regression.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py index 14672953ec2a0..482d5dc260b31 100644 --- a/sklearn/metrics/_regression.py +++ b/sklearn/metrics/_regression.py @@ -405,8 +405,8 @@ def mean_absolute_percentage_error( check_consistent_length(y_true, y_pred, sample_weight) epsilon = xp.asarray(xp.finfo(xp.float64).eps, dtype=dtype) y_true_abs = xp.asarray(xp.abs(y_true), dtype=dtype) - mape = xp.asarray(xp.abs(y_pred - y_true), dtype=dtype) / xp.where( - epsilon < y_true_abs, y_true_abs, epsilon + mape = xp.asarray(xp.abs(y_pred - y_true), dtype=dtype) / xp.maximum( + y_true_abs, epsilon ) output_errors = _average(mape, weights=sample_weight, axis=0) if isinstance(multioutput, str):