From a472a796e15688a57857f876dd31ce8c360a3209 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Thu, 18 Nov 2021 10:57:33 +0100 Subject: [PATCH 1/9] TST XFAIL test when unstable openblas configuration --- sklearn/linear_model/tests/test_omp.py | 2 ++ sklearn/utils/__init__.py | 36 +++++++++++++++++++++++++- sklearn/utils/_testing.py | 11 +++++++- 3 files changed, 47 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/tests/test_omp.py b/sklearn/linear_model/tests/test_omp.py index bcc5f4790203f..9d02a32f95d11 100644 --- a/sklearn/linear_model/tests/test_omp.py +++ b/sklearn/linear_model/tests/test_omp.py @@ -6,6 +6,7 @@ from sklearn.utils._testing import assert_array_equal from sklearn.utils._testing import assert_array_almost_equal +from sklearn.utils._testing import fails_if_unstable_openblas from sklearn.utils._testing import ignore_warnings @@ -150,6 +151,7 @@ def test_orthogonal_mp_gram_readonly(): # FIXME: 'normalize' to be removed in 1.4 +@fails_if_unstable_openblas @pytest.mark.filterwarnings("ignore:The default of 'normalize'") def test_estimator(): omp = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs) diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index 3d8a1ca87d210..47baee275e946 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -19,6 +19,7 @@ import warnings import numpy as np +from distutils.version import LooseVersion from scipy.sparse import issparse from .murmurhash import murmurhash3_32 @@ -41,7 +42,7 @@ check_scalar, ) from .. import get_config - +from ..utils.fixes import threadpool_info # Do not deprecate parallel_backend and register_parallel_backend as they are # needed to tune `scikit-learn` behavior and have different effect if called @@ -81,6 +82,39 @@ _IS_32BIT = 8 * struct.calcsize("P") == 32 +def _in_unstable_openblas_configuration(): + """Return True if in an unstable configuration for OpenBLAS""" + + # Import libraries which might load OpenBLAS. + import numpy # noqa + import scipy # noqa + + modules_info = threadpool_info() + + open_blas_used = any(info["internal_api"] == "openblas" for info in modules_info) + if not open_blas_used: + return False + + # OpenBLAS < 0.3.13 causes segfault for Prescott architecture, see: + # https://github.com/scikit-learn/scikit-learn/issues/21361 # noqa + openblas_stable_version = LooseVersion("0.3.13") + for info in modules_info: + if info["internal_api"] != "openblas": + continue + openblas_version = info.get("version") + openblas_architecture = info.get("architecture") + if openblas_version is None or openblas_architecture is None: + # Cannot be sure that we use a good version of OpenBLAS + # Hence assume bad configuration: + return True + if ( + openblas_architecture.lower() == "prescott" + and openblas_version < openblas_stable_version + ): + return True + return False + + class Bunch(dict): """Container object exposing keys as attributes. diff --git a/sklearn/utils/_testing.py b/sklearn/utils/_testing.py index 1e4ecdd53e136..b5f103b40c639 100644 --- a/sklearn/utils/_testing.py +++ b/sklearn/utils/_testing.py @@ -48,7 +48,12 @@ import joblib import sklearn -from sklearn.utils import IS_PYPY, _IS_32BIT, deprecated +from sklearn.utils import ( + IS_PYPY, + _IS_32BIT, + deprecated, + _in_unstable_openblas_configuration, +) from sklearn.utils.multiclass import check_classification_targets from sklearn.utils.validation import ( check_array, @@ -452,6 +457,10 @@ def set_random_state(estimator, random_state=0): not joblib.parallel.mp, reason="joblib is in serial mode" ) + fails_if_unstable_openblas = pytest.mark.xfail( + _in_unstable_openblas_configuration(), + reason="OpenBLAS is unstable for this configuration", + ) # Decorator for tests involving both BLAS calls and multiprocessing. # # Under POSIX (e.g. Linux or OSX), using multiprocessing in conjunction From 9c46d26bc7b5e9f2f7273db3a63f270dccc6819f Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Thu, 18 Nov 2021 11:39:46 +0100 Subject: [PATCH 2/9] Adapt versions parsing Co-authored-By: Adrin Jalali --- sklearn/utils/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index 47baee275e946..7c8ac33916518 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -19,13 +19,13 @@ import warnings import numpy as np -from distutils.version import LooseVersion from scipy.sparse import issparse from .murmurhash import murmurhash3_32 from .class_weight import compute_class_weight, compute_sample_weight from . import _joblib from ..exceptions import DataConversionWarning +from ..externals._packaging.version import parse from .deprecation import deprecated from .fixes import np_version, parse_version from ._estimator_html_repr import estimator_html_repr @@ -97,7 +97,7 @@ def _in_unstable_openblas_configuration(): # OpenBLAS < 0.3.13 causes segfault for Prescott architecture, see: # https://github.com/scikit-learn/scikit-learn/issues/21361 # noqa - openblas_stable_version = LooseVersion("0.3.13") + openblas_stable_version = parse("0.3.13") for info in modules_info: if info["internal_api"] != "openblas": continue From 5ca918a131cc11a93742661b0178943055790dfa Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Thu, 18 Nov 2021 11:53:29 +0100 Subject: [PATCH 3/9] DOC Prefer ifskip and reference #21361 Co-authored-by: Olivier Grisel --- sklearn/utils/__init__.py | 1 + sklearn/utils/_testing.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index 7c8ac33916518..b60327267eb60 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -42,6 +42,7 @@ check_scalar, ) from .. import get_config + from ..utils.fixes import threadpool_info # Do not deprecate parallel_backend and register_parallel_backend as they are diff --git a/sklearn/utils/_testing.py b/sklearn/utils/_testing.py index b5f103b40c639..d8ee52a77b787 100644 --- a/sklearn/utils/_testing.py +++ b/sklearn/utils/_testing.py @@ -457,9 +457,9 @@ def set_random_state(estimator, random_state=0): not joblib.parallel.mp, reason="joblib is in serial mode" ) - fails_if_unstable_openblas = pytest.mark.xfail( + fails_if_unstable_openblas = pytest.mark.skipif( _in_unstable_openblas_configuration(), - reason="OpenBLAS is unstable for this configuration", + reason="OpenBLAS is unstable for this configuration (#21361)", ) # Decorator for tests involving both BLAS calls and multiprocessing. # From 8ea56bb4d1e2b2d93b83d5a7ad1cb18eb48eff63 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Thu, 18 Nov 2021 14:46:09 +0100 Subject: [PATCH 4/9] Fix Julien's scheduler incorrect dispatch --- sklearn/linear_model/tests/test_omp.py | 2 -- sklearn/tests/test_common.py | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/tests/test_omp.py b/sklearn/linear_model/tests/test_omp.py index 9d02a32f95d11..bcc5f4790203f 100644 --- a/sklearn/linear_model/tests/test_omp.py +++ b/sklearn/linear_model/tests/test_omp.py @@ -6,7 +6,6 @@ from sklearn.utils._testing import assert_array_equal from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import fails_if_unstable_openblas from sklearn.utils._testing import ignore_warnings @@ -151,7 +150,6 @@ def test_orthogonal_mp_gram_readonly(): # FIXME: 'normalize' to be removed in 1.4 -@fails_if_unstable_openblas @pytest.mark.filterwarnings("ignore:The default of 'normalize'") def test_estimator(): omp = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs) diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index 483c0c7641a63..c28f332e5da07 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -40,6 +40,7 @@ from sklearn.utils import IS_PYPY from sklearn.utils._tags import _DEFAULT_TAGS, _safe_tags from sklearn.utils._testing import ( + fails_if_unstable_openblas, SkipTest, set_random_state, ) @@ -103,6 +104,7 @@ def _tested_estimators(type_filter=None): @parametrize_with_checks(list(_tested_estimators())) +@fails_if_unstable_openblas def test_estimators(estimator, check, request): # Common tests for estimator instances with ignore_warnings(category=(FutureWarning, ConvergenceWarning, UserWarning)): From e2c27f345ed7a0e02160f3e4fef87f036c3614ae Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Thu, 18 Nov 2021 16:45:25 +0100 Subject: [PATCH 5/9] Simplify Co-authored-by: Olivier Grisel --- sklearn/tests/test_common.py | 2 -- sklearn/utils/__init__.py | 36 ------------------------------- sklearn/utils/_testing.py | 11 +--------- sklearn/utils/estimator_checks.py | 14 +++++++++++- 4 files changed, 14 insertions(+), 49 deletions(-) diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index c28f332e5da07..483c0c7641a63 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -40,7 +40,6 @@ from sklearn.utils import IS_PYPY from sklearn.utils._tags import _DEFAULT_TAGS, _safe_tags from sklearn.utils._testing import ( - fails_if_unstable_openblas, SkipTest, set_random_state, ) @@ -104,7 +103,6 @@ def _tested_estimators(type_filter=None): @parametrize_with_checks(list(_tested_estimators())) -@fails_if_unstable_openblas def test_estimators(estimator, check, request): # Common tests for estimator instances with ignore_warnings(category=(FutureWarning, ConvergenceWarning, UserWarning)): diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index b60327267eb60..1025e92b9bec9 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -25,7 +25,6 @@ from .class_weight import compute_class_weight, compute_sample_weight from . import _joblib from ..exceptions import DataConversionWarning -from ..externals._packaging.version import parse from .deprecation import deprecated from .fixes import np_version, parse_version from ._estimator_html_repr import estimator_html_repr @@ -43,8 +42,6 @@ ) from .. import get_config -from ..utils.fixes import threadpool_info - # Do not deprecate parallel_backend and register_parallel_backend as they are # needed to tune `scikit-learn` behavior and have different effect if called # from the vendored version or or the site-package version. The other are @@ -83,39 +80,6 @@ _IS_32BIT = 8 * struct.calcsize("P") == 32 -def _in_unstable_openblas_configuration(): - """Return True if in an unstable configuration for OpenBLAS""" - - # Import libraries which might load OpenBLAS. - import numpy # noqa - import scipy # noqa - - modules_info = threadpool_info() - - open_blas_used = any(info["internal_api"] == "openblas" for info in modules_info) - if not open_blas_used: - return False - - # OpenBLAS < 0.3.13 causes segfault for Prescott architecture, see: - # https://github.com/scikit-learn/scikit-learn/issues/21361 # noqa - openblas_stable_version = parse("0.3.13") - for info in modules_info: - if info["internal_api"] != "openblas": - continue - openblas_version = info.get("version") - openblas_architecture = info.get("architecture") - if openblas_version is None or openblas_architecture is None: - # Cannot be sure that we use a good version of OpenBLAS - # Hence assume bad configuration: - return True - if ( - openblas_architecture.lower() == "prescott" - and openblas_version < openblas_stable_version - ): - return True - return False - - class Bunch(dict): """Container object exposing keys as attributes. diff --git a/sklearn/utils/_testing.py b/sklearn/utils/_testing.py index 8637b1e3cc359..1724063be2f43 100644 --- a/sklearn/utils/_testing.py +++ b/sklearn/utils/_testing.py @@ -48,12 +48,7 @@ import joblib import sklearn -from sklearn.utils import ( - IS_PYPY, - _IS_32BIT, - deprecated, - _in_unstable_openblas_configuration, -) +from sklearn.utils import IS_PYPY, _IS_32BIT, deprecated from sklearn.utils.multiclass import check_classification_targets from sklearn.utils.validation import ( check_array, @@ -457,10 +452,6 @@ def set_random_state(estimator, random_state=0): not joblib.parallel.mp, reason="joblib is in serial mode" ) - fails_if_unstable_openblas = pytest.mark.skipif( - _in_unstable_openblas_configuration(), - reason="OpenBLAS is unstable for this configuration (#21361)", - ) # Decorator for tests involving both BLAS calls and multiprocessing. # # Under POSIX (e.g. Linux or OSX), using multiprocessing in conjunction diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 5c32b7b8fc36e..9615f5e4d5158 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -53,6 +53,7 @@ from ..model_selection import ShuffleSplit from ..model_selection._validation import _safe_split from ..metrics.pairwise import rbf_kernel, linear_kernel, pairwise_distances +from ..utils.fixes import threadpool_info from ..utils.validation import check_is_fitted from . import shuffle @@ -2080,7 +2081,18 @@ def check_classifiers_train( X_b -= X_b.min() if readonly_memmap: - X_m, y_m, X_b, y_b = create_memmap_backed_data([X_m, y_m, X_b, y_b]) + # OpenBLAS is known to segfault with unaligned data on the Prescott architecture + # See: https://github.com/scipy/scipy/issues/14886 + has_prescott_openblas = any( + True + for info in threadpool_info() + if info["internal_api"] == "openblas" + and info.get("architecture").lower() == "prescott" + ) + + X_m, y_m, X_b, y_b = create_memmap_backed_data( + data=[X_m, y_m, X_b, y_b], aligned=has_prescott_openblas + ) problems = [(X_b, y_b)] tags = _safe_tags(classifier_orig) From 21bb5cf6f92ecb6c967b4b9e8f27fcb6c8e65b66 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Thu, 18 Nov 2021 17:08:04 +0100 Subject: [PATCH 6/9] Do not remove blank line --- sklearn/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/__init__.py b/sklearn/__init__.py index 77ee28271bfaf..39beab8eb5cfa 100644 --- a/sklearn/__init__.py +++ b/sklearn/__init__.py @@ -42,6 +42,7 @@ __version__ = "1.1.dev0" + # On OSX, we can get a runtime error due to multiple OpenMP libraries loaded # simultaneously. This can happen for instance when calling BLAS inside a # prange. Setting the following environment variable allows multiple OpenMP From a51a83fa095ba0c85b835af5247b797f298cc1f9 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Thu, 18 Nov 2021 17:10:31 +0100 Subject: [PATCH 7/9] fixup! Do not remove blank line --- sklearn/__init__.py | 1 - sklearn/utils/__init__.py | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/__init__.py b/sklearn/__init__.py index 39beab8eb5cfa..77ee28271bfaf 100644 --- a/sklearn/__init__.py +++ b/sklearn/__init__.py @@ -42,7 +42,6 @@ __version__ = "1.1.dev0" - # On OSX, we can get a runtime error due to multiple OpenMP libraries loaded # simultaneously. This can happen for instance when calling BLAS inside a # prange. Setting the following environment variable allows multiple OpenMP diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index 1025e92b9bec9..3d8a1ca87d210 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -42,6 +42,7 @@ ) from .. import get_config + # Do not deprecate parallel_backend and register_parallel_backend as they are # needed to tune `scikit-learn` behavior and have different effect if called # from the vendored version or or the site-package version. The other are From 5b422d102b5649418b83d7e842a50a60600924f9 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Thu, 18 Nov 2021 17:25:41 +0100 Subject: [PATCH 8/9] Retrigger CI From 97397b71d9f432098bf194837aae70cbf65d6475 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Thu, 18 Nov 2021 17:33:05 +0100 Subject: [PATCH 9/9] Prudently assume Prescott might be the architecture if it is unknown Co-authored-by: Olivier Grisel --- sklearn/utils/estimator_checks.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 9615f5e4d5158..83deff0d5d7e0 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -2087,7 +2087,8 @@ def check_classifiers_train( True for info in threadpool_info() if info["internal_api"] == "openblas" - and info.get("architecture").lower() == "prescott" + # Prudently assume Prescott might be the architecture if it is unknown. + and info.get("architecture", "prescott").lower() == "prescott" ) X_m, y_m, X_b, y_b = create_memmap_backed_data(