diff --git a/_unittests/ut_mlmodel/test_tsne_predictable.py b/_unittests/ut_mlmodel/test_tsne_predictable.py index 4e10bb1d..39692b0c 100644 --- a/_unittests/ut_mlmodel/test_tsne_predictable.py +++ b/_unittests/ut_mlmodel/test_tsne_predictable.py @@ -6,17 +6,20 @@ import numpy from numpy.random import RandomState from sklearn import datasets +from sklearn.exceptions import ConvergenceWarning from sklearn.preprocessing import StandardScaler from sklearn.neighbors import KNeighborsRegressor from sklearn.neural_network import MLPRegressor from sklearn.manifold import TSNE -from pyquickhelper.pycode import ExtTestCase, skipif_circleci +from pyquickhelper.pycode import ( + ExtTestCase, skipif_circleci, ignore_warnings) from mlinsights.mlmodel import PredictableTSNE from mlinsights.mlmodel import test_sklearn_pickle, test_sklearn_clone class TestPredictableTSNE(ExtTestCase): + @ignore_warnings(ConvergenceWarning) def test_predictable_tsne(self): iris = datasets.load_iris() X, y = iris.data[:20], iris.target[:20] @@ -28,6 +31,7 @@ def test_predictable_tsne(self): self.assertNotEmpty(pred) @skipif_circleci('stuck') + @ignore_warnings(ConvergenceWarning) def test_predictable_tsne_knn(self): iris = datasets.load_iris() X, y = iris.data[:20], iris.target[:20] @@ -39,6 +43,7 @@ def test_predictable_tsne_knn(self): self.assertIsInstance(clr.estimator_, KNeighborsRegressor) self.assertEqual(pred.shape, (X.shape[0], 2)) + @ignore_warnings(ConvergenceWarning) def test_predictable_tsne_intercept_weights(self): iris = datasets.load_iris() X, y = iris.data[:20], iris.target[:20] @@ -48,15 +53,18 @@ def test_predictable_tsne_intercept_weights(self): self.assertGreater(clr.loss_, 0) self.assertEqual(acc.shape, (X.shape[0], 2)) + @ignore_warnings(ConvergenceWarning) def test_predictable_tsne_pickle(self): iris = datasets.load_iris() X, y = iris.data[:20], iris.target[:20] test_sklearn_pickle(lambda: PredictableTSNE(), X, y) + @ignore_warnings(ConvergenceWarning) def 
test_predictable_tsne_clone(self): self.maxDiff = None test_sklearn_clone(lambda: PredictableTSNE()) + @ignore_warnings(ConvergenceWarning) def test_predictable_tsne_relevance(self): state = RandomState(seed=0) Xs = [] diff --git a/mlinsights/mlbatch/pipeline_cache.py b/mlinsights/mlbatch/pipeline_cache.py index c7622f6f..6025993e 100644 --- a/mlinsights/mlbatch/pipeline_cache.py +++ b/mlinsights/mlbatch/pipeline_cache.py @@ -2,7 +2,7 @@ @file @brief Caches training. """ -from distutils.version import StrictVersion +from distutils.version import StrictVersion # pylint: disable=W0402 from sklearn import __version__ as skl_version from sklearn.base import clone from sklearn.pipeline import Pipeline, _fit_transform_one diff --git a/mlinsights/mlmodel/predictable_tsne.py b/mlinsights/mlmodel/predictable_tsne.py index 715bf5d9..75690573 100644 --- a/mlinsights/mlmodel/predictable_tsne.py +++ b/mlinsights/mlmodel/predictable_tsne.py @@ -20,23 +20,19 @@ class PredictableTSNE(BaseEstimator, TransformerMixin): which approximates the outputs of a :epkg:`TSNE` transformer. Notebooks :ref:`predictabletsnerst` gives an example on how to use this class. 
+ + :param normalizer: None by default + :param transformer: :epkg:`sklearn:manifold:TSNE` by default + :param estimator: :epkg:`sklearn:neural_network:MLPRegressor` by default + :param normalize: normalizes the outputs, centers and normalizes + the output of the *t-SNE* and applies that same + normalization to the prediction of the estimator + :param keep_tsne_outputs: if True, raw outputs of + :epkg:`TSNE` are stored in member `tsne_outputs_` """ def __init__(self, normalizer=None, transformer=None, estimator=None, normalize=True, keep_tsne_outputs=False): - """ - @param normalizer None by default - @param transformer :epkg:`sklearn:manifold:TSNE` - by default - @param estimator :epkg:`sklearn:neural_network:MLPRegressor` - by default - @param normalize normalizes the outputs, centers and normalizes - the output of the *t-SNE* and applies that same - normalization to he prediction of the estimator - @param keep_tsne_output if True, keep raw outputs of - :epkg:`TSNE` is stored in member - *tsne_outputs_* - """ TransformerMixin.__init__(self) BaseEstimator.__init__(self) if estimator is None: @@ -98,6 +94,9 @@ def fit(self, X, y, sample_weight=None): self.normalizer_ = None self.transformer_ = clone(self.transformer) + if (hasattr(self.transformer_, 'perplexity') and + self.transformer_.perplexity >= X.shape[0]): + self.transformer_.perplexity = X.shape[0] - 1 sig = inspect.signature(self.transformer.fit_transform) pars = {} diff --git a/mlinsights/plotting/visualize.py b/mlinsights/plotting/visualize.py index 582232dd..a211a161 100644 --- a/mlinsights/plotting/visualize.py +++ b/mlinsights/plotting/visualize.py @@ -219,8 +219,7 @@ def pipeline2dot(pipe, data, **params): elif isinstance(raw_data, numpy.ndarray): if len(raw_data.shape) != 2: raise NotImplementedError( # pragma: no cover - f"Unexpected training data dimension: {data.shape}." 
- f"") # pylint: disable=E1101 + f"Unexpected training data dimension {raw_data.shape}.") for i in range(raw_data.shape[1]): data['X%d' % i] = 'sch0:f%d' % i elif not isinstance(raw_data, list):