
Fixes perplexity issue with PredictableTSNE #110


Merged · 2 commits · Aug 6, 2022
10 changes: 9 additions & 1 deletion _unittests/ut_mlmodel/test_tsne_predictable.py
@@ -6,17 +6,20 @@
import numpy
from numpy.random import RandomState
from sklearn import datasets
from sklearn.exceptions import ConvergenceWarning
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.manifold import TSNE
from pyquickhelper.pycode import ExtTestCase, skipif_circleci
from pyquickhelper.pycode import (
ExtTestCase, skipif_circleci, ignore_warnings)
from mlinsights.mlmodel import PredictableTSNE
from mlinsights.mlmodel import test_sklearn_pickle, test_sklearn_clone


class TestPredictableTSNE(ExtTestCase):

@ignore_warnings(ConvergenceWarning)
def test_predictable_tsne(self):
iris = datasets.load_iris()
X, y = iris.data[:20], iris.target[:20]
@@ -28,6 +31,7 @@ def test_predictable_tsne(self):
self.assertNotEmpty(pred)

@skipif_circleci('stuck')
@ignore_warnings(ConvergenceWarning)
def test_predictable_tsne_knn(self):
iris = datasets.load_iris()
X, y = iris.data[:20], iris.target[:20]
@@ -39,6 +43,7 @@ def test_predictable_tsne_knn(self):
self.assertIsInstance(clr.estimator_, KNeighborsRegressor)
self.assertEqual(pred.shape, (X.shape[0], 2))

@ignore_warnings(ConvergenceWarning)
def test_predictable_tsne_intercept_weights(self):
iris = datasets.load_iris()
X, y = iris.data[:20], iris.target[:20]
@@ -48,15 +53,18 @@ def test_predictable_tsne_intercept_weights(self):
self.assertGreater(clr.loss_, 0)
self.assertEqual(acc.shape, (X.shape[0], 2))

@ignore_warnings(ConvergenceWarning)
def test_predictable_tsne_pickle(self):
iris = datasets.load_iris()
X, y = iris.data[:20], iris.target[:20]
test_sklearn_pickle(lambda: PredictableTSNE(), X, y)

@ignore_warnings(ConvergenceWarning)
def test_predictable_tsne_clone(self):
self.maxDiff = None
test_sklearn_clone(lambda: PredictableTSNE())

@ignore_warnings(ConvergenceWarning)
def test_predictable_tsne_relevance(self):
state = RandomState(seed=0)
Xs = []
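
The decorator added to each test above comes from pyquickhelper and silences the ConvergenceWarning that MLPRegressor, the default estimator behind PredictableTSNE, tends to raise on the 20-sample iris fixtures used here. A rough standard-library equivalent of what the decorator does (an illustration only, not pyquickhelper's actual implementation):

import warnings
from functools import wraps
from sklearn.exceptions import ConvergenceWarning

def ignore_convergence_warnings(func):
    # Hypothetical stand-in for pyquickhelper's ignore_warnings decorator:
    # run the wrapped test with ConvergenceWarning filtered out.
    @wraps(func)
    def wrapper(*args, **kwargs):
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", ConvergenceWarning)
            return func(*args, **kwargs)
    return wrapper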
2 changes: 1 addition & 1 deletion mlinsights/mlbatch/pipeline_cache.py
@@ -2,7 +2,7 @@
@file
@brief Caches training.
"""
from distutils.version import StrictVersion
from distutils.version import StrictVersion # pylint: disable=W0402
from sklearn import __version__ as skl_version
from sklearn.base import clone
from sklearn.pipeline import Pipeline, _fit_transform_one
25 changes: 12 additions & 13 deletions mlinsights/mlmodel/predictable_tsne.py
@@ -20,23 +20,19 @@ class PredictableTSNE(BaseEstimator, TransformerMixin):
which approximates the outputs of a :epkg:`TSNE` transformer.
The notebook :ref:`predictabletsnerst` gives an example of how to
use this class.

:param normalizer: None by default
:param transformer: :epkg:`sklearn:manifold:TSNE` by default
:param estimator: :epkg:`sklearn:neural_network:MLPRegressor` by default
:param normalize: normalizes the outputs, centers and normalizes
the output of the *t-SNE* and applies that same
normalization to the prediction of the estimator
:param keep_tsne_outputs: if True, raw outputs of
:epkg:`TSNE` are stored in member `tsne_outputs_`
"""

def __init__(self, normalizer=None, transformer=None, estimator=None,
normalize=True, keep_tsne_outputs=False):
"""
@param normalizer None by default
@param transformer :epkg:`sklearn:manifold:TSNE`
by default
@param estimator :epkg:`sklearn:neural_network:MLPRegressor`
by default
@param normalize normalizes the outputs, centers and normalizes
the output of the *t-SNE* and applies that same
normalization to he prediction of the estimator
@param keep_tsne_output if True, keep raw outputs of
:epkg:`TSNE` is stored in member
*tsne_outputs_*
"""
TransformerMixin.__init__(self)
BaseEstimator.__init__(self)
if estimator is None:
@@ -98,6 +94,9 @@ def fit(self, X, y, sample_weight=None):
self.normalizer_ = None

self.transformer_ = clone(self.transformer)
if (hasattr(self.transformer_, 'perplexity') and
self.transformer_.perplexity >= X.shape[0]):
self.transformer_.perplexity = X.shape[0] - 1

sig = inspect.signature(self.transformer.fit_transform)
pars = {}
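
This hunk is the actual fix: before fitting, the cloned TSNE is inspected and, when its perplexity is not strictly smaller than the number of training samples, it is capped at n_samples - 1. Recent scikit-learn releases reject such a configuration, so fitting PredictableTSNE on a small dataset with TSNE's default perplexity of 30 used to fail. A minimal sketch of the behavior with the cap applied:

import numpy
from sklearn.manifold import TSNE
from mlinsights.mlmodel import PredictableTSNE

# 20 samples is below TSNE's default perplexity of 30; without the cap,
# recent scikit-learn versions raise "perplexity must be less than n_samples".
X = numpy.random.randn(20, 4)
y = numpy.random.randint(0, 3, size=20)

model = PredictableTSNE(transformer=TSNE(n_components=2))
model.fit(X, y)

# The cloned transformer used internally now has perplexity = 19 (n_samples - 1).
print(model.transformer_.perplexity)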
3 changes: 1 addition & 2 deletions mlinsights/plotting/visualize.py
@@ -219,8 +219,7 @@ def pipeline2dot(pipe, data, **params):
elif isinstance(raw_data, numpy.ndarray):
if len(raw_data.shape) != 2:
raise NotImplementedError( # pragma: no cover
f"Unexpected training data dimension: {data.shape}."
f"") # pylint: disable=E1101
f"Unexpected training data dimension {raw_data.shape}.")
for i in range(raw_data.shape[1]):
data['X%d' % i] = 'sch0:f%d' % i
elif not isinstance(raw_data, list):
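
The last change corrects the error message raised for non-2-D training data so it reports the shape of the array actually being inspected (raw_data) rather than the mutated data argument. A small sketch of the happy path, assuming pipeline2dot is exposed from mlinsights.plotting and returns a DOT string:

import numpy
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from mlinsights.plotting import pipeline2dot

# pipeline2dot expects 2-D training data; a 1-D array would now trigger
# the corrected message, which includes the offending array's shape.
X = numpy.random.randn(30, 4)
pipe = make_pipeline(StandardScaler(), LogisticRegression())
dot = pipeline2dot(pipe, X)
print(dot.splitlines()[0])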