From 710ff121337309eb9d9eea3ca90f6c5cd39d11ac Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Tue, 25 Jun 2019 09:55:16 +0200 Subject: [PATCH 01/12] Remove random_seed in fit and use the one in init --- metric_learn/constraints.py | 15 +++++--- metric_learn/itml.py | 20 +++++++--- metric_learn/lsml.py | 19 +++++++--- metric_learn/mmc.py | 19 +++++++--- metric_learn/rca.py | 21 +++++++++-- metric_learn/sdml.py | 20 +++++++--- test/metric_learn_test.py | 73 +++++++++++++++++++++++++++++++++++++ 7 files changed, 157 insertions(+), 30 deletions(-) diff --git a/metric_learn/constraints.py b/metric_learn/constraints.py index e591830b..6ad4b09a 100644 --- a/metric_learn/constraints.py +++ b/metric_learn/constraints.py @@ -6,6 +6,7 @@ import warnings from six.moves import xrange from scipy.sparse import coo_matrix +from sklearn.utils import check_random_state __all__ = ['Constraints'] @@ -18,7 +19,8 @@ def __init__(self, partial_labels): self.known_label_idx, = np.where(partial_labels >= 0) self.known_labels = partial_labels[self.known_label_idx] - def adjacency_matrix(self, num_constraints, random_state=np.random): + def adjacency_matrix(self, num_constraints, random_state=None): + random_state = check_random_state(random_state) a, b, c, d = self.positive_negative_pairs(num_constraints, random_state=random_state) row = np.concatenate((a, c)) @@ -30,7 +32,8 @@ def adjacency_matrix(self, num_constraints, random_state=np.random): return adj + adj.T def positive_negative_pairs(self, num_constraints, same_length=False, - random_state=np.random): + random_state=None): + random_state = check_random_state(random_state) a, b = self._pairs(num_constraints, same_label=True, random_state=random_state) c, d = self._pairs(num_constraints, same_label=False, @@ -41,7 +44,8 @@ def positive_negative_pairs(self, num_constraints, same_length=False, return a, b, c, d def _pairs(self, num_constraints, same_label=True, max_iter=10, - random_state=np.random): + random_state=None): + random_state = check_random_state(random_state) num_labels = len(self.known_labels) ab = set() it = 0 @@ -63,13 +67,14 @@ def _pairs(self, num_constraints, same_label=True, max_iter=10, ab = np.array(list(ab)[:num_constraints], dtype=int) return self.known_label_idx[ab.T] - def chunks(self, num_chunks=100, chunk_size=2, random_state=np.random): + def chunks(self, num_chunks=100, chunk_size=2, random_state=None): """ the random state object to be passed must be a numpy random seed """ + random_state = check_random_state(random_state) chunks = -np.ones_like(self.known_label_idx, dtype=int) uniq, lookup = np.unique(self.known_labels, return_inverse=True) - all_inds = [set(np.where(lookup==c)[0]) for c in xrange(len(uniq))] + all_inds = [set(np.where(lookup == c)[0]) for c in xrange(len(uniq))] idx = 0 while idx < num_chunks and all_inds: if len(all_inds) == 1: diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 21303c18..d87eb6be 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -320,7 +320,9 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, tuples will be formed like this: X[indices]. random_state : int or numpy.RandomState or None, optional (default=None) A pseudo random number generator object or a seed for it if int. If - ``prior='random'``, ``random_state`` is used to set the prior. + ``prior='random'``, ``random_state`` is used to set the prior. In any + case, `random_state` is also used to randomly sample constraints from + labels. """ _BaseITML.__init__(self, gamma=gamma, max_iter=max_iter, convergence_threshold=convergence_threshold, @@ -330,7 +332,7 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, self.num_constraints = num_constraints self.bounds = bounds - def fit(self, X, y, random_state=np.random, bounds=None): + def fit(self, X, y, random_state='deprecated', bounds=None): """Create constraints from labels and learn the ITML model. @@ -342,8 +344,11 @@ def fit(self, X, y, random_state=np.random, bounds=None): y : (n) array-like Data labels. - random_state : numpy.random.RandomState, optional - If provided, controls random number generation. + random_state : Not used + .. deprecated:: 0.5.0 + `random_state` in the `fit` function was deprecated in version 0.5.0 + and will be removed in 0.6.0. Set `random_state` at initialization + instead (when instantiating a new `ITML_Supervised` object). bounds : array-like of two numbers Bounds on similarity, aside slack variables, s.t. @@ -364,6 +369,11 @@ def fit(self, X, y, random_state=np.random, bounds=None): ' It has been deprecated in version 0.5.0 and will be' ' removed in 0.6.0. Use the "bounds" parameter of this ' 'fit method instead.', DeprecationWarning) + if random_state != 'deprecated': + warnings.warn('"random_state" parameter in the `fit` function is ' + 'deprecated. Set `random_state` at initialization ' + 'instead (when instantiating a new `ITML_Supervised` ' + 'object).', DeprecationWarning) X, y = self._prepare_inputs(X, y, ensure_min_samples=2) num_constraints = self.num_constraints if num_constraints is None: @@ -372,6 +382,6 @@ def fit(self, X, y, random_state=np.random, bounds=None): c = Constraints(y) pos_neg = c.positive_negative_pairs(num_constraints, - random_state=random_state) + random_state=self.random_state) pairs, y = wrap_pairs(X, pos_neg) return _BaseITML._fit(self, pairs, y, bounds=bounds) diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index f59392c1..4682e130 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -277,7 +277,8 @@ def __init__(self, tol=1e-3, max_iter=1000, prior=None, random_state : int or numpy.RandomState or None, optional (default=None) A pseudo random number generator object or a seed for it if int. If ``init='random'``, ``random_state`` is used to set the random - prior. + prior. In any case, `random_state` is also used to randomly sample + constraints from labels. """ _BaseLSML.__init__(self, tol=tol, max_iter=max_iter, prior=prior, verbose=verbose, preprocessor=preprocessor, @@ -286,7 +287,7 @@ def __init__(self, tol=1e-3, max_iter=1000, prior=None, self.num_constraints = num_constraints self.weights = weights - def fit(self, X, y, random_state=np.random): + def fit(self, X, y, random_state='deprecated'): """Create constraints from labels and learn the LSML model. Parameters @@ -297,13 +298,21 @@ def fit(self, X, y, random_state=np.random): y : (n) array-like Data labels. - random_state : numpy.random.RandomState, optional - If provided, controls random number generation. + random_state : Not used + .. deprecated:: 0.5.0 + `random_state` in the `fit` function was deprecated in version 0.5.0 + and will be removed in 0.6.0. Set `random_state` at initialization + instead (when instantiating a new `LSML_Supervised` object). """ if self.num_labeled != 'deprecated': warnings.warn('"num_labeled" parameter is not used.' ' It has been deprecated in version 0.5.0 and will be' ' removed in 0.6.0', DeprecationWarning) + if random_state != 'deprecated': + warnings.warn('"random_state" parameter in the `fit` function is ' + 'deprecated. Set `random_state` at initialization ' + 'instead (when instantiating a new `LSML_Supervised` ' + 'object).', DeprecationWarning) X, y = self._prepare_inputs(X, y, ensure_min_samples=2) num_constraints = self.num_constraints if num_constraints is None: @@ -312,6 +321,6 @@ def fit(self, X, y, random_state=np.random): c = Constraints(y) pos_neg = c.positive_negative_pairs(num_constraints, same_length=True, - random_state=random_state) + random_state=self.random_state) return _BaseLSML._fit(self, X[np.column_stack(pos_neg)], weights=self.weights) diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index b3e6c203..ba1d2951 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -530,7 +530,8 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6, random_state : int or numpy.RandomState or None, optional (default=None) A pseudo random number generator object or a seed for it if int. If ``init='random'``, ``random_state`` is used to initialize the random - Mahalanobis matrix. + Mahalanobis matrix. In any case, `random_state` is also used to + randomly sample constraints from labels. """ _BaseMMC.__init__(self, max_iter=max_iter, max_proj=max_proj, convergence_threshold=convergence_threshold, @@ -540,7 +541,7 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6, self.num_labeled = num_labeled self.num_constraints = num_constraints - def fit(self, X, y, random_state=np.random): + def fit(self, X, y, random_state='deprecated'): """Create constraints from labels and learn the MMC model. Parameters @@ -549,13 +550,21 @@ def fit(self, X, y, random_state=np.random): Input data, where each row corresponds to a single instance. y : (n) array-like Data labels. - random_state : numpy.random.RandomState, optional - If provided, controls random number generation. + random_state : Not used + .. deprecated:: 0.5.0 + `random_state` in the `fit` function was deprecated in version 0.5.0 + and will be removed in 0.6.0. Set `random_state` at initialization + instead (when instantiating a new `MMC_Supervised` object). """ if self.num_labeled != 'deprecated': warnings.warn('"num_labeled" parameter is not used.' ' It has been deprecated in version 0.5.0 and will be' ' removed in 0.6.0', DeprecationWarning) + if random_state != 'deprecated': + warnings.warn('"random_state" parameter in the `fit` function is ' + 'deprecated. Set `random_state` at initialization ' + 'instead (when instantiating a new `MMC_Supervised` ' + 'object).', DeprecationWarning) X, y = self._prepare_inputs(X, y, ensure_min_samples=2) num_constraints = self.num_constraints if num_constraints is None: @@ -564,6 +573,6 @@ def fit(self, X, y, random_state=np.random): c = Constraints(y) pos_neg = c.positive_negative_pairs(num_constraints, - random_state=random_state) + random_state=self.random_state) pairs, y = wrap_pairs(X, pos_neg) return _BaseMMC._fit(self, pairs, y) diff --git a/metric_learn/rca.py b/metric_learn/rca.py index 1dbffdd6..3e76e4f3 100644 --- a/metric_learn/rca.py +++ b/metric_learn/rca.py @@ -161,7 +161,7 @@ class RCA_Supervised(RCA): def __init__(self, num_dims='deprecated', n_components=None, pca_comps='deprecated', num_chunks=100, chunk_size=2, - preprocessor=None): + preprocessor=None, random_state=None): """Initialize the supervised version of `RCA`. `RCA_Supervised` creates chunks of similar points by first sampling a @@ -184,13 +184,17 @@ def __init__(self, num_dims='deprecated', n_components=None, preprocessor : array-like, shape=(n_samples, n_features) or callable The preprocessor to call to get tuples from indices. If array-like, tuples will be formed like this: X[indices]. + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. + It is used to randomly sample constraints from labels. """ RCA.__init__(self, num_dims=num_dims, n_components=n_components, pca_comps=pca_comps, preprocessor=preprocessor) self.num_chunks = num_chunks self.chunk_size = chunk_size + self.random_state = random_state - def fit(self, X, y, random_state=np.random): + def fit(self, X, y, random_state='deprecated'): """Create constraints from labels and learn the RCA model. Needs num_constraints specified in constructor. @@ -199,10 +203,19 @@ def fit(self, X, y, random_state=np.random): X : (n x d) data matrix each row corresponds to a single instance y : (n) data labels - random_state : a random.seed object to fix the random_state if needed. + random_state : Not used + .. deprecated:: 0.5.0 + `random_state` in the `fit` function was deprecated in version 0.5.0 + and will be removed in 0.6.0. Set `random_state` at initialization + instead (when instantiating a new `RCA_Supervised` object). """ + if random_state != 'deprecated': + warnings.warn('"random_state" parameter in the `fit` function is ' + 'deprecated. Set `random_state` at initialization ' + 'instead (when instantiating a new `RCA_Supervised` ' + 'object).', DeprecationWarning) X, y = self._prepare_inputs(X, y, ensure_min_samples=2) chunks = Constraints(y).chunks(num_chunks=self.num_chunks, chunk_size=self.chunk_size, - random_state=random_state) + random_state=self.random_state) return RCA.fit(self, X, chunks) diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index c5e63fa8..17d6ed76 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -304,7 +304,8 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, prior=None, random_state : int or numpy.RandomState or None, optional (default=None) A pseudo random number generator object or a seed for it if int. If ``init='random'``, ``random_state`` is used to set the random - prior. + prior. In any case, `random_state` is also used to randomly sample + constraints from labels. """ _BaseSDML.__init__(self, balance_param=balance_param, sparsity_param=sparsity_param, prior=prior, @@ -313,7 +314,7 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, prior=None, self.num_labeled = num_labeled self.num_constraints = num_constraints - def fit(self, X, y, random_state=np.random): + def fit(self, X, y, random_state='deprecated'): """Create constraints from labels and learn the SDML model. Parameters @@ -322,9 +323,11 @@ def fit(self, X, y, random_state=np.random): data matrix, where each row corresponds to a single instance y : array-like, shape (n,) data labels, one for each instance - random_state : {numpy.random.RandomState, int}, optional - Random number generator or random seed. If not given, the singleton - numpy.random will be used. + random_state : Not used + .. deprecated:: 0.5.0 + `random_state` in the `fit` function was deprecated in version 0.5.0 + and will be removed in 0.6.0. Set `random_state` at initialization + instead (when instantiating a new `SDML_Supervised` object). Returns ------- @@ -335,6 +338,11 @@ def fit(self, X, y, random_state=np.random): warnings.warn('"num_labeled" parameter is not used.' ' It has been deprecated in version 0.5.0 and will be' ' removed in 0.6.0', DeprecationWarning) + if random_state != 'deprecated': + warnings.warn('"random_state" parameter in the `fit` function is ' + 'deprecated. Set `random_state` at initialization ' + 'instead (when instantiating a new `SDML_Supervised` ' + 'object).', DeprecationWarning) X, y = self._prepare_inputs(X, y, ensure_min_samples=2) num_constraints = self.num_constraints if num_constraints is None: @@ -343,6 +351,6 @@ def fit(self, X, y, random_state=np.random): c = Constraints(y) pos_neg = c.positive_negative_pairs(num_constraints, - random_state=random_state) + random_state=self.random_state) pairs, y = wrap_pairs(X, pos_neg) return _BaseSDML._fit(self, pairs, y) diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index c49c9ef5..4439873e 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -119,6 +119,21 @@ def test_changed_behaviour_warning(self): lsml.fit(pairs) assert any(msg == str(wrn.message) for wrn in raised_warning) + def test_deprecation_random_state(self): + # test that a deprecation message is thrown if random_state is set at + # fit time + # TODO: remove in v.0.6 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + lsml_supervised = LSML_Supervised() + msg = ('"random_state" parameter in the `fit` function is ' + 'deprecated. Set `random_state` at initialization ' + 'instead (when instantiating a new `LSML_Supervised` ' + 'object).') + with pytest.warns(DeprecationWarning) as raised_warning: + lsml_supervised.fit(X, y, random_state=np.random) + assert any(msg == str(wrn.message) for wrn in raised_warning) + class TestITML(MetricTestCase): def test_iris(self): @@ -174,6 +189,21 @@ def test_deprecation_A0(self): itml.fit(pairs, y_pairs) assert any(msg == str(wrn.message) for wrn in raised_warning) + def test_deprecation_random_state(self): + # test that a deprecation message is thrown if random_state is set at + # fit time + # TODO: remove in v.0.6 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + itml_supervised = ITML_Supervised() + msg = ('"random_state" parameter in the `fit` function is ' + 'deprecated. Set `random_state` at initialization ' + 'instead (when instantiating a new `ITML_Supervised` ' + 'object).') + with pytest.warns(DeprecationWarning) as raised_warning: + itml_supervised.fit(X, y, random_state=np.random) + assert any(msg == str(wrn.message) for wrn in raised_warning) + @pytest.mark.parametrize('bounds', [None, (20., 100.), [20., 100.], np.array([20., 100.]), @@ -586,6 +616,20 @@ def test_changed_behaviour_warning(self): sdml.fit(pairs, y_pairs) assert any(msg == str(wrn.message) for wrn in raised_warning) + def test_deprecation_random_state(self): + # test that a deprecation message is thrown if random_state is set at + # fit time + # TODO: remove in v.0.6 + X, y = load_iris(return_X_y=True) + sdml_supervised = SDML_Supervised(balance_param=5e-5) + msg = ('"random_state" parameter in the `fit` function is ' + 'deprecated. Set `random_state` at initialization ' + 'instead (when instantiating a new `SDML_Supervised` ' + 'object).') + with pytest.warns(DeprecationWarning) as raised_warning: + sdml_supervised.fit(X, y, random_state=np.random) + assert any(msg == str(wrn.message) for wrn in raised_warning) + @pytest.mark.skipif(not HAS_SKGGM, reason='The message should be printed only if skggm is ' @@ -895,6 +939,20 @@ def test_rank_deficient_returns_warning(self): rca.fit(X, y) assert any(str(w.message) == msg for w in raised_warnings) + def test_deprecation_random_state(self): + # test that a deprecation message is thrown if random_state is set at + # fit time + # TODO: remove in v.0.6 + X, y = make_classification(random_state=42, n_samples=100) + rca_supervised = RCA_Supervised(num_chunks=20) + msg = ('"random_state" parameter in the `fit` function is ' + 'deprecated. Set `random_state` at initialization ' + 'instead (when instantiating a new `RCA_Supervised` ' + 'object).') + with pytest.warns(DeprecationWarning) as raised_warning: + rca_supervised.fit(X, y, random_state=np.random) + assert any(msg == str(wrn.message) for wrn in raised_warning) + @pytest.mark.parametrize('num_dims', [None, 2]) def test_deprecation_num_dims_rca(num_dims): @@ -1095,6 +1153,21 @@ def test_changed_behaviour_warning(self): mmc.fit(pairs, y_pairs) assert any(msg == str(wrn.message) for wrn in raised_warning) + def test_deprecation_random_state(self): + # test that a deprecation message is thrown if random_state is set at + # fit time + # TODO: remove in v.0.6 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + mmc_supervised = MMC_Supervised() + msg = ('"random_state" parameter in the `fit` function is ' + 'deprecated. Set `random_state` at initialization ' + 'instead (when instantiating a new `MMC_Supervised` ' + 'object).') + with pytest.warns(DeprecationWarning) as raised_warning: + mmc_supervised.fit(X, y, random_state=np.random) + assert any(msg == str(wrn.message) for wrn in raised_warning) + @pytest.mark.parametrize(('algo_class', 'dataset'), [(NCA, make_classification()), From 3a27064a556581152782cfda1b2cb323dfc7ac9e Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Tue, 25 Jun 2019 11:07:35 +0200 Subject: [PATCH 02/12] update tests with the new API --- test/metric_learn_test.py | 15 +++-- test/test_base_metric.py | 2 +- test/test_fit_transform.py | 42 +++++++------ test/test_sklearn_compat.py | 73 +++++++--------------- test/test_transformer_metric_conversion.py | 8 +-- 5 files changed, 58 insertions(+), 82 deletions(-) diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 4439873e..653ab6e5 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -476,11 +476,11 @@ def test_sdml_supervised_raises_warning_msg_installed_skggm(self): X = np.array([[-10., 0.], [10., 0.], [5., 0.], [3., 0.]]) y = [0, 0, 1, 1] sdml_supervised = SDML_Supervised(balance_param=0.5, prior='identity', - sparsity_param=0.01) + sparsity_param=0.01, random_state=rng) msg = ("There was a problem in SDML when using skggm's graphical " "lasso solver.") with pytest.raises(RuntimeError) as raised_error: - sdml_supervised.fit(X, y, random_state=rng) + sdml_supervised.fit(X, y) assert msg == str(raised_error.value) @pytest.mark.skipif(not HAS_SKGGM, @@ -565,8 +565,9 @@ def test_sdml_works_on_non_spd_pb_with_skggm(self): it should work, but scikit-learn's graphical_lasso does not work""" X, y = load_iris(return_X_y=True) sdml = SDML_Supervised(balance_param=0.5, sparsity_param=0.01, - prior='covariance') - sdml.fit(X, y, random_state=np.random.RandomState(42)) + prior='covariance', + random_state=np.random.RandomState(42)) + sdml.fit(X, y) def test_deprecation_use_cov(self): # test that a deprecation message is thrown if use_cov is set at @@ -869,15 +870,13 @@ def test_feature_null_variance(self): X = np.hstack((self.iris_points, np.eye(len(self.iris_points), M=1))) # Apply PCA with the number of components - rca = RCA_Supervised(n_components=2, pca_comps=3, num_chunks=30, - chunk_size=2) + rca = RCA_Supervised(n_components=2, pca_comps=3, num_chunks=30) rca.fit(X, self.iris_labels) csep = class_separation(rca.transform(X), self.iris_labels) self.assertLess(csep, 0.30) # Apply PCA with the minimum variance ratio - rca = RCA_Supervised(n_components=2, pca_comps=0.95, num_chunks=30, - chunk_size=2) + rca = RCA_Supervised(n_components=2, pca_comps=0.95, num_chunks=30) rca.fit(X, self.iris_labels) csep = class_separation(rca.transform(X), self.iris_labels) self.assertLess(csep, 0.30) diff --git a/test/test_base_metric.py b/test/test_base_metric.py index 313948ec..0c1117ed 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -96,7 +96,7 @@ def test_rca(self): "RCA_Supervised(chunk_size=2, " "n_components=None, num_chunks=100, " "num_dims='deprecated', pca_comps='deprecated', " - "preprocessor=None)")) + "preprocessor=None, random_state=None)")) def test_mlkr(self): self.assertEqual(remove_spaces(str(metric_learn.MLKR())), diff --git a/test/test_fit_transform.py b/test/test_fit_transform.py index b7255ea9..a9b2719e 100644 --- a/test/test_fit_transform.py +++ b/test/test_fit_transform.py @@ -30,25 +30,25 @@ def test_cov(self): def test_lsml_supervised(self): seed = np.random.RandomState(1234) - lsml = LSML_Supervised(num_constraints=200) - lsml.fit(self.X, self.y, random_state=seed) + lsml = LSML_Supervised(num_constraints=200, random_state=seed) + lsml.fit(self.X, self.y) res_1 = lsml.transform(self.X) seed = np.random.RandomState(1234) - lsml = LSML_Supervised(num_constraints=200) - res_2 = lsml.fit_transform(self.X, self.y, random_state=seed) + lsml = LSML_Supervised(num_constraints=200, random_state=seed) + res_2 = lsml.fit_transform(self.X, self.y) assert_array_almost_equal(res_1, res_2) def test_itml_supervised(self): seed = np.random.RandomState(1234) - itml = ITML_Supervised(num_constraints=200) - itml.fit(self.X, self.y, random_state=seed) + itml = ITML_Supervised(num_constraints=200, random_state=seed) + itml.fit(self.X, self.y) res_1 = itml.transform(self.X) seed = np.random.RandomState(1234) - itml = ITML_Supervised(num_constraints=200) - res_2 = itml.fit_transform(self.X, self.y, random_state=seed) + itml = ITML_Supervised(num_constraints=200, random_state=seed) + res_2 = itml.fit_transform(self.X, self.y) assert_array_almost_equal(res_1, res_2) @@ -65,14 +65,14 @@ def test_lmnn(self): def test_sdml_supervised(self): seed = np.random.RandomState(1234) sdml = SDML_Supervised(num_constraints=1500, balance_param=1e-5, - prior='identity') - sdml.fit(self.X, self.y, random_state=seed) + prior='identity', random_state=seed) + sdml.fit(self.X, self.y) res_1 = sdml.transform(self.X) seed = np.random.RandomState(1234) sdml = SDML_Supervised(num_constraints=1500, balance_param=1e-5, - prior='identity') - res_2 = sdml.fit_transform(self.X, self.y, random_state=seed) + prior='identity', random_state=seed) + res_2 = sdml.fit_transform(self.X, self.y) assert_array_almost_equal(res_1, res_2) @@ -100,13 +100,15 @@ def test_lfda(self): def test_rca_supervised(self): seed = np.random.RandomState(1234) - rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2) - rca.fit(self.X, self.y, random_state=seed) + rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2, + random_state=seed) + rca.fit(self.X, self.y) res_1 = rca.transform(self.X) seed = np.random.RandomState(1234) - rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2) - res_2 = rca.fit_transform(self.X, self.y, random_state=seed) + rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2, + random_state=seed) + res_2 = rca.fit_transform(self.X, self.y) assert_array_almost_equal(res_1, res_2) @@ -122,13 +124,13 @@ def test_mlkr(self): def test_mmc_supervised(self): seed = np.random.RandomState(1234) - mmc = MMC_Supervised(num_constraints=200) - mmc.fit(self.X, self.y, random_state=seed) + mmc = MMC_Supervised(num_constraints=200, random_state=seed) + mmc.fit(self.X, self.y) res_1 = mmc.transform(self.X) seed = np.random.RandomState(1234) - mmc = MMC_Supervised(num_constraints=200) - res_2 = mmc.fit_transform(self.X, self.y, random_state=seed) + mmc = MMC_Supervised(num_constraints=200, random_state=seed) + res_2 = mmc.fit_transform(self.X, self.y) assert_array_almost_equal(res_1, res_2) diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py index 0c0f098d..722a4b80 100644 --- a/test/test_sklearn_compat.py +++ b/test/test_sklearn_compat.py @@ -24,33 +24,6 @@ quadruplets_learners) -# Wrap the _Supervised methods with a deterministic wrapper for testing. -class deterministic_mixin(object): - def fit(self, X, y): - rs = np.random.RandomState(1234) - return super(deterministic_mixin, self).fit(X, y, random_state=rs) - - -class dLSML(deterministic_mixin, LSML_Supervised): - pass - - -class dITML(deterministic_mixin, ITML_Supervised): - pass - - -class dMMC(deterministic_mixin, MMC_Supervised): - pass - - -class dSDML(deterministic_mixin, SDML_Supervised): - pass - - -class dRCA(deterministic_mixin, RCA_Supervised): - pass - - class TestSklearnCompat(unittest.TestCase): def test_covariance(self): check_estimator(Covariance) @@ -68,36 +41,38 @@ def test_nca(self): check_estimator(NCA) def test_lsml(self): - check_estimator(dLSML) + check_estimator(LSML_Supervised) def test_itml(self): - check_estimator(dITML) + check_estimator(ITML_Supervised) def test_mmc(self): - check_estimator(dMMC) + check_estimator(MMC_Supervised) def test_sdml(self): - def stable_init(self, sparsity_param=0.01, num_labeled='deprecated', - num_constraints=None, verbose=False, preprocessor=None): - # this init makes SDML stable for scikit-learn examples. - SDML_Supervised.__init__(self, sparsity_param=sparsity_param, - num_labeled=num_labeled, - num_constraints=num_constraints, - verbose=verbose, - preprocessor=preprocessor, - balance_param=1e-5, prior='identity') - dSDML.__init__ = stable_init - check_estimator(dSDML) + class Stable_SDML_Supervised(SDML_Supervised): + + def __init__(self, sparsity_param=0.01, num_labeled='deprecated', + num_constraints=None, verbose=False, preprocessor=None): + # this init makes SDML stable for scikit-learn examples. + SDML_Supervised.__init__(self, sparsity_param=sparsity_param, + num_labeled=num_labeled, + num_constraints=num_constraints, + verbose=verbose, + preprocessor=preprocessor, + balance_param=1e-5, prior='identity') + check_estimator(Stable_SDML_Supervised) def test_rca(self): - def stable_init(self, n_components=None, pca_comps=None, - chunk_size=2, preprocessor=None): - # this init makes RCA stable for scikit-learn examples. - RCA_Supervised.__init__(self, num_chunks=2, n_components=n_components, - pca_comps=pca_comps, chunk_size=chunk_size, - preprocessor=preprocessor) - dRCA.__init__ = stable_init - check_estimator(dRCA) + class Stable_RCA_Supervised(RCA_Supervised): + + def __init__(self, n_components=None, pca_comps=None, + chunk_size=2, preprocessor=None): + # this init makes RCA stable for scikit-learn examples. + RCA_Supervised.__init__(self, num_chunks=2, n_components=n_components, + pca_comps=pca_comps, chunk_size=chunk_size, + preprocessor=preprocessor) + check_estimator(Stable_RCA_Supervised) RNG = check_random_state(0) diff --git a/test/test_transformer_metric_conversion.py b/test/test_transformer_metric_conversion.py index 651f60ea..62ac8777 100644 --- a/test/test_transformer_metric_conversion.py +++ b/test/test_transformer_metric_conversion.py @@ -30,8 +30,8 @@ def test_cov(self): def test_lsml_supervised(self): seed = np.random.RandomState(1234) - lsml = LSML_Supervised(num_constraints=200) - lsml.fit(self.X, self.y, random_state=seed) + lsml = LSML_Supervised(num_constraints=200, random_state=seed) + lsml.fit(self.X, self.y) L = lsml.transformer_ assert_array_almost_equal(L.T.dot(L), lsml.get_mahalanobis_matrix()) @@ -51,8 +51,8 @@ def test_lmnn(self): def test_sdml_supervised(self): seed = np.random.RandomState(1234) sdml = SDML_Supervised(num_constraints=1500, prior='identity', - balance_param=1e-5) - sdml.fit(self.X, self.y, random_state=seed) + balance_param=1e-5, random_state=seed) + sdml.fit(self.X, self.y) L = sdml.transformer_ assert_array_almost_equal(L.T.dot(L), sdml.get_mahalanobis_matrix()) From 2d0337d4bd5ae71a57caf237be2e375b20353987 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Tue, 25 Jun 2019 11:23:24 +0200 Subject: [PATCH 03/12] Update test_RCA in sklearn_compat --- test/test_sklearn_compat.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py index 722a4b80..a198e5c9 100644 --- a/test/test_sklearn_compat.py +++ b/test/test_sklearn_compat.py @@ -67,11 +67,12 @@ def test_rca(self): class Stable_RCA_Supervised(RCA_Supervised): def __init__(self, n_components=None, pca_comps=None, - chunk_size=2, preprocessor=None): + chunk_size=2, preprocessor=None, random_state=None): # this init makes RCA stable for scikit-learn examples. RCA_Supervised.__init__(self, num_chunks=2, n_components=n_components, pca_comps=pca_comps, chunk_size=chunk_size, - preprocessor=preprocessor) + preprocessor=preprocessor, + random_state=random_state) check_estimator(Stable_RCA_Supervised) From d907d30ba3a995b9cd79e4c50e22fc1ec2f676f5 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Tue, 25 Jun 2019 11:25:05 +0200 Subject: [PATCH 04/12] Update test_SDML in sklearn_compat --- test/test_sklearn_compat.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py index a198e5c9..b320ac29 100644 --- a/test/test_sklearn_compat.py +++ b/test/test_sklearn_compat.py @@ -53,14 +53,16 @@ def test_sdml(self): class Stable_SDML_Supervised(SDML_Supervised): def __init__(self, sparsity_param=0.01, num_labeled='deprecated', - num_constraints=None, verbose=False, preprocessor=None): + num_constraints=None, verbose=False, preprocessor=None, + random_state=None): # this init makes SDML stable for scikit-learn examples. SDML_Supervised.__init__(self, sparsity_param=sparsity_param, num_labeled=num_labeled, num_constraints=num_constraints, verbose=verbose, preprocessor=preprocessor, - balance_param=1e-5, prior='identity') + balance_param=1e-5, prior='identity', + random_state=random_state) check_estimator(Stable_SDML_Supervised) def test_rca(self): From c055e9407f45c9f6cb749a0133f5c15638ffe448 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Tue, 25 Jun 2019 11:34:32 +0200 Subject: [PATCH 05/12] Remove testing of pca_comps since it's deprecated --- test/metric_learn_test.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 653ab6e5..dcb6ce61 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -866,21 +866,6 @@ def test_iris(self): csep = class_separation(rca.transform(self.iris_points), self.iris_labels) self.assertLess(csep, 0.25) - def test_feature_null_variance(self): - X = np.hstack((self.iris_points, np.eye(len(self.iris_points), M=1))) - - # Apply PCA with the number of components - rca = RCA_Supervised(n_components=2, pca_comps=3, num_chunks=30) - rca.fit(X, self.iris_labels) - csep = class_separation(rca.transform(X), self.iris_labels) - self.assertLess(csep, 0.30) - - # Apply PCA with the minimum variance ratio - rca = RCA_Supervised(n_components=2, pca_comps=0.95, num_chunks=30) - rca.fit(X, self.iris_labels) - csep = class_separation(rca.transform(X), self.iris_labels) - self.assertLess(csep, 0.30) - def test_deprecation_pca_comps(self): # test that a deprecation message is thrown if pca_comps is set at # initialization From 4eba167795c41f2a9393f35dea58f679b8cfc030 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Tue, 25 Jun 2019 11:43:57 +0200 Subject: [PATCH 06/12] Fix sklearn test --- test/test_sklearn_compat.py | 46 +++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py index b320ac29..85bb61e0 100644 --- a/test/test_sklearn_compat.py +++ b/test/test_sklearn_compat.py @@ -24,6 +24,30 @@ quadruplets_learners) +class Stable_RCA_Supervised(RCA_Supervised): + + def __init__(self, n_components=None, pca_comps=None, + chunk_size=2, preprocessor=None, random_state=None): + # this init makes RCA stable for scikit-learn examples. + super(Stable_RCA_Supervised, self).__init__( + num_chunks=2, n_components=n_components, pca_comps=pca_comps, + chunk_size=chunk_size, preprocessor=preprocessor, + random_state=random_state) + + +class Stable_SDML_Supervised(SDML_Supervised): + + def __init__(self, sparsity_param=0.01, num_labeled='deprecated', + num_constraints=None, verbose=False, preprocessor=None, + random_state=None): + # this init makes SDML stable for scikit-learn examples. + super(Stable_SDML_Supervised, self).__init__( + sparsity_param=sparsity_param, num_labeled=num_labeled, + num_constraints=num_constraints, verbose=verbose, + preprocessor=preprocessor, balance_param=1e-5, prior='identity', + random_state=random_state) + + class TestSklearnCompat(unittest.TestCase): def test_covariance(self): check_estimator(Covariance) @@ -50,31 +74,9 @@ def test_mmc(self): check_estimator(MMC_Supervised) def test_sdml(self): - class Stable_SDML_Supervised(SDML_Supervised): - - def __init__(self, sparsity_param=0.01, num_labeled='deprecated', - num_constraints=None, verbose=False, preprocessor=None, - random_state=None): - # this init makes SDML stable for scikit-learn examples. - SDML_Supervised.__init__(self, sparsity_param=sparsity_param, - num_labeled=num_labeled, - num_constraints=num_constraints, - verbose=verbose, - preprocessor=preprocessor, - balance_param=1e-5, prior='identity', - random_state=random_state) check_estimator(Stable_SDML_Supervised) def test_rca(self): - class Stable_RCA_Supervised(RCA_Supervised): - - def __init__(self, n_components=None, pca_comps=None, - chunk_size=2, preprocessor=None, random_state=None): - # this init makes RCA stable for scikit-learn examples. - RCA_Supervised.__init__(self, num_chunks=2, n_components=n_components, - pca_comps=pca_comps, chunk_size=chunk_size, - preprocessor=preprocessor, - random_state=random_state) check_estimator(Stable_RCA_Supervised) From 4c791b4bc8660e126da451347a49abfe5f41cfe1 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Tue, 25 Jun 2019 11:52:13 +0200 Subject: [PATCH 07/12] Fix random_seed for test_iris in TestRCA --- test/metric_learn_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index dcb6ce61..c7013c2a 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -861,7 +861,8 @@ def test_deprecation_num_dims_lfda(num_dims): class TestRCA(MetricTestCase): def test_iris(self): - rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2) + rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2, + random_state=42) rca.fit(self.iris_points, self.iris_labels) csep = class_separation(rca.transform(self.iris_points), self.iris_labels) self.assertLess(csep, 0.25) From 38dc337afed8dc64ee87d939e0f56584d6f8a59f Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Tue, 25 Jun 2019 12:06:51 +0200 Subject: [PATCH 08/12] Relaunch CI From f2baeef385738f570d49c037aa78fbcfde8d991b Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Tue, 25 Jun 2019 16:12:18 +0200 Subject: [PATCH 09/12] Augment tolerance rather than fix random_seed --- test/metric_learn_test.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index c7013c2a..95cbe76f 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -861,11 +861,10 @@ def test_deprecation_num_dims_lfda(num_dims): class TestRCA(MetricTestCase): def test_iris(self): - rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2, - random_state=42) + rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2) rca.fit(self.iris_points, self.iris_labels) csep = class_separation(rca.transform(self.iris_points), self.iris_labels) - self.assertLess(csep, 0.25) + self.assertLess(csep, 0.26) def test_deprecation_pca_comps(self): # test that a deprecation message is thrown if pca_comps is set at From 0be553a57d41fb78deec931e5c4cde31ca4892c9 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Tue, 25 Jun 2019 16:46:37 +0200 Subject: [PATCH 10/12] Add ChangedBehaviorWarning if the random_state is left default --- metric_learn/itml.py | 8 ++++ metric_learn/lsml.py | 7 +++ metric_learn/mmc.py | 7 +++ metric_learn/rca.py | 7 +++ metric_learn/sdml.py | 7 +++ test/metric_learn_test.py | 92 ++++++++++++++++++++++++++++++++++++--- 6 files changed, 121 insertions(+), 7 deletions(-) diff --git a/metric_learn/itml.py b/metric_learn/itml.py index d87eb6be..ee5319d8 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -18,6 +18,7 @@ import warnings import numpy as np from six.moves import xrange +from sklearn.exceptions import ChangedBehaviorWarning from sklearn.metrics import pairwise_distances from sklearn.utils.validation import check_array from sklearn.base import TransformerMixin @@ -374,6 +375,13 @@ def fit(self, X, y, random_state='deprecated', bounds=None): 'deprecated. Set `random_state` at initialization ' 'instead (when instantiating a new `ITML_Supervised` ' 'object).', DeprecationWarning) + else: + warnings.warn('As of v0.5.0, `ITML_Supervised` now uses the ' + '`random_state` given at initialization to sample ' + 'constraints, not the default `np.random` from the `fit` ' + 'method, since this argument is now deprecated. ' + 'This warning will disappear in v0.6.0.', + ChangedBehaviorWarning) X, y = self._prepare_inputs(X, y, ensure_min_samples=2) num_constraints = self.num_constraints if num_constraints is None: diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index 4682e130..263a69b6 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -313,6 +313,13 @@ def fit(self, X, y, random_state='deprecated'): 'deprecated. Set `random_state` at initialization ' 'instead (when instantiating a new `LSML_Supervised` ' 'object).', DeprecationWarning) + else: + warnings.warn('As of v0.5.0, `LSML_Supervised` now uses the ' + '`random_state` given at initialization to sample ' + 'constraints, not the default `np.random` from the `fit` ' + 'method, since this argument is now deprecated. ' + 'This warning will disappear in v0.6.0.', + ChangedBehaviorWarning) X, y = self._prepare_inputs(X, y, ensure_min_samples=2) num_constraints = self.num_constraints if num_constraints is None: diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index ba1d2951..4c8ec4e9 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -565,6 +565,13 @@ def fit(self, X, y, random_state='deprecated'): 'deprecated. Set `random_state` at initialization ' 'instead (when instantiating a new `MMC_Supervised` ' 'object).', DeprecationWarning) + else: + warnings.warn('As of v0.5.0, `MMC_Supervised` now uses the ' + '`random_state` given at initialization to sample ' + 'constraints, not the default `np.random` from the `fit` ' + 'method, since this argument is now deprecated. ' + 'This warning will disappear in v0.6.0.', + ChangedBehaviorWarning) X, y = self._prepare_inputs(X, y, ensure_min_samples=2) num_constraints = self.num_constraints if num_constraints is None: diff --git a/metric_learn/rca.py b/metric_learn/rca.py index 3e76e4f3..24249d47 100644 --- a/metric_learn/rca.py +++ b/metric_learn/rca.py @@ -214,6 +214,13 @@ def fit(self, X, y, random_state='deprecated'): 'deprecated. Set `random_state` at initialization ' 'instead (when instantiating a new `RCA_Supervised` ' 'object).', DeprecationWarning) + else: + warnings.warn('As of v0.5.0, `RCA_Supervised` now uses the ' + '`random_state` given at initialization to sample ' + 'constraints, not the default `np.random` from the `fit` ' + 'method, since this argument is now deprecated. ' + 'This warning will disappear in v0.6.0.', + ChangedBehaviorWarning) X, y = self._prepare_inputs(X, y, ensure_min_samples=2) chunks = Constraints(y).chunks(num_chunks=self.num_chunks, chunk_size=self.chunk_size, diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 17d6ed76..e07a4473 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -343,6 +343,13 @@ def fit(self, X, y, random_state='deprecated'): 'deprecated. Set `random_state` at initialization ' 'instead (when instantiating a new `SDML_Supervised` ' 'object).', DeprecationWarning) + else: + warnings.warn('As of v0.5.0, `SDML_Supervised` now uses the ' + '`random_state` given at initialization to sample ' + 'constraints, not the default `np.random` from the `fit` ' + 'method, since this argument is now deprecated. ' + 'This warning will disappear in v0.6.0.', + ChangedBehaviorWarning) X, y = self._prepare_inputs(X, y, ensure_min_samples=2) num_constraints = self.num_constraints if num_constraints is None: diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 95cbe76f..0f47a58a 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -134,6 +134,22 @@ def test_deprecation_random_state(self): lsml_supervised.fit(X, y, random_state=np.random) assert any(msg == str(wrn.message) for wrn in raised_warning) + def test_changed_behaviour_warning_random_state(self): + # test that a ChangedBehavior warning is thrown if the random_state is + # not set in fit. + # TODO: remove in v.0.6 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + lsml_supervised = LSML_Supervised() + msg = ('As of v0.5.0, `LSML_Supervised` now uses the ' + '`random_state` given at initialization to sample ' + 'constraints, not the default `np.random` from the `fit` ' + 'method, since this argument is now deprecated. ' + 'This warning will disappear in v0.6.0.') + with pytest.warns(ChangedBehaviorWarning) as raised_warning: + lsml_supervised.fit(X, y) + assert any(msg == str(wrn.message) for wrn in raised_warning) + class TestITML(MetricTestCase): def test_iris(self): @@ -204,6 +220,22 @@ def test_deprecation_random_state(self): itml_supervised.fit(X, y, random_state=np.random) assert any(msg == str(wrn.message) for wrn in raised_warning) + def test_changed_behaviour_warning_random_state(self): + # test that a ChangedBehavior warning is thrown if the random_state is + # not set in fit. + # TODO: remove in v.0.6 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + itml_supervised = ITML_Supervised() + msg = ('As of v0.5.0, `ITML_Supervised` now uses the ' + '`random_state` given at initialization to sample ' + 'constraints, not the default `np.random` from the `fit` ' + 'method, since this argument is now deprecated. ' + 'This warning will disappear in v0.6.0.') + with pytest.warns(ChangedBehaviorWarning) as raised_warning: + itml_supervised.fit(X, y) + assert any(msg == str(wrn.message) for wrn in raised_warning) + @pytest.mark.parametrize('bounds', [None, (20., 100.), [20., 100.], np.array([20., 100.]), @@ -631,6 +663,21 @@ def test_deprecation_random_state(self): sdml_supervised.fit(X, y, random_state=np.random) assert any(msg == str(wrn.message) for wrn in raised_warning) + def test_changed_behaviour_warning_random_state(self): + # test that a ChangedBehavior warning is thrown if the random_state is + # not set in fit. + # TODO: remove in v.0.6 + X, y = load_iris(return_X_y=True) + sdml_supervised = SDML_Supervised(balance_param=5e-5) + msg = ('As of v0.5.0, `SDML_Supervised` now uses the ' + '`random_state` given at initialization to sample ' + 'constraints, not the default `np.random` from the `fit` ' + 'method, since this argument is now deprecated. ' + 'This warning will disappear in v0.6.0.') + with pytest.warns(ChangedBehaviorWarning) as raised_warning: + sdml_supervised.fit(X, y) + assert any(msg == str(wrn.message) for wrn in raised_warning) + @pytest.mark.skipif(not HAS_SKGGM, reason='The message should be printed only if skggm is ' @@ -864,7 +911,7 @@ def test_iris(self): rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2) rca.fit(self.iris_points, self.iris_labels) csep = class_separation(rca.transform(self.iris_points), self.iris_labels) - self.assertLess(csep, 0.26) + self.assertLess(csep, 0.29) def test_deprecation_pca_comps(self): # test that a deprecation message is thrown if pca_comps is set at @@ -879,12 +926,12 @@ def test_deprecation_pca_comps(self): '`sklearn.decomposition.PCA` and an `sklearn.pipeline.Pipeline`.') with pytest.warns(ChangedBehaviorWarning) as expected_msg: rca_supervised.fit(X, y) - assert str(expected_msg[0].message) == msg + assert any(str(w.message) == msg for w in expected_msg) rca = RCA(pca_comps=X.shape[1]) with pytest.warns(ChangedBehaviorWarning) as expected_msg: rca.fit(X, y) - assert str(expected_msg[0].message) == msg + assert any(str(w.message) == msg for w in expected_msg) def test_changedbehaviorwarning_preprocessing(self): # test that a ChangedBehaviorWarning is thrown when using RCA @@ -899,12 +946,12 @@ def test_changedbehaviorwarning_preprocessing(self): rca_supervised = RCA_Supervised(num_chunks=20) with pytest.warns(ChangedBehaviorWarning) as expected_msg: rca_supervised.fit(X, y) - assert str(expected_msg[0].message) == msg + assert any(str(w.message) == msg for w in expected_msg) rca = RCA() with pytest.warns(ChangedBehaviorWarning) as expected_msg: rca.fit(X, y) - assert str(expected_msg[0].message) == msg + assert any(str(w.message) == msg for w in expected_msg) def test_rank_deficient_returns_warning(self): """Checks that if the covariance matrix is not invertible, we raise a @@ -937,6 +984,21 @@ def test_deprecation_random_state(self): rca_supervised.fit(X, y, random_state=np.random) assert any(msg == str(wrn.message) for wrn in raised_warning) + def test_changed_behaviour_warning_random_state(self): + # test that a ChangedBehavior warning is thrown if the random_state is + # not set in fit. + # TODO: remove in v.0.6 + X, y = make_classification(random_state=42, n_samples=100) + rca_supervised = RCA_Supervised(num_chunks=20) + msg = ('As of v0.5.0, `RCA_Supervised` now uses the ' + '`random_state` given at initialization to sample ' + 'constraints, not the default `np.random` from the `fit` ' + 'method, since this argument is now deprecated. ' + 'This warning will disappear in v0.6.0.') + with pytest.warns(ChangedBehaviorWarning) as raised_warning: + rca_supervised.fit(X, y) + assert any(msg == str(wrn.message) for wrn in raised_warning) + @pytest.mark.parametrize('num_dims', [None, 2]) def test_deprecation_num_dims_rca(num_dims): @@ -950,7 +1012,7 @@ def test_deprecation_num_dims_rca(num_dims): ' removed in 0.6.0. Use "n_components" instead') with pytest.warns(DeprecationWarning) as raised_warning: rca.fit(X, y) - assert (str(raised_warning[0].message) == msg) + assert any(str(w.message) == msg for w in raised_warning) # we take a small number of chunks so that RCA works on iris rca_supervised = RCA_Supervised(num_dims=num_dims, num_chunks=10) @@ -959,7 +1021,7 @@ def test_deprecation_num_dims_rca(num_dims): ' removed in 0.6.0. Use "n_components" instead') with pytest.warns(DeprecationWarning) as raised_warning: rca_supervised.fit(X, y) - assert (str(raised_warning[0].message) == msg) + assert any(str(w.message) == msg for w in raised_warning) class TestMLKR(MetricTestCase): @@ -1152,6 +1214,22 @@ def test_deprecation_random_state(self): mmc_supervised.fit(X, y, random_state=np.random) assert any(msg == str(wrn.message) for wrn in raised_warning) + def test_changed_behaviour_warning_random_state(self): + # test that a ChangedBehavior warning is thrown if the random_state is + # not set in fit. + # TODO: remove in v.0.6 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + mmc_supervised = MMC_Supervised() + msg = ('As of v0.5.0, `MMC_Supervised` now uses the ' + '`random_state` given at initialization to sample ' + 'constraints, not the default `np.random` from the `fit` ' + 'method, since this argument is now deprecated. ' + 'This warning will disappear in v0.6.0.') + with pytest.warns(ChangedBehaviorWarning) as raised_warning: + mmc_supervised.fit(X, y) + assert any(msg == str(wrn.message) for wrn in raised_warning) + @pytest.mark.parametrize(('algo_class', 'dataset'), [(NCA, make_classification()), From 1c05dbf8e2d377408aaa568bd7d4af5e423f3294 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Wed, 3 Jul 2019 18:18:40 +0200 Subject: [PATCH 11/12] Update the merge --- metric_learn/lsml.py | 2 +- metric_learn/mmc.py | 2 +- metric_learn/rca.py | 56 +++++++++++++++++--------------------------- metric_learn/sdml.py | 2 +- 4 files changed, 24 insertions(+), 38 deletions(-) diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index 5ddc8923..72a448ec 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -287,7 +287,7 @@ class LSML_Supervised(_BaseLSML, TransformerMixin): A pseudo random number generator object or a seed for it if int. If ``init='random'``, ``random_state`` is used to set the random prior. In any case, `random_state` is also used to randomly sample - constraints from labels. + constraints from labels. Attributes ---------- diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index dfa2e329..55337b2e 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -539,7 +539,7 @@ class MMC_Supervised(_BaseMMC, TransformerMixin): A pseudo random number generator object or a seed for it if int. If ``init='random'``, ``random_state`` is used to initialize the random Mahalanobis matrix. In any case, `random_state` is also used to - randomly sample constraints from labels. + randomly sample constraints from labels. `MMC_Supervised` creates pairs of similar sample by taking same class samples, and pairs of dissimilar samples by taking different class diff --git a/metric_learn/rca.py b/metric_learn/rca.py index 58d839f6..8686f02d 100644 --- a/metric_learn/rca.py +++ b/metric_learn/rca.py @@ -172,53 +172,39 @@ class RCA_Supervised(RCA): class, taking `chunk_size` elements in it, and repeating the process `num_chunks` times. - Attributes + Parameters ---------- - transformer_ : `numpy.ndarray`, shape=(n_components, n_features) - The learned linear transformation ``L``. - """ - - def __init__(self, num_dims='deprecated', n_components=None, - pca_comps='deprecated', num_chunks=100, chunk_size=2, - preprocessor=None, random_state=None): - """Initialize the supervised version of `RCA`. - - `RCA_Supervised` creates chunks of similar points by first sampling a - class, taking `chunk_size` elements in it, and repeating the process - `num_chunks` times. - - Parameters - ---------- - n_components : int or None, optional (default=None) - Dimensionality of reduced space (if None, defaults to dimension of X). + n_components : int or None, optional (default=None) + Dimensionality of reduced space (if None, defaults to dimension of X). - num_dims : Not used + num_dims : Not used - .. deprecated:: 0.5.0 - `num_dims` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use `n_components` instead. + .. deprecated:: 0.5.0 + `num_dims` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use `n_components` instead. - num_chunks: int, optional + num_chunks: int, optional - chunk_size: int, optional + chunk_size: int, optional - preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be formed like this: X[indices]. + preprocessor : array-like, shape=(n_samples, n_features) or callable + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. - random_state : int or numpy.RandomState or None, optional (default=None) - A pseudo random number generator object or a seed for it if int. - It is used to randomly sample constraints from labels. + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. + It is used to randomly sample constraints from labels. - Attributes - ---------- - transformer_ : `numpy.ndarray`, shape=(n_components, n_features) - The learned linear transformation ``L``. + Attributes + ---------- + transformer_ : `numpy.ndarray`, shape=(n_components, n_features) + The learned linear transformation ``L``. """ def __init__(self, num_dims='deprecated', n_components=None, pca_comps='deprecated', num_chunks=100, chunk_size=2, - preprocessor=None): + preprocessor=None, random_state=None): + """Initialize the supervised version of `RCA`.""" RCA.__init__(self, num_dims=num_dims, n_components=n_components, pca_comps=pca_comps, preprocessor=preprocessor) self.num_chunks = num_chunks diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index a162d852..9344ef7c 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -311,7 +311,7 @@ class SDML_Supervised(_BaseSDML, TransformerMixin): A pseudo random number generator object or a seed for it if int. If ``init='random'``, ``random_state`` is used to set the random prior. In any case, `random_state` is also used to randomly sample - constraints from labels. + constraints from labels. Attributes ---------- From 7ada3364de06b0bb6c25bc1cd4a87c87f8763a9f Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Wed, 3 Jul 2019 22:00:00 +0200 Subject: [PATCH 12/12] Address https://github.com/metric-learn/metric-learn/pull/224#pullrequestreview-257618709 --- metric_learn/constraints.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/metric_learn/constraints.py b/metric_learn/constraints.py index 6254ec8d..e42ef4b8 100644 --- a/metric_learn/constraints.py +++ b/metric_learn/constraints.py @@ -49,8 +49,7 @@ def positive_negative_pairs(self, num_constraints, same_length=False, return a, b, c, d def _pairs(self, num_constraints, same_label=True, max_iter=10, - random_state=None): - random_state = check_random_state(random_state) + random_state=np.random): num_labels = len(self.known_labels) ab = set() it = 0