From aba686ec8339bfa7f89970483d403ae8b4d8c498 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 2 May 2019 16:35:19 +0200 Subject: [PATCH 1/3] FIX fix RCA_Supervised sklearn compat test --- metric_learn/rca.py | 9 ++++++--- test/test_sklearn_compat.py | 10 +++++++--- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/metric_learn/rca.py b/metric_learn/rca.py index c9fedd59..edfa2242 100644 --- a/metric_learn/rca.py +++ b/metric_learn/rca.py @@ -26,7 +26,11 @@ def _chunk_mean_centering(data, chunks): num_chunks = chunks.max() + 1 chunk_mask = chunks != -1 - chunk_data = data[chunk_mask] + # Warning: we need to ensure we don't overwrite the data + # through slices hence we do a copy. We will also need to + # ensure the data is float so that we can substract the + # mean on it + chunk_data = data[chunk_mask].astype(float, copy=True) chunk_labels = chunks[chunk_mask] for c in xrange(num_chunks): mask = chunk_labels == c @@ -98,7 +102,7 @@ def fit(self, X, chunks): When ``chunks[i] == -1``, point i doesn't belong to any chunklet. When ``chunks[i] == j``, point i belongs to chunklet j. """ - X = self._prepare_inputs(X, ensure_min_samples=2) + X, chunks = self._prepare_inputs(X, chunks, ensure_min_samples=2) # PCA projection to remove noise and redundant information. if self.pca_comps is not None: @@ -109,7 +113,6 @@ def fit(self, X, chunks): X_t = X - X.mean(axis=0) M_pca = None - chunks = np.asanyarray(chunks, dtype=int) chunk_mask, chunked_data = _chunk_mean_centering(X_t, chunks) inner_cov = np.atleast_2d(np.cov(chunked_data, rowvar=0, bias=1)) diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py index 5d6c5d77..08b4edf8 100644 --- a/test/test_sklearn_compat.py +++ b/test/test_sklearn_compat.py @@ -89,9 +89,13 @@ def stable_init(self, sparsity_param=0.01, num_labeled='deprecated', dSDML.__init__ = stable_init check_estimator(dSDML) - # This fails because the default num_chunks isn't data-dependent. 
- # def test_rca(self): - # check_estimator(RCA_Supervised) + def test_rca(self): + def stable_init(self, num_dims=None, pca_comps=None, + chunk_size=2, preprocessor=None): + # this init makes RCA stable for scikit-learn examples. + RCA_Supervised.__init__(self, num_chunks=2) + dRCA.__init__ = stable_init + check_estimator(dRCA) RNG = check_random_state(0) From 2b8437f0f9a4b877dae5d410792368d4cfc1daab Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 9 May 2019 13:36:36 +0200 Subject: [PATCH 2/3] Address https://github.com/metric-learn/metric-learn/pull/198#pullrequestreview-234140017 --- metric_learn/rca.py | 2 +- test/test_sklearn_compat.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/metric_learn/rca.py b/metric_learn/rca.py index edfa2242..0940671f 100644 --- a/metric_learn/rca.py +++ b/metric_learn/rca.py @@ -30,7 +30,7 @@ def _chunk_mean_centering(data, chunks): # through slices hence we do a copy. We will also need to # ensure the data is float so that we can substract the # mean on it - chunk_data = data[chunk_mask].astype(float, copy=True) + chunk_data = data[chunk_mask].astype(float, copy=False) chunk_labels = chunks[chunk_mask] for c in xrange(num_chunks): mask = chunk_labels == c diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py index 08b4edf8..091c56e2 100644 --- a/test/test_sklearn_compat.py +++ b/test/test_sklearn_compat.py @@ -93,7 +93,9 @@ def test_rca(self): def stable_init(self, num_dims=None, pca_comps=None, chunk_size=2, preprocessor=None): # this init makes RCA stable for scikit-learn examples. 
- RCA_Supervised.__init__(self, num_chunks=2) + RCA_Supervised.__init__(self, num_chunks=2, num_dims=num_dims, + pca_comps=pca_comps, chunk_size=chunk_size, + preprocessor=preprocessor) dRCA.__init__ = stable_init check_estimator(dRCA) From aaed9b28c2b1032ad4d2c617e19d90bf6a0c1669 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 9 May 2019 15:08:30 +0200 Subject: [PATCH 3/3] Refactor comment --- metric_learn/rca.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/metric_learn/rca.py b/metric_learn/rca.py index 0940671f..cbb90430 100644 --- a/metric_learn/rca.py +++ b/metric_learn/rca.py @@ -26,9 +26,7 @@ def _chunk_mean_centering(data, chunks): num_chunks = chunks.max() + 1 chunk_mask = chunks != -1 - # Warning: we need to ensure we don't overwrite the data - # through slices hence we do a copy. We will also need to - # ensure the data is float so that we can substract the + # We need to ensure the data is float so that we can subtract the # mean on it chunk_data = data[chunk_mask].astype(float, copy=False) chunk_labels = chunks[chunk_mask]