From 3ec39b961af2229aef1444ac4b3094046a5f362f Mon Sep 17 00:00:00 2001
From: Arnaud Benard
Date: Wed, 26 Oct 2016 18:13:26 +0100
Subject: [PATCH 1/2] replaced n_iter by max_iter and added deprecation

---
 sklearn/linear_model/bayes.py | 25 +++++++++++++++----------
 sklearn/manifold/t_sne.py     | 25 ++++++++++++++-----------
 sklearn/neural_network/rbm.py | 14 +++++++++-----
 3 files changed, 38 insertions(+), 26 deletions(-)

diff --git a/sklearn/linear_model/bayes.py b/sklearn/linear_model/bayes.py
index 79f913b634489..d17f51f3ae0cd 100644
--- a/sklearn/linear_model/bayes.py
+++ b/sklearn/linear_model/bayes.py
@@ -14,6 +14,7 @@
 from ..base import RegressorMixin
 from ..utils.extmath import fast_logdet, pinvh
 from ..utils import check_X_y
+from ..utils import deprecated
 
 
 ###############################################################################
@@ -29,7 +30,7 @@ class BayesianRidge(LinearModel, RegressorMixin):
 
     Parameters
     ----------
-    n_iter : int, optional
+    max_iter : int, optional
         Maximum number of iterations. Default is 300.
 
     tol : float, optional
@@ -102,7 +103,7 @@ class BayesianRidge(LinearModel, RegressorMixin):
     ... # doctest: +NORMALIZE_WHITESPACE
     BayesianRidge(alpha_1=1e-06, alpha_2=1e-06, compute_score=False,
            copy_X=True, fit_intercept=True, lambda_1=1e-06, lambda_2=1e-06,
-           n_iter=300, normalize=False, tol=0.001, verbose=False)
+           max_iter=300, normalize=False, tol=0.001, verbose=False)
     >>> clf.predict([[1, 1]])
     array([ 1.])
 
@@ -111,11 +112,13 @@ class BayesianRidge(LinearModel, RegressorMixin):
     See examples/linear_model/plot_bayesian_ridge.py for an example.
     """
 
-    def __init__(self, n_iter=300, tol=1.e-3, alpha_1=1.e-6, alpha_2=1.e-6,
+    @property
+    @deprecated("Attribute n_iter was deprecated. Use 'max_iter' instead")
+    def __init__(self, max_iter=300, tol=1.e-3, alpha_1=1.e-6, alpha_2=1.e-6,
                  lambda_1=1.e-6, lambda_2=1.e-6, compute_score=False,
                  fit_intercept=True, normalize=False, copy_X=True,
                  verbose=False):
-        self.n_iter = n_iter
+        self.max_iter = max_iter
         self.tol = tol
         self.alpha_1 = alpha_1
         self.alpha_2 = alpha_2
@@ -164,7 +167,7 @@ def fit(self, X, y):
         eigen_vals_ = S ** 2
 
         # Convergence loop of the bayesian ridge regression
-        for iter_ in range(self.n_iter):
+        for iter_ in range(self.max_iter):
 
             # Compute mu and sigma
             # sigma_ = lambda_ / alpha_ * np.eye(n_features) + np.dot(X.T, X)
@@ -238,7 +241,7 @@ class ARDRegression(LinearModel, RegressorMixin):
 
     Parameters
     ----------
-    n_iter : int, optional
+    max_iter : int, optional
         Maximum number of iterations. Default is 300
 
     tol : float, optional
@@ -315,7 +318,7 @@ class ARDRegression(LinearModel, RegressorMixin):
     ... # doctest: +NORMALIZE_WHITESPACE
     ARDRegression(alpha_1=1e-06, alpha_2=1e-06, compute_score=False,
            copy_X=True, fit_intercept=True, lambda_1=1e-06, lambda_2=1e-06,
-           n_iter=300, normalize=False, threshold_lambda=10000.0, tol=0.001,
+           max_iter=300, normalize=False, threshold_lambda=10000.0, tol=0.001,
            verbose=False)
     >>> clf.predict([[1, 1]])
     array([ 1.])
 
@@ -325,11 +328,13 @@ class ARDRegression(LinearModel, RegressorMixin):
     See examples/linear_model/plot_ard.py for an example.
     """
 
-    def __init__(self, n_iter=300, tol=1.e-3, alpha_1=1.e-6, alpha_2=1.e-6,
+    @property
+    @deprecated("Attribute n_iter was deprecated. Use 'max_iter' instead")
+    def __init__(self, max_iter=300, tol=1.e-3, alpha_1=1.e-6, alpha_2=1.e-6,
                  lambda_1=1.e-6, lambda_2=1.e-6, compute_score=False,
                  threshold_lambda=1.e+4, fit_intercept=True, normalize=False,
                  copy_X=True, verbose=False):
-        self.n_iter = n_iter
+        self.max_iter = max_iter
         self.tol = tol
         self.fit_intercept = fit_intercept
         self.normalize = normalize
@@ -385,7 +390,7 @@ def fit(self, X, y):
         coef_old_ = None
 
         # Iterative procedure of ARDRegression
-        for iter_ in range(self.n_iter):
+        for iter_ in range(self.max_iter):
             # Compute mu and sigma (using Woodbury matrix identity)
             sigma_ = pinvh(np.eye(n_samples) / alpha_ +
                            np.dot(X[:, keep_lambda] *
diff --git a/sklearn/manifold/t_sne.py b/sklearn/manifold/t_sne.py
index 6d74cf598392f..64b95e580173a 100644
--- a/sklearn/manifold/t_sne.py
+++ b/sklearn/manifold/t_sne.py
@@ -17,6 +17,7 @@
 from ..base import BaseEstimator
 from ..utils import check_array
 from ..utils import check_random_state
+from ..utils import deprecated
 from ..utils.extmath import _ravel
 from ..decomposition import PCA
 from ..metrics.pairwise import pairwise_distances
@@ -295,7 +296,7 @@ def _kl_divergence_bh(params, P, neighbors, degrees_of_freedom, n_samples,
     return error, grad
 
 
-def _gradient_descent(objective, p0, it, n_iter, objective_error=None,
+def _gradient_descent(objective, p0, it, max_iter, objective_error=None,
                       n_iter_check=1, n_iter_without_progress=50,
                       momentum=0.5, learning_rate=1000.0, min_gain=0.01,
                       min_grad_norm=1e-7, min_error_diff=1e-7, verbose=0,
@@ -317,7 +318,7 @@ def _gradient_descent(objective, p0, it, max_iter, objective_error=None,
         Current number of iterations (this function will be called more than
         once during the optimization).
 
-    n_iter : int
+    max_iter : int
         Maximum number of gradient descent iterations.
 
     n_iter_check : int
@@ -383,7 +384,7 @@ def _gradient_descent(objective, p0, it, max_iter, objective_error=None,
     best_error = np.finfo(np.float).max
     best_iter = 0
 
-    for i in range(it, n_iter):
+    for i in range(it, max_iter):
         new_error, grad = objective(p, *args, **kwargs)
         grad_norm = linalg.norm(grad)
 
@@ -541,7 +542,7 @@ class TSNE(BaseEstimator):
         might be too high. If the cost function gets stuck in a bad local
         minimum increasing the learning rate helps sometimes.
 
-    n_iter : int, optional (default: 1000)
+    max_iter : int, optional (default: 1000)
         Maximum number of iterations for the optimization. Should be at
         least 200.
 
@@ -644,8 +645,10 @@ class TSNE(BaseEstimator):
         http://lvdmaaten.github.io/publications/papers/JMLR_2014.pdf
     """
 
+    @property
+    @deprecated("Attribute n_iter was deprecated. Use 'max_iter' instead")
     def __init__(self, n_components=2, perplexity=30.0,
-                 early_exaggeration=4.0, learning_rate=1000.0, n_iter=1000,
+                 early_exaggeration=4.0, learning_rate=1000.0, max_iter=1000,
                  n_iter_without_progress=30, min_grad_norm=1e-7,
                  metric="euclidean", init="random", verbose=0,
                  random_state=None, method='barnes_hut', angle=0.5):
@@ -658,7 +661,7 @@ def __init__(self, n_components=2, perplexity=30.0,
         self.perplexity = perplexity
         self.early_exaggeration = early_exaggeration
         self.learning_rate = learning_rate
-        self.n_iter = n_iter
+        self.max_iter = max_iter
         self.n_iter_without_progress = n_iter_without_progress
         self.min_grad_norm = min_grad_norm
         self.metric = metric
@@ -711,8 +714,8 @@ def _fit(self, X, skip_num_points=0):
             raise ValueError("early_exaggeration must be at least 1, but is "
                              "%f" % self.early_exaggeration)
 
-        if self.n_iter < 200:
-            raise ValueError("n_iter should be at least 200")
+        if self.max_iter < 200:
+            raise ValueError("max_iter should be at least 200")
 
         if self.metric == "precomputed":
             if isinstance(self.init, string_types) and self.init == 'pca':
@@ -806,7 +809,7 @@ def _tsne(self, P, degrees_of_freedom, n_samples, random_state,
                                                       self.n_components)
         params = X_embedded.ravel()
 
-        opt_args = {"n_iter": 50, "momentum": 0.5, "it": 0,
+        opt_args = {"max_iter": 50, "momentum": 0.5, "it": 0,
                     "learning_rate": self.learning_rate,
                     "n_iter_without_progress": self.n_iter_without_progress,
                     "verbose": self.verbose, "n_iter_check": 25,
@@ -840,7 +843,7 @@ def _tsne(self, P, degrees_of_freedom, n_samples, random_state,
             params, kl_divergence, it = _gradient_descent(obj_func, params,
                                                           **opt_args)
 
-        opt_args['n_iter'] = 100
+        opt_args['max_iter'] = 100
         opt_args['momentum'] = 0.8
         opt_args['it'] = it + 1
         params, kl_divergence, it = _gradient_descent(obj_func, params,
@@ -853,7 +856,7 @@ def _tsne(self, P, degrees_of_freedom, n_samples, random_state,
 
         # Final optimization
         P /= self.early_exaggeration
-        opt_args['n_iter'] = self.n_iter
+        opt_args['max_iter'] = self.max_iter
         opt_args['it'] = it + 1
         params, error, it = _gradient_descent(obj_func, params, **opt_args)
 
diff --git a/sklearn/neural_network/rbm.py b/sklearn/neural_network/rbm.py
index bdbc04e6be84b..3d06089f93bfa 100644
--- a/sklearn/neural_network/rbm.py
+++ b/sklearn/neural_network/rbm.py
@@ -17,6 +17,7 @@
 from ..externals.six.moves import xrange
 from ..utils import check_array
 from ..utils import check_random_state
+from ..utils import deprecated
 from ..utils import gen_even_slices
 from ..utils import issparse
 from ..utils.extmath import safe_sparse_dot
@@ -51,7 +52,7 @@ class BernoulliRBM(BaseEstimator, TransformerMixin):
     batch_size : int, optional
         Number of examples per minibatch.
 
-    n_iter : int, optional
+    max_iter : int, optional
         Number of iterations/sweeps over the training dataset to perform
         during training.
 
@@ -83,7 +84,7 @@ class BernoulliRBM(BaseEstimator, TransformerMixin):
     >>> X = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]])
     >>> model = BernoulliRBM(n_components=2)
     >>> model.fit(X)
-    BernoulliRBM(batch_size=10, learning_rate=0.1, n_components=2, n_iter=10,
+    BernoulliRBM(batch_size=10, learning_rate=0.1, n_components=2, max_iter=10,
            random_state=None, verbose=0)
 
     References
@@ -97,12 +98,15 @@ class BernoulliRBM(BaseEstimator, TransformerMixin):
         Approximations to the Likelihood Gradient. International Conference
         on Machine Learning (ICML) 2008
     """
+
+    @property
+    @deprecated("Attribute n_iter was deprecated. Use 'max_iter' instead")
     def __init__(self, n_components=256, learning_rate=0.1, batch_size=10,
-                 n_iter=10, verbose=0, random_state=None):
+                 max_iter=10, verbose=0, random_state=None):
         self.n_components = n_components
         self.learning_rate = learning_rate
         self.batch_size = batch_size
-        self.n_iter = n_iter
+        self.max_iter = max_iter
         self.verbose = verbose
         self.random_state = random_state
 
@@ -350,7 +354,7 @@ def fit(self, X, y=None):
                                             n_batches, n_samples))
         verbose = self.verbose
         begin = time.time()
-        for iteration in xrange(1, self.n_iter + 1):
+        for iteration in xrange(1, self.max_iter + 1):
             for batch_slice in batch_slices:
                 self._fit(X[batch_slice], rng)
 

From 0d19b05c3a883424c02691ade441b5de0c6ca42d Mon Sep 17 00:00:00 2001
From: Arnaud Benard
Date: Fri, 28 Oct 2016 10:52:35 +0100
Subject: [PATCH 2/2] added warnings in the models and fixed tests

---
 sklearn/linear_model/bayes.py            | 24 ++++++++++++++----------
 sklearn/manifold/t_sne.py                |  8 +++++---
 sklearn/manifold/tests/test_t_sne.py     | 18 +++++++++---------
 sklearn/neural_network/rbm.py            |  6 ++++--
 sklearn/neural_network/tests/test_rbm.py | 18 +++++++++---------
 5 files changed, 41 insertions(+), 33 deletions(-)

diff --git a/sklearn/linear_model/bayes.py b/sklearn/linear_model/bayes.py
index d17f51f3ae0cd..970d38df1d0b2 100644
--- a/sklearn/linear_model/bayes.py
+++ b/sklearn/linear_model/bayes.py
@@ -9,6 +9,7 @@
 from math import log
 import numpy as np
 from scipy import linalg
+import warnings
 
 from .base import LinearModel
 from ..base import RegressorMixin
@@ -112,12 +113,13 @@ class BayesianRidge(LinearModel, RegressorMixin):
     See examples/linear_model/plot_bayesian_ridge.py for an example.
     """
 
-    @property
     @deprecated("Attribute n_iter was deprecated. Use 'max_iter' instead")
-    def __init__(self, max_iter=300, tol=1.e-3, alpha_1=1.e-6, alpha_2=1.e-6,
-                 lambda_1=1.e-6, lambda_2=1.e-6, compute_score=False,
-                 fit_intercept=True, normalize=False, copy_X=True,
-                 verbose=False):
+    def __init__(self, n_iter=None, max_iter=300, tol=1.e-3, alpha_1=1.e-6,
+                 alpha_2=1.e-6, lambda_1=1.e-6, lambda_2=1.e-6,
+                 compute_score=False, fit_intercept=True, normalize=False,
+                 copy_X=True, verbose=False):
+        if n_iter is not None:
+            warnings.warn("'n_iter' was deprecated. Use 'max_iter' instead.")
         self.max_iter = max_iter
         self.tol = tol
         self.alpha_1 = alpha_1
@@ -328,12 +330,14 @@ class ARDRegression(LinearModel, RegressorMixin):
     See examples/linear_model/plot_ard.py for an example.
     """
 
-    @property
     @deprecated("Attribute n_iter was deprecated. Use 'max_iter' instead")
-    def __init__(self, max_iter=300, tol=1.e-3, alpha_1=1.e-6, alpha_2=1.e-6,
-                 lambda_1=1.e-6, lambda_2=1.e-6, compute_score=False,
-                 threshold_lambda=1.e+4, fit_intercept=True, normalize=False,
-                 copy_X=True, verbose=False):
+    def __init__(self, n_iter=None, max_iter=300, tol=1.e-3, alpha_1=1.e-6,
+                 alpha_2=1.e-6, lambda_1=1.e-6, lambda_2=1.e-6,
+                 compute_score=False, threshold_lambda=1.e+4,
+                 fit_intercept=True, normalize=False, copy_X=True,
+                 verbose=False):
+        if n_iter is not None:
+            warnings.warn("'n_iter' was deprecated. Use 'max_iter' instead.")
         self.max_iter = max_iter
         self.tol = tol
         self.fit_intercept = fit_intercept
diff --git a/sklearn/manifold/t_sne.py b/sklearn/manifold/t_sne.py
index 64b95e580173a..5933bfab2a45a 100644
--- a/sklearn/manifold/t_sne.py
+++ b/sklearn/manifold/t_sne.py
@@ -13,6 +13,7 @@
 import scipy.sparse as sp
 from scipy.spatial.distance import pdist
 from scipy.spatial.distance import squareform
+import warnings
 from ..neighbors import BallTree
 from ..base import BaseEstimator
 from ..utils import check_array
@@ -645,11 +646,10 @@ class TSNE(BaseEstimator):
         http://lvdmaaten.github.io/publications/papers/JMLR_2014.pdf
     """
 
-    @property
     @deprecated("Attribute n_iter was deprecated. Use 'max_iter' instead")
     def __init__(self, n_components=2, perplexity=30.0,
-                 early_exaggeration=4.0, learning_rate=1000.0, max_iter=1000,
-                 n_iter_without_progress=30, min_grad_norm=1e-7,
+                 early_exaggeration=4.0, learning_rate=1000.0, n_iter=None,
+                 max_iter=1000, n_iter_without_progress=30, min_grad_norm=1e-7,
                  metric="euclidean", init="random", verbose=0,
                  random_state=None, method='barnes_hut', angle=0.5):
         if not ((isinstance(init, string_types) and
                  init in ["pca", "random"]) or
                 isinstance(init, np.ndarray)):
             msg = "'init' must be 'pca', 'random', or a numpy array"
             raise ValueError(msg)
+        if n_iter is not None:
+            warnings.warn("'n_iter' was deprecated. Use 'max_iter' instead.")
         self.n_components = n_components
         self.perplexity = perplexity
         self.early_exaggeration = early_exaggeration
diff --git a/sklearn/manifold/tests/test_t_sne.py b/sklearn/manifold/tests/test_t_sne.py
index 3be02f359c167..1ecb86e791506 100644
--- a/sklearn/manifold/tests/test_t_sne.py
+++ b/sklearn/manifold/tests/test_t_sne.py
@@ -47,7 +47,7 @@ def flat_function(_):
     sys.stdout = StringIO()
     try:
         _, error, it = _gradient_descent(
-            ObjectiveSmallGradient(), np.zeros(1), 0, n_iter=100,
+            ObjectiveSmallGradient(), np.zeros(1), 0, max_iter=100,
             n_iter_without_progress=100, momentum=0.0, learning_rate=0.0,
             min_gain=0.0, min_grad_norm=1e-5, min_error_diff=0.0, verbose=2)
     finally:
@@ -63,7 +63,7 @@ def flat_function(_):
     sys.stdout = StringIO()
     try:
         _, error, it = _gradient_descent(
-            ObjectiveSmallGradient(), np.zeros(1), 0, n_iter=100,
+            ObjectiveSmallGradient(), np.zeros(1), 0, max_iter=100,
             n_iter_without_progress=100, momentum=0.0, learning_rate=0.0,
             min_gain=0.0, min_grad_norm=0.0, min_error_diff=0.2, verbose=2)
     finally:
@@ -79,7 +79,7 @@ def flat_function(_):
     sys.stdout = StringIO()
     try:
         _, error, it = _gradient_descent(
-            flat_function, np.zeros(1), 0, n_iter=100,
+            flat_function, np.zeros(1), 0, max_iter=100,
             n_iter_without_progress=10, momentum=0.0, learning_rate=0.0,
             min_gain=0.0, min_grad_norm=0.0, min_error_diff=-1.0, verbose=2)
     finally:
@@ -95,7 +95,7 @@ def flat_function(_):
     sys.stdout = StringIO()
    try:
         _, error, it = _gradient_descent(
-            ObjectiveSmallGradient(), np.zeros(1), 0, n_iter=11,
+            ObjectiveSmallGradient(), np.zeros(1), 0, max_iter=11,
             n_iter_without_progress=100, momentum=0.0, learning_rate=0.0,
             min_gain=0.0, min_grad_norm=0.0, min_error_diff=0.0, verbose=2)
     finally:
@@ -254,9 +254,9 @@ def test_optimization_minimizes_kl_divergence():
     random_state = check_random_state(0)
     X, _ = make_blobs(n_features=3, random_state=random_state)
     kl_divergences = []
-    for n_iter in [200, 250, 300]:
+    for max_iter in [200, 250, 300]:
         tsne = TSNE(n_components=2, perplexity=10, learning_rate=100.0,
-                    n_iter=n_iter, random_state=0)
+                    max_iter=max_iter, random_state=0)
         tsne.fit_transform(X)
         kl_divergences.append(tsne.kl_divergence_)
     assert_less_equal(kl_divergences[1], kl_divergences[0])
@@ -297,8 +297,8 @@ def test_early_exaggeration_too_small():
 
 
 def test_too_few_iterations():
     # Number of gradient descent iterations must be at least 200.
-    tsne = TSNE(n_iter=199)
-    assert_raises_regexp(ValueError, "n_iter .*", tsne.fit_transform,
+    tsne = TSNE(max_iter=199)
+    assert_raises_regexp(ValueError, "max_iter .*", tsne.fit_transform,
                          np.array([[0.0]]))
@@ -469,7 +469,7 @@ def test_no_sparse_on_barnes_hut():
     X = random_state.randn(100, 2)
     X[(np.random.randint(0, 100, 50), np.random.randint(0, 2, 50))] = 0.0
     X_csr = sp.csr_matrix(X)
-    tsne = TSNE(n_iter=199, method='barnes_hut')
+    tsne = TSNE(max_iter=199, method='barnes_hut')
     assert_raises_regexp(TypeError, "A sparse matrix was.*",
                          tsne.fit_transform, X_csr)
 
diff --git a/sklearn/neural_network/rbm.py b/sklearn/neural_network/rbm.py
index 3d06089f93bfa..7cdde373d242a 100644
--- a/sklearn/neural_network/rbm.py
+++ b/sklearn/neural_network/rbm.py
@@ -11,6 +11,7 @@
 
 import numpy as np
 import scipy.sparse as sp
+import warnings
 
 from ..base import BaseEstimator
 from ..base import TransformerMixin
@@ -99,10 +100,11 @@ class BernoulliRBM(BaseEstimator, TransformerMixin):
     on Machine Learning (ICML) 2008
     """
 
-    @property
     @deprecated("Attribute n_iter was deprecated. Use 'max_iter' instead")
     def __init__(self, n_components=256, learning_rate=0.1, batch_size=10,
-                 max_iter=10, verbose=0, random_state=None):
+                 n_iter=None, max_iter=10, verbose=0, random_state=None):
+        if n_iter is not None:
+            warnings.warn("'n_iter' was deprecated. Use 'max_iter' instead.")
         self.n_components = n_components
         self.learning_rate = learning_rate
         self.batch_size = batch_size
diff --git a/sklearn/neural_network/tests/test_rbm.py b/sklearn/neural_network/tests/test_rbm.py
index bf171b7fd2555..5922ccf864ee2 100644
--- a/sklearn/neural_network/tests/test_rbm.py
+++ b/sklearn/neural_network/tests/test_rbm.py
@@ -21,7 +21,7 @@ def test_fit():
     X = Xdigits.copy()
 
     rbm = BernoulliRBM(n_components=64, learning_rate=0.1,
-                       batch_size=10, n_iter=7, random_state=9)
+                       batch_size=10, max_iter=7, random_state=9)
     rbm.fit(X)
 
     assert_almost_equal(rbm.score_samples(X).mean(), -21., decimal=0)
@@ -49,7 +49,7 @@ def test_partial_fit():
 def test_transform():
     X = Xdigits[:100]
     rbm1 = BernoulliRBM(n_components=16, batch_size=5,
-                        n_iter=5, random_state=42)
+                        max_iter=5, random_state=42)
     rbm1.fit(X)
 
     Xt1 = rbm1.transform(X)
@@ -86,7 +86,7 @@ def test_sample_hiddens():
     rng = np.random.RandomState(0)
     X = Xdigits[:100]
     rbm1 = BernoulliRBM(n_components=2, batch_size=5,
-                        n_iter=5, random_state=42)
+                        max_iter=5, random_state=42)
     rbm1.fit(X)
 
     h = rbm1._mean_hiddens(X[0])
@@ -101,7 +101,7 @@ def test_fit_gibbs():
     rng = np.random.RandomState(42)
     X = np.array([[0.], [1.]])
     rbm1 = BernoulliRBM(n_components=2, batch_size=2,
-                        n_iter=42, random_state=rng)
+                        max_iter=42, random_state=rng)
     # you need that much iters
     rbm1.fit(X)
     assert_almost_equal(rbm1.components_,
@@ -118,7 +118,7 @@ def test_fit_gibbs_sparse():
     from scipy.sparse import csc_matrix
     X = csc_matrix([[0.], [1.]])
     rbm2 = BernoulliRBM(n_components=2, batch_size=2,
-                        n_iter=42, random_state=rng)
+                        max_iter=42, random_state=rng)
     rbm2.fit(X)
     assert_almost_equal(rbm2.components_,
                         np.array([[0.02649814], [0.02009084]]), decimal=4)
@@ -131,7 +131,7 @@ def test_gibbs_smoke():
     # Also check that sampling again will yield different results.
     X = Xdigits
     rbm1 = BernoulliRBM(n_components=42, batch_size=40,
-                        n_iter=20, random_state=42)
+                        max_iter=20, random_state=42)
     rbm1.fit(X)
     X_sampled = rbm1.gibbs(X)
     assert_all_finite(X_sampled)
@@ -146,7 +146,7 @@ def test_score_samples():
     rng = np.random.RandomState(42)
     X = np.vstack([np.zeros(1000), np.ones(1000)])
     rbm1 = BernoulliRBM(n_components=10, batch_size=2,
-                        n_iter=10, random_state=rng)
+                        max_iter=10, random_state=rng)
     rbm1.fit(X)
 
     assert_true((rbm1.score_samples(X) < -300).all())
@@ -165,7 +165,7 @@ def test_score_samples():
 
 
 def test_rbm_verbose():
-    rbm = BernoulliRBM(n_iter=2, verbose=10)
+    rbm = BernoulliRBM(max_iter=2, verbose=10)
     old_stdout = sys.stdout
     sys.stdout = StringIO()
     try:
@@ -180,7 +180,7 @@ def test_sparse_and_verbose():
     sys.stdout = StringIO()
     from scipy.sparse import csc_matrix
     X = csc_matrix([[0.], [1.]])
-    rbm = BernoulliRBM(n_components=2, batch_size=2, n_iter=1,
+    rbm = BernoulliRBM(n_components=2, batch_size=2, max_iter=1,
                        random_state=42, verbose=True)
     try:
         rbm.fit(X)
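
For reference (not part of the patch itself): a minimal, self-contained sketch of the keyword-deprecation shim these two commits introduce. `Estimator` is a hypothetical stand-in for `BayesianRidge`, `ARDRegression`, `TSNE`, and `BernoulliRBM`; the explicit `DeprecationWarning` category and the `max_iter = n_iter` carry-over are assumptions a complete shim would usually add on top of the bare `warnings.warn` call used in the diff above.

```python
# Hypothetical sketch -- not part of the diff above.
import warnings


class Estimator(object):
    """Stand-in for the patched estimators (BayesianRidge, TSNE, ...)."""

    def __init__(self, n_iter=None, max_iter=300):
        if n_iter is not None:
            # Warn on the old keyword. The DeprecationWarning category is an
            # assumption; the patch itself uses the default UserWarning.
            warnings.warn("'n_iter' was deprecated. Use 'max_iter' instead.",
                          DeprecationWarning)
            # Carry the old value over so legacy callers keep working.
            max_iter = n_iter
        self.max_iter = max_iter


with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    est = Estimator(n_iter=50)   # old spelling: warns ...
assert est.max_iter == 50        # ... but the value is still honoured
assert any(issubclass(w.category, DeprecationWarning) for w in caught)

est = Estimator(max_iter=50)     # new spelling: silent
assert est.max_iter == 50
```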