7 changes: 4 additions & 3 deletions sklearn/decomposition/_factor_analysis.py
@@ -147,9 +147,6 @@ def __init__(self, n_components=None, *, tol=1e-2, copy=True,
         self.copy = copy
         self.tol = tol
         self.max_iter = max_iter
-        if svd_method not in ['lapack', 'randomized']:
-            raise ValueError('SVD method %s is not supported. Please consider'
-                             ' the documentation' % svd_method)
         self.svd_method = svd_method
 
         self.noise_variance_init = noise_variance_init
@@ -170,6 +167,10 @@ def fit(self, X, y=None):
         -------
         self
         """
+        if self.svd_method not in ['lapack', 'randomized']:
+            raise ValueError('SVD method %s is not supported. Please consider'
+                             ' the documentation' % self.svd_method)
+
         X = self._validate_data(X, copy=self.copy, dtype=np.float64)
 
         n_samples, n_features = X.shape
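With the check moved out of __init__, an invalid svd_method no longer fails at construction time; it only surfaces on the first call to fit. A minimal sketch of the patched behaviour (the toy data is illustrative):

    import numpy as np
    from sklearn.decomposition import FactorAnalysis

    fa = FactorAnalysis(svd_method='foo')  # no error here any more
    X = np.random.RandomState(0).rand(10, 3)
    try:
        fa.fit(X)  # the ValueError is raised here instead
    except ValueError as exc:
        print(exc)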
6 changes: 3 additions & 3 deletions sklearn/decomposition/_fastica.py
@@ -397,9 +397,6 @@ def __init__(self, n_components=None, *, algorithm='parallel', whiten=True,
                  fun='logcosh', fun_args=None, max_iter=200, tol=1e-4,
                  w_init=None, random_state=None):
         super().__init__()
-        if max_iter < 1:
-            raise ValueError("max_iter should be greater than 1, got "
-                             "(max_iter={})".format(max_iter))
         self.n_components = n_components
         self.algorithm = algorithm
         self.whiten = whiten
@@ -427,6 +424,9 @@ def _fit(self, X, compute_sources=False):
         -------
         X_new : array-like, shape (n_samples, n_components)
         """
+        if self.max_iter < 1:
+            raise ValueError("max_iter should be greater than 1, got "
+                             "(max_iter={})".format(self.max_iter))
 
         X = self._validate_data(X, copy=self.whiten, dtype=FLOAT_DTYPES,
                                 ensure_min_samples=2).T
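The same pattern applies to FastICA. Deferring validation matters because clone and set_params assume __init__ does nothing beyond storing the parameters it was given. A short sketch of the contract this preserves (values are illustrative):

    from sklearn.base import clone
    from sklearn.decomposition import FastICA

    ica = FastICA(max_iter=0)  # invalid, but accepted until fit
    ica_copy = clone(ica)      # clone round-trips get_params() into __init__
    assert ica_copy.get_params()['max_iter'] == 0
    # fitting either instance is what finally raises the ValueError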
7 changes: 4 additions & 3 deletions sklearn/decomposition/_kernel_pca.py
@@ -145,9 +145,6 @@ def __init__(self, n_components=None, *, kernel="linear",
                  alpha=1.0, fit_inverse_transform=False, eigen_solver='auto',
                  tol=0, max_iter=None, remove_zero_eig=False,
                  random_state=None, copy_X=True, n_jobs=None):
-        if fit_inverse_transform and kernel == 'precomputed':
-            raise ValueError(
-                "Cannot fit_inverse_transform with a precomputed kernel.")
         self.n_components = n_components
         self.kernel = kernel
         self.kernel_params = kernel_params
@@ -275,6 +272,10 @@ def fit(self, X, y=None):
         self : object
             Returns the instance itself.
         """
+        if self.fit_inverse_transform and self.kernel == 'precomputed':
+            raise ValueError(
+                "Cannot fit_inverse_transform with a precomputed kernel.")
+
         X = self._validate_data(X, accept_sparse='csr', copy=self.copy_X)
         self._centerer = KernelCenterer()
         K = self._get_kernel(X)
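For KernelPCA the invalid parameter combination can now also be repaired through set_params before fitting, which is the access pattern hyper-parameter search tools rely on. A sketch, assuming the patched behaviour above:

    from sklearn.decomposition import KernelPCA

    kpca = KernelPCA(n_components=2, fit_inverse_transform=True,
                     kernel='precomputed')  # accepted at construction
    kpca.set_params(kernel='rbf')           # combination fixed before fit
    # fitting with kernel='precomputed' would have raised the ValueError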
2 changes: 1 addition & 1 deletion sklearn/decomposition/tests/test_factor_analysis.py
@@ -33,7 +33,7 @@ def test_factor_analysis():
     X = np.dot(h, W) + noise
 
     with pytest.raises(ValueError):
-        FactorAnalysis(svd_method='foo')
+        FactorAnalysis(svd_method='foo').fit(X)
     fa_fail = FactorAnalysis()
     fa_fail.svd_method = 'foo'
     with pytest.raises(ValueError):
2 changes: 1 addition & 1 deletion sklearn/decomposition/tests/test_fastica.py
@@ -271,7 +271,7 @@ def test_fastica_errors():
     X = rng.random_sample((n_samples, n_features))
     w_init = rng.randn(n_features + 1, n_features + 1)
     with pytest.raises(ValueError, match='max_iter should be greater than 1'):
-        FastICA(max_iter=0)
+        FastICA(max_iter=0).fit(X)
     with pytest.raises(ValueError, match=r'alpha must be in \[1,2\]'):
         fastica(X, fun_args={'alpha': 0})
     with pytest.raises(ValueError, match='w_init has invalid shape.+'
4 changes: 3 additions & 1 deletion sklearn/decomposition/tests/test_kernel_pca.py
@@ -55,8 +55,10 @@ def histogram(x, y, **kwargs):
 
 
 def test_kernel_pca_invalid_parameters():
+    state = np.random.RandomState(0)
+    X = state.rand(10, 10)
     with pytest.raises(ValueError):
-        KernelPCA(10, fit_inverse_transform=True, kernel='precomputed')
+        KernelPCA(10, fit_inverse_transform=True, kernel='precomputed').fit(X)
 
 
 def test_kernel_pca_consistent_transform():
3 changes: 1 addition & 2 deletions sklearn/feature_extraction/_hash.py
@@ -92,8 +92,6 @@ class FeatureHasher(TransformerMixin, BaseEstimator):
     @_deprecate_positional_args
     def __init__(self, n_features=(2 ** 20), *, input_type="dict",
                  dtype=np.float64, alternate_sign=True):
-        self._validate_params(n_features, input_type)
-
         self.dtype = dtype
         self.input_type = input_type
         self.n_features = n_features
@@ -150,6 +148,7 @@ def transform(self, raw_X):
             Feature matrix, for use with estimators or further transformers.
 
         """
+        self._validate_params(self.n_features, self.input_type)
         raw_X = iter(raw_X)
         if self.input_type == "dict":
             raw_X = (_iteritems(d) for d in raw_X)
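FeatureHasher is stateless and its fit is a no-op, so here the validation moves into transform rather than fit. A minimal sketch of the resulting behaviour (the toy input is illustrative):

    from sklearn.feature_extraction import FeatureHasher

    h = FeatureHasher(n_features=0, input_type='dict')  # accepted here
    try:
        h.transform([{'a': 1}])  # ValueError raised on transform instead
    except ValueError as exc:
        print(exc)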
10 changes: 6 additions & 4 deletions sklearn/feature_extraction/tests/test_feature_hasher.py
@@ -112,14 +112,16 @@ def test_hash_empty_input():
 
 
 def test_hasher_invalid_input():
+    raw_X = [[], (), iter(range(0))]
+
     with pytest.raises(ValueError):
-        FeatureHasher(input_type="gobbledygook")
+        FeatureHasher(input_type="gobbledygook").transform(raw_X)
     with pytest.raises(ValueError):
-        FeatureHasher(n_features=-1)
+        FeatureHasher(n_features=-1).transform(raw_X)
     with pytest.raises(ValueError):
-        FeatureHasher(n_features=0)
+        FeatureHasher(n_features=0).transform(raw_X)
     with pytest.raises(TypeError):
-        FeatureHasher(n_features='ham')
+        FeatureHasher(n_features='ham').transform(raw_X)
 
     h = FeatureHasher(n_features=np.uint16(2 ** 6))
     with pytest.raises(ValueError):
4 changes: 4 additions & 0 deletions sklearn/feature_extraction/tests/test_text.py
@@ -512,12 +512,16 @@ def test_tfidf_vectorizer_setters():
     tv = TfidfVectorizer(norm='l2', use_idf=False, smooth_idf=False,
                          sublinear_tf=False)
     tv.norm = 'l1'
+    tv.fit(ALL_FOOD_DOCS)
     assert tv._tfidf.norm == 'l1'
     tv.use_idf = True
+    tv.fit(ALL_FOOD_DOCS)
     assert tv._tfidf.use_idf
     tv.smooth_idf = True
+    tv.fit(ALL_FOOD_DOCS)
     assert tv._tfidf.smooth_idf
     tv.sublinear_tf = True
+    tv.fit(ALL_FOOD_DOCS)
     assert tv._tfidf.sublinear_tf


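The added fit calls are needed because, with the text.py changes below, TfidfVectorizer keeps norm, use_idf, smooth_idf and sublinear_tf as plain attributes and only copies them into the underlying TfidfTransformer when fit builds it. So assertions on tv._tfidf must come after a fit; a sketch reusing the test module's ALL_FOOD_DOCS corpus:

    tv = TfidfVectorizer(norm='l2')
    tv.norm = 'l1'           # updates the public attribute only
    tv.fit(ALL_FOOD_DOCS)    # _tfidf is (re)built here with norm='l1'
    assert tv._tfidf.norm == 'l1'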
79 changes: 29 additions & 50 deletions sklearn/feature_extraction/text.py
@@ -1014,15 +1014,7 @@ def __init__(self, *, input='content', encoding='utf-8',
         self.stop_words = stop_words
         self.max_df = max_df
         self.min_df = min_df
-        if max_df < 0 or min_df < 0:
-            raise ValueError("negative value for max_df or min_df")
         self.max_features = max_features
-        if max_features is not None:
-            if (not isinstance(max_features, numbers.Integral) or
-                    max_features <= 0):
-                raise ValueError(
-                    "max_features=%r, neither a positive integer nor None"
-                    % max_features)
         self.ngram_range = ngram_range
         self.vocabulary = vocabulary
         self.binary = binary
@@ -1178,6 +1170,15 @@ def fit_transform(self, raw_documents, y=None):
         # We intentionally don't call the transform method to make
         # fit_transform overridable without unwanted side effects in
         # TfidfVectorizer.
+        if self.max_df < 0 or self.min_df < 0:
+            raise ValueError("negative value for max_df or min_df")
+        if self.max_features is not None:
+            if (not isinstance(self.max_features, numbers.Integral) or
+                    self.max_features <= 0):
+                raise ValueError(
+                    "max_features=%r, neither a positive integer nor None"
+                    % self.max_features)
+
         if isinstance(raw_documents, str):
             raise ValueError(
                 "Iterable over raw text documents expected, "
@@ -1730,49 +1731,18 @@ def __init__(self, *, input='content', encoding='utf-8',
             ngram_range=ngram_range, max_df=max_df, min_df=min_df,
             max_features=max_features, vocabulary=vocabulary, binary=binary,
             dtype=dtype)
-
-        self._tfidf = TfidfTransformer(norm=norm, use_idf=use_idf,
-                                       smooth_idf=smooth_idf,
-                                       sublinear_tf=sublinear_tf)
-
-    # Broadcast the TF-IDF parameters to the underlying transformer instance
-    # for easy grid search and repr
-
-    @property
-    def norm(self):
-        return self._tfidf.norm
-
-    @norm.setter
-    def norm(self, value):
-        self._tfidf.norm = value
-
-    @property
-    def use_idf(self):
-        return self._tfidf.use_idf
-
-    @use_idf.setter
-    def use_idf(self, value):
-        self._tfidf.use_idf = value
-
-    @property
-    def smooth_idf(self):
-        return self._tfidf.smooth_idf
-
-    @smooth_idf.setter
-    def smooth_idf(self, value):
-        self._tfidf.smooth_idf = value
-
-    @property
-    def sublinear_tf(self):
-        return self._tfidf.sublinear_tf
-
-    @sublinear_tf.setter
-    def sublinear_tf(self, value):
-        self._tfidf.sublinear_tf = value
+        self.norm = norm
+        self.use_idf = use_idf
+        self.smooth_idf = smooth_idf
+        self.sublinear_tf = sublinear_tf
 
     @property
     def idf_(self):
-        return self._tfidf.idf_
+        try:
+            check_is_fitted(self)
+            return self._tfidf.idf_
+        except NotFittedError:
+            raise AttributeError
 
     @idf_.setter
     def idf_(self, value):
@@ -1782,6 +1752,11 @@ def idf_(self, value):
                 raise ValueError("idf length = %d must be equal "
                                  "to vocabulary size = %d" %
                                  (len(value), len(self.vocabulary)))
+        if not hasattr(self, '_tfidf'):
+            self._tfidf = TfidfTransformer(norm=self.norm,
+                                           use_idf=self.use_idf,
+                                           smooth_idf=self.smooth_idf,
+                                           sublinear_tf=self.sublinear_tf)
         self._tfidf.idf_ = value
 
     def _check_params(self):
@@ -1809,7 +1784,9 @@ def fit(self, raw_documents, y=None):
         self._check_params()
         self._warn_for_unused_params()
         X = super().fit_transform(raw_documents)
-        self._tfidf.fit(X)
+        self._tfidf = TfidfTransformer(norm=self.norm, use_idf=self.use_idf,
+                                       smooth_idf=self.smooth_idf,
+                                       sublinear_tf=self.sublinear_tf).fit(X)
         return self
 
     def fit_transform(self, raw_documents, y=None):
@@ -1832,7 +1809,9 @@ def fit_transform(self, raw_documents, y=None):
         """
         self._check_params()
         X = super().fit_transform(raw_documents)
-        self._tfidf.fit(X)
+        self._tfidf = TfidfTransformer(norm=self.norm, use_idf=self.use_idf,
+                                       smooth_idf=self.smooth_idf,
+                                       sublinear_tf=self.sublinear_tf).fit(X)
         # X is already a transformed view of raw_documents so
         # we set copy to False
         return self._tfidf.transform(X, copy=False)
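Net effect of the text.py changes: the property/setter broadcasting machinery is gone, TfidfVectorizer stores its TF-IDF parameters as plain attributes, and a fresh TfidfTransformer is built from them inside fit and fit_transform. A condensed sketch of the pattern (a simplified stand-in, not the full class):

    from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer

    class TfidfVectorizerSketch(CountVectorizer):
        def __init__(self, *, norm='l2', use_idf=True,
                     smooth_idf=True, sublinear_tf=False):
            super().__init__()
            self.norm = norm  # stored as-is, no sub-estimator yet
            self.use_idf = use_idf
            self.smooth_idf = smooth_idf
            self.sublinear_tf = sublinear_tf

        def fit(self, raw_documents, y=None):
            X = super().fit_transform(raw_documents)
            # the sub-estimator is created at fit time from current params
            self._tfidf = TfidfTransformer(
                norm=self.norm, use_idf=self.use_idf,
                smooth_idf=self.smooth_idf,
                sublinear_tf=self.sublinear_tf).fit(X)
            return self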
2 changes: 0 additions & 2 deletions sklearn/linear_model/_stochastic_gradient.py
@@ -98,7 +98,6 @@ def __init__(self, loss, *, penalty='l2', alpha=0.0001, C=1.0,
         self.tol = tol
         # current tests expect init to do parameter validation
         # but we are not allowed to set attributes
-        self._validate_params()
 
     def set_params(self, **kwargs):
         """Set and validate the parameters of estimator.
@@ -114,7 +113,6 @@ def set_params(self, **kwargs):
             Estimator instance.
         """
         super().set_params(**kwargs)
-        self._validate_params()
         return self
 
     @abstractmethod
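The _stochastic_gradient.py deletions complete the same convention across the touched modules: __init__ and set_params only store parameters, and every check runs at fit time. A minimal sketch of an estimator following this contract (names are illustrative):

    from sklearn.base import BaseEstimator

    class MyEstimator(BaseEstimator):
        def __init__(self, alpha=1.0):
            self.alpha = alpha  # store only; never validate here

        def fit(self, X, y=None):
            if self.alpha <= 0:  # validation deferred to fit
                raise ValueError(
                    "alpha must be positive, got %r" % self.alpha)
            return self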