DOC Ensures that AdditiveChi2Sampler passes numpydoc validation #20536

Merged: 6 commits, Jul 20, 2021
Changes from all commits
1 change: 0 additions & 1 deletion maint_tools/test_docstrings.py
@@ -9,7 +9,6 @@

# List of modules ignored when checking for numpydoc validation.
DOCSTRING_IGNORE_LIST = [
"AdditiveChi2Sampler",
"AgglomerativeClustering",
"BernoulliRBM",
"Birch",
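Dropping `AdditiveChi2Sampler` from this ignore list (shown truncated above) is what opts the class into the repository-wide numpydoc check. As a minimal sketch of what that check boils down to, assuming numpydoc's public `validate` helper (the real test in maint_tools/test_docstrings.py adds its own wiring and filtering):

    from numpydoc.validate import validate

    # Validate the docstring of a fully qualified object. The returned dict
    # carries an "errors" list of (error_code, message) pairs, which this PR
    # empties for AdditiveChi2Sampler.
    report = validate("sklearn.kernel_approximation.AdditiveChi2Sampler")
    print(report["errors"])  # [] once the docstring conforms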
123 changes: 75 additions & 48 deletions sklearn/kernel_approximation.py
@@ -57,10 +57,10 @@ class PolynomialCountSketch(BaseEstimator, TransformerMixin):
will be approximated.

n_components : int, default=100
Dimensionality of the output feature space. Usually, `n_components`
should be greater than the number of features in input samples in
order to achieve good performance. The optimal score / run time
balance is typically achieved around `n_components` = 10 * `n_features`,
but this depends on the specific dataset being used.

random_state : int, RandomState instance, default=None
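Picking up the `n_components` rule of thumb above, a quick sketch with illustrative numbers (not part of this PR):

    import numpy as np
    from sklearn.kernel_approximation import PolynomialCountSketch

    X = np.random.RandomState(0).rand(200, 8)  # 200 samples, 8 features
    # Rule of thumb from the docstring: n_components around 10 * n_features.
    ps = PolynomialCountSketch(degree=2, n_components=10 * X.shape[1],
                               random_state=0)
    X_new = ps.fit_transform(X)  # shape (200, 80)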
@@ -116,8 +116,12 @@ def fit(self, X, y=None):
Parameters
----------
X : {array-like, sparse matrix} of shape (n_samples, n_features)
Training data, where `n_samples` is the number of samples
and `n_features` is the number of features.

y : array-like of shape (n_samples,) or (n_samples, n_outputs), \
default=None
Target values (None for unsupervised transformations).

Returns
-------
@@ -147,12 +151,13 @@ def transform(self, X):
Parameters
----------
X : {array-like}, shape (n_samples, n_features)
New data, where `n_samples` is the number of samples
and `n_features` is the number of features.

Returns
-------
X_new : array-like, shape (n_samples, n_components)
Projected array.
"""

check_is_fitted(self)
@@ -215,7 +220,7 @@ class RBFSampler(TransformerMixin, BaseEstimator):
Parameters
----------
gamma : float, default=1.0
Parameter of RBF kernel: exp(-gamma * x^2).

n_components : int, default=100
Number of Monte Carlo samples per original feature.
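To see how `gamma` and `n_components` play together in practice, a hedged sketch of typical RBFSampler usage (example values only, not introduced by this PR):

    import numpy as np
    from sklearn.kernel_approximation import RBFSampler
    from sklearn.linear_model import SGDClassifier

    X = np.array([[0, 0], [1, 1], [1, 0], [0, 1]])
    y = np.array([0, 0, 1, 1])
    # Map 2 input features to 100 Monte Carlo features of the RBF kernel.
    rbf = RBFSampler(gamma=1.0, n_components=100, random_state=1)
    X_features = rbf.fit_transform(X)
    clf = SGDClassifier(max_iter=100, tol=1e-3).fit(X_features, y)
    print(clf.score(X_features, y))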
@@ -281,8 +286,12 @@ def fit(self, X, y=None):
Parameters
----------
X : {array-like, sparse matrix}, shape (n_samples, n_features)
Training data, where `n_samples` is the number of samples
and `n_features` is the number of features.

y : array-like, shape (n_samples,) or (n_samples, n_outputs), \
default=None
Target values (None for unsupervised transformations).

Returns
-------
@@ -307,12 +316,13 @@ def transform(self, X):
Parameters
----------
X : {array-like, sparse matrix}, shape (n_samples, n_features)
New data, where `n_samples` is the number of samples
and `n_features` is the number of features.

Returns
-------
X_new : array-like, shape (n_samples, n_components)
Projected array.
"""
check_is_fitted(self)

@@ -336,7 +346,7 @@ class SkewedChi2Sampler(TransformerMixin, BaseEstimator):
"skewedness" parameter of the kernel. Needs to be cross-validated.

n_components : int, default=100
Number of Monte Carlo samples per original feature.
Equals the dimensionality of the computed feature space.

random_state : int, RandomState instance or None, default=None
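A brief sketch of the sampler in use, with illustrative values (the `skewedness` shown is arbitrary, not a recommendation from this PR); every input value must stay strictly above `-skewedness`, so nonnegative data is the easy case:

    import numpy as np
    from sklearn.kernel_approximation import SkewedChi2Sampler

    # Nonnegative, histogram-like data keeps all values above -skewedness.
    X = np.abs(np.random.RandomState(0).randn(50, 4))
    sk = SkewedChi2Sampler(skewedness=0.01, n_components=50, random_state=0)
    X_new = sk.fit_transform(X)  # shape (50, 50)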
@@ -402,8 +412,12 @@ def fit(self, X, y=None):
Parameters
----------
X : array-like, shape (n_samples, n_features)
Training data, where `n_samples` is the number of samples
and `n_features` is the number of features.

y : array-like, shape (n_samples,) or (n_samples, n_outputs), \
default=None
Target values (None for unsupervised transformations).

Returns
-------
@@ -426,13 +440,14 @@ def transform(self, X):
Parameters
----------
X : array-like, shape (n_samples, n_features)
New data, where `n_samples` is the number of samples
and `n_features` is the number of features. All values of X must be
strictly greater than "-skewedness".

Returns
-------
X_new : array-like, shape (n_samples, n_components)
Projected array.
"""
check_is_fitted(self)

@@ -471,39 +486,21 @@ class AdditiveChi2Sampler(TransformerMixin, BaseEstimator):
----------
sample_steps : int, default=2
Gives the number of (complex) sampling points.

sample_interval : float, default=None
Sampling interval. Must be specified when sample_steps not in {1,2,3}.

Attributes
----------
sample_interval_ : float
Stored sampling interval. Specified as a parameter if `sample_steps`
not in {1,2,3}.

n_features_in_ : int
Number of features seen during :term:`fit`.

.. versionadded:: 0.24

See Also
--------
SkewedChi2Sampler : A Fourier-approximation to a non-additive variant of
@@ -514,26 +511,49 @@ class AdditiveChi2Sampler(TransformerMixin, BaseEstimator):
sklearn.metrics.pairwise.additive_chi2_kernel : The exact additive chi
squared kernel.

Notes
-----
This estimator approximates a slightly different version of the additive
chi squared kernel than ``metric.additive_chi2`` computes.

References
----------
See `"Efficient additive kernels via explicit feature maps"
<http://www.robots.ox.ac.uk/~vedaldi/assets/pubs/vedaldi11efficient.pdf>`_
A. Vedaldi and A. Zisserman, Pattern Analysis and Machine Intelligence,
2011.

Examples
--------
>>> from sklearn.datasets import load_digits
>>> from sklearn.linear_model import SGDClassifier
>>> from sklearn.kernel_approximation import AdditiveChi2Sampler
>>> X, y = load_digits(return_X_y=True)
>>> chi2sampler = AdditiveChi2Sampler(sample_steps=2)
>>> X_transformed = chi2sampler.fit_transform(X, y)
>>> clf = SGDClassifier(max_iter=5, random_state=0, tol=1e-3)
>>> clf.fit(X_transformed, y)
SGDClassifier(max_iter=5, random_state=0)
>>> clf.score(X_transformed, y)
0.9499...
"""

def __init__(self, *, sample_steps=2, sample_interval=None):
self.sample_steps = sample_steps
self.sample_interval = sample_interval

def fit(self, X, y=None):
"""Set the parameters
"""Set the parameters.

Parameters
----------
X : array-like, shape (n_samples, n_features)
Training data, where `n_samples` is the number of samples
and `n_features` is the number of features.

y : array-like, shape (n_samples,) or (n_samples, n_outputs), \
default=None
Target values (None for unsupervised transformations).

Returns
-------
@@ -565,13 +585,15 @@ def transform(self, X):

Parameters
----------
X : {array-like, sparse matrix}, shape (n_samples, n_features)
Training data, where `n_samples` is the number of samples
and `n_features` is the number of features.

Returns
-------
X_new : {ndarray, sparse matrix}, \
shape = (n_samples, n_features * (2*sample_steps + 1))
Whether the return value is an array or sparse matrix depends on
the type of the input X.
"""
msg = (
@@ -664,7 +686,7 @@ class Nystroem(TransformerMixin, BaseEstimator):
----------
kernel : string or callable, default='rbf'
Kernel map to be approximated. A callable should accept two arguments
and the keyword arguments passed to this object as `kernel_params`, and
should return a floating point number.

gamma : float, default=None
@@ -690,14 +712,14 @@

random_state : int, RandomState instance or None, default=None
Pseudo-random number generator to control the uniform sampling without
replacement of `n_components` of the training data to construct the
basis kernel.
Pass an int for reproducible output across multiple function calls.
See :term:`Glossary <random_state>`.

n_jobs : int, default=None
The number of jobs to use for the computation. This works by breaking
down the kernel matrix into `n_jobs` even slices and computing them in
parallel.

``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
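To make the basis-sampling description above concrete, a small sketch (parameter values are illustrative, not from this PR): `n_components` training rows are drawn uniformly without replacement, and the feature map comes from the kernel evaluated against that basis.

    import numpy as np
    from sklearn.kernel_approximation import Nystroem

    X = np.random.RandomState(0).rand(100, 5)
    # 50 rows sampled without replacement (seeded by random_state) form
    # the basis of the approximate kernel map.
    ny = Nystroem(kernel="rbf", gamma=0.2, n_components=50, random_state=0)
    X_new = ny.fit_transform(X)  # shape (100, 50)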
@@ -789,8 +811,13 @@ def fit(self, X, y=None):

Parameters
----------
X : array-like, shape (n_samples, n_features)
Training data, where `n_samples` is the number of samples
and `n_features` is the number of features.

y : array-like, shape (n_samples,) or (n_samples, n_outputs), \
default=None
Target values (None for unsupervised transformations).
"""
X = self._validate_data(X, accept_sparse="csr")
rnd = check_random_state(self.random_state)