Skip to content

Commit fdce26a

Browse files
authored
DOC Ensures that SelfTrainingClassifier passes numpydoc validation (#21277)
* Remove SelfTrainingClassifier from DOCSTRING_IGNORE_LIST * Fix numpydocs from SelfTrainingClassifier * Change docstrings to maintain consistency
1 parent 8ad7c3f commit fdce26a

File tree

2 files changed

+30
-26
lines changed

2 files changed

+30
-26
lines changed

maint_tools/test_docstrings.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
"PassiveAggressiveClassifier",
2424
"PassiveAggressiveRegressor",
2525
"QuadraticDiscriminantAnalysis",
26-
"SelfTrainingClassifier",
2726
"SparseRandomProjection",
2827
"SpectralBiclustering",
2928
"SpectralCoclustering",

sklearn/semi_supervised/_self_training.py

Lines changed: 30 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -37,30 +37,30 @@ class SelfTrainingClassifier(MetaEstimatorMixin, BaseEstimator):
3737
Parameters
3838
----------
3939
base_estimator : estimator object
40-
An estimator object implementing ``fit`` and ``predict_proba``.
41-
Invoking the ``fit`` method will fit a clone of the passed estimator,
42-
which will be stored in the ``base_estimator_`` attribute.
40+
An estimator object implementing `fit` and `predict_proba`.
41+
Invoking the `fit` method will fit a clone of the passed estimator,
42+
which will be stored in the `base_estimator_` attribute.
4343
4444
threshold : float, default=0.75
4545
The decision threshold for use with `criterion='threshold'`.
46-
Should be in [0, 1). When using the 'threshold' criterion, a
46+
Should be in [0, 1). When using the `'threshold'` criterion, a
4747
:ref:`well calibrated classifier <calibration>` should be used.
4848
4949
criterion : {'threshold', 'k_best'}, default='threshold'
5050
The selection criterion used to select which labels to add to the
51-
training set. If 'threshold', pseudo-labels with prediction
52-
probabilities above `threshold` are added to the dataset. If 'k_best',
51+
training set. If `'threshold'`, pseudo-labels with prediction
52+
probabilities above `threshold` are added to the dataset. If `'k_best'`,
5353
the `k_best` pseudo-labels with highest prediction probabilities are
5454
added to the dataset. When using the `'threshold'` criterion, a
5555
:ref:`well calibrated classifier <calibration>` should be used.
5656
5757
k_best : int, default=10
5858
The number of samples to add in each iteration. Only used when
59-
`criterion` is k_best'.
59+
`criterion='k_best'`.
6060
6161
max_iter : int or None, default=10
6262
Maximum number of iterations allowed. Should be greater than or equal
63-
to 0. If it is ``None``, the classifier will continue to predict labels
63+
to 0. If it is `None`, the classifier will continue to predict labels
6464
until no new pseudo-labels are added, or all unlabeled samples have
6565
been labeled.
6666
@@ -74,7 +74,7 @@ class SelfTrainingClassifier(MetaEstimatorMixin, BaseEstimator):
7474
7575
classes_ : ndarray or list of ndarray of shape (n_classes,)
7676
Class labels for each output. (Taken from the trained
77-
``base_estimator_``).
77+
`base_estimator_`).
7878
7979
transduction_ : ndarray of shape (n_samples,)
8080
The labels used for the final fit of the classifier, including
@@ -104,11 +104,24 @@ class SelfTrainingClassifier(MetaEstimatorMixin, BaseEstimator):
104104
termination_condition_ : {'max_iter', 'no_change', 'all_labeled'}
105105
The reason that fitting was stopped.
106106
107-
- 'max_iter': `n_iter_` reached `max_iter`.
108-
- 'no_change': no new labels were predicted.
109-
- 'all_labeled': all unlabeled samples were labeled before `max_iter`
107+
- `'max_iter'`: `n_iter_` reached `max_iter`.
108+
- `'no_change'`: no new labels were predicted.
109+
- `'all_labeled'`: all unlabeled samples were labeled before `max_iter`
110110
was reached.
111111
112+
See Also
113+
--------
114+
LabelPropagation : Label propagation classifier.
115+
LabelSpreading : Label spreading model for semi-supervised learning.
116+
117+
References
118+
----------
119+
David Yarowsky. 1995. Unsupervised word sense disambiguation rivaling
120+
supervised methods. In Proceedings of the 33rd annual meeting on
121+
Association for Computational Linguistics (ACL '95). Association for
122+
Computational Linguistics, Stroudsburg, PA, USA, 189-196. DOI:
123+
https://doi.org/10.3115/981658.981684
124+
112125
Examples
113126
--------
114127
>>> import numpy as np
@@ -123,14 +136,6 @@ class SelfTrainingClassifier(MetaEstimatorMixin, BaseEstimator):
123136
>>> self_training_model = SelfTrainingClassifier(svc)
124137
>>> self_training_model.fit(iris.data, iris.target)
125138
SelfTrainingClassifier(...)
126-
127-
References
128-
----------
129-
David Yarowsky. 1995. Unsupervised word sense disambiguation rivaling
130-
supervised methods. In Proceedings of the 33rd annual meeting on
131-
Association for Computational Linguistics (ACL '95). Association for
132-
Computational Linguistics, Stroudsburg, PA, USA, 189-196. DOI:
133-
https://doi.org/10.3115/981658.981684
134139
"""
135140

136141
_estimator_type = "classifier"
@@ -153,7 +158,7 @@ def __init__(
153158

154159
def fit(self, X, y):
155160
"""
156-
Fits this ``SelfTrainingClassifier`` to a dataset.
161+
Fit self-training classifier using `X`, `y` as training data.
157162
158163
Parameters
159164
----------
@@ -167,7 +172,7 @@ def fit(self, X, y):
167172
Returns
168173
-------
169174
self : object
170-
Returns an instance of self.
175+
Fitted estimator.
171176
"""
172177
# we need row slicing support for sparse matrices, but costly finiteness check
173178
# can be delegated to the base estimator.
@@ -281,7 +286,7 @@ def fit(self, X, y):
281286

282287
@if_delegate_has_method(delegate="base_estimator")
283288
def predict(self, X):
284-
"""Predict the classes of X.
289+
"""Predict the classes of `X`.
285290
286291
Parameters
287292
----------
@@ -326,7 +331,7 @@ def predict_proba(self, X):
326331

327332
@if_delegate_has_method(delegate="base_estimator")
328333
def decision_function(self, X):
329-
"""Calls decision function of the `base_estimator`.
334+
"""Call decision function of the `base_estimator`.
330335
331336
Parameters
332337
----------
@@ -372,7 +377,7 @@ def predict_log_proba(self, X):
372377

373378
@if_delegate_has_method(delegate="base_estimator")
374379
def score(self, X, y):
375-
"""Calls score on the `base_estimator`.
380+
"""Call score on the `base_estimator`.
376381
377382
Parameters
378383
----------

0 commit comments

Comments
 (0)