From e75b16b3fc47a5b4e306e5b2bdb6a60f57b8b766 Mon Sep 17 00:00:00 2001
From: Jenny Vo-Phamhi <jennyvophamhi@gmail.com>
Date: Sat, 2 Oct 2021 10:50:21 -0700
Subject: [PATCH 1/2] Address all but one numpydoc validation test

---
 maint_tools/test_docstrings.py              |  2 +-
 sklearn/linear_model/_passive_aggressive.py | 73 +++++++++++----------
 2 files changed, 39 insertions(+), 36 deletions(-)

diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py
index d4c30f327e73e..09da4d864e615 100644
--- a/maint_tools/test_docstrings.py
+++ b/maint_tools/test_docstrings.py
@@ -20,7 +20,7 @@
     "MultiTaskLassoCV",
     "OrthogonalMatchingPursuit",
     "OrthogonalMatchingPursuitCV",
-    "PassiveAggressiveClassifier",
+    # "PassiveAggressiveClassifier",
     "PassiveAggressiveRegressor",
     "PatchExtractor",
     "PolynomialFeatures",
diff --git a/sklearn/linear_model/_passive_aggressive.py b/sklearn/linear_model/_passive_aggressive.py
index 90e21087ce344..610b1872549bd 100644
--- a/sklearn/linear_model/_passive_aggressive.py
+++ b/sklearn/linear_model/_passive_aggressive.py
@@ -13,7 +13,6 @@ class PassiveAggressiveClassifier(BaseSGDClassifier):
 
     Parameters
     ----------
-
     C : float, default=1.0
         Maximum step size (regularization). Defaults to 1.0.
 
@@ -58,10 +57,10 @@ class PassiveAggressiveClassifier(BaseSGDClassifier):
     shuffle : bool, default=True
         Whether or not the training data should be shuffled after each epoch.
 
-    verbose : integer, default=0
-        The verbosity level
+    verbose : int, default=0
+        The verbosity level.
 
-    loss : string, default="hinge"
+    loss : str, default="hinge"
         The loss function to be used:
         hinge: equivalent to PA-I in the reference paper.
         squared_hinge: equivalent to PA-II in the reference paper.
@@ -97,7 +96,7 @@ class PassiveAggressiveClassifier(BaseSGDClassifier):
 
         The "balanced" mode uses the values of y to automatically adjust
         weights inversely proportional to class frequencies in the input data
-        as ``n_samples / (n_classes * np.bincount(y))``
+        as ``n_samples / (n_classes * np.bincount(y))``.
 
         .. versionadded:: 0.17
            parameter *class_weight* to automatically weight samples.
@@ -109,7 +108,7 @@ class PassiveAggressiveClassifier(BaseSGDClassifier):
         average. So average=10 will begin averaging after seeing 10 samples.
 
         .. versionadded:: 0.19
-           parameter *average* to use weights averaging in SGD
+           parameter *average* to use weights averaging in SGD.
 
     Attributes
     ----------
@@ -145,6 +144,17 @@ class PassiveAggressiveClassifier(BaseSGDClassifier):
     loss_function_ : callable
         Loss function used by the algorithm.
 
+    See Also
+    --------
+    SGDClassifier : Incrementally trained linear classifier (SVM, logistic regression).
+    Perceptron : Linear perceptron classifier.
+
+    References
+    ----------
+    Online Passive-Aggressive Algorithms
+    <http://jmlr.csail.mit.edu/papers/volume7/crammer06a/crammer06a.pdf>
+    K. Crammer, O. Dekel, J. Keshat, S. Shalev-Shwartz, Y. Singer - JMLR (2006).
+
     Examples
     --------
     >>> from sklearn.linear_model import PassiveAggressiveClassifier
@@ -161,18 +171,6 @@ class PassiveAggressiveClassifier(BaseSGDClassifier):
     [1.84127814]
     >>> print(clf.predict([[0, 0, 0, 0]]))
     [1]
-
-    See Also
-    --------
-    SGDClassifier
-    Perceptron
-
-    References
-    ----------
-    Online Passive-Aggressive Algorithms
-    <http://jmlr.csail.mit.edu/papers/volume7/crammer06a/crammer06a.pdf>
-    K. Crammer, O. Dekel, J. Keshat, S. Shalev-Shwartz, Y. Singer - JMLR (2006)
-
     """
 
     def __init__(
@@ -221,10 +219,10 @@ def partial_fit(self, X, y, classes=None):
         Parameters
         ----------
         X : {array-like, sparse matrix} of shape (n_samples, n_features)
-            Subset of the training data
+            Subset of the training data.
 
         y : numpy array of shape [n_samples]
-            Subset of the target values
+            Subset of the target values.
 
         classes : array, shape = [n_classes]
             Classes across all calls to partial_fit.
@@ -236,7 +234,8 @@ def partial_fit(self, X, y, classes=None):
 
         Returns
         -------
-        self : returns an instance of self.
+        self : object
+            Fitted estimator.
         """
         self._validate_params(for_partial_fit=True)
         if self.class_weight == "balanced":
@@ -272,10 +271,10 @@ def fit(self, X, y, coef_init=None, intercept_init=None):
         Parameters
         ----------
         X : {array-like, sparse matrix} of shape (n_samples, n_features)
-            Training data
+            Training data.
 
         y : numpy array of shape [n_samples]
-            Target values
+            Target values.
 
         coef_init : array, shape = [n_classes,n_features]
             The initial coefficients to warm-start the optimization.
@@ -285,7 +284,8 @@ def fit(self, X, y, coef_init=None, intercept_init=None):
 
         Returns
         -------
-        self : returns an instance of self.
+        self : object
+            Fitted estimator.
         """
         self._validate_params()
         lr = "pa1" if self.loss == "hinge" else "pa2"
@@ -354,9 +354,9 @@ class PassiveAggressiveRegressor(BaseSGDRegressor):
         Whether or not the training data should be shuffled after each epoch.
 
     verbose : integer, default=0
-        The verbosity level
+        The verbosity level.
 
-    loss : string, default="epsilon_insensitive"
+    loss : str, default="epsilon_insensitive"
         The loss function to be used:
         epsilon_insensitive: equivalent to PA-I in the reference paper.
         squared_epsilon_insensitive: equivalent to PA-II in the reference
@@ -388,7 +388,7 @@ class PassiveAggressiveRegressor(BaseSGDRegressor):
         average. So average=10 will begin averaging after seeing 10 samples.
 
         .. versionadded:: 0.19
-           parameter *average* to use weights averaging in SGD
+           parameter *average* to use weights averaging in SGD.
 
     Attributes
     ----------
@@ -436,13 +436,14 @@ class PassiveAggressiveRegressor(BaseSGDRegressor):
 
     See Also
     --------
-    SGDRegressor
+    SGDRegressor : Linear model fitted by minimizing a regularized
+        empirical loss with SGD.
 
     References
     ----------
     Online Passive-Aggressive Algorithms
     <http://jmlr.csail.mit.edu/papers/volume7/crammer06a/crammer06a.pdf>
-    K. Crammer, O. Dekel, J. Keshat, S. Shalev-Shwartz, Y. Singer - JMLR (2006)
+    K. Crammer, O. Dekel, J. Keshat, S. Shalev-Shwartz, Y. Singer - JMLR (2006).
 
     """
 
@@ -490,14 +491,15 @@ def partial_fit(self, X, y):
         Parameters
         ----------
         X : {array-like, sparse matrix} of shape (n_samples, n_features)
-            Subset of training data
+            Subset of training data.
 
         y : numpy array of shape [n_samples]
-            Subset of target values
+            Subset of target values.
 
         Returns
         -------
-        self : returns an instance of self.
+        self : object
+            Fitted estimator.
         """
         self._validate_params(for_partial_fit=True)
         lr = "pa1" if self.loss == "epsilon_insensitive" else "pa2"
@@ -520,10 +522,10 @@ def fit(self, X, y, coef_init=None, intercept_init=None):
         Parameters
         ----------
         X : {array-like, sparse matrix} of shape (n_samples, n_features)
-            Training data
+            Training data.
 
         y : numpy array of shape [n_samples]
-            Target values
+            Target values.
 
         coef_init : array, shape = [n_features]
             The initial coefficients to warm-start the optimization.
@@ -533,7 +535,8 @@ def fit(self, X, y, coef_init=None, intercept_init=None):
 
         Returns
         -------
-        self : returns an instance of self.
+        self : object
+            Fitted estimator.
         """
         self._validate_params()
         lr = "pa1" if self.loss == "epsilon_insensitive" else "pa2"

From a268b33f8873b27cf8db92d25e194399780e1bdc Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Tue, 12 Oct 2021 22:06:25 +0200
Subject: [PATCH 2/2] Fix remaining numpydoc issues in PassiveAggressiveClassifier

---
 maint_tools/test_docstrings.py              |  1 -
 sklearn/linear_model/_passive_aggressive.py | 21 ++++++++++-----------
 2 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py
index 09da4d864e615..2cc09ff0b95e0 100644
--- a/maint_tools/test_docstrings.py
+++ b/maint_tools/test_docstrings.py
@@ -20,7 +20,6 @@
     "MultiTaskLassoCV",
     "OrthogonalMatchingPursuit",
     "OrthogonalMatchingPursuitCV",
-    # "PassiveAggressiveClassifier",
     "PassiveAggressiveRegressor",
     "PatchExtractor",
     "PolynomialFeatures",
diff --git a/sklearn/linear_model/_passive_aggressive.py b/sklearn/linear_model/_passive_aggressive.py
index 610b1872549bd..fc5286c235a70 100644
--- a/sklearn/linear_model/_passive_aggressive.py
+++ b/sklearn/linear_model/_passive_aggressive.py
@@ -7,7 +7,7 @@
 
 
 class PassiveAggressiveClassifier(BaseSGDClassifier):
-    """Passive Aggressive Classifier
+    """Passive Aggressive Classifier.
 
     Read more in the :ref:`User Guide <passive_aggressive>`.
 
@@ -112,11 +112,11 @@ class PassiveAggressiveClassifier(BaseSGDClassifier):
 
     Attributes
     ----------
-    coef_ : array, shape = [1, n_features] if n_classes == 2 else [n_classes,\
-            n_features]
+    coef_ : ndarray of shape (1, n_features) if n_classes == 2 else \
+            (n_classes, n_features)
         Weights assigned to the features.
 
-    intercept_ : array, shape = [1] if n_classes == 2 else [n_classes]
+    intercept_ : ndarray of shape (1,) if n_classes == 2 else (n_classes,)
         Constants in decision function.
 
     n_features_in_ : int
@@ -134,7 +134,7 @@ class PassiveAggressiveClassifier(BaseSGDClassifier):
         The actual number of iterations to reach the stopping criterion.
         For multiclass fits, it is the maximum over every binary fit.
 
-    classes_ : array of shape (n_classes,)
+    classes_ : ndarray of shape (n_classes,)
         The unique classes labels.
 
     t_ : int
@@ -159,7 +159,6 @@ class PassiveAggressiveClassifier(BaseSGDClassifier):
     --------
     >>> from sklearn.linear_model import PassiveAggressiveClassifier
     >>> from sklearn.datasets import make_classification
-
     >>> X, y = make_classification(n_features=4, random_state=0)
     >>> clf = PassiveAggressiveClassifier(max_iter=1000, random_state=0,
     ... tol=1e-3)
@@ -221,10 +220,10 @@ def partial_fit(self, X, y, classes=None):
         X : {array-like, sparse matrix} of shape (n_samples, n_features)
             Subset of the training data.
 
-        y : numpy array of shape [n_samples]
+        y : array-like of shape (n_samples,)
             Subset of the target values.
 
-        classes : array, shape = [n_classes]
+        classes : ndarray of shape (n_classes,)
             Classes across all calls to partial_fit.
             Can be obtained by via `np.unique(y_all)`, where y_all is the
             target vector of the entire dataset.
@@ -273,13 +272,13 @@ def fit(self, X, y, coef_init=None, intercept_init=None):
         X : {array-like, sparse matrix} of shape (n_samples, n_features)
             Training data.
 
-        y : numpy array of shape [n_samples]
+        y : array-like of shape (n_samples,)
             Target values.
 
-        coef_init : array, shape = [n_classes,n_features]
+        coef_init : ndarray of shape (n_classes, n_features)
             The initial coefficients to warm-start the optimization.
 
-        intercept_init : array, shape = [n_classes]
+        intercept_init : ndarray of shape (n_classes,)
             The initial intercept to warm-start the optimization.
 
         Returns