diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst
index 1338606e3a096..3beb29f1f3eda 100644
--- a/doc/whats_new/v1.0.rst
+++ b/doc/whats_new/v1.0.rst
@@ -527,6 +527,15 @@ Changelog
   coordinate descent solver. Otherwise, an error will be raised.
   :pr:`19391` by :user:`Shao Yang Hong`.
 
+- |API| Keyword validation has moved from `__init__` and `set_params` to `fit`
+  for the following estimators conforming to scikit-learn's conventions:
+  :class:`linear_model.SGDClassifier`,
+  :class:`linear_model.SGDRegressor`,
+  :class:`linear_model.SGDOneClassSVM`,
+  :class:`linear_model.PassiveAggressiveClassifier`, and
+  :class:`linear_model.PassiveAggressiveRegressor`.
+  :pr:`20683` by `Guillaume Lemaitre`_.
+
 :mod:`sklearn.manifold`
 .......................
 
diff --git a/sklearn/linear_model/_stochastic_gradient.py b/sklearn/linear_model/_stochastic_gradient.py
index 5be895abb1d19..a9466b3be19b6 100644
--- a/sklearn/linear_model/_stochastic_gradient.py
+++ b/sklearn/linear_model/_stochastic_gradient.py
@@ -121,26 +121,6 @@ def __init__(
         self.average = average
         self.max_iter = max_iter
         self.tol = tol
-        # current tests expect init to do parameter validation
-        # but we are not allowed to set attributes
-        self._validate_params()
-
-    def set_params(self, **kwargs):
-        """Set and validate the parameters of estimator.
-
-        Parameters
-        ----------
-        **kwargs : dict
-            Estimator parameters.
-
-        Returns
-        -------
-        self : object
-            Estimator instance.
-        """
-        super().set_params(**kwargs)
-        self._validate_params()
-        return self
 
     @abstractmethod
     def fit(self, X, y):
diff --git a/sklearn/linear_model/tests/test_passive_aggressive.py b/sklearn/linear_model/tests/test_passive_aggressive.py
index a287d61406cdd..3ff92bd69a43b 100644
--- a/sklearn/linear_model/tests/test_passive_aggressive.py
+++ b/sklearn/linear_model/tests/test_passive_aggressive.py
@@ -3,6 +3,7 @@
 
 import pytest
 
+from sklearn.base import is_classifier
 from sklearn.utils._testing import assert_array_almost_equal
 from sklearn.utils._testing import assert_array_equal
 from sklearn.utils._testing import assert_almost_equal
@@ -136,11 +137,13 @@ def test_classifier_correctness(loss):
     assert_array_almost_equal(clf1.w, clf2.coef_.ravel(), decimal=2)
 
 
-def test_classifier_undefined_methods():
+@pytest.mark.parametrize(
+    "response_method", ["predict_proba", "predict_log_proba", "transform"]
+)
+def test_classifier_undefined_methods(response_method):
     clf = PassiveAggressiveClassifier(max_iter=100)
-    for meth in ("predict_proba", "predict_log_proba", "transform"):
-        with pytest.raises(AttributeError):
-            getattr(clf, meth)
+    with pytest.raises(AttributeError):
+        getattr(clf, response_method)
 
 
 def test_class_weights():
@@ -279,6 +282,37 @@ def test_regressor_correctness(loss):
 
 def test_regressor_undefined_methods():
     reg = PassiveAggressiveRegressor(max_iter=100)
-    for meth in ("transform",):
-        with pytest.raises(AttributeError):
-            getattr(reg, meth)
+    with pytest.raises(AttributeError):
+        reg.transform(X)
+
+
+@pytest.mark.parametrize(
+    "klass", [PassiveAggressiveClassifier, PassiveAggressiveRegressor]
+)
+@pytest.mark.parametrize("fit_method", ["fit", "partial_fit"])
+@pytest.mark.parametrize(
+    "params, err_msg",
+    [
+        ({"loss": "foobar"}, "The loss foobar is not supported"),
+        ({"max_iter": -1}, "max_iter must be > zero"),
+        ({"shuffle": "false"}, "shuffle must be either True or False"),
+        ({"early_stopping": "false"}, "early_stopping must be either True or False"),
+        (
+            {"validation_fraction": -0.1},
+            r"validation_fraction must be in range \(0, 1\)",
+        ),
+        ({"n_iter_no_change": 0}, "n_iter_no_change must be >= 1"),
+    ],
+)
+def test_passive_aggressive_estimator_params_validation(
+    klass, fit_method, params, err_msg
+):
+    """Validate parameters in the different PassiveAggressive estimators."""
+    pa_estimator = klass(**params)
+
+    if is_classifier(pa_estimator) and fit_method == "partial_fit":
+        fit_params = {"classes": np.unique(y)}
+    else:
+        fit_params = {}
+    with pytest.raises(ValueError, match=err_msg):
+        getattr(pa_estimator, fit_method)(X, y, **fit_params)
diff --git a/sklearn/linear_model/tests/test_sgd.py b/sklearn/linear_model/tests/test_sgd.py
index 04abdcd9d6f0e..7171c860254ff 100644
--- a/sklearn/linear_model/tests/test_sgd.py
+++ b/sklearn/linear_model/tests/test_sgd.py
@@ -1,11 +1,11 @@
 import pickle
-import pytest
 
+import joblib
+import pytest
 import numpy as np
-from numpy.testing import assert_allclose
 import scipy.sparse as sp
-import joblib
 
+from sklearn.utils._testing import assert_allclose
 from sklearn.utils._testing import assert_array_equal
 from sklearn.utils._testing import assert_almost_equal
 from sklearn.utils._testing import assert_array_almost_equal
@@ -216,30 +216,55 @@ def asgd(klass, X, y, eta, alpha, weight_init=None, intercept_init=0.0):
 
 
 @pytest.mark.parametrize(
-    "klass", [SGDClassifier, SparseSGDClassifier, SGDRegressor, SparseSGDRegressor]
-)
-def test_sgd_bad_alpha(klass):
-    # Check whether expected ValueError on bad alpha
-    with pytest.raises(ValueError):
-        klass(alpha=-0.1)
-
-
-@pytest.mark.parametrize(
-    "klass", [SGDClassifier, SparseSGDClassifier, SGDRegressor, SparseSGDRegressor]
+    "klass",
+    [
+        SGDClassifier,
+        SparseSGDClassifier,
+        SGDRegressor,
+        SparseSGDRegressor,
+        SGDOneClassSVM,
+        SparseSGDOneClassSVM,
+    ],
 )
-def test_sgd_bad_penalty(klass):
-    # Check whether expected ValueError on bad penalty
-    with pytest.raises(ValueError):
-        klass(penalty="foobar", l1_ratio=0.85)
-
-
+@pytest.mark.parametrize("fit_method", ["fit", "partial_fit"])
 @pytest.mark.parametrize(
-    "klass", [SGDClassifier, SparseSGDClassifier, SGDRegressor, SparseSGDRegressor]
+    "params, err_msg",
+    [
+        ({"alpha": -0.1}, "alpha must be >= 0"),
+        ({"penalty": "foobar", "l1_ratio": 0.85}, "Penalty foobar is not supported"),
+        ({"loss": "foobar"}, "The loss foobar is not supported"),
+        ({"l1_ratio": 1.1}, r"l1_ratio must be in \[0, 1\]"),
+        ({"learning_rate": ""}, "learning rate  is not supported"),
+        ({"nu": -0.5}, r"nu must be in \(0, 1]"),
+        ({"nu": 2}, r"nu must be in \(0, 1]"),
+        ({"alpha": 0, "learning_rate": "optimal"}, "alpha must be > 0"),
+        ({"eta0": 0, "learning_rate": "constant"}, "eta0 must be > 0"),
+        ({"max_iter": -1}, "max_iter must be > zero"),
+        ({"shuffle": "false"}, "shuffle must be either True or False"),
+        ({"early_stopping": "false"}, "early_stopping must be either True or False"),
+        (
+            {"validation_fraction": -0.1},
+            r"validation_fraction must be in range \(0, 1\)",
+        ),
+        ({"n_iter_no_change": 0}, "n_iter_no_change must be >= 1"),
+    ],
 )
-def test_sgd_bad_loss(klass):
-    # Check whether expected ValueError on bad loss
-    with pytest.raises(ValueError):
-        klass(loss="foobar")
+def test_sgd_estimator_params_validation(klass, fit_method, params, err_msg):
+    """Validate parameters in the different SGD estimators."""
+    try:
+        sgd_estimator = klass(**params)
+    except TypeError as err:
+        if "__init__() got an unexpected keyword argument" in str(err):
+            # skip test if the parameter is not supported by the estimator
+            return
+        raise
+
+    if is_classifier(sgd_estimator) and fit_method == "partial_fit":
+        fit_params = {"classes": np.unique(Y)}
+    else:
+        fit_params = {}
+    with pytest.raises(ValueError, match=err_msg):
+        getattr(sgd_estimator, fit_method)(X, Y, **fit_params)
 
 
 def _test_warm_start(klass, X, Y, lr):
@@ -408,16 +433,6 @@ def test_late_onset_averaging_reached(klass):
     assert_almost_equal(clf1.intercept_, average_intercept, decimal=16)
 
 
-@pytest.mark.parametrize(
-    "klass", [SGDClassifier, SparseSGDClassifier, SGDRegressor, SparseSGDRegressor]
-)
-def test_sgd_bad_alpha_for_optimal_learning_rate(klass):
-    # Check whether expected ValueError on bad alpha, i.e. 0
-    # since alpha is used to compute the optimal learning rate
-    with pytest.raises(ValueError):
-        klass(alpha=0, learning_rate="optimal")
-
-
 @pytest.mark.parametrize(
     "klass", [SGDClassifier, SparseSGDClassifier, SGDRegressor, SparseSGDRegressor]
 )
@@ -540,115 +555,56 @@ def test_sgd_clf(klass):
     assert_array_equal(clf.predict(T), true_result)
 
 
-@pytest.mark.parametrize("klass", [SGDClassifier, SparseSGDClassifier])
-def test_sgd_bad_l1_ratio(klass):
-    # Check whether expected ValueError on bad l1_ratio
-    with pytest.raises(ValueError):
-        klass(l1_ratio=1.1)
-
-
-@pytest.mark.parametrize(
-    "klass", [SGDClassifier, SparseSGDClassifier, SGDOneClassSVM, SparseSGDOneClassSVM]
-)
-def test_sgd_bad_learning_rate_schedule(klass):
-    # Check whether expected ValueError on bad learning_rate
-    with pytest.raises(ValueError):
-        klass(learning_rate="")
-
-
-@pytest.mark.parametrize(
-    "klass", [SGDClassifier, SparseSGDClassifier, SGDOneClassSVM, SparseSGDOneClassSVM]
-)
-def test_sgd_bad_eta0(klass):
-    # Check whether expected ValueError on bad eta0
-    with pytest.raises(ValueError):
-        klass(eta0=0, learning_rate="constant")
-
-
-@pytest.mark.parametrize(
-    "klass", [SGDClassifier, SparseSGDClassifier, SGDOneClassSVM, SparseSGDOneClassSVM]
-)
-def test_sgd_max_iter_param(klass):
-    # Test parameter validity check
-    with pytest.raises(ValueError):
-        klass(max_iter=-10000)
-
-
-@pytest.mark.parametrize(
-    "klass", [SGDClassifier, SparseSGDClassifier, SGDOneClassSVM, SparseSGDOneClassSVM]
-)
-def test_sgd_shuffle_param(klass):
-    # Test parameter validity check
-    with pytest.raises(ValueError):
-        klass(shuffle="false")
-
-
-@pytest.mark.parametrize("klass", [SGDClassifier, SparseSGDClassifier])
-def test_sgd_early_stopping_param(klass):
-    # Test parameter validity check
-    with pytest.raises(ValueError):
-        klass(early_stopping="false")
-
-
-@pytest.mark.parametrize("klass", [SGDClassifier, SparseSGDClassifier])
-def test_sgd_validation_fraction(klass):
-    # Test parameter validity check
-    with pytest.raises(ValueError):
-        klass(validation_fraction=-0.1)
-
-
-@pytest.mark.parametrize("klass", [SGDClassifier, SparseSGDClassifier])
-def test_sgd_n_iter_no_change(klass):
-    # Test parameter validity check
-    with pytest.raises(ValueError):
-        klass(n_iter_no_change=0)
-
-
-@pytest.mark.parametrize(
-    "klass", [SGDClassifier, SparseSGDClassifier, SGDOneClassSVM, SparseSGDOneClassSVM]
-)
-def test_argument_coef(klass):
-    # Checks coef_init not allowed as model argument (only fit)
-    # Provided coef_ does not match dataset
-    with pytest.raises(TypeError):
-        klass(coef_init=np.zeros((3,)))
-
-
 @pytest.mark.parametrize(
     "klass", [SGDClassifier, SparseSGDClassifier, SGDOneClassSVM, SparseSGDOneClassSVM]
 )
 def test_provide_coef(klass):
-    # Checks coef_init shape for the warm starts
-    # Provided coef_ does not match dataset.
-    with pytest.raises(ValueError):
+    """Check that the shape of `coef_init` is validated."""
+    with pytest.raises(ValueError, match="Provided coef_init does not match dataset"):
         klass().fit(X, Y, coef_init=np.zeros((3,)))
 
 
 @pytest.mark.parametrize(
-    "klass", [SGDClassifier, SparseSGDClassifier, SGDOneClassSVM, SparseSGDOneClassSVM]
+    "klass, fit_params",
+    [
+        (SGDClassifier, {"intercept_init": np.zeros((3,))}),
+        (SparseSGDClassifier, {"intercept_init": np.zeros((3,))}),
+        (SGDOneClassSVM, {"offset_init": np.zeros((3,))}),
+        (SparseSGDOneClassSVM, {"offset_init": np.zeros((3,))}),
+    ],
 )
-def test_set_intercept(klass):
-    # Checks intercept_ shape for the warm starts
-    # Provided intercept_ does not match dataset.
-    if klass in [SGDClassifier, SparseSGDClassifier]:
-        with pytest.raises(ValueError):
-            klass().fit(X, Y, intercept_init=np.zeros((3,)))
-    elif klass in [SGDOneClassSVM, SparseSGDOneClassSVM]:
-        with pytest.raises(ValueError):
-            klass().fit(X, Y, offset_init=np.zeros((3,)))
+def test_set_intercept_offset(klass, fit_params):
+    """Check that `intercept_init` or `offset_init` is validated."""
+    sgd_estimator = klass()
+    with pytest.raises(ValueError, match="does not match dataset"):
+        sgd_estimator.fit(X, Y, **fit_params)
 
 
-@pytest.mark.parametrize("klass", [SGDClassifier, SparseSGDClassifier])
+@pytest.mark.parametrize(
+    "klass", [SGDClassifier, SparseSGDClassifier, SGDRegressor, SparseSGDRegressor]
+)
 def test_sgd_early_stopping_with_partial_fit(klass):
-    # Test parameter validity check
-    with pytest.raises(ValueError):
+    """Check that an error is raised when `early_stopping` is used with
+    `partial_fit`.
+    """
+    err_msg = "early_stopping should be False with partial_fit"
+    with pytest.raises(ValueError, match=err_msg):
         klass(early_stopping=True).partial_fit(X, Y)
 
 
-@pytest.mark.parametrize("klass", [SGDClassifier, SparseSGDClassifier])
-def test_set_intercept_binary(klass):
-    # Checks intercept_ shape for the warm starts in binary case
-    klass().fit(X5, Y5, intercept_init=0)
+@pytest.mark.parametrize(
+    "klass, fit_params",
+    [
+        (SGDClassifier, {"intercept_init": 0}),
+        (SparseSGDClassifier, {"intercept_init": 0}),
+        (SGDOneClassSVM, {"offset_init": 0}),
+        (SparseSGDOneClassSVM, {"offset_init": 0}),
+    ],
+)
+def test_set_intercept_offset_binary(klass, fit_params):
+    """Check that we can pass a scalar to `intercept_init` or `offset_init`
+    with binary classification."""
+    klass().fit(X5, Y5, **fit_params)
 
 
 @pytest.mark.parametrize("klass", [SGDClassifier, SparseSGDClassifier])
@@ -1537,19 +1493,6 @@ def asgd_oneclass(klass, X, eta, nu, coef_init=None, offset_init=0.0):
     return average_coef, 1 - average_intercept
 
 
-@pytest.mark.parametrize("klass", [SGDOneClassSVM, SparseSGDOneClassSVM])
-@pytest.mark.parametrize("nu", [-0.5, 2])
-def test_bad_nu_values(klass, nu):
-    msg = r"nu must be in \(0, 1]"
-    with pytest.raises(ValueError, match=msg):
-        klass(nu=nu)
-
-    clf = klass(nu=0.05)
-    clf2 = clone(clf)
-    with pytest.raises(ValueError, match=msg):
-        clf2.set_params(nu=nu)
-
-
 @pytest.mark.parametrize("klass", [SGDOneClassSVM, SparseSGDOneClassSVM])
 def _test_warm_start_oneclass(klass, X, lr):
     # Test that explicit warm restart...
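
Reviewer note: the snippet below is a minimal sketch of the user-facing effect of this change; it is illustrative only and not part of the diff. The toy data is made up, and the error message is the one asserted in the tests above (assuming an environment running this branch of scikit-learn).

# Illustration only -- not part of this PR. With this change, hyperparameter
# validation runs in `fit`/`partial_fit` instead of `__init__`/`set_params`.
import numpy as np
import pytest

from sklearn.linear_model import SGDClassifier

X = np.array([[0.0], [1.0], [2.0], [3.0]])  # made-up toy data
y = np.array([0, 0, 1, 1])

# Before this PR, both of the following lines raised ValueError immediately.
clf = SGDClassifier(alpha=-0.1)  # no validation in __init__ anymore
clf.set_params(alpha=-0.1)       # no validation in set_params anymore

# The invalid hyperparameter is only reported at fit time.
with pytest.raises(ValueError, match="alpha must be >= 0"):
    clf.fit(X, y)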