Skip to content

FIX make creation of dataset deterministic in SGD #19716

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
39f071c
check_random_state moved up in order to use it in make_dataset and un…
PierreAttard Mar 18, 2021
4053a37
address ogrisel comments
glemaitre Apr 21, 2021
c012680
Merge remote-tracking branch 'origin/main' into pr/PierreAttard/19716
glemaitre Apr 21, 2021
2598055
DOC add whats new
glemaitre Apr 21, 2021
edc334b
Merge remote-tracking branch 'origin/main' into pr/PierreAttard/19716
glemaitre Jul 27, 2021
b5240f8
TST make data generation deterministic
glemaitre Jul 27, 2021
ff23028
docstring
glemaitre Jul 27, 2021
91a4838
Merge branch 'scikit-learn:main' into random-state-not-used-in-make_d…
PierreAttard Jul 29, 2021
f00dfa0
Merge branch 'main' into random-state-not-used-in-make_dataset
Aug 23, 2021
380da62
revert merge with main
Aug 24, 2021
60b2462
merge changelog file
Aug 24, 2021
f12c969
revert the revert merge
Aug 24, 2021
3b9eb85
merge changelog file
Aug 24, 2021
fb9fa2e
merge changelog file V2 remove PR 19716
Aug 24, 2021
5c7cc32
merge changelog file V2 add PR 19716
Aug 24, 2021
5761e06
Merge branch 'main' into random-state-not-used-in-make_dataset
PierreAttard Aug 24, 2021
0efc4b1
remove ".git-blame-ignore-revs" file in order to match with sdist
Aug 26, 2021
83c98d2
Merge branch 'main' into random-state-not-used-in-make_dataset
Sep 1, 2021
1057a37
Add digit in doc for class PassiveAggressiveRegressor.
Sep 2, 2021
9cc24b1
add '.git-blame-ignore-revs' file
Sep 3, 2021
ee81723
merge changelog file V2 add PR 19716
Sep 8, 2021
5afc2ae
Merge branch 'main' into random-state-not-used-in-make_dataset
PierreAttard Sep 8, 2021
eef0170
Merge branch 'master' into pr/PierreAttard/19716
jeremiedbb Mar 23, 2022
9a99e09
merge clean-up
jeremiedbb Mar 23, 2022
b7f8a3c
more cln
jeremiedbb Mar 23, 2022
4ba1966
better document how random_state is used in dataset
jeremiedbb Mar 23, 2022
83c299f
no model is changed
jeremiedbb Mar 23, 2022
62f8d45
TST add non regression test about random seed control for SGD models
ogrisel Mar 31, 2022
ba51907
Update sklearn/linear_model/tests/test_sgd.py
jeremiedbb Mar 31, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion sklearn/linear_model/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,8 @@ def make_dataset(X, y, sample_weight, random_state=None):
The weight of each sample

random_state : int, RandomState instance or None (default)
Determines random number generation for dataset shuffling and noise.
Determines random number generation for dataset random sampling. It is not
used for dataset shuffling.
Pass an int for reproducible output across multiple function calls.
See :term:`Glossary <random_state>`.

Expand Down
6 changes: 4 additions & 2 deletions sklearn/linear_model/_stochastic_gradient.py
Original file line number Diff line number Diff line change
Expand Up @@ -1592,8 +1592,6 @@ def predict(self, X):
def _fit_regressor(
self, X, y, alpha, C, loss, learning_rate, sample_weight, max_iter
):
dataset, intercept_decay = make_dataset(X, y, sample_weight)

loss_function = self._get_loss_function(loss)
penalty_type = self._get_penalty_type(self.penalty)
learning_rate_type = self._get_learning_rate_type(learning_rate)
Expand All @@ -1611,6 +1609,10 @@ def _fit_regressor(
# Windows
seed = random_state.randint(0, np.iinfo(np.int32).max)

dataset, intercept_decay = make_dataset(
X, y, sample_weight, random_state=random_state
)

tol = self.tol if self.tol is not None else -np.inf

if self.average:
Expand Down
38 changes: 38 additions & 0 deletions sklearn/linear_model/tests/test_sgd.py
Original file line number Diff line number Diff line change
Expand Up @@ -2140,3 +2140,41 @@ def test_loss_squared_loss_deprecated(Estimator):
assert_allclose(est1.predict_proba(X), est2.predict_proba(X))
else:
assert_allclose(est1.predict(X), est2.predict(X))


@pytest.mark.parametrize(
    "Estimator", [linear_model.SGDClassifier, linear_model.SGDRegressor]
)
def test_sgd_random_state(Estimator, global_random_seed):
    """Check that one-epoch SGD fits are reproducible for a fixed seed.

    Two fits with the same seed on the same data must yield identical
    coefficients, while a different seed must shuffle the dataset
    differently and therefore land on visibly different coefficients
    after a single epoch.
    """
    make_data = (
        datasets.make_regression
        if Estimator == linear_model.SGDRegressor
        else datasets.make_classification
    )
    X, y = make_data(random_state=global_random_seed)

    def fit_one_epoch(seed):
        # max_iter=1 deliberately prevents convergence, hence the
        # expected ConvergenceWarning.
        model = Estimator(random_state=seed, max_iter=1)
        with pytest.warns(ConvergenceWarning):
            model.fit(X, y)
        assert model.n_iter_ == 1
        return model.coef_

    # Same hyper-parameters, same training set, same seed: the two fits
    # must be deterministic and agree exactly.
    coef_run_one = fit_one_epoch(global_random_seed)
    coef_run_two = fit_one_epoch(global_random_seed)
    assert_allclose(coef_run_one, coef_run_two)

    # A different seed changes the random shuffling of the dataset, so a
    # single epoch ends at a clearly different point in parameter space.
    coef_other_seed = fit_one_epoch(global_random_seed + 1)
    assert np.abs(coef_run_one - coef_other_seed).max() > 1.0
4 changes: 2 additions & 2 deletions sklearn/utils/_seq_dataset.pyx.tp
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ cdef class SequentialDataset{{name_suffix}}:
Number of samples in the dataset.

seed : np.uint32_t
Seed used for random sampling.

Seed used for random sampling. This attribute is modified at each call to the
`random` method.
"""

cdef void next(self, {{c_type}} **x_data_ptr, int **x_ind_ptr,
Expand Down