scikit-learn · kaekkr · Apr 3, 2025 · Apr 4, 2025 · Apr 4, 2025 · Apr 10, 2025
diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py
@@ -10,6 +10,7 @@
 from scipy.special import boxcox, inv_boxcox
 
 from sklearn.utils import metadata_routing
+from sklearn.utils.stats import _averaged_weighted_percentile
 
 from ..base import (
     BaseEstimator,
@@ -2726,7 +2727,7 @@ def __init__(
         self.random_state = random_state
         self.copy = copy
 
-    def _dense_fit(self, X, random_state):
+    def _dense_fit(self, X, random_state, sample_weight=None):
         """Compute percentiles for dense matrices.
 
         Parameters
@@ -2748,8 +2749,30 @@ def _dense_fit(self, X, random_state):
             X = resample(
                 X, replace=False, n_samples=self.subsample, random_state=random_state
             )
+            if sample_weight is not None:
+                sample_weight = sample_weight[: X.shape[0]]
+
+        self.quantiles_ = np.zeros((len(references), n_features))
+
+        for i in range(n_features):
+            col = X[:, i]
+            mask = ~np.isnan(col)
+            col_clean = col[mask]
+
+            if col_clean.size == 0:
+                self.quantiles_[:, i] = np.nan
+                continue
+
+            if sample_weight is not None:
+                weights_clean = sample_weight[mask]
+                self.quantiles_[:, i] = _averaged_weighted_percentile(
+                    col_clean, sample_weight=weights_clean, quantile=references / 100.0
+                )
+            else:
+                self.quantiles_[:, i] = np.nanquantile(
+                    col_clean, references / 100.0, method="averaged_inverted_cdf"
+                )
 
-        self.quantiles_ = np.nanpercentile(X, references, axis=0)
         # Due to floating-point precision error in `np.nanpercentile`,
         # make sure that quantiles are monotonically increasing.
         # Upstream issue in numpy:
@@ -2802,7 +2825,7 @@ def _sparse_fit(self, X, random_state):
         self.quantiles_ = np.maximum.accumulate(self.quantiles_)
 
     @_fit_context(prefer_skip_nested_validation=True)
-    def fit(self, X, y=None):
+    def fit(self, X, y=None, sample_weight=None):
         """Compute the quantiles used for transforming.
 
         Parameters
@@ -2844,9 +2867,13 @@ def fit(self, X, y=None):
         # Create the quantiles of reference
         self.references_ = np.linspace(0, 1, self.n_quantiles_, endpoint=True)
         if sparse.issparse(X):
+            if sample_weight is not None:
+                raise NotImplementedError(
+                    "sample_weight is not supported for sparse input."
+                )
             self._sparse_fit(X, rng)
         else:
-            self._dense_fit(X, rng)
+            self._dense_fit(X, rng, sample_weight=sample_weight)
 
         return self
 

diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py
@@ -1528,6 +1528,26 @@ def test_quantile_transformer_sorted_quantiles(array_type):
     assert all(np.diff(quantiles) >= 0)
 
 
+def test_quantile_transformer_with_sample_weight():
+    """Check that non-uniform `sample_weight` changes the fitted transform."""
+    rng = np.random.RandomState(42)
+    X = rng.randn(100, 1)
+    sample_weight = np.linspace(1, 2, X.shape[0])  # increasing weights
+
+    qt_weighted = QuantileTransformer(n_quantiles=10, random_state=0)
+    qt_unweighted = QuantileTransformer(n_quantiles=10, random_state=0)
+
+    Xt_weighted = qt_weighted.fit_transform(X, sample_weight=sample_weight)
+    Xt_unweighted = qt_unweighted.fit_transform(X)
+
+    # Should not be equal: the weights must change the learned mapping.
+    assert not np.allclose(Xt_weighted, Xt_unweighted)
+
+    # Inverse transform round-trip check. `X` has no NaN, so compare it whole.
+    X_roundtrip = qt_weighted.inverse_transform(Xt_weighted)
+    np.testing.assert_allclose(X, X_roundtrip, rtol=1e-2, atol=1e-2)
+
+
 def test_robust_scaler_invalid_range():
     for range_ in [
         (-1, 90),

diff --git a/sklearn/utils/_test_common/instance_generator.py b/sklearn/utils/_test_common/instance_generator.py
@@ -162,6 +162,7 @@
     StandardScaler,
     TargetEncoder,
 )
+from sklearn.preprocessing._data import QuantileTransformer
 from sklearn.random_projection import (
     GaussianRandomProjection,
     SparseRandomProjection,
@@ -1255,6 +1256,11 @@ def _yield_instances_for_check(check, estimator_orig):
             " tests are added for TunedThresholdClassifierCV specifically."
         ),
     },
+    QuantileTransformer: {
+        "check_sample_weight_equivalence_on_sparse_data": (
+            "QuantileTransformer does not yet support sample_weight on sparse data."
+        ),
+    },
 }
 
 # TODO: remove when scipy min version >= 1.11