[WIP] Add quantile regression (Continuation) #16343


Closed · wants to merge 37 commits

Commits
7988049
Added basic Quantile regression
Oct 4, 2017
fd04aed
Added L1 penalty to loss function
Oct 4, 2017
6284c83
Add tests and plot example for Quantile Regression
Oct 16, 2017
8ed4899
Enable gtol and maxiter
Oct 17, 2017
285b5d7
light refactor
Oct 17, 2017
bb5ce51
Code for approximate smooth quantile loss
Oct 17, 2017
fb07f0e
Fix error in the smooth version
Oct 17, 2017
95492fe
Solve a sequence of smooth problems instead of one non-smooth.
Oct 18, 2017
bdb48ac
Tuned convergence
Oct 18, 2017
8ee950d
Rename approximation threshold, write docstrings
Oct 18, 2017
00a72fb
Enforce zero coefficients
Oct 18, 2017
4a25fad
Fix zero enforcement
Oct 18, 2017
b3ccff6
Add description in user guide
Oct 22, 2017
cb87e63
pep8 line widths
Oct 22, 2017
0ea2317
Merge remote-tracking branch 'upstream/master' into quantile-regression
Oct 22, 2017
b567c82
Mention GradientBoostingRegressor in the docs.
Oct 22, 2017
0686e66
Mention robustness in doc.
Oct 22, 2017
d2a59d8
Remove magic constants for quantile regression
Mar 18, 2018
4dc4eb2
Small edits to .rst on quantile regression
Mar 18, 2018
eca1165
Improve formatting of quantile regression tests
Mar 18, 2018
2524c2b
Change the solver for quantile regression
avidale Mar 18, 2018
04e6165
small refactor of tests of the quantile regression
Mar 18, 2018
4b715d3
Merge branch 'quantile-regression' of https://github.com/avidale/scik…
Mar 18, 2018
52e1aa1
Enable normalization of inputs
Mar 18, 2018
8fa745b
Change convergence warning and max_iter in some tests
Mar 19, 2018
90bc013
A small refactor to the quantile regression example and docs
Mar 19, 2018
06c24c3
Comparison of QuantileRegressor and OLS
Mar 19, 2018
9b0d42c
update test_quantile_warm_start
Mar 19, 2018
5e453aa
Improved the warm_start example, added a toy example for quantile reg…
Mar 19, 2018
463c633
fig the toy test case for quantile regression
Mar 19, 2018
80c8182
Merge branch 'master' into quantile-regression
Oct 7, 2018
50a5105
respond to comments and fix some style
Oct 7, 2018
d96197a
fix the gradient calculation
Oct 7, 2018
08355a4
replace the warnings module with pytest; fix the plotting errors
Oct 7, 2018
0d3081a
Merge remote-tracking branch 'upstream/master' into qreg
DatenBiene Jan 31, 2020
a4259a7
Merge branch 'master' into qreg
DatenBiene Apr 21, 2020
2fca6c6
Merge remote-tracking branch 'upstream/master' into qreg
DatenBiene Apr 21, 2020
3 changes: 2 additions & 1 deletion doc/modules/classes.rst
@@ -796,6 +796,7 @@ or :class:`~sklearn.linear_model.SGDClassifier` with an appropriate penalty.
linear_model.OrthogonalMatchingPursuit
linear_model.OrthogonalMatchingPursuitCV


Bayesian regressors
-------------------

@@ -836,6 +837,7 @@ Any estimator using the Huber loss would also be robust to outliers, e.g.
linear_model.HuberRegressor
linear_model.RANSACRegressor
linear_model.TheilSenRegressor
linear_model.QuantileRegressor

Generalized linear models (GLM) for regression
----------------------------------------------
@@ -851,7 +853,6 @@ than a normal distribution:
linear_model.TweedieRegressor
linear_model.GammaRegressor


Miscellaneous
-------------

68 changes: 68 additions & 0 deletions doc/modules/linear_model.rst
@@ -1411,6 +1411,74 @@ Note that this estimator is different from the R implementation of Robust Regression
squares implementation with weights given to each sample on the basis of how much the residual is
greater than a certain threshold.

.. _quantile_regression:

Quantile Regression
===================

Quantile regression estimates the median or other quantiles of :math:`y`
conditional on :math:`X`, while ordinary least squares (OLS) estimates the
conditional mean.

The :class:`QuantileRegressor` applies a linear loss to all samples. It is
thus more radical than :class:`HuberRegressor`, which applies a linear penalty
only to a small fraction of outlying samples and a quadratic loss to the rest
of the observations. Like :class:`ElasticNet`, :class:`QuantileRegressor` also
supports L1 and L2 regularization. It solves

.. math::
    \underset{w}{\min\,} \frac{1}{n_{\text{samples}}} \sum_{i=1}^{n_{\text{samples}}} L_q(y_i - X_i w) + \alpha \rho ||w||_1 + \alpha (1 - \rho) ||w||_2 ^ 2

where

.. math::
    L_q(t) =
    \begin{cases}
        q t, & t > 0, \\
        0, & t = 0, \\
        (q - 1) t, & t < 0
    \end{cases}

and :math:`q \in (0, 1)` is the quantile to be estimated.
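
:math:`L_q` is often called the "pinball" loss. As an illustrative sketch
(not part of this PR), it is straightforward to express in NumPy::

    import numpy as np

    def pinball_loss(residuals, q):
        """Quantile (pinball) loss: q * t for t > 0, (q - 1) * t for t < 0."""
        t = np.asarray(residuals, dtype=float)
        return np.where(t > 0, q * t, (q - 1) * t)

    # Over- and under-prediction are weighted asymmetrically:
    print(pinball_loss([-1.0, 0.0, 1.0], q=0.9))  # [0.1 0.  0.9]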

Quantile regression may be useful if one is interested in predicting an
interval instead of a point prediction. A prediction interval is sometimes
calculated under the assumption that the prediction error is normally
distributed with zero mean and constant variance. Quantile regression
provides sensible prediction intervals even when the errors have non-constant
(but predictable) variance or a non-normal distribution.
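
For instance, a 90% prediction interval can be obtained by fitting two
regressors at the 5th and 95th percentiles (a minimal sketch; the constructor
arguments mirror the example script in this PR)::

    import numpy as np
    from sklearn.linear_model import QuantileRegressor

    rng = np.random.RandomState(0)
    x = rng.uniform(0, 10, size=200)
    X = x[:, np.newaxis]
    y = 2 * x + rng.normal(scale=1 + 0.5 * x)

    # One regressor per interval bound; together they cover ~90% of y given x.
    lower = QuantileRegressor(quantile=0.05, alpha=0, max_iter=10000).fit(X, y)
    upper = QuantileRegressor(quantile=0.95, alpha=0, max_iter=10000).fit(X, y)
    interval = np.column_stack([lower.predict(X), upper.predict(X)])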

.. figure:: /auto_examples/linear_model/images/sphx_glr_plot_quantile_regression_001.png
   :target: ../auto_examples/linear_model/plot_quantile_regression.html
   :align: center
   :scale: 50%

Another possible advantage of quantile regression over OLS is its robustness
to outliers: only the sign of an error influences the estimated coefficients,
not its magnitude.

The quantile loss function can also be used with non-linear models. For
example, :class:`GradientBoostingRegressor` can predict conditional quantiles
if its parameter ``loss`` is set to ``"quantile"`` and its parameter ``alpha``
is set to the quantile that should be predicted. See the example in
:ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_quantile.py`.
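
For illustration, predicting the 90th percentile with gradient boosting (the
dataset and hyperparameters below are arbitrary)::

    from sklearn.datasets import make_regression
    from sklearn.ensemble import GradientBoostingRegressor

    X, y = make_regression(n_samples=200, n_features=4, noise=10.0,
                           random_state=0)
    # loss="quantile" replaces the default squared error with the pinball
    # loss; alpha is the target quantile, here the 90th percentile.
    gbr = GradientBoostingRegressor(loss="quantile", alpha=0.9, random_state=0)
    q90 = gbr.fit(X, y).predict(X)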

Most implementations of quantile regression are based on a linear programming
formulation. Adding L2 regularization makes the problem nonlinear, while the
non-differentiable absolute values make it ill-suited for plain gradient
descent. Instead, the current implementation solves a sequence of smooth
approximate problems, similar to Huber regression, as proposed by Chen and
Wei. Each step uses a finer approximation than the previous one, as sketched
below. Optimization stops when the solutions of two consecutive steps are
almost identical or when the maximal number of iterations is exceeded.
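
The idea of the smoothing can be illustrated as follows (a sketch of the
approximation only, not the exact code in this PR; the threshold ``gamma`` is
an assumed name for the smoothing parameter). The kink of the loss at zero is
replaced by a quadratic segment, and shrinking ``gamma`` recovers the exact
loss::

    import numpy as np

    def smooth_pinball(t, q, gamma):
        """Huber-like smoothing of the quantile (pinball) loss.

        Quadratic for (q - 1) * gamma <= t <= q * gamma, linear outside;
        converges to the exact loss as gamma -> 0.
        """
        t = np.asarray(t, dtype=float)
        return np.where(
            t >= q * gamma,
            q * t - 0.5 * q ** 2 * gamma,
            np.where(
                t <= (q - 1) * gamma,
                (q - 1) * t - 0.5 * (q - 1) ** 2 * gamma,
                t ** 2 / (2 * gamma),
            ),
        )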

.. topic:: Examples:

  * :ref:`sphx_glr_auto_examples_linear_model_plot_quantile_regression.py`

.. topic:: References:

  * Koenker, R., & Bassett Jr, G. (1978). `Regression quantiles. <http://web.stanford.edu/~doubleh/otherpapers/koenker.pdf>`_
    Econometrica: Journal of the Econometric Society, 33-50.

  * Chen, C., & Wei, Y. (2005). `Computational issues for quantile regression. <http://pdfs.semanticscholar.org/5cf3/f9fe77c423dc394c8766cbdcfb41ea44b7d4.pdf>`_
    Sankhya: The Indian Journal of Statistics, 399-417.


.. _polynomial_regression:

Polynomial regression: extending linear models with basis functions
78 changes: 78 additions & 0 deletions examples/linear_model/plot_quantile_regression.py
@@ -0,0 +1,78 @@
"""
===================
Quantile regression
===================

Plot the prediction of different conditional quantiles.

The left figure shows the case when the error distribution is normal,
but its variance is not constant.

The right figure shows an example of an asymmetric error distribution
(namely, Pareto).
"""
print(__doc__)

import numpy as np
import matplotlib.pyplot as plt

from sklearn.linear_model import QuantileRegressor, LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import cross_val_score

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

rng = np.random.RandomState(42)
x = np.linspace(0, 10, 100)
X = x[:, np.newaxis]
y = 20 + x * 2 + rng.normal(loc=0, scale=0.5 + 0.5 * x, size=x.shape[0])
ax1.scatter(x, y)

quantiles = [0.05, 0.5, 0.95]
for quantile in quantiles:
    qr = QuantileRegressor(quantile=quantile, max_iter=10000, alpha=0)
    qr.fit(X, y)
    ax1.plot([0, 10], qr.predict([[0], [10]]))
ax1.set_xlabel('x')
ax1.set_ylabel('y')
ax1.set_title('Quantiles of normal residuals with non-constant variance')
ax1.legend(quantiles)

y = 20 + x * 0.5 + rng.pareto(10, size=x.shape[0]) * 10
ax2.scatter(x, y)

for quantile in quantiles:
    qr = QuantileRegressor(quantile=quantile, max_iter=10000, alpha=0)
    qr.fit(X, y)
    ax2.plot([0, 10], qr.predict([[0], [10]]))
ax2.set_xlabel('x')
ax2.set_ylabel('y')
ax2.set_title('Quantiles of asymmetrically distributed residuals')
ax2.legend(quantiles)

plt.show()

#########################################################################
#
# The second part of the code shows that LinearRegression minimizes RMSE,
# while QuantileRegressor minimizes MAE; each performs well on its own
# criterion.

models = [LinearRegression(), QuantileRegressor(alpha=0, max_iter=10000)]
names = ['OLS', 'Quantile']

print('# In-sample performance')
for model_name, model in zip(names, models):
    print(model_name + ':')
    model.fit(X, y)
    mae = mean_absolute_error(model.predict(X), y)
    rmse = np.sqrt(mean_squared_error(model.predict(X), y))
    print('MAE={:.4} RMSE={:.4}'.format(mae, rmse))

print('\n# Cross-validated performance')
for model_name, model in zip(names, models):
    print(model_name + ':')
    mae = -cross_val_score(model, X, y, cv=3,
                           scoring='neg_mean_absolute_error').mean()
    rmse = np.sqrt(-cross_val_score(model, X, y, cv=3,
                                    scoring='neg_mean_squared_error').mean())
    print('MAE={:.4} RMSE={:.4}'.format(mae, rmse))
7 changes: 7 additions & 0 deletions sklearn/linear_model/__init__.py
@@ -30,6 +30,7 @@

from ._ransac import RANSACRegressor
from ._theil_sen import TheilSenRegressor
from .quantile import QuantileRegressor

__all__ = ['ARDRegression',
'BayesianRidge',
@@ -59,6 +60,12 @@
'PassiveAggressiveClassifier',
'PassiveAggressiveRegressor',
'Perceptron',
'QuantileRegressor',
'Ridge',
'RidgeCV',
'RidgeClassifier',