From 47f78471867fc64d7306924d770f5542fbba3dfd Mon Sep 17 00:00:00 2001
From: adrinjalali <adrin.jalali@gmail.com>
Date: Tue, 10 Sep 2019 13:38:04 +0200
Subject: [PATCH 1/6] bad default value

---
 sklearn/cluster/dbscan_.py             | 12 +++++++-
 sklearn/utils/tests/test_validation.py | 40 +++++++++++++++++++++++++-
 sklearn/utils/validation.py            | 19 ++++++++++++
 3 files changed, 69 insertions(+), 2 deletions(-)

diff --git a/sklearn/cluster/dbscan_.py b/sklearn/cluster/dbscan_.py
index c123d22ff01f4..aa746f7c5160c 100644
--- a/sklearn/cluster/dbscan_.py
+++ b/sklearn/cluster/dbscan_.py
@@ -16,6 +16,7 @@
 from ..base import BaseEstimator, ClusterMixin
 from ..utils import check_array, check_consistent_length
 from ..neighbors import NearestNeighbors
+from ..utils.validation import _validate_bad_defaults
 
 from ._dbscan_inner import dbscan_inner
 
@@ -41,6 +42,9 @@ def dbscan(X, eps=0.5, min_samples=5, metric='minkowski', metric_params=None,
         important DBSCAN parameter to choose appropriately for your data set
         and distance function.
 
+        Note that there is no good default value for this parameter. An
+        optimal value depends on the data at hand as well as the used metric.
+
     min_samples : int, optional
         The number of samples (or total weight) in a neighborhood for a point
         to be considered as a core point. This includes the point itself.
@@ -208,6 +212,9 @@ class DBSCAN(ClusterMixin, BaseEstimator):
         important DBSCAN parameter to choose appropriately for your data set
         and distance function.
 
+        Note that there is no good default value for this parameter. An
+        optimal value depends on the data at hand as well as the used metric.
+
     min_samples : int, optional
         The number of samples (or total weight) in a neighborhood for a point
         to be considered as a core point. This includes the point itself.
@@ -315,7 +322,9 @@ class DBSCAN(ClusterMixin, BaseEstimator):
     ACM Transactions on Database Systems (TODS), 42(3), 19.
     """
 
-    def __init__(self, eps=0.5, min_samples=5, metric='euclidean',
+    _bad_defaults = {'eps': 0.5}
+
+    def __init__(self, eps='warn', min_samples=5, metric='euclidean',
                  metric_params=None, algorithm='auto', leaf_size=30, p=None,
                  n_jobs=None):
         self.eps = eps
@@ -353,6 +362,7 @@ def fit(self, X, y=None, sample_weight=None):
 
         """
         X = check_array(X, accept_sparse='csr')
+        _validate_bad_defaults(self)
         clust = dbscan(X, sample_weight=sample_weight,
                        **self.get_params())
         self.core_sample_indices_, self.labels_ = clust
diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py
index 0f7ffe9a3e4f0..d36237fdf3e79 100644
--- a/sklearn/utils/tests/test_validation.py
+++ b/sklearn/utils/tests/test_validation.py
@@ -11,6 +11,7 @@
 import numpy as np
 import scipy.sparse as sp
 
+from sklearn.base import BaseEstimator
 from sklearn.utils.testing import assert_raises
 from sklearn.utils.testing import assert_raises_regex
 from sklearn.utils.testing import assert_no_warnings
@@ -42,7 +43,8 @@
     _num_samples,
     check_scalar,
     _check_sample_weight,
-    _allclose_dense_sparse)
+    _allclose_dense_sparse,
+    _validate_bad_defaults)
 import sklearn
 
 from sklearn.exceptions import NotFittedError
@@ -938,3 +940,39 @@ def test_allclose_dense_sparse_raise(toarray):
            "and an array")
     with pytest.raises(ValueError, match=msg):
         _allclose_dense_sparse(x, y)
+
+
+def test_validate_bad_params():
+    msg1 = ("There is no good default value for the following parameters in "
+            "A. Please consult the documentation on how to set them for your "
+            "data."
+            "	'param_a' - using default value: 1"
+            "	'param_b' - using default value: kmeans")
+    msg2 = ("There is no good default value for the following parameters in "
+            "A. Please consult the documentation on how to set them for your "
+            "data."
+            "	'param_b' - using default value: kmeans")
+
+    class A(BaseEstimator):
+        _bad_defaults = {'param_a': 1, 'param_b': 'kmeans'}
+
+        def __init__(self, param_a='warn', param_b='warn', param_c='warn',
+                     param_d=0):
+            self.param_a = param_a
+            self.param_b = param_b
+            self.param_c = param_c
+            self.param_d = param_d
+
+        def fit(self, X=None, y=None):
+            _validate_bad_defaults(self)
+            return self
+
+    with pytest.warns(UserWarning, match=msg1):
+        A().fit()
+
+    with pytest.warns(UserWarning, match=msg2):
+        A(param_a=1).fit()
+
+    with warnings.catch_warnings(record=True) as warns:
+        A(param_a=1, param_b='dbscan').fit()
+    assert not warns
diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index 465acf48e8293..66ec49376546b 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -1089,3 +1089,22 @@ def _allclose_dense_sparse(x, y, rtol=1e-7, atol=1e-9):
         return np.allclose(x, y, rtol=rtol, atol=atol)
     raise ValueError("Can only compare two sparse matrices, not a sparse "
                      "matrix and an array")
+
+
+def _validate_bad_defaults(obj):
+    if not hasattr(obj, "_bad_defaults"):
+        return
+
+    obj_values = {param: getattr(obj, param) for param in obj._bad_defaults}
+    bad_params = [param for param, value in obj_values.items()
+                  if value == 'warn']
+    if bad_params:
+        msg = ("There is no good default value for the following "
+               "parameters in {}. Please consult the documentation "
+               "on how to set them for your data.\n\t".format(
+                   obj.__class__.__name__))
+        msg += '\n\t'.join(["'{}' - using default value: {}".format(
+            param, obj._bad_defaults[param]) for param in bad_params])
+        warnings.warn(msg, UserWarning)
+    for param in bad_params:
+        setattr(obj, param, obj._bad_defaults[param])

From b1ff3afe28d2a799d5de2865edcb7398ab9c6991 Mon Sep 17 00:00:00 2001
From: adrinjalali <adrin.jalali@gmail.com>
Date: Tue, 10 Sep 2019 14:42:44 +0200
Subject: [PATCH 2/6] fix warning match, fix rm
 drivers/mmc/host/rtsx_pci_sdmmc.ko.xz

---
 sklearn/utils/tests/test_validation.py | 6 +++---
 sklearn/utils/validation.py            | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py
index d36237fdf3e79..799ede985fca1 100644
--- a/sklearn/utils/tests/test_validation.py
+++ b/sklearn/utils/tests/test_validation.py
@@ -946,12 +946,12 @@ def test_validate_bad_params():
     msg1 = ("There is no good default value for the following parameters in "
             "A. Please consult the documentation on how to set them for your "
             "data."
-            "	'param_a' - using default value: 1"
-            "	'param_b' - using default value: kmeans")
+            "\n	'param_a' - using default value: 1"
+            "\n	'param_b' - using default value: 'kmeans'")
     msg2 = ("There is no good default value for the following parameters in "
             "A. Please consult the documentation on how to set them for your "
             "data."
-            "	'param_b' - using default value: kmeans")
+            "\n	'param_b' - using default value: 'kmeans'")
 
     class A(BaseEstimator):
         _bad_defaults = {'param_a': 1, 'param_b': 'kmeans'}
diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index 66ec49376546b..874048fda0317 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -1103,7 +1103,7 @@ def _validate_bad_defaults(obj):
                "parameters in {}. Please consult the documentation "
                "on how to set them for your data.\n\t".format(
                    obj.__class__.__name__))
-        msg += '\n\t'.join(["'{}' - using default value: {}".format(
+        msg += '\n\t'.join(["'{}' - using default value: {!r}".format(
             param, obj._bad_defaults[param]) for param in bad_params])
         warnings.warn(msg, UserWarning)
     for param in bad_params:

From 4b2c335903dcf2344673959c183f977cfd14e25e Mon Sep 17 00:00:00 2001
From: adrinjalali <adrin.jalali@gmail.com>
Date: Tue, 10 Sep 2019 19:42:19 +0200
Subject: [PATCH 3/6] don't change object's params

---
 sklearn/cluster/dbscan_.py  | 8 +++++---
 sklearn/utils/validation.py | 4 +++-
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/sklearn/cluster/dbscan_.py b/sklearn/cluster/dbscan_.py
index aa746f7c5160c..75a2a3829eaa8 100644
--- a/sklearn/cluster/dbscan_.py
+++ b/sklearn/cluster/dbscan_.py
@@ -35,7 +35,7 @@ def dbscan(X, eps=0.5, min_samples=5, metric='minkowski', metric_params=None,
         A feature array, or array of distances between samples if
         ``metric='precomputed'``.
 
-    eps : float, optional
+    eps : float, default=0.5
         The maximum distance between two samples for one to be considered
         as in the neighborhood of the other. This is not a maximum bound
         on the distances of points within a cluster. This is the most
@@ -214,6 +214,8 @@ class DBSCAN(ClusterMixin, BaseEstimator):
 
         Note that there is no good default value for this parameter. An
         optimal value depends on the data at hand as well as the used metric.
+        If not specified, a warning is raised and the default value of 0.5 is
+        used.
 
     min_samples : int, optional
         The number of samples (or total weight) in a neighborhood for a point
@@ -362,9 +364,9 @@ def fit(self, X, y=None, sample_weight=None):
 
         """
         X = check_array(X, accept_sparse='csr')
-        _validate_bad_defaults(self)
+        all_params = _validate_bad_defaults(self)
         clust = dbscan(X, sample_weight=sample_weight,
-                       **self.get_params())
+                       **all_params)
         self.core_sample_indices_, self.labels_ = clust
         if len(self.core_sample_indices_):
             # fix for scipy sparse indexing issue
diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index 874048fda0317..f39f2eab6fd91 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -1106,5 +1106,7 @@ def _validate_bad_defaults(obj):
         msg += '\n\t'.join(["'{}' - using default value: {!r}".format(
             param, obj._bad_defaults[param]) for param in bad_params])
         warnings.warn(msg, UserWarning)
+    all_params = obj.get_params()
     for param in bad_params:
-        setattr(obj, param, obj._bad_defaults[param])
+        all_params[param] = obj._bad_defaults[param]
+    return all_params

From 331cc837be6317b9f35824642a39e1549d06da65 Mon Sep 17 00:00:00 2001
From: adrinjalali <adrin.jalali@gmail.com>
Date: Tue, 10 Sep 2019 19:46:10 +0200
Subject: [PATCH 4/6] address the other comments

---
 sklearn/utils/tests/test_validation.py | 8 +++++---
 sklearn/utils/validation.py            | 4 ++--
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py
index 799ede985fca1..69ba74d88342e 100644
--- a/sklearn/utils/tests/test_validation.py
+++ b/sklearn/utils/tests/test_validation.py
@@ -946,14 +946,16 @@ def test_validate_bad_params():
     msg1 = ("There is no good default value for the following parameters in "
             "A. Please consult the documentation on how to set them for your "
             "data."
-            "\n	'param_a' - using default value: 1"
-            "\n	'param_b' - using default value: 'kmeans'")
+            "\n    'param_a' - using default value: 1"
+            "\n    'param_b' - using default value: 'kmeans'")
     msg2 = ("There is no good default value for the following parameters in "
             "A. Please consult the documentation on how to set them for your "
             "data."
-            "\n	'param_b' - using default value: 'kmeans'")
+            "\n    'param_b' - using default value: 'kmeans'")
 
     class A(BaseEstimator):
+        # The param_c should not warn as a result of _validate_bad_defaults
+        # since it's not included in _bad_defaults
         _bad_defaults = {'param_a': 1, 'param_b': 'kmeans'}
 
         def __init__(self, param_a='warn', param_b='warn', param_c='warn',
diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index f39f2eab6fd91..5c376f3ddb291 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -1101,9 +1101,9 @@ def _validate_bad_defaults(obj):
     if bad_params:
         msg = ("There is no good default value for the following "
                "parameters in {}. Please consult the documentation "
-               "on how to set them for your data.\n\t".format(
+               "on how to set them for your data.\n    ".format(
                    obj.__class__.__name__))
-        msg += '\n\t'.join(["'{}' - using default value: {!r}".format(
+        msg += '\n    '.join(["'{}' - using default value: {!r}".format(
             param, obj._bad_defaults[param]) for param in bad_params])
         warnings.warn(msg, UserWarning)
     all_params = obj.get_params()

From 67cc364baf3be50f4c8564643b5e7ffb0d5e348e Mon Sep 17 00:00:00 2001
From: adrinjalali <adrin.jalali@gmail.com>
Date: Tue, 10 Sep 2019 20:38:46 +0200
Subject: [PATCH 5/6] sort the params

---
 sklearn/utils/validation.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index 5c376f3ddb291..63b76c45cd78b 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -1096,8 +1096,8 @@ def _validate_bad_defaults(obj):
         return
 
     obj_values = {param: getattr(obj, param) for param in obj._bad_defaults}
-    bad_params = [param for param, value in obj_values.items()
-                  if value == 'warn']
+    bad_params = sorted([param for param, value in obj_values.items()
+                        if value == 'warn'])
     if bad_params:
         msg = ("There is no good default value for the following "
                "parameters in {}. Please consult the documentation "

From 335dd3862fc8511398984b13776ac91bc240701d Mon Sep 17 00:00:00 2001
From: adrinjalali <adrin.jalali@gmail.com>
Date: Thu, 28 May 2020 11:28:56 +0200
Subject: [PATCH 6/6] trying to warn once

---
 sklearn/cluster/_dbscan.py             |  2 +-
 sklearn/utils/tests/test_validation.py | 21 ++++++++++++++++++---
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/sklearn/cluster/_dbscan.py b/sklearn/cluster/_dbscan.py
index 98afb8bc21d5e..108e54c7ffeb0 100644
--- a/sklearn/cluster/_dbscan.py
+++ b/sklearn/cluster/_dbscan.py
@@ -341,7 +341,7 @@ def fit(self, X, y=None, sample_weight=None):
                 X.setdiag(X.diagonal())  # XXX: modifies X's internals in-place
 
         neighbors_model = NearestNeighbors(
-            radius=self.eps, algorithm=self.algorithm,
+            radius=eps, algorithm=self.algorithm,
             leaf_size=self.leaf_size, metric=self.metric,
             metric_params=self.metric_params, p=self.p, n_jobs=self.n_jobs)
         neighbors_model.fit(X)
diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py
index d5af535f9857a..a474f54d06a9c 100644
--- a/sklearn/utils/tests/test_validation.py
+++ b/sklearn/utils/tests/test_validation.py
@@ -51,6 +51,7 @@
 import sklearn
 
 from sklearn.exceptions import NotFittedError, PositiveSpectrumWarning
+from sklearn.exceptions import BadDefaultWarning
 
 from sklearn.utils._testing import TempMemmap
 
@@ -1111,6 +1112,7 @@ def test_allclose_dense_sparse_raise(toarray):
 
 def test_validate_bad_params():
     msg1 = ("There is no good default value for the following parameters in "
+            "A. Please consult the documentation on how to set them for your "
             "data."
             "\n    'param_a' - using default value: 1"
             "\n    'param_b' - using default value: 'kmeans'")
@@ -1135,16 +1137,30 @@ def fit(self, X=None, y=None):
             _validate_bad_defaults(self)
             return self
 
-    with pytest.warns(UserWarning, match=msg1):
+    with pytest.warns(BadDefaultWarning, match=msg1):
+        A().fit()
+
+    # should not warn the second time
+    with warnings.catch_warnings(record=True) as warns:
         A().fit()
+    assert not warns
+
+    with pytest.warns(BadDefaultWarning, match=msg2):
+        A(param_a=1).fit()
 
-    with pytest.warns(UserWarning, match=msg2):
+    # should not warn the second time
+    with warnings.catch_warnings(record=True) as warns:
         A(param_a=1).fit()
+    assert not warns
 
     with warnings.catch_warnings(record=True) as warns:
         A(param_a=1, param_b='dbscan').fit()
     assert not warns
 
+
+def test_deprecate_positional_args_warns_for_function():
+
+    @_deprecate_positional_args
     def f1(a, b, *, c=1, d=1):
         pass
 
@@ -1248,4 +1264,3 @@ def test_check_sparse_pandas_sp_format(sp_format):
     assert sp.issparse(result)
     assert result.format == sp_format
     assert_allclose_dense_sparse(sp_mat, result)
->>>>>>> upstream/master