scikit-learn · adrinjalali · Aug 30, 2024 · Jul 16, 2024 · Jul 30, 2024 · Jul 30, 2024
diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py
@@ -80,7 +80,7 @@ def load_mtpl2(n_samples=None):
     df["ClaimAmount"] = df["ClaimAmount"].fillna(0)
 
     # unquote string fields
-    for column_name in df.columns[df.dtypes.values == object]:
+    for column_name in df.columns[[t is object for t in df.dtypes.values]]:
         df[column_name] = df[column_name].str.strip("'")
     return df.iloc[:n_samples]
 

diff --git a/sklearn/cluster/_optics.py b/sklearn/cluster/_optics.py
@@ -327,7 +327,7 @@ def fit(self, X, y=None):
             Returns a fitted instance of self.
         """
         dtype = bool if self.metric in PAIRWISE_BOOLEAN_FUNCTIONS else float
-        if dtype == bool and X.dtype != bool:
+        if dtype is bool and X.dtype != bool:
-        if dtype is bool and X.dtype != bool:
+        if dtype is bool and X.dtype is not bool:
-        if dtype is bool and X.dtype != bool:
+        if dtype is bool and X.dtype is not bool:
             msg = (
                 "Data will be converted to boolean for"
                 f" metric {self.metric}, to avoid this warning,"

diff --git a/sklearn/cluster/tests/test_dbscan.py b/sklearn/cluster/tests/test_dbscan.py
@@ -291,7 +291,7 @@ def test_input_validation():
 def test_pickle():
     obj = DBSCAN()
     s = pickle.dumps(obj)
-    assert type(pickle.loads(s)) == obj.__class__
+    assert type(pickle.loads(s)) is obj.__class__
 
 
 def test_boundaries():

diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py
@@ -1020,15 +1020,15 @@ def _test_ridge_cv(sparse_container):
     ridge_cv.predict(X)
 
     assert len(ridge_cv.coef_.shape) == 1
-    assert type(ridge_cv.intercept_) == np.float64
+    assert type(ridge_cv.intercept_) is np.float64
 
     cv = KFold(5)
     ridge_cv.set_params(cv=cv)
     ridge_cv.fit(X, y_diabetes)
     ridge_cv.predict(X)
 
     assert len(ridge_cv.coef_.shape) == 1
-    assert type(ridge_cv.intercept_) == np.float64
+    assert type(ridge_cv.intercept_) is np.float64
 
 
 @pytest.mark.parametrize(

diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py
@@ -2436,7 +2436,7 @@ def pairwise_distances(
 
         dtype = bool if metric in PAIRWISE_BOOLEAN_FUNCTIONS else "infer_float"
 
-        if dtype == bool and (X.dtype != bool or (Y is not None and Y.dtype != bool)):
+        if dtype is bool and (X.dtype != bool or (Y is not None and Y.dtype != bool)):
             msg = "Data was converted to boolean for metric %s" % metric
             warnings.warn(msg, DataConversionWarning)
 

diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py
@@ -2940,7 +2940,7 @@ def _build_repr(self):
                 value = getattr(self, key, None)
                 if value is None and hasattr(self, "cvargs"):
                     value = self.cvargs.get(key, None)
-            if len(w) and w[0].category == FutureWarning:
+            if len(w) and w[0].category is FutureWarning:
                 # if the parameter is deprecated, don't show it
                 continue
         finally:

diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py
@@ -586,10 +586,10 @@ def custom_scorer(clf, X, y):
             )
 
             # Make sure all the arrays are of np.ndarray type
-            assert type(cv_results["test_r2"]) == np.ndarray
-            assert type(cv_results["test_neg_mean_squared_error"]) == np.ndarray
-            assert type(cv_results["fit_time"]) == np.ndarray
-            assert type(cv_results["score_time"]) == np.ndarray
+            assert isinstance(cv_results["test_r2"], np.ndarray)
+            assert isinstance(cv_results["test_neg_mean_squared_error"], np.ndarray)
+            assert isinstance(cv_results["fit_time"], np.ndarray)
+            assert isinstance(cv_results["score_time"], np.ndarray)
 
             # Ensure all the times are within sane limits
             assert np.all(cv_results["fit_time"] >= 0)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
@@ -1502,7 +1502,7 @@ def _apply_on_subsets(func, X):
     result_by_batch = [func(batch.reshape(1, n_features)) for batch in X]
 
     # func can output tuple (e.g. score_samples)
-    if type(result_full) == tuple:
+    if isinstance(result_full, tuple):
         result_full = result_full[0]
         result_by_batch = list(map(lambda x: x[0], result_by_batch))
 

diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py
@@ -1350,7 +1350,7 @@ def test_check_scalar_invalid(
             include_boundaries=include_boundaries,
         )
     assert str(raised_error.value) == str(err_msg)
-    assert type(raised_error.value) == type(err_msg)
+    assert isinstance(raised_error.value, type(err_msg))
 
 
 _psd_cases_valid = {