
Commit adf1006

modifications after review
1 parent 33a9b3a commit adf1006

4 files changed, +150 -17 lines

.python-version

+1
@@ -0,0 +1 @@
+scikit-learn_dev

::::::::::::::::::::::::::::.py

+124
@@ -0,0 +1,124 @@
+"""from sklearn.ensemble import AdaBoostClassifier
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.datasets import make_gaussian_quantiles
+
+#X, y_class = make_gaussian_quantiles(
+#    n_samples=1_000, n_features=10, n_classes=3, random_state=1
+#)
+
+X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]]
+y_class = ["foo", "foo", "foo", 1, 1, 1]
+
+
+adaboost_clf = AdaBoostClassifier(
+    estimator=DecisionTreeClassifier(max_leaf_nodes=3),
+    algorithm="SAMME.R",
+    n_estimators=200,
+    random_state=42,
+)
+adaboost_clf.fit(X, y_class)
+"""
+
+
+"""b = 0
+from sklearn.preprocessing import LabelEncoder
+a = 1
+
+le = LabelEncoder()
+
+le.fit_transform(y=[0, 2, 2, 0])
+
+#le.fit(y=[0, 2, 2, 0])
+#le.transform(y=[0, 2, 2, 0])"""
+
+
+"""# Doc OHE
+import pandas as pd
+
+import sklearn
+from sklearn.preprocessing import OneHotEncoder
+
+sklearn.set_config(transform_output="pandas")
+
+df = pd.DataFrame({"Color":["red", "blue", "blue", "green", "yellow", "red",
+                            "blue", "red", "yellow", "red"],
+                   "Target": [0, 1, 1, 0, 0, 0, 0, 1, 0, 0]})
+
+ohe = OneHotEncoder(sparse_output=True) #default: sparse_output=True
+
+ohe.fit(df[["Color"]])
+color_encoded = ohe.set_output(transform="default").transform(df[["Color"]])
+
+#df[ohe.get_feature_names_out()] = color_encoded
+
+print("len(df['Color'].value_counts()): ", len(df["Color"].value_counts()))
+print("color_encoded.shape): ", color_encoded.shape)
+#print("color_encoded.toarray().shape): ", color_encoded.toarray().shape)
+print(type(color_encoded))
+#print(len(df[ohe.get_feature_names_out()]) == len(color_encoded))
+print(len(ohe.get_feature_names_out()) == color_encoded.shape[1])"""
+
+
+"""# HalvingRandomSearchCV test and doc
+from sklearn.model_selection.tests.test_successive_halving import
+test_halving_random_search_cv_results
+test_halving_random_search_cv_results()"""
+
+"""from sklearn.model_selection.tests.test_search import
+test_random_search_cv_results, test_random_search_cv_results
+test_random_search_cv_results()
+test_grid_search_cv_results()"""
+
+
+"""# DOC Pipeline, final estimator
+from sklearn.pipeline import Pipeline
+from sklearn.datasets import make_gaussian_quantiles
+from sklearn.preprocessing import OneHotEncoder, StandardScaler
+from sklearn.linear_model import LogisticRegression
+
+X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]]
+y_class = ["foo", "foo", "foo", 1, 1, 1]
+
+pipeline = Pipeline([('scaler', StandardScaler()),
+                     ('encoder', OneHotEncoder(sparse_output=False)),
+                     ('classifier', LogisticRegression())])
+#pipeline = Pipeline([('scaler', StandardScaler()),
+#                     ('encoder', OneHotEncoder(sparse_output=False)),])
+
+X_transformed = pipeline.fit(X, y_class)
+print(X_transformed)
+"""
+
+"""from sklearn.pipeline import make_pipeline
+import numpy as np
+
+class DoubleIt:
+    def transform(self, X, y=None):
+        return 2*X
+
+X = np.array([[1, 2, 3], [4, 5, 6]])
+p = make_pipeline(DoubleIt(), DoubleIt())
+
+print(p.transform(X))"""
+
+
+"""from sklearn.pipeline import Pipeline
+from sklearn.linear_model import LinearRegression
+from sklearn.preprocessing import OrdinalEncoder
+
+import numpy as np
+
+class DoubleIt:
+
+    def transform(self, X, y=None):
+        return 2*X
+
+X = np.array([[1, 2, 3], [4, 5, 6]])
+p = Pipeline([
+    ('double1', DoubleIt()),
+    ('double2', OrdinalEncoder()),
+    ('linreg', LinearRegression())
+])
+
+print(p.fit(X))
+"""

doc/tutorial/statistical_inference/supervised_learning.rst

+13 -3
@@ -465,7 +465,7 @@ Linear kernel

     >>> svc = svm.SVC(kernel='linear')

-.. image:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_001.png
+.. image:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_002.png
    :target: ../../auto_examples/svm/plot_svm_kernels.html

 Polynomial kernel
@@ -477,7 +477,7 @@ Polynomial kernel
     ... degree=3)
     >>> # degree: polynomial degree

-.. image:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_002.png
+.. image:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_003.png
    :target: ../../auto_examples/svm/plot_svm_kernels.html

 RBF kernel (Radial Basis Function)
@@ -489,7 +489,17 @@ RBF kernel (Radial Basis Function)
     >>> # gamma: inverse of size of
     >>> # radial kernel

-.. image:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_003.png
+.. image:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_004.png
+   :target: ../../auto_examples/svm/plot_svm_kernels.html
+
+Sigmoid kernel
+^^^^^^^^^^^^^^
+
+::
+
+    >>> svc = svm.SVC(kernel='sigmoid')
+
+.. image:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_005.png
    :target: ../../auto_examples/svm/plot_svm_kernels.html

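The added Sigmoid kernel entry mirrors the existing kernel sections of the tutorial. As a rough end-to-end illustration of what that snippet draws, a minimal sketch; the toy data and the plotting calls are assumptions for illustration, not part of the committed documentation:

import matplotlib.pyplot as plt
import numpy as np

from sklearn import svm
from sklearn.inspection import DecisionBoundaryDisplay

# Small two-class toy problem (assumption: any 2D dataset works here).
X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]], dtype=float)
y = np.array([0, 0, 0, 1, 1, 1])

# Same estimator as in the new tutorial snippet.
svc = svm.SVC(kernel='sigmoid').fit(X, y)

# Color the plane by predicted class and overlay the training points.
disp = DecisionBoundaryDisplay.from_estimator(svc, X, response_method="predict", alpha=0.4)
disp.ax_.scatter(X[:, 0], X[:, 1], c=y, edgecolor="k")
plt.show()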
examples/svm/plot_svm_kernels.py

+12 -14
@@ -87,26 +87,25 @@
 # Training SVC model and plotting decision boundaries
 # ---------------------------------------------------
 # We define a function that fits a :class:`~sklearn.svm.SVC` classifier,
-# allowing the `kernel` parameter as input, and then plots the decision
+# allowing the `kernel` parameter as an input, and then plots the decision
 # boundaries learned by the model using
 # :class:`~sklearn.inspection.DecisionBoundaryDisplay`.
-#
+#
 # Notice that for the sake of simplicity, the `C` parameter is set to its
-# default value (`C=1`) in this example, whereas the `gamma` parameter is set to
+# default value (`C=1`) in this example and the `gamma` parameter is set to
 # `gamma=2` across all kernels, although it is automatically ignored for the
 # linear kernel. In a real classification task, where performance matters,
 # parameter tuning (by using :class:`~sklearn.model_selection.GridSearchCV` for
 # instance) is highly recommended to capture different structures within the
 # data.
 #
 # Setting `response_method="predict"` in
-# :class:`~sklearn.inspection.DecisionBoundaryDisplay` colors the areas based on
-# their predicted class, which in the case of :class:`~sklearn.svm.SVC`
-# corresponds to the signed distance of each new sample to the hyperplane. Using
-# `response_method="decision_function"` allows us to also plot the margins at
-# both sides of the decision boundary. Finally the support vectors used during
-# training (which always lay on the margins) can be identified by means ot the
-# `support_vectors_` attribute.
+# :class:`~sklearn.inspection.DecisionBoundaryDisplay` colors the areas based
+# on their predicted class. Using `response_method="decision_function"` allows
+# us to also plot the decision boundary and the margins to both sides of it.
+# Finally the support vectors used during training (which always lay on the
+# margins) are identified by means of the `support_vectors_` attribute of
+# the trained SVCs, and plotted as well.
 from sklearn import svm
 from sklearn.inspection import DecisionBoundaryDisplay
@@ -168,10 +167,9 @@ def plot_training_data_with_decision_boundary(kernel):

 # %%
 # Training a :class:`~sklearn.svm.SVC` on a linear kernel results in an
-# untransformed feature space where the hyperplane and the margins are straight
-# lines. In this case, the choice `C=1` results in margins at 1 unit distance
-# from both sides of the hyperplane. Due to the lack of expressivity of the
-# linear kernel, the trained classes do not perfectly predict the data.
+# untransformed feature space, where the hyperplane and the margins are
+# straight lines. Due to the lack of expressivity of the linear kernel, the
+# trained classes do not perfectly capture the training data.
 #
 # Polynomial kernel
 # *****************

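The reworded comment block distinguishes coloring regions by predicted class (response_method="predict") from drawing the decision function, which also exposes the margins around the boundary, and from marking the fitted support_vectors_. A minimal sketch of that plotting pattern under the parameter choices the comment mentions (default C, gamma=2); the toy data, kernel choice, and styling are assumptions, not code taken from the example:

import matplotlib.pyplot as plt
import numpy as np

from sklearn import svm
from sklearn.inspection import DecisionBoundaryDisplay

X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]], dtype=float)
y = np.array([0, 0, 0, 1, 1, 1])

clf = svm.SVC(kernel="rbf", gamma=2).fit(X, y)  # default C=1, fixed gamma

_, ax = plt.subplots()

# Filled regions: one color per predicted class.
DecisionBoundaryDisplay.from_estimator(
    clf, X, response_method="predict", plot_method="pcolormesh", alpha=0.3, ax=ax
)

# Level sets of the decision function: the boundary (0) and the margins (-1, +1).
DecisionBoundaryDisplay.from_estimator(
    clf,
    X,
    response_method="decision_function",
    plot_method="contour",
    levels=[-1, 0, 1],
    colors="k",
    linestyles=["--", "-", "--"],
    ax=ax,
)

# Support vectors stored on the fitted SVC, drawn as larger hollow circles.
ax.scatter(
    clf.support_vectors_[:, 0],
    clf.support_vectors_[:, 1],
    s=150,
    facecolors="none",
    edgecolors="k",
)
ax.scatter(X[:, 0], X[:, 1], c=y, edgecolors="k")
plt.show()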