
Commit adf1006

modifications after review
1 parent 33a9b3a commit adf1006

4 files changed, +150 -17 lines

.python-version

+1
@@ -0,0 +1 @@
+scikit-learn_dev

::::::::::::::::::::::::::::.py

+124
@@ -0,0 +1,124 @@
+"""from sklearn.ensemble import AdaBoostClassifier
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.datasets import make_gaussian_quantiles
+
+#X, y_class = make_gaussian_quantiles(
+#    n_samples=1_000, n_features=10, n_classes=3, random_state=1
+#)
+
+X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]]
+y_class = ["foo", "foo", "foo", 1, 1, 1]
+
+
+adaboost_clf = AdaBoostClassifier(
+    estimator=DecisionTreeClassifier(max_leaf_nodes=3),
+    algorithm="SAMME.R",
+    n_estimators=200,
+    random_state=42,
+)
+adaboost_clf.fit(X, y_class)
+"""
+
+
+"""b = 0
+from sklearn.preprocessing import LabelEncoder
+a = 1
+
+le = LabelEncoder()
+
+le.fit_transform(y=[0, 2, 2, 0])
+
+#le.fit(y=[0, 2, 2, 0])
+#le.transform(y=[0, 2, 2, 0])"""
+
+
+"""# Doc OHE
+import pandas as pd
+
+import sklearn
+from sklearn.preprocessing import OneHotEncoder
+
+sklearn.set_config(transform_output="pandas")
+
+df = pd.DataFrame({"Color":["red", "blue", "blue", "green", "yellow", "red",
+                            "blue", "red", "yellow", "red"],
+                   "Target": [0, 1, 1, 0, 0, 0, 0, 1, 0, 0]})
+
+ohe = OneHotEncoder(sparse_output=True) #default: sparse_output=True
+
+ohe.fit(df[["Color"]])
+color_encoded = ohe.set_output(transform="default").transform(df[["Color"]])
+
+#df[ohe.get_feature_names_out()] = color_encoded
+
+print("len(df['Color'].value_counts()): ", len(df["Color"].value_counts()))
+print("color_encoded.shape): ", color_encoded.shape)
+#print("color_encoded.toarray().shape): ", color_encoded.toarray().shape)
+print(type(color_encoded))
+#print(len(df[ohe.get_feature_names_out()]) == len(color_encoded))
+print(len(ohe.get_feature_names_out()) == color_encoded.shape[1])"""
+
+
+"""# HalvingRandomSearchCV test and doc
+from sklearn.model_selection.tests.test_successive_halving import
+test_halving_random_search_cv_results
+test_halving_random_search_cv_results()"""
+
+"""from sklearn.model_selection.tests.test_search import
+test_random_search_cv_results, test_random_search_cv_results
+test_random_search_cv_results()
+test_grid_search_cv_results()"""
+
+
+"""# DOC Pipeline, final estimator
+from sklearn.pipeline import Pipeline
+from sklearn.datasets import make_gaussian_quantiles
+from sklearn.preprocessing import OneHotEncoder, StandardScaler
+from sklearn.linear_model import LogisticRegression
+
+X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]]
+y_class = ["foo", "foo", "foo", 1, 1, 1]
+
+pipeline = Pipeline([('scaler', StandardScaler()),
+                     ('encoder', OneHotEncoder(sparse_output=False)),
+                     ('classifier', LogisticRegression())])
+#pipeline = Pipeline([('scaler', StandardScaler()),
+#                     ('encoder', OneHotEncoder(sparse_output=False)),])
+
+X_transformed = pipeline.fit(X, y_class)
+print(X_transformed)
+"""
+
+"""from sklearn.pipeline import make_pipeline
+import numpy as np
+
+class DoubleIt:
+    def transform(self, X, y=None):
+        return 2*X
+
+X = np.array([[1, 2, 3], [4, 5, 6]])
+p = make_pipeline(DoubleIt(), DoubleIt())
+
+print(p.transform(X))"""
+
+
+"""from sklearn.pipeline import Pipeline
+from sklearn.linear_model import LinearRegression
+from sklearn.preprocessing import OrdinalEncoder
+
+import numpy as np
+
+class DoubleIt:
+
+    def transform(self, X, y=None):
+        return 2*X
+
+X = np.array([[1, 2, 3], [4, 5, 6]])
+p = Pipeline([
+    ('double1', DoubleIt()),
+    ('double2', OrdinalEncoder()),
+    ('linreg', LinearRegression())
+])
+
+print(p.fit(X))
+"""

doc/tutorial/statistical_inference/supervised_learning.rst

+13 -3
@@ -465,7 +465,7 @@ Linear kernel

     >>> svc = svm.SVC(kernel='linear')

-.. image:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_001.png
+.. image:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_002.png
    :target: ../../auto_examples/svm/plot_svm_kernels.html

 Polynomial kernel
@@ -477,7 +477,7 @@ Polynomial kernel
     ... degree=3)
     >>> # degree: polynomial degree

-.. image:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_002.png
+.. image:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_003.png
    :target: ../../auto_examples/svm/plot_svm_kernels.html

 RBF kernel (Radial Basis Function)
@@ -489,7 +489,17 @@ RBF kernel (Radial Basis Function)
     >>> # gamma: inverse of size of
     >>> # radial kernel

-.. image:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_003.png
+.. image:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_004.png
+   :target: ../../auto_examples/svm/plot_svm_kernels.html
+
+Sigmoid kernel
+^^^^^^^^^^^^^^
+
+::
+
+    >>> svc = svm.SVC(kernel='sigmoid')
+
+.. image:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_005.png
    :target: ../../auto_examples/svm/plot_svm_kernels.html

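The added Sigmoid kernel entry mirrors the existing kernel sections of the tutorial. As a rough end-to-end illustration of what that snippet draws, a minimal sketch; the toy data and the plotting calls are assumptions for illustration, not part of the committed documentation:

import matplotlib.pyplot as plt
import numpy as np

from sklearn import svm
from sklearn.inspection import DecisionBoundaryDisplay

# Small two-class toy problem (assumption: any 2D dataset works here).
X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]], dtype=float)
y = np.array([0, 0, 0, 1, 1, 1])

# Same estimator as in the new tutorial snippet.
svc = svm.SVC(kernel='sigmoid').fit(X, y)

# Color the plane by predicted class and overlay the training points.
disp = DecisionBoundaryDisplay.from_estimator(svc, X, response_method="predict", alpha=0.4)
disp.ax_.scatter(X[:, 0], X[:, 1], c=y, edgecolor="k")
plt.show()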
examples/svm/plot_svm_kernels.py

+12 -14
@@ -87,26 +87,25 @@
 # Training SVC model and plotting decision boundaries
 # ---------------------------------------------------
 # We define a function that fits a :class:`~sklearn.svm.SVC` classifier,
-# allowing the `kernel` parameter as input, and then plots the decision
+# allowing the `kernel` parameter as an input, and then plots the decision
 # boundaries learned by the model using
 # :class:`~sklearn.inspection.DecisionBoundaryDisplay`.
-#
+#
 # Notice that for the sake of simplicity, the `C` parameter is set to its
-# default value (`C=1`) in this example, whereas the `gamma` parameter is set to
+# default value (`C=1`) in this example and the `gamma` parameter is set to
 # `gamma=2` across all kernels, although it is automatically ignored for the
 # linear kernel. In a real classification task, where performance matters,
 # parameter tuning (by using :class:`~sklearn.model_selection.GridSearchCV` for
 # instance) is highly recommended to capture different structures within the
 # data.
 #
 # Setting `response_method="predict"` in
-# :class:`~sklearn.inspection.DecisionBoundaryDisplay` colors the areas based on
-# their predicted class, which in the case of :class:`~sklearn.svm.SVC`
-# corresponds to the signed distance of each new sample to the hyperplane. Using
-# `response_method="decision_function"` allows us to also plot the margins at
-# both sides of the decision boundary. Finally the support vectors used during
-# training (which always lay on the margins) can be identified by means ot the
-# `support_vectors_` attribute.
+# :class:`~sklearn.inspection.DecisionBoundaryDisplay` colors the areas based
+# on their predicted class. Using `response_method="decision_function"` allows
+# us to also plot the decision boundary and the margins to both sides of it.
+# Finally the support vectors used during training (which always lay on the
+# margins) are identified by means of the `support_vectors_` attribute of
+# the trained SVCs, and plotted as well.
 from sklearn import svm
 from sklearn.inspection import DecisionBoundaryDisplay
@@ -168,10 +167,9 @@ def plot_training_data_with_decision_boundary(kernel):

 # %%
 # Training a :class:`~sklearn.svm.SVC` on a linear kernel results in an
-# untransformed feature space where the hyperplane and the margins are straight
-# lines. In this case, the choice `C=1` results in margins at 1 unit distance
-# from both sides of the hyperplane. Due to the lack of expressivity of the
-# linear kernel, the trained classes do not perfectly predict the data.
+# untransformed feature space, where the hyperplane and the margins are
+# straight lines. Due to the lack of expressivity of the linear kernel, the
+# trained classes do not perfectly capture the training data.
 #
 # Polynomial kernel
 # *****************

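The reworded comment block distinguishes coloring regions by predicted class (response_method="predict") from drawing the decision function, which also exposes the margins around the boundary, and from marking the fitted support_vectors_. A minimal sketch of that plotting pattern under the parameter choices the comment mentions (default C, gamma=2); the toy data, kernel choice, and styling are assumptions, not code taken from the example:

import matplotlib.pyplot as plt
import numpy as np

from sklearn import svm
from sklearn.inspection import DecisionBoundaryDisplay

X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]], dtype=float)
y = np.array([0, 0, 0, 1, 1, 1])

clf = svm.SVC(kernel="rbf", gamma=2).fit(X, y)  # default C=1, fixed gamma

_, ax = plt.subplots()

# Filled regions: one color per predicted class.
DecisionBoundaryDisplay.from_estimator(
    clf, X, response_method="predict", plot_method="pcolormesh", alpha=0.3, ax=ax
)

# Level sets of the decision function: the boundary (0) and the margins (-1, +1).
DecisionBoundaryDisplay.from_estimator(
    clf,
    X,
    response_method="decision_function",
    plot_method="contour",
    levels=[-1, 0, 1],
    colors="k",
    linestyles=["--", "-", "--"],
    ax=ax,
)

# Support vectors stored on the fitted SVC, drawn as larger hollow circles.
ax.scatter(
    clf.support_vectors_[:, 0],
    clf.support_vectors_[:, 1],
    s=150,
    facecolors="none",
    edgecolors="k",
)
ax.scatter(X[:, 0], X[:, 1], c=y, edgecolors="k")
plt.show()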