|
# Scratch experiment: AdaBoostClassifier on a tiny toy dataset (kept disabled).
"""from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import make_gaussian_quantiles

#X, y_class = make_gaussian_quantiles(
#    n_samples=1_000, n_features=10, n_classes=3, random_state=1
#)

X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]]
# NOTE(review): mixed str/int labels — recent scikit-learn releases reject
# mixed-type targets; confirm this is intentional before re-enabling.
y_class = ["foo", "foo", "foo", 1, 1, 1]

# Fix: algorithm="SAMME.R" was deprecated in scikit-learn 1.4 and removed in
# 1.6; rely on the default instead of passing a removed option.
adaboost_clf = AdaBoostClassifier(
    estimator=DecisionTreeClassifier(max_leaf_nodes=3),
    n_estimators=200,
    random_state=42,
)
adaboost_clf.fit(X, y_class)
"""
| 21 | + |
| 22 | + |
# Scratch experiment: LabelEncoder round trip (kept disabled).
# Fix: removed the debug filler assignments (b = 0 / a = 1) that sandwiched
# the import for no reason.
"""from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()

# fit_transform is equivalent to fit followed by transform on the same data.
le.fit_transform(y=[0, 2, 2, 0])

#le.fit(y=[0, 2, 2, 0])
#le.transform(y=[0, 2, 2, 0])"""
| 34 | + |
# Scratch experiment: OneHotEncoder output container / set_output (kept disabled).
"""# Doc OHE
import pandas as pd

import sklearn
from sklearn.preprocessing import OneHotEncoder

# Global config: transformers return pandas DataFrames.
sklearn.set_config(transform_output="pandas")

df = pd.DataFrame({"Color": ["red", "blue", "blue", "green", "yellow", "red",
                             "blue", "red", "yellow", "red"],
                   "Target": [0, 1, 1, 0, 0, 0, 0, 1, 0, 0]})

ohe = OneHotEncoder(sparse_output=True)  # default: sparse_output=True

ohe.fit(df[["Color"]])
# set_output(transform="default") overrides the global pandas config for this
# estimator, so transform returns a scipy sparse matrix, not a DataFrame.
color_encoded = ohe.set_output(transform="default").transform(df[["Color"]])

#df[ohe.get_feature_names_out()] = color_encoded

print("len(df['Color'].value_counts()): ", len(df["Color"].value_counts()))
# Fix: removed the stray ')' from the print labels below.
print("color_encoded.shape: ", color_encoded.shape)
#print("color_encoded.toarray().shape: ", color_encoded.toarray().shape)
print(type(color_encoded))
#print(len(df[ohe.get_feature_names_out()]) == len(color_encoded))
print(len(ohe.get_feature_names_out()) == color_encoded.shape[1])"""
| 60 | + |
| 61 | + |
# Scratch: HalvingRandomSearchCV smoke test (kept disabled).
# Fix: the import statement was split across two lines with no parentheses,
# which is a SyntaxError the moment this block is re-enabled.
"""# HalvingRandomSearchCV test and doc
from sklearn.model_selection.tests.test_successive_halving import (
    test_halving_random_search_cv_results,
)
test_halving_random_search_cv_results()"""
| 66 | + |
# Scratch: grid/random search CV smoke tests (kept disabled).
# Fixes: the original imported test_random_search_cv_results twice while the
# code calls test_grid_search_cv_results (NameError once re-enabled), and the
# import was split across lines without parentheses (SyntaxError).
"""from sklearn.model_selection.tests.test_search import (
    test_random_search_cv_results,
    test_grid_search_cv_results,
)
test_random_search_cv_results()
test_grid_search_cv_results()"""
| 71 | + |
| 72 | + |
# Scratch: Pipeline whose final step is a predictor (kept disabled).
"""# DOC Pipeline, final estimator
from sklearn.pipeline import Pipeline
from sklearn.datasets import make_gaussian_quantiles
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression

X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]]
y_class = ["foo", "foo", "foo", 1, 1, 1]

pipeline = Pipeline([('scaler', StandardScaler()),
                     ('encoder', OneHotEncoder(sparse_output=False)),
                     ('classifier', LogisticRegression())])
#pipeline = Pipeline([('scaler', StandardScaler()),
#                     ('encoder', OneHotEncoder(sparse_output=False)),])

# Fix: Pipeline.fit returns the fitted pipeline itself, not transformed data;
# binding it to X_transformed was misleading.
fitted_pipeline = pipeline.fit(X, y_class)
print(fitted_pipeline)
"""
| 91 | + |
# Scratch: duck-typed transformer chained with make_pipeline (kept disabled).
"""from sklearn.pipeline import make_pipeline
import numpy as np

class DoubleIt:
    # Stateless transformer that doubles its input.
    # Fix: the original had no fit(), so p.transform worked but p.fit(...)
    # would fail; a no-op fit keeps the class Pipeline-compatible.
    def fit(self, X, y=None):
        return self

    def transform(self, X, y=None):
        return 2*X

X = np.array([[1, 2, 3], [4, 5, 6]])
p = make_pipeline(DoubleIt(), DoubleIt())

print(p.transform(X))"""
| 103 | + |
| 104 | + |
# Scratch: Pipeline ending in a supervised estimator, fed by duck-typed
# transformers (kept disabled).
"""from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OrdinalEncoder

import numpy as np

class DoubleIt:
    # Stateless transformer that doubles its input.
    # Fix: intermediate Pipeline steps must implement fit; the original class
    # had none, so Pipeline.fit would reject it.
    def fit(self, X, y=None):
        return self

    def transform(self, X, y=None):
        return 2*X

X = np.array([[1, 2, 3], [4, 5, 6]])
y = np.array([0.0, 1.0])
p = Pipeline([
    ('double1', DoubleIt()),
    ('double2', OrdinalEncoder()),
    ('linreg', LinearRegression()),
])

# Fix: LinearRegression is supervised, so fit requires a target; the original
# p.fit(X) passed no y and raises at runtime.
print(p.fit(X, y))
"""
0 commit comments