Skip to content

Commit d90a675

Browse files
xadupre and sdpython authored
Updates tree decision criterion for scikit-learn 1.3 (#115)
* update for scikit-learn 1.2
* fix compilation issues, updates API
* fix compilation issues
* switch to development version of scikit-learn
* remove normalize, update to the latest API
* Update test_sklearn_stacking.py
* Update test_sklearn_stacking.py
* Update test_piecewise_classifier.py
* update requirements
* fix for scikit-learn==1.3.0
* fix issue
* disable one warning
* lint
* setup
* requirements.txt
* requirements
* remove appveyor

---------

Co-authored-by: xavier dupré <xavier.dupre@gmail.com>
1 parent f2e217e commit d90a675

38 files changed

+540 additions, -271 deletions (lines changed)

.circleci/config.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ jobs:
4646
- run:
4747
name: Install standard libraries
4848
command: |
49-
python -m pip install scipy matplotlib numpy cython pandas
49+
python -m pip install scipy matplotlib numpy cython pandas pyquicksetup
5050
5151
- run:
5252
name: install dependencies

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ build/
8080
*.pidb
8181
*.log
8282
*.scc
83+
*.so
8384
*.pyd
8485

8586
# Visual C++ cache files

.local.jenkins.lin.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ install:
1111
- $PYINT -m pip install --upgrade pip
1212
- $PYINT -m pip install --upgrade --no-cache-dir --no-deps --index http://localhost:8067/simple/ jyquickhelper pyquickhelper cpyquickhelper pandas_streaming --extra-index-url=https://pypi.python.org/simple/
1313
- $PYINT -m pip install --upgrade --no-cache-dir --no-deps --index http://localhost:8067/simple/ scikit-learn>=0.22 --extra-index-url=https://pypi.python.org/simple/
14-
- $PYINT -m pip install -r requirements-win.txt
1514
- $PYINT -m pip install -r requirements.txt
1615
- $PYINT --version
1716
- $PYINT -m pip freeze

_unittests/ut_helpers/test_debug.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ def test_union_features_reg(self):
3838
self.assertNotIn(" object at 0x", text)
3939
self.assertIn(") -> (", text)
4040
else:
41-
raise Exception("should not be the case")
41+
raise AssertionError("should not be the case")
4242

4343
def test_union_features_cl(self):
4444
data = numpy.random.randn(4, 5)
@@ -60,7 +60,7 @@ def test_union_features_cl(self):
6060
self.assertNotIn(" object at 0x", text)
6161
self.assertIn(") -> (", text)
6262
else:
63-
raise Exception("should not be the case")
63+
raise AssertionError("should not be the case")
6464

6565

6666
if __name__ == "__main__":

_unittests/ut_mlmodel/test_anmf_predictor.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,8 @@ def test_anmf_predictor_sparse(self):
4545
exp = mod.estimator_nmf_.inverse_transform(
4646
mod.estimator_nmf_.transform(mat))
4747
got = mod.predict(mat)
48-
sc1 = mean_squared_error(mat.todense(), exp)
49-
sc2 = mean_squared_error(mat.todense(), got)
48+
sc1 = mean_squared_error(numpy.asarray(mat.todense()), exp)
49+
sc2 = mean_squared_error(numpy.asarray(mat.todense()), got)
5050
self.assertGreater(sc1, sc2)
5151

5252
mat2 = numpy.array([[1, 1, 1, 1]], dtype=numpy.float64)
@@ -68,17 +68,17 @@ def test_anmf_predictor_sparse_sparse(self):
6868
exp = mod.estimator_nmf_.inverse_transform(
6969
mod.estimator_nmf_.transform(mat))
7070
got = mod.predict(mat)
71-
sc1 = mean_squared_error(mat.todense(), exp)
72-
sc2 = mean_squared_error(mat.todense(), got)
71+
sc1 = mean_squared_error(numpy.asarray(mat.todense()), exp)
72+
sc2 = mean_squared_error(numpy.asarray(mat.todense()), got)
7373
self.assertGreater(sc1, sc2)
7474

7575
mat2 = numpy.array([[1, 1, 1, 1]], dtype=numpy.float64)
7676
mat2 = csr_matrix(mat2)
7777
exp2 = mod.estimator_nmf_.inverse_transform(
7878
mod.estimator_nmf_.transform(mat2))
7979
got2 = mod.predict(mat2)
80-
sc1 = mean_squared_error(mat2.todense(), exp2)
81-
sc2 = mean_squared_error(mat2.todense(), got2)
80+
sc1 = mean_squared_error(numpy.asarray(mat2.todense()), exp2)
81+
sc2 = mean_squared_error(numpy.asarray(mat2.todense()), got2)
8282
self.assertGreater(sc1, sc2)
8383

8484
def test_anmf_predictor_positive(self):
@@ -118,8 +118,8 @@ def test_anmf_predictor_positive_sparse(self):
118118
exp = mod.estimator_nmf_.inverse_transform(
119119
mod.estimator_nmf_.transform(mat))
120120
got = mod.predict(mat)
121-
sc1 = mean_squared_error(mat.todense(), exp)
122-
sc2 = mean_squared_error(mat.todense(), got)
121+
sc1 = mean_squared_error(numpy.asarray(mat.todense()), exp)
122+
sc2 = mean_squared_error(numpy.asarray(mat.todense()), got)
123123
self.assertGreater(sc1, sc2)
124124
mx = numpy.min(got)
125125
self.assertGreater(mx, 0)

_unittests/ut_mlmodel/test_categories_to_integers.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@
1313
from pyquickhelper.texthelper import compare_module_version
1414
from mlinsights.mlmodel import CategoriesToIntegers
1515
from mlinsights.mlmodel import (
16-
test_sklearn_pickle, test_sklearn_clone, test_sklearn_grid_search_cv)
16+
run_test_sklearn_pickle,
17+
run_test_sklearn_clone,
18+
run_test_sklearn_grid_search_cv)
1719

1820
skipped_warnings = (ConvergenceWarning, UserWarning, FitFailedWarning)
1921

@@ -103,12 +105,12 @@ def test_categories_to_integers_pickle(self):
103105
data = os.path.join(os.path.abspath(
104106
os.path.dirname(__file__)), "data", "adult_set.txt")
105107
df = pandas.read_csv(data, sep="\t")
106-
test_sklearn_pickle(lambda: CategoriesToIntegers(skip_errors=True), df)
108+
run_test_sklearn_pickle(lambda: CategoriesToIntegers(skip_errors=True), df)
107109

108110
@ignore_warnings(skipped_warnings)
109111
def test_categories_to_integers_clone(self):
110112
self.maxDiff = None
111-
test_sklearn_clone(lambda: CategoriesToIntegers())
113+
run_test_sklearn_clone(lambda: CategoriesToIntegers())
112114

113115
@ignore_warnings(skipped_warnings)
114116
def test_categories_to_integers_grid_search(self):
@@ -119,19 +121,19 @@ def test_categories_to_integers_grid_search(self):
119121
y = df['income'] # pylint: disable=E1136
120122
pipe = make_pipeline(CategoriesToIntegers(),
121123
LogisticRegression())
122-
self.assertRaise(lambda: test_sklearn_grid_search_cv(
124+
self.assertRaise(lambda: run_test_sklearn_grid_search_cv(
123125
lambda: pipe, df), ValueError)
124126
if (compare_module_version(sklver, "0.24") >= 0 and # pylint: disable=R1716
125127
compare_module_version(pandas.__version__, "1.3") < 0):
126128
self.assertRaise(
127-
lambda: test_sklearn_grid_search_cv(
129+
lambda: run_test_sklearn_grid_search_cv(
128130
lambda: pipe, X, y, categoriestointegers__single=[True, False]),
129131
ValueError, "Unable to find category value")
130132
pipe = make_pipeline(CategoriesToIntegers(),
131133
Imputer(strategy='most_frequent'),
132134
LogisticRegression(n_jobs=1))
133135
try:
134-
res = test_sklearn_grid_search_cv(
136+
res = run_test_sklearn_grid_search_cv(
135137
lambda: pipe, X, y, categoriestointegers__single=[True, False],
136138
categoriestointegers__skip_errors=[True])
137139
except AttributeError as e:

_unittests/ut_mlmodel/test_classification_kmeans.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@
1515
from pyquickhelper.pycode import ExtTestCase
1616
from pyquickhelper.texthelper import compare_module_version
1717
from mlinsights.mlmodel import (
18-
ClassifierAfterKMeans, test_sklearn_pickle, test_sklearn_clone,
19-
test_sklearn_grid_search_cv)
18+
ClassifierAfterKMeans, run_test_sklearn_pickle,
19+
run_test_sklearn_clone, run_test_sklearn_grid_search_cv)
2020

2121

2222
class TestClassifierAfterKMeans(ExtTestCase):
@@ -58,24 +58,24 @@ def test_classification_kmeans_pickle(self):
5858
iris = datasets.load_iris()
5959
X, y = iris.data, iris.target
6060
try:
61-
test_sklearn_pickle(lambda: ClassifierAfterKMeans(), X, y)
61+
run_test_sklearn_pickle(lambda: ClassifierAfterKMeans(), X, y)
6262
except AttributeError as e:
6363
if compare_module_version(sklver, "0.24") < 0:
6464
return
6565
raise e
6666

6767
def test_classification_kmeans_clone(self):
6868
self.maxDiff = None
69-
test_sklearn_clone(lambda: ClassifierAfterKMeans())
69+
run_test_sklearn_clone(lambda: ClassifierAfterKMeans())
7070

7171
@ignore_warnings(category=ConvergenceWarning)
7272
def test_classification_kmeans_grid_search(self):
7373
iris = datasets.load_iris()
7474
X, y = iris.data, iris.target
75-
self.assertRaise(lambda: test_sklearn_grid_search_cv(
75+
self.assertRaise(lambda: run_test_sklearn_grid_search_cv(
7676
lambda: ClassifierAfterKMeans(), X, y), ValueError)
7777
try:
78-
res = test_sklearn_grid_search_cv(
78+
res = run_test_sklearn_grid_search_cv(
7979
lambda: ClassifierAfterKMeans(),
8080
X, y, c_n_clusters=[2, 3])
8181
except AttributeError as e:

_unittests/ut_mlmodel/test_decision_tree_logistic_regression.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,8 @@
1212
from sklearn.tree import DecisionTreeClassifier
1313
from pyquickhelper.pycode import ExtTestCase
1414
from mlinsights.mlmodel import (
15-
test_sklearn_pickle, test_sklearn_clone, test_sklearn_grid_search_cv,
16-
DecisionTreeLogisticRegression
17-
)
15+
run_test_sklearn_pickle, run_test_sklearn_clone,
16+
run_test_sklearn_grid_search_cv, DecisionTreeLogisticRegression)
1817
from mlinsights.mltree import predict_leaves
1918

2019

@@ -63,22 +62,23 @@ def test_classifier_pickle(self):
6362
X = random(100)
6463
Y = X > 0.5 # pylint: disable=W0143
6564
X = X.reshape((100, 1)) # pylint: disable=E1101
66-
test_sklearn_pickle(lambda: LogisticRegression(), X, Y)
67-
test_sklearn_pickle(lambda: DecisionTreeLogisticRegression(
65+
run_test_sklearn_pickle(lambda: LogisticRegression(), X, Y)
66+
run_test_sklearn_pickle(lambda: DecisionTreeLogisticRegression(
6867
fit_improve_algo=None), X, Y)
6968

7069
def test_classifier_clone(self):
71-
test_sklearn_clone(
70+
run_test_sklearn_clone(
7271
lambda: DecisionTreeLogisticRegression(fit_improve_algo=None))
7372

7473
def test_classifier_grid_search(self):
7574
X = random(100)
7675
Y = X > 0.5 # pylint: disable=W0143
7776
X = X.reshape((100, 1)) # pylint: disable=E1101
78-
self.assertRaise(lambda: test_sklearn_grid_search_cv(
77+
self.assertRaise(lambda: run_test_sklearn_grid_search_cv(
7978
lambda: DecisionTreeLogisticRegression(fit_improve_algo=None), X, Y), ValueError)
80-
res = test_sklearn_grid_search_cv(lambda: DecisionTreeLogisticRegression(fit_improve_algo=None),
81-
X, Y, max_depth=[2, 3])
79+
res = run_test_sklearn_grid_search_cv(
80+
lambda: DecisionTreeLogisticRegression(fit_improve_algo=None),
81+
X, Y, max_depth=[2, 3])
8282
self.assertIn('model', res)
8383
self.assertIn('score', res)
8484
self.assertGreater(res['score'], 0)

0 commit comments

Comments
 (0)