-
-
Notifications
You must be signed in to change notification settings - Fork 26.2k
Closed
Description
See the reproduction script below: with a fixed dataset and seeded CV folds, GridSearchCV still returns a different best score/params on each run unless `random_state` is also set on the DecisionTreeClassifier itself.
from sklearn.datasets import make_classification
# NOTE(review): these module paths are from old scikit-learn; in modern
# releases both live under sklearn.model_selection (with a different KFold
# signature) — kept as-is to match the reporter's environment.
from sklearn.grid_search import GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.cross_validation import KFold

# Fixed dataset: seeded, so X and y are identical on every run.
X, y = make_classification(random_state=0)
est_parameters = {
    "max_depth": range(2, 7)}


def best_est(X, y):
    """Grid-search ``max_depth`` for a decision tree.

    Returns
    -------
    (best_score_, best_params_) from the fitted GridSearchCV.
    """
    # Seeded CV, so the fold assignment is the same on every call.
    cv = KFold(y.shape[0], n_folds=2, random_state=0)
    # default tree uses gini and best split
    # Deliberately UNSEEDED — this is the variability being reported.
    # (Presumably tie-breaking among equally-good splits uses the global
    # RNG when random_state is not set — that is the question this issue
    # asks; do not "fix" by seeding here or the repro disappears.)
    est = DecisionTreeClassifier()
    grid_search = GridSearchCV(est, est_parameters, cv=cv)
    grid_search.fit(X, y)
    return grid_search.best_score_, grid_search.best_params_


# was `xrange` (Python 2-only); `range` is correct under both 2 and 3 and
# matches the Python 3 style print() call below.
for i in range(10):
    print(best_est(X, y))
# why is the best tree different each time
# unless I set the random_state in DecisionTreeClassifier() ?
Observed output (differs from run to run):
(0.71999999999999997, {'max_depth': 3})
(0.68000000000000005, {'max_depth': 3})
(0.73999999999999999, {'max_depth': 6})
(0.70999999999999996, {'max_depth': 3})
(0.70999999999999996, {'max_depth': 3})
(0.70999999999999996, {'max_depth': 3})
(0.72999999999999998, {'max_depth': 3})
(0.69999999999999996, {'max_depth': 6})
(0.69999999999999996, {'max_depth': 3})
(0.70999999999999996, {'max_depth': 5})
ping @glouppe
Metadata
Metadata
Assignees
Labels
No labels