-
-
Notifications
You must be signed in to change notification settings - Fork 26.2k
Open
Description
Let's run BaggingClassifier on a base estimator that accepts sample_weight.
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.ensemble import BaggingClassifier
#------------------------------------------------------------------------------
def main():
    """Compare bagged SVC predictions against a bagged Pipeline wrapping the same SVC.

    Builds a random two-feature, two-class data set, fits one
    ``BaggingClassifier`` on a bare ``SVC`` and another on a single-step
    ``Pipeline`` containing an identically configured ``SVC``, and prints
    whether the two ``predict_proba`` outputs agree (``np.allclose``).
    """
    # 1) prepare data
    np.random.seed(0)
    feat = np.random.normal(size=(1000, 2))
    lbl = np.random.choice(a=2, p=[.5, .5], size=1000, replace=True)
    # Uniform weights; not used by the unweighted fits below, kept for the
    # weighted variant of the fit calls, i.e. fit(feat, lbl, wghts).
    wghts = np.ones(shape=(1000,))

    # Shared settings so both ensembles are configured identically apart
    # from the base estimator.
    bagging_params = dict(n_estimators=100, max_samples=1., n_jobs=-1,
                          random_state=0)

    # 2) fit using standard SVC
    bg0 = BaggingClassifier(
        base_estimator=SVC(probability=True, random_state=0),
        **bagging_params)
    bg0 = bg0.fit(feat, lbl)
    prob0 = bg0.predict_proba(feat)

    # 3) fit using a Pipeline that contains only a SVC
    bg1 = BaggingClassifier(
        base_estimator=Pipeline([('clf', SVC(probability=True,
                                             random_state=0))]),
        **bagging_params)
    bg1 = bg1.fit(feat, lbl)
    prob1 = bg1.predict_proba(feat)

    # %-formatting yields the same "match? True/False" line under both the
    # Python 2 print statement and the Python 3 print function.
    print('match? %s' % np.allclose(prob0, prob1))
#------------------------------------------------------------------------------
# Run the reproduction only when executed as a script, not on import.
if __name__ == '__main__':
    main()
prob0 and prob1 are very different, even though both BaggingClassifier instances are intrinsically the same. The reason is that SVC accepts sample_weight, whereas Pipeline does not.
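Finally, passing sample weights explicitly fails for the Pipeline-based ensemble for no good reason, even though the SVC it wraps supports them: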
bg0=bg0.fit(feat,lbl,wghts) # works!
bg1=bg1.fit(feat,lbl,wghts) # breaks...
ValueError: The base estimator doesn't support sample weight
numpy version 1.11.3
sklearn version 0.18.1