@@ -313,9 +313,17 @@ define([
313
313
314
314
generateCode ( ) {
315
315
let { model } = this . state ;
316
+ let codeList = [ ] ;
316
317
let code = new com_String ( ) ;
317
318
let replaceDict = { '${model}' : model } ;
318
319
320
+ // If functions are available
321
+ if ( this . state . optionConfig . functions != undefined ) {
322
+ this . state . optionConfig . functions . forEach ( func => {
323
+ codeList . push ( func ) ;
324
+ } ) ;
325
+ }
326
+
319
327
// If import code is available, generate its code in front of code
320
328
if ( this . state . optionConfig . import != undefined ) {
321
329
code . appendLine ( this . state . optionConfig . import ) ;
@@ -342,8 +350,9 @@ define([
342
350
}
343
351
}
344
352
}
353
+ codeList . push ( code . toString ( ) ) ;
345
354
346
- return code . toString ( ) ;
355
+ return codeList ;
347
356
}
348
357
349
358
getModelCategory ( modelType ) {
@@ -399,22 +408,61 @@ define([
399
408
{ name : 'importance_allocate' , label : 'Allocate to' , component : [ 'input' ] , placeholder : 'New variable' , value : 'importances' }
400
409
]
401
410
} ,
411
+ 'feature_importances' : {
412
+ name : 'feature_importances' ,
413
+ label : 'Feature importances' ,
414
+ functions : [
415
+ "def create_feature_importances(model, X_train=None, sort=False):\
416
+ \n if isinstance(X_train, pd.core.frame.DataFrame):\
417
+ \n feature_names = X_train.columns\
418
+ \n else:\n\
419
+ \n feature_names = [ 'X{}'.format(i) for i in range(len(model.feature_importances_)) ]\
420
+ \n\
421
+ \n df_i = pd.DataFrame(model.feature_importances_, index=feature_names, columns=['Feature_importance'])\
422
+ \n df_i['Percentage'] = 100 * (df_i['Feature_importance'] / df_i['Feature_importance'].max())\
423
+ \n if sort: df_i.sort_values(by='Feature_importance', ascending=False, inplace=True)\
424
+ \n df_i = df_i.round(2)\
425
+ \n\
426
+ \n return df_i"
427
+ ] ,
428
+ code : "${fi_allocate} = create_feature_importances(${model}, ${fi_featureData}${sort})" ,
429
+ description : 'Allocate feature_importances_' ,
430
+ options : [
431
+ { name : 'fi_featureData' , label : 'Feature Data' , component : [ 'var_select' ] , var_type : [ 'DataFrame' , 'Series' , 'ndarray' , 'list' , 'dict' ] , value : 'X_train' } ,
432
+ { name : 'fi_allocate' , label : 'Allocate to' , component : [ 'input' ] , placeholder : 'New variable' , value : 'df_i' } ,
433
+ { name : 'sort' , label : 'Sort data' , component : [ 'bool_checkbox' ] , value : true , usePair : true }
434
+ ]
435
+ } ,
402
436
'plot_feature_importances' : {
403
437
name : 'plot_feature_importances' ,
404
438
label : 'Plot feature importances' ,
405
- code : "def plot_feature_importances(model):\n\
406
- n_features = len(model.feature_importances_)\n\
407
- feature_names = [ 'X{}'.format(i) for i in range(n_features) ]\n\
408
- plt.barh(np.arange(n_features), model.feature_importances_, align='center')\n\
409
- plt.yticks(np.arange(n_features), feature_names)\n\
410
- plt.xlabel('Feature importance')\n\
411
- plt.ylabel('Features')\n\
412
- plt.ylim(-1, n_features)\n\
413
- plt.show()\n\n\
414
- plot_feature_importances(${model})" ,
415
- description : '' ,
439
+ functions : [
440
+ "def create_feature_importances(model, X_train=None, sort=False):\
441
+ \n if isinstance(X_train, pd.core.frame.DataFrame):\
442
+ \n feature_names = X_train.columns\
443
+ \n else:\n\
444
+ \n feature_names = [ 'X{}'.format(i) for i in range(len(model.feature_importances_)) ]\
445
+ \n\
446
+ \n df_i = pd.DataFrame(model.feature_importances_, index=feature_names, columns=['Feature_importance'])\
447
+ \n df_i['Percentage'] = 100 * (df_i['Feature_importance'] / df_i['Feature_importance'].max())\
448
+ \n if sort: df_i.sort_values(by='Feature_importance', ascending=False, inplace=True)\
449
+ \n df_i = df_i.round(2)\
450
+ \n\
451
+ \n return df_i" ,
452
+ "def plot_feature_importances(model, X_train=None, sort=False):\
453
+ \n df_i = create_feature_importances(model, X_train, sort)\
454
+ \n\
455
+ \n df_i['Percentage'].sort_values().plot(kind='barh')\
456
+ \n plt.xlabel('Feature importance Percentage')\
457
+ \n plt.ylabel('Features')\
458
+ \n\
459
+ \n plt.show()"
460
+ ] ,
461
+ code : "plot_feature_importances(${model}, ${fi_featureData}${sort})" ,
462
+ description : 'Draw feature_importances_' ,
416
463
options : [
417
-
464
+ { name : 'fi_featureData' , label : 'Feature Data' , component : [ 'var_select' ] , var_type : [ 'DataFrame' , 'Series' , 'ndarray' , 'list' , 'dict' ] , value : 'X_train' } ,
465
+ { name : 'sort' , label : 'Sort data' , component : [ 'bool_checkbox' ] , value : true , usePair : true }
418
466
]
419
467
}
420
468
}
@@ -522,6 +570,7 @@ plot_feature_importances(${model})",
522
570
]
523
571
} ,
524
572
'permutation_importance' : defaultInfos [ 'permutation_importance' ] ,
573
+ 'feature_importances' : defaultInfos [ 'feature_importances' ] ,
525
574
'plot_feature_importances' : defaultInfos [ 'plot_feature_importances' ] ,
526
575
'Coefficient' : {
527
576
name : 'coef_' ,
@@ -573,11 +622,11 @@ plot_feature_importances(${model})",
573
622
name : 'roc_curve' ,
574
623
label : 'ROC Curve' ,
575
624
import : 'from sklearn import metrics' ,
576
- code : "fpr, tpr, thresholds = metrics.roc_curve(${roc_targetData}, ${model}.predict_proba(${roc_featureData})[:, 1])\n\
577
- plt .plot(fpr, tpr, label='ROC Curve')\n \
578
- plt .xlabel('Sensitivity')\n \
579
- plt .ylabel('Specificity')\n \
580
- plt .show()",
625
+ code : "fpr, tpr, thresholds = metrics.roc_curve(${roc_targetData}, ${model}.predict_proba(${roc_featureData})[:, 1])\
626
+ \nplt .plot(fpr, tpr, label='ROC Curve')\
627
+ \nplt .xlabel('Sensitivity')\
628
+ \nplt .ylabel('Specificity')\
629
+ \nplt .show()",
581
630
description : '' ,
582
631
options : [
583
632
{ name : 'roc_targetData' , label : 'Target Data' , component : [ 'var_select' ] , var_type : [ 'DataFrame' , 'Series' , 'ndarray' , 'list' , 'dict' ] , value : 'y_test' } ,
@@ -595,8 +644,16 @@ plt.show()",
595
644
{ name : 'auc_featureData' , label : 'Feature Data' , component : [ 'var_select' ] , var_type : [ 'DataFrame' , 'Series' , 'ndarray' , 'list' , 'dict' ] , value : 'X_test' }
596
645
]
597
646
} ,
598
- 'permutation_importance' : defaultInfos [ 'permutation_importance' ] ,
599
- 'plot_feature_importances' : defaultInfos [ 'plot_feature_importances' ]
647
+ 'permutation_importance' : defaultInfos [ 'permutation_importance' ]
648
+ }
649
+
650
+ // feature importances
651
+ if ( modelType != 'LogisticRegression' && modelType != 'SVC' ) {
652
+ infos = {
653
+ ...infos ,
654
+ 'feature_importances' : defaultInfos [ 'feature_importances' ] ,
655
+ 'plot_feature_importances' : defaultInfos [ 'plot_feature_importances' ]
656
+ }
600
657
}
601
658
602
659
// use decision_function on ROC, AUC
@@ -608,11 +665,11 @@ plt.show()",
608
665
...infos ,
609
666
'roc_curve' : {
610
667
...infos [ 'roc_curve' ] ,
611
- code : "fpr, tpr, thresholds = metrics.roc_curve(${roc_targetData}, ${model}.decision_function(${roc_featureData}))\n\
612
- plt .plot(fpr, tpr, label='ROC Curve')\n \
613
- plt .xlabel('Sensitivity')\n \
614
- plt .ylabel('Specificity')\n \
615
- plt .show()"
668
+ code : "fpr, tpr, thresholds = metrics.roc_curve(${roc_targetData}, ${model}.decision_function(${roc_featureData}))\
669
+ \nplt .plot(fpr, tpr, label='ROC Curve')\
670
+ \nplt .xlabel('Sensitivity')\
671
+ \nplt .ylabel('Specificity')\
672
+ \nplt .show()"
616
673
} ,
617
674
'auc' : {
618
675
...infos [ 'auc' ] ,
0 commit comments