 import fitz
 import nltk
 nltk.download('punkt')
+
 ######
 # Visual Python: Data Analysis > PDF
 ######
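The import block brings in fitz (PyMuPDF) for reading PDFs and nltk, whose punkt data is downloaded for sentence tokenization. A minimal sketch of how these pieces are typically combined, assuming a placeholder file name that is not taken from this diff:

# Hedged sketch: PyMuPDF text extraction fed into NLTK's punkt sentence tokenizer.
# 'example.pdf' is a placeholder path, not a file from this repository.
import fitz
import nltk
nltk.download('punkt')

doc = fitz.open('example.pdf')                      # open the PDF with PyMuPDF
text = ' '.join(page.get_text() for page in doc)    # collect plain text from every page
sentences = nltk.sent_tokenize(text)                # split the text into sentences
doc.close()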
@@ -43,6 +44,7 @@ def vp_pdf_get_sentence(fname_lst):
         df = _vp_pd.concat([df, df_doc])
 
     return df.reset_index().drop('index', axis=1)
+
 ######
 # Visual Python: Data Analysis > Frame
 ######
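vp_pdf_get_sentence builds one DataFrame per document, concatenates them, and resets the index so the leftover per-document row numbers are dropped. A hedged usage sketch with hypothetical file names:

# Hypothetical call; 'report_a.pdf' and 'report_b.pdf' are illustrative names only.
sentences_df = vp_pdf_get_sentence(['report_a.pdf', 'report_b.pdf'])
print(sentences_df.head())    # one row per extracted sentence, indexed 0..n-1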
@@ -63,6 +65,7 @@ def vp_drop_outlier(df, col, weight=1.5):
     df_res = df.drop(outlier_index).copy()
 
     return df_res
+
 ######
 # Visual Python: Machine Learning > Model Info
 ######
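vp_drop_outlier drops the rows collected in outlier_index and returns a copy, leaving the input frame untouched. Assuming the 1.5*IQR fence implied by the weight=1.5 default (the computation of outlier_index lies outside this hunk), a usage sketch with made-up data:

# Hedged example; the DataFrame and column are invented for illustration.
import pandas as pd
df = pd.DataFrame({'value': [1, 2, 3, 4, 100]})
cleaned = vp_drop_outlier(df, 'value')    # default weight=1.5 -> classic IQR fence
print(cleaned)                            # the extreme row (100) should be removed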
@@ -74,10 +77,12 @@ def vp_create_feature_importances(model, X_train=None, sort=False):
 
     df_i = _vp_pd.DataFrame(model.feature_importances_, index=feature_names, columns=['Feature_importance'])
     df_i['Percentage'] = 100 * (df_i['Feature_importance'] / df_i['Feature_importance'].max())
-    if sort: df_i.sort_values(by='Feature_importance', ascending=False, inplace=True)
+    if sort:
+        df_i.sort_values(by='Feature_importance', ascending=False, inplace=True)
     df_i = df_i.round(2)
 
     return df_i
+
 ######
 # Visual Python: Machine Learning > Model Info
 ######
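The change in vp_create_feature_importances is purely stylistic: the one-line "if sort: ..." is split over two lines, and sort_values(..., inplace=True) still reorders df_i before rounding. A hedged usage sketch with a scikit-learn model on an illustrative dataset:

# Illustrative only; assumes a fitted tree-based model exposing feature_importances_.
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
import pandas as pd

data = load_iris()
X_train = pd.DataFrame(data.data, columns=data.feature_names)
model = RandomForestClassifier(random_state=0).fit(X_train, data.target)

df_i = vp_create_feature_importances(model, X_train, sort=True)
print(df_i)    # Feature_importance plus Percentage relative to the largest value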
@@ -91,10 +96,13 @@ def vp_plot_feature_importances(model, X_train=None, sort=False, top_count=0):
         df_i['Percentage'].sort_values().plot(kind='barh')
     else:
         df_i['Percentage'].plot(kind='barh')
+
     _vp_plt.xlabel('Feature importance Percentage')
     _vp_plt.ylabel('Features')
-
     _vp_plt.show()
+
+    return
+
 ######
 # Visual Python: Visualization > Seaborn
 ######
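The added blank lines and trailing return in vp_plot_feature_importances do not change behavior; the function still draws a horizontal bar chart of the Percentage column and returns None. Continuing the hypothetical model above:

# Continues the illustrative RandomForest example; opens a matplotlib figure.
vp_plot_feature_importances(model, X_train, sort=True)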
@@ -134,4 +142,6 @@ def _single(ax):
         for idx, ax in _vp_np.ndenumerate(axs):
             _single(ax)
     else:
-        _single(axs)
+        _single(axs)
+
+    return
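The Seaborn helper dispatches on whether axs is a NumPy array of axes (a subplot grid) or a single axis, applying _single to each; the trailing return is again a no-op. A hedged sketch of the two call shapes, assuming the enclosing function is the value-labelling helper suggested by the "Visualization > Seaborn" banner (its name below is an assumption, not taken from this diff):

# Illustrative only; vp_seaborn_show_values is assumed to be the enclosing helper.
import seaborn as sns
import matplotlib.pyplot as plt

titanic = sns.load_dataset('titanic')
fig, axs = plt.subplots(1, 2)                    # axs is an ndarray -> ndenumerate branch
sns.countplot(x='sex', data=titanic, ax=axs[0])
sns.countplot(x='class', data=titanic, ax=axs[1])
vp_seaborn_show_values(axs)                      # assumed name; a single Axes also works
plt.show()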