File tree 7 files changed +53
-15
lines changed
7 files changed +53
-15
lines changed Original file line number Diff line number Diff line change @@ -302,14 +302,16 @@ define([
302
302
// Click import library
303
303
$ ( this . wrapSelector ( '#popupImport' ) ) . on ( 'click' , function ( ) {
304
304
// add import codes
305
- var code = that . generateImportCode ( ) ;
306
- // create block and run it
307
- $ ( '#vp_wrapper' ) . trigger ( {
308
- type : 'create_option_page' ,
309
- blockType : 'block' ,
310
- menuId : 'lgExe_code' ,
311
- menuState : { taskState : { code : code } } ,
312
- afterAction : 'run'
305
+ var codes = that . generateImportCode ( ) ;
306
+ codes && codes . forEach ( code => {
307
+ // create block and run it
308
+ $ ( '#vp_wrapper' ) . trigger ( {
309
+ type : 'create_option_page' ,
310
+ blockType : 'block' ,
311
+ menuId : 'lgExe_code' ,
312
+ menuState : { taskState : { code : code } } ,
313
+ afterAction : 'run'
314
+ } ) ;
313
315
} ) ;
314
316
} ) ;
315
317
@@ -598,7 +600,7 @@ define([
598
600
599
601
generateImportCode ( ) {
600
602
/** Implementation needed - Generated on clicking Import Library button */
601
- return '' ;
603
+ return [ ] ;
602
604
}
603
605
604
606
generateCode ( ) {
Original file line number Diff line number Diff line change @@ -30,6 +30,37 @@ import fitz
30
30
import nltk
31
31
nltk.download('punkt')` ;
32
32
33
+ const PDF_FUNC = `def vp_pdf_get_sentence(fname_lst):
34
+ '''
35
+ Get sentence from pdf file by PyMuPDF
36
+ '''
37
+ df = pd.DataFrame()
38
+ for fname in fname_lst:
39
+ if fname.split('.')[-1] != 'pdf': continue
40
+ try:
41
+ doc = fitz.open(fname)
42
+ sentence_lst = []
43
+ for page in doc:
44
+ block_lst = page.get_text('blocks')
45
+
46
+ text_lst = [block[4] for block in block_lst if block[6] == 0]
47
+ text = '\\n'.join(text_lst)
48
+
49
+ sentence_lst.extend([sentence for sentence in nltk.sent_tokenize(text)])
50
+
51
+ doc.close()
52
+ except Exception as e:
53
+ print(e)
54
+ continue
55
+
56
+ df_doc = pd.DataFrame({
57
+ 'fname': fname.split('/')[-1],
58
+ 'sentence': sentence_lst
59
+ })
60
+ df = pd.concat([df,df_doc])
61
+
62
+ return df.reset_index().drop('index', axis=1)` ;
63
+
33
64
const PDF_CMD = 'df = vp_pdf_get_sentence(pdf_lst)\ndf'
34
65
/**
35
66
* PDF
@@ -93,7 +124,10 @@ nltk.download('punkt')`;
93
124
}
94
125
95
126
generateImportCode ( ) {
96
- return PDF_IMPORT ;
127
+ return [
128
+ PDF_IMPORT ,
129
+ PDF_FUNC
130
+ ] ;
97
131
}
98
132
99
133
generateCode ( ) {
Original file line number Diff line number Diff line change @@ -147,7 +147,9 @@ define([
147
147
}
148
148
149
149
generateImportCode ( ) {
150
- return 'from pandas_profiling import ProfileReport' ;
150
+ return [
151
+ 'from pandas_profiling import ProfileReport'
152
+ ] ;
151
153
}
152
154
153
155
generateCode ( ) {
Original file line number Diff line number Diff line change @@ -238,7 +238,7 @@ define([
238
238
}
239
239
240
240
generateImportCode ( ) {
241
- return 'from sklearn import metrics' ;
241
+ return [ 'from sklearn import metrics' ] ;
242
242
}
243
243
244
244
generateCode ( ) {
Original file line number Diff line number Diff line change @@ -775,7 +775,7 @@ define([
775
775
}
776
776
code . append ( "rcParams['axes.unicode_minus'] = False" ) ;
777
777
778
- return code . toString ( ) ;
778
+ return [ code . toString ( ) ] ;
779
779
}
780
780
781
781
Original file line number Diff line number Diff line change @@ -114,7 +114,7 @@ define([
114
114
var code = new com_String ( ) ;
115
115
code . appendLine ( 'import matplotlib.pyplot as plt' ) ;
116
116
code . append ( 'import seaborn as sns' ) ;
117
- return code . toString ( ) ;
117
+ return [ code . toString ( ) ] ;
118
118
}
119
119
120
120
generateCode ( ) {
Original file line number Diff line number Diff line change @@ -507,7 +507,7 @@ define([
507
507
code . append ( "rcParams['axes.unicode_minus'] = False" ) ;
508
508
}
509
509
510
- return code . toString ( ) ;
510
+ return [ code . toString ( ) ] ;
511
511
}
512
512
513
513
generateCode ( preview = false ) {
You can’t perform that action at this time.
0 commit comments