Skip to content

Commit 9b4f24a

Browse files
committed
Fixed #233 - Add 'Use PyArrow' checkbox
- Add 'Use PyArrow' checkbox - When 'Use PyArrow' is checked, generate code that uses pyarrow instead of pandas. - Add parquet to the file types
1 parent 7f10ea7 commit 9b4f24a

File tree

2 files changed

+255
-16
lines changed

2 files changed

+255
-16
lines changed

visualpython/data/m_library/pandasLibrary.js

Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6825,6 +6825,189 @@ define([
68256825
},
68266826
]
68276827
},
6828+
// ***
6829+
"pd_toParquet": {
6830+
"name": "To Parquet",
6831+
"library": "pandas",
6832+
"description": "DataFrame/Series to Parquet file",
6833+
"code": "${i0}.to_parquet(${path}${etc})",
6834+
"options": [
6835+
{
6836+
"name": "i0",
6837+
"label": "DataFrame",
6838+
"required": true,
6839+
"component": [
6840+
"data_select"
6841+
],
6842+
"var_type": [
6843+
"DataFrame",
6844+
"Series"
6845+
]
6846+
},
6847+
{
6848+
"name": "path",
6849+
"label": "File path/variable",
6850+
"required": true,
6851+
"type": "text"
6852+
}
6853+
]
6854+
},
6855+
"pd_readParquet": {
6856+
"name": "Read Parquet",
6857+
"library": "pandas",
6858+
"description": "Parquet to pandas object",
6859+
"code": "${o0} = pd.read_parquet(${i0}${etc})",
6860+
"options": [
6861+
{
6862+
"name": "i0",
6863+
"label": "File path/object",
6864+
"required": true,
6865+
"type": "text",
6866+
"component": [
6867+
"file"
6868+
]
6869+
},
6870+
{
6871+
"name": "o0",
6872+
"label": "Allocate to",
6873+
"output": true,
6874+
"component": [
6875+
"input"
6876+
],
6877+
"value": "vp_df"
6878+
},
6879+
]
6880+
},
6881+
"pa_readCsv": {
6882+
"name": "Read Csv as pyarrow",
6883+
"library": "pyarrow",
6884+
"description": "Csv to pandas object",
6885+
"code": "${o0} = pa.csv.read_csv(${i0}${etc}).to_pandas()",
6886+
"options": [
6887+
{
6888+
"name": "i0",
6889+
"label": "File path/object",
6890+
"required": true,
6891+
"type": "text",
6892+
"component": [
6893+
"file"
6894+
]
6895+
},
6896+
{
6897+
"name": "o0",
6898+
"label": "Allocate to",
6899+
"output": true,
6900+
"component": [
6901+
"input"
6902+
],
6903+
"value": "vp_df"
6904+
}
6905+
]
6906+
},
6907+
"pa_toCsv": {
6908+
"name": "To Csv as pyarrow",
6909+
"library": "pyarrow",
6910+
"description": "DataFrame/Series to csv file",
6911+
"code": "pa.csv.write_csv(${i0}, ${path})",
6912+
"options": [
6913+
{
6914+
"name": "i0",
6915+
"label": "DataFrame",
6916+
"required": true,
6917+
"component": [
6918+
"data_select"
6919+
],
6920+
"var_type": [
6921+
"DataFrame",
6922+
"Series"
6923+
]
6924+
},
6925+
{
6926+
"name": "path",
6927+
"label": "File path/variable",
6928+
"required": true,
6929+
"type": "text"
6930+
}
6931+
]
6932+
},
6933+
"pa_readJson": {
6934+
"name": "Read Json as pyarrow",
6935+
"library": "pyarrow",
6936+
"description": "Json to pyarrow object",
6937+
"code": "${o0} = pa.json.read_json(${i0}${etc}).to_pandas()",
6938+
"options": [
6939+
{
6940+
"name": "i0",
6941+
"label": "File path/object",
6942+
"required": true,
6943+
"type": "text",
6944+
"component": [
6945+
"file"
6946+
]
6947+
},
6948+
{
6949+
"name": "o0",
6950+
"label": "Allocate to",
6951+
"output": true,
6952+
"component": [
6953+
"input"
6954+
],
6955+
"value": "vp_df"
6956+
}
6957+
]
6958+
},
6959+
"pa_readParquet": {
6960+
"name": "Read Parquet as pyarrow",
6961+
"library": "pyarrow",
6962+
"description": "Parquet to pandas object",
6963+
"code": "${o0} = pa.parquet.read_table(${i0}${etc}).to_pandas()",
6964+
"options": [
6965+
{
6966+
"name": "i0",
6967+
"label": "File path/object",
6968+
"required": true,
6969+
"type": "text",
6970+
"component": [
6971+
"file"
6972+
]
6973+
},
6974+
{
6975+
"name": "o0",
6976+
"label": "Allocate to",
6977+
"output": true,
6978+
"component": [
6979+
"input"
6980+
],
6981+
"value": "vp_df"
6982+
}
6983+
]
6984+
},
6985+
"pa_toParquet": {
6986+
"name": "To Parquet as pyarrow",
6987+
"library": "pyarrow",
6988+
"description": "DataFrame/Series to Parquet file",
6989+
"code": "pa.parquet.write_table(${i0}, ${path})",
6990+
"options": [
6991+
{
6992+
"name": "i0",
6993+
"label": "DataFrame",
6994+
"required": true,
6995+
"component": [
6996+
"data_select"
6997+
],
6998+
"var_type": [
6999+
"DataFrame",
7000+
"Series"
7001+
]
7002+
},
7003+
{
7004+
"name": "path",
7005+
"label": "File path/variable",
7006+
"required": true,
7007+
"type": "text"
7008+
}
7009+
]
7010+
},
68287011
}
68297012

68307013
return {

visualpython/js/m_apps/File.js

Lines changed: 72 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,8 @@ define([
4444
'json': 'json',
4545
'pickle': '',
4646
'sas': '', // xport or sas7bdat
47-
'spss': ''
47+
'spss': '',
48+
'parquet':'parquet'
4849
}
4950

5051
this.package = {
@@ -90,7 +91,8 @@ define([
9091
'json': 'pd_readJson',
9192
'pickle': 'pd_readPickle',
9293
'sas': 'pd_readSas',
93-
'spss': 'pd_readSpss'
94+
'spss': 'pd_readSpss',
95+
'parquet':'pd_readParquet'
9496
},
9597
selectedType: 'csv',
9698
package: null,
@@ -104,7 +106,8 @@ define([
104106
'csv': 'pd_toCsv',
105107
'excel': 'pd_toExcel',
106108
'json': 'pd_toJson',
107-
'pickle': 'pd_toPickle'
109+
'pickle': 'pd_toPickle',
110+
'parquet':'pd_toParquet'
108111
},
109112
selectedType: 'csv',
110113
package: null,
@@ -194,22 +197,64 @@ define([
194197
that.state['vp_fileioType'] = pageType;
195198
$(that.wrapSelector('.vp-fileio-box')).hide();
196199
$(that.wrapSelector('#vp_file' + pageType)).show();
197-
200+
201+
198202
//set fileExtensions
199203
that.fileResultState = {
200204
...that.fileState[pageType].fileResultState
201205
};
202206
});
207+
208+
// 'Use PyArrow' checkbox (#fileReadAs) change event: switch the selected function between pandas (pd_*) and pyarrow (pa_*)
209+
$(document).on('change', this.wrapSelector('#fileReadAs'), function() {
210+
let isChecked = $(this).prop('checked');
211+
var fileioType = that.state.vp_fileioType;
212+
var prefix = '#vp_file' + fileioType + ' ';
213+
var selectedFileFormat = that.fileState[fileioType].selectedType;
214+
var fileioTypePrefix = fileioType.toLowerCase();
215+
if(fileioTypePrefix == 'write'){
216+
fileioTypePrefix = "to";
217+
}
218+
219+
if(isChecked){ // pyArrow
220+
that.fileState[fileioType].fileTypeId[that.state.fileExtension] = "pa_" + fileioTypePrefix + selectedFileFormat[0].toUpperCase() + selectedFileFormat.slice(1);
221+
$(that.wrapSelector(prefix + '#vp_optionBox')).closest('.vp-accordian-container').hide();
222+
}
223+
else{ // pandas
224+
that.fileState[fileioType].fileTypeId[that.state.fileExtension] = "pd_" + fileioTypePrefix + selectedFileFormat[0].toUpperCase() + selectedFileFormat.slice(1);
225+
if (that.state.fileExtension != 'parquet'){ // parquet has no options area
226+
$(that.wrapSelector(prefix + '#vp_optionBox')).closest('.vp-accordian-container').show();
227+
}
228+
}
229+
230+
var fileTypeObj = that.fileState[fileioType]['fileTypeId'];
231+
var selectedType = that.fileState[fileioType]['selectedType'];
232+
let fileId = fileTypeObj[selectedType];
233+
let pdLib = pandasLibrary.PANDAS_FUNCTION;
234+
let thisPkg = JSON.parse(JSON.stringify(pdLib[fileId]));
235+
236+
that.fileState[fileioType].package = thisPkg;
237+
});
238+
203239
}
204240

205241
_bindEventByType(pageType) {
206242
var that = this;
207243
var prefix = '#vp_file' + pageType + ' ';
208-
244+
245+
var fileioTypePrefix = pageType.toLowerCase();
246+
if(fileioTypePrefix == 'write'){
247+
fileioTypePrefix = "to";
248+
}
249+
var selectedFileFormat = that.fileState[pageType].selectedType;
209250
// select file type
210251
$(this.wrapSelector(prefix + '#fileType')).change(function() {
211252
var value = $(this).val();
212253
that.fileState[pageType].selectedType = value;
254+
255+
// Whenever the file type changes, reset to the default pandas function
256+
that.fileState[pageType].fileTypeId[that.state.fileExtension] = "pd_" + fileioTypePrefix + selectedFileFormat[0].toUpperCase() + selectedFileFormat.slice(1);
257+
213258

214259
// reload
215260
that.renderPage(pageType);
@@ -327,7 +372,7 @@ define([
327372
renderPage(pageType) {
328373
var that = this;
329374
var prefix = '#vp_file' + pageType + ' ';
330-
375+
331376
// clear
332377
$(this.wrapSelector(prefix + '#vp_inputOutputBox table tbody')).html('');
333378
$(this.wrapSelector(prefix + '#vp_optionBox table tbody')).html('');
@@ -344,7 +389,7 @@ define([
344389
...this.fileState[pageType].fileResultState
345390
};
346391

347-
if (selectedType == 'pickle') {
392+
if (selectedType == 'pickle' || selectedType == 'parquet') {
348393
// hide additional option box
349394
$(this.wrapSelector(prefix + '#vp_optionBox')).closest('.vp-accordian-container').hide();
350395
} else {
@@ -355,7 +400,7 @@ define([
355400
if (selectedType == 'json') {
356401
this.fileResultState.pathInputId = this.wrapSelector(prefix + '#path_or_buf');
357402
}
358-
if (selectedType == 'pickle') {
403+
if (selectedType == 'pickle' || selectedType == 'parquet') {
359404
this.fileResultState.pathInputId = this.wrapSelector(prefix + '#path');
360405
}
361406
}
@@ -365,18 +410,30 @@ define([
365410
// pdGen.vp_showInterfaceOnPage(this.wrapSelector('#vp_file' + pageType), thisPkg);
366411
pdGen.vp_showInterfaceOnPage(this, thisPkg, this.state, parent=('#vp_file' + pageType));
367412

413+
// pyarrow can read and write parquet and csv, but can only read json.
414+
if ((pageType == 'Read' && selectedType == 'json') || selectedType == 'parquet'|| selectedType == 'csv') {
415+
// add the 'Use PyArrow' checkbox next to the File Type selector
416+
$(this.wrapSelector(prefix + '#vp_inputOutputBox table tbody')).prepend(
417+
$('<tr>').append($(`<td><label for="fileType" class="vp-bold vp-orange-text">File Type</label></td>`))
418+
.append($('<td><select id="fileType" class="vp-select"></select> <label><input id="fileReadAs" type="checkbox"/><span>Use PyArrow</span></label></td>'))
419+
);
420+
}
421+
else{
422+
$(this.wrapSelector(prefix + '#vp_inputOutputBox table tbody')).prepend(
423+
$('<tr>').append($(`<td><label for="fileType" class="vp-bold vp-orange-text">File Type</label></td>`))
424+
.append($('<td><select id="fileType" class="vp-select"></select> </td>'))
425+
);
426+
}
427+
368428
// prepend file type selector
369-
$(this.wrapSelector(prefix + '#vp_inputOutputBox table tbody')).prepend(
370-
$('<tr>').append($(`<td><label for="fileType" class="vp-bold vp-orange-text">File Type</label></td>`))
371-
.append($('<td><select id="fileType" class="vp-select"></select></td>'))
372-
);
373429
var fileTypeList = Object.keys(fileTypeObj);
374430
fileTypeList.forEach(type => {
375431
$(this.wrapSelector(prefix + '#fileType')).append(
376432
$(`<option value="${type}">${type}</option>`)
377433
);
378434
});
379435

436+
380437
// prepend user option
381438
let hasAllocateTo = $(this.wrapSelector(prefix + '#o0')).length > 0;
382439
if (hasAllocateTo) {
@@ -390,17 +447,17 @@ define([
390447
.append($('<td><input id="userOption" type="text" class="vp-input vp-state" placeholder="key=value, ..."/></td>'))
391448
)
392449
}
393-
450+
394451
$(this.wrapSelector(prefix + '#fileType')).val(selectedType);
395-
452+
396453
// add file navigation button
397454
if (pageType == 'Write') {
398455
if (selectedType == 'json') {
399456
$(prefix + '#path_or_buf').parent().html(
400457
com_util.formatString('<input type="text" class="vp-input vp-state" id="path_or_buf" index="0" placeholder="" value="" title=""><div id="vp_openFileNavigationBtn" class="{0}"></div>'
401458
, 'vp-file-browser-button')
402459
);
403-
} else if (selectedType == 'pickle') {
460+
} else if (selectedType == 'pickle' || selectedType == 'parquet') {
404461
$(prefix + '#path').parent().html(
405462
com_util.formatString('<input type="text" class="vp-input vp-state" id="path" index="0" placeholder="" value="" title="" required="true"><div id="vp_openFileNavigationBtn" class="{0}"></div>'
406463
, 'vp-file-browser-button')
@@ -493,7 +550,6 @@ define([
493550
var result = pdGen.vp_codeGenerator(this, thisPkg, this.state, userOption.toString(), parent='#vp_fileWrite');
494551
sbCode.append(result);
495552
}
496-
497553
return sbCode.toString();
498554
}
499555

0 commit comments

Comments
 (0)