From 7f10ea778f794f84587f2baf5b7b7a506136aa8b Mon Sep 17 00:00:00 2001 From: Minku Koo Date: Sun, 27 Aug 2023 02:22:27 +0900 Subject: [PATCH 1/2] Fixed #233 - Update File, Import - Add pyarrow at import app - Add file format Parquet at file read and write --- visualpython/js/com/com_Config.js | 10 ++++++++-- visualpython/js/m_apps/Import.js | 3 ++- visualpython/js/m_library/m_pandas/readFile.js | 6 ++++-- visualpython/js/m_library/m_pandas/toFile.js | 6 ++++-- 4 files changed, 18 insertions(+), 7 deletions(-) diff --git a/visualpython/js/com/com_Config.js b/visualpython/js/com/com_Config.js index bbc6fb83..91fbb022 100644 --- a/visualpython/js/com/com_Config.js +++ b/visualpython/js/com/com_Config.js @@ -75,7 +75,8 @@ define([ 'import matplotlib.pyplot as plt', '%matplotlib inline', 'import seaborn as sns', - 'import plotly.express as px' + 'import plotly.express as px', + 'import pyarrow as pa' ], 'matplotlib customizing': [ 'import matplotlib.pyplot as plt', @@ -132,7 +133,8 @@ define([ 'from plotly.offline import init_notebook_mode', 'init_notebook_mode(connected=True)' ] - } + }, + { library: 'pyarrow', alias:'pa' }, ] } @@ -208,6 +210,10 @@ define([ 'statsmodels.api': { code: 'import statsmodels.api as sm', type: 'package' + }, + 'pyarrow': { + code: 'import pyarrow as pa', + type: 'package' } } diff --git a/visualpython/js/m_apps/Import.js b/visualpython/js/m_apps/Import.js index 38b6aec6..645114d8 100644 --- a/visualpython/js/m_apps/Import.js +++ b/visualpython/js/m_apps/Import.js @@ -37,7 +37,8 @@ define([ 'from plotly.offline import init_notebook_mode', 'init_notebook_mode(connected=True)' ], checked: false - } + }, + { i0: 'pyarrow', i1: 'pa', type: 'module', checked: false}, ], 'machine-learning': [ { i0: 'sklearn.model_selection', i1: 'train_test_split', type: 'function' }, diff --git a/visualpython/js/m_library/m_pandas/readFile.js b/visualpython/js/m_library/m_pandas/readFile.js index 296d3509..c9144db9 100644 --- a/visualpython/js/m_library/m_pandas/readFile.js +++ b/visualpython/js/m_library/m_pandas/readFile.js @@ -48,7 +48,8 @@ define([ 'csv': 'csv', 'excel': 'xlsx', 'json': 'json', - 'pickle': '' + 'pickle': '', + 'parquet': 'parquet' } this.dataPath = 'https://raw.githubusercontent.com/visualpython/visualpython/main/visualpython/data/sample_csv/'; this.fileResultState = { @@ -60,7 +61,8 @@ define([ 'csv': 'pd_readCsv', 'excel': 'pd_readExcel', 'json': 'pd_readJson', - 'pickle': 'pd_readPickle' + 'pickle': 'pd_readPickle', + 'parquet': 'pd_readParquet' }, selectedType: 'csv', package: null diff --git a/visualpython/js/m_library/m_pandas/toFile.js b/visualpython/js/m_library/m_pandas/toFile.js index 41a58ab3..08bb45c8 100644 --- a/visualpython/js/m_library/m_pandas/toFile.js +++ b/visualpython/js/m_library/m_pandas/toFile.js @@ -48,7 +48,8 @@ define([ 'csv': 'csv', 'excel': 'xlsx', 'json': 'json', - 'pickle': '' + 'pickle': '', + 'parquet': 'parquet' } this.dataPath = 'https://raw.githubusercontent.com/visualpython/visualpython/main/visualpython/data/sample_csv/'; this.fileResultState = { @@ -60,7 +61,8 @@ define([ 'csv': 'pd_toCsv', 'excel': 'pd_toExcel', 'json': 'pd_toJson', - 'pickle': 'pd_toPickle' + 'pickle': 'pd_toPickle', + 'parquet': 'pd_toParquet' }, selectedType: 'csv', package: null From 9b4f24aac81e266f14556129d6f9832af72dadd6 Mon Sep 17 00:00:00 2001 From: Minku Koo Date: Sun, 27 Aug 2023 22:37:50 +0900 Subject: [PATCH 2/2] Fixed #233 - Add 'Use PyArrow' checkbox - Add 'Use PyArrow' checkbox - When checked 'Use PyArrow', change the generated code that used pyarrow. - Add parquet to file type --- visualpython/data/m_library/pandasLibrary.js | 183 +++++++++++++++++++ visualpython/js/m_apps/File.js | 88 +++++++-- 2 files changed, 255 insertions(+), 16 deletions(-) diff --git a/visualpython/data/m_library/pandasLibrary.js b/visualpython/data/m_library/pandasLibrary.js index b18ed0c9..008b8af5 100644 --- a/visualpython/data/m_library/pandasLibrary.js +++ b/visualpython/data/m_library/pandasLibrary.js @@ -6825,6 +6825,189 @@ define([ }, ] }, + // *** + "pd_toParquet": { + "name": "To Parquet", + "library": "pandas", + "description": "DataFrame/Series to Parquet file", + "code": "${i0}.to_parquet(${path}${etc})", + "options": [ + { + "name": "i0", + "label": "DataFrame", + "required": true, + "component": [ + "data_select" + ], + "var_type": [ + "DataFrame", + "Series" + ] + }, + { + "name": "path", + "label": "File path/variable", + "required": true, + "type": "text" + } + ] + }, + "pd_readParquet": { + "name": "Read Parquet", + "library": "pandas", + "description": "Parquet to pandas object", + "code": "${o0} = pd.read_parquet(${i0}${etc})", + "options": [ + { + "name": "i0", + "label": "File path/object", + "required": true, + "type": "text", + "component": [ + "file" + ] + }, + { + "name": "o0", + "label": "Allocate to", + "output": true, + "component": [ + "input" + ], + "value": "vp_df" + }, + ] + }, + "pa_readCsv": { + "name": "Read Csv as pyarrow", + "library": "pyarrow", + "description": "Csv to pandas object", + "code": "${o0} = pa.csv.read_csv(${i0}${etc}).to_pandas()", + "options": [ + { + "name": "i0", + "label": "File path/object", + "required": true, + "type": "text", + "component": [ + "file" + ] + }, + { + "name": "o0", + "label": "Allocate to", + "output": true, + "component": [ + "input" + ], + "value": "vp_df" + } + ] + }, + "pa_toCsv": { + "name": "To Csv as pyarrow", + "library": "pyarrow", + "description": "DataFrame/Series to csv file", + "code": "pa.csv.write_csv(${i0}, ${path})", + "options": [ + { + "name": "i0", + "label": "DataFrame", + "required": true, + "component": [ + "data_select" + ], + "var_type": [ + "DataFrame", + "Series" + ] + }, + { + "name": "path", + "label": "File path/variable", + "required": true, + "type": "text" + } + ] + }, + "pa_readJson": { + "name": "Read Json as pyarrow", + "library": "pyarrow", + "description": "Json to pyarrow object", + "code": "${o0} = pa.json.read_json(${i0}${etc}).to_pandas()", + "options": [ + { + "name": "i0", + "label": "File path/object", + "required": true, + "type": "text", + "component": [ + "file" + ] + }, + { + "name": "o0", + "label": "Allocate to", + "output": true, + "component": [ + "input" + ], + "value": "vp_df" + } + ] + }, + "pa_readParquet": { + "name": "Read Parquet as pyarrow", + "library": "pyarrow", + "description": "Parquet to pandas object", + "code": "${o0} = pa.parquet.read_table(${i0}${etc}).to_pandas()", + "options": [ + { + "name": "i0", + "label": "File path/object", + "required": true, + "type": "text", + "component": [ + "file" + ] + }, + { + "name": "o0", + "label": "Allocate to", + "output": true, + "component": [ + "input" + ], + "value": "vp_df" + } + ] + }, + "pa_toParquet": { + "name": "To Parquet as pyarrow", + "library": "pyarrow", + "description": "DataFrame/Series to Parquet file", + "code": "pa.parquet.write_table(${i0}, ${path})", + "options": [ + { + "name": "i0", + "label": "DataFrame", + "required": true, + "component": [ + "data_select" + ], + "var_type": [ + "DataFrame", + "Series" + ] + }, + { + "name": "path", + "label": "File path/variable", + "required": true, + "type": "text" + } + ] + }, } return { diff --git a/visualpython/js/m_apps/File.js b/visualpython/js/m_apps/File.js index ce93fc74..2fe1454e 100644 --- a/visualpython/js/m_apps/File.js +++ b/visualpython/js/m_apps/File.js @@ -44,7 +44,8 @@ define([ 'json': 'json', 'pickle': '', 'sas': '', // xport or sas7bdat - 'spss': '' + 'spss': '', + 'parquet':'parquet' } this.package = { @@ -90,7 +91,8 @@ define([ 'json': 'pd_readJson', 'pickle': 'pd_readPickle', 'sas': 'pd_readSas', - 'spss': 'pd_readSpss' + 'spss': 'pd_readSpss', + 'parquet':'pd_readParquet' }, selectedType: 'csv', package: null, @@ -104,7 +106,8 @@ define([ 'csv': 'pd_toCsv', 'excel': 'pd_toExcel', 'json': 'pd_toJson', - 'pickle': 'pd_toPickle' + 'pickle': 'pd_toPickle', + 'parquet':'pd_toParquet' }, selectedType: 'csv', package: null, @@ -194,22 +197,64 @@ define([ that.state['vp_fileioType'] = pageType; $(that.wrapSelector('.vp-fileio-box')).hide(); $(that.wrapSelector('#vp_file' + pageType)).show(); - + + //set fileExtensions that.fileResultState = { ...that.fileState[pageType].fileResultState }; }); + + // fileReadAs change Event, Use PyArrow + $(document).on('change', this.wrapSelector('#fileReadAs'), function() { + let isChecked = $(this).prop('checked'); + var fileioType = that.state.vp_fileioType; + var prefix = '#vp_file' + fileioType + ' '; + var selectedFileFormat = that.fileState[fileioType].selectedType; + var fileioTypePrefix = fileioType.toLowerCase(); + if(fileioTypePrefix == 'write'){ + fileioTypePrefix = "to"; + } + + if(isChecked){ // pyArrow + that.fileState[fileioType].fileTypeId[that.state.fileExtension] = "pa_" + fileioTypePrefix + selectedFileFormat[0].toUpperCase() + selectedFileFormat.slice(1); + $(that.wrapSelector(prefix + '#vp_optionBox')).closest('.vp-accordian-container').hide(); + } + else{ // pandas + that.fileState[fileioType].fileTypeId[that.state.fileExtension] = "pd_" + fileioTypePrefix + selectedFileFormat[0].toUpperCase() + selectedFileFormat.slice(1); + if (that.state.fileExtension != 'parquet'){ // parquet has no options area + $(that.wrapSelector(prefix + '#vp_optionBox')).closest('.vp-accordian-container').show(); + } + } + + var fileTypeObj = that.fileState[fileioType]['fileTypeId']; + var selectedType = that.fileState[fileioType]['selectedType']; + let fileId = fileTypeObj[selectedType]; + let pdLib = pandasLibrary.PANDAS_FUNCTION; + let thisPkg = JSON.parse(JSON.stringify(pdLib[fileId])); + + that.fileState[fileioType].package = thisPkg; + }); + } _bindEventByType(pageType) { var that = this; var prefix = '#vp_file' + pageType + ' '; - + + var fileioTypePrefix = pageType.toLowerCase(); + if(fileioTypePrefix == 'write'){ + fileioTypePrefix = "to"; + } + var selectedFileFormat = that.fileState[pageType].selectedType; // select file type $(this.wrapSelector(prefix + '#fileType')).change(function() { var value = $(this).val(); that.fileState[pageType].selectedType = value; + + // Whenever change the file type, change to default pandas + that.fileState[pageType].fileTypeId[that.state.fileExtension] = "pd_" + fileioTypePrefix + selectedFileFormat[0].toUpperCase() + selectedFileFormat.slice(1); + // reload that.renderPage(pageType); @@ -327,7 +372,7 @@ define([ renderPage(pageType) { var that = this; var prefix = '#vp_file' + pageType + ' '; - + // clear $(this.wrapSelector(prefix + '#vp_inputOutputBox table tbody')).html(''); $(this.wrapSelector(prefix + '#vp_optionBox table tbody')).html(''); @@ -344,7 +389,7 @@ define([ ...this.fileState[pageType].fileResultState }; - if (selectedType == 'pickle') { + if (selectedType == 'pickle' || selectedType == 'parquet') { // hide additional option box $(this.wrapSelector(prefix + '#vp_optionBox')).closest('.vp-accordian-container').hide(); } else { @@ -355,7 +400,7 @@ define([ if (selectedType == 'json') { this.fileResultState.pathInputId = this.wrapSelector(prefix + '#path_or_buf'); } - if (selectedType == 'pickle') { + if (selectedType == 'pickle' || selectedType == 'parquet') { this.fileResultState.pathInputId = this.wrapSelector(prefix + '#path'); } } @@ -365,11 +410,22 @@ define([ // pdGen.vp_showInterfaceOnPage(this.wrapSelector('#vp_file' + pageType), thisPkg); pdGen.vp_showInterfaceOnPage(this, thisPkg, this.state, parent=('#vp_file' + pageType)); + // pyarrow can r/w parquet, csv and only read json. + if ((pageType == 'Read' && selectedType == 'json') || selectedType == 'parquet'|| selectedType == 'csv') { + // add checkbox 'Use PyArrow', next to File Type + $(this.wrapSelector(prefix + '#vp_inputOutputBox table tbody')).prepend( + $('').append($(``)) + .append($(' ')) + ); + } + else{ + $(this.wrapSelector(prefix + '#vp_inputOutputBox table tbody')).prepend( + $('').append($(``)) + .append($(' ')) + ); + } + // prepend file type selector - $(this.wrapSelector(prefix + '#vp_inputOutputBox table tbody')).prepend( - $('').append($(``)) - .append($('')) - ); var fileTypeList = Object.keys(fileTypeObj); fileTypeList.forEach(type => { $(this.wrapSelector(prefix + '#fileType')).append( @@ -377,6 +433,7 @@ define([ ); }); + // prepend user option let hasAllocateTo = $(this.wrapSelector(prefix + '#o0')).length > 0; if (hasAllocateTo) { @@ -390,9 +447,9 @@ define([ .append($('')) ) } - + $(this.wrapSelector(prefix + '#fileType')).val(selectedType); - + // add file navigation button if (pageType == 'Write') { if (selectedType == 'json') { @@ -400,7 +457,7 @@ define([ com_util.formatString('
' , 'vp-file-browser-button') ); - } else if (selectedType == 'pickle') { + } else if (selectedType == 'pickle' || selectedType == 'parquet') { $(prefix + '#path').parent().html( com_util.formatString('
' , 'vp-file-browser-button') @@ -493,7 +550,6 @@ define([ var result = pdGen.vp_codeGenerator(this, thisPkg, this.state, userOption.toString(), parent='#vp_fileWrite'); sbCode.append(result); } - return sbCode.toString(); }