diff --git a/visualpython/data/m_library/pandasLibrary.js b/visualpython/data/m_library/pandasLibrary.js index b18ed0c9..008b8af5 100644 --- a/visualpython/data/m_library/pandasLibrary.js +++ b/visualpython/data/m_library/pandasLibrary.js @@ -6825,6 +6825,189 @@ define([ }, ] }, + // *** + "pd_toParquet": { + "name": "To Parquet", + "library": "pandas", + "description": "DataFrame to Parquet file", + /* Only DataFrame is offered: pandas Series has no to_parquet() */ + "code": "${i0}.to_parquet(${path}${etc})", + "options": [ + { + "name": "i0", + "label": "DataFrame", + "required": true, + "component": [ + "data_select" + ], + "var_type": [ + "DataFrame" + ] + }, + { + "name": "path", + "label": "File path/variable", + "required": true, + "type": "text" + } + ] + }, + "pd_readParquet": { + "name": "Read Parquet", + "library": "pandas", + "description": "Parquet to pandas object", + "code": "${o0} = pd.read_parquet(${i0}${etc})", + "options": [ + { + "name": "i0", + "label": "File path/object", + "required": true, + "type": "text", + "component": [ + "file" + ] + }, + { + "name": "o0", + "label": "Allocate to", + "output": true, + "component": [ + "input" + ], + "value": "vp_df" + }, + ] + }, + "pa_readCsv": { + "name": "Read Csv as pyarrow", + "library": "pyarrow", + "description": "Csv to pandas object", + "code": "${o0} = pa.csv.read_csv(${i0}${etc}).to_pandas()", + "options": [ + { + "name": "i0", + "label": "File path/object", + "required": true, + "type": "text", + "component": [ + "file" + ] + }, + { + "name": "o0", + "label": "Allocate to", + "output": true, + "component": [ + "input" + ], + "value": "vp_df" + } + ] + }, + "pa_toCsv": { + "name": "To Csv as pyarrow", + "library": "pyarrow", + "description": "DataFrame to csv file", + /* write_csv takes a pyarrow Table/RecordBatch, so the DataFrame is converted first */ + "code": "pa.csv.write_csv(pa.Table.from_pandas(${i0}), ${path})", + "options": [ + { + "name": "i0", + "label": "DataFrame", + "required": true, + "component": [ + "data_select" + ], + "var_type": [ + "DataFrame" + ] + }, + { + "name": "path", + "label": "File path/variable", + 
"required": true, + "type": "text" + } + ] + }, + "pa_readJson": { + "name": "Read Json as pyarrow", + "library": "pyarrow", + "description": "Json to pandas object", + "code": "${o0} = pa.json.read_json(${i0}${etc}).to_pandas()", + "options": [ + { + "name": "i0", + "label": "File path/object", + "required": true, + "type": "text", + "component": [ + "file" + ] + }, + { + "name": "o0", + "label": "Allocate to", + "output": true, + "component": [ + "input" + ], + "value": "vp_df" + } + ] + }, + "pa_readParquet": { + "name": "Read Parquet as pyarrow", + "library": "pyarrow", + "description": "Parquet to pandas object", + "code": "${o0} = pa.parquet.read_table(${i0}${etc}).to_pandas()", + "options": [ + { + "name": "i0", + "label": "File path/object", + "required": true, + "type": "text", + "component": [ + "file" + ] + }, + { + "name": "o0", + "label": "Allocate to", + "output": true, + "component": [ + "input" + ], + "value": "vp_df" + } + ] + }, + "pa_toParquet": { + "name": "To Parquet as pyarrow", + "library": "pyarrow", + "description": "DataFrame to Parquet file", + /* write_table takes a pyarrow Table, so the DataFrame is converted first */ + "code": "pa.parquet.write_table(pa.Table.from_pandas(${i0}), ${path})", + "options": [ + { + "name": "i0", + "label": "DataFrame", + "required": true, + "component": [ + "data_select" + ], + "var_type": [ + "DataFrame" + ] + }, + { + "name": "path", + "label": "File path/variable", + "required": true, + "type": "text" + } + ] + }, } return { diff --git a/visualpython/js/com/com_Config.js b/visualpython/js/com/com_Config.js index bbc6fb83..91fbb022 100644 --- a/visualpython/js/com/com_Config.js +++ b/visualpython/js/com/com_Config.js @@ -75,7 +75,8 @@ define([ 'import matplotlib.pyplot as plt', '%matplotlib inline', 'import seaborn as sns', - 'import plotly.express as px' + 'import plotly.express as px', + 'import pyarrow as pa' ], 'matplotlib customizing': [ 'import matplotlib.pyplot as plt', @@ -132,7 +133,8 @@ define([ 'from plotly.offline import init_notebook_mode', 
'init_notebook_mode(connected=True)' ] - } + }, + { library: 'pyarrow', alias:'pa' }, ] } @@ -208,6 +210,10 @@ define([ 'statsmodels.api': { code: 'import statsmodels.api as sm', type: 'package' + }, + 'pyarrow': { + code: 'import pyarrow as pa', + type: 'package' } } diff --git a/visualpython/js/m_apps/File.js b/visualpython/js/m_apps/File.js index ce93fc74..2fe1454e 100644 --- a/visualpython/js/m_apps/File.js +++ b/visualpython/js/m_apps/File.js @@ -44,7 +44,8 @@ define([ 'json': 'json', 'pickle': '', 'sas': '', // xport or sas7bdat - 'spss': '' + 'spss': '', + 'parquet':'parquet' } this.package = { @@ -90,7 +91,8 @@ define([ 'json': 'pd_readJson', 'pickle': 'pd_readPickle', 'sas': 'pd_readSas', - 'spss': 'pd_readSpss' + 'spss': 'pd_readSpss', + 'parquet':'pd_readParquet' }, selectedType: 'csv', package: null, @@ -104,7 +106,8 @@ define([ 'csv': 'pd_toCsv', 'excel': 'pd_toExcel', 'json': 'pd_toJson', - 'pickle': 'pd_toPickle' + 'pickle': 'pd_toPickle', + 'parquet':'pd_toParquet' }, selectedType: 'csv', package: null, @@ -194,22 +197,64 @@ define([ that.state['vp_fileioType'] = pageType; $(that.wrapSelector('.vp-fileio-box')).hide(); $(that.wrapSelector('#vp_file' + pageType)).show(); - + + //set fileExtensions that.fileResultState = { ...that.fileState[pageType].fileResultState }; }); + + /* 'Use PyArrow' checkbox toggled: point the selected file type at its pa_* or pd_* library entry; fileTypeId is keyed by the selected type, the same key it is read back with below */ + $(document).on('change', this.wrapSelector('#fileReadAs'), function() { + let isChecked = $(this).prop('checked'); + var fileioType = that.state.vp_fileioType; + var prefix = '#vp_file' + fileioType + ' '; + var selectedFileFormat = that.fileState[fileioType].selectedType; + var fileioTypePrefix = fileioType.toLowerCase(); + if(fileioTypePrefix == 'write'){ + fileioTypePrefix = "to"; + } + + if(isChecked){ // pyArrow + that.fileState[fileioType].fileTypeId[selectedFileFormat] = "pa_" + fileioTypePrefix + selectedFileFormat[0].toUpperCase() + selectedFileFormat.slice(1); + $(that.wrapSelector(prefix + 
'#vp_optionBox')).closest('.vp-accordian-container').hide(); + } + else{ // pandas + that.fileState[fileioType].fileTypeId[selectedFileFormat] = "pd_" + fileioTypePrefix + selectedFileFormat[0].toUpperCase() + selectedFileFormat.slice(1); + if (selectedFileFormat != 'parquet'){ // parquet has no options area + $(that.wrapSelector(prefix + '#vp_optionBox')).closest('.vp-accordian-container').show(); + } + } + + var fileTypeObj = that.fileState[fileioType]['fileTypeId']; + var selectedType = that.fileState[fileioType]['selectedType']; + let fileId = fileTypeObj[selectedType]; + let pdLib = pandasLibrary.PANDAS_FUNCTION; + let thisPkg = JSON.parse(JSON.stringify(pdLib[fileId])); + + that.fileState[fileioType].package = thisPkg; + }); + } _bindEventByType(pageType) { var that = this; var prefix = '#vp_file' + pageType + ' '; - + + var fileioTypePrefix = pageType.toLowerCase(); + if(fileioTypePrefix == 'write'){ + fileioTypePrefix = "to"; + } + /* fileioTypePrefix ('read' or 'to') feeds the "pd_<prefix><Type>" library id built in the handler below */ // select file type $(this.wrapSelector(prefix + '#fileType')).change(function() { var value = $(this).val(); that.fileState[pageType].selectedType = value; + + /* A newly selected file type always starts on its pandas entry; key and id come from the fresh value, not from values captured at bind time */ + that.fileState[pageType].fileTypeId[value] = "pd_" + fileioTypePrefix + value[0].toUpperCase() + value.slice(1); + // reload that.renderPage(pageType); @@ -327,7 +372,7 @@ define([ renderPage(pageType) { var that = this; var prefix = '#vp_file' + pageType + ' '; - + // clear $(this.wrapSelector(prefix + '#vp_inputOutputBox table tbody')).html(''); $(this.wrapSelector(prefix + '#vp_optionBox table tbody')).html(''); @@ -344,7 +389,7 @@ define([ ...this.fileState[pageType].fileResultState }; - if (selectedType == 'pickle') { + if (selectedType == 'pickle' || selectedType == 'parquet') { // hide additional option box $(this.wrapSelector(prefix + 
'#vp_optionBox')).closest('.vp-accordian-container').hide(); } else { @@ -355,7 +400,7 @@ define([ if (selectedType == 'json') { this.fileResultState.pathInputId = this.wrapSelector(prefix + '#path_or_buf'); } - if (selectedType == 'pickle') { + if (selectedType == 'pickle' || selectedType == 'parquet') { this.fileResultState.pathInputId = this.wrapSelector(prefix + '#path'); } } @@ -365,11 +410,22 @@ define([ // pdGen.vp_showInterfaceOnPage(this.wrapSelector('#vp_file' + pageType), thisPkg); pdGen.vp_showInterfaceOnPage(this, thisPkg, this.state, parent=('#vp_file' + pageType)); + // pyarrow can r/w parquet, csv and only read json. + if ((pageType == 'Read' && selectedType == 'json') || selectedType == 'parquet'|| selectedType == 'csv') { + // add checkbox 'Use PyArrow', next to File Type + $(this.wrapSelector(prefix + '#vp_inputOutputBox table tbody')).prepend( + $('