diff --git a/visualpython/data/m_library/pandasLibrary.js b/visualpython/data/m_library/pandasLibrary.js index 08ef7889..1478aa9e 100644 --- a/visualpython/data/m_library/pandasLibrary.js +++ b/visualpython/data/m_library/pandasLibrary.js @@ -206,7 +206,7 @@ define([ }, { "name": "sep", - "label": "Seperator", + "label": "Separator", "type": "text", "usePair": true }, @@ -304,7 +304,7 @@ define([ }, { "name": "sep", - "label": "Seperator", + "label": "Separator", "type": "text", "usePair": true }, @@ -4342,7 +4342,7 @@ define([ }, { "name": "prefix_sep", - "label": "Header Seperator", + "label": "Header Separator", "default": "_", "usePair": true }, diff --git a/visualpython/data/m_library/pandasLibrary_v1.js b/visualpython/data/m_library/pandasLibrary_v1.js index d1f38172..f97c6546 100644 --- a/visualpython/data/m_library/pandasLibrary_v1.js +++ b/visualpython/data/m_library/pandasLibrary_v1.js @@ -163,7 +163,7 @@ define([ { name: 'sep', type: 'text', - label: 'Seperator' + label: 'Separator' }, { name: 'names', @@ -243,7 +243,7 @@ define([ { name: 'sep', type: 'text', - label: 'Seperator' + label: 'Separator' }, { name: 'na_rep', @@ -3514,7 +3514,7 @@ define([ { name: 'prefix_sep', type: ['text'], - label: 'Header Seperator', + label: 'Header Separator', default: '_' }, { diff --git a/visualpython/data/m_ml/mlLibrary.js b/visualpython/data/m_ml/mlLibrary.js index 5f574e33..11fc11a8 100644 --- a/visualpython/data/m_ml/mlLibrary.js +++ b/visualpython/data/m_ml/mlLibrary.js @@ -128,10 +128,10 @@ define([ 'prep-onehot': { name: 'OneHotEncoder', import: 'from sklearn.preprocessing import OneHotEncoder', - code: 'OneHotEncoder(${sparse}${handle_unknown}${etc})', + code: 'OneHotEncoder(${sparse_output}${handle_unknown}${etc})', returnType: 'OneHotEncoder', options: [ - { name: 'sparse', component: ['bool_select'], default: 'False', usePair: true }, + { name: 'sparse_output', component: ['bool_select'], default: 'True', value: 'False', usePair: true }, { name: 'handle_unknown', component: ['option_suggest'], usePair: true, options: ['error', 'ignore'], default: 'error' }, ] @@ -490,8 +490,8 @@ define([ code: 'DecisionTreeClassifier(${criterion}${max_depth}${min_samples_split}${random_state}${etc})', returnType: 'DecisionTreeClassifier', options: [ - { name: 'criterion', component: ['option_select'], type: 'text', default: 'squared_error', type:'text', - options: ['squared_error', 'friedman_mse', 'absolute_error', 'poisson'], usePair: true }, + { name: 'criterion', component: ['option_select'], type: 'text', default: 'gini', type:'text', + options: ['gini','entropy','log_loss'], usePair: true }, { name: 'max_depth', component: ['input_number'], placeholder: 'None', usePair: true }, { name: 'min_samples_split', component: ['input_number'], default: 2, usePair: true }, { name: 'random_state', component: ['input_number'], placeholder: '123', usePair: true } @@ -505,7 +505,7 @@ define([ options: [ { name: 'n_estimators', component: ['input_number'], default: 100, usePair: true }, { name: 'criterion', component: ['option_select'], type: 'text', default: 'gini', type:'text', usePair: true, - options: ['gini', 'entropy'] }, + options: ['gini', 'entropy', 'log_loss'] }, { name: 'max_depth', component: ['input_number'], placeholder: 'None', usePair: true }, { name: 'min_samples_split', component: ['input_number'], default: 2, usePair: true }, { name: 'n_jobs', component: ['input_number'], placeholder: 'None', usePair: true }, @@ -518,12 +518,12 @@ define([ code: 'GradientBoostingClassifier(${loss}${learning_rate}${n_estimators}${criterion}${random_state}${etc})', returnType: 'GradientBoostingClassifier', options: [ - { name: 'loss', component: ['option_select'], type: 'text', default: 'deviance', type: 'text', usePair: true, - options: ['deviance', 'exponential'] }, + { name: 'loss', component: ['option_select'], type: 'text', default: 'log_loss', type: 'text', usePair: true, + options: ['log_loss', 'exponential'] }, { name: 'learning_rate', component: ['input_number'], default: 0.1, usePair: true }, { name: 'n_estimators', component: ['input_number'], default: 100, usePair: true }, { name: 'criterion', component: ['option_select'], type: 'text', default: 'friedman_mse', type:'text', usePair: true, - options: ['friedman_mse', 'squared_error', 'mse', 'mae'] }, + options: ['friedman_mse', 'squared_error'] }, { name: 'random_state', component: ['input_number'], placeholder: '123', usePair: true } ] }, diff --git a/visualpython/html/m_stats/regression.html b/visualpython/html/m_stats/regression.html index 124f7066..93ac63fd 100644 --- a/visualpython/html/m_stats/regression.html +++ b/visualpython/html/m_stats/regression.html @@ -76,6 +76,7 @@ + diff --git a/visualpython/js/com/com_generatorV2.js b/visualpython/js/com/com_generatorV2.js index f1762c36..42e6c760 100644 --- a/visualpython/js/com/com_generatorV2.js +++ b/visualpython/js/com/com_generatorV2.js @@ -376,6 +376,8 @@ define([ allowDataType: obj.var_type, placeholder: obj.placeholder || 'Select data', value: value, + columnSelection: obj.columnSelection || 'multiple', // single / multiple + returnFrameType: obj.returnFrameType || '', // '' / DataFrame / Series required: obj.required === true }); content = $(dataSelector.toTagString()); @@ -890,7 +892,7 @@ define([ suggestInputX.addClass('vp-input vp-state'); suggestInputX.setNormalFilter(false); suggestInputX.setValue(defaultValue); - $(selector + ' #' + columnInputId).replaceWith(function() { + $(pageThis.wrapSelector('#' + columnInputId)).replaceWith(function() { return suggestInputX.toTagString(); }); } else { @@ -899,7 +901,7 @@ define([ 'id': columnInputId, 'class': 'vp-select vp-state' }); - $(selector + ' #' + columnInputId).replaceWith(function() { + $(pageThis.wrapSelector('#' + columnInputId)).replaceWith(function() { return $(tag); }); } @@ -955,7 +957,7 @@ define([ suggestInputX.setSuggestList(function() { return list; }); //FIXME: suggestInputX.setNormalFilter(false); suggestInputX.setValue(defaultValue); - $(selector + ' #' + columnInputId).replaceWith(function() { + $(pageThis.wrapSelector('#' + columnInputId)).replaceWith(function() { return suggestInputX.toTagString(); }); } else { diff --git a/visualpython/js/com/component/DataSelector.js b/visualpython/js/com/component/DataSelector.js index 770770ba..1b64b9ee 100644 --- a/visualpython/js/com/component/DataSelector.js +++ b/visualpython/js/com/component/DataSelector.js @@ -59,6 +59,11 @@ define([ select: null, // callback after selection from suggestInput (value, dtype) allowDataType: null, // list of allowed data types dataCategory: null, // list of data category (use it for ml categories) + columnSelection: 'multiple', // single/multi : allowed column selection + returnFrameType: '', // DataFrame/Series : required data type for DataFrame -> Series/DataFrame operation + // if Series, only one column selection is allowed and returns Series + // if DataFrame, always returns DataFrame + // if Empty(=== ''), if one column selected, returns Series / others, returns DataFrame // additional options boxClasses: '', classes: '', @@ -87,6 +92,9 @@ define([ this.prop.dataCategory = this.prop.allowDataType; } } + if (this.prop.returnFrameType === 'Series') { + this.prop.columnSelection = 'single'; // only single selection allowed + } this.state = { filterType: 'All', @@ -98,6 +106,7 @@ define([ slicingEnd1: '', slicingStart2: '', slicingEnd2: '', + singleColumn: '', ndRowType: 'slicing', ndColType: 'slicing', useIndex: false, @@ -409,6 +418,19 @@ define([ } } + templateForSingleSelector() { + return ` +
+ +
+ + +
+
+ ` + } + templateForMultiSelector() { return `
@@ -599,22 +621,93 @@ define([ switch (dataType) { case 'DataFrame': - // render option page - $(this.wrapSelector('.vp-ds-option-inner-box')).html(this.templateForMultiSelector()); // column selecting - this._columnSelector = new MultiSelector(this.wrapSelector('.vp-ds-df-multiselector'), - { mode: 'columns', parent: [data], selectedList: this.state.indexing, allowAdd: true } - ); - + if (this.prop.columnSelection === 'single') { + // render option page + $(this.wrapSelector('.vp-ds-option-inner-box')).html(this.templateForSingleSelector()); + // bind column source + // com_generator.vp_bindColumnSource(this, 'data', ['singleColumn'], 'select', false, false); + // vp_bindColumnSource(pageThis, targetId, columnInputIdList, tagType="input", columnWithEmpty=false, columnWithIndex=false) + const columnInputIdList = ['singleColumn']; + if (data === '') { + // reset with no source + columnInputIdList && columnInputIdList.forEach(columnInputId => { + let defaultValue = that.state[columnInputId]; + if (defaultValue === null || defaultValue === undefined) { + defaultValue = ''; + } + // option tags + var tag = $('').attr({ + 'id': columnInputId, + 'class': 'vp-select vp-state' + }); + $(that.wrapSelector('#' + columnInputId)).replaceWith(function() { + return $(tag); + }); + }); + return ; + } + // get result and show on detail box + vpKernel.getColumnList(data).then(function(resultObj) { + try { + let { result, type, msg } = resultObj; + var { list } = JSON.parse(result); + + // columns using suggestInput + columnInputIdList && columnInputIdList.forEach((columnInputId, idx) => { + let defaultValue = that.state[columnInputId]; + if (defaultValue === null || defaultValue === undefined) { + defaultValue = ''; + } + // create tag + var tag = $('').attr({ + 'id': columnInputId, + 'class': 'vp-select vp-state w150' + }); + // make tag + list.forEach((listVar, idx) => { + var option = document.createElement('option'); + $(option).attr({ + 'value':listVar.value, + 'text':listVar.label, + 'data-type':listVar.dtype + }); + // cell metadata test : defaultValue as selected + if (listVar.value === defaultValue) { + $(option).prop('selected', true); + } + option.append(document.createTextNode(listVar.label)); + $(tag).append(option); + }); + $(that.wrapSelector('#' + columnInputId)).replaceWith(function() { + return $(tag); + }); + $(that.wrapSelector('#' + columnInputId)).trigger('change'); + }).catch(function(err) { + vpLog.display(VP_LOG_TYPE.ERROR, 'com_generator - bindColumnSource error ', err) + }); + } catch (e) { + vpLog.display(VP_LOG_TYPE.ERROR, 'com_generator - bindColumnSource: not supported data type. ', e); + } + }); + } else { + // render option page + $(this.wrapSelector('.vp-ds-option-inner-box')).html(this.templateForMultiSelector()); + this._columnSelector = new MultiSelector(this.wrapSelector('.vp-ds-df-multiselector'), + { mode: 'columns', parent: [data], selectedList: this.state.indexing, allowAdd: true } + ); + } // bind event $(this.wrapSelector('#useIndex')).on('change', function() { let checked = $(this).prop('checked'); that.state.useIndex = checked; if (checked === true) { $(that.wrapSelector('.vp-ds-df-multiselector')).hide(); + $(that.wrapSelector('.vp-ds-df-singleselector')).hide(); $(that.wrapSelector('.vp-ds-df-index-box')).show(); } else { $(that.wrapSelector('.vp-ds-df-multiselector')).show(); + $(that.wrapSelector('.vp-ds-df-singleselector')).show(); $(that.wrapSelector('.vp-ds-df-index-box')).hide(); } }); @@ -728,6 +821,7 @@ define([ let { data, dataType, useIndex, + singleColumn, slicingStart1, slicingEnd1, slicingStart2, slicingEnd2, ndRowType, ndColType @@ -742,21 +836,39 @@ define([ code.append('.index'); } else { // use column selector - if (this._columnSelector != null) { - let result = this._columnSelector.getDataList(); - this.state.indexing = result.map(obj => obj.code); // save state - let columnList = []; - result && result.forEach(obj => { - columnList.push(obj.code); - }); - if (columnList.length > 0) { - if (columnList.length == 1) { - // return as Series - code.appendFormat('[{0}]', columnList.join(', ')); - // change datatype to Series - this.state.returnDataType = 'Series'; - } else { - code.appendFormat('[[{0}]]', columnList.join(', ')); + if (this.prop.columnSelection === 'single') { + // single selector + if (this.prop.returnFrameType === 'DataFrame') { + // return as DataFrame + code.appendFormat('[[{0}]]', singleColumn); + } else { + // return as Series + code.appendFormat('[{0}]', singleColumn); + this.state.returnDataType = 'Series'; + } + } else { + // multiple selector + if (this._columnSelector != null) { + let result = this._columnSelector.getDataList(); + this.state.indexing = result.map(obj => obj.code); // save state + let columnList = []; + result && result.forEach(obj => { + columnList.push(obj.code); + }); + if (columnList.length > 0) { + if (columnList.length == 1) { + if (this.prop.returnFrameType === 'DataFrame') { + // return as DataFrame + code.appendFormat('[[{0}]]', columnList.join(', ')); + } else { + // return as Series + code.appendFormat('[{0}]', columnList.join(', ')); + // change datatype to Series + this.state.returnDataType = 'Series'; + } + } else { + code.appendFormat('[[{0}]]', columnList.join(', ')); + } } } } diff --git a/visualpython/js/com/component/ModelEditor.js b/visualpython/js/com/component/ModelEditor.js index da24a5d0..c2fab057 100644 --- a/visualpython/js/com/component/ModelEditor.js +++ b/visualpython/js/com/component/ModelEditor.js @@ -125,7 +125,7 @@ define([ code: '${model}.fit(${fit_featureData})', description: 'Fit Encoder/Scaler to X.', options: [ - { name: 'fit_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X' } + { name: 'fit_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X', returnFrameType: 'DataFrame' } ] }, 'fit_transform': { @@ -134,13 +134,19 @@ define([ code: '${fit_trans_allocate} = ${model}.fit_transform(${fit_trans_featureData})', description: 'Fit Encoder/Scaler to X, then transform X.', options: [ - { name: 'fit_trans_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X' }, + { name: 'fit_trans_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X', returnFrameType: 'DataFrame' }, { name: 'fit_trans_allocate', label: 'Allocate to', component: ['input'], placeholder: 'New variable', value: 'trans' } ] }, 'transform': { - ...defaultActions['transform'], - description: 'Transform labels to normalized encoding.' + name: 'transform', + label: 'Transform', + code: '${trans_allocate} = ${model}.transform(${trans_featureData})', + description: 'Transform labels to normalized encoding.', + options: [ + { name: 'trans_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X', returnFrameType: 'DataFrame' }, + { name: 'trans_allocate', label: 'Allocate to', component: ['input'], placeholder: 'New variable', value: 'trans' } + ] } } if (modelType != 'ColumnTransformer') { @@ -152,12 +158,49 @@ define([ code: '${inverse_allocate} = ${model}.inverse_transform(${inverse_featureData})', description: 'Transform binary labels back to multi-class labels.', options: [ - { name: 'inverse_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X' }, + { name: 'inverse_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X', returnFrameType: 'DataFrame' }, { name: 'inverse_allocate', label: 'Allocate to', component: ['input'], placeholder: 'New variable', value: 'inv_trans' } ] } } } + if (modelType == 'LabelEncoder') { + actions = { + ...actions, + 'fit': { + name: 'fit', + label: 'Fit', + code: '${model}.fit(${fit_featureData})', + description: 'Fit Encoder/Scaler to X.', + options: [ + { name: 'fit_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X', + columnSelection: 'single', returnFrameType: 'Series' } + ] + }, + 'fit_transform': { + name: 'fit_transform', + label: 'Fit and transform', + code: '${fit_trans_allocate} = ${model}.fit_transform(${fit_trans_featureData})', + description: 'Fit Encoder/Scaler to X, then transform X.', + options: [ + { name: 'fit_trans_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X', + columnSelection: 'single', returnFrameType: 'Series' }, + { name: 'fit_trans_allocate', label: 'Allocate to', component: ['input'], placeholder: 'New variable', value: 'trans' } + ] + }, + 'transform': { + name: 'transform', + label: 'Transform', + code: '${trans_allocate} = ${model}.transform(${trans_featureData})', + description: 'Transform labels to normalized encoding.', + options: [ + { name: 'trans_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X', + columnSelection: 'single', returnFrameType: 'Series' }, + { name: 'trans_allocate', label: 'Allocate to', component: ['input'], placeholder: 'New variable', value: 'trans' } + ] + } + } + } if (modelType === 'SMOTE') { actions = { 'fit': { @@ -322,7 +365,7 @@ define([ code: '${model}.fit(${fit_featureData})', description: 'Fit X into an embedded space.', options: [ - { name: 'fit_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X' } + { name: 'fit_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X', returnFrameType: 'DataFrame' } ] }, 'fit_transform': { @@ -331,7 +374,7 @@ define([ code: '${fit_trans_allocate} = ${model}.fit_transform(${fit_trans_featureData})', description: 'Fit X into an embedded space and return that transformed output.', options: [ - { name: 'fit_trans_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X' }, + { name: 'fit_trans_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X', returnFrameType: 'DataFrame' }, { name: 'fit_trans_allocate', label: 'Allocate to', component: ['input'], placeholder: 'New variable', value: 'trans' } ] } @@ -346,17 +389,17 @@ define([ code: '${model}.fit(${fit_featureData}, ${fit_targetData})', description: 'Fit the Linear Discriminant Analysis model.', options: [ - { name: 'fit_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X' }, + { name: 'fit_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X', returnFrameType: 'DataFrame' }, { name: 'fit_targetData', label: 'Target Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'y' } ] }, 'fit_transform': { name: 'fit_transform', label: 'Fit and transform', - code: '${fit_trans_allocate} = ${model}.fit_transform(${fit_trans_featureData}${fit_trans_targetData})', + code: '${fit_trans_allocate} = ${model}.fit_transform(${fit_trans_featureData}, ${fit_trans_targetData})', description: 'Fit to data, then transform it.', options: [ - { name: 'fit_trans_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X' }, + { name: 'fit_trans_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X', returnFrameType: 'DataFrame' }, { name: 'fit_trans_targetData', label: 'Target Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'y' }, { name: 'fit_trans_allocate', label: 'Allocate to', component: ['input'], placeholder: 'New variable', value: 'trans' } ] @@ -367,7 +410,7 @@ define([ code: '${pred_allocate} = ${model}.predict(${pred_featureData})', description: 'Predict class labels for samples in X.', options: [ - { name: 'pred_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X' }, + { name: 'pred_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X', returnFrameType: 'DataFrame' }, { name: 'pred_allocate', label: 'Allocate to', component: ['input'], placeholder: 'New variable', value: 'pred' } ] }, @@ -377,7 +420,7 @@ define([ code: '${trans_allocate} = ${model}.transform(${trans_featureData})', description: 'Project data to maximize class separation.', options: [ - { name: 'trans_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X' }, + { name: 'trans_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X', returnFrameType: 'DataFrame' }, { name: 'trans_allocate', label: 'Allocate to', component: ['input'], placeholder: 'New variable', value: 'trans' } ] } @@ -391,7 +434,7 @@ define([ code: '${model}.fit(${fit_featureData})', description: 'Fit X into an embedded space.', options: [ - { name: 'fit_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X' } + { name: 'fit_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X', returnFrameType: 'DataFrame' } ] }, 'fit_transform': { @@ -400,7 +443,7 @@ define([ code: '${fit_trans_allocate} = ${model}.fit_transform(${fit_trans_featureData})', description: 'Fit the model with X and apply the dimensionality reduction on X.', options: [ - { name: 'fit_trans_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X' }, + { name: 'fit_trans_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X', returnFrameType: 'DataFrame' }, { name: 'fit_trans_allocate', label: 'Allocate to', component: ['input'], placeholder: 'New variable', value: 'trans' } ] }, @@ -410,7 +453,7 @@ define([ code: '${inverse_allocate} = ${model}.inverse_transform(${inverse_featureData})', description: 'Transform data back to its original space.', options: [ - { name: 'inverse_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X' }, + { name: 'inverse_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X', returnFrameType: 'DataFrame' }, { name: 'inverse_allocate', label: 'Allocate to', component: ['input'], placeholder: 'New variable', value: 'inv_trans' } ] }, @@ -420,7 +463,7 @@ define([ code: '${trans_allocate} = ${model}.transform(${trans_featureData})', description: 'Apply dimensionality reduction to X.', options: [ - { name: 'trans_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X' }, + { name: 'trans_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X', returnFrameType: 'DataFrame' }, { name: 'trans_allocate', label: 'Allocate to', component: ['input'], placeholder: 'New variable', value: 'trans' } ] } diff --git a/visualpython/js/m_apps/File.js b/visualpython/js/m_apps/File.js index 6a04a91c..2a3e8741 100644 --- a/visualpython/js/m_apps/File.js +++ b/visualpython/js/m_apps/File.js @@ -499,15 +499,15 @@ define([ return suggestInput.toTagString(); }); - // seperator suggest input + // separator suggest input $(this.wrapSelector('#sep')).replaceWith(function() { - // seperator list : + // separator list : var sepList = [',', '|', '\\t', '\\n', ':', ';', '-', '_', '&', '/', '\\']; var suggestInput = new SuggestInput(); suggestInput.setComponentID('sep'); suggestInput.addClass('vp-input vp-state'); suggestInput.setSuggestList(function() { return sepList; }); - suggestInput.setPlaceholder('Input seperator'); + suggestInput.setPlaceholder('Input separator'); return suggestInput.toTagString(); }); } diff --git a/visualpython/js/m_apps/Frame.js b/visualpython/js/m_apps/Frame.js index fd786ad0..4e91083d 100644 --- a/visualpython/js/m_apps/Frame.js +++ b/visualpython/js/m_apps/Frame.js @@ -3620,7 +3620,12 @@ define([ code.appendFormat(", limit={0}", content['limit']); } } else { - code.appendFormat("{0}.{1}()", subsetObjStr, content['method']); + if (content['method'] === 'mode') { + // get mode()'s first element (mode returns Series) + code.appendFormat("{0}.{1}()[0]", subsetObjStr, content['method']); + } else { + code.appendFormat("{0}.{1}()", subsetObjStr, content['method']); + } } code.append(')'); break; diff --git a/visualpython/js/m_ml/Classification.js b/visualpython/js/m_ml/Classification.js index f487be39..f327b611 100644 --- a/visualpython/js/m_ml/Classification.js +++ b/visualpython/js/m_ml/Classification.js @@ -50,7 +50,7 @@ define([ this.modelConfig = ML_LIBRARIES; this.modelTypeList = { - 'Classfication': ['lg-rgs', 'bern-nb', 'mulnom-nb', 'gaus-nb', 'sv-clf', 'dt-clf', 'rf-clf', 'gbm-clf', 'xgb-clf', 'lgbm-clf', 'cb-clf'], + 'Classification': ['lg-rgs', 'bern-nb', 'mulnom-nb', 'gaus-nb', 'sv-clf', 'dt-clf', 'rf-clf', 'gbm-clf', 'xgb-clf', 'lgbm-clf', 'cb-clf'], } diff --git a/visualpython/js/m_ml/Clustering.js b/visualpython/js/m_ml/Clustering.js index c3781201..95ee735c 100644 --- a/visualpython/js/m_ml/Clustering.js +++ b/visualpython/js/m_ml/Clustering.js @@ -51,7 +51,7 @@ define([ this.modelTypeList = { // 'Regression': ['ln-rgs', 'sv-rgs', 'dt-rgs', 'rf-rgs', 'gbm-rgs', 'xgb-rgs', 'lgbm-rgs', 'cb-rgs'], - // 'Classfication': ['lg-rgs', 'sv-clf', 'dt-clf', 'rf-clf', 'gbm-clf', 'xgb-clf', 'lgbm-clf', 'cb-clf'], + // 'Classification': ['lg-rgs', 'sv-clf', 'dt-clf', 'rf-clf', 'gbm-clf', 'xgb-clf', 'lgbm-clf', 'cb-clf'], // 'Auto ML': ['tpot-rgs', 'tpot-clf'], 'Clustering': ['k-means', 'agg-cls', 'gaus-mix', 'dbscan'], // 'Dimension Reduction': ['pca', 'lda', 'svd', 'nmf'] diff --git a/visualpython/js/m_ml/FitPredict.js b/visualpython/js/m_ml/FitPredict.js index cdd979b9..d48c20c5 100644 --- a/visualpython/js/m_ml/FitPredict.js +++ b/visualpython/js/m_ml/FitPredict.js @@ -41,6 +41,8 @@ define([ method: '', action: {}, optionConfig: {}, + modelEditorType: '', + modelEditorName: '', userOption: '', ...this.state } @@ -315,7 +317,7 @@ define([ } generateCode() { - let { model } = this.state; + let { model, modelType, modelEditorName } = this.state; let code = new com_String(); let replaceDict = {'${model}': model}; @@ -336,6 +338,31 @@ define([ code.appendLine(); code.append(allocateCode); } + // Data Preparation > Scaling + const scalingTypeList = ['StandardScaler', 'RobustScaler', 'MinMaxScaler', 'Normalizer']; + // Dimension Reduction + const dimensionTypeList = ['PCA', 'NMF']; + if (scalingTypeList.includes(modelType) || dimensionTypeList.includes(modelType)) { + // fit_transform, transform returns df_trans also + switch (modelEditorName) { + case 'fit_transform': + const allocatedFitTrans = this.state.fit_trans_allocate || 'trans'; + code.appendLine(); + code.appendLine(); + code.appendFormatLine("df_{0} = pd.DataFrame({1}, columns=[{2}])", allocatedFitTrans, allocatedFitTrans, this.state.fit_trans_featureData); + code.append("df_" + allocatedFitTrans); + break; + case 'transform': + const allocatedTrans = this.state.trans_allocate || 'trans'; + code.appendLine(); + code.appendLine(); + code.appendFormatLine("df_{0} = pd.DataFrame({1}, columns=[{2}])", allocatedTrans, allocatedTrans, this.state.trans_featureData); + code.append("df_" + allocatedTrans); + break; + default: + break; + } + } } return code.toString(); @@ -409,7 +436,7 @@ define([ code: '${model}.fit(${fit_featureData})', description: 'Fit Encoder/Scaler to X.', options: [ - { name: 'fit_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X' } + { name: 'fit_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X', returnFrameType: 'DataFrame' } ] }, 'fit_transform': { @@ -418,13 +445,19 @@ define([ code: '${fit_trans_allocate} = ${model}.fit_transform(${fit_trans_featureData})', description: 'Fit Encoder/Scaler to X, then transform X.', options: [ - { name: 'fit_trans_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X' }, + { name: 'fit_trans_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X', returnFrameType: 'DataFrame' }, { name: 'fit_trans_allocate', label: 'Allocate to', component: ['input'], placeholder: 'New variable', value: 'trans' } ] }, 'transform': { - ...defaultActions['transform'], - description: 'Transform labels to normalized encoding.' + name: 'transform', + label: 'Transform', + code: '${trans_allocate} = ${model}.transform(${trans_featureData})', + description: 'Transform labels to normalized encoding.', + options: [ + { name: 'trans_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X', returnFrameType: 'DataFrame' }, + { name: 'trans_allocate', label: 'Allocate to', component: ['input'], placeholder: 'New variable', value: 'trans' } + ] } } if (modelType != 'ColumnTransformer') { @@ -436,12 +469,49 @@ define([ code: '${inverse_allocate} = ${model}.inverse_transform(${inverse_featureData})', description: 'Transform binary labels back to multi-class labels.', options: [ - { name: 'inverse_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X' }, + { name: 'inverse_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X', returnFrameType: 'DataFrame' }, { name: 'inverse_allocate', label: 'Allocate to', component: ['input'], placeholder: 'New variable', value: 'inv_trans' } ] } } } + if (modelType == 'LabelEncoder') { + actions = { + ...actions, + 'fit': { + name: 'fit', + label: 'Fit', + code: '${model}.fit(${fit_featureData})', + description: 'Fit Encoder/Scaler to X.', + options: [ + { name: 'fit_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X', + columnSelection: 'single', returnFrameType: 'Series' } + ] + }, + 'fit_transform': { + name: 'fit_transform', + label: 'Fit and transform', + code: '${fit_trans_allocate} = ${model}.fit_transform(${fit_trans_featureData})', + description: 'Fit Encoder/Scaler to X, then transform X.', + options: [ + { name: 'fit_trans_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X', + columnSelection: 'single', returnFrameType: 'Series' }, + { name: 'fit_trans_allocate', label: 'Allocate to', component: ['input'], placeholder: 'New variable', value: 'trans' } + ] + }, + 'transform': { + name: 'transform', + label: 'Transform', + code: '${trans_allocate} = ${model}.transform(${trans_featureData})', + description: 'Transform labels to normalized encoding.', + options: [ + { name: 'trans_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X', + columnSelection: 'single', returnFrameType: 'Series' }, + { name: 'trans_allocate', label: 'Allocate to', component: ['input'], placeholder: 'New variable', value: 'trans' } + ] + } + } + } if (modelType === 'SMOTE') { actions = { 'fit': { @@ -606,7 +676,7 @@ define([ code: '${model}.fit(${fit_featureData})', description: 'Fit X into an embedded space.', options: [ - { name: 'fit_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X' } + { name: 'fit_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X', returnFrameType: 'DataFrame' } ] }, 'fit_transform': { @@ -615,7 +685,7 @@ define([ code: '${fit_trans_allocate} = ${model}.fit_transform(${fit_trans_featureData})', description: 'Fit X into an embedded space and return that transformed output.', options: [ - { name: 'fit_trans_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X' }, + { name: 'fit_trans_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X', returnFrameType: 'DataFrame' }, { name: 'fit_trans_allocate', label: 'Allocate to', component: ['input'], placeholder: 'New variable', value: 'trans' } ] } @@ -630,7 +700,7 @@ define([ code: '${model}.fit(${fit_featureData}, ${fit_targetData})', description: 'Fit the Linear Discriminant Analysis model.', options: [ - { name: 'fit_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X' }, + { name: 'fit_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X', returnFrameType: 'DataFrame' }, { name: 'fit_targetData', label: 'Target Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'y' } ] }, @@ -640,7 +710,7 @@ define([ code: '${fit_trans_allocate} = ${model}.fit_transform(${fit_trans_featureData}${fit_trans_targetData})', description: 'Fit to data, then transform it.', options: [ - { name: 'fit_trans_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X' }, + { name: 'fit_trans_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X', returnFrameType: 'DataFrame' }, { name: 'fit_trans_targetData', label: 'Target Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'y' }, { name: 'fit_trans_allocate', label: 'Allocate to', component: ['input'], placeholder: 'New variable', value: 'trans' } ] @@ -651,7 +721,7 @@ define([ code: '${pred_allocate} = ${model}.predict(${pred_featureData})', description: 'Predict class labels for samples in X.', options: [ - { name: 'pred_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X' }, + { name: 'pred_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X', returnFrameType: 'DataFrame' }, { name: 'pred_allocate', label: 'Allocate to', component: ['input'], placeholder: 'New variable', value: 'pred' } ] }, @@ -661,7 +731,7 @@ define([ code: '${trans_allocate} = ${model}.transform(${trans_featureData})', description: 'Project data to maximize class separation.', options: [ - { name: 'trans_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X' }, + { name: 'trans_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X', returnFrameType: 'DataFrame' }, { name: 'trans_allocate', label: 'Allocate to', component: ['input'], placeholder: 'New variable', value: 'trans' } ] } @@ -675,7 +745,7 @@ define([ code: '${model}.fit(${fit_featureData})', description: 'Fit X into an embedded space.', options: [ - { name: 'fit_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X' } + { name: 'fit_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X', returnFrameType: 'DataFrame' } ] }, 'fit_transform': { @@ -684,7 +754,7 @@ define([ code: '${fit_trans_allocate} = ${model}.fit_transform(${fit_trans_featureData})', description: 'Fit the model with X and apply the dimensionality reduction on X.', options: [ - { name: 'fit_trans_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X' }, + { name: 'fit_trans_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X', returnFrameType: 'DataFrame' }, { name: 'fit_trans_allocate', label: 'Allocate to', component: ['input'], placeholder: 'New variable', value: 'trans' } ] }, @@ -694,7 +764,7 @@ define([ code: '${inverse_allocate} = ${model}.inverse_transform(${inverse_featureData})', description: 'Transform data back to its original space.', options: [ - { name: 'inverse_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X' }, + { name: 'inverse_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X', returnFrameType: 'DataFrame' }, { name: 'inverse_allocate', label: 'Allocate to', component: ['input'], placeholder: 'New variable', value: 'inv_trans' } ] }, @@ -704,7 +774,7 @@ define([ code: '${trans_allocate} = ${model}.transform(${trans_featureData})', description: 'Apply dimensionality reduction to X.', options: [ - { name: 'trans_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X' }, + { name: 'trans_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X', returnFrameType: 'DataFrame' }, { name: 'trans_allocate', label: 'Allocate to', component: ['input'], placeholder: 'New variable', value: 'trans' } ] } diff --git a/visualpython/js/m_ml/GridSearch.js b/visualpython/js/m_ml/GridSearch.js index 32dbc967..1dfdbaa4 100644 --- a/visualpython/js/m_ml/GridSearch.js +++ b/visualpython/js/m_ml/GridSearch.js @@ -49,7 +49,7 @@ define([ this.modelTypeList = { 'Regression': ['ln-rgs', 'ridge', 'lasso', 'elasticnet', 'sv-rgs', 'dt-rgs', 'rf-rgs', 'gbm-rgs', 'xgb-rgs', 'lgbm-rgs', 'cb-rgs'], - 'Classfication': ['lg-rgs', 'bern-nb', 'mulnom-nb', 'gaus-nb', 'sv-clf', 'dt-clf', 'rf-clf', 'gbm-clf', 'xgb-clf', 'lgbm-clf', 'cb-clf'] + 'Classification': ['lg-rgs', 'bern-nb', 'mulnom-nb', 'gaus-nb', 'sv-clf', 'dt-clf', 'rf-clf', 'gbm-clf', 'xgb-clf', 'lgbm-clf', 'cb-clf'] } diff --git a/visualpython/js/m_ml/Pipeline.js b/visualpython/js/m_ml/Pipeline.js index 6dfc1989..856c33db 100644 --- a/visualpython/js/m_ml/Pipeline.js +++ b/visualpython/js/m_ml/Pipeline.js @@ -210,6 +210,7 @@ define([ $(that.wrapSelector(`.vp-pp-step-page:not([data-name="${name}"])`)).hide(); $(that.wrapSelector(`.vp-pp-step-page[data-name="${name}"]`)).show(); if (ppObj.useApp === true) { + ppObj.app && that.handleAppView(name, ppObj.app); ppObj.app && ppObj.app.open($(that.wrapSelector(`.vp-pp-step-page[data-name="${name}"]`))); } else { that.renderApp(name); @@ -372,7 +373,7 @@ define([ // click next button $(this.wrapSelector('.vp-pp-step-next:not(.disabled)')).on('click', function() { let selectedTag = $(that.wrapSelector('.vp-pp-item.selected')); - let nextTagList = $(selectedTag).nextAll('.vp-pp-item[data-flag="enabled"]:visible') + let nextTagList = $(selectedTag).nextAll('.vp-pp-item[data-flag="enabled"]:visible'); $(nextTagList[0]).trigger('click'); }); } @@ -562,6 +563,32 @@ define([ case 'ml_dataSplit': $(mlApp.wrapSelector('#inputData')).parent().hide(); break; + case 'ml_evaluation': + // for pipeline + $(mlApp.wrapSelector('.vp-upper-box')).hide(); + $(mlApp.wrapSelector('.vp-upper-box.' + mlApp.state.modelType)).show(); + + $(mlApp.wrapSelector('.vp-eval-box')).hide(); + $(mlApp.wrapSelector('.vp-eval-' + mlApp.state.modelType)).show(); + + if (mlApp.state.modelType == 'rgs') { + // Regression + + } else if (mlApp.state.modelType == 'clf') { + // Classification + // if (this.state.roc_curve == false && this.state.auc == false) { + // $(page).find('.vp-ev-model.roc-auc').prop('disabled', true); + // } + } else { + // Clustering + if (mlApp.state.silhouetteScore == false) { + $(mlApp.wrapSelector('.vp-ev-model.silhouette')).prop('disabled', true); + } + if (mlApp.state.ari == false && mlApp.state.nmi == false) { + $(mlApp.wrapSelector('.vp-ev-model.ari-nmi')).prop('disabled', true); + } + } + break; } } diff --git a/visualpython/js/m_ml/evaluation.js b/visualpython/js/m_ml/evaluation.js index 1cf1552d..ed63120c 100644 --- a/visualpython/js/m_ml/evaluation.js +++ b/visualpython/js/m_ml/evaluation.js @@ -158,6 +158,9 @@ define([ $(page).find('.vp-upper-box').hide(); $(page).find('.vp-upper-box.' + this.state.modelType).show(); + + $(page).find('.vp-eval-box').hide(); + $(page).find('.vp-eval-' + this.state.modelType).show(); if (this.state.modelType == 'rgs') { // Regression @@ -204,7 +207,7 @@ define([ let needMarkdown = false; //==================================================================== - // Classfication + // Classification //==================================================================== if (modelType == 'clf') { if (confusion_matrix) { @@ -351,7 +354,7 @@ define([ ...codeCells ]; } - // return as seperated cells + // return as separated cells return codeCells; } diff --git a/visualpython/js/m_stats/Regression.js b/visualpython/js/m_stats/Regression.js index 89b0738d..a38c811b 100644 --- a/visualpython/js/m_stats/Regression.js +++ b/visualpython/js/m_stats/Regression.js @@ -58,6 +58,7 @@ define([ normTest: true, histogram: true, scatterplot: true, + rmse: false, ...this.state }; @@ -259,7 +260,7 @@ define([ // Multi-collinearity multiCollinearity, // Residual option - statistics, normTest, histogram, scatterplot + statistics, normTest, histogram, scatterplot, rmse, } = this.state; let codeList = []; let code = new com_String(); @@ -697,7 +698,7 @@ define([ } // Residual option - if (statistics === true || normTest === true || histogram === true || scatterplot === true) { + if (statistics === true || normTest === true || histogram === true || scatterplot === true || rmse === true) { let residualTitle = 'Residual' if (lastModelNum > 0) { residualTitle += ' - Model ' + lastModelNum; @@ -768,6 +769,14 @@ define([ code.appendLine(" plt.tight_layout()"); code.append(" plt.show()"); } + if (rmse === true) { + code.appendLine(); + code.appendLine(); + code.appendLine("# RMSE (Root Mean Squared Error)"); + code.appendLine("_rmse = np.sqrt(_result.mse_resid)"); + code.appendLine("display(Markdown('### RMSE (Root Mean Squared Error)'))"); + code.append("display(Markdown(f'RMSE: {_rmse}'))"); + } } codeList.push(code.toString());