', 'vp_rendered_html'); // 'rendered_html' style from jupyter output area
+ tag.appendFormatLine('
', 'vp_rendered_html'); // 'rendered_html' style from jupyter output area
if (isHtml) {
tag.appendLine(renderedText);
} else {
diff --git a/visualpython/js/m_apps/File.js b/visualpython/js/m_apps/File.js
index 2fe1454e..6a04a91c 100644
--- a/visualpython/js/m_apps/File.js
+++ b/visualpython/js/m_apps/File.js
@@ -35,17 +35,17 @@ define([
super._init();
/** Write codes executed before rendering */
this.config.dataview = false;
- this.config.sizeLevel = 1;
+ this.config.sizeLevel = 2;
this.config.checkModules = ['pd'];
this.fileExtensions = {
- 'csv': 'csv',
- 'excel': 'xlsx',
- 'json': 'json',
- 'pickle': '',
- 'sas': '', // xport or sas7bdat
- 'spss': '',
- 'parquet':'parquet'
+ 'csv': ['csv', 'tsv', 'txt'],
+ 'excel': ['xlsx', 'xls'],
+ 'json': ['json'],
+ 'pickle': [],
+ 'sas': [], // xport or sas7bdat
+ 'spss': [],
+ 'parquet': ['parquet']
}
this.package = {
@@ -69,7 +69,8 @@ define([
}
this.state = {
- fileExtension: 'csv',
+ fileType: 'csv',
+ fileExtension: ['csv'],
selectedFile: '',
selectedPath: '',
vp_fileioType: 'Read',
@@ -188,6 +189,11 @@ define([
}
}
+ _unbindEvent() { // Tear down event handlers: the parent's first, then the document-level one handled here.
+ super._unbindEvent();
+ $(document).off('change', this.wrapSelector('#fileReadAs')); // removes 'change' handlers attached on document for the wrapped '#fileReadAs' selector (NOTE(review): the matching .on() is not visible in this hunk - confirm)
+ }
+
_bindEvent() {
super._bindEvent();
/** Implement binding events */
@@ -198,6 +204,19 @@ define([
$(that.wrapSelector('.vp-fileio-box')).hide();
$(that.wrapSelector('#vp_file' + pageType)).show();
+ if (pageType === 'Read' && that.fileState[pageType].selectedType === 'spss') {
+ // show install button
+ that.showInstallButton();
+ // show install note below File type selection
+ $(`
+
+
+ |
`).insertAfter($(that.wrapSelector('#fileType')).closest('tr'));
+ } else {
+ that.hideInstallButton();
+ $(that.wrapSelector('.vp-spss-note')).remove();
+ }
+
//set fileExtensions
that.fileResultState = {
@@ -210,26 +229,24 @@ define([
let isChecked = $(this).prop('checked');
var fileioType = that.state.vp_fileioType;
var prefix = '#vp_file' + fileioType + ' ';
- var selectedFileFormat = that.fileState[fileioType].selectedType;
+ var selectedType = that.fileState[fileioType]['selectedType'];
var fileioTypePrefix = fileioType.toLowerCase();
if(fileioTypePrefix == 'write'){
fileioTypePrefix = "to";
}
+ let fileId = that.fileState[fileioType].fileTypeId[selectedType];
- if(isChecked){ // pyArrow
- that.fileState[fileioType].fileTypeId[that.state.fileExtension] = "pa_" + fileioTypePrefix + selectedFileFormat[0].toUpperCase() + selectedFileFormat.slice(1);
+ if (isChecked) { // pyArrow
+ fileId = "pa_" + fileioTypePrefix + selectedType[0].toUpperCase() + selectedType.slice(1);
+ // that.fileState[fileioType].fileTypeId[that.state.fileExtension] = "pa_" + fileioTypePrefix + selectedFileFormat[0].toUpperCase() + selectedFileFormat.slice(1);
$(that.wrapSelector(prefix + '#vp_optionBox')).closest('.vp-accordian-container').hide();
- }
- else{ // pandas
- that.fileState[fileioType].fileTypeId[that.state.fileExtension] = "pd_" + fileioTypePrefix + selectedFileFormat[0].toUpperCase() + selectedFileFormat.slice(1);
- if (that.state.fileExtension != 'parquet'){ // parquet has no options area
+ } else { // pandas
+ // that.fileState[fileioType].fileTypeId[that.state.fileExtension] = "pd_" + fileioTypePrefix + selectedFileFormat[0].toUpperCase() + selectedFileFormat.slice(1);
+ if (that.state.fileType != 'parquet'){ // parquet has no options area
$(that.wrapSelector(prefix + '#vp_optionBox')).closest('.vp-accordian-container').show();
}
}
- var fileTypeObj = that.fileState[fileioType]['fileTypeId'];
- var selectedType = that.fileState[fileioType]['selectedType'];
- let fileId = fileTypeObj[selectedType];
let pdLib = pandasLibrary.PANDAS_FUNCTION;
let thisPkg = JSON.parse(JSON.stringify(pdLib[fileId]));
@@ -246,31 +263,14 @@ define([
if(fileioTypePrefix == 'write'){
fileioTypePrefix = "to";
}
- var selectedFileFormat = that.fileState[pageType].selectedType;
// select file type
$(this.wrapSelector(prefix + '#fileType')).change(function() {
- var value = $(this).val();
- that.fileState[pageType].selectedType = value;
-
- // Whenever change the file type, change to default pandas
- that.fileState[pageType].fileTypeId[that.state.fileExtension] = "pd_" + fileioTypePrefix + selectedFileFormat[0].toUpperCase() + selectedFileFormat.slice(1);
-
+ var fileType = $(this).val();
+ that.fileState[pageType].selectedType = fileType;
// reload
that.renderPage(pageType);
that._bindEventByType(pageType);
-
- if (value === 'spss') {
- // show install button
- that.showInstallButton();
- // show install note below File type selection
- $(`
-
-
- |
`).insertAfter($(that.wrapSelector('#fileType')).closest('tr'));
- } else {
- that.hideInstallButton();
- }
});
// open file navigation
@@ -282,8 +282,8 @@ define([
}
let extensionList = [];
- if (that.state.fileExtension !== '') {
- extensionList = [ that.state.fileExtension ];
+ if (that.state.fileExtension && that.state.fileExtension.length > 0) {
+ extensionList = that.state.fileExtension;
}
let fileNavi = new FileNavigation({
@@ -417,8 +417,7 @@ define([
$('
').append($(` | `))
.append($(' | '))
);
- }
- else{
+ } else {
$(this.wrapSelector(prefix + '#vp_inputOutputBox table tbody')).prepend(
$('
').append($(` | `))
.append($(' | '))
@@ -433,7 +432,6 @@ define([
);
});
-
// prepend user option
let hasAllocateTo = $(this.wrapSelector(prefix + '#o0')).length > 0;
if (hasAllocateTo) {
@@ -476,6 +474,18 @@ define([
, 'vp-file-browser-button')
);
}
+
+ if (pageType === 'Read' && selectedType === 'spss') {
+ // show install button
+ this.showInstallButton();
+ // show install note below File type selection
+ $(`
+
+
+ |
`).insertAfter($(this.wrapSelector('#fileType')).closest('tr'));
+ } else {
+ this.hideInstallButton();
+ }
// encoding suggest input
$(this.wrapSelector('#encoding')).replaceWith(function() {
@@ -488,6 +498,18 @@ define([
suggestInput.setPlaceholder('encoding option');
return suggestInput.toTagString();
});
+
+ // separator suggest input
+ $(this.wrapSelector('#sep')).replaceWith(function() {
+ // separator list :
+ var sepList = [',', '|', '\\t', '\\n', ':', ';', '-', '_', '&', '/', '\\'];
+ var suggestInput = new SuggestInput();
+ suggestInput.setComponentID('sep');
+ suggestInput.addClass('vp-input vp-state');
+ suggestInput.setSuggestList(function() { return sepList; });
+ suggestInput.setPlaceholder('Input seperator');
+ return suggestInput.toTagString();
+ });
}
render() {
diff --git a/visualpython/js/m_apps/Frame.js b/visualpython/js/m_apps/Frame.js
index 47cf0883..6452ddf6 100644
--- a/visualpython/js/m_apps/Frame.js
+++ b/visualpython/js/m_apps/Frame.js
@@ -87,7 +87,8 @@ define([
{ id: 'add_row', label: 'Add row', selection: FRAME_SELECT_TYPE.NONE, menuType: FRAME_EDIT_TYPE.ADD_ROW },
{ id: 'delete', label: 'Delete', selection: FRAME_SELECT_TYPE.MULTI, menuType: FRAME_EDIT_TYPE.DROP },
{ id: 'rename', label: 'Rename', selection: FRAME_SELECT_TYPE.NONE, menuType: FRAME_EDIT_TYPE.RENAME },
- { id: 'asType', label: 'As type', selection: FRAME_SELECT_TYPE.NONE, axis: FRAME_AXIS.COLUMN, menuType: FRAME_EDIT_TYPE.AS_TYPE },
+ { id: 'as_type', label: 'As type', selection: FRAME_SELECT_TYPE.NONE, axis: FRAME_AXIS.COLUMN, menuType: FRAME_EDIT_TYPE.AS_TYPE },
+ { id: 'to_datetime', label: 'To datetime', selection: FRAME_SELECT_TYPE.SINGLE, axis: FRAME_AXIS.COLUMN, menuType: FRAME_EDIT_TYPE.TO_DATETIME },
{ id: 'replace', label: 'Replace', selection: FRAME_SELECT_TYPE.SINGLE, axis: FRAME_AXIS.COLUMN, menuType: FRAME_EDIT_TYPE.REPLACE },
{ id: 'discretize', label: 'Discretize', selection: FRAME_SELECT_TYPE.SINGLE, axis: FRAME_AXIS.COLUMN, numeric_only: true, menuType: FRAME_EDIT_TYPE.DISCRETIZE }
]
@@ -618,6 +619,7 @@ define([
case FRAME_EDIT_TYPE.RENAME:
case FRAME_EDIT_TYPE.REPLACE:
case FRAME_EDIT_TYPE.AS_TYPE:
+ case FRAME_EDIT_TYPE.TO_DATETIME:
case FRAME_EDIT_TYPE.DISCRETIZE:
case FRAME_EDIT_TYPE.DATA_SHIFT:
case FRAME_EDIT_TYPE.SORT_INDEX:
@@ -1106,6 +1108,73 @@ define([
$(that.wrapSelector('.vp-inner-popup-fill-row')).hide();
}
});
+ } else if (menuType === FRAME_EDIT_TYPE.TO_DATETIME) {
+ // bind event for selecting format
+ $(this.wrapSelector('.vp-inner-popup-todt-format')).on('change', function() {
+ let format = $(this).val();
+ if (format === 'auto') {
+ $(that.wrapSelector('.vp-inner-popup-todt-dayfirst')).prop('disabled', false);
+ } else {
+ $(that.wrapSelector('.vp-inner-popup-todt-dayfirst')).prop('disabled', true);
+ $(that.wrapSelector('.vp-inner-popup-todt-dayfirst')).val('');
+ }
+
+ if (format === 'typing') {
+ $(that.wrapSelector('.vp-inner-popup-todt-format-typing')).prop('disabled', false);
+ } else {
+ $(that.wrapSelector('.vp-inner-popup-todt-format-typing')).prop('disabled', true);
+ }
+ });
+
+ // bind event for checking add column
+ $(this.wrapSelector('.vp-inner-popup-todt-use-addcol')).on('change', function() {
+ let checked = $(this).prop('checked');
+ if (checked === true) {
+ $(that.wrapSelector('.vp-inner-popup-todt-addcol-box')).show();
+ } else {
+ $(that.wrapSelector('.vp-inner-popup-todt-addcol-box')).hide();
+ }
+ });
+
+ // Add column set event
+ $(this.wrapSelector('.vp-inner-popup-todt-addcol')).on('click', function() {
+ let dateTypeList = [ // df[col].dt[{dateType}]
+ { label: 'Year', value: 'year' },
+ { label: 'Month', value: 'month' },
+ { label: 'Day', value: 'day' },
+ { label: 'Date', value: 'date' },
+ { label: 'DayOfWeek', value: 'dayofweek' },
+ { label: 'DayOfYear', value: 'dayofyear' },
+ { label: 'DaysInMonth', value: 'daysinmonth' },
+ { label: 'Quarter', value: 'quarter' },
+ { label: 'Time', value: 'time' },
+ { label: 'Hour', value: 'hour' },
+ { label: 'Minute', value: 'minute' },
+ { label: 'Second', value: 'second' },
+ { label: 'Nanosecond', value: 'nanosecond' },
+ ];
+ let dateTypeOptionTag = new com_String();
+ dateTypeList.forEach(opt => {
+ dateTypeOptionTag.appendFormat('
', opt.value, opt.label);
+ });
+
+ let addColItemTag = $(``);
+ $(that.wrapSelector('.vp-inner-popup-todt-addcol-content')).append(addColItemTag);
+ $(addColItemTag)[0].scrollIntoView();
+
+ // bind event for deleting
+ $(that.wrapSelector('.vp-inner-popup-todt-addcol-del')).off('click');
+ $(that.wrapSelector('.vp-inner-popup-todt-addcol-del')).on('click', function() {
+ // delete item
+ $(this).closest('.vp-inner-popup-todt-addcol-item').remove();
+ });
+ });
}
}
@@ -2113,6 +2182,75 @@ define([
return content.toString();
}
+ renderToDatetime() { // Builds the inner-popup body for the "To datetime" edit, writes it into the popup and returns the same string.
+ var content = new com_String();
+ let formatList = [ // selectable formats; 'auto' and 'typing' are sentinel values, every other value is emitted as format='...' by the code generator
+ { label: 'Auto', value: 'auto' },
+ { label: 'Year', value: '%Y' },
+ { label: 'Month', value: '%m' },
+ { label: 'Day', value: '%d' },
+ { label: 'Day Of Week', value: '%w' },
+ { label: '%Y/%m/%d', value: '%Y/%m/%d' },
+ { label: '%Y-%m-%d', value: '%Y-%m-%d' },
+ { label: '%d/%m/%Y', value: '%d/%m/%Y' },
+ { label: '%d-%m-%Y', value: '%d-%m-%Y' },
+ { label: 'Typing', value: 'typing' },
+ ];
+ let formatOptionTag = new com_String(); // accumulates one formatted entry per formatList item
+ formatList.forEach(opt => {
+ formatOptionTag.appendFormat('
', opt.value, opt.label);
+ });
+
+ content.appendFormat(`
+ `, this.state.selected[0].label, formatOptionTag.toString(), ); // format arguments: label of the first selected item, then the option entries built above
+
+ // set content
+ $(this.wrapSelector('.vp-inner-popup-body')).html(content.toString());
+ return content.toString(); // the caller also assigns the returned string as the popup content
+ }
+
renderFillNAPage() {
var content = new com_String();
content.appendFormatLine('
', 'vp-inner-popup-fillna-page');
@@ -2454,6 +2592,11 @@ define([
title = 'Convert type';
content = this.renderAsType();
break;
+ case FRAME_EDIT_TYPE.TO_DATETIME:
+ title = 'Convert to datetime';
+ size = { width: 500, height: 450 };
+ content = this.renderToDatetime();
+ break;
case FRAME_EDIT_TYPE.FILL_NA:
title = 'Fill NA';
content = this.renderFillNAPage();
@@ -2823,6 +2966,22 @@ define([
}
});
break;
+ case FRAME_EDIT_TYPE.TO_DATETIME:
+ content['format'] = $(this.wrapSelector('.vp-inner-popup-todt-format')).val();
+ content['format_typing'] = $(this.wrapSelector('.vp-inner-popup-todt-format-typing')).val();
+ content['dayfirst'] = $(this.wrapSelector('.vp-inner-popup-todt-dayfirst')).val();
+ content['use_addcol'] = $(this.wrapSelector('.vp-inner-popup-todt-use-addcol')).prop('checked');
+ var colList = [];
+ var addcolItemTags = $(this.wrapSelector('.vp-inner-popup-todt-addcol-item'));
+ addcolItemTags && addcolItemTags.each((idx, tag) => {
+ let colName = $(tag).find('.vp-inner-popup-todt-addcol-colname').val();
+ let dateType = $(tag).find('.vp-inner-popup-todt-addcol-type').val();
+ if (colName !== '' && dateType !== '') {
+ colList.push({ colName: colName, dateType: dateType });
+ }
+ });
+ content['collist'] = colList;
+ break;
case FRAME_EDIT_TYPE.DISCRETIZE:
content['input'] = $(this.wrapSelector('.vp-inner-popup-input')).val();
content['inputastext'] = $(this.wrapSelector('.vp-inner-popup-inputastext')).prop('checked');
@@ -3343,6 +3502,31 @@ define([
});
code.appendFormat("{0} = {1}.astype({{2}})", tempObj, tempObj, astypeStr.toString());
break;
+ case FRAME_EDIT_TYPE.TO_DATETIME:
+ code.appendFormat("{0}[{1}] = pd.to_datetime({2}[{3}]", tempObj, selectedName, tempObj, selectedName);
+ let optionList = [];
+ if (content['format'] === 'auto') {
+ if (content['dayfirst'] !== '') {
+ optionList.push(`dayfirst=${content['dayfirst']}`);
+ }
+ } else if (content['format'] === 'typing') {
+ if (content['format_typing'] !== '') {
+ optionList.push(`format='${content['format_typing']}'`);
+ }
+ } else {
+ optionList.push(`format='${content['format']}'`);
+ }
+ if (optionList.length > 0) {
+ code.appendFormat(', {0}', optionList.join(', '));
+ }
+ code.append(')');
+ if (content['use_addcol'] === true && content['collist'].length > 0) {
+ content['collist'].forEach(obj => {
+ code.appendLine();
+ code.appendFormat("{0}['{1}'] = {2}[{3}].dt.{4}", tempObj, obj.colName, tempObj, selectedName, obj.dateType);
+ });
+ }
+ break;
case FRAME_EDIT_TYPE.DISCRETIZE:
let newColumn = com_util.convertToStr(content['input'], content['inputastext']);
let method = content['type'];
@@ -3437,7 +3621,7 @@ define([
var indexList = data.index;
var dataList = data.data;
- columnList = columnList.map(col => { return { label: col.label, type: col.dtype, code: col.value } });
+ columnList = columnList.map(col => { return { label: col.label, type: col.dtype, code: col.value, isNumeric: col.is_numeric } });
indexList = indexList.map(idx => { return { label: idx, code: idx } });
if (!more) {
@@ -3453,6 +3637,12 @@ define([
while (colIdx < columnList.length) {
let col = columnList[colIdx];
let colCode = col.code.slice(0, colLevIdx + 1).join(',');
+ var colIcon = '';
+ if (col.isNumeric === true) {
+ colIcon = '
';
+ } else {
+ colIcon = '
';
+ }
let nextCol = columnList[colIdx + 1];
if (nextCol && nextCol.code.slice(0, colLevIdx + 1).join(',') === colCode) {
colSpan++;
@@ -3467,8 +3657,8 @@ define([
} else {
colClass = VP_FE_TABLE_COLUMN_GROUP;
}
- table.appendFormatLine('
{8} | '
- , colCode, FRAME_AXIS.COLUMN, col.type, col.label[colLevIdx-1], col.label[colLevIdx], colClass, selected, colSpan, col.label[colLevIdx]);
+ table.appendFormatLine('
{8}{9} | '
+ , colCode, FRAME_AXIS.COLUMN, col.type, col.label[colLevIdx-1], col.label[colLevIdx], colClass, selected, colSpan, colIcon, col.label[colLevIdx]);
colSpan = 1;
}
colIdx++;
@@ -3487,12 +3677,18 @@ define([
table.appendLine('
| ');
columnList && columnList.forEach(col => {
var colCode = col.code;
+ var colIcon = '';
+ if (col.isNumeric === true) {
+ colIcon = '';
+ } else {
+ colIcon = '';
+ }
var colClass = '';
if (that.state.axis == FRAME_AXIS.COLUMN && that.state.selected.map(col=>col.code).includes(colCode)) {
colClass = 'selected';
}
- table.appendFormatLine('{6} | '
- , colCode, FRAME_AXIS.COLUMN, col.type, col.label, VP_FE_TABLE_COLUMN, colClass, col.label);
+ table.appendFormatLine('{6}{7} | '
+ , colCode, FRAME_AXIS.COLUMN, col.type, col.label, VP_FE_TABLE_COLUMN, colClass, colIcon, col.label);
});
// // add column
table.appendFormatLine(' | ', VP_FE_ADD_COLUMN, 'vp-icon-plus');
@@ -3713,6 +3909,7 @@ define([
DROP: 3,
RENAME: 2,
AS_TYPE: 10,
+ TO_DATETIME: 19,
REPLACE: 9,
DISCRETIZE: 15,
diff --git a/visualpython/js/m_apps/Groupby.js b/visualpython/js/m_apps/Groupby.js
index a88dcfce..7b61d561 100644
--- a/visualpython/js/m_apps/Groupby.js
+++ b/visualpython/js/m_apps/Groupby.js
@@ -643,7 +643,7 @@ define([
page.appendFormatLine('', 'vp-gb-method-user');
page.appendFormatLine('', 'User option');
page.appendFormatLine('', 'Type user method');
- page.appendFormatLine(''
+ page.appendFormatLine(''
, 'Text');
page.appendLine('');
page.appendLine('
');
diff --git a/visualpython/js/m_apps/Instance.js b/visualpython/js/m_apps/Instance.js
index 83a8a4ea..1095ef31 100644
--- a/visualpython/js/m_apps/Instance.js
+++ b/visualpython/js/m_apps/Instance.js
@@ -263,7 +263,8 @@ define([
let cmObj = this.getCodemirror('vp_instanceVariable');
let rightCode = (cmObj && cmObj.cm)?cmObj.cm.getValue():'';
if (leftCode && leftCode != '') {
- sbCode.appendFormat('{0} = {1}', leftCode, rightCode);
+ sbCode.appendFormatLine('{0} = {1}', leftCode, rightCode);
+ sbCode.append(leftCode); // show allocation (from version 2.4.10)
} else {
sbCode.appendFormat('{0}', rightCode);
}
diff --git a/visualpython/js/m_ml/DataPrep.js b/visualpython/js/m_ml/DataPrep.js
index e01e7201..10b9625f 100644
--- a/visualpython/js/m_ml/DataPrep.js
+++ b/visualpython/js/m_ml/DataPrep.js
@@ -57,13 +57,13 @@ define([
this.modelConfig = ML_LIBRARIES;
this.modelTypeList = {
- 'Encoding': ['prep-onehot', 'prep-label', 'prep-ordinal', 'prep-target', 'prep-smote'],
+ 'Encoding': ['prep-onehot', 'prep-label', 'prep-ordinal', 'prep-target'],
'Scaling': ['prep-standard', 'prep-robust', 'prep-minmax', 'prep-normalizer', 'prep-func-trsfrm-log', 'prep-func-trsfrm-exp', 'prep-poly-feat', 'prep-kbins-discretizer'],
- 'ETC': ['prep-simple-imputer', 'make-column-transformer']
+ 'ETC': ['prep-simple-imputer', 'prep-smote', 'make-column-transformer']
}
this.mctEstimator = {
- 'Encoding': ['prep-onehot', 'prep-label', 'prep-ordinal', 'prep-target', 'prep-smote'],
+ 'Encoding': ['prep-onehot', 'prep-label', 'prep-ordinal', 'prep-target'],
'Scaling': ['prep-standard', 'prep-robust', 'prep-minmax', 'prep-normalizer', 'prep-func-trsfrm-log', 'prep-func-trsfrm-exp', 'prep-poly-feat', 'prep-kbins-discretizer'],
}
diff --git a/visualpython/js/m_ml/FitPredict.js b/visualpython/js/m_ml/FitPredict.js
index 22055a30..cdd979b9 100644
--- a/visualpython/js/m_ml/FitPredict.js
+++ b/visualpython/js/m_ml/FitPredict.js
@@ -427,7 +427,6 @@ define([
description: 'Transform labels to normalized encoding.'
}
}
-
if (modelType != 'ColumnTransformer') {
actions = {
...actions,
@@ -443,6 +442,32 @@ define([
}
}
}
+ if (modelType === 'SMOTE') {
+ actions = {
+ 'fit': {
+ name: 'fit',
+ label: 'Fit',
+ code: '${model}.fit(${fit_featureData}, ${fit_targetData})',
+ description: 'Check inputs and statistics of the sampler.',
+ options: [
+ { name: 'fit_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X_train' },
+ { name: 'fit_targetData', label: 'Target Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'y_train' }
+ ]
+ },
+ 'fit_resample': {
+ name: 'fit_resample',
+ label: 'Fit and resample',
+ code: '${fit_res_allocateX}, ${fit_res_allocatey} = ${model}.fit_resample(${fit_res_featureData}, ${fit_res_targetData})',
+ description: 'Resample the dataset.',
+ options: [
+ { name: 'fit_res_allocateX', label: 'Allocate feature', component: ['input'], placeholder: 'New variable', value: 'X_res' },
+ { name: 'fit_res_allocatey', label: 'Allocate target', component: ['input'], placeholder: 'New variable', value: 'y_res' },
+ { name: 'fit_res_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X_train' },
+ { name: 'fit_res_targetData', label: 'Target Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'y_train' }
+ ]
+ }
+ }
+ }
break;
case 'Regression':
actions = {
diff --git a/visualpython/js/m_ml/ModelInfo.js b/visualpython/js/m_ml/ModelInfo.js
index 1d450a0a..8eea2b53 100644
--- a/visualpython/js/m_ml/ModelInfo.js
+++ b/visualpython/js/m_ml/ModelInfo.js
@@ -409,16 +409,30 @@ define([
name: 'permutation_importance',
label: 'Permutation importance',
import: 'from sklearn.inspection import permutation_importance',
- code: '${importance_allocate} = permutation_importance(${model}, ${importance_featureData}, ${importance_targetData}${scoring}${random_state}${etc})',
+ code: '${importance_allocate} = vp_create_permutation_importances(${model}, ${importance_featureData}, ${importance_targetData}${scoring}${sort})',
description: 'Permutation importance for feature evaluation.',
options: [
{ name: 'importance_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X_train' },
{ name: 'importance_targetData', label: 'Target Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'y_train' },
{ name: 'scoring', component: ['input'], usePair: true },
- { name: 'random_state', component: ['input_number'], placeholder: '123', usePair: true },
+ { name: 'sort', label: 'Sort data', component: ['bool_checkbox'], value: true, usePair: true },
{ name: 'importance_allocate', label: 'Allocate to', component: ['input'], placeholder: 'New variable', value: 'importances' }
]
},
+ 'plot_permutation_importance': {
+ name: 'plot_permutation_importance',
+ label: 'Plot permutation importance',
+ import: 'from sklearn.inspection import permutation_importance',
+ code: 'vp_plot_permutation_importances(${model}, ${importance_featureData}, ${importance_targetData}${scoring}${sort}${top_count})',
+ description: 'Permutation importance for feature evaluation.',
+ options: [
+ { name: 'importance_featureData', label: 'Feature Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'X_train' },
+ { name: 'importance_targetData', label: 'Target Data', component: ['data_select'], var_type: ['DataFrame', 'Series', 'ndarray', 'list', 'dict'], value: 'y_train' },
+ { name: 'scoring', component: ['input'], usePair: true },
+ { name: 'sort', label: 'Sort data', component: ['bool_checkbox'], value: true, usePair: true },
+ { name: 'top_count', label: 'Top count', component: ['input_number'], min: 0, usePair: true }
+ ]
+ },
'feature_importances': {
name: 'feature_importances',
label: 'Feature importances',
@@ -514,6 +528,19 @@ define([
}
}
}
+ if (modelType === 'SMOTE') {
+ infos = {
+ 'get_feature_names_out': {
+ name: 'get_feature_names_out',
+ label: 'Get feature names',
+ code: '${feature_names_allocate} = ${model}.get_feature_names_out()',
+ description: 'Get output feature names for transformation.',
+ options: [
+ { name: 'feature_names_allocate', label: 'Allocate to', component: ['input'], placeholder: 'New variable', value: 'features' }
+ ]
+ }
+ }
+ }
infos = {
...infos,
'get_params': defaultInfos['get_params']
diff --git a/visualpython/js/m_ml/Pipeline.js b/visualpython/js/m_ml/Pipeline.js
index 0db5f041..6dfc1989 100644
--- a/visualpython/js/m_ml/Pipeline.js
+++ b/visualpython/js/m_ml/Pipeline.js
@@ -63,6 +63,9 @@ define([
- Fit
- Transform
- Predict
+ - Fit and Predict
+ - Fit and Transform
+ - Fit and Resample
*/
this.templateList = {
'data-prep': {
@@ -73,9 +76,10 @@ define([
* ml_* is pre-defined app
* pp_* is defined only for Pipeline
*/
- { name: 'ml_dataPrep', label: 'Data Prep', useApp: true },
+ { name: 'ml_dataPrep', label: 'Data Prep', useApp: true, child: ['pp_fit', 'pp_transform', 'pp_fit_resample'] },
{ name: 'pp_fit', label: 'Fit' },
- { name: 'pp_transform', label: 'Transform' }
+ { name: 'pp_transform', label: 'Transform' },
+ { name: 'pp_fit_resample', label: 'Fit and Resample' }
]
},
'regression': {
@@ -286,7 +290,7 @@ define([
that.state.modelTypeName = modelTypeName;
// show fit / predict / transform depends on model selection
- let defaultActions = ['fit', 'predict', 'transform', 'fit_predict', 'fit_transform'];
+ let defaultActions = ['fit', 'predict', 'transform', 'fit_predict', 'fit_transform', 'fit_resample'];
let actions = that.modelEditor.getAction(modelTypeName);
defaultActions.forEach(actKey => {
if (actions[actKey] === undefined) {
@@ -308,6 +312,10 @@ define([
} else {
$(that.wrapSelector(`.vp-pp-item[data-name="pp_${actKey}"]`)).hide();
}
+ } else if (actKey === 'fit_resample') {
+ // for SMOTE: show fit_resample only
+ $(that.wrapSelector(`.vp-pp-item[data-name="pp_fit"]`)).hide();
+ $(that.wrapSelector(`.vp-pp-item[data-name="pp_transform"]`)).hide();
}
}
$(that.wrapSelector('.vp-pp-item')).removeClass('vp-last-visible');
@@ -580,6 +588,9 @@ define([
case 'pp_fit_transform':
tag = this.templateForOptionPage(actions['fit_transform']);
break;
+ case 'pp_fit_resample':
+ tag = this.templateForOptionPage(actions['fit_resample']);
+ break;
}
$(this.wrapSelector(`.vp-pp-step-page[data-name="${appId}"]`)).html(`
${tag}
@@ -680,6 +691,9 @@ define([
case 'pp_fit_transform':
actObj = actions['fit_transform'];
break;
+ case 'pp_fit_resample':
+ actObj = actions['fit_resample'];
+ break;
}
let code = new com_String();
@@ -721,7 +735,7 @@ define([
code.appendLine();
code.appendLine();
}
- if (useApp) {
+ if (useApp === true) {
let appCode = app.generateCode();
if (appCode instanceof Array) {
appCode = appCode.join('\n');
diff --git a/visualpython/js/m_ml/dataSplit.js b/visualpython/js/m_ml/dataSplit.js
index bfa2aac2..a8c9417f 100644
--- a/visualpython/js/m_ml/dataSplit.js
+++ b/visualpython/js/m_ml/dataSplit.js
@@ -175,7 +175,7 @@ define([
options.appendFormat(', shuffle={0}', shuffle);
}
if (shuffle != 'False' && stratify && stratify != '') {
- options.appendFormat(', startify={0}', stratify);
+ options.appendFormat(', stratify={0}', stratify);
}
let code = new com_String();
diff --git a/visualpython/js/m_stats/ProbDist.js b/visualpython/js/m_stats/ProbDist.js
index ecd629ae..245cd9f6 100644
--- a/visualpython/js/m_stats/ProbDist.js
+++ b/visualpython/js/m_stats/ProbDist.js
@@ -86,14 +86,24 @@ define([
// discrete option
$(that.wrapSelector('.vp-pd-display-option.dist')).show();
- // set size to 100
- $(that.wrapSelector('#size')).val(100);
- that.state.size = 100;
-
- // hide continuous action
- if (that.state.action === 'stats-to-pvalue' || that.state.action === 'pvalue-to-stats') {
- $(that.wrapSelector('#action')).val('random-number');
- $(that.wrapSelector('#action')).trigger('change');
+ // set size to 1000
+ $(that.wrapSelector('#size')).val(1000);
+ that.state.size = 1000;
+
+ // hide distribution plot for multinomial
+ if (distType === 'multinomial') {
+ $(that.wrapSelector('.vp-pd-display-option.dist-plot')).hide();
+ // hide other actions
+ if (that.state.action !== 'random-number') {
+ $(that.wrapSelector('#action')).val('random-number');
+ $(that.wrapSelector('#action')).trigger('change');
+ }
+ } else {
+ // hide continuous action
+ if (that.state.action === 'stats-to-pvalue' || that.state.action === 'pvalue-to-stats') {
+ $(that.wrapSelector('#action')).val('random-number');
+ $(that.wrapSelector('#action')).trigger('change');
+ }
}
} else {
// continuous option
diff --git a/visualpython/js/m_visualize/Seaborn.js b/visualpython/js/m_visualize/Seaborn.js
index d172d998..bea6cfd1 100644
--- a/visualpython/js/m_visualize/Seaborn.js
+++ b/visualpython/js/m_visualize/Seaborn.js
@@ -32,6 +32,7 @@ define([
this.config.dataview = false;
this.config.size = { width: 1064, height: 550 };
+ this.config.autoScroll = false;
this.config.checkModules = ['plt', 'sns'];
this.config.docs = 'https://seaborn.pydata.org/index.html';
@@ -201,6 +202,7 @@ define([
$(that.wrapSelector('#kde')).closest('.sb-option').show();
$(that.wrapSelector('#stat')).closest('.sb-option').show();
} else if (chartType == 'barplot') {
+ $(that.wrapSelector('#orient')).closest('.sb-option').show();
$(that.wrapSelector('#showValues')).closest('.sb-option').show();
$(that.wrapSelector('#errorbar')).closest('.sb-option').show();
if (that.state.setXY === false) {
@@ -547,6 +549,7 @@ define([
$(page).find('#kde').closest('.sb-option').show();
$(page).find('#stat').closest('.sb-option').show();
} else if (this.state.chartType == 'barplot') {
+ $(page).find('#orient').closest('.sb-option').show();
$(page).find('#showValues').closest('.sb-option').show();
$(page).find('#errorbar').closest('.sb-option').show();
if (this.state.setXY === false) {
@@ -890,7 +893,7 @@ define([
generateCode(preview=false) {
let {
- chartType, data, x, y, setXY, hue, kde, stat,
+ chartType, data, x, y, setXY, hue, orient, kde, stat,
showValues, showValuesPrecision, errorbar,
sortType, sortBy, sortHue, sortHueText,
userOption='',
diff --git a/visualpython/python/userCommand.py b/visualpython/python/userCommand.py
index 586eea36..cac0152f 100644
--- a/visualpython/python/userCommand.py
+++ b/visualpython/python/userCommand.py
@@ -100,7 +100,7 @@ def vp_create_feature_importances(model, X_train=None, sort=False):
feature_names = [ 'X{}'.format(i) for i in range(len(model.feature_importances_)) ]
df_i = _vp_pd.DataFrame(model.feature_importances_, index=feature_names, columns=['Feature_importance'])
- df_i['Percentage'] = 100 * (df_i['Feature_importance'] / df_i['Feature_importance'].max())
+ df_i['Percentage'] = 100 * df_i['Feature_importance']
if sort: df_i.sort_values(by='Feature_importance', ascending=False, inplace=True)
df_i = df_i.round(2)
@@ -123,6 +123,41 @@ def vp_plot_feature_importances(model, X_train=None, sort=False, top_count=0):
_vp_plt.show()
######
+# Visual Python: Machine Learning > Model Info
+######
+def vp_create_permutation_importances(model, X_train, y_train, scoring=None, sort=False):
+    """Return a DataFrame of mean permutation importances (plus a x100 'Percentage' column) per feature."""
+    from sklearn.inspection import permutation_importance
+    # scoring is keyword-only in scikit-learn's permutation_importance; passing it positionally raises TypeError
+    imp = permutation_importance(model, X_train, y_train, scoring=scoring)
+    if isinstance(X_train, _vp_pd.core.frame.DataFrame):
+        feature_names = X_train.columns
+    else:
+        # size the fallback names from the result so models without feature_importances_ also work
+        feature_names = [ 'X{}'.format(i) for i in range(len(imp['importances_mean'])) ]
+    df_i = _vp_pd.DataFrame(imp['importances_mean'], index=feature_names, columns=['Feature_importance'])
+    df_i['Percentage'] = 100 * df_i['Feature_importance']
+    if sort: df_i.sort_values(by='Feature_importance', ascending=False, inplace=True)
+    df_i = df_i.round(2)
+    return df_i
+######
+# Visual Python: Machine Learning > Model Info
+######
+def vp_plot_permutation_importances(model, X_train, y_train, scoring=None, sort=False, top_count=0):
+    """Plot the permutation importance 'Percentage' column as a horizontal bar chart and show it."""
+    df_i = vp_create_permutation_importances(model, X_train, y_train, scoring, sort)
+    percentages = df_i['Percentage']
+    if sort:
+        # ascending order, so tail() below keeps the top_count largest values
+        percentages = percentages.sort_values()
+        if top_count > 0:
+            percentages = percentages.tail(top_count)
+    # top_count is only applied when sort is set; unsorted data is plotted in full
+    percentages.plot(kind='barh')
+    _vp_plt.xlabel('Feature importance Percentage')
+    _vp_plt.ylabel('Features')
+    _vp_plt.show()
+######
# Visual Python: Visualization > Seaborn
######
def vp_seaborn_show_values(axs, precision=1, space=0.01):
---|