Skip to content

Commit 3733634

Browse files
author
minjk-bl
committed
Add _vp_sample function for sampling data with various data types
1 parent c2f4eca commit 3733634

File tree

2 files changed

+18
-2
lines changed

2 files changed

+18
-2
lines changed

js/m_visualize/Seaborn.js

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -626,7 +626,8 @@ define([
626626
code.appendFormatLine('plt.figure(figsize=({0}, {1}))', defaultWidth, defaultHeight);
627627
if (useSampling) {
628628
// data sampling code for preview
629-
convertedData = data + '.sample(n=' + sampleCount + ', random_state=0)';
629+
// convertedData = data + '.sample(n=' + sampleCount + ', random_state=0)';
630+
convertedData = com_util.formatString('_vp_sample({0}, {1})', data, sampleCount);
630631
// replace pre-defined options
631632
generatedCode = generatedCode.replaceAll(data, convertedData);
632633
}

python/variableCommand.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,4 +68,19 @@ def _vp_get_profiling_list():
6868
result.append({ 'varName': v['varName'], 'title': title })
6969

7070
return result
71-
71+
72+
import numpy as _vp_np
73+
import random as _vp_rd
74+
def _vp_sample(data, sample_cnt):
    """Return a random sample of at most ``sample_cnt`` items from ``data``.

    Sampling is always done without replacement, so the result never
    contains the same row/element twice.

    Supported inputs (matched by type name):
      * ``DataFrame`` / ``Series`` -- sampled with ``data.sample(n)``.
      * ``ndarray`` -- rows are picked along the first axis.
      * ``list`` -- elements are picked with ``random.sample``.

    Any other input is returned unchanged.

    Args:
        data: Object to sample from.
        sample_cnt: Maximum number of items to return. When ``data`` holds
            fewer items, all of them are returned (in random order).

    Returns:
        A sampled object of the same kind as ``data``, or ``data`` itself
        when its type is not supported.
    """
    # Dispatch on the type name instead of isinstance(); presumably this keeps
    # the helper usable without importing pandas here -- TODO confirm.
    dataType = type(data).__name__

    if dataType in ('DataFrame', 'Series'):
        return data.sample(min(len(data), sample_cnt))

    if dataType == 'ndarray':
        # A 0-d array has no first axis to sample along; hand it back as is.
        if data.ndim == 0:
            return data
        row_cnt = data.shape[0]
        picked = _vp_np.random.choice(
            row_cnt, min(row_cnt, sample_cnt), replace=False
        )
        return data[picked]

    if dataType == 'list':
        # random.sample draws without replacement, matching the other branches
        # (random.choices would allow duplicates).
        return _vp_rd.sample(data, min(len(data), sample_cnt))

    # Unsupported type: len() is deliberately not called on it, so objects
    # without a length (scalars, generators, ...) pass through untouched.
    return data

0 commit comments

Comments
 (0)