diff --git a/examples/statistics/customized_violin_demo.py b/examples/statistics/customized_violin_demo.py
index 29dfda7b894e..e37a034400d5 100644
--- a/examples/statistics/customized_violin_demo.py
+++ b/examples/statistics/customized_violin_demo.py
@@ -18,87 +18,65 @@
 import numpy as np
 
 
-# functions to calculate percentiles and adjacent values
-def percentile(vals, p):
-    N = len(vals)
-    n = p*(N+1)
-    k = int(n)
-    d = n-k
-    if k <= 0:
-        return vals[0]
-    if k >= N:
-        return vals[N-1]
-    return vals[k-1] + d*(vals[k] - vals[k-1])
-
-
-def adjacent_values(vals):
-    q1 = percentile(vals, 0.25)
-    q3 = percentile(vals, 0.75)
-    iqr = q3 - q1  # inter-quartile range
-
-    # upper adjacent values
-    uav = q3 + iqr * 1.5
-    if uav > vals[-1]:
-        uav = vals[-1]
-    if uav < q3:
-        uav = q3
-
-    # lower adjacent values
-    lav = q1 - iqr * 1.5
-    if lav < vals[0]:
-        lav = vals[0]
-    if lav > q1:
-        lav = q1
-    return [lav, uav]
+def adjacent_values(vals, q1, q3):
+    """Return the (lower, upper) whisker ends for the sorted data *vals*.
+
+    The fences 1.5 * IQR beyond the quartiles *q1* and *q3* are clipped so
+    each whisker stays between its quartile and the matching data extreme.
+    """
+    upper_adjacent_value = q3 + (q3 - q1) * 1.5
+    upper_adjacent_value = np.clip(upper_adjacent_value, q3, vals[-1])
+
+    lower_adjacent_value = q1 - (q3 - q1) * 1.5
+    lower_adjacent_value = np.clip(lower_adjacent_value, vals[0], q1)
+    return lower_adjacent_value, upper_adjacent_value
+
+
+def set_axis_style(ax, labels):
+    """Label the x axis of *ax* with one tick per entry of *labels*."""
+    ax.xaxis.set_tick_params(direction='out')
+    ax.xaxis.set_ticks_position('bottom')
+    ax.set_xticks(np.arange(1, len(labels) + 1))
+    ax.set_xticklabels(labels)
+    ax.set_xlim(0.25, len(labels) + 0.75)
+    ax.set_xlabel('Sample name')
 
 
 # create test data
 np.random.seed(123)
-dat = [np.random.normal(0, std, 100) for std in range(1, 5)]
-lab = ['A', 'B', 'C', 'D']  # labels
-med = []  # medians
-iqr = []  # inter-quantile ranges
-avs = []  # upper and lower adjacent values
-for arr in dat:
-    sarr = sorted(arr)
-    med.append(percentile(sarr, 0.5))
-    iqr.append([percentile(sarr, 0.25), percentile(sarr, 0.75)])
-    avs.append(adjacent_values(sarr))
-
-# plot the violins
-fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(9, 4),
-                               sharey=True)
-_ = ax1.violinplot(dat)
-parts = ax2.violinplot(dat, showmeans=False, showmedians=False,
-                       showextrema=False)
+data = [sorted(np.random.normal(0, std, 100)) for std in range(1, 5)]
+
+fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(9, 4), sharey=True)
 
 ax1.set_title('Default violin plot')
-ax2.set_title('Customized violin plot')
+ax1.set_ylabel('Observed values')
+ax1.violinplot(data)
 
-# plot whiskers as thin lines, quartiles as fat lines,
-# and medians as points
-for i in range(len(med)):
-    # whiskers
-    ax2.plot([i + 1, i + 1], avs[i], '-', color='black', linewidth=1)
-    ax2.plot([i + 1, i + 1], iqr[i], '-', color='black', linewidth=5)
-    ax2.plot(i + 1, med[i], 'o', color='white',
-             markersize=6, markeredgecolor='none')
+ax2.set_title('Customized violin plot')
+parts = ax2.violinplot(
+    data, showmeans=False, showmedians=False,
+    showextrema=False)
 
-# customize colors
 for pc in parts['bodies']:
     pc.set_facecolor('#D43F3A')
     pc.set_edgecolor('black')
     pc.set_alpha(1)
 
-ax1.set_ylabel('Observed values')
+quartile1, medians, quartile3 = np.percentile(data, [25, 50, 75], axis=1)
+whiskers = np.array([
+    adjacent_values(sorted_array, q1, q3)
+    for sorted_array, q1, q3 in zip(data, quartile1, quartile3)])
+whiskers_min, whiskers_max = whiskers[:, 0], whiskers[:, 1]
+
+inds = np.arange(1, len(medians) + 1)
+ax2.scatter(inds, medians, marker='o', color='white', s=30, zorder=3)
+ax2.vlines(inds, quartile1, quartile3, color='k', linestyle='-', lw=5)
+ax2.vlines(inds, whiskers_min, whiskers_max, color='k', linestyle='-', lw=1)
+
+# set style for the axes
+labels = ['A', 'B', 'C', 'D']
 for ax in [ax1, ax2]:
-    ax.get_xaxis().set_tick_params(direction='out')
-    ax.xaxis.set_ticks_position('bottom')
-    ax.set_xticks(np.arange(1, len(lab) + 1))
-    ax.set_xticklabels(lab)
-    ax.set_xlim(0.25, len(lab) + 0.75)
-    ax.set_xlabel('Sample name')
+    set_axis_style(ax, labels)
 
 plt.subplots_adjust(bottom=0.15, wspace=0.05)
-
 plt.show()