-
-
Notifications
You must be signed in to change notification settings - Fork 7.9k
Updated violin plot example as per suggestions in issue #7251 #7360
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
08db92d
becbfa4
14cd6cf
03946d4
8044e9d
88a8c2e
62c1716
9f4bbdd
b3ce73b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,87 +18,64 @@ | |
import numpy as np | ||
|
||
|
||
# functions to calculate percentiles and adjacent values | ||
def percentile(vals, p): | ||
N = len(vals) | ||
n = p*(N+1) | ||
k = int(n) | ||
d = n-k | ||
if k <= 0: | ||
return vals[0] | ||
if k >= N: | ||
return vals[N-1] | ||
return vals[k-1] + d*(vals[k] - vals[k-1]) | ||
|
||
|
||
def adjacent_values(vals): | ||
q1 = percentile(vals, 0.25) | ||
q3 = percentile(vals, 0.75) | ||
iqr = q3 - q1 # inter-quartile range | ||
|
||
# upper adjacent values | ||
uav = q3 + iqr * 1.5 | ||
if uav > vals[-1]: | ||
uav = vals[-1] | ||
if uav < q3: | ||
uav = q3 | ||
|
||
# lower adjacent values | ||
lav = q1 - iqr * 1.5 | ||
if lav < vals[0]: | ||
lav = vals[0] | ||
if lav > q1: | ||
lav = q1 | ||
return [lav, uav] | ||
def adjacent_values(vals, q1, q3): | ||
upper_adjacent_value = q3 + (q3 - q1) * 1.5 | ||
upper_adjacent_value = np.clip(upper_adjacent_value, q3, vals[-1]) | ||
|
||
lower_adjacent_value = q1 - (q3 - q1) * 1.5 | ||
lower_adjacent_value = np.clip(lower_adjacent_value, vals[0], q1) | ||
return [lower_adjacent_value, upper_adjacent_value] | ||
|
||
|
||
def set_axis_style(ax, labels): | ||
ax.get_xaxis().set_tick_params(direction='out') | ||
ax.xaxis.set_ticks_position('bottom') | ||
ax.set_xticks(np.arange(1, len(labels) + 1)) | ||
ax.set_xticklabels(labels) | ||
ax.set_xlim(0.25, len(labels) + 0.75) | ||
ax.set_xlabel('Sample name') | ||
|
||
|
||
# create test data | ||
np.random.seed(123) | ||
dat = [np.random.normal(0, std, 100) for std in range(1, 5)] | ||
lab = ['A', 'B', 'C', 'D'] # labels | ||
med = [] # medians | ||
iqr = [] # inter-quantile ranges | ||
avs = [] # upper and lower adjacent values | ||
for arr in dat: | ||
sarr = sorted(arr) | ||
med.append(percentile(sarr, 0.5)) | ||
iqr.append([percentile(sarr, 0.25), percentile(sarr, 0.75)]) | ||
avs.append(adjacent_values(sarr)) | ||
|
||
# plot the violins | ||
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(9, 4), | ||
sharey=True) | ||
_ = ax1.violinplot(dat) | ||
parts = ax2.violinplot(dat, showmeans=False, showmedians=False, | ||
showextrema=False) | ||
data = [sorted(np.random.normal(0, std, 100)) for std in range(1, 5)] | ||
|
||
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(9, 4), sharey=True) | ||
|
||
# plot the default violin | ||
ax1.set_title('Default violin plot') | ||
ax2.set_title('Customized violin plot') | ||
ax1.set_ylabel('Observed values') | ||
ax1.violinplot(data) | ||
|
||
# plot whiskers as thin lines, quartiles as fat lines, | ||
# and medians as points | ||
for i in range(len(med)): | ||
# whiskers | ||
ax2.plot([i + 1, i + 1], avs[i], '-', color='black', linewidth=1) | ||
ax2.plot([i + 1, i + 1], iqr[i], '-', color='black', linewidth=5) | ||
ax2.plot(i + 1, med[i], 'o', color='white', | ||
markersize=6, markeredgecolor='none') | ||
# customized violin | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. stray redundant comment There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hmm, right. Guess the titles we set for the axes are self-explanatory. |
||
ax2.set_title('Customized violin plot') | ||
parts = ax2.violinplot( | ||
data, showmeans=False, showmedians=False, | ||
showextrema=False) | ||
|
||
# customize colors | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. probably also unnecessary comment |
||
for pc in parts['bodies']: | ||
pc.set_facecolor('#D43F3A') | ||
pc.set_edgecolor('black') | ||
pc.set_alpha(1) | ||
|
||
ax1.set_ylabel('Observed values') | ||
quartile1, medians, quartile3 = np.percentile(data, [25, 50, 75], axis=1) | ||
inter_quartile_ranges = np.vstack([quartile1, quartile3]).T | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Pretty sure this variable is no longer used and so can be removed |
||
whiskers = [ | ||
adjacent_values(sorted_array, q1, q3) | ||
for sorted_array, q1, q3 in zip(data, quartile1, quartile3)] | ||
whiskersMin, whiskersMax = list(zip(*whiskers)) | ||
# plot medians as points, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can remove these comments, probably There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hmm right, will remove these comments and the unused variable |
||
# whiskers as thin lines, quartiles as fat lines | ||
inds = np.arange(1, len(medians) + 1) | ||
ax2.scatter(inds, medians, marker='o', color='white', s=30, zorder=3) | ||
ax2.vlines(inds, quartile1, quartile3, color='k', linestyle='-', lw=5) | ||
ax2.vlines(inds, whiskersMin, whiskersMax, color='k', linestyle='-', lw=1) | ||
|
||
# set style for the axes | ||
labels = ['A', 'B', 'C', 'D'] # labels | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. variable name = comment |
||
for ax in [ax1, ax2]: | ||
ax.get_xaxis().set_tick_params(direction='out') | ||
ax.xaxis.set_ticks_position('bottom') | ||
ax.set_xticks(np.arange(1, len(lab) + 1)) | ||
ax.set_xticklabels(lab) | ||
ax.set_xlim(0.25, len(lab) + 0.75) | ||
ax.set_xlabel('Sample name') | ||
set_axis_style(ax, labels) | ||
|
||
plt.subplots_adjust(bottom=0.15, wspace=0.05) | ||
|
||
plt.show() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Probably doesn't need to be bracketed []; I wonder if it can be used to remove the zip(*...) stuff in the unpacking...
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
To avoid the unpacking, I could also optionally make 'whiskers' an np.array and then do:
whiskersMin, whiskersMax = whiskers[:, 0], whiskers[:, 1]
Irrespective of whether I do this or not, the brackets can be removed
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Then I think it makes sense to do that.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, updated in latest commit