-
-
Notifications
You must be signed in to change notification settings - Fork 7.9k
Updated violin plot example as per suggestions in issue #7251 #7360
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
08db92d
becbfa4
14cd6cf
03946d4
8044e9d
88a8c2e
62c1716
9f4bbdd
b3ce73b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,87 +18,74 @@ | |
import numpy as np | ||
|
||
|
||
# functions to calculate percentiles and adjacent values | ||
def percentile(vals, p): | ||
N = len(vals) | ||
n = p*(N+1) | ||
k = int(n) | ||
d = n-k | ||
if k <= 0: | ||
return vals[0] | ||
if k >= N: | ||
return vals[N-1] | ||
return vals[k-1] + d*(vals[k] - vals[k-1]) | ||
|
||
|
||
def adjacent_values(vals): | ||
q1 = percentile(vals, 0.25) | ||
q3 = percentile(vals, 0.75) | ||
iqr = q3 - q1 # inter-quartile range | ||
|
||
q1, q3 = np.percentile(vals, [25, 75]) | ||
# inter-quartile range iqr | ||
iqr = q3 - q1 | ||
# upper adjacent values | ||
uav = q3 + iqr * 1.5 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same, change to |
||
if uav > vals[-1]: | ||
uav = vals[-1] | ||
if uav < q3: | ||
uav = q3 | ||
|
||
uav = np.clip(uav, q3, vals[-1]) | ||
# lower adjacent values | ||
lav = q1 - iqr * 1.5 | ||
if lav < vals[0]: | ||
lav = vals[0] | ||
if lav > q1: | ||
lav = q1 | ||
lav = np.clip(lav, q1, vals[0]) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think you might have these two backwards. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you please comment a bit more on what you mean by backwards? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am sorry, I do have them backwards. Changing that now |
||
return [lav, uav] | ||
|
||
|
||
def set_axis_style(ax, labels): | ||
ax.get_xaxis().set_tick_params(direction='out') | ||
ax.xaxis.set_ticks_position('bottom') | ||
ax.set_xticks(np.arange(1, len(labels) + 1)) | ||
ax.set_xticklabels(labels) | ||
ax.set_xlim(0.25, len(labels) + 0.75) | ||
ax.set_xlabel('Sample name') | ||
|
||
|
||
# create test data | ||
np.random.seed(123) | ||
dat = [np.random.normal(0, std, 100) for std in range(1, 5)] | ||
lab = ['A', 'B', 'C', 'D'] # labels | ||
med = [] # medians | ||
iqr = [] # inter-quantile ranges | ||
avs = [] # upper and lower adjacent values | ||
for arr in dat: | ||
sarr = sorted(arr) | ||
med.append(percentile(sarr, 0.5)) | ||
iqr.append([percentile(sarr, 0.25), percentile(sarr, 0.75)]) | ||
avs.append(adjacent_values(sarr)) | ||
|
||
# plot the violins | ||
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(9, 4), | ||
sharey=True) | ||
_ = ax1.violinplot(dat) | ||
parts = ax2.violinplot(dat, showmeans=False, showmedians=False, | ||
showextrema=False) | ||
dat = [sorted(np.random.normal(0, std, 100)) for std in range(1, 5)] | ||
|
||
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(9, 4), sharey=True) | ||
|
||
# plot the default violin | ||
ax1.set_title('Default violin plot') | ||
ax2.set_title('Customized violin plot') | ||
ax1.set_ylabel('Observed values') | ||
ax1.violinplot(dat) | ||
|
||
# plot whiskers as thin lines, quartiles as fat lines, | ||
# and medians as points | ||
for i in range(len(med)): | ||
# whiskers | ||
ax2.plot([i + 1, i + 1], avs[i], '-', color='black', linewidth=1) | ||
ax2.plot([i + 1, i + 1], iqr[i], '-', color='black', linewidth=5) | ||
ax2.plot(i + 1, med[i], 'o', color='white', | ||
markersize=6, markeredgecolor='none') | ||
# customized violin | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. stray redundant comment There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm, right. Guess the title we set for the axes are self explanatory. |
||
ax2.set_title('Customized violin plot') | ||
parts = ax2.violinplot( | ||
dat, showmeans=False, showmedians=False, | ||
showextrema=False) | ||
|
||
# customize colors | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. probably also unnecessary comment |
||
for pc in parts['bodies']: | ||
pc.set_facecolor('#D43F3A') | ||
pc.set_edgecolor('black') | ||
pc.set_alpha(1) | ||
|
||
ax1.set_ylabel('Observed values') | ||
# medians | ||
med = [np.percentile(sarr, 50) for sarr in dat] | ||
# inter-quartile ranges | ||
iqr = [[np.percentile(sarr, 25), np.percentile(sarr, 75)] for sarr in dat] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think you can use the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To compute all of those in one call (without list comprehensions), I could do something like: for sarr in dat:
median, q1, q3 = np.percentile(sarr, [50, 25, 75])
med.append(median)
iqr.append([q1, q3])
avs.append(adjacent_values(sarr)) Is that fine? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think you can pass the entire array and use the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Got it. Since iqr is a list of tuples (or rather, list of lists of size 2), I am unable to see how we could do all 3 percentiles with a single call. I could still replace There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No need for the tmp = np.percentile(dat, [25, 50, 75], axis=1))).T
med = tmp[:, 1]
iqr = tmp[:, [0, 2]] |
||
# upper and lower adjacent values | ||
avs = [adjacent_values(sarr) for sarr in dat] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. since this is the whiskers, why not change the variable name to whiskers? and again, then removing the comment |
||
|
||
# plot whiskers as thin lines, quartiles as fat lines, | ||
# and medians as points | ||
for i, median in enumerate(med): | ||
# whiskers | ||
ax2.plot([i + 1, i + 1], avs[i], '-', color='black', linewidth=1) | ||
# quartiles | ||
ax2.plot([i + 1, i + 1], iqr[i], '-', color='black', linewidth=5) | ||
# medians | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. doesn't need this comment 'cause clear variable name. Also, still in favor of vlines for this (like in #6814) because I think that's a clearer indication of what's being plotted. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @story645 👍 for better variable names, I have made changes and pushed a commit. I also intend to include the change to vlines in a separate commit, if we go ahead with that. |
||
ax2.plot( | ||
i + 1, median, 'o', color='white', | ||
markersize=6, markeredgecolor='none') | ||
|
||
# set style for the axes | ||
labels = ['A', 'B', 'C', 'D'] # labels | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. variable name = comment |
||
for ax in [ax1, ax2]: | ||
ax.get_xaxis().set_tick_params(direction='out') | ||
ax.xaxis.set_ticks_position('bottom') | ||
ax.set_xticks(np.arange(1, len(lab) + 1)) | ||
ax.set_xticklabels(lab) | ||
ax.set_xlim(0.25, len(lab) + 0.75) | ||
ax.set_xlabel('Sample name') | ||
set_axis_style(ax, labels) | ||
|
||
plt.subplots_adjust(bottom=0.15, wspace=0.05) | ||
|
||
plt.show() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What about removing this comment and instead changing to clearer variable names:
interquartile_range = q3 - q1