Skip to content

Commit b6d3f7c

Browse files
sasoripathos authored and timhoffm committed
Feature: draw percentiles in violinplot (#14107)
* Fix issue 10788
* Fix style
* Add missing import statement
* Remove changes
* Code for feature 8532
* Run boilerplate
* Fix code style
* Remove unused test images
* Update pyplot.py
* Update pyplot.py
* Run boilerplate
* Remove extra image-compare tests, update documentation
* Use np.quantile instead of percentile
* Update test images for violinplot
* Revert "Update test images for violinplot". This reverts commit ee5cd08.
* Use np.percentile to implement quantile
* Update API from percentiles to quantiles
* Update violinplot call in test cases
* Update documentation from percentiles to quantiles
* Update docstring and backward compatibility. Add next API change document for violin_stats in cbook. Make violin() more backward compatible by making the quantiles key optional. Update smoke test for a mismatch between the number of violins and the number of quantile lists.
* Remove unnecessary API change doc
1 parent 8f62234 commit b6d3f7c

File tree

7 files changed

+146
-15
lines changed

7 files changed

+146
-15
lines changed

examples/statistics/violinplot.py

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
pos = [1, 2, 4, 5, 7, 8]
2929
data = [np.random.normal(0, std, size=100) for std in pos]
3030

31-
fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(6, 6))
31+
fig, axes = plt.subplots(nrows=2, ncols=5, figsize=(10, 6))
3232

3333
axes[0, 0].violinplot(data, pos, points=20, widths=0.3,
3434
showmeans=True, showextrema=True, showmedians=True)
@@ -43,19 +43,42 @@
4343
showextrema=True, showmedians=True, bw_method=0.5)
4444
axes[0, 2].set_title('Custom violinplot 3', fontsize=fs)
4545

46+
axes[0, 3].violinplot(data, pos, points=60, widths=0.7, showmeans=True,
47+
showextrema=True, showmedians=True, bw_method=0.5,
48+
quantiles=[[0.1], [], [], [0.175, 0.954], [0.75],
49+
[0.25]])
50+
axes[0, 3].set_title('Custom violinplot 4', fontsize=fs)
51+
52+
axes[0, 4].violinplot(data[-1:], pos[-1:], points=60, widths=0.7,
53+
showmeans=True, showextrema=True, showmedians=True,
54+
quantiles=[0.05, 0.1, 0.8, 0.9], bw_method=0.5)
55+
axes[0, 4].set_title('Custom violinplot 5', fontsize=fs)
56+
4657
axes[1, 0].violinplot(data, pos, points=80, vert=False, widths=0.7,
4758
showmeans=True, showextrema=True, showmedians=True)
48-
axes[1, 0].set_title('Custom violinplot 4', fontsize=fs)
59+
axes[1, 0].set_title('Custom violinplot 6', fontsize=fs)
4960

5061
axes[1, 1].violinplot(data, pos, points=100, vert=False, widths=0.9,
5162
showmeans=True, showextrema=True, showmedians=True,
5263
bw_method='silverman')
53-
axes[1, 1].set_title('Custom violinplot 5', fontsize=fs)
64+
axes[1, 1].set_title('Custom violinplot 7', fontsize=fs)
5465

5566
axes[1, 2].violinplot(data, pos, points=200, vert=False, widths=1.1,
5667
showmeans=True, showextrema=True, showmedians=True,
5768
bw_method=0.5)
58-
axes[1, 2].set_title('Custom violinplot 6', fontsize=fs)
69+
axes[1, 2].set_title('Custom violinplot 8', fontsize=fs)
70+
71+
axes[1, 3].violinplot(data, pos, points=200, vert=False, widths=1.1,
72+
showmeans=True, showextrema=True, showmedians=True,
73+
quantiles=[[0.1], [], [], [0.175, 0.954], [0.75],
74+
[0.25]],
75+
bw_method=0.5)
76+
axes[1, 3].set_title('Custom violinplot 9', fontsize=fs)
77+
78+
axes[1, 4].violinplot(data[-1:], pos[-1:], points=200, vert=False, widths=1.1,
79+
showmeans=True, showextrema=True, showmedians=True,
80+
quantiles=[0.05, 0.1, 0.8, 0.9], bw_method=0.5)
81+
axes[1, 4].set_title('Custom violinplot 10', fontsize=fs)
5982

6083
for ax in axes.flat:
6184
ax.set_yticklabels([])

lib/matplotlib/axes/_axes.py

Lines changed: 45 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7880,14 +7880,14 @@ def matshow(self, Z, **kwargs):
78807880
@_preprocess_data(replace_names=["dataset"])
78817881
def violinplot(self, dataset, positions=None, vert=True, widths=0.5,
78827882
showmeans=False, showextrema=True, showmedians=False,
7883-
points=100, bw_method=None):
7883+
quantiles=None, points=100, bw_method=None):
78847884
"""
78857885
Make a violin plot.
78867886
78877887
Make a violin plot for each column of *dataset* or each vector in
78887888
sequence *dataset*. Each filled area extends to represent the
78897889
entire data range, with optional lines at the mean, the median,
7890-
the minimum, and the maximum.
7890+
the minimum, the maximum, and user-specified quantiles.
78917891
78927892
Parameters
78937893
----------
@@ -7916,6 +7916,11 @@ def violinplot(self, dataset, positions=None, vert=True, widths=0.5,
79167916
showmedians : bool, default = False
79177917
If `True`, will toggle rendering of the medians.
79187918
7919+
quantiles : array-like, default = None
7920+
If not None, set a list of floats in interval [0, 1] for each violin,
7921+
which stands for the quantiles that will be rendered for that
7922+
violin.
7923+
79197924
points : scalar, default = 100
79207925
Defines the number of points to evaluate each of the
79217926
gaussian kernel density estimations at.
@@ -7953,6 +7958,10 @@ def violinplot(self, dataset, positions=None, vert=True, widths=0.5,
79537958
- ``cmedians``: A `~.collections.LineCollection` instance that
79547959
marks the median values of each of the violin's distribution.
79557960
7961+
- ``cquantiles``: A `~.collections.LineCollection` instance created
7962+
to identify the quantile values of each of the violin's
7963+
distribution.
7964+
79567965
"""
79577966

79587967
def _kde_method(X, coords):
@@ -7962,7 +7971,8 @@ def _kde_method(X, coords):
79627971
kde = mlab.GaussianKDE(X, bw_method)
79637972
return kde.evaluate(coords)
79647973

7965-
vpstats = cbook.violin_stats(dataset, _kde_method, points=points)
7974+
vpstats = cbook.violin_stats(dataset, _kde_method, points=points,
7975+
quantiles=quantiles)
79667976
return self.violin(vpstats, positions=positions, vert=vert,
79677977
widths=widths, showmeans=showmeans,
79687978
showextrema=showextrema, showmedians=showmedians)
@@ -7973,7 +7983,7 @@ def violin(self, vpstats, positions=None, vert=True, widths=0.5,
79737983
79747984
Draw a violin plot for each column of `vpstats`. Each filled area
79757985
extends to represent the entire data range, with optional lines at the
7976-
mean, the median, the minimum, and the maximum.
7986+
mean, the median, the minimum, the maximum, and the quantile values.
79777987
79787988
Parameters
79797989
----------
@@ -7997,6 +8007,11 @@ def violin(self, vpstats, positions=None, vert=True, widths=0.5,
79978007
79988008
- ``max``: The maximum value for this violin's dataset.
79998009
8010+
Optional keys are:
8011+
8012+
- ``quantiles``: A list of scalars containing the quantile values
8013+
for this violin's dataset.
8014+
80008015
positions : array-like, default = [1, 2, ..., n]
80018016
Sets the positions of the violins. The ticks and limits are
80028017
automatically set to match the positions.
@@ -8043,13 +8058,19 @@ def violin(self, vpstats, positions=None, vert=True, widths=0.5,
80438058
80448059
- ``cmedians``: A `~.collections.LineCollection` instance that
80458060
marks the median values of each of the violin's distribution.
8061+
8062+
- ``cquantiles``: A `~.collections.LineCollection` instance created
8063+
to identify the quantile values of each of the violin's
8064+
distribution.
8065+
80468066
"""
80478067

80488068
# Statistical quantities to be plotted on the violins
80498069
means = []
80508070
mins = []
80518071
maxes = []
80528072
medians = []
8073+
quantiles = np.asarray([])
80538074

80548075
# Collections to be returned
80558076
artists = {}
@@ -8106,6 +8127,10 @@ def violin(self, vpstats, positions=None, vert=True, widths=0.5,
81068127
mins.append(stats['min'])
81078128
maxes.append(stats['max'])
81088129
medians.append(stats['median'])
8130+
q = stats.get('quantiles')
8131+
if q is not None:
8132+
# If the 'quantiles' key exists, assume it's a list of floats
8133+
quantiles = np.concatenate((quantiles, q))
81098134
artists['bodies'] = bodies
81108135

81118136
# Render means
@@ -8129,6 +8154,22 @@ def violin(self, vpstats, positions=None, vert=True, widths=0.5,
81298154
pmaxes,
81308155
colors=edgecolor)
81318156

8157+
# Render quantile values
8158+
if quantiles.size > 0:
8159+
# Recalculate ranges for statistics lines for quantiles.
8160+
# ppmins are the left ends of the quantile lines
8161+
ppmins = np.asarray([])
8162+
# ppmaxs are the right ends of the quantile lines
8163+
ppmaxs = np.asarray([])
8164+
for stats, cmin, cmax in zip(vpstats, pmins, pmaxes):
8165+
q = stats.get('quantiles')
8166+
if q is not None:
8167+
ppmins = np.concatenate((ppmins, [cmin] * np.size(q)))
8168+
ppmaxs = np.concatenate((ppmaxs, [cmax] * np.size(q)))
8169+
# Start rendering
8170+
artists['cquantiles'] = perp_lines(quantiles, ppmins, ppmaxs,
8171+
colors=edgecolor)
8172+
81328173
return artists
81338174

81348175
# Methods that are entirely implemented in other modules.

lib/matplotlib/cbook/__init__.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1431,7 +1431,7 @@ def _reshape_2D(X, name):
14311431
raise ValueError("{} must have 2 or fewer dimensions".format(name))
14321432

14331433

1434-
def violin_stats(X, method, points=100):
1434+
def violin_stats(X, method, points=100, quantiles=None):
14351435
"""
14361436
Returns a list of dictionaries of data which can be used to draw a series
14371437
of violin plots. See the `Returns` section below to view the required keys
@@ -1455,6 +1455,12 @@ def violin_stats(X, method, points=100):
14551455
Defines the number of points to evaluate each of the gaussian kernel
14561456
density estimates at.
14571457
1458+
quantiles : array-like, default = None
1459+
Defines (if not None) a list of floats in interval [0, 1] for each
1460+
column of data, which represents the quantiles that will be rendered
1461+
for that column of data. Must have 2 or fewer dimensions. 1D array will
1462+
be treated as a singleton list containing them.
1463+
14581464
Returns
14591465
-------
14601466
@@ -1469,6 +1475,7 @@ def violin_stats(X, method, points=100):
14691475
- median: The median value for this column of data.
14701476
- min: The minimum value for this column of data.
14711477
- max: The maximum value for this column of data.
1478+
- quantiles: The quantile values for this column of data.
14721479
"""
14731480

14741481
# List of dictionaries describing each of the violins.
@@ -1477,13 +1484,27 @@ def violin_stats(X, method, points=100):
14771484
# Want X to be a list of data sequences
14781485
X = _reshape_2D(X, "X")
14791486

1480-
for x in X:
1487+
# Want quantiles to have the same shape as the data sequences
1488+
if quantiles is not None and len(quantiles) != 0:
1489+
quantiles = _reshape_2D(quantiles, "quantiles")
1490+
# Else, mock quantiles if it is None or empty
1491+
else:
1492+
quantiles = [[]] * np.shape(X)[0]
1493+
1494+
# quantiles should have the same length as the dataset
1495+
if np.shape(X)[:1] != np.shape(quantiles)[:1]:
1496+
raise ValueError("List of violinplot statistics and quantiles values"
1497+
" must have the same length")
1498+
1499+
# Zip x and quantiles
1500+
for (x, q) in zip(X, quantiles):
14811501
# Dictionary of results for this distribution
14821502
stats = {}
14831503

14841504
# Calculate basic stats for the distribution
14851505
min_val = np.min(x)
14861506
max_val = np.max(x)
1507+
quantile_val = np.percentile(x, 100 * q)
14871508

14881509
# Evaluate the kernel density estimate
14891510
coords = np.linspace(min_val, max_val, points)
@@ -1495,6 +1516,7 @@ def violin_stats(X, method, points=100):
14951516
stats['median'] = np.median(x)
14961517
stats['min'] = min_val
14971518
stats['max'] = max_val
1519+
stats['quantiles'] = np.atleast_1d(quantile_val)
14981520

14991521
# Append to output
15001522
vpstats.append(stats)

lib/matplotlib/pyplot.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2998,12 +2998,13 @@ def triplot(*args, **kwargs):
29982998
def violinplot(
29992999
dataset, positions=None, vert=True, widths=0.5,
30003000
showmeans=False, showextrema=True, showmedians=False,
3001-
points=100, bw_method=None, *, data=None):
3001+
quantiles=None, points=100, bw_method=None, *, data=None):
30023002
return gca().violinplot(
30033003
dataset, positions=positions, vert=vert, widths=widths,
30043004
showmeans=showmeans, showextrema=showextrema,
3005-
showmedians=showmedians, points=points, bw_method=bw_method,
3006-
**({"data": data} if data is not None else {}))
3005+
showmedians=showmedians, quantiles=quantiles, points=points,
3006+
bw_method=bw_method, **({"data": data} if data is not None
3007+
else {}))
30073008

30083009

30093010
# Autogenerated by boilerplate.py. Do not edit as changes will be lost.

lib/matplotlib/tests/test_axes.py

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2661,7 +2661,8 @@ def test_vert_violinplot_showall():
26612661
np.random.seed(316624790)
26622662
data = [np.random.normal(size=100) for i in range(4)]
26632663
ax.violinplot(data, positions=range(4), showmeans=1, showextrema=1,
2664-
showmedians=1)
2664+
showmedians=1,
2665+
quantiles=[[0.1, 0.9], [0.2, 0.8], [0.3, 0.7], [0.4, 0.6]])
26652666

26662667

26672668
@image_comparison(baseline_images=['violinplot_vert_custompoints_10'],
@@ -2738,7 +2739,8 @@ def test_horiz_violinplot_showall():
27382739
np.random.seed(82762530)
27392740
data = [np.random.normal(size=100) for i in range(4)]
27402741
ax.violinplot(data, positions=range(4), vert=False, showmeans=1,
2741-
showextrema=1, showmedians=1)
2742+
showextrema=1, showmedians=1,
2743+
quantiles=[[0.1, 0.9], [0.2, 0.8], [0.3, 0.7], [0.4, 0.6]])
27422744

27432745

27442746
@image_comparison(baseline_images=['violinplot_horiz_custompoints_10'],
@@ -2781,6 +2783,48 @@ def test_violinplot_bad_widths():
27812783
ax.violinplot(data, positions=range(4), widths=[1, 2, 3])
27822784

27832785

2786+
def test_violinplot_bad_quantiles():
2787+
ax = plt.axes()
2788+
# First 9 digits of frac(sqrt(73))
2789+
np.random.seed(544003745)
2790+
data = [np.random.normal(size=100)]
2791+
2792+
# Different size quantile list and plots
2793+
with pytest.raises(ValueError):
2794+
ax.violinplot(data, quantiles=[[0.1, 0.2], [0.5, 0.7]])
2795+
2796+
2797+
def test_violinplot_outofrange_quantiles():
2798+
ax = plt.axes()
2799+
# First 9 digits of frac(sqrt(79))
2800+
np.random.seed(888194417)
2801+
data = [np.random.normal(size=100)]
2802+
2803+
# Quantile value above 1
2804+
with pytest.raises(ValueError):
2805+
ax.violinplot(data, quantiles=[[0.1, 0.2, 0.3, 1.05]])
2806+
2807+
# Quantile value below 0
2808+
with pytest.raises(ValueError):
2809+
ax.violinplot(data, quantiles=[[-0.05, 0.2, 0.3, 0.75]])
2810+
2811+
2812+
@check_figures_equal(extensions=["png"])
2813+
def test_violinplot_single_list_quantiles(fig_test, fig_ref):
2814+
# Ensures quantile list for 1D can be passed in as single list
2815+
# First 9 digits of frac(sqrt(83))
2816+
np.random.seed(110433579)
2817+
data = [np.random.normal(size=100)]
2818+
2819+
# Test image
2820+
ax = fig_test.subplots()
2821+
ax.violinplot(data, quantiles=[0.1, 0.3, 0.9])
2822+
2823+
# Reference image
2824+
ax = fig_ref.subplots()
2825+
ax.violinplot(data, quantiles=[[0.1, 0.3, 0.9]])
2826+
2827+
27842828
def test_manage_xticks():
27852829
_, ax = plt.subplots()
27862830
ax.set_xlim(0, 4)

0 commit comments

Comments
 (0)