From 14308875e4aed535709b1fe02136969a162542ee Mon Sep 17 00:00:00 2001 From: Emmanuelle Gouillart Date: Fri, 21 Jun 2019 15:13:54 +0200 Subject: [PATCH 1/5] first pass on violin notebook --- notebooks/violin.md | 431 ++++++++++++++++---------------------------- 1 file changed, 151 insertions(+), 280 deletions(-) diff --git a/notebooks/violin.md b/notebooks/violin.md index bd2f6af2d..443414086 100644 --- a/notebooks/violin.md +++ b/notebooks/violin.md @@ -1,15 +1,26 @@ --- jupyter: jupytext: + notebook_metadata_filter: all text_representation: extension: .md format_name: markdown format_version: '1.1' jupytext_version: 1.1.1 kernelspec: - display_name: Python 2 + display_name: Python 3 language: python - name: python2 + name: python3 + language_info: + codemirror_mode: + name: ipython + version: 3 + file_extension: .py + mimetype: text/x-python + name: python + nbconvert_exporter: python + pygments_lexer: ipython3 + version: 3.6.7 plotly: description: How to make violin plots in Python with Plotly. display_as: statistical @@ -23,346 +34,206 @@ jupyter: permalink: python/violin/ thumbnail: thumbnail/violin.jpg title: Violin Plots | Plotly + v4upgrade: true --- -#### New to Plotly? -Plotly's Python library is free and open source! [Get started](https://plot.ly/python/getting-started/) by downloading the client and [reading the primer](https://plot.ly/python/getting-started/). -
You can set up Plotly to work in [online](https://plot.ly/python/getting-started/#initialization-for-online-plotting) or [offline](https://plot.ly/python/getting-started/#initialization-for-offline-plotting) mode, or in [jupyter notebooks](https://plot.ly/python/getting-started/#start-plotting-online). -
We also have a quick-reference [cheatsheet](https://images.plot.ly/plotly-documentation/images/python_cheat_sheet.pdf) (new!) to help you get started! +## Violin Plot with plotly express +A [violin plot](https://en.wikipedia.org/wiki/Violin_plot) is a statistical representation of numerical data. It is similar to a [box plot](https://plot.ly/python/box-plots/), with the addition of a rotated [kernel density](https://en.wikipedia.org/wiki/Kernel_density_estimation) plot on each side. -#### Version Check -Plotly's python package is updated frequently. Run `pip install plotly --upgrade` to use the latest version. +### Basic Violin Plot with plotly express + +Plotly express functions take as argument a tidy [pandas DataFrame](https://pandas.pydata.org/pandas-docs/stable/getting_started/10min.html). + +```python +import plotly.express as px + +tips = px.data.tips() +fig = px.violin(tips, y="total_bill") +fig.show() +``` + +### Violin plot with box and data points + +```python +import plotly.express as px + +tips = px.data.tips() +fig = px.violin(tips, y="total_bill", box=True, # draw box plot inside the violin + points='all', # can be 'outliers', or False + ) +fig.show() +``` + +### Multiple Violin Plots + +```python +import plotly.express as px + +tips = px.data.tips() +fig = px.violin(tips, y="tip", x="smoker", color="sex", box=True, points="all", + hover_data=tips.columns) +fig.show() +``` ```python -import plotly -plotly.__version__ +import plotly.express as px + +tips = px.data.tips() +fig = px.violin(tips, y="tip", color="sex", + violinmode='overlay', # draw violins on top of each other + # default violinmode is 'group' as in example above + hover_data=tips.columns) +fig.show() ``` +## Violin Plot with go.Violin + +When data are not available as a tidy dataframe, you can use the more generic function `go.Violin` from `plotly.graph_objects`. 
All the options of `go.Violin` are documented in the reference https://plot.ly/python/reference/#violin + #### Basic Violin Plot ```python -import plotly.plotly as py -import plotly.graph_objs as go +import plotly.graph_objects as go import pandas as pd df = pd.read_csv("https://raw.githubusercontent.com/plotly/datasets/master/violin_data.csv") -fig = { - "data": [{ - "type": 'violin', - "y": df['total_bill'], - "box": { - "visible": True - }, - "line": { - "color": 'black' - }, - "meanline": { - "visible": True - }, - "fillcolor": '#8dd3c7', - "opacity": 0.6, - "x0": 'Total Bill' - }], - "layout" : { - "title": "", - "yaxis": { - "zeroline": False, - } - } -} - -py.iplot(fig, filename = 'violin/basic', validate = False) +fig = go.Figure(data=go.Violin(y=df['total_bill'], box_visible=True, line_color='black', + meanline_visible=True, fillcolor='lightseagreen', opacity=0.6, + x0='Total Bill')) + +fig.update_layout(yaxis_zeroline=False) +fig.show() ``` #### Multiple Traces ```python -import plotly.plotly as py -import plotly.graph_objs as go +import plotly.graph_objects as go import pandas as pd df = pd.read_csv("https://raw.githubusercontent.com/plotly/datasets/master/violin_data.csv") -data = [] -for i in range(0,len(pd.unique(df['day']))): - trace = { - "type": 'violin', - "x": df['day'][df['day'] == pd.unique(df['day'])[i]], - "y": df['total_bill'][df['day'] == pd.unique(df['day'])[i]], - "name": pd.unique(df['day'])[i], - "box": { - "visible": True - }, - "meanline": { - "visible": True - } - } - data.append(trace) +fig = go.Figure() + +days = ['Thur', 'Fri', 'Sat', 'Sun'] +for day in days: + fig.add_trace(go.Violin(x=df['day'][df['day'] == day], + y=df['total_bill'][df['day'] == day], + name=day, + box_visible=True, + meanline_visible=True)) -fig = { - "data": data, - "layout" : { - "title": "", - "yaxis": { - "zeroline": False, - } - } -} - - -py.iplot(fig, filename='violin/multiple', validate = False) +fig.show() ``` #### Grouped Violin Plot ```python -import 
plotly.plotly as py -import plotly.graph_objs as go +import plotly.graph_objects as go import pandas as pd df = pd.read_csv("https://raw.githubusercontent.com/plotly/datasets/master/violin_data.csv") -fig = { - "data": [ - { - "type": 'violin', - "x": df['day'] [ df['sex'] == 'Male' ], - "y": df['total_bill'] [ df['sex'] == 'Male' ], - "legendgroup": 'M', - "scalegroup": 'M', - "name": 'M', - "box": { - "visible": True - }, - "meanline": { - "visible": True - }, - "line": { - "color": 'blue' - } - }, - { - "type": 'violin', - "x": df['day'] [ df['sex'] == 'Female' ], - "y": df['total_bill'] [ df['sex'] == 'Female' ], - "legendgroup": 'F', - "scalegroup": 'F', - "name": 'F', - "box": { - "visible": True - }, - "meanline": { - "visible": True - }, - "line": { - "color": 'pink' - } - } - ], - "layout" : { - "yaxis": { - "zeroline": False, - }, - "violinmode": "group" - } -} - - -py.iplot(fig, filename = 'violin/grouped', validate = False) +fig = go.Figure() + +fig.add_trace(go.Violin(x=df['day'][ df['sex'] == 'Male' ], + y=df['total_bill'][ df['sex'] == 'Male' ], + legendgroup='M', scalegroup='M', name='M', + line_color='blue') + ) +fig.add_trace(go.Violin(x=df['day'][ df['sex'] == 'Female' ], + y=df['total_bill'][ df['sex'] == 'Female' ], + legendgroup='F', scalegroup='F', name='F', + line_color='orange') + ) + +fig.update_traces(box_visible=True, meanline_visible=True) +fig.update_layout(violinmode='group') +fig.show() ``` #### Split Violin Plot ```python -import plotly.plotly as py -import plotly.graph_objs as go +import plotly.graph_objects as go import pandas as pd df = pd.read_csv("https://raw.githubusercontent.com/plotly/datasets/master/violin_data.csv") -fig = { - "data": [ - { - "type": 'violin', - "x": df['day'] [ df['smoker'] == 'Yes' ], - "y": df['total_bill'] [ df['smoker'] == 'Yes' ], - "legendgroup": 'Yes', - "scalegroup": 'Yes', - "name": 'Yes', - "side": 'negative', - "box": { - "visible": True - }, - "meanline": { - "visible": True - }, - "line": { - 
"color": 'blue' - } - }, - { - "type": 'violin', - "x": df['day'] [ df['smoker'] == 'No' ], - "y": df['total_bill'] [ df['smoker'] == 'No' ], - "legendgroup": 'No', - "scalegroup": 'No', - "name": 'No', - "side": 'positive', - "box": { - "visible": True - }, - "meanline": { - "visible": True - }, - "line": { - "color": 'green' - } - } - ], - "layout" : { - "yaxis": { - "zeroline": False, - }, - "violingap": 0, - "violinmode": "overlay" - } -} - - -py.iplot(fig, filename = 'violin/split', validate = False) +fig = go.Figure() + +fig.add_trace(go.Violin(x=df['day'][ df['smoker'] == 'Yes' ], + y=df['total_bill'][ df['smoker'] == 'Yes' ], + legendgroup='Yes', scalegroup='Yes', name='Yes', + side='negative', + line_color='blue') + ) +fig.add_trace(go.Violin(x=df['day'][ df['smoker'] == 'No' ], + y=df['total_bill'][ df['smoker'] == 'No' ], + legendgroup='No', scalegroup='No', name='No', + side='positive', + line_color='orange') + ) +fig.update_traces(meanline_visible=True) +fig.update_layout(violingap=0, violinmode='overlay') +fig.show() ``` #### Advanced Violin Plot ```python -import plotly.plotly as py -import plotly.graph_objs as go +import plotly.graph_objects as go import pandas as pd df = pd.read_csv("https://raw.githubusercontent.com/plotly/datasets/master/violin_data.csv") -pointposMale = [-0.9,-1.1,-0.6,-0.3] -pointposFemale = [0.45,0.55,1,0.4] -showLegend = [True,False,False,False] +pointpos_male = [-0.9,-1.1,-0.6,-0.3] +pointpos_female = [0.45,0.55,1,0.4] +show_legend = [True,False,False,False] -data = [] -for i in range(0,len(pd.unique(df['day']))): - male = { - "type": 'violin', - "x": df['day'][ (df['sex'] == 'Male') & (df['day'] == pd.unique(df['day'])[i]) ], - "y": df['total_bill'][ (df['sex'] == 'Male') & (df['day'] == pd.unique(df['day'])[i]) ], - "legendgroup": 'M', - "scalegroup": 'M', - "name": 'M', - "side": 'negative', - "box": { - "visible": True - }, - "points": 'all', - "pointpos": pointposMale[i], - "jitter": 0, - "scalemode": 'count', - 
"meanline": { - "visible": True - }, - "line": { - "color": '#8dd3c7' - }, - "marker": { - "line": { - "width": 2, - "color": '#8dd3c7' - } - }, - "span": [ - 0 - ], - "showlegend": showLegend[i] - } - data.append(male) - female = { - "type": 'violin', - "x": df['day'] [ (df['sex'] == 'Female') & (df['day'] == pd.unique(df['day'])[i]) ], - "y": df['total_bill'] [ (df['sex'] == 'Female') & (df['day'] == pd.unique(df['day'])[i]) ], - "legendgroup": 'F', - "scalegroup": 'F', - "name": 'F', - "side": 'positive', - "box": { - "visible": True - }, - "points": 'all', - "pointpos": pointposFemale[i], - "jitter": 0, - "scalemode": 'count', - "meanline": { - "visible": True - }, - "line": { - "color": '#bebada' - }, - "marker": { - "line": { - "width": 2, - "color": '#bebada' - } - }, - "span": [ - 0 - ], - "showlegend": showLegend[i] - } - data.append(female) - +fig = go.Figure() -fig = { - "data": data, - "layout" : { - "title": "Total bill distribution
scaled by number of bills per gender", - "yaxis": { - "zeroline": False, - }, - "violingap": 0, - "violingroupgap": 0, - "violinmode": "overlay" - } -} - - -py.iplot(fig, filename='violin/advanced', validate = False) +for i in range(0,len(pd.unique(df['day']))): + fig.add_trace(go.Violin(x=df['day'][(df['sex'] == 'Male') & + (df['day'] == pd.unique(df['day'])[i])], + y=df['total_bill'][(df['sex'] == 'Male')& + (df['day'] == pd.unique(df['day'])[i])], + legendgroup='M', scalegroup='M', name='M', + side='negative', + pointpos=pointpos_male[i], # where to position points + line_color='lightseagreen', + showlegend=show_legend[i]) + ) + fig.add_trace(go.Violin(x=df['day'][(df['sex'] == 'Female') & + (df['day'] == pd.unique(df['day'])[i])], + y=df['total_bill'][(df['sex'] == 'Female')& + (df['day'] == pd.unique(df['day'])[i])], + legendgroup='F', scalegroup='F', name='F', + side='positive', + pointpos=pointpos_female[i], + line_color='mediumpurple', + showlegend=show_legend[i]) + ) + +# update characteristics shared by all traces +fig.update_traces(meanline_visible=True, + points='all', # show all points + jitter=0.05, # add some jitter on points for better visibility + scalemode='count') #scale violin plot area with total count +fig.update_layout( + title_text="Total bill distribution
scaled by number of bills per gender", + violingap=0, violingroupgap=0, violinmode='overlay') +fig.show() ``` #### Reference See https://plot.ly/python/reference/#violin for more information and chart attribute options! -```python -from IPython.display import display, HTML - -display(HTML('')) -display(HTML('')) - -! pip install git+https://github.com/plotly/publisher.git --upgrade -import publisher -publisher.publish( - 'violin.ipynb', 'python/violin/', 'Violin Plots', - 'How to make violin plots in Python with Plotly.', - title = 'Violin Plots | Plotly', - has_thumbnail='true', - thumbnail='thumbnail/violin.jpg', - language='python', - display_as='statistical', - order=12, - ipynb='~notebook_demo/201') -``` - -```python - -``` \ No newline at end of file From 19d179856a49d11e1e8b012e5fc637685acec3c0 Mon Sep 17 00:00:00 2001 From: Emmanuelle Gouillart Date: Fri, 21 Jun 2019 15:16:26 +0200 Subject: [PATCH 2/5] added linkto statistical charts page --- notebooks/violin.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/notebooks/violin.md b/notebooks/violin.md index 443414086..666f06689 100644 --- a/notebooks/violin.md +++ b/notebooks/violin.md @@ -39,7 +39,9 @@ jupyter: ## Violin Plot with plotly express -A [violin plot](https://en.wikipedia.org/wiki/Violin_plot) is a statistical representation of numerical data. It is similar to a [box plot](https://plot.ly/python/box-plots/), with the addition of a rotated [kernel density](https://en.wikipedia.org/wiki/Kernel_density_estimation) plot on each side. +A [violin plot](https://en.wikipedia.org/wiki/Violin_plot) is a statistical representation of numerical data. It is similar to a [box plot](https://plot.ly/python/box-plots/), with the addition of a rotated [kernel density](https://en.wikipedia.org/wiki/Kernel_density_estimation) plot on each side. + +See also the [list of other statistical charts](https://plot.ly/python/statistical-charts/). 
### Basic Violin Plot with plotly express From 4726a4eb042aef9e3e12bed0473ab7f8bd9f773c Mon Sep 17 00:00:00 2001 From: Emmanuelle Gouillart Date: Fri, 21 Jun 2019 15:41:02 +0200 Subject: [PATCH 3/5] ohlc notebook --- notebooks/ohlc-charts.md | 178 +++++++++++---------------------------- 1 file changed, 51 insertions(+), 127 deletions(-) diff --git a/notebooks/ohlc-charts.md b/notebooks/ohlc-charts.md index 6abfbd297..b4de8c404 100644 --- a/notebooks/ohlc-charts.md +++ b/notebooks/ohlc-charts.md @@ -10,146 +10,96 @@ jupyter: display_name: Python 3 language: python name: python3 - plotly: - description: How to make interactive OHLC charts in Python with Plotly. Six examples - of OHLC charts with Pandas, time series, and yahoo finance data. - display_as: financial - has_thumbnail: true - ipynb: ~notebook_demo/53 - language: python - layout: user-guide - name: OHLC Charts - order: 1 - page_type: example_index - permalink: python/ohlc-charts/ - thumbnail: thumbnail/ohlc.jpg --- -#### New to Plotly? -Plotly's Python library is free and open source! [Get started](https://plot.ly/python/getting-started/) by downloading the client and [reading the primer](https://plot.ly/python/getting-started/). -
You can set up Plotly to work in [online](https://plot.ly/python/getting-started/#initialization-for-online-plotting) or [offline](https://plot.ly/python/getting-started/#initialization-for-offline-plotting) mode, or in [jupyter notebooks](https://plot.ly/python/getting-started/#start-plotting-online). -
We also have a quick-reference [cheatsheet](https://images.plot.ly/plotly-documentation/images/python_cheat_sheet.pdf) (new!) to help you get started! -#### Version Check -Plotly's Python API is updated frequently. Run `pip install plotly --upgrade` to update your Plotly version. +The [OHLC](https://en.wikipedia.org/wiki/Open-high-low-close_chart) chart (for open, high, low and close) is a style of financial chart describing open, high, low and close values for a given `x` coordinate (most likely time). The tips of the lines represent the `low` and `high` values and the horizontal segments represent the `open` and `close` values. Sample points where the close value is higher (lower) than the open value are called increasing (decreasing). By default, increasing items are drawn in green whereas decreasing items are drawn in red. -```python -import plotly -plotly.__version__ -``` +See also [Candlestick Charts](https://plot.ly/python/candlestick-charts/) and [other financial charts](https://plot.ly/python/#financial-charts). 
-##### Simple OHLC Chart with Pandas +#### Simple OHLC Chart with Pandas ```python -import plotly.plotly as py -import plotly.graph_objs as go - +import plotly.graph_objects as go import pandas as pd -from datetime import datetime df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/finance-charts-apple.csv') -trace = go.Ohlc(x=df['Date'], - open=df['AAPL.Open'], - high=df['AAPL.High'], - low=df['AAPL.Low'], - close=df['AAPL.Close']) -data = [trace] -py.iplot(data, filename='simple_ohlc') +fig = go.Figure(data=go.Ohlc(x=df['Date'], + open=df['AAPL.Open'], + high=df['AAPL.High'], + low=df['AAPL.Low'], + close=df['AAPL.Close'])) +fig.show() ``` -##### OHLC Chart without Rangeslider +#### OHLC Chart without Rangeslider ```python -import plotly.plotly as py -import plotly.graph_objs as go +import plotly.graph_objects as go import pandas as pd -from datetime import datetime + df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/finance-charts-apple.csv') -trace = go.Ohlc(x=df['Date'], +fig = go.Figure(data=go.Ohlc(x=df['Date'], open=df['AAPL.Open'], high=df['AAPL.High'], low=df['AAPL.Low'], - close=df['AAPL.Close']) - -layout = go.Layout( - xaxis = dict( - rangeslider = dict( - visible = False - ) - ) -) - -data = [trace] - -fig = go.Figure(data=data, layout=layout) -py.iplot(fig, filename='OHLC without Rangeslider') + close=df['AAPL.Close'])) +fig.update(layout_xaxis_rangeslider_visible=False) +fig.show() ``` #### Adding Customized Text and Annotations ```python -import plotly.plotly as py -import plotly.graph_objs as go - -from datetime import datetime +import plotly.graph_objects as go import pandas as pd df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/finance-charts-apple.csv') -trace = go.Ohlc(x=df['Date'], +fig = go.Figure(data=go.Ohlc(x=df['Date'], open=df['AAPL.Open'], high=df['AAPL.High'], low=df['AAPL.Low'], - close=df['AAPL.Close']) -data = [trace] -layout = { - 'title': 'The Great 
Recession', - 'yaxis': {'title': 'AAPL Stock'}, - 'shapes': [{ - 'x0': '2016-12-09', 'x1': '2016-12-09', - 'y0': 0, 'y1': 1, 'xref': 'x', 'yref': 'paper', - 'line': {'color': 'rgb(30,30,30)', 'width': 1} - }], - 'annotations': [{ - 'x': '2016-12-09', 'y': 0.05, 'xref': 'x', 'yref': 'paper', - 'showarrow': False, 'xanchor': 'left', - 'text': 'Increase Period Begins' - }] -} -fig = dict(data=data, layout=layout) -py.iplot(fig, filename='aapl-recession-ohlc') + close=df['AAPL.Close'])) + +fig.update_layout( + title='The Great Recession', + yaxis_title='AAPL Stock', + shapes = [dict( + x0='2016-12-09', x1='2016-12-09', y0=0, y1=1, xref='x', yref='paper', + line_width=2)], + annotations=[dict( + x='2016-12-09', y=0.05, xref='x', yref='paper', + showarrow=False, xanchor='left', text='Increase Period Begins')] +) + +fig.show() ``` #### Custom OHLC Colors ```python -import plotly.plotly as py -import plotly.graph_objs as go - +import plotly.graph_objects as go import pandas as pd -from datetime import datetime df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/finance-charts-apple.csv') -trace = go.Ohlc(x=df['Date'], - open=df['AAPL.Open'], - high=df['AAPL.High'], - low=df['AAPL.Low'], - close=df['AAPL.Close'], - increasing=dict(line=dict(color= '#17BECF')), - decreasing=dict(line=dict(color= '#7F7F7F'))) -data = [trace] -py.iplot(data, filename='styled_ohlc') +fig = go.Figure(data=[go.Ohlc( + x=df['Date'], + open=df['AAPL.Open'], high=df['AAPL.High'], + low=df['AAPL.Low'], close=df['AAPL.Close'], + increasing_line_color= 'cyan', decreasing_line_color= 'gray' +)]) +fig.show() ``` -##### Simple OHLC with `datetime` Objects +#### Simple OHLC with `datetime` Objects ```python -import plotly.plotly as py -import plotly.graph_objs as go +import plotly.graph_objects as go from datetime import datetime @@ -163,20 +113,16 @@ dates = [datetime(year=2013, month=10, day=10), datetime(year=2014, month=1, day=10), datetime(year=2014, month=2, day=10)] -trace = 
go.Ohlc(x=dates, - open=open_data, - high=high_data, - low=low_data, - close=close_data) -data = [trace] -py.iplot(data, filename='ohlc_datetime') +fig = go.Figure(data=[go.Ohlc(x=dates, + open=open_data, high=high_data, + low=low_data, close=close_data)]) +fig.show() ``` ### Custom Hovertext ```python -import plotly.plotly as py -import plotly.graph_objs as go +import plotly.graph_objects as go import pandas as pd from datetime import datetime @@ -187,38 +133,16 @@ for i in range(len(df['AAPL.Open'])): df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/finance-charts-apple.csv') -trace = go.Ohlc(x=df['Date'], +fig = go.Figure(data=go.Ohlc(x=df['Date'], open=df['AAPL.Open'], high=df['AAPL.High'], low=df['AAPL.Low'], close=df['AAPL.Close'], text=hovertext, - hoverinfo='text') -data = [trace] -py.iplot(data, filename='ohlc_custom_hover') + hoverinfo='text')) +fig.show() ``` #### Reference For more information on candlestick attributes, see: https://plot.ly/python/reference/#ohlc -```python -from IPython.display import display, HTML - -display(HTML('')) -display(HTML('')) - -!pip install git+https://github.com/plotly/publisher.git --upgrade -import publisher -publisher.publish( - 'ohlc-charts.ipynb', 'python/ohlc-charts/', 'Python OHLC Charts | plotly', - 'How to make interactive OHLC charts in Python with Plotly. ' - 'Six examples of OHLC charts with Pandas, time series, and yahoo finance data.', - name = 'OHLC Charts', - thumbnail='thumbnail/ohlc.jpg', language='python', - page_type='example_index', has_thumbnail='true', display_as='financial', order=1, - ipynb= '~notebook_demo/53') -``` - -```python - -``` \ No newline at end of file From 6995e14dd3d6fc4faf14eb7d0733b5ced35edcfc Mon Sep 17 00:00:00 2001 From: Emmanuelle Gouillart Date: Fri, 21 Jun 2019 15:43:34 +0200 Subject: [PATCH 4/5] Revert "added linkto statistical charts page" This reverts commit 19d179856a49d11e1e8b012e5fc637685acec3c0. 
--- notebooks/violin.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/notebooks/violin.md b/notebooks/violin.md index 666f06689..443414086 100644 --- a/notebooks/violin.md +++ b/notebooks/violin.md @@ -39,9 +39,7 @@ jupyter: ## Violin Plot with plotly express -A [violin plot](https://en.wikipedia.org/wiki/Violin_plot) is a statistical representation of numerical data. It is similar to a [box plot](https://plot.ly/python/box-plots/), with the addition of a rotated [kernel density](https://en.wikipedia.org/wiki/Kernel_density_estimation) plot on each side. - -See also the [list of other statistical charts](https://plot.ly/python/statistical-charts/). +A [violin plot](https://en.wikipedia.org/wiki/Violin_plot) is a statistical representation of numerical data. It is similar to a [box plot](https://plot.ly/python/box-plots/), with the addition of a rotated [kernel density](https://en.wikipedia.org/wiki/Kernel_density_estimation) plot on each side. ### Basic Violin Plot with plotly express From 56b25ba91bbc64db2a442897c59b66948b3a5ecb Mon Sep 17 00:00:00 2001 From: Emmanuelle Gouillart Date: Fri, 21 Jun 2019 15:43:49 +0200 Subject: [PATCH 5/5] Revert "first pass on violin notebook" This reverts commit 14308875e4aed535709b1fe02136969a162542ee. 
--- notebooks/violin.md | 431 ++++++++++++++++++++++++++++---------------- 1 file changed, 280 insertions(+), 151 deletions(-) diff --git a/notebooks/violin.md b/notebooks/violin.md index 443414086..bd2f6af2d 100644 --- a/notebooks/violin.md +++ b/notebooks/violin.md @@ -1,26 +1,15 @@ --- jupyter: jupytext: - notebook_metadata_filter: all text_representation: extension: .md format_name: markdown format_version: '1.1' jupytext_version: 1.1.1 kernelspec: - display_name: Python 3 + display_name: Python 2 language: python - name: python3 - language_info: - codemirror_mode: - name: ipython - version: 3 - file_extension: .py - mimetype: text/x-python - name: python - nbconvert_exporter: python - pygments_lexer: ipython3 - version: 3.6.7 + name: python2 plotly: description: How to make violin plots in Python with Plotly. display_as: statistical @@ -34,206 +23,346 @@ jupyter: permalink: python/violin/ thumbnail: thumbnail/violin.jpg title: Violin Plots | Plotly - v4upgrade: true --- -## Violin Plot with plotly express +#### New to Plotly? +Plotly's Python library is free and open source! [Get started](https://plot.ly/python/getting-started/) by downloading the client and [reading the primer](https://plot.ly/python/getting-started/). +
You can set up Plotly to work in [online](https://plot.ly/python/getting-started/#initialization-for-online-plotting) or [offline](https://plot.ly/python/getting-started/#initialization-for-offline-plotting) mode, or in [jupyter notebooks](https://plot.ly/python/getting-started/#start-plotting-online). +
We also have a quick-reference [cheatsheet](https://images.plot.ly/plotly-documentation/images/python_cheat_sheet.pdf) (new!) to help you get started! -A [violin plot](https://en.wikipedia.org/wiki/Violin_plot) is a statistical representation of numerical data. It is similar to a [box plot](https://plot.ly/python/box-plots/), with the addition of a rotated [kernel density](https://en.wikipedia.org/wiki/Kernel_density_estimation) plot on each side. -### Basic Violin Plot with plotly express - -Plotly express functions take as argument a tidy [pandas DataFrame](https://pandas.pydata.org/pandas-docs/stable/getting_started/10min.html). - -```python -import plotly.express as px - -tips = px.data.tips() -fig = px.violin(tips, y="total_bill") -fig.show() -``` - -### Violin plot with box and data points - -```python -import plotly.express as px - -tips = px.data.tips() -fig = px.violin(tips, y="total_bill", box=True, # draw box plot inside the violin - points='all', # can be 'outliers', or False - ) -fig.show() -``` - -### Multiple Violin Plots - -```python -import plotly.express as px - -tips = px.data.tips() -fig = px.violin(tips, y="tip", x="smoker", color="sex", box=True, points="all", - hover_data=tips.columns) -fig.show() -``` +#### Version Check +Plotly's python package is updated frequently. Run `pip install plotly --upgrade` to use the latest version. ```python -import plotly.express as px - -tips = px.data.tips() -fig = px.violin(tips, y="tip", color="sex", - violinmode='overlay', # draw violins on top of each other - # default violinmode is 'group' as in example above - hover_data=tips.columns) -fig.show() +import plotly +plotly.__version__ ``` -## Violin Plot with go.Violin - -When data are not available as a tidy dataframe, you can use the more generic function `go.Violin` from `plotly.graph_objects`. 
All the options of `go.Violin` are documented in the reference https://plot.ly/python/reference/#violin - #### Basic Violin Plot ```python -import plotly.graph_objects as go +import plotly.plotly as py +import plotly.graph_objs as go import pandas as pd df = pd.read_csv("https://raw.githubusercontent.com/plotly/datasets/master/violin_data.csv") -fig = go.Figure(data=go.Violin(y=df['total_bill'], box_visible=True, line_color='black', - meanline_visible=True, fillcolor='lightseagreen', opacity=0.6, - x0='Total Bill')) - -fig.update_layout(yaxis_zeroline=False) -fig.show() +fig = { + "data": [{ + "type": 'violin', + "y": df['total_bill'], + "box": { + "visible": True + }, + "line": { + "color": 'black' + }, + "meanline": { + "visible": True + }, + "fillcolor": '#8dd3c7', + "opacity": 0.6, + "x0": 'Total Bill' + }], + "layout" : { + "title": "", + "yaxis": { + "zeroline": False, + } + } +} + +py.iplot(fig, filename = 'violin/basic', validate = False) ``` #### Multiple Traces ```python -import plotly.graph_objects as go +import plotly.plotly as py +import plotly.graph_objs as go import pandas as pd df = pd.read_csv("https://raw.githubusercontent.com/plotly/datasets/master/violin_data.csv") -fig = go.Figure() - -days = ['Thur', 'Fri', 'Sat', 'Sun'] +data = [] +for i in range(0,len(pd.unique(df['day']))): + trace = { + "type": 'violin', + "x": df['day'][df['day'] == pd.unique(df['day'])[i]], + "y": df['total_bill'][df['day'] == pd.unique(df['day'])[i]], + "name": pd.unique(df['day'])[i], + "box": { + "visible": True + }, + "meanline": { + "visible": True + } + } + data.append(trace) -for day in days: - fig.add_trace(go.Violin(x=df['day'][df['day'] == day], - y=df['total_bill'][df['day'] == day], - name=day, - box_visible=True, - meanline_visible=True)) -fig.show() +fig = { + "data": data, + "layout" : { + "title": "", + "yaxis": { + "zeroline": False, + } + } +} + + +py.iplot(fig, filename='violin/multiple', validate = False) ``` #### Grouped Violin Plot ```python -import 
plotly.graph_objects as go +import plotly.plotly as py +import plotly.graph_objs as go import pandas as pd df = pd.read_csv("https://raw.githubusercontent.com/plotly/datasets/master/violin_data.csv") -fig = go.Figure() - -fig.add_trace(go.Violin(x=df['day'][ df['sex'] == 'Male' ], - y=df['total_bill'][ df['sex'] == 'Male' ], - legendgroup='M', scalegroup='M', name='M', - line_color='blue') - ) -fig.add_trace(go.Violin(x=df['day'][ df['sex'] == 'Female' ], - y=df['total_bill'][ df['sex'] == 'Female' ], - legendgroup='F', scalegroup='F', name='F', - line_color='orange') - ) - -fig.update_traces(box_visible=True, meanline_visible=True) -fig.update_layout(violinmode='group') -fig.show() +fig = { + "data": [ + { + "type": 'violin', + "x": df['day'] [ df['sex'] == 'Male' ], + "y": df['total_bill'] [ df['sex'] == 'Male' ], + "legendgroup": 'M', + "scalegroup": 'M', + "name": 'M', + "box": { + "visible": True + }, + "meanline": { + "visible": True + }, + "line": { + "color": 'blue' + } + }, + { + "type": 'violin', + "x": df['day'] [ df['sex'] == 'Female' ], + "y": df['total_bill'] [ df['sex'] == 'Female' ], + "legendgroup": 'F', + "scalegroup": 'F', + "name": 'F', + "box": { + "visible": True + }, + "meanline": { + "visible": True + }, + "line": { + "color": 'pink' + } + } + ], + "layout" : { + "yaxis": { + "zeroline": False, + }, + "violinmode": "group" + } +} + + +py.iplot(fig, filename = 'violin/grouped', validate = False) ``` #### Split Violin Plot ```python -import plotly.graph_objects as go +import plotly.plotly as py +import plotly.graph_objs as go import pandas as pd df = pd.read_csv("https://raw.githubusercontent.com/plotly/datasets/master/violin_data.csv") -fig = go.Figure() - -fig.add_trace(go.Violin(x=df['day'][ df['smoker'] == 'Yes' ], - y=df['total_bill'][ df['smoker'] == 'Yes' ], - legendgroup='Yes', scalegroup='Yes', name='Yes', - side='negative', - line_color='blue') - ) -fig.add_trace(go.Violin(x=df['day'][ df['smoker'] == 'No' ], - y=df['total_bill'][ 
df['smoker'] == 'No' ], - legendgroup='No', scalegroup='No', name='No', - side='positive', - line_color='orange') - ) -fig.update_traces(meanline_visible=True) -fig.update_layout(violingap=0, violinmode='overlay') -fig.show() +fig = { + "data": [ + { + "type": 'violin', + "x": df['day'] [ df['smoker'] == 'Yes' ], + "y": df['total_bill'] [ df['smoker'] == 'Yes' ], + "legendgroup": 'Yes', + "scalegroup": 'Yes', + "name": 'Yes', + "side": 'negative', + "box": { + "visible": True + }, + "meanline": { + "visible": True + }, + "line": { + "color": 'blue' + } + }, + { + "type": 'violin', + "x": df['day'] [ df['smoker'] == 'No' ], + "y": df['total_bill'] [ df['smoker'] == 'No' ], + "legendgroup": 'No', + "scalegroup": 'No', + "name": 'No', + "side": 'positive', + "box": { + "visible": True + }, + "meanline": { + "visible": True + }, + "line": { + "color": 'green' + } + } + ], + "layout" : { + "yaxis": { + "zeroline": False, + }, + "violingap": 0, + "violinmode": "overlay" + } +} + + +py.iplot(fig, filename = 'violin/split', validate = False) ``` #### Advanced Violin Plot ```python -import plotly.graph_objects as go +import plotly.plotly as py +import plotly.graph_objs as go import pandas as pd df = pd.read_csv("https://raw.githubusercontent.com/plotly/datasets/master/violin_data.csv") -pointpos_male = [-0.9,-1.1,-0.6,-0.3] -pointpos_female = [0.45,0.55,1,0.4] -show_legend = [True,False,False,False] - -fig = go.Figure() +pointposMale = [-0.9,-1.1,-0.6,-0.3] +pointposFemale = [0.45,0.55,1,0.4] +showLegend = [True,False,False,False] +data = [] for i in range(0,len(pd.unique(df['day']))): - fig.add_trace(go.Violin(x=df['day'][(df['sex'] == 'Male') & - (df['day'] == pd.unique(df['day'])[i])], - y=df['total_bill'][(df['sex'] == 'Male')& - (df['day'] == pd.unique(df['day'])[i])], - legendgroup='M', scalegroup='M', name='M', - side='negative', - pointpos=pointpos_male[i], # where to position points - line_color='lightseagreen', - showlegend=show_legend[i]) - ) - 
fig.add_trace(go.Violin(x=df['day'][(df['sex'] == 'Female') & - (df['day'] == pd.unique(df['day'])[i])], - y=df['total_bill'][(df['sex'] == 'Female')& - (df['day'] == pd.unique(df['day'])[i])], - legendgroup='F', scalegroup='F', name='F', - side='positive', - pointpos=pointpos_female[i], - line_color='mediumpurple', - showlegend=show_legend[i]) - ) - -# update characteristics shared by all traces -fig.update_traces(meanline_visible=True, - points='all', # show all points - jitter=0.05, # add some jitter on points for better visibility - scalemode='count') #scale violin plot area with total count -fig.update_layout( - title_text="Total bill distribution
scaled by number of bills per gender", - violingap=0, violingroupgap=0, violinmode='overlay') -fig.show() + male = { + "type": 'violin', + "x": df['day'][ (df['sex'] == 'Male') & (df['day'] == pd.unique(df['day'])[i]) ], + "y": df['total_bill'][ (df['sex'] == 'Male') & (df['day'] == pd.unique(df['day'])[i]) ], + "legendgroup": 'M', + "scalegroup": 'M', + "name": 'M', + "side": 'negative', + "box": { + "visible": True + }, + "points": 'all', + "pointpos": pointposMale[i], + "jitter": 0, + "scalemode": 'count', + "meanline": { + "visible": True + }, + "line": { + "color": '#8dd3c7' + }, + "marker": { + "line": { + "width": 2, + "color": '#8dd3c7' + } + }, + "span": [ + 0 + ], + "showlegend": showLegend[i] + } + data.append(male) + female = { + "type": 'violin', + "x": df['day'] [ (df['sex'] == 'Female') & (df['day'] == pd.unique(df['day'])[i]) ], + "y": df['total_bill'] [ (df['sex'] == 'Female') & (df['day'] == pd.unique(df['day'])[i]) ], + "legendgroup": 'F', + "scalegroup": 'F', + "name": 'F', + "side": 'positive', + "box": { + "visible": True + }, + "points": 'all', + "pointpos": pointposFemale[i], + "jitter": 0, + "scalemode": 'count', + "meanline": { + "visible": True + }, + "line": { + "color": '#bebada' + }, + "marker": { + "line": { + "width": 2, + "color": '#bebada' + } + }, + "span": [ + 0 + ], + "showlegend": showLegend[i] + } + data.append(female) + + +fig = { + "data": data, + "layout" : { + "title": "Total bill distribution
scaled by number of bills per gender", + "yaxis": { + "zeroline": False, + }, + "violingap": 0, + "violingroupgap": 0, + "violinmode": "overlay" + } +} + + +py.iplot(fig, filename='violin/advanced', validate = False) ``` #### Reference See https://plot.ly/python/reference/#violin for more information and chart attribute options! +```python +from IPython.display import display, HTML + +display(HTML('')) +display(HTML('')) + +! pip install git+https://github.com/plotly/publisher.git --upgrade +import publisher +publisher.publish( + 'violin.ipynb', 'python/violin/', 'Violin Plots', + 'How to make violin plots in Python with Plotly.', + title = 'Violin Plots | Plotly', + has_thumbnail='true', + thumbnail='thumbnail/violin.jpg', + language='python', + display_as='statistical', + order=12, + ipynb='~notebook_demo/201') +``` + +```python + +``` \ No newline at end of file