Skip to content

Commit a1b1276

Browse files
committed
Pandas and Plotly sections added
1 parent c74ce25 commit a1b1276

File tree

3 files changed

+574
-1
lines changed

3 files changed

+574
-1
lines changed

README.md

Lines changed: 312 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3063,6 +3063,318 @@ if __name__ == '__main__':
30633063
```
30643064

30653065

3066+
Pandas
3067+
------
3068+
3069+
```python
3070+
# $ pip3 install pandas
3071+
import pandas as pd
3072+
from pandas import Series, DataFrame
3073+
```
3074+
3075+
### Series
3076+
**Ordered dictionary with a name.**
3077+
3078+
```python
3079+
>>> Series([1, 2], index=['x', 'y'], name='a')
3080+
x 1
3081+
y 2
3082+
Name: a, dtype: int64
3083+
```
3084+
3085+
```python
3086+
<Sr> = Series(<list>) # Assigns RangeIndex starting at 0.
3087+
<Sr> = Series(<dict>) # Takes dictionary's keys for index.
3088+
<Sr> = Series(<dict/Series>, index=<list>) # Only keeps items with keys specified in index.
3089+
```
3090+
3091+
```python
3092+
keys = <Sr>.index # Returns a sequence of keys as Index object.
3093+
vals = <Sr>.values # Returns a sequence of values as numpy array.
3094+
```
3095+
3096+
```python
3097+
<el> = <Sr>.loc[key] # Or: <Sr>.iloc[index]
3098+
<Sr> = <Sr>.loc[keys] # Or: <Sr>.iloc[indexes]
3099+
<Sr> = <Sr>.loc[from_key : to_key_inclusive] # Or: <Sr>.iloc[from_i : to_i_exclusive]
3100+
```
3101+
3102+
```python
3103+
<el> = <Sr>[key/index] # Or: <Sr>.key
3104+
<Sr> = <Sr>[keys/indexes] # Or: <Sr>[<key_range/range>]
3105+
<Sr> = <Sr>[<bools>] # Or: <Sr>.i/loc[<bools>]
3106+
```
3107+
3108+
```python
3109+
<Sr> = <Sr> ><== <el/Sr> # Returns Series of bools.
3110+
<Sr> = <Sr> +-*/ <el/Sr> # Non-matching keys get value NaN.
3111+
```
3112+
3113+
```python
3114+
<Sr> = pd.concat(<coll_of_Sr>) # Combines items.
3115+
<Sr> = <Sr>.append(<Sr>) # Appends new items.
3116+
<Sr> = <Sr>.combine_first(<Sr>) # Adds items that are not yet present (extends).
3117+
```
3118+
3119+
### DataFrame
3120+
**Table with labeled rows and columns.**
3121+
3122+
```python
3123+
>>> DataFrame([[1, 2], [3, 4]], index=['a', 'b'], columns=['x', 'y'])
3124+
x y
3125+
a 1 2
3126+
b 3 4
3127+
```
3128+
3129+
```python
3130+
<DF> = DataFrame(<list_of_rows>) # Rows can be either lists, dicts or series.
3131+
<DF> = DataFrame(<dict_of_columns>) # Columns can be either lists, dicts or series.
3132+
```
3133+
3134+
```python
3135+
row_keys = <Sr>.index # Also: `col_keys = <Sr>.columns`.
3136+
values = <Sr>.values # Returns values as 2D numpy array.
3137+
```
3138+
3139+
```python
3140+
<el> = <DF>.loc[row_key, column_key] # Or: <DF>.iloc[row_index, column_index]
3141+
<Sr/DF> = <DF>.loc[row_key/s] # Or: <DF>.iloc[row_index/es]
3142+
<Sr/DF> = <DF>.loc[:, column_key/s] # Or: <DF>.iloc[:, column_index/es]
3143+
<DF> = <DF>.loc[row_bools, column_bools] # Or: <DF>.iloc[row_bools, column_bools]
3144+
```
3145+
3146+
```python
3147+
<Sr/DF> = <DF>[column_key/s] # Or: <DF>.column_key
3148+
<DF> = <DF>[row_bools] # Keeps rows as specified by bools.
3149+
<DF> = <DF>[<DF_of_bools>] # Assigns NaN to False values.
3150+
```
3151+
3152+
```python
3153+
<DF> = <DF> ><== <el/Sr/DF> # Returns DataFrame of bools.
3154+
<DF> = <DF> +-*/ <el/Sr/DF> # Non-matching keys get value NaN.
3155+
```
3156+
3157+
```python
3158+
<DF> = <DF>.set_index(column_key) # Replaces row keys with values from a column.
3159+
<DF> = <DF>.reset_index() # Moves row keys to their own column.
3160+
<DF> = <DF>.transpose() # Rotates the table.
3161+
<DF> = <DF>.melt(id_vars=column_key/s) # Melts on columns.
3162+
```
3163+
3164+
### Merge, Join, Concat
3165+
```python
3166+
>>> l = DataFrame([[1, 2], [3, 4]], index=['a', 'b'], columns=['x', 'y'])
3167+
x y
3168+
a 1 2
3169+
b 3 4
3170+
>>> r = DataFrame([[4, 5], [6, 7]], index=['b', 'c'], columns=['y', 'z'])
3171+
y z
3172+
b 4 5
3173+
c 6 7
3174+
```
3175+
```python
3176+
┏━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┯━━━━━━━━━━━━┯━━━━━━━━━━━━┓
3177+
┃ how/join │ 'outer''inner''left'
3178+
┠────────────────────────┼───────────────┼────────────┼────────────┨
3179+
┃ l.merge(r, on='y', │ x y z │ x y z │ x y z ┃
3180+
how=…) │ 0 1 2 . │ 3 4 51 2 . ┃
3181+
┃ │ 1 3 4 5 │ │ 3 4 5
3182+
┃ │ 2 . 6 7 │ │ ┃
3183+
┠────────────────────────┼───────────────┼────────────┼────────────┨
3184+
┃ l.join(r, lsuffix='l', │ x yl yr z │ │ x yl yr z ┃
3185+
rsuffix='r', │ a 1 2 . . │ x yl yr z │ 1 2 . . ┃
3186+
how=…) │ b 3 4 4 53 4 4 53 4 4 5
3187+
┃ │ c . . 6 7 │ │ ┃
3188+
┠────────────────────────┼───────────────┼────────────┼────────────┨
3189+
┃ pd.concat([l, r], │ x y z │ y │ ┃
3190+
axis=0, │ a 1 2 . │ 2 │ ┃
3191+
join=…) │ b 3 4 . │ 4 │ ┃
3192+
┃ │ b . 4 54 │ ┃
3193+
┃ │ c . 6 76 │ ┃
3194+
┠────────────────────────┼───────────────┼────────────┼────────────┨
3195+
┃ pd.concat([l, r], │ x y y z │ │ ┃
3196+
axis=1, │ a 1 2 . . │ x y y z │ ┃
3197+
join=…) │ b 3 4 4 53 4 4 5 │ ┃
3198+
┃ │ c . . 6 7 │ │ ┃
3199+
┠────────────────────────┼───────────────┼────────────┼────────────┨
3200+
┃ l.combine_first(r) │ x y z │ │ ┃
3201+
┃ │ a 1 2 . │ │ ┃
3202+
┃ │ b 3 4 5 │ │ ┃
3203+
┃ │ c . 6 7 │ │ ┃
3204+
┗━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┷━━━━━━━━━━━━┷━━━━━━━━━━━━┛
3205+
```
3206+
3207+
### GroupBy
3208+
```python
3209+
<DF_Gb> = <DF>.groupby(column_key/s) # Columns that were used for groupin becme row_k.
3210+
<DFs> = list(<DF_Gb>) # Returns list of group_key - DataFrame tuples.
3211+
<DF> = <DF_Gb>.get_group(group_key)
3212+
<Sr_Gb> = <DF_Gb>[column_key] # Or: <DF_Gb>.column_key
3213+
<Srs> = list(<Sr_Gb>) # Returns list of group_key - Series tuples.
3214+
```
3215+
3216+
### Operations
3217+
```python
3218+
<el/Sr/DF> = <Sr/DF/GB>.sum/max/mean() # …/idxmax/all()
3219+
<Sr/DF> = <Sr/DF/GB>.diff/cumsum/rank() # …/pct_change()
3220+
```
3221+
3222+
```python
3223+
<Sr/DF> = <Sr/DF/GB>.ffill()
3224+
<Sr/DF> = <Sr/DF/GB>.fillna(value)
3225+
<Sr/DF> = <Sr/DF>.interpolate()
3226+
```
3227+
3228+
```python
3229+
<Sr/DF> = <Sr/DF/GB>.apply(<func>) # Invokes function on every value/column/group.
3230+
<DF> = <DF>.applymap(<func>) # Apply a function to a Dataframe elementwise.
3231+
<Sr/DF> = <Sr/DF/GB>.aggregate(<func>) # Invokes function on every column > number.
3232+
<Sr/DF> = <Sr/DF/GB>.transform(<func>)
3233+
<Sr/DF> = <Sr/DF>.combine(<Sr/DF>, <func>)
3234+
```
3235+
3236+
### Rolling
3237+
```python
3238+
<Rl> = <Sr/DF/GB>.rolling(window_size) # Also: `min_periods, center=False`.
3239+
<Rl> = <Rl>[column_key/s] # Or: <Rl>.column_key
3240+
<Sr/DF> = <Rl>.sum/max/mean()
3241+
<Sr/DF> = <Rl>.apply(<func>) # Invokes function on every window.
3242+
<Sr/DF> = <Rl>.aggregate(<func>) # Invokes function on every window.
3243+
```
3244+
3245+
### Encode
3246+
```python
3247+
<DF> = pd.read_json/html('<str/path/url>')
3248+
<DF> = pd.read_csv/pickle/excel('<path/url>')
3249+
<DF> = pd.read_sql('<query>', <connection>)
3250+
<DF> = pd.read_clipboard()
3251+
```
3252+
3253+
### Decode
3254+
```python
3255+
<dict> = <DF>.to_dict(['d/l/s/sp/r/i'])
3256+
<str> = <DF>.to_json/html/csv/markdown/latex([<path>])
3257+
<DF>.to_pickle/excel(<path>)
3258+
<DF>.to_sql('<table_name>', <connection>)
3259+
```
3260+
3261+
3262+
Plotly
3263+
------
3264+
3265+
### Top 10 Countries by Percentage of Population With Confirmed COVID-19 Infection
3266+
```text
3267+
|
3268+
|
3269+
|
3270+
|
3271+
|
3272+
|
3273+
|
3274+
|
3275+
|
3276+
|
3277+
|
3278+
|
3279+
|
3280+
|
3281+
|
3282+
|
3283+
|
3284+
|
3285+
|
3286+
|
3287+
+----------------------------------------------------------------------------------------------
3288+
```
3289+
3290+
```python
3291+
import pandas as pd
3292+
import plotly.express
3293+
3294+
covid = pd.read_csv('https://covid.ourworldindata.org/data/owid-covid-data.csv',
3295+
usecols=['iso_code', 'date', 'total_deaths', 'population'])
3296+
continents = pd.read_csv('https://datahub.io/JohnSnowLabs/country-and-continent-codes-' + \
3297+
'list/r/country-and-continent-codes-list-csv.csv',
3298+
usecols=['Three_Letter_Country_Code', 'Continent_Name'])
3299+
merged = pd.merge(covid, continents, left_on='iso_code', right_on='Three_Letter_Country_Code')
3300+
summed = merged.groupby(['Continent_Name', 'date']).sum().reset_index()
3301+
summed['Total Deaths per Million'] = summed.total_deaths * 1e6 / summed.population
3302+
summed = summed[('2020-03-14' < summed.date) & (summed.date < '2020-05-31')]
3303+
summed = summed.rename({'date': 'Date', 'Continent_Name': 'Continent'}, axis='columns')
3304+
plotly.express.line(summed, x='Date', y='Total Deaths per Million', color='Continent').show()
3305+
```
3306+
3307+
### Confirmed Cases of COVID-19, Dow Jones Index, Gold, and Bitcoin Price
3308+
```text
3309+
|
3310+
|
3311+
|
3312+
|
3313+
|
3314+
|
3315+
|
3316+
|
3317+
|
3318+
|
3319+
|
3320+
|
3321+
|
3322+
|
3323+
|
3324+
|
3325+
|
3326+
|
3327+
|
3328+
|
3329+
+----------------------------------------------------------------------------------------------
3330+
```
3331+
3332+
```python
3333+
import pandas, datetime
3334+
import plotly.graph_objects as go
3335+
3336+
def main():
3337+
display_data(mangle_data(*scrape_data()))
3338+
3339+
def scrape_data():
3340+
def scrape_yah(id_):
3341+
BASE_URL = 'https://query1.finance.yahoo.com/v7/finance/download/'
3342+
now = int(datetime.datetime.now().timestamp())
3343+
url = f'{BASE_URL}{id_}?period1=1579651200&period2={now}&interval=1d&events=history'
3344+
return pandas.read_csv(url, usecols=['Date', 'Close']).set_index('Date').Close
3345+
covid = pd.read_csv('https://covid.ourworldindata.org/data/owid-covid-data.csv',
3346+
usecols=['date', 'total_cases'])
3347+
covid = covid.groupby('date').sum()
3348+
dow_jones, gold, bitcoin = scrape_yah('^DJI'), scrape_yah('GC=F'), scrape_yah('BTC-USD')
3349+
dow_jones.name, gold.name, bitcoin.name = 'Dow Jones', 'Gold', 'Bitcoin'
3350+
return covid, dow_jones, gold, bitcoin
3351+
3352+
def mangle_data(covid, dow_jones, gold, bitcoin):
3353+
out = pandas.concat([covid, dow_jones, gold, bitcoin], axis=1)
3354+
out = out.loc['2020-02-23':].iloc[:-2]
3355+
out = out.interpolate()
3356+
out.iloc[:, 1:] = out.rolling(10, 1, center=True).mean().iloc[:, 1:]
3357+
out.iloc[:, 1:] = out.iloc[:, 1:] / out.iloc[0, 1:] * 100
3358+
return out
3359+
3360+
def display_data(out):
3361+
def get_trace(col_name):
3362+
return go.Scatter(x=out.index, y=out[col_name], name=col_name, yaxis='y2')
3363+
traces = [get_trace(col_name) for col_name in out.columns[1:]]
3364+
traces.append(go.Scatter(x=out.index, y=out.total_cases, name='Total Cases', yaxis='y1'))
3365+
figure = go.Figure()
3366+
figure.add_traces(traces)
3367+
figure.update_layout(
3368+
yaxis1=dict(title='Total Cases', rangemode='tozero'),
3369+
yaxis2=dict(title='%', rangemode='tozero', overlaying='y', side='right'),
3370+
legend=dict(x=1.1)
3371+
).show()
3372+
3373+
if __name__ == '__main__':
3374+
main()
3375+
```
3376+
3377+
30663378
Basic Script Template
30673379
---------------------
30683380
```python

0 commit comments

Comments
 (0)