@@ -3063,6 +3063,318 @@ if __name__ == '__main__':
3063
3063
```
3064
3064
3065
3065
3066
+ Pandas
3067
+ ------
3068
+
3069
+ ``` python
3070
+ # $ pip3 install pandas
3071
+ import pandas as pd
3072
+ from pandas import Series, DataFrame
3073
+ ```
3074
+
3075
+ ### Series
3076
+ ** Ordered dictionary with a name.**
3077
+
3078
+ ``` python
3079
+ >> > Series([1 , 2 ], index = [' x' , ' y' ], name = ' a' )
3080
+ x 1
3081
+ y 2
3082
+ Name: a, dtype: int64
3083
+ ```
3084
+
3085
+ ``` python
3086
+ < Sr> = Series(< list > ) # Assigns RangeIndex starting at 0.
3087
+ < Sr> = Series(< dict > ) # Takes dictionary's keys for index.
3088
+ < Sr> = Series(< dict / Series> , index = < list > ) # Only keeps items with keys specified in index.
3089
+ ```
3090
+
3091
+ ``` python
3092
+ keys = < Sr> .index # Returns a sequence of keys as Index object.
3093
+ vals = < Sr> .values # Returns a sequence of values as numpy array.
3094
+ ```
3095
+
3096
+ ``` python
3097
+ < el> = < Sr> .loc[key] # Or: <Sr>.iloc[index]
3098
+ < Sr> = < Sr> .loc[keys] # Or: <Sr>.iloc[indexes]
3099
+ < Sr> = < Sr> .loc[from_key : to_key_inclusive] # Or: <Sr>.iloc[from_i : to_i_exclusive]
3100
+ ```
3101
+
3102
+ ``` python
3103
+ < el> = < Sr> [key/ index] # Or: <Sr>.key
3104
+ < Sr> = < Sr> [keys/ indexes] # Or: <Sr>[<key_range/range>]
3105
+ < Sr> = < Sr> [< bools> ] # Or: <Sr>.i/loc[<bools>]
3106
+ ```
3107
+
3108
+ ``` python
3109
+ < Sr> = < Sr> ><= = < el/ Sr> # Returns Series of bools.
3110
+ < Sr> = < Sr> +-*/ < el/ Sr> # Non-matching keys get value NaN.
3111
+ ```
3112
+
3113
+ ``` python
3114
+ < Sr> = pd.concat(< coll_of_Sr> ) # Combines items.
3115
+ < Sr> = < Sr> .append(< Sr> ) # Appends new items.
3116
+ < Sr> = < Sr> .combine_first(< Sr> ) # Adds items that are not yet present (extends).
3117
+ ```
3118
+
3119
+ ### DataFrame
3120
+ ** Table with labeled rows and columns.**
3121
+
3122
+ ``` python
3123
+ >> > DataFrame([[1 , 2 ], [3 , 4 ]], index = [' a' , ' b' ], columns = [' x' , ' y' ])
3124
+ x y
3125
+ a 1 2
3126
+ b 3 4
3127
+ ```
3128
+
3129
+ ``` python
3130
+ < DF > = DataFrame(< list_of_rows> ) # Rows can be either lists, dicts or series.
3131
+ < DF > = DataFrame(< dict_of_columns> ) # Columns can be either lists, dicts or series.
3132
+ ```
3133
+
3134
+ ``` python
3135
+ row_keys = < Sr> .index # Also: `col_keys = <Sr>.columns`.
3136
+ values = < Sr> .values # Returns values as 2D numpy array.
3137
+ ```
3138
+
3139
+ ``` python
3140
+ < el> = < DF > .loc[row_key, column_key] # Or: <DF>.iloc[row_index, column_index]
3141
+ < Sr/ DF > = < DF > .loc[row_key/ s] # Or: <DF>.iloc[row_index/es]
3142
+ < Sr/ DF > = < DF > .loc[:, column_key/ s] # Or: <DF>.iloc[:, column_index/es]
3143
+ < DF > = < DF > .loc[row_bools, column_bools] # Or: <DF>.iloc[row_bools, column_bools]
3144
+ ```
3145
+
3146
+ ``` python
3147
+ < Sr/ DF > = < DF > [column_key/ s] # Or: <DF>.column_key
3148
+ < DF > = < DF > [row_bools] # Keeps rows as specified by bools.
3149
+ < DF > = < DF > [< DF_of_bools > ] # Assigns NaN to False values.
3150
+ ```
3151
+
3152
+ ``` python
3153
+ < DF > = < DF > ><= = < el/ Sr/ DF > # Returns DataFrame of bools.
3154
+ < DF > = < DF > +-*/ < el/ Sr/ DF > # Non-matching keys get value NaN.
3155
+ ```
3156
+
3157
+ ``` python
3158
+ < DF > = < DF > .set_index(column_key) # Replaces row keys with values from a column.
3159
+ < DF > = < DF > .reset_index() # Moves row keys to their own column.
3160
+ < DF > = < DF > .transpose() # Rotates the table.
3161
+ < DF > = < DF > .melt(id_vars = column_key/ s) # Melts on columns.
3162
+ ```
3163
+
3164
+ ### Merge, Join, Concat
3165
+ ``` python
3166
+ >> > l = DataFrame([[1 , 2 ], [3 , 4 ]], index = [' a' , ' b' ], columns = [' x' , ' y' ])
3167
+ x y
3168
+ a 1 2
3169
+ b 3 4
3170
+ >> > r = DataFrame([[4 , 5 ], [6 , 7 ]], index = [' b' , ' c' ], columns = [' y' , ' z' ])
3171
+ y z
3172
+ b 4 5
3173
+ c 6 7
3174
+ ```
3175
+ ``` python
3176
+ ┏━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┯━━━━━━━━━━━━┯━━━━━━━━━━━━┓
3177
+ ┃ how/ join │ ' outer' │ ' inner' │ ' left' ┃
3178
+ ┠────────────────────────┼───────────────┼────────────┼────────────┨
3179
+ ┃ l.merge(r, on = ' y' , │ x y z │ x y z │ x y z ┃
3180
+ ┃ how = …) │ 0 1 2 . │ 3 4 5 │ 1 2 . ┃
3181
+ ┃ │ 1 3 4 5 │ │ 3 4 5 ┃
3182
+ ┃ │ 2 . 6 7 │ │ ┃
3183
+ ┠────────────────────────┼───────────────┼────────────┼────────────┨
3184
+ ┃ l.join(r, lsuffix = ' l' , │ x yl yr z │ │ x yl yr z ┃
3185
+ ┃ rsuffix = ' r' , │ a 1 2 . . │ x yl yr z │ 1 2 . . ┃
3186
+ ┃ how = …) │ b 3 4 4 5 │ 3 4 4 5 │ 3 4 4 5 ┃
3187
+ ┃ │ c . . 6 7 │ │ ┃
3188
+ ┠────────────────────────┼───────────────┼────────────┼────────────┨
3189
+ ┃ pd.concat([l, r], │ x y z │ y │ ┃
3190
+ ┃ axis = 0 , │ a 1 2 . │ 2 │ ┃
3191
+ ┃ join = …) │ b 3 4 . │ 4 │ ┃
3192
+ ┃ │ b . 4 5 │ 4 │ ┃
3193
+ ┃ │ c . 6 7 │ 6 │ ┃
3194
+ ┠────────────────────────┼───────────────┼────────────┼────────────┨
3195
+ ┃ pd.concat([l, r], │ x y y z │ │ ┃
3196
+ ┃ axis = 1 , │ a 1 2 . . │ x y y z │ ┃
3197
+ ┃ join = …) │ b 3 4 4 5 │ 3 4 4 5 │ ┃
3198
+ ┃ │ c . . 6 7 │ │ ┃
3199
+ ┠────────────────────────┼───────────────┼────────────┼────────────┨
3200
+ ┃ l.combine_first(r) │ x y z │ │ ┃
3201
+ ┃ │ a 1 2 . │ │ ┃
3202
+ ┃ │ b 3 4 5 │ │ ┃
3203
+ ┃ │ c . 6 7 │ │ ┃
3204
+ ┗━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┷━━━━━━━━━━━━┷━━━━━━━━━━━━┛
3205
+ ```
3206
+
3207
+ ### GroupBy
3208
+ ``` python
3209
+ < DF_Gb > = < DF > .groupby(column_key/ s) # Columns that were used for groupin becme row_k.
3210
+ < DFs> = list (< DF_Gb > ) # Returns list of group_key - DataFrame tuples.
3211
+ < DF > = < DF_Gb > .get_group(group_key)
3212
+ < Sr_Gb> = < DF_Gb > [column_key] # Or: <DF_Gb>.column_key
3213
+ < Srs> = list (< Sr_Gb> ) # Returns list of group_key - Series tuples.
3214
+ ```
3215
+
3216
+ ### Operations
3217
+ ``` python
3218
+ < el/ Sr/ DF > = < Sr/ DF / GB > .sum/ max / mean() # …/idxmax/all()
3219
+ < Sr/ DF > = < Sr/ DF / GB > .diff/ cumsum/ rank() # …/pct_change()
3220
+ ```
3221
+
3222
+ ``` python
3223
+ < Sr/ DF > = < Sr/ DF / GB > .ffill()
3224
+ < Sr/ DF > = < Sr/ DF / GB > .fillna(value)
3225
+ < Sr/ DF > = < Sr/ DF > .interpolate()
3226
+ ```
3227
+
3228
+ ``` python
3229
+ < Sr/ DF > = < Sr/ DF / GB > .apply(< func> ) # Invokes function on every value/column/group.
3230
+ < DF > = < DF > .applymap(< func> ) # Apply a function to a Dataframe elementwise.
3231
+ < Sr/ DF > = < Sr/ DF / GB > .aggregate(< func> ) # Invokes function on every column > number.
3232
+ < Sr/ DF > = < Sr/ DF / GB > .transform(< func> )
3233
+ < Sr/ DF > = < Sr/ DF > .combine(< Sr/ DF > , < func> )
3234
+ ```
3235
+
3236
+ ### Rolling
3237
+ ``` python
3238
+ < Rl> = < Sr/ DF / GB > .rolling(window_size) # Also: `min_periods, center=False`.
3239
+ < Rl> = < Rl> [column_key/ s] # Or: <Rl>.column_key
3240
+ < Sr/ DF > = < Rl> .sum/ max / mean()
3241
+ < Sr/ DF > = < Rl> .apply(< func> ) # Invokes function on every window.
3242
+ < Sr/ DF > = < Rl> .aggregate(< func> ) # Invokes function on every window.
3243
+ ```
3244
+
3245
+ ### Encode
3246
+ ``` python
3247
+ < DF > = pd.read_json/ html(' <str/path/url>' )
3248
+ < DF > = pd.read_csv/ pickle/ excel(' <path/url>' )
3249
+ < DF > = pd.read_sql(' <query>' , < connection> )
3250
+ < DF > = pd.read_clipboard()
3251
+ ```
3252
+
3253
+ ### Decode
3254
+ ``` python
3255
+ < dict > = < DF > .to_dict([' d/l/s/sp/r/i' ])
3256
+ < str > = < DF > .to_json/ html/ csv/ markdown/ latex([< path> ])
3257
+ < DF > .to_pickle/ excel(< path> )
3258
+ < DF > .to_sql(' <table_name>' , < connection> )
3259
+ ```
3260
+
3261
+
3262
+ Plotly
3263
+ ------
3264
+
3265
+ ### Top 10 Countries by Percentage of Population With Confirmed COVID-19 Infection
3266
+ ``` text
3267
+ |
3268
+ |
3269
+ |
3270
+ |
3271
+ |
3272
+ |
3273
+ |
3274
+ |
3275
+ |
3276
+ |
3277
+ |
3278
+ |
3279
+ |
3280
+ |
3281
+ |
3282
+ |
3283
+ |
3284
+ |
3285
+ |
3286
+ |
3287
+ +----------------------------------------------------------------------------------------------
3288
+ ```
3289
+
3290
+ ``` python
3291
+ import pandas as pd
3292
+ import plotly.express
3293
+
3294
+ covid = pd.read_csv(' https://covid.ourworldindata.org/data/owid-covid-data.csv' ,
3295
+ usecols = [' iso_code' , ' date' , ' total_deaths' , ' population' ])
3296
+ continents = pd.read_csv(' https://datahub.io/JohnSnowLabs/country-and-continent-codes-' + \
3297
+ ' list/r/country-and-continent-codes-list-csv.csv' ,
3298
+ usecols = [' Three_Letter_Country_Code' , ' Continent_Name' ])
3299
+ merged = pd.merge(covid, continents, left_on = ' iso_code' , right_on = ' Three_Letter_Country_Code' )
3300
+ summed = merged.groupby([' Continent_Name' , ' date' ]).sum().reset_index()
3301
+ summed[' Total Deaths per Million' ] = summed.total_deaths * 1e6 / summed.population
3302
+ summed = summed[(' 2020-03-14' < summed.date) & (summed.date < ' 2020-05-31' )]
3303
+ summed = summed.rename({' date' : ' Date' , ' Continent_Name' : ' Continent' }, axis = ' columns' )
3304
+ plotly.express.line(summed, x = ' Date' , y = ' Total Deaths per Million' , color = ' Continent' ).show()
3305
+ ```
3306
+
3307
+ ### Confirmed Cases of COVID-19, Dow Jones Index, Gold, and Bitcoin Price
3308
+ ``` text
3309
+ |
3310
+ |
3311
+ |
3312
+ |
3313
+ |
3314
+ |
3315
+ |
3316
+ |
3317
+ |
3318
+ |
3319
+ |
3320
+ |
3321
+ |
3322
+ |
3323
+ |
3324
+ |
3325
+ |
3326
+ |
3327
+ |
3328
+ |
3329
+ +----------------------------------------------------------------------------------------------
3330
+ ```
3331
+
3332
+ ``` python
3333
+ import pandas, datetime
3334
+ import plotly.graph_objects as go
3335
+
3336
+ def main ():
3337
+ display_data(mangle_data(* scrape_data()))
3338
+
3339
+ def scrape_data ():
3340
+ def scrape_yah (id_ ):
3341
+ BASE_URL = ' https://query1.finance.yahoo.com/v7/finance/download/'
3342
+ now = int (datetime.datetime.now().timestamp())
3343
+ url = f ' { BASE_URL }{ id_} ?period1=1579651200&period2= { now} &interval=1d&events=history '
3344
+ return pandas.read_csv(url, usecols = [' Date' , ' Close' ]).set_index(' Date' ).Close
3345
+ covid = pd.read_csv(' https://covid.ourworldindata.org/data/owid-covid-data.csv' ,
3346
+ usecols = [' date' , ' total_cases' ])
3347
+ covid = covid.groupby(' date' ).sum()
3348
+ dow_jones, gold, bitcoin = scrape_yah(' ^DJI' ), scrape_yah(' GC=F' ), scrape_yah(' BTC-USD' )
3349
+ dow_jones.name, gold.name, bitcoin.name = ' Dow Jones' , ' Gold' , ' Bitcoin'
3350
+ return covid, dow_jones, gold, bitcoin
3351
+
3352
+ def mangle_data (covid , dow_jones , gold , bitcoin ):
3353
+ out = pandas.concat([covid, dow_jones, gold, bitcoin], axis = 1 )
3354
+ out = out.loc[' 2020-02-23' :].iloc[:- 2 ]
3355
+ out = out.interpolate()
3356
+ out.iloc[:, 1 :] = out.rolling(10 , 1 , center = True ).mean().iloc[:, 1 :]
3357
+ out.iloc[:, 1 :] = out.iloc[:, 1 :] / out.iloc[0 , 1 :] * 100
3358
+ return out
3359
+
3360
+ def display_data (out ):
3361
+ def get_trace (col_name ):
3362
+ return go.Scatter(x = out.index, y = out[col_name], name = col_name, yaxis = ' y2' )
3363
+ traces = [get_trace(col_name) for col_name in out.columns[1 :]]
3364
+ traces.append(go.Scatter(x = out.index, y = out.total_cases, name = ' Total Cases' , yaxis = ' y1' ))
3365
+ figure = go.Figure()
3366
+ figure.add_traces(traces)
3367
+ figure.update_layout(
3368
+ yaxis1 = dict (title = ' Total Cases' , rangemode = ' tozero' ),
3369
+ yaxis2 = dict (title = ' %' , rangemode = ' tozero' , overlaying = ' y' , side = ' right' ),
3370
+ legend = dict (x = 1.1 )
3371
+ ).show()
3372
+
3373
+ if __name__ == ' __main__' :
3374
+ main()
3375
+ ```
3376
+
3377
+
3066
3378
Basic Script Template
3067
3379
---------------------
3068
3380
``` python
0 commit comments