@@ -3088,11 +3088,6 @@ Name: a, dtype: int64
3088
3088
< Sr> = Series(< dict / Series> , index = < list > ) # Only keeps items with keys specified in index.
3089
3089
```
3090
3090
3091
- ``` python
3092
- keys = < Sr> .index # Returns a sequence of keys as Index object.
3093
- vals = < Sr> .values # Returns a sequence of values as numpy array.
3094
- ```
3095
-
3096
3091
``` python
3097
3092
< el> = < Sr> .loc[key] # Or: <Sr>.iloc[index]
3098
3093
< Sr> = < Sr> .loc[keys] # Or: <Sr>.iloc[indexes]
@@ -3102,20 +3097,37 @@ vals = <Sr>.values # Returns a sequence of values as
3102
3097
``` python
3103
3098
< el> = < Sr> [key/ index] # Or: <Sr>.key
3104
3099
< Sr> = < Sr> [keys/ indexes] # Or: <Sr>[<key_range/range>]
3105
- < Sr> = < Sr> [< bools> ] # Or: <Sr>.i/loc[< bools> ]
3100
+ < Sr> = < Sr> [bools] # Or: <Sr>.i/loc[bools]
3106
3101
```
3107
3102
3108
3103
``` python
3109
- < Sr> = < Sr> ><= = < el/ Sr> # Returns Series of bools.
3104
+ < Sr> = < Sr> ><= = < el/ Sr> # Returns a Series of bools.
3110
3105
< Sr> = < Sr> +-*/ < el/ Sr> # Non-matching keys get value NaN.
3111
3106
```
3112
3107
3113
3108
``` python
3114
- < Sr> = pd.concat(< coll_of_Sr> ) # Combines items.
3115
- < Sr> = < Sr> .append(< Sr> ) # Appends new items.
3109
+ < Sr> = < Sr> .append(< Sr> ) # Or: pd.concat(<coll_of_Sr>)
3116
3110
< Sr> = < Sr> .combine_first(< Sr> ) # Adds items that are not yet present (extends).
3117
3111
```
3118
3112
3113
+ #### Operations:
3114
+ ``` python
3115
+ < el> = < Sr> .sum/ max / mean/ idxmax/ all ()
3116
+ < Sr> = < Sr> .diff/ cumsum/ rank/ pct_change() # …/fillna/ffill/interpolate()
3117
+ < el> = < Sr> .apply/ agg(< agg_func> )
3118
+ < Sr> = < Sr> .apply/ agg/ transform(< trans_func> )
3119
+ ```
3120
+
3121
+ ``` python
3122
+ + ------------ -+ ------------ + ---------- -+ -------------- + -------- + ------------ -+ -------------- -+
3123
+ | | ' sum' | [' sum' ] | {' s' : ' sum' } | ' rank' | [' rank' ] | {' r' : ' rank' } |
3124
+ + ------------ -+ ------------ + ---------- -+ -------------- + -------- + ------------ -+ -------------- -+
3125
+ | sr.apply(…) | | | | | rank | |
3126
+ | sr.agg(…) | 3 | sum 3 | s 3 | x 1 | x 1 | r x 1 |
3127
+ | | | | | y 2 | y 2 | y 2 |
3128
+ + ------------ -+ ------------ + ---------- -+ -------------- + -------- + ------------ -+ -------------- -+
3129
+ ```
3130
+
3119
3131
### DataFrame
3120
3132
** Table with labeled rows and columns.**
3121
3133
@@ -3127,41 +3139,58 @@ b 3 4
3127
3139
```
3128
3140
3129
3141
``` python
3130
- < DF > = DataFrame(< list_of_rows> ) # Rows can be either lists, dicts or series.
3131
- < DF > = DataFrame(< dict_of_columns> ) # Columns can be either lists, dicts or series.
3142
+ < DF > = DataFrame(< list_of_rows> ) # Rows can be either lists, dicts or series.
3143
+ < DF > = DataFrame(< dict_of_columns> ) # Columns can be either lists, dicts or series.
3144
+ ```
3145
+
3146
+ ``` python
3147
+ < el> = < DF > .loc[row_key, column_key] # Or: <DF>.iloc[row_index, column_index]
3148
+ < Sr/ DF > = < DF > .loc[row_key/ s] # Or: <DF>.iloc[row_index/es]
3149
+ < Sr/ DF > = < DF > .loc[:, column_key/ s] # Or: <DF>.iloc[:, column_index/es]
3150
+ < DF > = < DF > .loc[row_bools, column_bools] # Or: <DF>.iloc[row_bools, column_bools]
3132
3151
```
3133
3152
3134
3153
``` python
3135
- row_keys = < Sr> .index # Also: `col_keys = <Sr>.columns`.
3136
- values = < Sr> .values # Returns values as 2D numpy array.
3154
+ < Sr/ DF > = < DF > [column_key/ s] # Or: <DF>.column_key
3155
+ < DF > = < DF > [row_bools] # Keeps rows as specified by bools.
3156
+ < DF > = < DF > [< DF_of_bools > ] # Assigns NaN to False values.
3137
3157
```
3138
3158
3139
3159
``` python
3140
- < el> = < DF > .loc[row_key, column_key] # Or: <DF>.iloc[row_index, column_index]
3141
- < Sr/ DF > = < DF > .loc[row_key/ s] # Or: <DF>.iloc[row_index/es]
3142
- < Sr/ DF > = < DF > .loc[:, column_key/ s] # Or: <DF>.iloc[:, column_index/es]
3143
- < DF > = < DF > .loc[row_bools, column_bools] # Or: <DF>.iloc[row_bools, column_bools]
3160
+ < DF > = < DF > ><= = < el/ Sr/ DF > # Returns DataFrame of bools.
3161
+ < DF > = < DF > +-*/ < el/ Sr/ DF > # Non-matching keys get value NaN.
3144
3162
```
3145
3163
3146
3164
``` python
3147
- < Sr/ DF > = < DF > [column_key/ s] # Or: <DF>.column_key
3148
- < DF > = < DF > [row_bools] # Keeps rows as specified by bools.
3149
- < DF > = < DF > [< DF_of_bools > ] # Assigns NaN to False values.
3165
+ < DF > = < DF > .set_index(column_key) # Replaces row keys with values from a column.
3166
+ < DF > = < DF > .reset_index() # Moves row keys to their own column.
3167
+ < DF > = < DF > .transpose() # Rotates the table.
3168
+ < DF > = < DF > .melt(id_vars = column_key/ s) # Melts on columns.
3150
3169
```
3151
3170
3171
+ #### Operations:
3152
3172
``` python
3153
- < DF > = < DF > ><= = < el/ Sr/ DF > # Returns DataFrame of bools.
3154
- < DF > = < DF > +-*/ < el/ Sr/ DF > # Non-matching keys get value NaN.
3173
+ < Sr> = < DF > .sum/ max / mean/ idxmax/ all ()
3174
+ < DF > = < DF > .diff/ cumsum/ rank() # …/pct_change/fillna/ffill/interpolate()
3175
+ < Sr> = < DF > .apply/ agg/ transform(< agg_func> )
3176
+ < DF > = < DF > .apply/ agg/ transform(< trans_func> )
3177
+ < DF > = < DF > .applymap(< func> ) # Apply a function to a Dataframe elementwise.
3155
3178
```
3179
+ * ** All operations operate on columns by default. Use ` 'axis=1' ` parameter to process the rows instead.**
3156
3180
3157
3181
``` python
3158
- < DF > = < DF > .set_index(column_key) # Replaces row keys with values from a column.
3159
- < DF > = < DF > .reset_index() # Moves row keys to their own column.
3160
- < DF > = < DF > .transpose() # Rotates the table.
3161
- < DF > = < DF > .melt(id_vars = column_key/ s) # Melts on columns.
3182
+ + ------------ -+ ------------ + ---------- -+ -------------- + -------- + ------------ -+ -------------- -+
3183
+ | | ' sum' | [' sum' ] | {' x' : ' sum' } | ' rank' | [' rank' ] | {' x' : ' rank' } |
3184
+ + ------------ -+ ------------ + ---------- -+ -------------- + -------- + ------------ -+ -------------- -+
3185
+ | df.apply(…) | | x y | | x y | x y | x |
3186
+ | df.agg(…) | x 4 | sum 4 6 | x 4 | a 1 1 | rank rank | a 1 |
3187
+ | df.trans(…) | y 6 | | | b 2 2 | a 1 1 | b 2 |
3188
+ | | | | | | b 2 2 | |
3189
+ + ------------ -+ ------------ + ---------- -+ -------------- + -------- + ------------ -+ -------------- -+
3162
3190
```
3191
+ * ** Transform doesn't work with ` ['sum'] ` and ` {'x': 'sum'} ` .**
3163
3192
3164
- ### Merge, Join, Concat
3193
+ #### Merge, Join, Concat:
3165
3194
``` python
3166
3195
>> > l = DataFrame([[1 , 2 ], [3 , 4 ]], index = [' a' , ' b' ], columns = [' x' , ' y' ])
3167
3196
x y
@@ -3172,74 +3201,95 @@ b 3 4
3172
3201
b 4 5
3173
3202
c 6 7
3174
3203
```
3204
+
3175
3205
``` python
3176
- ┏━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┯━━━━━━━━━━━━┯━━━━━━━━━━━━┓
3177
- ┃ how/ join │ ' outer' │ ' inner' │ ' left' ┃
3178
- ┠────────────────────────┼───────────────┼────────────┼────────────┨
3179
- ┃ l.merge(r, on = ' y' , │ x y z │ x y z │ x y z ┃
3180
- ┃ how = …) │ 0 1 2 . │ 3 4 5 │ 1 2 . ┃
3181
- ┃ │ 1 3 4 5 │ │ 3 4 5 ┃
3182
- ┃ │ 2 . 6 7 │ │ ┃
3183
- ┠────────────────────────┼───────────────┼────────────┼────────────┨
3184
- ┃ l.join(r, lsuffix = ' l' , │ x yl yr z │ │ x yl yr z ┃
3185
- ┃ rsuffix = ' r' , │ a 1 2 . . │ x yl yr z │ 1 2 . . ┃
3186
- ┃ how = …) │ b 3 4 4 5 │ 3 4 4 5 │ 3 4 4 5 ┃
3187
- ┃ │ c . . 6 7 │ │ ┃
3188
- ┠────────────────────────┼───────────────┼────────────┼────────────┨
3189
- ┃ pd.concat([l, r], │ x y z │ y │ ┃
3190
- ┃ axis = 0 , │ a 1 2 . │ 2 │ ┃
3191
- ┃ join = …) │ b 3 4 . │ 4 │ ┃
3192
- ┃ │ b . 4 5 │ 4 │ ┃
3193
- ┃ │ c . 6 7 │ 6 │ ┃
3194
- ┠────────────────────────┼───────────────┼────────────┼────────────┨
3195
- ┃ pd.concat([l, r], │ x y y z │ │ ┃
3196
- ┃ axis = 1 , │ a 1 2 . . │ x y y z │ ┃
3197
- ┃ join = …) │ b 3 4 4 5 │ 3 4 4 5 │ ┃
3198
- ┃ │ c . . 6 7 │ │ ┃
3199
- ┠────────────────────────┼───────────────┼────────────┼────────────┨
3200
- ┃ l.combine_first(r) │ x y z │ │ ┃
3201
- ┃ │ a 1 2 . │ │ ┃
3202
- ┃ │ b 3 4 5 │ │ ┃
3203
- ┃ │ c . 6 7 │ │ ┃
3204
- ┗━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┷━━━━━━━━━━━━┷━━━━━━━━━━━━┛
3206
+ ┏━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┯━━━━━━━━━━━━┯━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━ ┓
3207
+ ┃ how/ join │ ' outer' │ ' inner' │ ' left' │ description ┃
3208
+ ┠────────────────────────┼───────────────┼────────────┼────────────┼────────────────────────── ┨
3209
+ ┃ l.merge(r, on = ' y' , │ x y z │ x y z │ x y z │ Joins / merges on column. ┃
3210
+ ┃ how = …) │ 0 1 2 . │ 3 4 5 │ 1 2 . │ Also accepts left_on and ┃
3211
+ ┃ │ 1 3 4 5 │ │ 3 4 5 │ right_on parameters. ┃
3212
+ ┃ │ 2 . 6 7 │ │ │ Uses ' inner ' by default. ┃
3213
+ ┠────────────────────────┼───────────────┼────────────┼────────────┼────────────────────────── ┨
3214
+ ┃ l.join(r, lsuffix = ' l' , │ x yl yr z │ │ x yl yr z │ Joins / merges on row_keys. ┃
3215
+ ┃ rsuffix = ' r' , │ a 1 2 . . │ x yl yr z │ 1 2 . . │ Uses ' left ' by default. ┃
3216
+ ┃ how = …) │ b 3 4 4 5 │ 3 4 4 5 │ 3 4 4 5 │ ┃
3217
+ ┃ │ c . . 6 7 │ │ │ ┃
3218
+ ┠────────────────────────┼───────────────┼────────────┼────────────┼────────────────────────── ┨
3219
+ ┃ pd.concat([l, r], │ x y z │ y │ │ Adds rows at the bottom. ┃
3220
+ ┃ axis = 0 , │ a 1 2 . │ 2 │ │ Uses ' outer ' by default. ┃
3221
+ ┃ join = …) │ b 3 4 . │ 4 │ │ By default works the ┃
3222
+ ┃ │ b . 4 5 │ 4 │ │ same as `l.append(r)` . ┃
3223
+ ┃ │ c . 6 7 │ 6 │ │ ┃
3224
+ ┠────────────────────────┼───────────────┼────────────┼────────────┼────────────────────────── ┨
3225
+ ┃ pd.concat([l, r], │ x y y z │ │ │ Adds columns at the ┃
3226
+ ┃ axis = 1 , │ a 1 2 . . │ x y y z │ │ right end. ┃
3227
+ ┃ join = …) │ b 3 4 4 5 │ 3 4 4 5 │ │ Uses ' outer ' by default. ┃
3228
+ ┃ │ c . . 6 7 │ │ │ ┃
3229
+ ┠────────────────────────┼───────────────┼────────────┼────────────┼────────────────────────── ┨
3230
+ ┃ l.combine_first(r) │ x y z │ │ │ Adds missing rows and ┃
3231
+ ┃ │ a 1 2 . │ │ │ columns. ┃
3232
+ ┃ │ b 3 4 5 │ │ │ ┃
3233
+ ┃ │ c . 6 7 │ │ │ ┃
3234
+ ┗━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┷━━━━━━━━━━━━┷━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━ ┛
3205
3235
```
3206
3236
3207
3237
### GroupBy
3238
+ ** Object that groups together rows of a dataframe based on the value of the passed column.**
3239
+
3208
3240
``` python
3209
- < DF_Gb > = < DF > .groupby(column_key/ s) # Columns that were used for groupin becme row_k.
3210
- < DFs> = list (< DF_Gb > ) # Returns list of group_key - DataFrame tuples.
3211
- < DF > = < DF_Gb > .get_group(group_key)
3212
- < Sr_Gb> = < DF_Gb > [column_key] # Or: <DF_Gb>.column_key
3213
- < Srs> = list (< Sr_Gb> ) # Returns list of group_key - Series tuples.
3241
+ >> > df = DataFrame([[1 , 2 , 3 ], [4 , 5 , 6 ], [7 , 8 , 6 ]], index = list (' abc' ), columns = list (' xyz' ))
3242
+ >> > gb = df.groupby(' z' )
3243
+ x y z
3244
+ 3 : a 1 2 3
3245
+ 6 : b 4 5 6
3246
+ c 7 8 6
3214
3247
```
3215
3248
3216
- ### Operations
3217
3249
``` python
3218
- < el/ Sr/ DF > = < Sr/ DF / GB > .sum/ max / mean() # …/idxmax/all()
3219
- < Sr/ DF > = < Sr/ DF / GB > .diff/ cumsum/ rank() # …/pct_change()
3250
+ < GB > = < DF > .groupby(column_key/ s) # DF is split into groups based on passed column.
3251
+ < DF > = < GB > .get_group(group_key) # Selects a group by value of grouping column.
3252
+ < DF > = < GB > .< operation> () # Executes operation on each col of each group.
3220
3253
```
3254
+ * ** Result of an operation is a dataframe with index made up of group keys. Use ` '<DF>.reset_index()' ` to move the index back into its own column.**
3221
3255
3256
+ #### Operations:
3222
3257
``` python
3223
- < Sr/ DF > = < Sr/ DF / GB > .ffill()
3224
- < Sr/ DF > = < Sr/ DF / GB > .fillna(value)
3225
- < Sr/ DF > = < Sr/ DF > .interpolate()
3258
+ < DF > = < GB > .sum/ max / mean/ idxmax/ all ()
3259
+ < DF > = < GB > .diff/ cumsum/ rank() # …/pct_change/fillna/ffill()
3260
+ < DF > = < GB > .apply/ agg/ transform(< agg_func> )
3261
+ < DF > = < GB > .agg/ transform(< trans_func> )
3226
3262
```
3227
3263
3228
3264
``` python
3229
- < Sr/ DF > = < Sr/ DF / GB > .apply(< func> ) # Invokes function on every value/column/group.
3230
- < DF > = < DF > .applymap(< func> ) # Apply a function to a Dataframe elementwise.
3231
- < Sr/ DF > = < Sr/ DF / GB > .aggregate(< func> ) # Invokes function on every column > number.
3232
- < Sr/ DF > = < Sr/ DF / GB > .transform(< func> )
3233
- < Sr/ DF > = < Sr/ DF > .combine(< Sr/ DF > , < func> )
3265
+ + ------------ -+ ------------ + ---------- -+ -------------- + -------- + ------------ -+ -------------- -+
3266
+ | | ' sum' | [' sum' ] | {' x' : ' sum' } | ' rank' | [' rank' ] | {' x' : ' rank' } |
3267
+ + ------------ -+ ------------ + ---------- -+ -------------- + -------- + ------------ -+ -------------- -+
3268
+ | gb.apply(…) | x y z | | | | | |
3269
+ | | z | | | | | |
3270
+ | | 3 1 2 3 | | | | | |
3271
+ | | 6 11 13 12 | | | | | |
3272
+ + ------------ -+ ------------ + ---------- -+ -------------- + -------- + ------------ -+ -------------- -+
3273
+ | gb.agg(…) | x y | x y | x | x y | x y | x |
3274
+ | | z | sum sum | z | a 1 1 | rank rank | a 1 |
3275
+ | | 3 1 2 | z | 3 1 | b 1 1 | a 1 1 | b 1 |
3276
+ | | 6 11 13 | 3 1 2 | 6 11 | c 2 2 | b 1 1 | c 2 |
3277
+ | | | 6 11 13 | | | c 2 2 | |
3278
+ + ------------ -+ ------------ + ---------- -+ -------------- + -------- + ------------ -+ -------------- -+
3279
+ | gb.trans(…) | x y | | | x y | | |
3280
+ | | a 1 2 | | | a 1 1 | | |
3281
+ | | b 11 13 | | | b 1 1 | | |
3282
+ | | c 11 13 | | | c 1 1 | | |
3283
+ + ------------ -+ ------------ + ---------- -+ -------------- + -------- + ------------ -+ -------------- -+
3234
3284
```
3235
3285
3236
3286
### Rolling
3237
3287
``` python
3238
- < Rl > = < Sr/ DF / GB > .rolling(window_size) # Also: `min_periods, center=False`.
3239
- < Rl > = < Rl > [column_key/ s] # Or: <Rl>.column_key
3240
- < Sr/ DF > = < Rl > .sum/ max / mean()
3241
- < Sr/ DF > = < Rl > .apply(< func> ) # Invokes function on every window.
3242
- < Sr/ DF > = < Rl > .aggregate(< func> ) # Invokes function on every window.
3288
+ < Rl_S / D / G > = < Sr/ DF / GB > .rolling(window_size) # Also: `min_periods=None , center=False`.
3289
+ < Rl_S / D > = < Rl_D / G > [column_key/ s] # Or: <Rl>.column_key
3290
+ < Sr/ DF / DF > = < Rl_S / D / G > .sum/ max / mean()
3291
+ < Sr/ DF / DF > = < Rl_S / D / G > .apply(< func> ) # Invokes function on every window.
3292
+ < Sr/ DF / DF > = < Rl_S / D / G > .aggregate(< func/ str > ) # Invokes function on every window.
3243
3293
```
3244
3294
3245
3295
### Encode
@@ -3353,7 +3403,7 @@ def mangle_data(covid, dow_jones, gold, bitcoin):
3353
3403
out = pandas.concat([covid, dow_jones, gold, bitcoin], axis = 1 )
3354
3404
out = out.loc[' 2020-02-23' :].iloc[:- 2 ]
3355
3405
out = out.interpolate()
3356
- out.iloc[:, 1 :] = out.rolling(10 , 1 , center = True ).mean().iloc[:, 1 :]
3406
+ out.iloc[:, 1 :] = out.rolling(10 , min_periods = 1 , center = True ).mean().iloc[:, 1 :]
3357
3407
out.iloc[:, 1 :] = out.iloc[:, 1 :] / out.iloc[0 , 1 :] * 100
3358
3408
return out
3359
3409
0 commit comments