Skip to content

Commit db9ba04

Browse files
committed
Working on Pandas
1 parent a1b1276 commit db9ba04

File tree

2 files changed

+259
-159
lines changed

2 files changed

+259
-159
lines changed

README.md

Lines changed: 128 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -3088,11 +3088,6 @@ Name: a, dtype: int64
30883088
<Sr> = Series(<dict/Series>, index=<list>) # Only keeps items with keys specified in index.
30893089
```
30903090

3091-
```python
3092-
keys = <Sr>.index # Returns a sequence of keys as Index object.
3093-
vals = <Sr>.values # Returns a sequence of values as numpy array.
3094-
```
3095-
30963091
```python
30973092
<el> = <Sr>.loc[key] # Or: <Sr>.iloc[index]
30983093
<Sr> = <Sr>.loc[keys] # Or: <Sr>.iloc[indexes]
@@ -3102,20 +3097,37 @@ vals = <Sr>.values # Returns a sequence of values as
31023097
```python
31033098
<el> = <Sr>[key/index] # Or: <Sr>.key
31043099
<Sr> = <Sr>[keys/indexes] # Or: <Sr>[<key_range/range>]
3105-
<Sr> = <Sr>[<bools>] # Or: <Sr>.i/loc[<bools>]
3100+
<Sr> = <Sr>[bools] # Or: <Sr>.i/loc[bools]
31063101
```
31073102

31083103
```python
3109-
<Sr> = <Sr> ><== <el/Sr> # Returns Series of bools.
3104+
<Sr> = <Sr> ><== <el/Sr> # Returns a Series of bools.
31103105
<Sr> = <Sr> +-*/ <el/Sr> # Non-matching keys get value NaN.
31113106
```
31123107

31133108
```python
3114-
<Sr> = pd.concat(<coll_of_Sr>) # Combines items.
3115-
<Sr> = <Sr>.append(<Sr>) # Appends new items.
3109+
<Sr> = <Sr>.append(<Sr>) # Or: pd.concat(<coll_of_Sr>)
31163110
<Sr> = <Sr>.combine_first(<Sr>) # Adds items that are not yet present (extends).
31173111
```
31183112

3113+
#### Operations:
3114+
```python
3115+
<el> = <Sr>.sum/max/mean/idxmax/all()
3116+
<Sr> = <Sr>.diff/cumsum/rank/pct_change() # …/fillna/ffill/interpolate()
3117+
<el> = <Sr>.apply/agg(<agg_func>)
3118+
<Sr> = <Sr>.apply/agg/transform(<trans_func>)
3119+
```
3120+
3121+
```python
3122+
+-------------+------------+-----------+--------------+--------+-------------+---------------+
3123+
| | 'sum' | ['sum'] | {'s': 'sum'} | 'rank' | ['rank'] | {'r': 'rank'} |
3124+
+-------------+------------+-----------+--------------+--------+-------------+---------------+
3125+
| sr.apply(…) | | | | | rank | |
3126+
| sr.agg(…) | 3 | sum 3 | s 3 | x 1 | x 1 | r x 1 |
3127+
| | | | | y 2 | y 2 | y 2 |
3128+
+-------------+------------+-----------+--------------+--------+-------------+---------------+
3129+
```
3130+
31193131
### DataFrame
31203132
**Table with labeled rows and columns.**
31213133

@@ -3127,41 +3139,58 @@ b 3 4
31273139
```
31283140

31293141
```python
3130-
<DF> = DataFrame(<list_of_rows>) # Rows can be either lists, dicts or series.
3131-
<DF> = DataFrame(<dict_of_columns>) # Columns can be either lists, dicts or series.
3142+
<DF> = DataFrame(<list_of_rows>) # Rows can be either lists, dicts or series.
3143+
<DF> = DataFrame(<dict_of_columns>) # Columns can be either lists, dicts or series.
3144+
```
3145+
3146+
```python
3147+
<el> = <DF>.loc[row_key, column_key] # Or: <DF>.iloc[row_index, column_index]
3148+
<Sr/DF> = <DF>.loc[row_key/s] # Or: <DF>.iloc[row_index/es]
3149+
<Sr/DF> = <DF>.loc[:, column_key/s] # Or: <DF>.iloc[:, column_index/es]
3150+
<DF> = <DF>.loc[row_bools, column_bools] # Or: <DF>.iloc[row_bools, column_bools]
31323151
```
31333152

31343153
```python
3135-
row_keys = <Sr>.index # Also: `col_keys = <Sr>.columns`.
3136-
values = <Sr>.values # Returns values as 2D numpy array.
3154+
<Sr/DF> = <DF>[column_key/s] # Or: <DF>.column_key
3155+
<DF> = <DF>[row_bools] # Keeps rows as specified by bools.
3156+
<DF> = <DF>[<DF_of_bools>] # Assigns NaN to False values.
31373157
```
31383158

31393159
```python
3140-
<el> = <DF>.loc[row_key, column_key] # Or: <DF>.iloc[row_index, column_index]
3141-
<Sr/DF> = <DF>.loc[row_key/s] # Or: <DF>.iloc[row_index/es]
3142-
<Sr/DF> = <DF>.loc[:, column_key/s] # Or: <DF>.iloc[:, column_index/es]
3143-
<DF> = <DF>.loc[row_bools, column_bools] # Or: <DF>.iloc[row_bools, column_bools]
3160+
<DF> = <DF> ><== <el/Sr/DF> # Returns DataFrame of bools.
3161+
<DF> = <DF> +-*/ <el/Sr/DF> # Non-matching keys get value NaN.
31443162
```
31453163

31463164
```python
3147-
<Sr/DF> = <DF>[column_key/s] # Or: <DF>.column_key
3148-
<DF> = <DF>[row_bools] # Keeps rows as specified by bools.
3149-
<DF> = <DF>[<DF_of_bools>] # Assigns NaN to False values.
3165+
<DF> = <DF>.set_index(column_key) # Replaces row keys with values from a column.
3166+
<DF> = <DF>.reset_index() # Moves row keys to their own column.
3167+
<DF> = <DF>.transpose() # Rotates the table.
3168+
<DF> = <DF>.melt(id_vars=column_key/s) # Melts on columns.
31503169
```
31513170

3171+
#### Operations:
31523172
```python
3153-
<DF> = <DF> ><== <el/Sr/DF> # Returns DataFrame of bools.
3154-
<DF> = <DF> +-*/ <el/Sr/DF> # Non-matching keys get value NaN.
3173+
<Sr> = <DF>.sum/max/mean/idxmax/all()
3174+
<DF> = <DF>.diff/cumsum/rank() # …/pct_change/fillna/ffill/interpolate()
3175+
<Sr> = <DF>.apply/agg/transform(<agg_func>)
3176+
<DF> = <DF>.apply/agg/transform(<trans_func>)
3177+
<DF> = <DF>.applymap(<func>) # Applies a function to a DataFrame elementwise.
31553178
```
3179+
* **All operations operate on columns by default. Use `'axis=1'` parameter to process the rows instead.**
31563180

31573181
```python
3158-
<DF> = <DF>.set_index(column_key) # Replaces row keys with values from a column.
3159-
<DF> = <DF>.reset_index() # Moves row keys to their own column.
3160-
<DF> = <DF>.transpose() # Rotates the table.
3161-
<DF> = <DF>.melt(id_vars=column_key/s) # Melts on columns.
3182+
+-------------+------------+-----------+--------------+--------+-------------+---------------+
3183+
| | 'sum' | ['sum'] | {'x': 'sum'} | 'rank' | ['rank'] | {'x': 'rank'} |
3184+
+-------------+------------+-----------+--------------+--------+-------------+---------------+
3185+
| df.apply(…) | | x y | | x y | x y | x |
3186+
| df.agg(…) | x 4 | sum 4 6 | x 4 | a 1 1 | rank rank | a 1 |
3187+
| df.trans(…) | y 6 | | | b 2 2 | a 1 1 | b 2 |
3188+
| | | | | | b 2 2 | |
3189+
+-------------+------------+-----------+--------------+--------+-------------+---------------+
31623190
```
3191+
* **Transform doesn't work with `['sum']` and `{'x': 'sum'}`.**
31633192

3164-
### Merge, Join, Concat
3193+
#### Merge, Join, Concat:
31653194
```python
31663195
>>> l = DataFrame([[1, 2], [3, 4]], index=['a', 'b'], columns=['x', 'y'])
31673196
x y
@@ -3172,74 +3201,95 @@ b 3 4
31723201
b 4 5
31733202
c 6 7
31743203
```
3204+
31753205
```python
3176-
┏━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┯━━━━━━━━━━━━┯━━━━━━━━━━━━┓
3177-
┃ how/join │ 'outer''inner''left'
3178-
┠────────────────────────┼───────────────┼────────────┼────────────┨
3179-
┃ l.merge(r, on='y', │ x y z │ x y z │ x y z ┃
3180-
how=…) │ 0 1 2 . │ 3 4 51 2 . ┃
3181-
┃ │ 1 3 4 5 │ │ 3 4 5
3182-
┃ │ 2 . 6 7 │ │ ┃
3183-
┠────────────────────────┼───────────────┼────────────┼────────────┨
3184-
┃ l.join(r, lsuffix='l', │ x yl yr z │ │ x yl yr z ┃
3185-
rsuffix='r', │ a 1 2 . . │ x yl yr z │ 1 2 . . ┃
3186-
how=…) │ b 3 4 4 53 4 4 53 4 4 5
3187-
┃ │ c . . 6 7 │ │ ┃
3188-
┠────────────────────────┼───────────────┼────────────┼────────────┨
3189-
┃ pd.concat([l, r], │ x y z │ y │ ┃
3190-
axis=0, │ a 1 2 . │ 2 │ ┃
3191-
join=…) │ b 3 4 . │ 4 │ ┃
3192-
┃ │ b . 4 54 │ ┃
3193-
┃ │ c . 6 76 │ ┃
3194-
┠────────────────────────┼───────────────┼────────────┼────────────┨
3195-
┃ pd.concat([l, r], │ x y y z │ │ ┃
3196-
axis=1, │ a 1 2 . . │ x y y z │ ┃
3197-
join=…) │ b 3 4 4 53 4 4 5 │ ┃
3198-
┃ │ c . . 6 7 │ │ ┃
3199-
┠────────────────────────┼───────────────┼────────────┼────────────┨
3200-
┃ l.combine_first(r) │ x y z │ │ ┃
3201-
┃ │ a 1 2 . │ │ ┃
3202-
┃ │ b 3 4 5 │ │ ┃
3203-
┃ │ c . 6 7 │ │ ┃
3204-
┗━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┷━━━━━━━━━━━━┷━━━━━━━━━━━━┛
3206+
┏━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┯━━━━━━━━━━━━┯━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━┓
3207+
┃        how/join        │    'outer'    │   'inner'  │   'left'   │       description        ┃
3208+
┠────────────────────────┼───────────────┼────────────┼────────────┼──────────────────────────┨
3209+
┃ l.merge(r, on='y',     │    x   y   z  │ x   y   z  │ x   y   z  │ Joins/merges on column.  ┃
3210+
┃            how=…)      │ 0  1   2   .  │ 3   4   5  │ 1   2   .  │ Also accepts left_on and ┃
3211+
┃                        │ 1  3   4   5  │            │ 3   4   5  │ right_on parameters.     ┃
3212+
┃                        │ 2  .   6   7  │            │            │ Uses 'inner' by default. ┃
3213+
┠────────────────────────┼───────────────┼────────────┼────────────┼──────────────────────────┨
3214+
┃ l.join(r, lsuffix='l', │    x yl yr  z │            │ x yl yr  z │ Joins/merges on row_keys.┃
3215+
┃           rsuffix='r', │ a  1  2  .  . │ x yl yr  z │ 1  2  .  . │ Uses 'left' by default.  ┃
3216+
┃           how=…)       │ b  3  4  4  5 │ 3  4  4  5 │ 3  4  4  5 │                          ┃
3217+
┃                        │ c  .  .  6  7 │            │            │                          ┃
3218+
┠────────────────────────┼───────────────┼────────────┼────────────┼──────────────────────────┨
3219+
┃ pd.concat([l, r],      │    x   y   z  │     y      │            │ Adds rows at the bottom. ┃
3220+
┃           axis=0,      │ a  1   2   .  │     2      │            │ Uses 'outer' by default. ┃
3221+
┃           join=…)      │ b  3   4   .  │     4      │            │ By default works the     ┃
3222+
┃                        │ b  .   4   5  │     4      │            │ same as `l.append(r)`.   ┃
3223+
┃                        │ c  .   6   7  │     6      │            │                          ┃
3224+
┠────────────────────────┼───────────────┼────────────┼────────────┼──────────────────────────┨
3225+
┃ pd.concat([l, r],      │    x  y  y  z │            │            │ Adds columns at the      ┃
3226+
┃           axis=1,      │ a  1  2  .  . │ x  y  y  z │            │ right end.               ┃
3227+
┃           join=…)      │ b  3  4  4  5 │ 3  4  4  5 │            │ Uses 'outer' by default. ┃
3228+
┃                        │ c  .  .  6  7 │            │            │                          ┃
3229+
┠────────────────────────┼───────────────┼────────────┼────────────┼──────────────────────────┨
3230+
┃ l.combine_first(r)     │    x   y   z  │            │            │ Adds missing rows and    ┃
3231+
┃                        │ a  1   2   .  │            │            │ columns.                 ┃
3232+
┃                        │ b  3   4   5  │            │            │                          ┃
3233+
┃                        │ c  .   6   7  │            │            │                          ┃
3234+
┗━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┷━━━━━━━━━━━━┷━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━┛
32053235
```
32063236

32073237
### GroupBy
3238+
**Object that groups together rows of a dataframe based on the value of the passed column.**
3239+
32083240
```python
3209-
<DF_Gb> = <DF>.groupby(column_key/s) # Columns that were used for groupin becme row_k.
3210-
<DFs> = list(<DF_Gb>) # Returns list of group_key - DataFrame tuples.
3211-
<DF> = <DF_Gb>.get_group(group_key)
3212-
<Sr_Gb> = <DF_Gb>[column_key] # Or: <DF_Gb>.column_key
3213-
<Srs> = list(<Sr_Gb>) # Returns list of group_key - Series tuples.
3241+
>>> df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 6]], index=list('abc'), columns=list('xyz'))
3242+
>>> gb = df.groupby('z')
3243+
x y z
3244+
3: a 1 2 3
3245+
6: b 4 5 6
3246+
c 7 8 6
32143247
```
32153248

3216-
### Operations
32173249
```python
3218-
<el/Sr/DF> = <Sr/DF/GB>.sum/max/mean() # …/idxmax/all()
3219-
<Sr/DF> = <Sr/DF/GB>.diff/cumsum/rank() # …/pct_change()
3250+
<GB> = <DF>.groupby(column_key/s) # DF is split into groups based on passed column.
3251+
<DF> = <GB>.get_group(group_key) # Selects a group by value of grouping column.
3252+
<DF> = <GB>.<operation>() # Executes operation on each col of each group.
32203253
```
3254+
* **Result of an operation is a dataframe with index made up of group keys. Use `'<DF>.reset_index()'` to move the index back into its own column.**
32213255

3256+
#### Operations:
32223257
```python
3223-
<Sr/DF> = <Sr/DF/GB>.ffill()
3224-
<Sr/DF> = <Sr/DF/GB>.fillna(value)
3225-
<Sr/DF> = <Sr/DF>.interpolate()
3258+
<DF> = <GB>.sum/max/mean/idxmax/all()
3259+
<DF> = <GB>.diff/cumsum/rank() # …/pct_change/fillna/ffill()
3260+
<DF> = <GB>.apply/agg/transform(<agg_func>)
3261+
<DF> = <GB>.agg/transform(<trans_func>)
32263262
```
32273263

32283264
```python
3229-
<Sr/DF> = <Sr/DF/GB>.apply(<func>) # Invokes function on every value/column/group.
3230-
<DF> = <DF>.applymap(<func>) # Apply a function to a Dataframe elementwise.
3231-
<Sr/DF> = <Sr/DF/GB>.aggregate(<func>) # Invokes function on every column > number.
3232-
<Sr/DF> = <Sr/DF/GB>.transform(<func>)
3233-
<Sr/DF> = <Sr/DF>.combine(<Sr/DF>, <func>)
3265+
+-------------+------------+-----------+--------------+--------+-------------+---------------+
3266+
| | 'sum' | ['sum'] | {'x': 'sum'} | 'rank' | ['rank'] | {'x': 'rank'} |
3267+
+-------------+------------+-----------+--------------+--------+-------------+---------------+
3268+
| gb.apply(…) | x y z | | | | | |
3269+
| | z | | | | | |
3270+
| | 3 1 2 3 | | | | | |
3271+
| | 6 11 13 12 | | | | | |
3272+
+-------------+------------+-----------+--------------+--------+-------------+---------------+
3273+
| gb.agg(…) | x y | x y | x | x y | x y | x |
3274+
| | z | sum sum | z | a 1 1 | rank rank | a 1 |
3275+
| | 3 1 2 | z | 3 1 | b 1 1 | a 1 1 | b 1 |
3276+
| | 6 11 13 | 3 1 2 | 6 11 | c 2 2 | b 1 1 | c 2 |
3277+
| | | 6 11 13 | | | c 2 2 | |
3278+
+-------------+------------+-----------+--------------+--------+-------------+---------------+
3279+
| gb.trans(…) | x y | | | x y | | |
3280+
| | a 1 2 | | | a 1 1 | | |
3281+
| | b 11 13 | | | b 1 1 | | |
3282+
| | c 11 13 | | | c 1 1 | | |
3283+
+-------------+------------+-----------+--------------+--------+-------------+---------------+
32343284
```
32353285

32363286
### Rolling
32373287
```python
3238-
<Rl> = <Sr/DF/GB>.rolling(window_size) # Also: `min_periods, center=False`.
3239-
<Rl> = <Rl>[column_key/s] # Or: <Rl>.column_key
3240-
<Sr/DF> = <Rl>.sum/max/mean()
3241-
<Sr/DF> = <Rl>.apply(<func>) # Invokes function on every window.
3242-
<Sr/DF> = <Rl>.aggregate(<func>) # Invokes function on every window.
3288+
<Rl_S/D/G> = <Sr/DF/GB>.rolling(window_size) # Also: `min_periods=None, center=False`.
3289+
<Rl_S/D> = <Rl_D/G>[column_key/s] # Or: <Rl>.column_key
3290+
<Sr/DF/DF> = <Rl_S/D/G>.sum/max/mean()
3291+
<Sr/DF/DF> = <Rl_S/D/G>.apply(<func>) # Invokes function on every window.
3292+
<Sr/DF/DF> = <Rl_S/D/G>.aggregate(<func/str>) # Invokes function on every window.
32433293
```
32443294

32453295
### Encode
@@ -3353,7 +3403,7 @@ def mangle_data(covid, dow_jones, gold, bitcoin):
33533403
out = pandas.concat([covid, dow_jones, gold, bitcoin], axis=1)
33543404
out = out.loc['2020-02-23':].iloc[:-2]
33553405
out = out.interpolate()
3356-
out.iloc[:, 1:] = out.rolling(10, 1, center=True).mean().iloc[:, 1:]
3406+
out.iloc[:, 1:] = out.rolling(10, min_periods=1, center=True).mean().iloc[:, 1:]
33573407
out.iloc[:, 1:] = out.iloc[:, 1:] / out.iloc[0, 1:] * 100
33583408
return out
33593409

0 commit comments

Comments
 (0)