
Commit 9ff8ae2

Course 5 Week 1
Building a RNN, Dinosaur Island, Jazz Improv with LSTM
1 parent fe9d7ae commit 9ff8ae2


48 files changed: +21989 -0 lines changed

Course 5/Week 1/Building a Recurrent Neural Network - Step by Step/Building+a+Recurrent+Neural+Network+-+Step+by+Step+-+v3.ipynb

Lines changed: 2143 additions & 0 deletions
Large diffs are not rendered by default.

Course 5/Week 1/Building a Recurrent Neural Network - Step by Step/Building+a+Recurrent+Neural+Network+-+Step+by+Step+-+v3.py

Lines changed: 1739 additions & 0 deletions
Large diffs are not rendered by default.

Course 5/Week 1/Building a Recurrent Neural Network - Step by Step/data/input.txt

Lines changed: 6682 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 110 additions & 0 deletions
@@ -0,0 +1,110 @@
import numpy as np


def softmax(x):
    # Numerically stable softmax: shift by the max before exponentiating.
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum(axis=0)


def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def initialize_adam(parameters):
    """
    Initializes v and s as two python dictionaries with:
                - keys: "dW1", "db1", ..., "dWL", "dbL"
                - values: numpy arrays of zeros of the same shape as the corresponding gradients/parameters.

    Arguments:
    parameters -- python dictionary containing your parameters.
                    parameters["W" + str(l)] = Wl
                    parameters["b" + str(l)] = bl

    Returns:
    v -- python dictionary that will contain the exponentially weighted average of the gradient.
                    v["dW" + str(l)] = ...
                    v["db" + str(l)] = ...
    s -- python dictionary that will contain the exponentially weighted average of the squared gradient.
                    s["dW" + str(l)] = ...
                    s["db" + str(l)] = ...
    """

    L = len(parameters) // 2  # number of layers in the neural network
    v = {}
    s = {}

    # Initialize v, s. Input: "parameters". Outputs: "v, s".
    for l in range(L):
        ### START CODE HERE ### (approx. 4 lines)
        v["dW" + str(l+1)] = np.zeros(parameters["W" + str(l+1)].shape)
        v["db" + str(l+1)] = np.zeros(parameters["b" + str(l+1)].shape)
        s["dW" + str(l+1)] = np.zeros(parameters["W" + str(l+1)].shape)
        s["db" + str(l+1)] = np.zeros(parameters["b" + str(l+1)].shape)
        ### END CODE HERE ###

    return v, s


def update_parameters_with_adam(parameters, grads, v, s, t, learning_rate=0.01,
                                beta1=0.9, beta2=0.999, epsilon=1e-8):
    """
    Update parameters using Adam

    Arguments:
    parameters -- python dictionary containing your parameters:
                    parameters['W' + str(l)] = Wl
                    parameters['b' + str(l)] = bl
    grads -- python dictionary containing your gradients for each parameter:
                    grads['dW' + str(l)] = dWl
                    grads['db' + str(l)] = dbl
    v -- Adam variable, moving average of the first gradient, python dictionary
    s -- Adam variable, moving average of the squared gradient, python dictionary
    t -- Adam update counter (starts at 1), used for bias correction
    learning_rate -- the learning rate, scalar.
    beta1 -- Exponential decay hyperparameter for the first moment estimates
    beta2 -- Exponential decay hyperparameter for the second moment estimates
    epsilon -- hyperparameter preventing division by zero in Adam updates

    Returns:
    parameters -- python dictionary containing your updated parameters
    v -- Adam variable, moving average of the first gradient, python dictionary
    s -- Adam variable, moving average of the squared gradient, python dictionary
    """

    L = len(parameters) // 2  # number of layers in the neural network
    v_corrected = {}          # Initializing first moment estimate, python dictionary
    s_corrected = {}          # Initializing second moment estimate, python dictionary

    # Perform Adam update on all parameters
    for l in range(L):
        # Moving average of the gradients. Inputs: "v, grads, beta1". Output: "v".
        ### START CODE HERE ### (approx. 2 lines)
        v["dW" + str(l+1)] = beta1 * v["dW" + str(l+1)] + (1 - beta1) * grads["dW" + str(l+1)]
        v["db" + str(l+1)] = beta1 * v["db" + str(l+1)] + (1 - beta1) * grads["db" + str(l+1)]
        ### END CODE HERE ###

        # Compute bias-corrected first moment estimate. Inputs: "v, beta1, t". Output: "v_corrected".
        ### START CODE HERE ### (approx. 2 lines)
        v_corrected["dW" + str(l+1)] = v["dW" + str(l+1)] / (1 - beta1 ** t)
        v_corrected["db" + str(l+1)] = v["db" + str(l+1)] / (1 - beta1 ** t)
        ### END CODE HERE ###

        # Moving average of the squared gradients. Inputs: "s, grads, beta2". Output: "s".
        ### START CODE HERE ### (approx. 2 lines)
        s["dW" + str(l+1)] = beta2 * s["dW" + str(l+1)] + (1 - beta2) * (grads["dW" + str(l+1)] ** 2)
        s["db" + str(l+1)] = beta2 * s["db" + str(l+1)] + (1 - beta2) * (grads["db" + str(l+1)] ** 2)
        ### END CODE HERE ###

        # Compute bias-corrected second raw moment estimate. Inputs: "s, beta2, t". Output: "s_corrected".
        ### START CODE HERE ### (approx. 2 lines)
        s_corrected["dW" + str(l+1)] = s["dW" + str(l+1)] / (1 - beta2 ** t)
        s_corrected["db" + str(l+1)] = s["db" + str(l+1)] / (1 - beta2 ** t)
        ### END CODE HERE ###

        # Update parameters. Inputs: "parameters, learning_rate, v_corrected, s_corrected, epsilon". Output: "parameters".
        ### START CODE HERE ### (approx. 2 lines)
        parameters["W" + str(l+1)] = parameters["W" + str(l+1)] - learning_rate * v_corrected["dW" + str(l+1)] / (np.sqrt(s_corrected["dW" + str(l+1)]) + epsilon)
        parameters["b" + str(l+1)] = parameters["b" + str(l+1)] - learning_rate * v_corrected["db" + str(l+1)] / (np.sqrt(s_corrected["db" + str(l+1)]) + epsilon)
        ### END CODE HERE ###

    return parameters, v, s
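
For reference, here is a minimal sketch of how these two helpers would typically be driven from a training loop. The single-layer shapes and the random placeholder gradients below are hypothetical, chosen only for illustration; a real loop would compute grads by backpropagating through the model.

import numpy as np

# Hypothetical one-layer parameter set (shapes are placeholders).
parameters = {"W1": np.random.randn(5, 4) * 0.01,
              "b1": np.zeros((5, 1))}

v, s = initialize_adam(parameters)

for t in range(1, 101):  # t must start at 1 for bias correction
    # Placeholder gradients standing in for real backprop output.
    grads = {"dW1": np.random.randn(5, 4),
             "db1": np.random.randn(5, 1)}
    parameters, v, s = update_parameters_with_adam(parameters, grads, v, s, t,
                                                   learning_rate=0.01)

Starting t at 1 matters: with t = 0 the bias-correction denominators (1 - beta1 ** t) and (1 - beta2 ** t) are zero and the update divides by zero.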

0 commit comments
