import numpy as np
import matplotlib.pyplot as plt
import h5py
import scipy.io
import sklearn
import sklearn.datasets

def sigmoid(x):
    """
    Compute the sigmoid of x

    Arguments:
    x -- A scalar or numpy array of any size.

    Return:
    s -- sigmoid(x)
    """
    s = 1 / (1 + np.exp(-x))
    return s

def relu(x):
    """
    Compute the relu of x

    Arguments:
    x -- A scalar or numpy array of any size.

    Return:
    s -- relu(x)
    """
    s = np.maximum(0, x)

    return s

def load_params_and_grads(seed=1):
    """
    Return randomly initialized parameters (W1, b1, W2, b2) and gradients
    (dW1, db1, dW2, db2) with matching shapes.
    """
    np.random.seed(seed)
    W1 = np.random.randn(2, 3)
    b1 = np.random.randn(2, 1)
    W2 = np.random.randn(3, 3)
    b2 = np.random.randn(3, 1)

    dW1 = np.random.randn(2, 3)
    db1 = np.random.randn(2, 1)
    dW2 = np.random.randn(3, 3)
    db2 = np.random.randn(3, 1)

    return W1, b1, W2, b2, dW1, db1, dW2, db2

def initialize_parameters(layer_dims):
    """
    Arguments:
    layer_dims -- python array (list) containing the dimensions of each layer in our network

    Returns:
    parameters -- python dictionary containing your parameters "W1", "b1", ..., "WL", "bL":
                    Wl -- weight matrix of shape (layer_dims[l], layer_dims[l-1])
                    bl -- bias vector of shape (layer_dims[l], 1)

    Tips:
    - For example: the layer_dims for the "Planar Data classification model" would have been [2,2,1].
    This means W1's shape was (2,2), b1 was (2,1), W2 was (1,2) and b2 was (1,1). Now you have to generalize it!
    - In the for loop, use parameters['W' + str(l)] to access Wl, where l is the iterative integer.
    """

    np.random.seed(3)
    parameters = {}
    L = len(layer_dims)  # number of layers in the network

    for l in range(1, L):
        # He initialization: scale by sqrt(2 / size of the previous layer)
        parameters['W' + str(l)] = np.random.randn(layer_dims[l], layer_dims[l - 1]) * np.sqrt(2 / layer_dims[l - 1])
        parameters['b' + str(l)] = np.zeros((layer_dims[l], 1))

        assert parameters['W' + str(l)].shape == (layer_dims[l], layer_dims[l - 1])
        assert parameters['b' + str(l)].shape == (layer_dims[l], 1)

    return parameters

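# Illustrative shape check for initialize_parameters (assumed layer sizes
# [2, 4, 1], not taken from the assignment); kept as comments so importing
# this module stays side-effect free:
#
#   parameters = initialize_parameters([2, 4, 1])
#   # parameters["W1"].shape == (4, 2), parameters["b1"].shape == (4, 1)
#   # parameters["W2"].shape == (1, 4), parameters["b2"].shape == (1, 1)
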
def compute_cost(a3, Y):
    """
    Implement the cost function

    Arguments:
    a3 -- post-activation, output of forward propagation
    Y -- "true" labels vector, same shape as a3

    Returns:
    cost -- value of the cost function
    """
    m = Y.shape[1]

    # cross-entropy cost, averaged over the m examples
    logprobs = np.multiply(-np.log(a3), Y) + np.multiply(-np.log(1 - a3), 1 - Y)
    cost = 1. / m * np.sum(logprobs)

    return cost

101
+ def forward_propagation (X , parameters ):
102
+ """
103
+ Implements the forward propagation (and computes the loss) presented in Figure 2.
104
+
105
+ Arguments:
106
+ X -- input dataset, of shape (input size, number of examples)
107
+ parameters -- python dictionary containing your parameters "W1", "b1", "W2", "b2", "W3", "b3":
108
+ W1 -- weight matrix of shape ()
109
+ b1 -- bias vector of shape ()
110
+ W2 -- weight matrix of shape ()
111
+ b2 -- bias vector of shape ()
112
+ W3 -- weight matrix of shape ()
113
+ b3 -- bias vector of shape ()
114
+
115
+ Returns:
116
+ loss -- the loss function (vanilla logistic loss)
117
+ """
118
+
119
+ # retrieve parameters
120
+ W1 = parameters ["W1" ]
121
+ b1 = parameters ["b1" ]
122
+ W2 = parameters ["W2" ]
123
+ b2 = parameters ["b2" ]
124
+ W3 = parameters ["W3" ]
125
+ b3 = parameters ["b3" ]
126
+
127
+ # LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SIGMOID
128
+ z1 = np .dot (W1 , X ) + b1
129
+ a1 = relu (z1 )
130
+ z2 = np .dot (W2 , a1 ) + b2
131
+ a2 = relu (z2 )
132
+ z3 = np .dot (W3 , a2 ) + b3
133
+ a3 = sigmoid (z3 )
134
+
135
+ cache = (z1 , a1 , W1 , b1 , z2 , a2 , W2 , b2 , z3 , a3 , W3 , b3 )
136
+
137
+ return a3 , cache
138
+
def backward_propagation(X, Y, cache):
    """
    Implement the backward propagation presented in figure 2.

    Arguments:
    X -- input dataset, of shape (input size, number of examples)
    Y -- true "label" vector (containing 0 if cat, 1 if non-cat)
    cache -- cache output from forward_propagation()

    Returns:
    gradients -- A dictionary with the gradients with respect to each parameter, activation and pre-activation variables
    """
    m = X.shape[1]
    (z1, a1, W1, b1, z2, a2, W2, b2, z3, a3, W3, b3) = cache

    # The 1/m averaging factor is folded into dz3, so it propagates to every gradient below.
    dz3 = 1. / m * (a3 - Y)
    dW3 = np.dot(dz3, a2.T)
    db3 = np.sum(dz3, axis=1, keepdims=True)

    da2 = np.dot(W3.T, dz3)
    dz2 = np.multiply(da2, np.int64(a2 > 0))  # ReLU derivative: 1 where a2 > 0, else 0
    dW2 = np.dot(dz2, a1.T)
    db2 = np.sum(dz2, axis=1, keepdims=True)

    da1 = np.dot(W2.T, dz2)
    dz1 = np.multiply(da1, np.int64(a1 > 0))
    dW1 = np.dot(dz1, X.T)
    db1 = np.sum(dz1, axis=1, keepdims=True)

    gradients = {"dz3": dz3, "dW3": dW3, "db3": db3,
                 "da2": da2, "dz2": dz2, "dW2": dW2, "db2": db2,
                 "da1": da1, "dz1": dz1, "dW1": dW1, "db1": db1}

    return gradients

def predict(X, y, parameters):
    """
    This function is used to predict the results of an n-layer neural network.

    Arguments:
    X -- data set of examples you would like to label
    y -- true labels, used to print the accuracy
    parameters -- parameters of the trained model

    Returns:
    p -- predictions for the given dataset X
    """

    m = X.shape[1]
    p = np.zeros((1, m), dtype=int)

    # Forward propagation
    a3, caches = forward_propagation(X, parameters)

    # convert probas to 0/1 predictions
    for i in range(0, a3.shape[1]):
        if a3[0, i] > 0.5:
            p[0, i] = 1
        else:
            p[0, i] = 0

    # print results

    # print ("predictions: " + str(p[0,:]))
    # print ("true labels: " + str(y[0,:]))
    print("Accuracy: " + str(np.mean((p[0, :] == y[0, :]))))

    return p

def load_2D_dataset():
    # Load the 2D dataset stored in datasets/data.mat and plot the training points
    data = scipy.io.loadmat('datasets/data.mat')
    train_X = data['X'].T
    train_Y = data['y'].T
    test_X = data['Xval'].T
    test_Y = data['yval'].T

    plt.scatter(train_X[0, :], train_X[1, :], c=train_Y.ravel(), s=40, cmap=plt.cm.Spectral)

    return train_X, train_Y, test_X, test_Y

def plot_decision_boundary(model, X, y):
    # Set min and max values and give it some padding
    x_min, x_max = X[0, :].min() - 1, X[0, :].max() + 1
    y_min, y_max = X[1, :].min() - 1, X[1, :].max() + 1
    h = 0.01
    # Generate a grid of points with distance h between them
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    # Predict the function value for the whole grid
    Z = model(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    # Plot the contour and training examples
    plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral)
    plt.ylabel('x2')
    plt.xlabel('x1')
    plt.scatter(X[0, :], X[1, :], c=y, cmap=plt.cm.Spectral)
    plt.show()

def predict_dec(parameters, X):
    """
    Used for plotting decision boundary.

    Arguments:
    parameters -- python dictionary containing your parameters
    X -- input data of size (m, K)

    Returns:
    predictions -- vector of predictions of our model (red: 0 / blue: 1)
    """

    # Predict using forward propagation and a classification threshold of 0.5
    a3, cache = forward_propagation(X, parameters)
    predictions = (a3 > 0.5)
    return predictions

def load_dataset():
    np.random.seed(3)
    train_X, train_Y = sklearn.datasets.make_moons(n_samples=300, noise=.2)  # 300 examples, noise 0.2
    # Visualize the data
    plt.scatter(train_X[:, 0], train_X[:, 1], c=train_Y, s=40, cmap=plt.cm.Spectral)
    train_X = train_X.T
    train_Y = train_Y.reshape((1, train_Y.shape[0]))

    return train_X, train_Y
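

if __name__ == "__main__":
    # Minimal smoke test (an illustrative sketch, not part of the original
    # assignment utilities): one forward/backward pass and a prediction on a
    # tiny random dataset, using the 3-layer architecture that
    # forward_propagation assumes (LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SIGMOID).
    np.random.seed(1)
    X = np.random.randn(2, 5)                          # 5 examples with 2 features each
    Y = (np.random.rand(1, 5) > 0.5).astype(int)       # random 0/1 labels

    parameters = initialize_parameters([2, 4, 3, 1])   # W1: (4, 2), W2: (3, 4), W3: (1, 3)
    a3, cache = forward_propagation(X, parameters)
    cost = compute_cost(a3, Y)
    grads = backward_propagation(X, Y, cache)

    print("cost:", cost)
    print("dW1 shape:", grads["dW1"].shape)            # matches W1: (4, 2)
    predictions = predict(X, Y, parameters)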