@@ -128,26 +128,9 @@ Given a sentence i.e. an array of indexes, and a window size i.e. 1,3,5,..., we
need to convert each word in the sentence to a context window surrounding this
particular word. In detail, we have::
- def contextwin(l, win):
-     '''
-     win :: int corresponding to the size of the window
-
-     l :: array containing the word indexes
-
-     given a list of indexes composing a sentence, it returns
-     a list of lists of indexes corresponding to the context
-     windows surrounding each word in the sentence
-     '''
-     assert (win % 2) == 1
-     assert win >= 1
-     l = list(l)
-
-     lpadded = win // 2 * [-1] + l + win // 2 * [-1]
-     out = [lpadded[i:i + win] for i in range(len(l))]
-
-     assert len(out) == len(l)
-     return out
+ .. literalinclude:: ../code/rnnslu.py
+    :start-after: start-snippet-1
+    :end-before: end-snippet-1
The index ``-1`` corresponds to the ``PADDING`` index we insert at the
beginning/end of the sentence.
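As a quick sanity check (the values here are purely illustrative), a window of
size 3 over a five-word sentence behaves as follows::

    >>> contextwin([0, 1, 2, 3, 4], 3)
    [[-1, 0, 1], [0, 1, 2], [1, 2, 3], [2, 3, 4], [3, 4, -1]]

Each word is mapped to the window centred on it, with ``-1`` filling the
positions that fall outside the sentence.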
@@ -254,71 +237,39 @@ The **hyperparameters** define the whole architecture:
It gives the following code::
- class RNNSLU(object):
-
-     def __init__(self, nh, nc, ne, de, cs):
-         '''
-         nh :: dimension of the hidden layer
-         nc :: number of classes
-         ne :: number of word embeddings in the vocabulary
-         de :: dimension of the word embeddings
-         cs :: word window context size
-         '''
-         # add one row for the PADDING index at the end
-         self.emb = theano.shared(name='embeddings',
-                                  value=0.2 * numpy.random.uniform(-1.0, 1.0, (ne + 1, de))
-                                  .astype(theano.config.floatX))
-         self.Wx = theano.shared(name='Wx',
-                                 value=0.2 * numpy.random.uniform(-1.0, 1.0, (de * cs, nh))
-                                 .astype(theano.config.floatX))
-         self.Wh = theano.shared(name='Wh',
-                                 value=0.2 * numpy.random.uniform(-1.0, 1.0, (nh, nh))
-                                 .astype(theano.config.floatX))
-         self.W = theano.shared(name='W',
-                                value=0.2 * numpy.random.uniform(-1.0, 1.0, (nh, nc))
-                                .astype(theano.config.floatX))
-         self.bh = theano.shared(name='bh', value=numpy.zeros(nh, dtype=theano.config.floatX))
-         self.b = theano.shared(name='b', value=numpy.zeros(nc, dtype=theano.config.floatX))
-         self.h0 = theano.shared(name='h0', value=numpy.zeros(nh, dtype=theano.config.floatX))
-
-         # bundle the parameters for the gradient computation
-         self.params = [self.emb, self.Wx, self.Wh, self.W, self.bh, self.b, self.h0]
+ .. literalinclude:: ../code/rnnslu.py
+    :start-after: start-snippet-2
+    :end-before: end-snippet-2
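For illustration only (these hyperparameter values are an assumption, not a
recommendation), the class could be instantiated as::

    rnn = RNNSLU(nh=100, nc=n_classes, ne=vocab_size, de=50, cs=5)

where ``n_classes`` and ``vocab_size`` stand for quantities read from the
dataset.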
Next we construct the model input from the embedding matrix::
- idxs = T.imatrix()  # one row per word in the sentence, one column per context window position
- x = self.emb[idxs].reshape((idxs.shape[0], de * cs))
- y = T.ivector('y')  # labels
+ .. literalinclude:: ../code/rnnslu.py
+    :start-after: start-snippet-3
+    :end-before: end-snippet-3
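To make the reshape concrete: with ``de = 50``, ``cs = 5`` and a sentence of 5
words, ``idxs`` has shape ``(5, 5)`` and ``x`` has shape ``(5, 250)``, one row
per word holding the concatenated embeddings of its context window. A numpy
sketch of the same indexing, outside the Theano graph (sizes are illustrative)::

    import numpy

    de, cs = 50, 5
    emb = numpy.random.uniform(-1.0, 1.0, (1001, de))      # ne + 1 rows; row -1 acts as PADDING
    idxs = numpy.asarray(contextwin([0, 1, 2, 3, 4], cs))  # shape (5, 5)
    x = emb[idxs].reshape((idxs.shape[0], de * cs))        # shape (5, 250)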
We use the scan operator to construct the recursion; it works like a charm::
- def recurrence(x_t, h_tm1):
-     h_t = T.nnet.sigmoid(T.dot(x_t, self.Wx) + T.dot(h_tm1, self.Wh) + self.bh)
-     s_t = T.nnet.softmax(T.dot(h_t, self.W) + self.b)
-     return [h_t, s_t]
-
- [h, s], _ = theano.scan(fn=recurrence,
-                         sequences=x,
-                         outputs_info=[self.h0, None],
-                         n_steps=x.shape[0])
-
- p_y_given_x_sentence = s[:, 0, :]
- y_pred = T.argmax(p_y_given_x_sentence, axis=1)
+ .. literalinclude:: ../code/rnnslu.py
+    :start-after: start-snippet-4
+    :end-before: end-snippet-4
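Note that ``T.nnet.softmax`` always returns a matrix, so ``s`` has shape
``(n_steps, 1, nc)`` and ``s[:, 0, :]`` simply drops the spurious middle axis.
If ``theano.scan`` itself is unfamiliar, here is a minimal sketch of the same
accumulator pattern on an unrelated toy problem, a running sum over a vector::

    import theano
    import theano.tensor as T

    v = T.vector('v')
    acc, _ = theano.scan(fn=lambda x_t, a_tm1: a_tm1 + x_t,
                         sequences=v,
                         outputs_info=T.zeros_like(v[0]))
    cumsum = theano.function([v], acc)
    # cumsum([1., 2., 3.]) returns [1., 3., 6.]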
Theano will then compute all the gradients automatically to maximize the log-likelihood::
- lr = T.scalar('lr')
- nll = -T.mean(T.log(p_y_given_x_sentence)[T.arange(x.shape[0]), y])
- gradients = T.grad(nll, self.params)
- updates = OrderedDict((p, p - lr * g) for p, g in zip(self.params, gradients))
-
- Next compile those functions::
+ .. literalinclude:: ../code/rnnslu.py
+    :start-after: start-snippet-5
+    :end-before: end-snippet-5
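These updates implement plain stochastic gradient descent: each parameter is
moved one step of size ``lr`` along its negative gradient. The same pattern on
a toy objective (illustrative only, not part of the tutorial code)::

    import theano
    import theano.tensor as T
    from collections import OrderedDict

    w = theano.shared(0.0, name='w')
    lr = T.scalar('lr')
    cost = (w - 3.0) ** 2
    updates = OrderedDict([(w, w - lr * T.grad(cost, w))])
    step = theano.function([lr], cost, updates=updates)
    # repeated calls to step(0.1) drive w towards the minimum at 3.0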
- self.classify = theano.function(inputs=[idxs], outputs=y_pred)
+ Next compile those functions::
- self.train = theano.function(inputs=[idxs, y, lr],
-                              outputs=nll,
-                              updates=updates)
+ .. literalinclude:: ../code/rnnslu.py
+    :start-after: start-snippet-6
+    :end-before: end-snippet-6
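Once compiled, training reduces to ordinary function calls, one sentence at a
time. A sketch of the inner loop (the dataset variables and the learning rate
are illustrative names, not part of the tutorial code)::

    for sentence, labels in zip(train_sentences, train_labels):
        words = numpy.asarray(contextwin(sentence, cs), dtype='int32')
        rnn.train(words, numpy.asarray(labels, dtype='int32'), 0.01)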
We keep the word embeddings on the unit sphere by normalizing them after each update::
- self.normalize = theano.function(
-     inputs=[],
-     updates={self.emb: self.emb / T.sqrt((self.emb ** 2).sum(axis=1)).dimshuffle(0, 'x')})
+ .. literalinclude:: ../code/rnnslu.py
+    :start-after: start-snippet-7
+    :end-before: end-snippet-7
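In other words, each row of the embedding matrix is rescaled to unit L2 norm,
``emb[i] <- emb[i] / ||emb[i]||``, and ``normalize`` is meant to be called
right after each ``train`` step. The equivalent numpy expression, for
intuition only::

    emb = emb / numpy.sqrt((emb ** 2).sum(axis=1))[:, None]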
And that's it!
@@ -373,7 +324,7 @@ The following intervals can give you some starting point:
Running the Code
++++++++++++++++
- The user can then run the code by calling:
+ After downloading the data using ``download.sh``, the user can then run the code by calling:
.. code-block:: bash