
Commit 599e458

Author: Grégoire
Message: add snippets
Parent: 723dedb

2 files changed, 38 insertions(+), 75 deletions(-)


code/rnnslu.py

15 additions, 3 deletions
@@ -30,11 +30,14 @@ def shuffle(lol, seed):
         random.seed(seed)
         random.shuffle(l)
 
-
+# start-snippet-1
 def contextwin(l, win):
     '''
     win :: int corresponding to the size of the window
     given a list of indexes composing a sentence
+
+    l :: array containing the word indexes
+
     it will return a list of list of indexes corresponding
     to context windows surrounding each word in the sentence
     '''
@@ -47,7 +50,7 @@ def contextwin(l, win):
 
     assert len(out) == len(l)
     return out
-
+# end-snippet-1
 
 # data loading functions
 def atisfold(fold):
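
As a quick illustration of what snippet-1 now delimits, a hypothetical call (the input values are invented; the behaviour follows the `contextwin` shown in the context lines above):

    >>> contextwin([0, 1, 2, 3], win=3)
    [[-1, 0, 1], [0, 1, 2], [1, 2, 3], [2, 3, -1]]

Each word index is replaced by a window of `win` indexes centred on it, with `-1` filling the positions that fall outside the sentence.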
@@ -122,7 +125,7 @@ def get_perf(filename):
 
     return {'p': precision, 'r': recall, 'f1': f1score}
 
-
+# start-snippet-2
 class RNNSLU(object):
     ''' elman neural net model '''
     def __init__(self, nh, nc, ne, de, cs):
@@ -164,11 +167,14 @@ def __init__(self, nh, nc, ne, de, cs):
         # bundle
         self.params = [self.emb, self.wx, self.wh, self.w,
                        self.bh, self.b, self.h0]
+        # end-snippet-2
         # as many columns as context window size
         # as many lines as words in the sentence
+        # start-snippet-3
         idxs = T.imatrix()
         x = self.emb[idxs].reshape((idxs.shape[0], de*cs))
         y_sentence = T.ivector('y_sentence')  # labels
+        # end-snippet-3 start-snippet-4
 
         def recurrence(x_t, h_tm1):
             h_t = T.nnet.sigmoid(T.dot(x_t, self.wx)
@@ -183,28 +189,34 @@ def recurrence(x_t, h_tm1):
 
         p_y_given_x_sentence = s[:, 0, :]
         y_pred = T.argmax(p_y_given_x_sentence, axis=1)
+        # end-snippet-4
 
         # cost and gradients and learning rate
+        # start-snippet-5
         lr = T.scalar('lr')
 
         sentence_nll = -T.mean(T.log(p_y_given_x_sentence)
                                [T.arange(x.shape[0]), y_sentence])
         sentence_gradients = T.grad(sentence_nll, self.params)
         sentence_updates = OrderedDict((p, p - lr*g)
+        # end-snippet-5
                                        for p, g in
                                        zip(self.params, sentence_gradients))
 
         # theano functions to compile
+        # start-snippet-6
         self.classify = theano.function(inputs=[idxs], outputs=y_pred)
         self.sentence_train = theano.function(inputs=[idxs, y_sentence, lr],
                                               outputs=sentence_nll,
                                               updates=sentence_updates)
+        # end-snippet-6 start-snippet-7
         self.normalize = theano.function(inputs=[],
                                          updates={self.emb:
                                                   self.emb /
                                                   T.sqrt((self.emb**2)
                                                          .sum(axis=1))
                                                   .dimshuffle(0, 'x')})
+        # end-snippet-7
 
     def train(self, x, y, window_size, learning_rate):
 
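
To make the shapes concrete, here is a minimal NumPy sketch of the forward pass that snippets 3 and 4 delimit (a hand-rolled stand-in for `theano.scan`; toy sizes invented for illustration, not the tutorial's configuration):

    import numpy as np

    rng = np.random.RandomState(0)
    nh, nc, ne, de, cs = 10, 5, 100, 8, 3  # toy sizes, named after the docstring

    emb = 0.2 * rng.uniform(-1.0, 1.0, (ne + 1, de))  # +1 row for PADDING
    wx = 0.2 * rng.uniform(-1.0, 1.0, (de * cs, nh))
    wh = 0.2 * rng.uniform(-1.0, 1.0, (nh, nh))
    w = 0.2 * rng.uniform(-1.0, 1.0, (nh, nc))
    bh, b, h0 = np.zeros(nh), np.zeros(nc), np.zeros(nh)

    def sigmoid(z):
        return 1.0 / (1.0 + np.exp(-z))

    def softmax(z):
        e = np.exp(z - z.max())
        return e / e.sum()

    # a 4-word sentence, each word already replaced by its context window
    idxs = np.array([[-1, 0, 1], [0, 1, 2], [1, 2, 3], [2, 3, -1]])
    x = emb[idxs].reshape((idxs.shape[0], de * cs))  # (n_words, de*cs)

    # the recursion that theano.scan runs in snippet-4
    h_tm1, p = h0, []
    for x_t in x:
        h_t = sigmoid(np.dot(x_t, wx) + np.dot(h_tm1, wh) + bh)
        p.append(softmax(np.dot(h_t, w) + b))
        h_tm1 = h_t

    p_y_given_x_sentence = np.stack(p)                # (n_words, nc)
    y_pred = np.argmax(p_y_given_x_sentence, axis=1)  # one slot label per word

In the Theano version, `T.nnet.softmax` returns a matrix, so `s` carries an extra axis of size one; the `s[:, 0, :]` slice in snippet-4 removes it before the argmax.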

doc/rnnslu.txt

23 additions, 72 deletions
@@ -128,26 +128,9 @@ Given a sentence i.e. an array of indexes, and a window size i.e. 1,3,5,..., we
 need to convert each word in the sentence to a context window surrounding this
 particular word. In details, we have::
 
-    def contextwin(l, win):
-        '''
-        win :: int corresponding to the size of the window
-        given a list of indexes composing a sentence
-
-        l :: array containing the word indexes
-
-        it will return a list of list of indexes corresponding
-        to context windows surrounding each word in the sentence
-        '''
-
-        assert (win % 2) == 1
-        assert win >=1
-        l = list(l)
-
-        lpadded = win//2 * [-1] + l + win//2 * [-1]
-        out = [ lpadded[i:i+win] for i in range(len(l)) ]
-
-        assert len(out) == len(l)
-        return out
+.. literalinclude:: ../code/rnnslu.py
+   :start-after: start-snippet-1
+   :end-before: end-snippet-1
 
 The index ``-1`` corresponds to the ``PADDING`` index we insert at the
 beginning/end of the sentence.
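
A side note on why ``-1`` is a safe padding value, sketched with toy NumPy values (the ``(ne+1, de)`` allocation is taken from the ``RNNSLU`` class below; the sizes here are invented): the embedding matrix is created with one extra row "for PADDING at the end", and under negative indexing ``-1`` addresses exactly that row.

    import numpy as np

    ne, de = 1000, 50             # toy vocabulary and embedding sizes
    emb = np.zeros((ne + 1, de))  # one extra row reserved for PADDING
    # index -1 and the explicit last index address the same padding row
    assert np.array_equal(emb[-1], emb[ne])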
@@ -254,71 +237,39 @@ The **hyperparameters** define the whole architecture:
 
 It gives the following code::
 
-    class RNNSLU(object):
-
-        def __init__(self, nh, nc, ne, de, cs):
-            '''
-            nh :: dimension of the hidden layer
-            nc :: number of classes
-            ne :: number of word embeddings in the vocabulary
-            de :: dimension of the word embeddings
-            cs :: word window context size
-            '''
-            self.emb = theano.shared(name='embeddings', value=0.2 * numpy.random.uniform(-1.0, 1.0,
-                (ne+1, de)).astype(theano.config.floatX)) # add one for PADDING at the end
-            self.Wx = theano.shared(name='Wx', value=0.2 * numpy.random.uniform(-1.0, 1.0,
-                (de * cs, nh)).astype(theano.config.floatX))
-            self.Wh = theano.shared(name='Wh', value=0.2 * numpy.random.uniform(-1.0, 1.0,
-                (nh, nh)).astype(theano.config.floatX))
-            self.W = theano.shared(name='W', value=0.2 * numpy.random.uniform(-1.0, 1.0,
-                (nh, nc)).astype(theano.config.floatX))
-            self.bh = theano.shared(name='bh', value=numpy.zeros(nh, dtype=theano.config.floatX))
-            self.b = theano.shared(name='b', value=numpy.zeros(nc, dtype=theano.config.floatX))
-            self.h0 = theano.shared(name='h0', value=numpy.zeros(nh, dtype=theano.config.floatX))
-
-            # bundle
-            self.params = [self.emb, self.Wx, self.Wh, self.W, self.bh, self.b, self.h0]
+.. literalinclude:: ../code/rnnslu.py
+   :start-after: start-snippet-2
+   :end-before: end-snippet-2
 
 Then we integrate the way to build the input from the embedding matrix::
 
-    idxs = T.imatrix() # as many columns as context window size/lines as words in the sentence
-    x = self.emb[idxs].reshape((idxs.shape[0], de*cs))
-    y = T.ivector('y') # label
+.. literalinclude:: ../code/rnnslu.py
+   :start-after: start-snippet-3
+   :end-before: end-snippet-3
 
 We use the scan operator to construct the recursion, works like a charm::
 
-    def recurrence(x_t, h_tm1):
-        h_t = T.nnet.sigmoid(T.dot(x_t, self.Wx) + T.dot(h_tm1, self.Wh) + self.bh)
-        s_t = T.nnet.softmax(T.dot(h_t, self.W) + self.b)
-        return [h_t, s_t]
-
-    [h, s], _ = theano.scan(fn=recurrence,
-                            sequences=x, outputs_info=[self.h0, None],
-                            n_steps=x.shape[0])
-
-    p_y_given_x_sentence = s[:, 0, :]
-    y_pred = T.argmax(p_y_given_x_sentence, axis=1)
+.. literalinclude:: ../code/rnnslu.py
+   :start-after: start-snippet-4
+   :end-before: end-snippet-4
 
 Theano will then compute all the gradients automatically to maximize the log-likelihood::
 
-    lr = T.scalar('lr')
-    nll = -T.mean(T.log(p_y_given_x_sentence)[T.arange(x.shape[0]),y])
-    gradients = T.grad( nll, self.params )
-    updates = OrderedDict((p, p - lr*g) for p, g in zip(self.params, gradients))
-
-Next compile those functions::
+.. literalinclude:: ../code/rnnslu.py
+   :start-after: start-snippet-5
+   :end-before: end-snippet-5
 
-    self.classify = theano.function(inputs=[idxs], outputs=y_pred)
+Next compile those functions::
 
-    self.train = theano.function(inputs=[idxs, y, lr],
-                                 outputs=nll,
-                                 updates=updates)
+.. literalinclude:: ../code/rnnslu.py
+   :start-after: start-snippet-6
+   :end-before: end-snippet-6
 
 We keep the word embeddings on the unit sphere by normalizing them after each update::
 
-    self.normalize = theano.function(inputs=[],
-                                     updates = {self.emb:
-                                     self.emb / T.sqrt((self.emb**2).sum(axis=1)).dimshuffle(0, 'x')})
+.. literalinclude:: ../code/rnnslu.py
+   :start-after: start-snippet-7
+   :end-before: end-snippet-7
 
 And that's it!
 
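As a sanity check on snippets 5 and 7, a hedged NumPy sketch of the two tricks involved (toy values, invented for illustration): fancy indexing picks out each word's log-probability of its gold label, and a row-wise rescaling puts every embedding back on the unit sphere.

    import numpy as np

    # per-word NLL: log p(gold label) for each word, averaged over the sentence
    p_y_given_x_sentence = np.array([[0.7, 0.2, 0.1],
                                     [0.1, 0.8, 0.1]])  # 2 words, 3 classes
    y_sentence = np.array([0, 1])                       # gold labels
    nll = -np.mean(np.log(p_y_given_x_sentence)
                   [np.arange(len(y_sentence)), y_sentence])

    # unit-sphere normalization; [:, None] plays the role of dimshuffle(0, 'x')
    emb = np.random.uniform(-1.0, 1.0, (5, 4))
    emb = emb / np.sqrt((emb ** 2).sum(axis=1))[:, None]
    assert np.allclose((emb ** 2).sum(axis=1), 1.0)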

@@ -373,7 +324,7 @@ The following intervals can give you some starting point:
 
 Running the Code
 ++++++++++++++++
 
-The user can then run the code by calling:
+After downloading the data using `download.sh`, the user can then run the code by calling:
 
 .. code-block:: bash
 