@@ -128,26 +128,9 @@ Given a sentence i.e. an array of indexes, and a window size i.e. 1,3,5,..., we
need to convert each word in the sentence to a context window surrounding this
particular word. In detail, we have::
- def contextwin(l, win):
-     '''
-     win :: int corresponding to the size of the window
-
-     l :: array containing the word indexes
-
-     given a list of indexes composing a sentence, it returns
-     a list of lists of indexes corresponding to the context
-     windows surrounding each word in the sentence
-     '''
-     assert (win % 2) == 1
-     assert win >= 1
-     l = list(l)
-
-     lpadded = win // 2 * [-1] + l + win // 2 * [-1]
-     out = [lpadded[i:i + win] for i in range(len(l))]
-
-     assert len(out) == len(l)
-     return out
+ .. literalinclude:: ../code/rnnslu.py
+    :start-after: start-snippet-1
+    :end-before: end-snippet-1
The index ``-1`` corresponds to the ``PADDING`` index we insert at the
beginning/end of the sentence.
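As a quick sanity check (the values here are purely illustrative), a window of
size 3 over a five-word sentence behaves as follows::

    >>> contextwin([0, 1, 2, 3, 4], 3)
    [[-1, 0, 1], [0, 1, 2], [1, 2, 3], [2, 3, 4], [3, 4, -1]]

Each word is mapped to the window centred on it, with ``-1`` filling the
positions that fall outside the sentence.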
@@ -254,71 +237,39 @@ The **hyperparameters** define the whole architecture:
It gives the following code::
- class RNNSLU(object):
-
-     def __init__(self, nh, nc, ne, de, cs):
-         '''
-         nh :: dimension of the hidden layer
-         nc :: number of classes
-         ne :: number of word embeddings in the vocabulary
-         de :: dimension of the word embeddings
-         cs :: word window context size
-         '''
-         # add one row for the PADDING index at the end
-         self.emb = theano.shared(name='embeddings',
-                                  value=0.2 * numpy.random.uniform(-1.0, 1.0, (ne + 1, de))
-                                  .astype(theano.config.floatX))
-         self.Wx = theano.shared(name='Wx',
-                                 value=0.2 * numpy.random.uniform(-1.0, 1.0, (de * cs, nh))
-                                 .astype(theano.config.floatX))
-         self.Wh = theano.shared(name='Wh',
-                                 value=0.2 * numpy.random.uniform(-1.0, 1.0, (nh, nh))
-                                 .astype(theano.config.floatX))
-         self.W = theano.shared(name='W',
-                                value=0.2 * numpy.random.uniform(-1.0, 1.0, (nh, nc))
-                                .astype(theano.config.floatX))
-         self.bh = theano.shared(name='bh', value=numpy.zeros(nh, dtype=theano.config.floatX))
-         self.b = theano.shared(name='b', value=numpy.zeros(nc, dtype=theano.config.floatX))
-         self.h0 = theano.shared(name='h0', value=numpy.zeros(nh, dtype=theano.config.floatX))
-
-         # bundle the parameters for the gradient computation
-         self.params = [self.emb, self.Wx, self.Wh, self.W, self.bh, self.b, self.h0]
+ .. literalinclude:: ../code/rnnslu.py
+    :start-after: start-snippet-2
+    :end-before: end-snippet-2
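For illustration only (these hyperparameter values are an assumption, not a
recommendation), the class could be instantiated as::

    rnn = RNNSLU(nh=100, nc=n_classes, ne=vocab_size, de=50, cs=5)

where ``n_classes`` and ``vocab_size`` stand for quantities read from the
dataset.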
Next we construct the model input from the embedding matrix::
- idxs = T.imatrix()  # one row per word in the sentence, one column per context window position
- x = self.emb[idxs].reshape((idxs.shape[0], de * cs))
- y = T.ivector('y')  # labels
+ .. literalinclude:: ../code/rnnslu.py
+    :start-after: start-snippet-3
+    :end-before: end-snippet-3
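To make the reshape concrete: with ``de = 50``, ``cs = 5`` and a sentence of 5
words, ``idxs`` has shape ``(5, 5)`` and ``x`` has shape ``(5, 250)``, one row
per word holding the concatenated embeddings of its context window. A numpy
sketch of the same indexing, outside the Theano graph (sizes are illustrative)::

    import numpy

    de, cs = 50, 5
    emb = numpy.random.uniform(-1.0, 1.0, (1001, de))      # ne + 1 rows; row -1 acts as PADDING
    idxs = numpy.asarray(contextwin([0, 1, 2, 3, 4], cs))  # shape (5, 5)
    x = emb[idxs].reshape((idxs.shape[0], de * cs))        # shape (5, 250)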
We use the scan operator to construct the recursion; it works like a charm::
- def recurrence(x_t, h_tm1):
-     h_t = T.nnet.sigmoid(T.dot(x_t, self.Wx) + T.dot(h_tm1, self.Wh) + self.bh)
-     s_t = T.nnet.softmax(T.dot(h_t, self.W) + self.b)
-     return [h_t, s_t]
-
- [h, s], _ = theano.scan(fn=recurrence,
-                         sequences=x,
-                         outputs_info=[self.h0, None],
-                         n_steps=x.shape[0])
-
- p_y_given_x_sentence = s[:, 0, :]
- y_pred = T.argmax(p_y_given_x_sentence, axis=1)
+ .. literalinclude:: ../code/rnnslu.py
+    :start-after: start-snippet-4
+    :end-before: end-snippet-4
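Note that ``T.nnet.softmax`` always returns a matrix, so ``s`` has shape
``(n_steps, 1, nc)`` and ``s[:, 0, :]`` simply drops the spurious middle axis.
If ``theano.scan`` itself is unfamiliar, here is a minimal sketch of the same
accumulator pattern on an unrelated toy problem, a running sum over a vector::

    import theano
    import theano.tensor as T

    v = T.vector('v')
    acc, _ = theano.scan(fn=lambda x_t, a_tm1: a_tm1 + x_t,
                         sequences=v,
                         outputs_info=T.zeros_like(v[0]))
    cumsum = theano.function([v], acc)
    # cumsum([1., 2., 3.]) returns [1., 3., 6.]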
Theano will then compute all the gradients automatically to maximize the log-likelihood::
- lr = T.scalar('lr')
- nll = -T.mean(T.log(p_y_given_x_sentence)[T.arange(x.shape[0]), y])
- gradients = T.grad(nll, self.params)
- updates = OrderedDict((p, p - lr * g) for p, g in zip(self.params, gradients))
-
- Next compile those functions::
+ .. literalinclude:: ../code/rnnslu.py
+    :start-after: start-snippet-5
+    :end-before: end-snippet-5
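These updates implement plain stochastic gradient descent: each parameter is
moved one step of size ``lr`` along its negative gradient. The same pattern on
a toy objective (illustrative only, not part of the tutorial code)::

    import theano
    import theano.tensor as T
    from collections import OrderedDict

    w = theano.shared(0.0, name='w')
    lr = T.scalar('lr')
    cost = (w - 3.0) ** 2
    updates = OrderedDict([(w, w - lr * T.grad(cost, w))])
    step = theano.function([lr], cost, updates=updates)
    # repeated calls to step(0.1) drive w towards the minimum at 3.0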
- self.classify = theano.function(inputs=[idxs], outputs=y_pred)
+ Next compile those functions::
- self.train = theano.function(inputs=[idxs, y, lr],
-                              outputs=nll,
-                              updates=updates)
+ .. literalinclude:: ../code/rnnslu.py
+    :start-after: start-snippet-6
+    :end-before: end-snippet-6
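Once compiled, training reduces to ordinary function calls, one sentence at a
time. A sketch of the inner loop (the dataset variables and the learning rate
are illustrative names, not part of the tutorial code)::

    for sentence, labels in zip(train_sentences, train_labels):
        words = numpy.asarray(contextwin(sentence, cs), dtype='int32')
        rnn.train(words, numpy.asarray(labels, dtype='int32'), 0.01)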
We keep the word embeddings on the unit sphere by normalizing them after each update::
- self.normalize = theano.function(
-     inputs=[],
-     updates={self.emb: self.emb / T.sqrt((self.emb ** 2).sum(axis=1)).dimshuffle(0, 'x')})
+ .. literalinclude:: ../code/rnnslu.py
+    :start-after: start-snippet-7
+    :end-before: end-snippet-7
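In other words, each row of the embedding matrix is rescaled to unit L2 norm,
``emb[i] <- emb[i] / ||emb[i]||``, and ``normalize`` is meant to be called
right after each ``train`` step. The equivalent numpy expression, for
intuition only::

    emb = emb / numpy.sqrt((emb ** 2).sum(axis=1))[:, None]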
And that's it!
@@ -373,7 +324,7 @@ The following intervals can give you some starting point:
Running the Code
++++++++++++++++
- The user can then run the code by calling:
+ After downloading the data using ``download.sh``, the user can then run the code by calling:
.. code-block:: bash