@@ -9,14 +9,19 @@

 import numpy
 import theano
+from theano import config
 import theano.tensor as tensor
 from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams

 import imdb

 datasets = {'imdb': (imdb.load_data, imdb.prepare_data)}


+def numpy_floatX(data):
+    return numpy.asarray(data, dtype=config.floatX)
+
+
 def get_minibatches_idx(n, minibatch_size, shuffle=False):
     """
     Used to shuffle the dataset at each iteration.
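
The new `numpy_floatX` helper replaces the `'float32'` literals used throughout the file: it casts to whatever float width `config.floatX` names, so switching precision becomes a configuration change instead of a code edit. A quick sketch of the behavior, assuming a stock Theano install:

    import numpy
    from theano import config

    # config.floatX is 'float64' by default and 'float32' when set via
    # THEANO_FLAGS='floatX=float32' (or ~/.theanorc); the cast follows it.
    print(numpy.asarray(0., dtype=config.floatX).dtype)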
@@ -85,14 +90,14 @@ def init_params(options):
     # embedding
     randn = numpy.random.rand(options['n_words'],
                               options['dim_proj'])
-    params['Wemb'] = (0.01 * randn).astype('float32')
+    params['Wemb'] = (0.01 * randn).astype(config.floatX)
     params = get_layer(options['encoder'])[0](options,
                                               params,
                                               prefix=options['encoder'])
     # classifier
     params['U'] = 0.01 * numpy.random.randn(options['dim_proj'],
-                                            options['ydim']).astype('float32')
-    params['b'] = numpy.zeros((options['ydim'],)).astype('float32')
+                                            options['ydim']).astype(config.floatX)
+    params['b'] = numpy.zeros((options['ydim'],)).astype(config.floatX)

     return params

@@ -122,7 +127,7 @@ def get_layer(name):
 def ortho_weight(ndim):
     W = numpy.random.randn(ndim, ndim)
     u, s, v = numpy.linalg.svd(W)
-    return u.astype('float32')
+    return u.astype(config.floatX)


 def param_init_lstm(options, params, prefix='lstm'):
@@ -142,7 +147,7 @@ def param_init_lstm(options, params, prefix='lstm'):
                            ortho_weight(options['dim_proj'])], axis=1)
     params[_p(prefix, 'U')] = U
     b = numpy.zeros((4 * options['dim_proj'],))
-    params[_p(prefix, 'b')] = b.astype('float32')
+    params[_p(prefix, 'b')] = b.astype(config.floatX)

     return params

@@ -185,9 +190,11 @@ def _step(m_, x_, h_, c_):
     dim_proj = options['dim_proj']
     rval, updates = theano.scan(_step,
                                 sequences=[mask, state_below],
-                                outputs_info=[tensor.alloc(0., n_samples,
+                                outputs_info=[tensor.alloc(numpy_floatX(0.),
+                                                           n_samples,
                                                            dim_proj),
-                                              tensor.alloc(0., n_samples,
+                                              tensor.alloc(numpy_floatX(0.),
+                                                           n_samples,
                                                            dim_proj)],
                                 name=_p(prefix, '_layers'),
                                 n_steps=nsteps)
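
Wrapping the constant passed to `tensor.alloc` makes the dtype of the initial hidden and cell states explicitly `config.floatX`, instead of leaving it to Theano's casting rules for the bare Python float `0.`. A minimal sketch (sizes illustrative, `numpy_floatX` as introduced above):

    import numpy
    import theano.tensor as tensor
    from theano import config

    def numpy_floatX(data):  # same helper as introduced above
        return numpy.asarray(data, dtype=config.floatX)

    n_samples, dim_proj = 16, 128  # illustrative minibatch size and state width
    h0 = tensor.alloc(numpy_floatX(0.), n_samples, dim_proj)
    assert h0.dtype == config.floatX  # the scan state stays in floatX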
@@ -228,13 +235,13 @@ def sgd(lr, tparams, grads, x, mask, y, cost):


 def adadelta(lr, tparams, grads, x, mask, y, cost):
-    zipped_grads = [theano.shared(p.get_value() * numpy.float32(0.),
+    zipped_grads = [theano.shared(p.get_value() * numpy_floatX(0.),
                                   name='%s_grad' % k)
                     for k, p in tparams.iteritems()]
-    running_up2 = [theano.shared(p.get_value() * numpy.float32(0.),
+    running_up2 = [theano.shared(p.get_value() * numpy_floatX(0.),
                                  name='%s_rup2' % k)
                    for k, p in tparams.iteritems()]
-    running_grads2 = [theano.shared(p.get_value() * numpy.float32(0.),
+    running_grads2 = [theano.shared(p.get_value() * numpy_floatX(0.),
                                     name='%s_rgrad2' % k)
                       for k, p in tparams.iteritems()]

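
The `p.get_value() * numpy_floatX(0.)` idiom allocates, for each parameter, a zero-filled shared accumulator with that parameter's shape; switching from `numpy.float32(0.)` to the helper ties the accumulator's dtype to `config.floatX` as well. `rmsprop` below uses the same pattern. A self-contained sketch with a hypothetical parameter `W`:

    import numpy
    import theano
    from theano import config

    W = theano.shared(numpy.zeros((3, 4), dtype=config.floatX), name='W')
    zero = numpy.asarray(0., dtype=config.floatX)  # what numpy_floatX(0.) returns
    acc = theano.shared(W.get_value() * zero, name='%s_grad' % 'W')
    assert acc.get_value().shape == (3, 4)  # same shape as W, dtype follows floatX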
@@ -261,13 +268,13 @@ def adadelta(lr, tparams, grads, x, mask, y, cost):


 def rmsprop(lr, tparams, grads, x, mask, y, cost):
-    zipped_grads = [theano.shared(p.get_value() * numpy.float32(0.),
+    zipped_grads = [theano.shared(p.get_value() * numpy_floatX(0.),
                                   name='%s_grad' % k)
                     for k, p in tparams.iteritems()]
-    running_grads = [theano.shared(p.get_value() * numpy.float32(0.),
+    running_grads = [theano.shared(p.get_value() * numpy_floatX(0.),
                                    name='%s_rgrad' % k)
                      for k, p in tparams.iteritems()]
-    running_grads2 = [theano.shared(p.get_value() * numpy.float32(0.),
+    running_grads2 = [theano.shared(p.get_value() * numpy_floatX(0.),
                                     name='%s_rgrad2' % k)
                       for k, p in tparams.iteritems()]

@@ -280,7 +287,7 @@ def rmsprop(lr, tparams, grads, x, mask, y, cost):
                                     updates=zgup + rgup + rg2up,
                                     name='rmsprop_f_grad_shared')

-    updir = [theano.shared(p.get_value() * numpy.float32(0.),
+    updir = [theano.shared(p.get_value() * numpy_floatX(0.),
                            name='%s_updir' % k)
              for k, p in tparams.iteritems()]
     updir_new = [(ud, 0.9 * ud - 1e-4 * zg / tensor.sqrt(rg2 - rg ** 2 + 1e-4))
@@ -299,10 +306,10 @@ def build_model(tparams, options):
     trng = RandomStreams(1234)

     # Used for dropout.
-    use_noise = theano.shared(numpy.float32(0.))
+    use_noise = theano.shared(numpy_floatX(0.))

     x = tensor.matrix('x', dtype='int64')
-    mask = tensor.matrix('mask', dtype='float32')
+    mask = tensor.matrix('mask', dtype=config.floatX)
     y = tensor.vector('y', dtype='int64')

     n_timesteps = x.shape[0]
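
`mask` is declared with `config.floatX` because it is multiplied into float activations at every timestep, so its dtype must match the rest of the graph under any floatX setting. A hypothetical sketch of the kind of padding mask `imdb.prepare_data` is expected to supply (lengths and shapes illustrative):

    import numpy
    from theano import config

    lengths = [3, 5, 2]  # hypothetical per-sequence lengths in a minibatch
    maxlen, n_samples = max(lengths), len(lengths)
    mask = numpy.zeros((maxlen, n_samples)).astype(config.floatX)
    for i, length in enumerate(lengths):
        mask[:length, i] = 1.  # 1. over real tokens, 0. over padding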
@@ -335,7 +342,7 @@ def pred_probs(f_pred_prob, prepare_data, data, iterator, verbose=False):
     the probabilities of new examples.
     """
     n_samples = len(data[0])
-    probs = numpy.zeros((n_samples, 2)).astype('float32')
+    probs = numpy.zeros((n_samples, 2)).astype(config.floatX)

     n_done = 0

@@ -367,7 +374,7 @@ def pred_error(f_pred, prepare_data, data, iterator, verbose=False):
         preds = f_pred(x, mask)
         targets = numpy.array(data[1])[valid_index]
         valid_err += (preds == targets).sum()
-    valid_err = 1. - numpy.float32(valid_err) / len(data[0])
+    valid_err = 1. - numpy_floatX(valid_err) / len(data[0])

     return valid_err

@@ -395,7 +402,7 @@ def train_lstm(
     use_dropout=True,  # if False slightly faster, but worse test error
                        # This frequently needs a bigger model.
     reload_model="",  # Path to a saved model we want to start from.
-    test_size=-1,  # If >0, we will trunc the test set to this number of example.
+    test_size=-1,  # If >0, we keep only this number of test examples.
 ):

     # Model options
@@ -432,7 +439,7 @@ def train_lstm(
      y, f_pred_prob, f_pred, cost) = build_model(tparams, model_options)

     if decay_c > 0.:
-        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
+        decay_c = theano.shared(numpy_floatX(decay_c), name='decay_c')
         weight_decay = 0.
         weight_decay += (tparams['U'] ** 2).sum()
         weight_decay *= decay_c
@@ -571,12 +578,6 @@ def train_lstm(


 if __name__ == '__main__':
-
-    # We must have floatX=float32 for this tutorial to work correctly.
-    theano.config.floatX = "float32"
-    # The next line is the new Theano default. This is a speed up.
-    theano.config.scan.allow_gc = False
-
     # See function train for all possible parameters and their definitions.
     train_lstm(
         #reload_model="lstm_model.npz",
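
With the hardcoded overrides removed from `__main__`, `floatX` and `scan.allow_gc` are left to Theano's normal configuration channels. Assuming standard Theano flag syntax, the equivalent settings would be:

    # Per run, via environment variable:
    #   THEANO_FLAGS='floatX=float32,scan.allow_gc=False' python lstm.py
    #
    # Or persistently, via ~/.theanorc:
    #   [global]
    #   floatX = float32
    #
    #   [scan]
    #   allow_gc = False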