Commit ec112fa

use floatX for lstm

1 parent: 26914e4
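
This commit replaces every hard-coded 'float32' in the LSTM tutorial with theano.config.floatX, so the code runs in whichever precision Theano is configured for. floatX is just a dtype name read from Theano's configuration; a minimal sketch of the idea:

    import numpy
    import theano

    # floatX is a string such as 'float32' or 'float64'; the tutorial now
    # derives every array and constant dtype from it.
    w = numpy.zeros((2, 2), dtype=theano.config.floatX)
    print(w.dtype)  # matches the configured floatX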

File tree

code/imdb.py
code/lstm.py

2 files changed: +29 -27 lines changed

code/imdb.py

Lines changed: 2 additions & 1 deletion
@@ -3,6 +3,7 @@
 import os
 
 import numpy
+import theano
 
 
 def prepare_data(seqs, labels, maxlen=None):
@@ -39,7 +40,7 @@ def prepare_data(seqs, labels, maxlen=None):
     maxlen = numpy.max(lengths)
 
     x = numpy.zeros((maxlen, n_samples)).astype('int64')
-    x_mask = numpy.zeros((maxlen, n_samples)).astype('float32')
+    x_mask = numpy.zeros((maxlen, n_samples)).astype(theano.config.floatX)
     for idx, s in enumerate(seqs):
         x[:lengths[idx], idx] = s
         x_mask[:lengths[idx], idx] = 1.
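
With this change the mask built by prepare_data follows the configured dtype instead of always being float32. A minimal check, assuming the tutorial's imdb module is importable:

    import theano
    from imdb import prepare_data

    # Two toy sequences of word indices, one label each.
    x, x_mask, labels = prepare_data([[1, 2, 3], [4, 5]], [1, 0])
    assert x.dtype == 'int64'                    # word indices stay int64
    assert x_mask.dtype == theano.config.floatX  # no longer hard-coded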

code/lstm.py

Lines changed: 27 additions & 26 deletions
@@ -9,6 +9,7 @@
 
 import numpy
 import theano
+from theano import config
 import theano.tensor as tensor
 from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
 
@@ -17,6 +18,10 @@
 datasets = {'imdb': (imdb.load_data, imdb.prepare_data)}
 
 
+def numpy_floatX(data):
+    return numpy.asarray(data, dtype=config.floatX)
+
+
 def get_minibatches_idx(n, minibatch_size, shuffle=False):
     """
     Used to shuffle the dataset at each iteration.
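
The new numpy_floatX helper is the heart of the change: it casts a Python scalar to a 0-d numpy array of the configured dtype, so constants built with it always agree with the parameters. For example:

    import numpy
    from theano import config

    def numpy_floatX(data):
        return numpy.asarray(data, dtype=config.floatX)

    zero = numpy_floatX(0.)
    print(zero.dtype)  # config.floatX, e.g. float32
    print(zero.shape)  # () -- a 0-d array, usable wherever a scalar is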
@@ -85,14 +90,14 @@ def init_params(options):
     # embedding
     randn = numpy.random.rand(options['n_words'],
                               options['dim_proj'])
-    params['Wemb'] = (0.01 * randn).astype('float32')
+    params['Wemb'] = (0.01 * randn).astype(config.floatX)
     params = get_layer(options['encoder'])[0](options,
                                               params,
                                               prefix=options['encoder'])
     # classifier
     params['U'] = 0.01 * numpy.random.randn(options['dim_proj'],
-                                            options['ydim']).astype('float32')
-    params['b'] = numpy.zeros((options['ydim'],)).astype('float32')
+                                            options['ydim']).astype(config.floatX)
+    params['b'] = numpy.zeros((options['ydim'],)).astype(config.floatX)
 
     return params
 
@@ -122,7 +127,7 @@ def get_layer(name):
 def ortho_weight(ndim):
     W = numpy.random.randn(ndim, ndim)
     u, s, v = numpy.linalg.svd(W)
-    return u.astype('float32')
+    return u.astype(config.floatX)
 
 
 def param_init_lstm(options, params, prefix='lstm'):
@@ -142,7 +147,7 @@ def param_init_lstm(options, params, prefix='lstm'):
                            ortho_weight(options['dim_proj'])], axis=1)
     params[_p(prefix, 'U')] = U
     b = numpy.zeros((4 * options['dim_proj'],))
-    params[_p(prefix, 'b')] = b.astype('float32')
+    params[_p(prefix, 'b')] = b.astype(config.floatX)
 
     return params
 
@@ -185,9 +190,11 @@ def _step(m_, x_, h_, c_):
     dim_proj = options['dim_proj']
     rval, updates = theano.scan(_step,
                                 sequences=[mask, state_below],
-                                outputs_info=[tensor.alloc(0., n_samples,
+                                outputs_info=[tensor.alloc(numpy_floatX(0.),
+                                                           n_samples,
                                                            dim_proj),
-                                              tensor.alloc(0., n_samples,
+                                              tensor.alloc(numpy_floatX(0.),
+                                                           n_samples,
                                                            dim_proj)],
                                 name=_p(prefix, '_layers'),
                                 n_steps=nsteps)
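
Allocating the initial hidden and cell states from numpy_floatX(0.) pins the scan outputs to the configured dtype instead of relying on how Theano autocasts the bare literal 0., so they match the LSTM parameters under both float32 and float64. A small sketch of the guarantee (sizes are illustrative):

    import numpy
    import theano.tensor as tensor
    from theano import config

    zero = numpy.asarray(0., dtype=config.floatX)  # what numpy_floatX(0.) returns
    h0 = tensor.alloc(zero, 16, 128)               # n_samples=16, dim_proj=128
    assert h0.dtype == config.floatX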
@@ -228,13 +235,13 @@ def sgd(lr, tparams, grads, x, mask, y, cost):
 
 
 def adadelta(lr, tparams, grads, x, mask, y, cost):
-    zipped_grads = [theano.shared(p.get_value() * numpy.float32(0.),
+    zipped_grads = [theano.shared(p.get_value() * numpy_floatX(0.),
                                   name='%s_grad' % k)
                     for k, p in tparams.iteritems()]
-    running_up2 = [theano.shared(p.get_value() * numpy.float32(0.),
+    running_up2 = [theano.shared(p.get_value() * numpy_floatX(0.),
                                  name='%s_rup2' % k)
                    for k, p in tparams.iteritems()]
-    running_grads2 = [theano.shared(p.get_value() * numpy.float32(0.),
+    running_grads2 = [theano.shared(p.get_value() * numpy_floatX(0.),
                                     name='%s_rgrad2' % k)
                       for k, p in tparams.iteritems()]
 
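
The optimizer state follows the same pattern: p.get_value() * numpy_floatX(0.) yields a zero array with the parameter's shape, so each shared accumulator starts out in the configured precision. A minimal sketch with one fake parameter:

    import numpy
    import theano
    from theano import config

    zero = numpy.asarray(0., dtype=config.floatX)  # numpy_floatX(0.)
    p = theano.shared(numpy.zeros((3, 4), dtype=config.floatX), name='W')
    acc = theano.shared(p.get_value() * zero, name='W_grad')
    print(acc.get_value().shape)  # (3, 4) -- same shape as the parameter
    print(acc.dtype)              # config.floatX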
@@ -261,13 +268,13 @@ def adadelta(lr, tparams, grads, x, mask, y, cost):
 
 
 def rmsprop(lr, tparams, grads, x, mask, y, cost):
-    zipped_grads = [theano.shared(p.get_value() * numpy.float32(0.),
+    zipped_grads = [theano.shared(p.get_value() * numpy_floatX(0.),
                                   name='%s_grad' % k)
                     for k, p in tparams.iteritems()]
-    running_grads = [theano.shared(p.get_value() * numpy.float32(0.),
+    running_grads = [theano.shared(p.get_value() * numpy_floatX(0.),
                                    name='%s_rgrad' % k)
                      for k, p in tparams.iteritems()]
-    running_grads2 = [theano.shared(p.get_value() * numpy.float32(0.),
+    running_grads2 = [theano.shared(p.get_value() * numpy_floatX(0.),
                                     name='%s_rgrad2' % k)
                       for k, p in tparams.iteritems()]
 
@@ -280,7 +287,7 @@ def rmsprop(lr, tparams, grads, x, mask, y, cost):
                                     updates=zgup + rgup + rg2up,
                                     name='rmsprop_f_grad_shared')
 
-    updir = [theano.shared(p.get_value() * numpy.float32(0.),
+    updir = [theano.shared(p.get_value() * numpy_floatX(0.),
                            name='%s_updir' % k)
              for k, p in tparams.iteritems()]
     updir_new = [(ud, 0.9 * ud - 1e-4 * zg / tensor.sqrt(rg2 - rg ** 2 + 1e-4))
@@ -299,10 +306,10 @@ def build_model(tparams, options):
     trng = RandomStreams(1234)
 
     # Used for dropout.
-    use_noise = theano.shared(numpy.float32(0.))
+    use_noise = theano.shared(numpy_floatX(0.))
 
     x = tensor.matrix('x', dtype='int64')
-    mask = tensor.matrix('mask', dtype='float32')
+    mask = tensor.matrix('mask', dtype=config.floatX)
     y = tensor.vector('y', dtype='int64')
 
     n_timesteps = x.shape[0]
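
Declaring the symbolic mask with dtype=config.floatX keeps the graph input consistent with the numpy mask that prepare_data now produces; Theano rejects inputs whose dtype does not match the declared variable. A minimal sketch of the pairing:

    import numpy
    import theano
    import theano.tensor as tensor
    from theano import config

    mask = tensor.matrix('mask', dtype=config.floatX)
    f = theano.function([mask], mask.sum())
    f(numpy.ones((7, 16), dtype=config.floatX))  # dtypes agree, so this runs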
@@ -335,7 +342,7 @@ def pred_probs(f_pred_prob, prepare_data, data, iterator, verbose=False):
     the probabilities of new examples.
     """
     n_samples = len(data[0])
-    probs = numpy.zeros((n_samples, 2)).astype('float32')
+    probs = numpy.zeros((n_samples, 2)).astype(config.floatX)
 
     n_done = 0
 
@@ -367,7 +374,7 @@ def pred_error(f_pred, prepare_data, data, iterator, verbose=False):
         preds = f_pred(x, mask)
         targets = numpy.array(data[1])[valid_index]
         valid_err += (preds == targets).sum()
-    valid_err = 1. - numpy.float32(valid_err) / len(data[0])
+    valid_err = 1. - numpy_floatX(valid_err) / len(data[0])
 
     return valid_err
 
@@ -395,7 +402,7 @@ def train_lstm(
     use_dropout=True,  # if False slightly faster, but worse test error
                        # This frequently needs a bigger model.
     reload_model="",  # Path to a saved model we want to start from.
-    test_size=-1,  # If >0, we will trunc the test set to this number of example.
+    test_size=-1,  # If >0, we keep only this number of test examples.
 ):
 
     # Model options
@@ -432,7 +439,7 @@ def train_lstm(
      y, f_pred_prob, f_pred, cost) = build_model(tparams, model_options)
 
     if decay_c > 0.:
-        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
+        decay_c = theano.shared(numpy_floatX(decay_c), name='decay_c')
         weight_decay = 0.
         weight_decay += (tparams['U'] ** 2).sum()
         weight_decay *= decay_c
@@ -571,12 +578,6 @@ def train_lstm(
 
 
 if __name__ == '__main__':
-
-    # We must have floatX=float32 for this tutorial to work correctly.
-    theano.config.floatX = "float32"
-    # The next line is the new Theano default. This is a speed up.
-    theano.config.scan.allow_gc = False
-
     # See function train for all possible parameters and their definitions.
     train_lstm(
         #reload_model="lstm_model.npz",
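
With the dtype threaded through the code, the __main__ block no longer needs to force floatX=float32 (or disable scan garbage collection) at import time; both become ordinary run-time configuration. For example, THEANO_FLAGS='floatX=float32,scan.allow_gc=False' python lstm.py reproduces the old behavior, while THEANO_FLAGS=floatX=float64 python lstm.py trains in double precision.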
