Skip to content

Commit 3e619f1

Browse files
committed
standard coding style and dataset location
1 parent 16928fb commit 3e619f1

File tree

3 files changed

+363
-297
lines changed

3 files changed

+363
-297
lines changed

code/rnnrbm.py

Lines changed: 183 additions & 150 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@
33
# RNN-RBM deep learning tutorial
44
# More information at http://deeplearning.net/tutorial/rnnrbm.html
55

6+
import glob
67
import numpy
78
import pylab
8-
import glob
99
import sys
1010

1111
from midi.utils import midiread, midiwrite
@@ -19,7 +19,7 @@
1919

2020

2121
def build_rbm(v, W, bv, bh, k):
22-
'''Construct a k-step Gibbs chain starting at v with RBM parameters W, bv, bh.
22+
'''Construct a k-step Gibbs chain starting at v for an RBM.
2323
2424
v : Theano vector or matrix
2525
If a matrix, multiple chains will be run in parallel (batch).
@@ -45,39 +45,44 @@ def build_rbm(v, W, bv, bh, k):
4545
updates: dictionary of Theano variable -> Theano variable
4646
The `updates` object returned by scan.'''
4747

48-
def gibbs_step(v):
49-
mean_h = T.nnet.sigmoid(T.dot(v, W) + bh)
50-
h = rng.binomial(size=mean_h.shape, n=1, p=mean_h, dtype=theano.config.floatX)
51-
mean_v = T.nnet.sigmoid(T.dot(h, W.T) + bv)
52-
v = rng.binomial(size=mean_v.shape, n=1, p=mean_v, dtype=theano.config.floatX)
53-
return mean_v, v
48+
def gibbs_step(v):
49+
mean_h = T.nnet.sigmoid(T.dot(v, W) + bh)
50+
h = rng.binomial(size=mean_h.shape, n=1, p=mean_h,
51+
dtype=theano.config.floatX)
52+
mean_v = T.nnet.sigmoid(T.dot(h, W.T) + bv)
53+
v = rng.binomial(size=mean_v.shape, n=1, p=mean_v,
54+
dtype=theano.config.floatX)
55+
return mean_v, v
56+
57+
chain, updates = theano.scan(lambda v: gibbs_step(v)[1], outputs_info=[v],
58+
n_steps=k)
59+
v_sample = chain[-1]
5460

55-
chain, updates = theano.scan(lambda v: gibbs_step(v)[1], outputs_info=[v], n_steps=k)
56-
v_sample = chain[-1]
57-
58-
mean_v = gibbs_step(v_sample)[0]
59-
monitor = T.xlogx.xlogy0(v, mean_v) + T.xlogx.xlogy0(1-v, 1-mean_v)
60-
monitor = monitor.sum() / v.shape[0]
61+
mean_v = gibbs_step(v_sample)[0]
62+
monitor = T.xlogx.xlogy0(v, mean_v) + T.xlogx.xlogy0(1 - v, 1 - mean_v)
63+
monitor = monitor.sum() / v.shape[0]
6164

62-
free_energy = lambda v: -(v * bv).sum() - T.log(1 + T.exp(T.dot(v, W) + bh)).sum()
63-
cost = (free_energy(v) - free_energy(v_sample)) / v.shape[0]
64-
65-
return v_sample, cost, monitor, updates
65+
def free_energy(v):
66+
return -(v * bv).sum() - T.log(1 + T.exp(T.dot(v, W) + bh)).sum()
67+
cost = (free_energy(v) - free_energy(v_sample)) / v.shape[0]
68+
69+
return v_sample, cost, monitor, updates
6670

6771

6872
def shared_normal(num_rows, num_cols, scale=1):
    '''Create a matrix-shaped Theano shared variable of Gaussian noise.

    num_rows : integer
        Number of rows of the matrix.
    num_cols : integer
        Number of columns of the matrix.
    scale : float
        Value forwarded as the `scale` argument of numpy.random.normal
        (the standard deviation of the samples).

    Return the shared variable; its initial value is cast to
    theano.config.floatX.'''
    dims = (num_rows, num_cols)
    samples = numpy.random.normal(scale=scale, size=dims)
    return theano.shared(samples.astype(theano.config.floatX))
7177

7278

7379
def shared_zeros(*shape):
    '''Create a Theano shared variable of the given shape filled with zeros.

    shape : integers
        Dimensions of the array, given as separate positional arguments
        (a single argument yields a vector).

    Return the shared variable; its initial value has dtype
    theano.config.floatX.'''
    initial_value = numpy.zeros(shape, dtype=theano.config.floatX)
    return theano.shared(initial_value)
7682

7783

7884
def build_rnnrbm(n_visible, n_hidden, n_hidden_recurrent):
79-
'''Construct a symbolic RNN-RBM, including initialized parameters in shared variables and
80-
symbolic variables for the training cost and sequence generation.
85+
'''Construct a symbolic RNN-RBM and initialize parameters.
8186
8287
n_visible : integer
8388
Number of visible units.
@@ -86,148 +91,176 @@ def build_rnnrbm(n_visible, n_hidden, n_hidden_recurrent):
8691
n_hidden_recurrent : integer
8792
Number of hidden units of the RNN.
8893
89-
Return a (v, v_sample, cost, monitor, params, updates_train, v_t, updates_generate) tuple:
94+
Return a (v, v_sample, cost, monitor, params, updates_train, v_t,
95+
updates_generate) tuple:
9096
9197
v : Theano matrix
9298
Symbolic variable holding an input sequence (used during training)
9399
v_sample : Theano matrix
94-
Symbolic variable holding the negative particles for CD log-likelihood gradient estimation
95-
(used during training)
100+
Symbolic variable holding the negative particles for CD log-likelihood
101+
gradient estimation (used during training)
96102
cost : Theano scalar
97-
Expression whose gradient (considering v_sample constant) corresponds to the LL gradient of the RNN-RBM.
98-
(used during training)
103+
Expression whose gradient (considering v_sample constant) corresponds to the
104+
LL gradient of the RNN-RBM (used during training)
99105
monitor : Theano scalar
100106
Frame-level pseudo-likelihood (useful for monitoring during training)
101107
params : tuple of Theano shared variables
102108
The parameters of the model to be optimized during training.
103109
updates_train : dictionary of Theano variable -> Theano variable
104-
Update object that should be passed to theano.function when compiling the training function.
110+
Update object that should be passed to theano.function when compiling the
111+
training function.
105112
v_t : Theano matrix
106113
Symbolic variable holding a generated sequence (used during sampling)
107114
updates_generate : dictionary of Theano variable -> Theano variable
108-
Update object that should be passed to theano.function when compiling the generation function.'''
109-
110-
W = shared_normal(n_visible, n_hidden, 0.01)
111-
bv = shared_zeros(n_visible)
112-
bh = shared_zeros(n_hidden)
113-
Wuh = shared_normal(n_hidden_recurrent, n_hidden, 0.0001)
114-
Wuv = shared_normal(n_hidden_recurrent, n_visible, 0.0001)
115-
Wvu = shared_normal(n_visible, n_hidden_recurrent, 0.0001)
116-
Wuu = shared_normal(n_hidden_recurrent, n_hidden_recurrent, 0.0001)
117-
bu = shared_zeros(n_hidden_recurrent)
118-
119-
params = W, bv, bh, Wuh, Wuv, Wvu, Wuu, bu # learned parameters as shared variables
120-
121-
v = T.matrix() # a training sequence
122-
u0 = T.zeros((n_hidden_recurrent,)) # initial value for the RNN hidden units
123-
124-
# if `v_t` is given, deterministic recurrence to compute the variable biases bv_t, bh_t at each time step
125-
# if `v_t` is None, same recurrence but with a separate Gibbs chain at each time step to sample (generate) from the RNN-RBM
126-
# the resulting sample v_t is returned in order to be passed down to the sequence history
127-
def recurrence(v_t, u_tm1):
128-
bv_t = bv + T.dot(u_tm1, Wuv)
129-
bh_t = bh + T.dot(u_tm1, Wuh)
130-
generate = v_t is None
131-
if generate:
132-
v_t, _, _, updates = build_rbm(T.zeros((n_visible,)), W, bv_t, bh_t, k=25)
133-
u_t = T.tanh(bu + T.dot(v_t, Wvu) + T.dot(u_tm1, Wuu))
134-
return ([v_t, u_t], updates) if generate else [u_t, bv_t, bh_t]
135-
136-
# for training, the deterministic recurrence is used to compute all the {bv_t, bh_t, 1 <= t <= T} given v
137-
# conditional RBMs can then be trained in batches using those parameters
138-
(u_t, bv_t, bh_t), updates_train = theano.scan(lambda v_t, u_tm1, *_: recurrence(v_t, u_tm1), sequences=v, outputs_info=[u0, None, None], non_sequences=params)
139-
v_sample, cost, monitor, updates_rbm = build_rbm(v, W, bv_t[:], bh_t[:], k=15)
140-
updates_train.update(updates_rbm)
141-
142-
# symbolic loop for sequence generation
143-
(v_t, u_t), updates_generate = theano.scan(lambda u_tm1, *_: recurrence(None, u_tm1), outputs_info=[None, u0], non_sequences=params, n_steps=200)
144-
145-
return v, v_sample, cost, monitor, params, updates_train, v_t, updates_generate
115+
Update object that should be passed to theano.function when compiling the
116+
generation function.'''
117+
118+
W = shared_normal(n_visible, n_hidden, 0.01)
119+
bv = shared_zeros(n_visible)
120+
bh = shared_zeros(n_hidden)
121+
Wuh = shared_normal(n_hidden_recurrent, n_hidden, 0.0001)
122+
Wuv = shared_normal(n_hidden_recurrent, n_visible, 0.0001)
123+
Wvu = shared_normal(n_visible, n_hidden_recurrent, 0.0001)
124+
Wuu = shared_normal(n_hidden_recurrent, n_hidden_recurrent, 0.0001)
125+
bu = shared_zeros(n_hidden_recurrent)
126+
127+
params = W, bv, bh, Wuh, Wuv, Wvu, Wuu, bu # learned parameters as shared
128+
# variables
129+
130+
v = T.matrix() # a training sequence
131+
u0 = T.zeros((n_hidden_recurrent,)) # initial value for the RNN hidden
132+
# units
133+
134+
# If `v_t` is given, deterministic recurrence to compute the variable
135+
# biases bv_t, bh_t at each time step. If `v_t` is None, same recurrence
136+
# but with a separate Gibbs chain at each time step to sample (generate)
137+
# from the RNN-RBM. The resulting sample v_t is returned in order to be
138+
# passed down to the sequence history.
139+
def recurrence(v_t, u_tm1):
140+
bv_t = bv + T.dot(u_tm1, Wuv)
141+
bh_t = bh + T.dot(u_tm1, Wuh)
142+
generate = v_t is None
143+
if generate:
144+
v_t, _, _, updates = build_rbm(T.zeros((n_visible,)), W, bv_t,
145+
bh_t, k=25)
146+
u_t = T.tanh(bu + T.dot(v_t, Wvu) + T.dot(u_tm1, Wuu))
147+
return ([v_t, u_t], updates) if generate else [u_t, bv_t, bh_t]
148+
149+
# For training, the deterministic recurrence is used to compute all the
150+
# {bv_t, bh_t, 1 <= t <= T} given v. Conditional RBMs can then be trained
151+
# in batches using those parameters.
152+
(u_t, bv_t, bh_t), updates_train = theano.scan(
153+
lambda v_t, u_tm1, *_: recurrence(v_t, u_tm1),
154+
sequences=v, outputs_info=[u0, None, None], non_sequences=params)
155+
v_sample, cost, monitor, updates_rbm = build_rbm(v, W, bv_t[:], bh_t[:],
156+
k=15)
157+
updates_train.update(updates_rbm)
158+
159+
# symbolic loop for sequence generation
160+
(v_t, u_t), updates_generate = theano.scan(
161+
lambda u_tm1, *_: recurrence(None, u_tm1),
162+
outputs_info=[None, u0], non_sequences=params, n_steps=200)
163+
164+
return (v, v_sample, cost, monitor, params, updates_train, v_t,
165+
updates_generate)
146166

147167

148168
class RnnRbm:
    '''Simple class to train an RNN-RBM from MIDI files and to generate sample
    sequences.'''

    def __init__(self, n_hidden=150, n_hidden_recurrent=100, lr=0.001,
                 r=(21, 109), dt=0.3):
        '''Construct and compile the Theano functions for training and
        sequence generation.

        n_hidden : integer
            Number of hidden units of the conditional RBMs.
        n_hidden_recurrent : integer
            Number of hidden units of the RNN.
        lr : float
            Learning rate.
        r : (integer, integer) tuple
            Specifies the pitch range of the piano-roll in MIDI note numbers,
            including r[0] but not r[1], such that r[1]-r[0] is the number of
            visible units of the RBM at a given time step. The default
            (21, 109) corresponds to the full range of piano (88 notes).
        dt : float
            Sampling period when converting the MIDI files into piano-rolls,
            or equivalently the time difference between consecutive time
            steps.'''

        self.r = r
        self.dt = dt
        (v, v_sample, cost, monitor, params, updates_train, v_t,
         updates_generate) = build_rnnrbm(r[1] - r[0], n_hidden,
                                          n_hidden_recurrent)

        # The negative particles are treated as constants when
        # differentiating the cost.
        gradient = T.grad(cost, params, consider_constant=[v_sample])
        # Plain SGD step for every parameter, merged into the updates
        # returned with the training graph.
        updates_train.update(
            dict((p, p - lr * g) for p, g in zip(params, gradient)))
        self.train_function = theano.function([v], monitor,
                                              updates=updates_train)
        self.generate_function = theano.function([], v_t,
                                                 updates=updates_generate)

    def train(self, files, batch_size=100, num_epochs=200):
        '''Train the RNN-RBM via stochastic gradient descent (SGD) using MIDI
        files converted to piano-rolls.

        files : list of strings
            List of MIDI files that will be loaded as piano-rolls for
            training. Must not be empty.
        batch_size : integer
            Training sequences will be split into subsequences of at most
            this size before applying the SGD updates.
        num_epochs : integer
            Number of epochs (pass over the training set) performed. The user
            can safely interrupt training with Ctrl+C at any time.

        Raise AssertionError if `files` is empty. After each epoch, the mean
        of the values returned by the training function is written to
        standard output.'''

        # Raised explicitly rather than through an `assert` statement so the
        # check is not stripped when Python runs with -O; the exception type
        # is kept for backward compatibility.
        if len(files) == 0:
            raise AssertionError('Training set is empty!'
                                 ' (did you download the data files?)')
        dataset = [midiread(f, self.r, self.dt).piano_roll for f in files]
        try:
            for epoch in range(num_epochs):
                numpy.random.shuffle(dataset)
                costs = []

                for sequence in dataset:
                    for i in range(0, len(sequence), batch_size):
                        cost = self.train_function(sequence[i:i + batch_size])
                        costs.append(cost)

                # Version-neutral equivalent of the former print statements.
                sys.stdout.write('Epoch %i/%i %s\n'
                                 % (epoch + 1, num_epochs, numpy.mean(costs)))
                sys.stdout.flush()

        except KeyboardInterrupt:
            sys.stdout.write('Interrupted by user.\n')

    def generate(self, filename, show=True):
        '''Generate a sample sequence, plot the resulting piano-roll and save
        it as a MIDI file.

        filename : string
            A MIDI file will be created at this location.
        show : boolean
            If True, a piano-roll of the generated sequence will be shown.'''

        piano_roll = self.generate_function()
        midiwrite(filename, piano_roll, self.r, self.dt)
        if show:
            # Horizontal axis in seconds, vertical axis in MIDI note numbers.
            extent = (0, self.dt * len(piano_roll)) + self.r
            pylab.figure()
            pylab.imshow(piano_roll.T, origin='lower', aspect='auto',
                         interpolation='nearest', cmap=pylab.cm.gray_r,
                         extent=extent)
            pylab.xlabel('time (s)')
            pylab.ylabel('MIDI note number')
            pylab.title('generated piano-roll')
233259

260+
261+
if __name__ == '__main__':
    # Train on the Nottingham training MIDI files (the pattern is resolved
    # against the current working directory), then write two generated
    # samples and display their piano-rolls.
    model = RnnRbm()
    training_files = glob.glob('../data/Nottingham/train/*.mid')
    model.train(training_files)
    for sample_path in ('sample1.mid', 'sample2.mid'):
        model.generate(sample_path)
    pylab.show()

data/download.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
#!/bin/sh

# Download the datasets into the current directory.
wget http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz
# Nottingham MIDI dataset read by code/rnnrbm.py. The archive sits directly
# under data/ (not under data/mnist/); it is unpacked only if the download
# succeeded.
wget http://www.iro.umontreal.ca/~lisa/deep/data/Nottingham.zip && unzip Nottingham.zip

0 commit comments

Comments
 (0)