# RNN-RBM deep learning tutorial
# More information at http://deeplearning.net/tutorial/rnnrbm.html

import glob
import numpy
import pylab
import sys

from midi.utils import midiread, midiwrite
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

# Don't use a Python long here, as it doesn't work on 32-bit machines.
numpy.random.seed(0x6c6c6c6c)
rng = RandomStreams(seed=numpy.random.randint(1 << 30))


def build_rbm(v, W, bv, bh, k):
    '''Construct a k-step Gibbs chain starting at v for an RBM.

    v : Theano vector or matrix
        If a matrix, multiple chains will be run in parallel (batch).
    W : Theano matrix
        Weight matrix of the RBM.
    bv : Theano vector
        Visible bias vector of the RBM.
    bh : Theano vector
        Hidden bias vector of the RBM.
    k : scalar or Theano scalar
        Length of the Gibbs chain.

    Return a (v_sample, cost, monitor, updates) tuple:

    v_sample : Theano vector or matrix with the same shape as `v`
        Corresponds to the generated sample(s).
    cost : Theano scalar
        Expression whose gradient with respect to W, bv, bh is the CD-k
        approximation to the log-likelihood of `v` (training example) under
        the RBM. The cost is averaged in the batch case.
    monitor: Theano scalar
        Pseudo log-likelihood (also averaged in the batch case).
    updates: dictionary of Theano variable -> Theano variable
        The `updates` object returned by scan.'''

    def gibbs_step(v):
        # One full Gibbs step: sample h given v, then resample v given h.
        mean_h = T.nnet.sigmoid(T.dot(v, W) + bh)
        h = rng.binomial(size=mean_h.shape, n=1, p=mean_h,
                         dtype=theano.config.floatX)
        mean_v = T.nnet.sigmoid(T.dot(h, W.T) + bv)
        v = rng.binomial(size=mean_v.shape, n=1, p=mean_v,
                         dtype=theano.config.floatX)
        return mean_v, v

    chain, updates = theano.scan(lambda v: gibbs_step(v)[1], outputs_info=[v],
                                 n_steps=k)
    v_sample = chain[-1]

    mean_v = gibbs_step(v_sample)[0]
    monitor = T.xlogx.xlogy0(v, mean_v) + T.xlogx.xlogy0(1 - v, 1 - mean_v)
    monitor = monitor.sum() / v.shape[0]

    # Free energy of a binary-visible RBM; the CD-k cost below is the mean
    # free-energy difference between the data v and the chain sample v_sample.
    def free_energy(v):
        return -(v * bv).sum() - T.log(1 + T.exp(T.dot(v, W) + bh)).sum()
    cost = (free_energy(v) - free_energy(v_sample)) / v.shape[0]

    return v_sample, cost, monitor, updates


def shared_normal(num_rows, num_cols, scale=1):
    '''Initialize a matrix shared variable with normally distributed
    elements.'''
    return theano.shared(numpy.random.normal(
        scale=scale, size=(num_rows, num_cols)).astype(theano.config.floatX))


def shared_zeros(*shape):
    '''Initialize a vector shared variable with zero elements.'''
    return theano.shared(numpy.zeros(shape, dtype=theano.config.floatX))
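

# --- Illustrative sketch (not part of the original tutorial) ---
# With the helpers above, build_rbm can be used on its own. This hypothetical
# demo_rbm_sampler (our name, not the tutorial's) compiles a k-step Gibbs
# sampler for a small standalone RBM.
def demo_rbm_sampler(n_v=12, n_h=8, k=10):
    W = shared_normal(n_v, n_h, 0.01)
    bv = shared_zeros(n_v)
    bh = shared_zeros(n_h)
    v = T.matrix()
    v_sample, cost, monitor, updates = build_rbm(v, W, bv, bh, k)
    # The scan updates carry the random generator state and must be passed to
    # theano.function, otherwise successive calls would reuse random numbers.
    return theano.function([v], v_sample, updates=updates)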


def build_rnnrbm(n_visible, n_hidden, n_hidden_recurrent):
    '''Construct a symbolic RNN-RBM and initialize parameters.

    n_visible : integer
        Number of visible units.
    n_hidden : integer
        Number of hidden units of the conditional RBMs.
    n_hidden_recurrent : integer
        Number of hidden units of the RNN.

    Return a (v, v_sample, cost, monitor, params, updates_train, v_t,
    updates_generate) tuple:

    v : Theano matrix
        Symbolic variable holding an input sequence (used during training)
    v_sample : Theano matrix
        Symbolic variable holding the negative particles for CD log-likelihood
        gradient estimation (used during training)
    cost : Theano scalar
        Expression whose gradient (considering v_sample constant) corresponds
        to the LL gradient of the RNN-RBM (used during training)
    monitor : Theano scalar
        Frame-level pseudo-likelihood (useful for monitoring during training)
    params : tuple of Theano shared variables
        The parameters of the model to be optimized during training.
    updates_train : dictionary of Theano variable -> Theano variable
        Update object that should be passed to theano.function when compiling
        the training function.
    v_t : Theano matrix
        Symbolic variable holding a generated sequence (used during sampling)
    updates_generate : dictionary of Theano variable -> Theano variable
        Update object that should be passed to theano.function when compiling
        the generation function.'''

    W = shared_normal(n_visible, n_hidden, 0.01)
    bv = shared_zeros(n_visible)
    bh = shared_zeros(n_hidden)
    Wuh = shared_normal(n_hidden_recurrent, n_hidden, 0.0001)
    Wuv = shared_normal(n_hidden_recurrent, n_visible, 0.0001)
    Wvu = shared_normal(n_visible, n_hidden_recurrent, 0.0001)
    Wuu = shared_normal(n_hidden_recurrent, n_hidden_recurrent, 0.0001)
    bu = shared_zeros(n_hidden_recurrent)

    # learned parameters as shared variables
    params = W, bv, bh, Wuh, Wuv, Wvu, Wuu, bu

    v = T.matrix()  # a training sequence
    u0 = T.zeros((n_hidden_recurrent,))  # initial value for the RNN hidden
                                         # units

    # If `v_t` is given, deterministic recurrence to compute the variable
    # biases bv_t, bh_t at each time step. If `v_t` is None, same recurrence
    # but with a separate Gibbs chain at each time step to sample (generate)
    # from the RNN-RBM. The resulting sample v_t is returned in order to be
    # passed down to the sequence history.
    def recurrence(v_t, u_tm1):
        bv_t = bv + T.dot(u_tm1, Wuv)
        bh_t = bh + T.dot(u_tm1, Wuh)
        generate = v_t is None
        if generate:
            v_t, _, _, updates = build_rbm(T.zeros((n_visible,)), W, bv_t,
                                           bh_t, k=25)
        u_t = T.tanh(bu + T.dot(v_t, Wvu) + T.dot(u_tm1, Wuu))
        return ([v_t, u_t], updates) if generate else [u_t, bv_t, bh_t]

    # For training, the deterministic recurrence is used to compute all the
    # {bv_t, bh_t, 1 <= t <= T} given v. Conditional RBMs can then be trained
    # in batches using those parameters.
    (u_t, bv_t, bh_t), updates_train = theano.scan(
        lambda v_t, u_tm1, *_: recurrence(v_t, u_tm1),
        sequences=v, outputs_info=[u0, None, None], non_sequences=params)
    v_sample, cost, monitor, updates_rbm = build_rbm(v, W, bv_t[:], bh_t[:],
                                                     k=15)
    updates_train.update(updates_rbm)

    # symbolic loop for sequence generation
    (v_t, u_t), updates_generate = theano.scan(
        lambda u_tm1, *_: recurrence(None, u_tm1),
        outputs_info=[None, u0], non_sequences=params, n_steps=200)

    return (v, v_sample, cost, monitor, params, updates_train, v_t,
            updates_generate)
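

# --- Illustrative sketch (not part of the original tutorial) ---
# build_rnnrbm returns everything needed to compile a sampler directly,
# without the RnnRbm wrapper class below. demo_generator is a hypothetical
# name of ours.
def demo_generator(n_visible=88):
    (v, v_sample, cost, monitor, params, updates_train, v_t,
     updates_generate) = build_rnnrbm(n_visible, 150, 100)
    # updates_generate carries the per-time-step Gibbs chain updates created
    # inside recurrence(); theano.function needs them to advance the RNG.
    return theano.function([], v_t, updates=updates_generate)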


class RnnRbm:
    '''Simple class to train an RNN-RBM from MIDI files and to generate sample
    sequences.'''

    def __init__(self, n_hidden=150, n_hidden_recurrent=100, lr=0.001,
                 r=(21, 109), dt=0.3):
        '''Constructs and compiles Theano functions for training and sequence
        generation.

        n_hidden : integer
            Number of hidden units of the conditional RBMs.
        n_hidden_recurrent : integer
            Number of hidden units of the RNN.
        lr : float
            Learning rate
        r : (integer, integer) tuple
            Specifies the pitch range of the piano-roll in MIDI note numbers,
            including r[0] but not r[1], such that r[1]-r[0] is the number of
            visible units of the RBM at a given time step. The default
            (21, 109) corresponds to the full range of piano (88 notes).
        dt : float
            Sampling period when converting the MIDI files into piano-rolls,
            or equivalently the time difference between consecutive time
            steps.'''

        self.r = r
        self.dt = dt
        (v, v_sample, cost, monitor, params, updates_train, v_t,
         updates_generate) = build_rnnrbm(r[1] - r[0], n_hidden,
                                          n_hidden_recurrent)

        # SGD update rule: treat v_sample as constant so that grad(cost)
        # yields the CD approximation to the log-likelihood gradient.
        gradient = T.grad(cost, params, consider_constant=[v_sample])
        updates_train.update(dict((p, p - lr * g) for p, g in zip(params,
                                                                  gradient)))
        self.train_function = theano.function([v], monitor,
                                              updates=updates_train)
        self.generate_function = theano.function([], v_t,
                                                 updates=updates_generate)

    def train(self, files, batch_size=100, num_epochs=200):
        '''Train the RNN-RBM via stochastic gradient descent (SGD) using MIDI
        files converted to piano-rolls.

        files : list of strings
            List of MIDI files that will be loaded as piano-rolls for
            training.
        batch_size : integer
            Training sequences will be split into subsequences of at most this
            size before applying the SGD updates.
        num_epochs : integer
            Number of epochs (pass over the training set) performed. The user
            can safely interrupt training with Ctrl+C at any time.'''

        assert len(files) > 0, 'Training set is empty!' \
                               ' (did you download the data files?)'
        dataset = [midiread(f, self.r, self.dt).piano_roll for f in files]
        try:
            for epoch in xrange(num_epochs):
                numpy.random.shuffle(dataset)
                costs = []

                for s, sequence in enumerate(dataset):
                    for i in xrange(0, len(sequence), batch_size):
                        cost = self.train_function(sequence[i:i + batch_size])
                        costs.append(cost)

                print 'Epoch %i/%i' % (epoch + 1, num_epochs),
                print numpy.mean(costs)
                sys.stdout.flush()

        except KeyboardInterrupt:
            print 'Interrupted by user.'

    def generate(self, filename, show=True):
        '''Generate a sample sequence, plot the resulting piano-roll and save
        it as a MIDI file.

        filename : string
            A MIDI file will be created at this location.
        show : boolean
            If True, a piano-roll of the generated sequence will be shown.'''

        piano_roll = self.generate_function()
        midiwrite(filename, piano_roll, self.r, self.dt)
        if show:
            extent = (0, self.dt * len(piano_roll)) + self.r
            pylab.figure()
            pylab.imshow(piano_roll.T, origin='lower', aspect='auto',
                         interpolation='nearest', cmap=pylab.cm.gray_r,
                         extent=extent)
            pylab.xlabel('time (s)')
            pylab.ylabel('MIDI note number')
            pylab.title('generated piano-roll')
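

# --- Illustrative sketch (not part of the original tutorial) ---
# A smaller, faster variant of the entry point below, e.g. as a smoke test.
# demo_quick_run and its hyperparameter choices are hypothetical.
def demo_quick_run(pattern='../data/Nottingham/train/*.mid'):
    model = RnnRbm(n_hidden=50, n_hidden_recurrent=25)
    model.train(glob.glob(pattern), num_epochs=10)
    model.generate('demo_sample.mid', show=False)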


if __name__ == '__main__':
    model = RnnRbm()
    model.train(glob.glob('../data/Nottingham/train/*.mid'))
    model.generate('sample1.mid')
    model.generate('sample2.mid')
    pylab.show()