#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Restricted Boltzmann Machine (RBM)

References:
  - Y. Bengio, P. Lamblin, D. Popovici, H. Larochelle: Greedy Layer-Wise
    Training of Deep Networks, Advances in Neural Information Processing
    Systems 19, 2007

  - DeepLearningTutorials
    https://github.com/lisa-lab/DeepLearningTutorials
"""

import sys
import numpy
from utils import *
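
# Added note: the wildcard import above is assumed to supply sigmoid() from the
# companion `utils` module of the reference repository. As a minimal fallback
# (a sketch, not part of the original file), define it if it was not imported.
if 'sigmoid' not in globals():
    def sigmoid(x):
        return 1. / (1. + numpy.exp(-x))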


class RestrictedBoltzmannMachine(object):
    def __init__(self, input=None, n_visible=2, n_hidden=3,
                 W=None, hbias=None, vbias=None, numpy_rng=None):

        self.n_visible = n_visible  # num of units in visible (input) layer
        self.n_hidden = n_hidden    # num of units in hidden layer

        if numpy_rng is None:
            numpy_rng = numpy.random.RandomState(1234)

        if W is None:
            a = 1. / n_visible
            initial_W = numpy.array(numpy_rng.uniform(  # initialize W uniformly
                low=-a,
                high=a,
                size=(n_visible, n_hidden)))

            W = initial_W

        if hbias is None:
            hbias = numpy.zeros(n_hidden)   # initialize h bias to 0

        if vbias is None:
            vbias = numpy.zeros(n_visible)  # initialize v bias to 0

        self.numpy_rng = numpy_rng
        self.input = input
        self.W = W
        self.hbias = hbias
        self.vbias = vbias

        self.params = [self.W, self.hbias, self.vbias]

    def contrastive_divergence(self, lr=0.1, k=1):
        ''' CD-k: one parameter update from k steps of Gibbs sampling. '''
        # positive phase: hidden activations driven by the data
        pre_sigmoid_ph, ph_mean, ph_sample = self.sample_h_given_v(self.input)

        chain_start = ph_sample

        # negative phase: run the Gibbs chain h -> v -> h for k steps
        for step in range(k):
            if step == 0:
                pre_sigmoid_nvs, nv_means, nv_samples, \
                    pre_sigmoid_nhs, nh_means, nh_samples = self.gibbs_hvh(chain_start)
            else:
                pre_sigmoid_nvs, nv_means, nv_samples, \
                    pre_sigmoid_nhs, nh_means, nh_samples = self.gibbs_hvh(nh_samples)

        # gradient estimate <v h>_data - <v h>_model: outer products summed over
        # the batch, transposed so the shapes match W's (n_visible, n_hidden);
        # biases use the mean difference over the batch (axis=0)
        self.W += lr * (numpy.dot(self.input.T, ph_mean) -
                        numpy.dot(nv_samples.T, nh_means))
        self.hbias += lr * numpy.mean(ph_mean - nh_means, axis=0)
        self.vbias += lr * numpy.mean(self.input - nv_samples, axis=0)

        cost = self.get_reconstruction_cross_entropy()
        return cost

    def sample_h_given_v(self, v0_sample):
        pre_sigmoid_h1, h1_mean = self.propup(v0_sample)
        h1_sample = self.numpy_rng.binomial(size=h1_mean.shape,  # discrete: binomial
                                            n=1,
                                            p=h1_mean)

        return [pre_sigmoid_h1, h1_mean, h1_sample]

    def sample_v_given_h(self, h0_sample):
        pre_sigmoid_v1, v1_mean = self.propdown(h0_sample)
        v1_sample = self.numpy_rng.binomial(size=v1_mean.shape,  # discrete: binomial
                                            n=1,
                                            p=v1_mean)

        return [pre_sigmoid_v1, v1_mean, v1_sample]

    def propup(self, v):
        pre_sigmoid_activation = numpy.dot(v, self.W) + self.hbias
        return [pre_sigmoid_activation, sigmoid(pre_sigmoid_activation)]

    def propdown(self, h):
        pre_sigmoid_activation = numpy.dot(h, self.W.T) + self.vbias
        return [pre_sigmoid_activation, sigmoid(pre_sigmoid_activation)]

    def gibbs_hvh(self, h0_sample):
        ''' One step of Gibbs sampling starting from the hidden units. '''
        pre_sigmoid_v1, v1_mean, v1_sample = self.sample_v_given_h(h0_sample)
        pre_sigmoid_h1, h1_mean, h1_sample = self.sample_h_given_v(v1_sample)

        return [pre_sigmoid_v1, v1_mean, v1_sample,
                pre_sigmoid_h1, h1_mean, h1_sample]

    def get_reconstruction_cross_entropy(self):
        ''' Mean cross-entropy between the input and its one-step mean-field
            reconstruction; used as a cheap monitor of training progress. '''
        pre_sigmoid_activation_h = numpy.dot(self.input, self.W) + self.hbias
        sigmoid_activation_h = sigmoid(pre_sigmoid_activation_h)

        pre_sigmoid_activation_v = numpy.dot(sigmoid_activation_h, self.W.T) + self.vbias
        sigmoid_activation_v = sigmoid(pre_sigmoid_activation_v)

        cross_entropy = -numpy.mean(
            numpy.sum(self.input * numpy.log(sigmoid_activation_v) +
                      (1 - self.input) * numpy.log(1 - sigmoid_activation_v),
                      axis=1))

        return cross_entropy

    def free_energy(self, v_sample):
        ''' Free energy F(v) = -v.vbias - sum_j log(1 + exp((vW + hbias)_j)). '''
        wx_b = numpy.dot(v_sample, self.W) + self.hbias
        vbias_term = numpy.dot(v_sample, self.vbias)
        hidden_term = numpy.sum(numpy.log(1 + numpy.exp(wx_b)), axis=1)
        return -hidden_term - vbias_term
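
    def reconstruct(self, v):
        # Added helper (a sketch, not in the original file): deterministic
        # one-step mean-field reconstruction v -> h -> v using propup/propdown,
        # handy for eyeballing what the trained model has learned.
        _, h_mean = self.propup(v)
        _, v_mean = self.propdown(h_mean)
        return v_mean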


def test_rbm(learning_rate=0.1, k=1, training_epochs=15):
    # 6x6 training matrix: each row is a training example, each column a visible unit
    data = numpy.array([
        [1, 1, 1, 0, 0, 0],
        [1, 0, 1, 0, 0, 0],
        [1, 1, 1, 0, 0, 0],
        [0, 0, 1, 1, 1, 0],
        [0, 0, 1, 1, 0, 0],
        [0, 0, 1, 1, 1, 0]])

    rng = numpy.random.RandomState(123)

    # construct RBM
    rbm = RestrictedBoltzmannMachine(input=data, n_visible=6, n_hidden=2, numpy_rng=rng)

    # train with CD-k
    for epoch in range(training_epochs):
        cost = rbm.contrastive_divergence(lr=learning_rate, k=k)
        print('Training epoch %d, cost is %f' % (epoch, cost), file=sys.stderr)
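
    # Illustrative check (an addition, not part of the original script): run two
    # held-out patterns through the reconstruct() helper added above and print
    # the reconstructed visible probabilities as a quick sanity check.
    test_data = numpy.array([
        [1, 1, 0, 0, 0, 0],
        [0, 0, 0, 1, 1, 0]])
    print(rbm.reconstruct(test_data), file=sys.stderr)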


if __name__ == "__main__":
    test_rbm()