Commit 48a3d5c

Author: Yusuke Sugomori
Commit message: DBN

1 parent 5e4d0b0 commit 48a3d5c

File tree

3 files changed: +90, -108 lines changed

DeepBeliefNets.py

Lines changed: 38 additions & 61 deletions
@@ -14,19 +14,20 @@
 
 
 class DBN(object):
-    def __init__(self, input=None, n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2, \
+    def __init__(self, input=None, label=None, n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2, \
                  numpy_rng=None):   # constructor does not contain input
 
-        self.input = input
+        self.x = input
+        self.y = label
 
         self.sigmoid_layers = []
         self.rbm_layers = []
         self.n_layers = len(hidden_layer_sizes)  # = len(self.rbm_layers)
 
         if numpy_rng is None:
             numpy_rng = numpy.random.RandomState(1234)
-
 
+
         assert self.n_layers > 0
 
 
@@ -40,7 +41,7 @@ def __init__(self, input=None, n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2, \
 
             # layer_input
             if i == 0:
-                layer_input = self.input
+                layer_input = self.x
             else:
                 layer_input = self.sigmoid_layers[-1].output()
 
@@ -62,28 +63,22 @@ def __init__(self, input=None, n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2, \
             self.rbm_layers.append(rbm_layer)
 
 
-        # exit()
-
-
         # layer for output using Logistic Regression
         self.log_layer = LogisticRegression(input=self.sigmoid_layers[-1].output(),
+                                            label=self.y,
                                             n_in=hidden_layer_sizes[-1],
                                             n_out=n_outs)
 
         # finetune cost: the negative log likelihood of the logistic regression layer
-        # self.finetune_cost = self.log_layer.negative_log_likelihood()
-        # print self.finetune_cost
-
-
-        # self.errors
+        self.finetune_cost = self.log_layer.negative_log_likelihood()
 
 
 
     def pretrain(self, lr=0.1, k=1, epochs=100):
         # pre-train layer-wise
         for i in xrange(self.n_layers):
             if i == 0:
-                layer_input = self.input
+                layer_input = self.x
             else:
                 layer_input = self.sigmoid_layers[i-1].output()
             rbm = self.rbm_layers[i]
@@ -92,92 +87,74 @@ def pretrain(self, lr=0.1, k=1, epochs=100):
             for epoch in xrange(epochs):
                 c = []
                 rbm.contrastive_divergence(lr=lr, k=k, input=layer_input)
-                cost = rbm.get_reconstruction_cross_entropy()
-                # c.append(cost)
-                print >> sys.stderr, \
-                    'Pre-training layer %d, epoch %d, cost ' %(i, epoch), cost
+                # cost = rbm.get_reconstruction_cross_entropy()
+                # # c.append(cost)
+                # print >> sys.stderr, \
+                #     'Pre-training layer %d, epoch %d, cost ' %(i, epoch), cost
 
             # print numpy.mean(c)
 
 
-    def finetune(self, y, lr=0.1, epochs=100):
+    def finetune(self, lr=0.1, epochs=100):
         layer_input = self.sigmoid_layers[-1].output()
 
         # train log_layer
         epoch = 0
         done_looping = False
         while (epoch < epochs) and (not done_looping):
-            self.log_layer.train(y=y, lr=lr, input=layer_input)
-            cost = self.log_layer.negative_log_likelihood(y=y)
-            print >> sys.stderr, 'Training epoch %d, cost is ' % epoch, cost
+            self.log_layer.train(lr=lr, input=layer_input)
+            # self.finetune_cost = self.log_layer.negative_log_likelihood()
+            # print >> sys.stderr, 'Training epoch %d, cost is ' % epoch, self.finetune_cost
 
             lr *= 0.95
             epoch += 1
 
-        # print self.log_layer.W
-        # exit()
 
     def predict(self, x):
         layer_input = x
 
         for i in xrange(self.n_layers):
-            print layer_input
             sigmoid_layer = self.sigmoid_layers[i]
+            # rbm_layer = self.rbm_layers[i]
             layer_input = sigmoid_layer.output(input=layer_input)
 
-        # print self.log_layer.W
         out = self.log_layer.predict(layer_input)
-        print layer_input
-
         return out
 
 
-def test_dbn(pretrain_lr=0.1, pretraining_epochs=10, k=1, \
-             finetune_lr=0.1, finetune_epochs=10):
-
-    x = numpy.array([[0, 0, 0, 1],
-                     [0, 0, 1, 0],
-                     [0, 0, 1, 0],
-                     [0, 1, 0, 0],
-                     [0, 1, 0, 0],
-                     [1, 0, 0, 0]])
-    y = numpy.array([[0, 0, 0, 1],
-                     [0, 0, 1, 0],
-                     [0, 0, 1, 0],
-                     [0, 1, 0, 0],
-                     [0, 1, 0, 0],
-                     [1, 0, 0, 0]])
+def test_dbn(pretrain_lr=0.1, pretraining_epochs=1000, k=1, \
+             finetune_lr=0.1, finetune_epochs=1000):
+
+    x = numpy.array([[1,1,1,0,0,0],
+                     [1,0,1,0,0,0],
+                     [1,1,1,0,0,0],
+                     [0,0,1,1,1,0],
+                     [0,0,1,1,0,0],
+                     [0,0,1,1,1,0]])
+    y = numpy.array([[1, 0],
+                     [1, 0],
+                     [1, 0],
+                     [0, 1],
+                     [0, 1],
+                     [0, 1]])
 
     rng = numpy.random.RandomState(123)
 
     # construct DBN
-    dbn = DBN(input=x, n_ins=4, hidden_layer_sizes=[10], n_outs=4, numpy_rng=rng)
+    dbn = DBN(input=x, label=y, n_ins=6, hidden_layer_sizes=[2], n_outs=2, numpy_rng=rng)
 
     # pre-training (TrainUnsupervisedDBN)
     dbn.pretrain(lr=pretrain_lr, k=1, epochs=pretraining_epochs)
 
     # fine-tuning (DBNSupervisedFineTuning)
-    dbn.finetune(y, lr=finetune_lr, epochs=finetune_epochs)
+    dbn.finetune(lr=finetune_lr, epochs=finetune_epochs)
 
 
     # test
-    x = numpy.array([0, 0, 0, 1])
-    print dbn.predict(x) # 0
-    print
-
-
-    exit()
-    x = numpy.array([0, 0])
-    print dbn.predict(x) # 0
-    print
-    x = numpy.array([0, 1])
-    print dbn.predict(x) # 1
-    print
-    x = numpy.array([1, 0])
-    print dbn.predict(x) # 1
-    print
-    x = numpy.array([1, 1])
-    print dbn.predict(x) # 0
+    x = numpy.array([1, 1, 0, 0, 0, 0])
+    print dbn.predict(x)
+
 
 if __name__ == "__main__":
     test_dbn()
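
For reference, a minimal usage sketch of the revised DBN interface: labels are now passed to the constructor via label=, so finetune() no longer takes y, and predict() returns the probabilities of the logistic output layer. This is an illustrative example rather than part of the commit; the import path DeepBeliefNets and the toy data are assumptions, and it presumes the repository's modules are importable from the working directory (Python 2 style, matching the codebase).

# Hypothetical usage example; not part of the repository.
import numpy
from DeepBeliefNets import DBN   # assumes the file above is on the import path

x = numpy.array([[1, 1, 1, 0, 0, 0],
                 [0, 0, 1, 1, 1, 0]])
y = numpy.array([[1, 0],
                 [0, 1]])

rng = numpy.random.RandomState(123)

# labels live on the model (self.y), so finetune() needs no y argument
dbn = DBN(input=x, label=y, n_ins=6, hidden_layer_sizes=[2], n_outs=2, numpy_rng=rng)
dbn.pretrain(lr=0.1, k=1, epochs=1000)   # greedy layer-wise RBM pre-training
dbn.finetune(lr=0.1, epochs=1000)        # supervised training of the output layer

print dbn.predict(numpy.array([1, 1, 0, 0, 0, 0]))   # class probabilities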

LogisticRegression.py

Lines changed: 33 additions & 35 deletions
@@ -17,75 +17,73 @@
 
 
 class LogisticRegression(object):
-    def __init__(self, input, n_in, n_out):
-        self.input = input
+    def __init__(self, input, label, n_in, n_out):
+        self.x = input
+        self.y = label
         self.W = numpy.zeros((n_in, n_out))  # initialize W 0
         self.b = numpy.zeros(n_out)          # initialize bias 0
 
         # self.params = [self.W, self.b]
 
-    def train(self, y, lr=0.1, input=None):
+    def train(self, lr=0.1, input=None):
         if input is not None:
-            self.input = input
+            self.x = input
 
-        p_y_given_x = softmax(numpy.dot(self.input, self.W) + self.b)
-        d_y = y - p_y_given_x
+        p_y_given_x = softmax(numpy.dot(self.x, self.W) + self.b)
+        d_y = self.y - p_y_given_x
 
-        self.W += lr * numpy.dot(self.input.T, d_y)
+        self.W += lr * numpy.dot(self.x.T, d_y)
         self.b += lr * numpy.mean(d_y, axis=0)
-
 
         # cost = self.negative_log_likelihood()
         # return cost
 
-    def negative_log_likelihood(self, y):
-        sigmoid_activation = softmax(numpy.dot(self.input, self.W) + self.b)
+    def negative_log_likelihood(self):
+        sigmoid_activation = softmax(numpy.dot(self.x, self.W) + self.b)
 
-        # entropy = - numpy.mean(numpy.sum(y * numpy.log(sigmoid_activation), axis=1))
-        # return entropy
-
         cross_entropy = - numpy.mean(
-            numpy.sum(y * numpy.log(sigmoid_activation) +
-                      (1 - y) * numpy.log(1 - sigmoid_activation),
+            numpy.sum(self.y * numpy.log(sigmoid_activation) +
+                      (1 - self.y) * numpy.log(1 - sigmoid_activation),
                       axis=1))
 
         return cross_entropy
 
 
     def predict(self, x):
-        return sigmoid(numpy.dot(x, self.W) + self.b)
+        return softmax(numpy.dot(x, self.W) + self.b)
 
 
-def test_lr(learning_rate=0.01, n_epochs=5):
+def test_lr(learning_rate=0.01, n_epochs=1000):
     # training data
-    x = numpy.array([[-10., -5.],
-                     [-5., -10.],
-                     [5., 10.],
-                     [10., 5.]])
-    y = numpy.array([[0, 0, 0, 0],
-                     [0, 0, 0, 0],
-                     [1, 1, 1, 1],
-                     [1, 1, 1, 1]])
+    x = numpy.array([[1,1,1,0,0,0],
+                     [1,0,1,0,0,0],
+                     [1,1,1,0,0,0],
+                     [0,0,1,1,1,0],
+                     [0,0,1,1,0,0],
+                     [0,0,1,1,1,0]])
+    y = numpy.array([[1, 0],
+                     [1, 0],
+                     [1, 0],
+                     [0, 1],
+                     [0, 1],
+                     [0, 1]])
 
 
     # construct LogisticRegression
-    classifier = LogisticRegression(input=x, n_in=2, n_out=4)
+    classifier = LogisticRegression(input=x, label=y, n_in=6, n_out=2)
 
     # train
     for epoch in xrange(n_epochs):
-        classifier.train(y=y, lr=learning_rate)
-        cost = classifier.negative_log_likelihood(y=y)
-        print >> sys.stderr, 'Training epoch %d, cost is ' % epoch, cost
+        classifier.train(lr=learning_rate)
+        # cost = classifier.negative_log_likelihood(y=y)
+        # print >> sys.stderr, 'Training epoch %d, cost is ' % epoch, cost
         learning_rate *= 0.95
 
 
     # test
-    print
-    print 'test'
-    x = numpy.array([-10., -5.])
-    print >> sys.stderr, classifier.predict(x) # 0
-    x = numpy.array([10., 5.])
-    print >> sys.stderr, classifier.predict(x) # 1
+    x = numpy.array([1, 1, 0, 0, 0, 0])
+    print >> sys.stderr, classifier.predict(x)
+
 
 if __name__ == "__main__":
     test_lr()
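
The train() method above performs one step of softmax regression: the error signal d_y = self.y - p_y_given_x is the negative gradient of the cross-entropy with respect to the logits, and W and b are moved along it. A standalone sketch of the same update on toy data follows (illustrative only; softmax() is redefined locally rather than imported, and the data and epoch count are arbitrary).

# Self-contained illustration of the softmax-regression update used by train();
# hypothetical example, not part of the commit.
import numpy

def softmax(a):
    e = numpy.exp(a - numpy.max(a, axis=-1, keepdims=True))
    return e / numpy.sum(e, axis=-1, keepdims=True)

x = numpy.array([[1., 1., 1., 0., 0., 0.],
                 [0., 0., 1., 1., 1., 0.]])
y = numpy.array([[1., 0.],
                 [0., 1.]])

W = numpy.zeros((6, 2))
b = numpy.zeros(2)
lr = 0.1

for epoch in xrange(100):
    p_y_given_x = softmax(numpy.dot(x, W) + b)   # predicted class probabilities
    d_y = y - p_y_given_x                        # error signal, as in train()
    W += lr * numpy.dot(x.T, d_y)                # same weight update as train()
    b += lr * numpy.mean(d_y, axis=0)
    lr *= 0.95

print softmax(numpy.dot(x, W) + b)               # probabilities approach the one-hot labels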

RestrictedBoltzmannMachine.py

Lines changed: 19 additions & 12 deletions
@@ -77,10 +77,9 @@ def contrastive_divergence(self, lr=0.1, k=1, input=None):
 
 
         self.W += lr * (numpy.dot(self.input.T, ph_sample)
-                        - numpy.dot(nv_samples.T, nh_samples))
-        self.hbias += lr * numpy.mean(ph_sample - nh_samples, axis=0)
+                        - numpy.dot(nv_samples.T, nh_means))
         self.vbias += lr * numpy.mean(self.input - nv_samples, axis=0)
-
+        self.hbias += lr * numpy.mean(ph_sample - nh_means, axis=0)
 
         # cost = self.get_reconstruction_cross_entropy()
         # return cost
@@ -138,27 +137,35 @@ def get_reconstruction_cross_entropy(self):
 
 
 
-def test_rbm(learning_rate=0.1, k=1, training_epochs=20):
-    data = numpy.array([
-        [1,1,1,0,0],
-        [1,0,1,0,0],
-        [1,1,1,0,0],
-        [0,0,1,1,1],
-        [0,0,1,1,0],
-        [0,0,1,1,1]])
+def test_rbm(learning_rate=0.1, k=1, training_epochs=1000):
+    data = numpy.array([[1,1,1,0,0,0],
+                        [1,0,1,0,0,0],
+                        [1,1,1,0,0,0],
+                        [0,0,1,1,1,0],
+                        [0,0,1,1,0,0],
+                        [0,0,1,1,1,0]])
+
 
 
     rng = numpy.random.RandomState(123)
 
     # construct RBM
-    rbm = RBM(input=data, n_visible=5, n_hidden=2, numpy_rng=rng)
+    rbm = RBM(input=data, n_visible=6, n_hidden=2, numpy_rng=rng)
 
     for epoch in xrange(training_epochs):
        rbm.contrastive_divergence(lr=learning_rate, k=k)
        cost = rbm.get_reconstruction_cross_entropy()
        print >> sys.stderr, 'Training epoch %d, cost is ' % epoch, cost
 
 
+    # test
+    # v = numpy.array([0, 0, 0, 1, 1, 0])
+    v = numpy.array([1, 1, 0, 0, 0, 0])
+    h = sigmoid(numpy.dot(v, rbm.W) + rbm.hbias)
+
+    reconstructed_v = sigmoid(numpy.dot(h, rbm.W.T) + rbm.vbias)
+    print reconstructed_v
+
 
 if __name__ == "__main__":
     test_rbm()
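
The first hunk changes the negative-phase statistics of the CD-k update from hidden samples (nh_samples) to hidden means (nh_means), a lower-variance estimate of the model expectation. A self-contained CD-1 sketch of the corrected update follows (illustrative only; the binomial sampling of ph_sample and the toy data are assumptions about code outside this hunk).

# Hypothetical standalone CD-1 step mirroring the corrected update;
# not the repository's RBM class.
import numpy

rng = numpy.random.RandomState(123)

def sigmoid(a):
    return 1. / (1. + numpy.exp(-a))

v0 = numpy.array([[1., 1., 1., 0., 0., 0.],
                  [0., 0., 1., 1., 1., 0.]])

W = rng.uniform(-0.1, 0.1, (6, 2))   # 6 visible units, 2 hidden units
vbias = numpy.zeros(6)
hbias = numpy.zeros(2)
lr = 0.1

# positive phase
ph_means = sigmoid(numpy.dot(v0, W) + hbias)
ph_sample = rng.binomial(n=1, p=ph_means)

# negative phase: one Gibbs step
nv_means = sigmoid(numpy.dot(ph_sample, W.T) + vbias)
nv_samples = rng.binomial(n=1, p=nv_means)
nh_means = sigmoid(numpy.dot(nv_samples, W) + hbias)   # means, not samples

# parameter updates matching the corrected contrastive_divergence()
W += lr * (numpy.dot(v0.T, ph_sample) - numpy.dot(nv_samples.T, nh_means))
vbias += lr * numpy.mean(v0 - nv_samples, axis=0)
hbias += lr * numpy.mean(ph_sample - nh_means, axis=0)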
