@@ -20,7 +20,7 @@
 import numpy
 from HiddenLayer import HiddenLayer
 from LogisticRegression import LogisticRegression
-from RestrictedBoltzmannMachine import RBM
+from RestrictedBoltzmannMachine import RBM, CRBM
 from utils import *
 
 
@@ -143,6 +143,80 @@ def predict(self, x):
         out = self.log_layer.predict(layer_input)
         return out
 
+'''
+ DBN w/ continuous-valued inputs (Linear Energy)
+'''
+class CDBN(DBN):
+    def __init__(self, input=None, label=None,\
+                 n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,\
+                 numpy_rng=None):
+
+        self.x = input
+        self.y = label
+
+        self.sigmoid_layers = []
+        self.rbm_layers = []
+        self.n_layers = len(hidden_layer_sizes)  # = len(self.rbm_layers)
+
+        if numpy_rng is None:
+            numpy_rng = numpy.random.RandomState(1234)
+
+        assert self.n_layers > 0
+
+        # construct multi-layer
+        for i in xrange(self.n_layers):
+            # layer_size
+            if i == 0:
+                input_size = n_ins
+            else:
+                input_size = hidden_layer_sizes[i - 1]
+
+            # layer_input
+            if i == 0:
+                layer_input = self.x
+            else:
+                layer_input = self.sigmoid_layers[-1].sample_h_given_v()
+
+            # construct sigmoid_layer
+            sigmoid_layer = HiddenLayer(input=layer_input,
+                                        n_in=input_size,
+                                        n_out=hidden_layer_sizes[i],
+                                        numpy_rng=numpy_rng,
+                                        activation=sigmoid)
+            self.sigmoid_layers.append(sigmoid_layer)
+
+            # construct rbm_layer: a CRBM models the continuous-valued inputs
+            # at the bottom; the upper layers are ordinary binary RBMs
+            if i == 0:
+                rbm_layer = CRBM(input=layer_input,  # continuous-valued inputs
+                                 n_visible=input_size,
+                                 n_hidden=hidden_layer_sizes[i],
+                                 W=sigmoid_layer.W,  # W, b are shared
+                                 hbias=sigmoid_layer.b)
+            else:
+                rbm_layer = RBM(input=layer_input,
+                                n_visible=input_size,
+                                n_hidden=hidden_layer_sizes[i],
+                                W=sigmoid_layer.W,  # W, b are shared
+                                hbias=sigmoid_layer.b)
+
+            self.rbm_layers.append(rbm_layer)
+
+        # output layer using Logistic Regression
+        self.log_layer = LogisticRegression(input=self.sigmoid_layers[-1].sample_h_given_v(),
+                                            label=self.y,
+                                            n_in=hidden_layer_sizes[-1],
+                                            n_out=n_outs)
+
+        # finetune cost: the negative log likelihood of the logistic regression layer
+        self.finetune_cost = self.log_layer.negative_log_likelihood()
+
+
 
 def test_dbn(pretrain_lr=0.1, pretraining_epochs=1000, k=1, \
              finetune_lr=0.1, finetune_epochs=200):
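The CRBM class itself lives in RestrictedBoltzmannMachine.py and is not part of this diff. For context, the "Linear Energy" in the docstring means the visible units are continuous on [0, 1] with an energy term linear in v, so p(v|h) is a truncated exponential with parameter a = dot(h, W.T) + vbias. As a rough sketch of that idea (not the repo's actual method): sample_v_given_h_sketch below is a hypothetical helper, and it assumes W has the (n_visible, n_hidden) orientation that HiddenLayer uses.

import numpy

def sample_v_given_h_sketch(h, W, vbias, rng):
    # hypothetical helper -- in the repo this logic would belong to CRBM
    a = numpy.dot(h, W.T) + vbias  # linear energy term per visible unit
    # p(v|h) = a * exp(a*v) / (exp(a) - 1) on [0, 1]  (truncated exponential)
    v_mean = 1. / (1. - numpy.exp(-a)) - 1. / a
    # inverse-CDF sampling: F(v) = (exp(a*v) - 1) / (exp(a) - 1)
    U = rng.uniform(low=0., high=1., size=a.shape)
    v_sample = numpy.log(1. + U * (numpy.exp(a) - 1.)) / a
    return v_mean, v_sample

As a approaches 0 this distribution tends to uniform on [0, 1], so production code would need to guard that limit; the sketch omits it.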
@@ -181,5 +255,51 @@ def test_dbn(pretrain_lr=0.1, pretraining_epochs=1000, k=1, \
     print dbn.predict(x)
 
 
+
+def test_cdbn(pretrain_lr=0.1, pretraining_epochs=1000, k=1, \
+              finetune_lr=0.1, finetune_epochs=200):
+
+    x = numpy.array([[0.4, 0.5, 0.5, 0., 0., 0.],
+                     [0.5, 0.3, 0.5, 0., 0., 0.],
+                     [0.4, 0.5, 0.5, 0., 0., 0.],
+                     [0., 0., 0.5, 0.3, 0.5, 0.],
+                     [0., 0., 0.5, 0.4, 0.5, 0.],
+                     [0., 0., 0.5, 0.5, 0.5, 0.]])
+
+    y = numpy.array([[1, 0],
+                     [1, 0],
+                     [1, 0],
+                     [0, 1],
+                     [0, 1],
+                     [0, 1]])
+
+    rng = numpy.random.RandomState(123)
+
+    # construct CDBN
+    dbn = CDBN(input=x, label=y, n_ins=6, hidden_layer_sizes=[5, 5], n_outs=2, numpy_rng=rng)
+
+    # pre-training (TrainUnsupervisedDBN)
+    dbn.pretrain(lr=pretrain_lr, k=k, epochs=pretraining_epochs)
+
+    # fine-tuning (DBNSupervisedFineTuning)
+    dbn.finetune(lr=finetune_lr, epochs=finetune_epochs)
+
+    # test
+    x = numpy.array([[0.5, 0.5, 0., 0., 0., 0.],
+                     [0., 0., 0., 0.5, 0.5, 0.],
+                     [0.5, 0.5, 0.5, 0.5, 0.5, 0.]])
+
+    print dbn.predict(x)
+
+
 if __name__ == "__main__":
+    print >> sys.stderr, 'DBN'
     test_dbn()
+    print >> sys.stderr
+    print >> sys.stderr, 'CDBN'
+    test_cdbn()
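pretrain and finetune are inherited from DBN and are not shown in this diff. For orientation, greedy layer-wise pretraining typically runs CD-k on each RBM in turn, feeding each layer samples from the layer below. A minimal sketch of a single CD-1 update follows; cd1_step_sketch is a hypothetical helper, and the assumption that each RBM exposes sample_h_given_v / sample_v_given_h returning (mean, sample) pairs is mine, not the repo's documented API.

import numpy

def cd1_step_sketch(rbm, v0, lr=0.1):
    # hypothetical helper; assumes (mean, sample) return pairs and
    # W of shape (n_visible, n_hidden), as in HiddenLayer above
    h0_mean, h0_sample = rbm.sample_h_given_v(v0)          # positive phase
    v1_mean, v1_sample = rbm.sample_v_given_h(h0_sample)   # reconstruction
    h1_mean, h1_sample = rbm.sample_h_given_v(v1_sample)   # negative phase
    n = v0.shape[0]
    # <v h>_data - <v h>_recon, averaged over the minibatch
    rbm.W += lr * (numpy.dot(v0.T, h0_mean) - numpy.dot(v1_sample.T, h1_mean)) / n
    rbm.vbias += lr * numpy.mean(v0 - v1_sample, axis=0)
    rbm.hbias += lr * numpy.mean(h0_mean - h1_mean, axis=0)

Because the CDBN shares W and hbias between each RBM and its sigmoid layer, updates of this kind directly initialize the weights that the supervised finetune step then refines.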