From 5e4203f357f718038d116b1c123c164d78a45749 Mon Sep 17 00:00:00 2001
From: Yusuke Sugomori <restinthenest@gmail.com>
Date: Wed, 1 May 2013 20:33:54 +0900
Subject: [PATCH 01/45] LogisticRegression.scala

---
 scala/LogisticRegression.scala | 133 +++++++++++++++++++++++++++++++++
 1 file changed, 133 insertions(+)
 create mode 100644 scala/LogisticRegression.scala

diff --git a/scala/LogisticRegression.scala b/scala/LogisticRegression.scala
new file mode 100644
index 0000000..2386f5e
--- /dev/null
+++ b/scala/LogisticRegression.scala
@@ -0,0 +1,133 @@
+// $ scalac LogisticRegression.scala
+// $ scala LogisticRegression
+
+import scala.math
+
+class LogisticRegression(val N: Int, val n_in: Int, val n_out: Int) {
+
+  val W: Array[Array[Double]] = Array.ofDim[Double](n_out, n_in)
+  val b: Array[Double] = new Array[Double](n_out)
+
+  def train(x: Array[Int], y: Array[Int], lr: Double) {
+    val p_y_given_x: Array[Double] = new Array[Double](n_out)
+    val dy: Array[Double] = new Array[Double](n_out)
+
+    var i: Int = 0
+    var j: Int = 0
+    for(i <- 0 until n_out) {
+      p_y_given_x(i) = 0
+      for(j <- 0 until n_in) {
+        p_y_given_x(i) += W(i)(j) * x(j)
+      }
+      p_y_given_x(i) += b(i)
+    }
+    softmax(p_y_given_x)
+
+    for(i <- 0 until n_out) {
+      dy(i) = y(i) - p_y_given_x(i)
+
+      for(j <- 0 until n_in) {
+        W(i)(j) += lr * dy(i) * x(j) / N
+      }
+      b(i) += lr * dy(i) / N
+    }
+  }
+
+
+  def softmax(x: Array[Double]) {
+    var max: Double = 0.0
+    var sum: Double = 0.0
+
+    var i: Int = 0
+    for(i <- 0 until n_out) if(max < x(i)) max = x(i)
+
+    for(i <- 0 until n_out) {
+      x(i) = math.exp(x(i) - max)
+      sum += x(i)
+    }
+
+    for(i <- 0 until n_out) x(i) /= sum
+  }
+
+
+  def predict(x: Array[Int], y: Array[Double]) {
+    var i: Int = 0
+    var j: Int = 0
+    for(i <- 0 until n_out) {
+      y(i) = 0
+      for(j <- 0 until n_in) {
+        y(i) += W(i)(j) * x(j)
+      }
+      y(i) += b(i)
+    }
+    softmax(y)
+  }
+
+}
+
+
+object LogisticRegression {
+  def test_lr() {
+    val learning_rate: Double = 0.1
+    val n_epochs: Int = 500
+
+    val train_N: Int = 6
+    val test_N: Int = 2
+    val n_in: Int = 6
+    val n_out: Int = 2
+
+    val train_X: Array[Array[Int]] = Array(
+      Array(1, 1, 1, 0, 0, 0),
+      Array(1, 0, 1, 0, 0, 0),
+      Array(1, 1, 1, 0, 0, 0),
+      Array(0, 0, 1, 1, 1, 0),
+      Array(0, 0, 1, 0, 1, 0),
+      Array(0, 0, 1, 1, 1, 0)
+    )
+
+    val train_Y: Array[Array[Int]] = Array(
+      Array(1, 0),
+      Array(1, 0),
+      Array(1, 0),
+      Array(0, 1),
+      Array(0, 1),
+      Array(0, 1)
+    )
+
+    // construct
+    val classifier = new LogisticRegression(train_N, n_in, n_out)
+
+    // train
+    var epoch: Int = 0
+    var i: Int = 0
+    for(epoch <- 0 until n_epochs) {
+      for(i <- 0 until train_N) {
+        classifier.train(train_X(i), train_Y(i), learning_rate)
+      }
+      // learning_rate *= 0.95
+    }
+
+    // test data
+    val test_X: Array[Array[Int]] = Array(
+      Array(1, 0, 1, 0, 0, 0),
+      Array(0, 0, 1, 1, 1, 0)
+    )
+
+    val test_Y: Array[Array[Double]] = Array.ofDim[Double](test_N, n_in)
+
+    // test
+    var j: Int = 0
+    for(i <- 0 until test_N) {
+      classifier.predict(test_X(i), test_Y(i))
+      for(j <- 0 until n_out) {
+        printf("%.5f ", test_Y(i)(j))
+      }
+      println()
+    }
+  }
+
+  def main(args: Array[String]) {
+    test_lr()
+  }
+
+}
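
Note on the softmax in this patch: each activation is shifted by the largest one seen (the scan starts from 0.0) before exponentiating, which leaves the normalized probabilities unchanged but keeps math.exp out of overflow territory. A minimal standalone sketch of the same trick (hypothetical helper, not part of the patch):

    // Numerically stable softmax: shifting by the max changes nothing after
    // normalization, but keeps exp() in a safe range.
    def stableSoftmax(x: Array[Double]): Array[Double] = {
      val max = x.max
      val exps = x.map(v => math.exp(v - max))
      val sum = exps.sum
      exps.map(_ / sum)
    }
    // stableSoftmax(Array(1.0, 2.0, 3.0)) ~= Array(0.09003, 0.24473, 0.66524)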

From cbd78335c4041618e071c11b280c5c1b7ffec6e7 Mon Sep 17 00:00:00 2001
From: Yusuke Sugomori <restinthenest@gmail.com>
Date: Mon, 24 Jun 2013 00:53:17 +0900
Subject: [PATCH 02/45] Issue #1 bug fix on C C++ Java

---
 c/DBN.c               | 3 ++-
 c/SdA.c               | 3 ++-
 cpp/DBN.cpp           | 3 ++-
 cpp/SdA.cpp           | 3 ++-
 java/DBN/src/DBN.java | 3 ++-
 java/SdA/src/SdA.java | 3 ++-
 6 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/c/DBN.c b/c/DBN.c
index 9423622..7899678 100644
--- a/c/DBN.c
+++ b/c/DBN.c
@@ -190,8 +190,9 @@ void DBN_predict(DBN* this, int *x, double *y) {
   for(i=0; i<this->n_layers; i++) {
     layer_input = (double *)malloc(sizeof(double) * this->sigmoid_layers[i].n_out);
 
-    linear_output = 0.0;
     for(k=0; k<this->sigmoid_layers[i].n_out; k++) {
+      linear_output = 0.0;
+
       for(j=0; j<this->sigmoid_layers[i].n_in; j++) {
         linear_output += this->sigmoid_layers[i].W[k][j] * prev_layer_input[j];
       }
diff --git a/c/SdA.c b/c/SdA.c
index 273d2b8..99170ee 100644
--- a/c/SdA.c
+++ b/c/SdA.c
@@ -188,8 +188,9 @@ void SdA_predict(SdA* this, int *x, double *y) {
   for(i=0; i<this->n_layers; i++) {
     layer_input = (double *)malloc(sizeof(double) * this->sigmoid_layers[i].n_out);
 
-    linear_output = 0.0;
     for(k=0; k<this->sigmoid_layers[i].n_out; k++) {
+      linear_output = 0.0;
+
       for(j=0; j<this->sigmoid_layers[i].n_in; j++) {
         linear_output += this->sigmoid_layers[i].W[k][j] * prev_layer_input[j];
       }
diff --git a/cpp/DBN.cpp b/cpp/DBN.cpp
index e92bad7..4f9b9c3 100644
--- a/cpp/DBN.cpp
+++ b/cpp/DBN.cpp
@@ -176,8 +176,9 @@ void DBN::predict(int *x, double *y) {
   for(int i=0; i<n_layers; i++) {
     layer_input = new double[sigmoid_layers[i]->n_out];
 
-    linear_output = 0.0;
     for(int k=0; k<sigmoid_layers[i]->n_out; k++) {
+      linear_output = 0.0;
+
       for(int j=0; j<sigmoid_layers[i]->n_in; j++) {
         linear_output += sigmoid_layers[i]->W[k][j] * prev_layer_input[j];
       }
diff --git a/cpp/SdA.cpp b/cpp/SdA.cpp
index 787bc6c..39ac32e 100644
--- a/cpp/SdA.cpp
+++ b/cpp/SdA.cpp
@@ -174,8 +174,9 @@ void SdA::predict(int *x, double *y) {
   for(int i=0; i<n_layers; i++) {
     layer_input = new double[sigmoid_layers[i]->n_out];
 
-    linear_output = 0.0;
     for(int k=0; k<sigmoid_layers[i]->n_out; k++) {
+      linear_output = 0.0;
+
       for(int j=0; j<sigmoid_layers[i]->n_in; j++) {
         linear_output += sigmoid_layers[i]->W[k][j] * prev_layer_input[j];
       }
diff --git a/java/DBN/src/DBN.java b/java/DBN/src/DBN.java
index 238d1ce..5b72e68 100644
--- a/java/DBN/src/DBN.java
+++ b/java/DBN/src/DBN.java
@@ -124,8 +124,9 @@ public void predict(int[] x, double[] y) {
 		for(int i=0; i<n_layers; i++) {
 			layer_input = new double[sigmoid_layers[i].n_out];
 			
-			linear_output = 0.0;
 			for(int k=0; k<sigmoid_layers[i].n_out; k++) {
+				linear_output = 0.0;
+				
 				for(int j=0; j<sigmoid_layers[i].n_in; j++) {
 					linear_output += sigmoid_layers[i].W[k][j] * prev_layer_input[j];
 				}
diff --git a/java/SdA/src/SdA.java b/java/SdA/src/SdA.java
index 4c7a749..a626ad8 100644
--- a/java/SdA/src/SdA.java
+++ b/java/SdA/src/SdA.java
@@ -123,8 +123,9 @@ public void predict(int[] x, double[] y) {
 		for(int i=0; i<n_layers; i++) {
 			layer_input = new double[sigmoid_layers[i].n_out];
 			
-			linear_output = 0.0;
 			for(int k=0; k<sigmoid_layers[i].n_out; k++) {
+				linear_output = 0.0;
+				
 				for(int j=0; j<sigmoid_layers[i].n_in; j++) {
 					linear_output += sigmoid_layers[i].W[k][j] * prev_layer_input[j];
 				}
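
Note: the fix is identical in every language touched here. The accumulator for one output unit must be cleared before its dot product starts; with the reset outside the k loop, unit k inherits whatever sum unit k-1 left behind and the sigmoid receives a garbage preactivation. A Scala sketch of the corrected forward pass (names are illustrative, not the repository's):

    // Forward pass for one sigmoid layer; the per-unit reset is the bug fix.
    def layerForward(input: Array[Double], w: Array[Array[Double]],
                     b: Array[Double]): Array[Double] = {
      val out = new Array[Double](w.length)
      for (k <- w.indices) {
        var linearOutput = 0.0                  // reset inside the k loop
        for (j <- input.indices) {
          linearOutput += w(k)(j) * input(j)
        }
        out(k) = 1.0 / (1.0 + math.exp(-(linearOutput + b(k))))  // sigmoid
      }
      out
    }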

From 2d9d06de1052442993413cd14ebea5867ec90433 Mon Sep 17 00:00:00 2001
From: Yusuke Sugomori <restinthenest@gmail.com>
Date: Thu, 18 Jul 2013 23:45:29 +0900
Subject: [PATCH 03/45] 07/18/2013 #1 fix C C++ Java Scala

---
 c/DBN.c               | 3 ++-
 c/RBM.c               | 3 ++-
 cpp/DBN.cpp           | 3 ++-
 cpp/RBM.cpp           | 3 ++-
 java/DBN/src/RBM.java | 3 ++-
 java/RBM/src/RBM.java | 3 ++-
 scala/RBM.scala       | 3 ++-
 7 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/c/DBN.c b/c/DBN.c
index 7899678..eee1e1c 100644
--- a/c/DBN.c
+++ b/c/DBN.c
@@ -347,7 +347,8 @@ void RBM_contrastive_divergence(RBM* this, int *input, double lr, int k) {
 
   for(i=0; i<this->n_hidden; i++) {
     for(j=0; j<this->n_visible; j++) {
-      this->W[i][j] += lr * (ph_sample[i] * input[j] - nh_means[i] * nv_samples[j]) / this->N;
+      // this->W[i][j] += lr * (ph_sample[i] * input[j] - nh_means[i] * nv_samples[j]) / this->N;
+      this->W[i][j] += lr * (ph_mean[i] * input[j] - nh_means[i] * nv_samples[j]) / this->N;
     }
     this->hbias[i] += lr * (ph_sample[i] - nh_means[i]) / this->N;
   }
diff --git a/c/RBM.c b/c/RBM.c
index 9ea27b2..7e806bb 100644
--- a/c/RBM.c
+++ b/c/RBM.c
@@ -100,7 +100,8 @@ void RBM_contrastive_divergence(RBM* this, int *input, double lr, int k) {
 
   for(i=0; i<this->n_hidden; i++) {
     for(j=0; j<this->n_visible; j++) {
-      this->W[i][j] += lr * (ph_sample[i] * input[j] - nh_means[i] * nv_samples[j]) / this->N;
+      // this->W[i][j] += lr * (ph_sample[i] * input[j] - nh_means[i] * nv_samples[j]) / this->N;
+      this->W[i][j] += lr * (ph_mean[i] * input[j] - nh_means[i] * nv_samples[j]) / this->N;
     }
     this->hbias[i] += lr * (ph_sample[i] - nh_means[i]) / this->N;
   }
diff --git a/cpp/DBN.cpp b/cpp/DBN.cpp
index 4f9b9c3..f043e20 100644
--- a/cpp/DBN.cpp
+++ b/cpp/DBN.cpp
@@ -322,7 +322,8 @@ void RBM::contrastive_divergence(int *input, double lr, int k) {
 
   for(int i=0; i<n_hidden; i++) {
     for(int j=0; j<n_visible; j++) {
-      W[i][j] += lr * (ph_sample[i] * input[j] - nh_means[i] * nv_samples[j]) / N;
+      // W[i][j] += lr * (ph_sample[i] * input[j] - nh_means[i] * nv_samples[j]) / N;
+      W[i][j] += lr * (ph_mean[i] * input[j] - nh_means[i] * nv_samples[j]) / N;
     }
     hbias[i] += lr * (ph_sample[i] - nh_means[i]) / N;
   }
diff --git a/cpp/RBM.cpp b/cpp/RBM.cpp
index 590199a..d64462c 100644
--- a/cpp/RBM.cpp
+++ b/cpp/RBM.cpp
@@ -89,7 +89,8 @@ void RBM::contrastive_divergence(int *input, double lr, int k) {
 
   for(int i=0; i<n_hidden; i++) {
     for(int j=0; j<n_visible; j++) {
-      W[i][j] += lr * (ph_sample[i] * input[j] - nh_means[i] * nv_samples[j]) / N;
+      // W[i][j] += lr * (ph_sample[i] * input[j] - nh_means[i] * nv_samples[j]) / N;
+      W[i][j] += lr * (ph_mean[i] * input[j] - nh_means[i] * nv_samples[j]) / N;
     }
     hbias[i] += lr * (ph_sample[i] - nh_means[i]) / N;
   }
diff --git a/java/DBN/src/RBM.java b/java/DBN/src/RBM.java
index ed1b4c5..9ee3563 100644
--- a/java/DBN/src/RBM.java
+++ b/java/DBN/src/RBM.java
@@ -91,7 +91,8 @@ public void contrastive_divergence(int[] input, double lr, int k) {
 		
 		for(int i=0; i<n_hidden; i++) {
 			for(int j=0; j<n_visible; j++) {
-				W[i][j] += lr *(ph_sample[i] * input[j] - nh_means[i] * nv_samples[j]) / N;
+				// W[i][j] += lr *(ph_sample[i] * input[j] - nh_means[i] * nv_samples[j]) / N;
+				W[i][j] += lr *(ph_mean[i] * input[j] - nh_means[i] * nv_samples[j]) / N;
 			}
 			hbias[i] += lr * (ph_sample[i] - nh_means[i]) / N;
 		}
diff --git a/java/RBM/src/RBM.java b/java/RBM/src/RBM.java
index 3100dd0..6f2e3b7 100644
--- a/java/RBM/src/RBM.java
+++ b/java/RBM/src/RBM.java
@@ -91,7 +91,8 @@ public void contrastive_divergence(int[] input, double lr, int k) {
 		
 		for(int i=0; i<n_hidden; i++) {
 			for(int j=0; j<n_visible; j++) {
-				W[i][j] += lr * (ph_sample[i] * input[j] - nh_means[i] * nv_samples[j]) / N;
+				// W[i][j] += lr * (ph_sample[i] * input[j] - nh_means[i] * nv_samples[j]) / N;
+				W[i][j] += lr * (ph_mean[i] * input[j] - nh_means[i] * nv_samples[j]) / N;
 			}
 			hbias[i] += lr * (ph_sample[i] - nh_means[i]) / N;
 		}
diff --git a/scala/RBM.scala b/scala/RBM.scala
index 79ae124..ad760fb 100644
--- a/scala/RBM.scala
+++ b/scala/RBM.scala
@@ -86,7 +86,8 @@ class RBM(val N: Int, val n_visible: Int, val n_hidden: Int,
     var j: Int = 0
     for(i <- 0 until n_hidden) {
       for(j <- 0 until n_visible) {
-        W(i)(j) += lr * (ph_sample(i) * input(j) - nh_means(i) * nv_samples(j)) / N
+        // W(i)(j) += lr * (ph_sample(i) * input(j) - nh_means(i) * nv_samples(j)) / N
+        W(i)(j) += lr * (ph_mean(i) * input(j) - nh_means(i) * nv_samples(j)) / N
       }
       hbias(i) += lr * (ph_sample(i) - nh_means(i)) / N
     }
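
Note: the corrected CD-1 update uses the positive-phase hidden probabilities ph_mean for the data term instead of the binary samples ph_sample, i.e. W[i][j] += lr * (ph_mean[i]*v[j] - nh_means[i]*nv_samples[j]) / N, which lowers the variance of the gradient estimate; the hidden-bias update still uses the samples, as before. A Scala sketch of just that update (array names follow the code, the wrapper function is illustrative):

    // One CD-1 parameter update, mirroring the corrected lines.
    // ph_mean: P(h=1 | data v); nh_means, nv_samples: after one Gibbs step.
    def cdUpdate(W: Array[Array[Double]], hbias: Array[Double],
                 input: Array[Int], ph_mean: Array[Double], ph_sample: Array[Int],
                 nh_means: Array[Double], nv_samples: Array[Int],
                 lr: Double, n: Int): Unit = {
      for (i <- W.indices; j <- W(i).indices)
        W(i)(j) += lr * (ph_mean(i) * input(j) - nh_means(i) * nv_samples(j)) / n
      for (i <- hbias.indices)
        hbias(i) += lr * (ph_sample(i) - nh_means(i)) / n
    }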

From 8acec2f6a6af831f8b992eefad18dc53afbeee49 Mon Sep 17 00:00:00 2001
From: Yusuke Sugomori <restinthenest@gmail.com>
Date: Fri, 6 Sep 2013 09:01:18 +0900
Subject: [PATCH 04/45] scala sda

---
 scala/HiddenLayer.scala |  73 +++++++++++++
 scala/SdA.scala         | 236 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 309 insertions(+)
 create mode 100644 scala/HiddenLayer.scala
 create mode 100644 scala/SdA.scala

diff --git a/scala/HiddenLayer.scala b/scala/HiddenLayer.scala
new file mode 100644
index 0000000..118bc5e
--- /dev/null
+++ b/scala/HiddenLayer.scala
@@ -0,0 +1,73 @@
+import scala.util.Random
+import scala.math
+
+class HiddenLayer(val N: Int, val n_in: Int, val n_out: Int, _W: Array[Array[Double]], _b: Array[Double], var rng: Random=null) {
+
+
+  def uniform(min: Double, max: Double): Double = {
+    return rng.nextDouble() * (max - min) + min
+  }
+
+  def binomial(n: Int, p: Double): Int = {
+    if(p < 0 || p > 1) return 0
+
+    var c: Int = 0
+    var r: Double = 0.0
+
+    var i: Int = 0
+
+    for(i <- 0 until n) {
+      r = rng.nextDouble()
+      if(r < p) c += 1
+    }
+
+    return c
+  }
+
+  def sigmoid(x: Double): Double = {
+    return 1.0 / (1.0 + math.pow(math.E, -x))
+  }
+
+
+  if(rng == null) rng = new Random(1234)
+
+  var a: Double = 0.0
+  var W: Array[Array[Double]] = Array.ofDim[Double](n_out, n_in)
+  var b: Array[Double] = new Array[Double](n_out)
+
+  var i: Int = 0
+  if(_W == null) {
+    a = 1.0 / n_in
+
+    for(i <- 0 until n_out) {
+      for(j <- 0 until n_in) {
+        W(i)(j) = uniform(-a, a)
+      }
+    }
+  } else {
+    W = _W
+  }
+
+  if(_b != null) b = _b
+
+
+  def output(input: Array[Int], w: Array[Double], b: Double): Double = {
+    var linear_output: Double = 0.0
+
+    var j: Int = 0
+    for(j <- 0 until n_in) {
+      linear_output += w(j) * input(j)
+    }
+    linear_output += b
+
+    return sigmoid(linear_output)
+  }
+
+  def sample_h_given_v(input: Array[Int], sample: Array[Int]) {
+    var i: Int = 0
+    
+    for(i <- 0 until n_out) {
+      sample(i) = binomial(1, output(input, W(i), b(i)))
+    }
+  }
+}
diff --git a/scala/SdA.scala b/scala/SdA.scala
new file mode 100644
index 0000000..3f897e5
--- /dev/null
+++ b/scala/SdA.scala
@@ -0,0 +1,236 @@
+import scala.util.Random
+import scala.math
+
+class SdA(val N: Int, val n_ins: Int, hidden_layer_sizes: Array[Int], val n_outs: Int, val n_layers:Int, var rng: Random=null) {
+
+  def sigmoid(x: Double): Double = {
+    return 1.0 / (1.0 + math.pow(math.E, -x))
+  }
+
+  var input_size: Int = 0
+
+  // var hidden_layer_sizes: Array[Int] = new Array[Int](n_layers)
+  var sigmoid_layers: Array[HiddenLayer] = new Array[HiddenLayer](n_layers)
+  var dA_layers: Array[dA] = new Array[dA](n_layers)
+
+  if(rng == null) rng = new Random(1234)
+
+
+  var i: Int = 0
+
+  // construct multi-layer
+  for(i <- 0 until n_layers) {
+    if(i == 0) {
+      input_size = n_ins
+    } else {
+      input_size = hidden_layer_sizes(i-1)
+    }
+    
+    // construct sigmoid_layer
+    sigmoid_layers(i) = new HiddenLayer(N, input_size, hidden_layer_sizes(i), null, null, rng)
+
+    // construct dA_layer
+    dA_layers(i) = new dA(N, input_size, hidden_layer_sizes(i), sigmoid_layers(i).W, sigmoid_layers(i).b, null, rng)
+  }
+
+  // layer for output using LogisticRegression
+  val log_layer = new LogisticRegression(N, hidden_layer_sizes(n_layers-1), n_outs)
+
+
+  def pretrain(train_X: Array[Array[Int]], lr: Double, corruption_level: Double, epochs: Int) {
+    var layer_input: Array[Int] = new Array[Int](0)
+    var prev_layer_input_size: Int = 0
+    var prev_layer_input: Array[Int] = new Array[Int](0)
+
+    var i: Int = 0
+    var j: Int = 0
+    var epoch: Int = 0
+    var n: Int = 0
+    var l: Int = 0
+
+    for(i <- 0 until n_layers) {  // layer-wise
+      for(epoch <- 0 until epochs) {  // training epochs
+        for(n <- 0 until N) {  // input x1...xN
+          // layer input
+          for(l <- 0 to i) {
+            if(l == 0) {
+              layer_input = new Array[Int](n_ins)
+              for(j <- 0 until n_ins) layer_input(j) = train_X(n)(j)
+            } else {
+              if(l == 1) prev_layer_input_size = n_ins
+              else prev_layer_input_size = hidden_layer_sizes(l-2)
+
+              prev_layer_input = new Array[Int](prev_layer_input_size)
+              for(j <- 0 until prev_layer_input_size) prev_layer_input(j) = layer_input(j)
+
+              layer_input = new Array[Int](hidden_layer_sizes(l-1))
+              
+              sigmoid_layers(l-1).sample_h_given_v(prev_layer_input, layer_input)
+            }
+          }
+
+          dA_layers(i).train(layer_input, lr, corruption_level)
+        }
+      }
+    }
+    
+  }
+
+
+  def finetune(train_X: Array[Array[Int]], train_Y: Array[Array[Int]], lr: Double, epochs: Int) {
+    var layer_input: Array[Int] = new Array[Int](0)
+    var prev_layer_input: Array[Int] = new Array[Int](0)
+
+    var epoch: Int = 0
+    var n: Int = 0
+    
+    
+    for(epoch <- 0 until epochs) {
+      for(n <- 0 until N) {
+        
+        // layer input
+        for(i <- 0 until n_layers) {
+          if(i == 0) {
+            prev_layer_input = new Array[Int](n_ins)
+            for(j <- 0 until n_ins) prev_layer_input(j) = train_X(n)(j)
+          } else {
+            prev_layer_input = new Array[Int](hidden_layer_sizes(i-1))
+            for(j <- 0 until hidden_layer_sizes(i-1)) prev_layer_input(j) = layer_input(j)
+          }
+
+          layer_input = new Array[Int](hidden_layer_sizes(i))
+          sigmoid_layers(i).sample_h_given_v(prev_layer_input, layer_input)
+        }
+
+        log_layer.train(layer_input, train_Y(n), lr)
+      }
+      // lr *= 0.95
+    }
+  }
+
+  def predict(x: Array[Int], y: Array[Double]) {
+    var layer_input: Array[Double] = new Array[Double](0)
+    var prev_layer_input: Array[Double] = new Array[Double](n_ins)
+    
+    var j: Int = 0
+    for(j <- 0 until n_ins) prev_layer_input(j) = x(j)
+
+    var linear_output: Double = 0.0
+
+    // layer activation
+    var i: Int = 0
+    var k: Int = 0
+
+    for(i <- 0 until n_layers) {
+      layer_input = new Array[Double](sigmoid_layers(i).n_out)
+
+      for(k <- 0 until sigmoid_layers(i).n_out) {
+        linear_output = 0.0
+
+        for(j <- 0 until sigmoid_layers(i).n_in) {
+          linear_output += sigmoid_layers(i).W(k)(j) * prev_layer_input(j)
+        }
+        linear_output += sigmoid_layers(i).b(k)
+        layer_input(k) = sigmoid(linear_output)
+      }
+
+      if(i < n_layers-1) {
+        prev_layer_input = new Array[Double](sigmoid_layers(i).n_out)
+        for(j <- 0 until sigmoid_layers(i).n_out) prev_layer_input(j) = layer_input(j)
+      }
+    }
+
+    for(i <- 0 until log_layer.n_out) {
+      y(i) = 0
+      for(j <- 0 until log_layer.n_in) {
+        y(i) += log_layer.W(i)(j) * layer_input(j)
+      }
+      y(i) += log_layer.b(i)
+    }
+
+    log_layer.softmax(y)
+  }
+
+}
+
+
+object SdA {
+  def test_sda() {
+    val rng: Random = new Random(123)
+    
+    val pretrain_lr: Double = 0.1
+    val corruption_level: Double = 0.3
+    val pretraining_epochs: Int = 1000
+    val finetune_lr: Double = 0.1
+    val finetune_epochs: Int = 500
+
+    val train_N: Int = 10
+    val test_N: Int = 4
+    val n_ins: Int = 28
+    val n_outs: Int = 2
+    val hidden_layer_sizes: Array[Int] = Array(15, 15)
+    val n_layers: Int = hidden_layer_sizes.length
+
+    // training data
+    val train_X: Array[Array[Int]] = Array(
+			Array(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+			Array(0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+			Array(1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+			Array(0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+			Array(1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+			Array(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
+			Array(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1),
+			Array(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1),
+			Array(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1),
+			Array(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1)
+    )
+
+    val train_Y: Array[Array[Int]] = Array(
+			Array(1, 0),
+			Array(1, 0),
+			Array(1, 0),
+			Array(1, 0),
+			Array(1, 0),
+			Array(0, 1),
+			Array(0, 1),
+			Array(0, 1),
+			Array(0, 1),
+			Array(0, 1)
+    )
+    
+    // construct SdA
+    val sda:SdA = new SdA(train_N, n_ins, hidden_layer_sizes, n_outs, n_layers, rng)
+
+    // pretrain
+    sda.pretrain(train_X, pretrain_lr, corruption_level, pretraining_epochs)
+
+    // finetune
+    sda.finetune(train_X, train_Y, finetune_lr, finetune_epochs)
+      
+    // test data
+    val test_X: Array[Array[Int]] = Array(
+			Array(1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+			Array(1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+			Array(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1),
+			Array(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1)
+    )
+
+    val test_Y: Array[Array[Double]] = Array.ofDim[Double](test_N, n_outs)
+
+    // test
+    var i: Int = 0
+    var j: Int = 0
+
+    for(i <- 0 until test_N) {
+      sda.predict(test_X(i), test_Y(i))
+      for(j <- 0 until n_outs) {
+        print(test_Y(i)(j) + " ")
+      }
+      println()
+    }
+  }
+
+  def main(args: Array[String]) {
+    test_sda()
+  }
+}
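
Note on SdA.pretrain: to train denoising autoencoder i, the raw input is first pushed through the already-trained layers 0..i-1 by stochastic binary sampling, and the result becomes the dA's training vector; that is what the inner for(l <- 0 to i) loop computes. Distilled into a hypothetical helper (a sketch, not part of the patch):

    // Propagate x through the first i hidden layers by sampling; the result
    // is the input on which dA_layers(i) is trained.
    def inputForLayer(x: Array[Int], layers: Array[HiddenLayer], i: Int): Array[Int] = {
      var current = x
      for (l <- 0 until i) {
        val next = new Array[Int](layers(l).n_out)
        layers(l).sample_h_given_v(current, next)   // stochastic hidden sample
        current = next
      }
      current
    }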

From c11bd27b3a675dcc7a170c1f60313a882a6c96a8 Mon Sep 17 00:00:00 2001
From: Yusuke Sugomori <restinthenest@gmail.com>
Date: Sat, 7 Sep 2013 12:16:27 +0900
Subject: [PATCH 05/45] dbn scala

---
 scala/DBN.scala | 231 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 231 insertions(+)
 create mode 100644 scala/DBN.scala

diff --git a/scala/DBN.scala b/scala/DBN.scala
new file mode 100644
index 0000000..1b8df68
--- /dev/null
+++ b/scala/DBN.scala
@@ -0,0 +1,231 @@
+import scala.util.Random
+import scala.math
+
+class DBN(val N: Int, val n_ins: Int, hidden_layer_sizes: Array[Int], val n_outs: Int, val n_layers: Int, var rng: Random=null) {
+
+  def sigmoid(x: Double): Double = {
+    return 1.0 / (1.0 + math.pow(math.E, -x))
+  }
+
+
+  var input_size: Int = 0
+  
+  val sigmoid_layers: Array[HiddenLayer] = new Array[HiddenLayer](n_layers)
+  val rbm_layers: Array[RBM] = new Array[RBM](n_layers)
+
+  if(rng == null) rng = new Random(1234)
+
+  var i: Int = 0
+  // construct multi-layer
+  for(i <- 0 until n_layers) {
+    if(i == 0) {
+      input_size = n_ins
+    } else {
+      input_size = hidden_layer_sizes(i-1)
+    }
+
+    // construct sigmoid_layer
+    sigmoid_layers(i) = new HiddenLayer(N, input_size, hidden_layer_sizes(i), null, null, rng)
+
+    // construct rbm_layer
+    rbm_layers(i) = new RBM(N, input_size, hidden_layer_sizes(i), sigmoid_layers(i).W, sigmoid_layers(i).b, null, rng)
+
+  }
+
+  // layer for output using LogisticRegression
+  val log_layer: LogisticRegression = new LogisticRegression(N, hidden_layer_sizes(n_layers-1), n_outs)
+
+
+  def pretrain(train_X: Array[Array[Int]], lr: Double, k: Int, epochs: Int) {
+    var layer_input: Array[Int] = new Array[Int](0)
+    var prev_layer_input_size: Int = 0
+    var prev_layer_input: Array[Int] = new Array[Int](0)
+    
+    var i: Int = 0
+    var j: Int = 0
+    var epoch: Int = 0
+    var n: Int = 0
+    var l: Int = 0
+
+    for(i <- 0 until n_layers) {  // layer-wise
+      for(epoch <- 0 until epochs) {  // training epochs
+        for(n <- 0 until N) {  // input x1...xN
+          // layer input
+          for(l <- 0 to i) {
+            if(l == 0) {
+              layer_input = new Array[Int](n_ins)
+              for(j <- 0 until n_ins) layer_input(j) = train_X(n)(j)
+
+            } else {
+              if(l == 1) prev_layer_input_size = n_ins
+              else prev_layer_input_size = hidden_layer_sizes(l-2)
+
+              prev_layer_input = new Array[Int](prev_layer_input_size)
+              for(j <- 0 until prev_layer_input_size) prev_layer_input(j) = layer_input(j)
+
+              layer_input = new Array[Int](hidden_layer_sizes(l-1))
+              sigmoid_layers(l-1).sample_h_given_v(prev_layer_input, layer_input)
+            }
+          }
+
+          rbm_layers(i).contrastive_divergence(layer_input, lr, k)
+        }
+      }
+    }
+  }
+
+
+  def finetune(train_X: Array[Array[Int]], train_Y: Array[Array[Int]], lr: Double, epochs: Int) {
+    var layer_input: Array[Int] = new Array[Int](0)
+    var prev_layer_input: Array[Int] = new Array[Int](0)
+
+    var epoch: Int = 0
+    var n: Int = 0
+    var i: Int = 0
+    var j: Int = 0
+
+    for(epoch <- 0 until epochs) {
+      for(n <- 0 until N) {
+        
+        // layer input
+        for(i <- 0 until n_layers) {
+          if(i == 0) {
+            prev_layer_input = new Array[Int](n_ins)
+            for(j <- 0 until n_ins) prev_layer_input(j) = train_X(n)(j)
+          } else {
+            prev_layer_input = new Array[Int](hidden_layer_sizes(i-1))
+            for(j <- 0 until hidden_layer_sizes(i-1)) prev_layer_input(j) = layer_input(j)
+          }
+
+          layer_input = new Array[Int](hidden_layer_sizes(i))
+          sigmoid_layers(i).sample_h_given_v(prev_layer_input, layer_input)
+        }
+
+        log_layer.train(layer_input, train_Y(n), lr)
+      }
+      // lr *= 0.95
+    }
+  }
+
+  def predict(x: Array[Int], y: Array[Double]) {
+    var layer_input: Array[Double] = new Array[Double](0)
+    var prev_layer_input: Array[Double] = new Array[Double](n_ins)
+
+    var i: Int = 0
+    var j: Int = 0
+    var k: Int = 0
+
+    for(j <- 0 until n_ins) prev_layer_input(j) = x(j)
+    
+    var linear_output: Double = 0.0
+
+    // layer activation
+    for(i <- 0 until n_layers) {
+      layer_input = new Array[Double](sigmoid_layers(i).n_out)
+
+      for(k <- 0 until sigmoid_layers(i).n_out) {
+        linear_output = 0.0
+
+        for(j <- 0 until sigmoid_layers(i).n_in) {
+          linear_output += sigmoid_layers(i).W(k)(j) * prev_layer_input(j)
+        }
+        linear_output += sigmoid_layers(i).b(k)
+        layer_input(k) = sigmoid(linear_output)
+      }
+      
+      if(i < n_layers-1) {
+        prev_layer_input = new Array[Double](sigmoid_layers(i).n_out)
+        for(j <- 0 until sigmoid_layers(i).n_out) prev_layer_input(j) = layer_input(j)
+      }
+    }
+
+    for(i <- 0 until log_layer.n_out) {
+      y(i) = 0
+      for(j <- 0 until log_layer.n_in) {
+        y(i) += log_layer.W(i)(j) * layer_input(j)
+      }
+      y(i) += log_layer.b(i)
+    }
+
+    log_layer.softmax(y)
+  }
+
+}
+
+
+object DBN {
+  def test_dbn() {
+    val rng: Random = new Random(123)
+
+    val pretrain_lr: Double = 0.1
+    val pretraining_epochs: Int = 1000
+    val k: Int = 1
+    val finetune_lr: Double = 0.1
+    val finetune_epochs: Int = 500
+    
+    val train_N: Int = 6
+    val test_N: Int = 4
+    val n_ins: Int = 6
+    val n_outs: Int = 2
+    val hidden_layer_sizes: Array[Int] = Array(3, 3)
+    val n_layers = hidden_layer_sizes.length
+
+
+    // training data
+    val train_X: Array[Array[Int]] = Array(
+			Array(1, 1, 1, 0, 0, 0),
+			Array(1, 0, 1, 0, 0, 0),
+		  Array(1, 1, 1, 0, 0, 0),
+			Array(0, 0, 1, 1, 1, 0),
+			Array(0, 0, 1, 1, 0, 0),
+			Array(0, 0, 1, 1, 1, 0)
+    )
+
+    val train_Y: Array[Array[Int]] = Array(
+			Array(1, 0),
+			Array(1, 0),
+			Array(1, 0),
+			Array(0, 1),
+			Array(0, 1),
+			Array(0, 1)
+    )
+
+    // construct DBN
+    val dbn: DBN = new DBN(train_N, n_ins, hidden_layer_sizes, n_outs, n_layers, rng)
+
+		// pretrain
+		dbn.pretrain(train_X, pretrain_lr, k, pretraining_epochs);
+		
+		// finetune
+		dbn.finetune(train_X, train_Y, finetune_lr, finetune_epochs);
+		
+		
+		// test data
+		val test_X: Array[Array[Int]] = Array(
+			Array(1, 1, 0, 0, 0, 0),
+			Array(1, 1, 1, 1, 0, 0),
+			Array(0, 0, 0, 1, 1, 0),
+			Array(0, 0, 1, 1, 1, 0)
+		)
+
+    val test_Y: Array[Array[Double]] = Array.ofDim[Double](test_N, n_outs)
+
+    var i: Int = 0
+    var j: Int = 0
+
+    // test
+    for(i <- 0 until test_N) {
+      dbn.predict(test_X(i), test_Y(i))
+      for(j <- 0 until n_outs) {
+        print(test_Y(i)(j) + " ")
+      }
+      println()
+    }
+    
+  }
+  
+
+  def main(args: Array[String]) {
+    test_dbn()
+  }
+}
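
Note: DBN.scala mirrors SdA.scala, swapping the denoising autoencoders for RBM layers pretrained with contrastive divergence; fine-tuning is the same in both: the input is sampled up through every hidden layer and the resulting top-level code trains the output LogisticRegression. One fine-tuning step distilled into a hypothetical sketch (assumes the classes added in this and the previous patches):

    // Sample x up through all hidden layers, then one supervised update of
    // the output layer on the top-level code.
    def finetuneStep(dbn: DBN, x: Array[Int], y: Array[Int], lr: Double): Unit = {
      var code = x
      for (i <- 0 until dbn.n_layers) {
        val next = new Array[Int](dbn.sigmoid_layers(i).n_out)
        dbn.sigmoid_layers(i).sample_h_given_v(code, next)
        code = next
      }
      dbn.log_layer.train(code, y, lr)
    }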

From 58ef84bb4d21f4b95cd2719a2e5fa9e85512614c Mon Sep 17 00:00:00 2001
From: N011077 <n011077@N011077.local>
Date: Wed, 9 Oct 2013 08:57:14 +0900
Subject: [PATCH 06/45] readme

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 804e867..c73cd3e 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# Deep Learning (Python, C/C++, Java)
+# Deep Learning (Python, C/C++, Java, Scala)
 
 ### Classes :
 

From 84fd62a7dcead5f0a2ac46a4487362098fd108db Mon Sep 17 00:00:00 2001
From: N011077 <me@yusugomori.com>
Date: Thu, 10 Oct 2013 13:33:15 +0900
Subject: [PATCH 07/45] minor chg

---
 README.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index c73cd3e..c5ff02d 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,5 @@
-# Deep Learning (Python, C/C++, Java, Scala)
+# Deep Learning 
+## (Python, C/C++, Java, Scala)
 
 ### Classes :
 

From 3f818eef4668ad158f174bdf900a4e5be5316140 Mon Sep 17 00:00:00 2001
From: N011077 <me@yusugomori.com>
Date: Thu, 10 Oct 2013 13:36:53 +0900
Subject: [PATCH 08/45] minor chg

---
 README.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/README.md b/README.md
index c5ff02d..6bc0a32 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,4 @@
-# Deep Learning 
-## (Python, C/C++, Java, Scala)
+## Deep Learning (Python, C/C++, Java, Scala)
 
 ### Classes :
 

From 38f3c9b43ff265889ba1890adca1b3da0fbf846a Mon Sep 17 00:00:00 2001
From: Yusuke Sugomori <me@yusugomori.com>
Date: Thu, 10 Oct 2013 13:37:53 +0900
Subject: [PATCH 09/45] minor chg

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 6bc0a32..735e5d0 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-## Deep Learning (Python, C/C++, Java, Scala)
+##  Deep Learning (Python, C/C++, Java, Scala)
 
 ### Classes :
 

From 78611962735f3a9bb41d6dcc04d9f9423557fd1f Mon Sep 17 00:00:00 2001
From: Yusuke Sugomori <me@yusugomori.com>
Date: Tue, 25 Mar 2014 21:04:01 +0900
Subject: [PATCH 10/45] minor bug fix

---
 java/LogisticRegression/src/LogisticRegression.java | 2 +-
 scala/LogisticRegression.scala                      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/java/LogisticRegression/src/LogisticRegression.java b/java/LogisticRegression/src/LogisticRegression.java
index 8a13407..21e8f22 100644
--- a/java/LogisticRegression/src/LogisticRegression.java
+++ b/java/LogisticRegression/src/LogisticRegression.java
@@ -109,7 +109,7 @@ private static void test_lr() {
 			{0, 0, 1, 1, 1, 0}
 		};
 		
-		double[][] test_Y = new double[test_N][n_in];
+		double[][] test_Y = new double[test_N][n_out];
 		
 		
 		// test
diff --git a/scala/LogisticRegression.scala b/scala/LogisticRegression.scala
index 2386f5e..bf9b229 100644
--- a/scala/LogisticRegression.scala
+++ b/scala/LogisticRegression.scala
@@ -113,7 +113,7 @@ object LogisticRegression {
       Array(0, 0, 1, 1, 1, 0)
     )
 
-    val test_Y: Array[Array[Double]] = Array.ofDim[Double](test_N, n_in)
+    val test_Y: Array[Array[Double]] = Array.ofDim[Double](test_N, n_out)
 
     // test
     var j: Int = 0
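
Note: predict fills exactly one probability per class, so the result buffer has to be test_N x n_out rather than test_N x n_in. With n_in = 6 and n_out = 2 the old allocation was merely oversized (the extra columns were never written), but the corrected shape matches what the print loop actually reads.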

From 15241eb8a1fa7ca3a487b83b458c4f4d77a80d0c Mon Sep 17 00:00:00 2001
From: Yusuke Sugomori <me@yusugomori.com>
Date: Sun, 7 Dec 2014 02:22:43 +0900
Subject: [PATCH 11/45] go

---
 c/LogisticRegression.c                        |   2 +-
 cpp/LogisticRegression.cpp                    |  20 +--
 go/LogisticRegression.go                      | 150 ++++++++++++++++++
 .../src/LogisticRegression.java               |   2 +-
 4 files changed, 153 insertions(+), 21 deletions(-)
 create mode 100644 go/LogisticRegression.go

diff --git a/c/LogisticRegression.c b/c/LogisticRegression.c
index 0c0c04a..b55c707 100644
--- a/c/LogisticRegression.c
+++ b/c/LogisticRegression.c
@@ -94,7 +94,7 @@ void test_lr(void) {
   int i, j, epoch;
 
   double learning_rate = 0.1;
-  double n_epochs = 500;
+  int n_epochs = 500;
 
   int train_N = 6;
   int test_N = 2;
diff --git a/cpp/LogisticRegression.cpp b/cpp/LogisticRegression.cpp
index 9eb8f24..6eca566 100644
--- a/cpp/LogisticRegression.cpp
+++ b/cpp/LogisticRegression.cpp
@@ -86,30 +86,12 @@ void test_lr() {
   srand(0);
   
   double learning_rate = 0.1;
-  double n_epochs = 500;
+  int n_epochs = 500;
 
   int train_N = 6;
   int test_N = 2;
   int n_in = 6;
   int n_out = 2;
-  // int **train_X;
-  // int **train_Y;
-  // int **test_X;
-  // double **test_Y;
-
-  // train_X = new int*[train_N];
-  // train_Y = new int*[train_N];
-  // for(i=0; i<train_N; i++){
-  //   train_X[i] = new int[n_in];
-  //   train_Y[i] = new int[n_out];
-  // };
-
-  // test_X = new int*[test_N];
-  // test_Y = new double*[test_N];
-  // for(i=0; i<test_N; i++){
-  //   test_X[i] = new int[n_in];
-  //   test_Y[i] = new double[n_out];
-  // }
 
 
   // training data
diff --git a/go/LogisticRegression.go b/go/LogisticRegression.go
new file mode 100644
index 0000000..00b8768
--- /dev/null
+++ b/go/LogisticRegression.go
@@ -0,0 +1,150 @@
+package main
+
+import (
+	"fmt"
+	"math"
+)
+
+type LogisticRegression struct {
+	N int
+	n_in int
+	n_out int
+	W [][]float64
+	b []float64
+}
+
+
+func LogisticRegression__construct(this *LogisticRegression, N int, n_in int, n_out int) {
+	this.N = N
+	this.n_in = n_in
+	this.n_out = n_out
+
+	this.W = make([][]float64, n_out)
+	for i := 0; i < n_out; i++ { this.W[i] = make([]float64, n_in) }
+	
+	this.b = make([]float64, n_out)
+}
+
+func LogisticRegression_train(this *LogisticRegression, x []int, y []int, lr float64) {
+	p_y_given_x := make([]float64, this.n_out)
+	dy := make([]float64, this.n_out)
+	
+	for i := 0; i < this.n_out; i++ {
+		p_y_given_x[i] = 0
+		for j := 0; j < this.n_in; j++ {
+			p_y_given_x[i] += this.W[i][j] * float64(x[j])
+		}
+		p_y_given_x[i] += this.b[i]
+	}
+	LogisticRegression_softmax(this, p_y_given_x)
+	
+	for i := 0; i < this.n_out; i++ {
+		dy[i] = float64(y[i]) - p_y_given_x[i]
+		
+		for j := 0; j < this.n_in; j++ {
+			this.W[i][j] += lr * dy[i] * float64(x[j]) / float64(this.N)
+		}
+
+		this.b[i] += lr * dy[i] / float64(this.N)
+	}
+	
+}
+
+func LogisticRegression_softmax(this *LogisticRegression, x []float64) {
+	var (
+		max float64
+		sum float64
+	)
+
+	for i := 0; i < this.n_out; i++ { if max < x[i] {max = x[i]} }
+	for i := 0; i < this.n_out; i++ {
+		x[i] = math.Exp(x[i] - max)
+		sum += x[i]
+	}
+
+	for i := 0; i < this.n_out; i++ { x[i] /= sum }
+}
+
+func LogisticRegression_predict(this *LogisticRegression, x []int, y []float64) {
+	for i := 0; i < this.n_out; i++ {
+		y[i] = 0
+		for j := 0; j < this.n_in; j++ {
+			y[i] += this.W[i][j] * float64(x[j])
+		}
+		y[i] += this.b[i]
+	}
+
+	LogisticRegression_softmax(this, y)
+}
+
+
+
+func test_lr() {
+	
+	learning_rate := 0.1
+	n_epochs := 500
+
+	train_N := 6
+	test_N := 2
+	n_in := 6
+	n_out := 2
+
+	
+	// training data
+	train_X := [][]int {
+    {1, 1, 1, 0, 0, 0},
+    {1, 0, 1, 0, 0, 0},
+    {1, 1, 1, 0, 0, 0},
+    {0, 0, 1, 1, 1, 0},
+    {0, 0, 1, 1, 0, 0},
+    {0, 0, 1, 1, 1, 0},
+	}
+
+
+	train_Y := [][]int {
+    {1, 0},
+    {1, 0},
+    {1, 0},
+    {0, 1},
+    {0, 1},
+    {0, 1},
+	}
+
+	
+	// construct LogisticRegression
+	var classifier LogisticRegression
+	LogisticRegression__construct(&classifier, train_N, n_in, n_out)
+
+	// train
+	for epoch := 0; epoch < n_epochs; epoch++ {
+		for i := 0; i < train_N; i++ {
+			LogisticRegression_train(&classifier, train_X[i], train_Y[i], learning_rate)
+		}
+	}
+	
+	// test data
+	test_X := [][]int {
+    {1, 0, 1, 0, 0, 0},
+    {0, 0, 1, 1, 1, 0},
+	}
+	
+	test_Y := make([][]float64, test_N)
+	for i := 0; i < test_N; i++ { test_Y[i] = make([]float64, n_out) }
+
+
+	// test
+	for i := 0; i < test_N; i++ {
+		LogisticRegression_predict(&classifier, test_X[i], test_Y[i])
+		for j := 0; j < n_out; j++ {
+			fmt.Printf("%f ", test_Y[i][j])
+		}
+		fmt.Printf("\n")
+	}
+	
+}
+
+
+func main() {
+	test_lr()
+}
+
diff --git a/java/LogisticRegression/src/LogisticRegression.java b/java/LogisticRegression/src/LogisticRegression.java
index 21e8f22..8356c95 100644
--- a/java/LogisticRegression/src/LogisticRegression.java
+++ b/java/LogisticRegression/src/LogisticRegression.java
@@ -67,7 +67,7 @@ public void predict(int[] x, double[] y) {
 	
 	private static void test_lr() {
 		double learning_rate = 0.1;
-		double n_epochs = 500;
+		int n_epochs = 500;
 		
 		int train_N = 6;
 		int test_N = 2;

From 0cd1a5db9a1bc66e77dad085f95262ff743304e7 Mon Sep 17 00:00:00 2001
From: Yusuke Sugomori <me@yusugomori.com>
Date: Sun, 7 Dec 2014 02:23:48 +0900
Subject: [PATCH 12/45] add go to readme

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 735e5d0..48c2f67 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-##  Deep Learning (Python, C/C++, Java, Scala)
+##  Deep Learning (Python, C/C++, Java, Scala, Go)
 
 ### Classes :
 

From a636ca8a2cd98a8a344b4f7ebeb3792a442e87a0 Mon Sep 17 00:00:00 2001
From: Yusuke Sugomori <me@yusugomori.com>
Date: Sun, 7 Dec 2014 02:34:12 +0900
Subject: [PATCH 13/45] untabify go

---
 go/LogisticRegression.go | 54 ++++++++++++++++++++--------------------
 1 file changed, 27 insertions(+), 27 deletions(-)

diff --git a/go/LogisticRegression.go b/go/LogisticRegression.go
index 00b8768..1103cff 100644
--- a/go/LogisticRegression.go
+++ b/go/LogisticRegression.go
@@ -113,38 +113,38 @@ func test_lr() {
 	
 	// construct LogisticRegression
 	var classifier LogisticRegression
-	LogisticRegression__construct(&classifier, train_N, n_in, n_out)
-
-	// train
-	for epoch := 0; epoch < n_epochs; epoch++ {
-		for i := 0; i < train_N; i++ {
-			LogisticRegression_train(&classifier, train_X[i], train_Y[i], learning_rate)
-		}
-	}
-	
-	// test data
-	test_X := [][]int {
+  LogisticRegression__construct(&classifier, train_N, n_in, n_out)
+
+  // train
+  for epoch := 0; epoch < n_epochs; epoch++ {
+    for i := 0; i < train_N; i++ {
+      LogisticRegression_train(&classifier, train_X[i], train_Y[i], learning_rate)
+    }
+  }
+  
+  // test data
+  test_X := [][]int {
     {1, 0, 1, 0, 0, 0},
     {0, 0, 1, 1, 1, 0},
-	}
-	
-	test_Y := make([][]float64, test_N)
-	for i := 0; i < test_N; i++ { test_Y[i] = make([]float64, n_out) }
-
-
-	// test
-	for i := 0; i < test_N; i++ {
-		LogisticRegression_predict(&classifier, test_X[i], test_Y[i])
-		for j := 0; j < n_out; j++ {
-			fmt.Printf("%f ", test_Y[i][j])
-		}
-		fmt.Printf("\n")
-	}
-	
+  }
+  
+  test_Y := make([][]float64, test_N)
+  for i := 0; i < test_N; i++ { test_Y[i] = make([]float64, n_out) }
+
+
+  // test
+  for i := 0; i < test_N; i++ {
+    LogisticRegression_predict(&classifier, test_X[i], test_Y[i])
+    for j := 0; j < n_out; j++ {
+      fmt.Printf("%f ", test_Y[i][j])
+    }
+    fmt.Printf("\n")
+  }
+  
 }
 
 
 func main() {
-	test_lr()
+  test_lr()
 }
 

From 9a09bc974637ef244bb743666e31795372b2fe70 Mon Sep 17 00:00:00 2001
From: Yusuke Sugomori <me@yusugomori.com>
Date: Sun, 7 Dec 2014 02:37:50 +0900
Subject: [PATCH 14/45] untabify go

---
 go/LogisticRegression.go | 152 +++++++++++++++++++--------------------
 1 file changed, 76 insertions(+), 76 deletions(-)

diff --git a/go/LogisticRegression.go b/go/LogisticRegression.go
index 1103cff..b1e9a65 100644
--- a/go/LogisticRegression.go
+++ b/go/LogisticRegression.go
@@ -1,118 +1,118 @@
 package main
 
 import (
-	"fmt"
-	"math"
+  "fmt"
+  "math"
 )
 
 type LogisticRegression struct {
-	N int
-	n_in int
-	n_out int
-	W [][]float64
-	b []float64
+  N int
+  n_in int
+  n_out int
+  W [][]float64
+  b []float64
 }
 
 
 func LogisticRegression__construct(this *LogisticRegression, N int, n_in int, n_out int) {
-	this.N = N
-	this.n_in = n_in
-	this.n_out = n_out
-
-	this.W = make([][]float64, n_out)
-	for i := 0; i < n_out; i++ { this.W[i] = make([]float64, n_in) }
-	
-	this.b = make([]float64, n_out)
+  this.N = N
+  this.n_in = n_in
+  this.n_out = n_out
+
+  this.W = make([][]float64, n_out)
+  for i := 0; i < n_out; i++ { this.W[i] = make([]float64, n_in) }
+  
+  this.b = make([]float64, n_out)
 }
 
 func LogisticRegression_train(this *LogisticRegression, x []int, y []int, lr float64) {
-	p_y_given_x := make([]float64, this.n_out)
-	dy := make([]float64, this.n_out)
-	
-	for i := 0; i < this.n_out; i++ {
-		p_y_given_x[i] = 0
-		for j := 0; j < this.n_in; j++ {
-			p_y_given_x[i] += this.W[i][j] * float64(x[j])
-		}
-		p_y_given_x[i] += this.b[i]
-	}
-	LogisticRegression_softmax(this, p_y_given_x)
-	
-	for i := 0; i < this.n_out; i++ {
-		dy[i] = float64(y[i]) - p_y_given_x[i]
-		
-		for j := 0; j < this.n_in; j++ {
-			this.W[i][j] += lr * dy[i] * float64(x[j]) / float64(this.N)
-		}
-
-		this.b[i] += lr * dy[i] / float64(this.N)
-	}
-	
+  p_y_given_x := make([]float64, this.n_out)
+  dy := make([]float64, this.n_out)
+  
+  for i := 0; i < this.n_out; i++ {
+    p_y_given_x[i] = 0
+    for j := 0; j < this.n_in; j++ {
+      p_y_given_x[i] += this.W[i][j] * float64(x[j])
+    }
+    p_y_given_x[i] += this.b[i]
+  }
+  LogisticRegression_softmax(this, p_y_given_x)
+  
+  for i := 0; i < this.n_out; i++ {
+    dy[i] = float64(y[i]) - p_y_given_x[i]
+    
+    for j := 0; j < this.n_in; j++ {
+      this.W[i][j] += lr * dy[i] * float64(x[j]) / float64(this.N)
+    }
+
+    this.b[i] += lr * dy[i] / float64(this.N)
+  }
+  
 }
 
 func LogisticRegression_softmax(this *LogisticRegression, x []float64) {
-	var (
-		max float64
-		sum float64
-	)
-
-	for i := 0; i < this.n_out; i++ { if max < x[i] {max = x[i]} }
-	for i := 0; i < this.n_out; i++ {
-		x[i] = math.Exp(x[i] - max)
-		sum += x[i]
-	}
-
-	for i := 0; i < this.n_out; i++ { x[i] /= sum }
+  var (
+    max float64
+    sum float64
+  )
+
+  for i := 0; i < this.n_out; i++ { if max < x[i] {max = x[i]} }
+  for i := 0; i < this.n_out; i++ {
+    x[i] = math.Exp(x[i] - max)
+    sum += x[i]
+  }
+
+  for i := 0; i < this.n_out; i++ { x[i] /= sum }
 }
 
 func LogisticRegression_predict(this *LogisticRegression, x []int, y []float64) {
-	for i := 0; i < this.n_out; i++ {
-		y[i] = 0
-		for j := 0; j < this.n_in; j++ {
-			y[i] += this.W[i][j] * float64(x[j])
-		}
-		y[i] += this.b[i]
-	}
-
-	LogisticRegression_softmax(this, y)
+  for i := 0; i < this.n_out; i++ {
+    y[i] = 0
+    for j := 0; j < this.n_in; j++ {
+      y[i] += this.W[i][j] * float64(x[j])
+    }
+    y[i] += this.b[i]
+  }
+
+  LogisticRegression_softmax(this, y)
 }
 
 
 
 func test_lr() {
-	
-	learning_rate := 0.1
-	n_epochs := 500
-
-	train_N := 6
-	test_N := 2
-	n_in := 6
-	n_out := 2
-
-	
-	// training data
-	train_X := [][]int {
+  
+  learning_rate := 0.1
+  n_epochs := 500
+
+  train_N := 6
+  test_N := 2
+  n_in := 6
+  n_out := 2
+
+  
+  // training data
+  train_X := [][]int {
     {1, 1, 1, 0, 0, 0},
     {1, 0, 1, 0, 0, 0},
     {1, 1, 1, 0, 0, 0},
     {0, 0, 1, 1, 1, 0},
     {0, 0, 1, 1, 0, 0},
     {0, 0, 1, 1, 1, 0},
-	}
+  }
 
 
-	train_Y := [][]int {
+  train_Y := [][]int {
     {1, 0},
     {1, 0},
     {1, 0},
     {0, 1},
     {0, 1},
     {0, 1},
-	}
+  }
 
-	
-	// construct LogisticRegression
-	var classifier LogisticRegression
+  
+  // construct LogisticRegression
+  var classifier LogisticRegression
   LogisticRegression__construct(&classifier, train_N, n_in, n_out)
 
   // train

From ee7e7b6c208fa14089e9cf66b090036baf69d474 Mon Sep 17 00:00:00 2001
From: Yusuke Sugomori <me@yusugomori.com>
Date: Sun, 7 Dec 2014 02:39:18 +0900
Subject: [PATCH 15/45] tabify go

---
 go/LogisticRegression.go | 240 +++++++++++++++++++--------------------
 1 file changed, 120 insertions(+), 120 deletions(-)

diff --git a/go/LogisticRegression.go b/go/LogisticRegression.go
index b1e9a65..cbc7e0e 100644
--- a/go/LogisticRegression.go
+++ b/go/LogisticRegression.go
@@ -1,150 +1,150 @@
 package main
 
 import (
-  "fmt"
-  "math"
+	"fmt"
+	"math"
 )
 
 type LogisticRegression struct {
-  N int
-  n_in int
-  n_out int
-  W [][]float64
-  b []float64
+	N int
+	n_in int
+	n_out int
+	W [][]float64
+	b []float64
 }
 
 
 func LogisticRegression__construct(this *LogisticRegression, N int, n_in int, n_out int) {
-  this.N = N
-  this.n_in = n_in
-  this.n_out = n_out
-
-  this.W = make([][]float64, n_out)
-  for i := 0; i < n_out; i++ { this.W[i] = make([]float64, n_in) }
-  
-  this.b = make([]float64, n_out)
+	this.N = N
+	this.n_in = n_in
+	this.n_out = n_out
+
+	this.W = make([][]float64, n_out)
+	for i := 0; i < n_out; i++ { this.W[i] = make([]float64, n_in) }
+	
+	this.b = make([]float64, n_out)
 }
 
 func LogisticRegression_train(this *LogisticRegression, x []int, y []int, lr float64) {
-  p_y_given_x := make([]float64, this.n_out)
-  dy := make([]float64, this.n_out)
-  
-  for i := 0; i < this.n_out; i++ {
-    p_y_given_x[i] = 0
-    for j := 0; j < this.n_in; j++ {
-      p_y_given_x[i] += this.W[i][j] * float64(x[j])
-    }
-    p_y_given_x[i] += this.b[i]
-  }
-  LogisticRegression_softmax(this, p_y_given_x)
-  
-  for i := 0; i < this.n_out; i++ {
-    dy[i] = float64(y[i]) - p_y_given_x[i]
-    
-    for j := 0; j < this.n_in; j++ {
-      this.W[i][j] += lr * dy[i] * float64(x[j]) / float64(this.N)
-    }
-
-    this.b[i] += lr * dy[i] / float64(this.N)
-  }
-  
+	p_y_given_x := make([]float64, this.n_out)
+	dy := make([]float64, this.n_out)
+	
+	for i := 0; i < this.n_out; i++ {
+		p_y_given_x[i] = 0
+		for j := 0; j < this.n_in; j++ {
+			p_y_given_x[i] += this.W[i][j] * float64(x[j])
+		}
+		p_y_given_x[i] += this.b[i]
+	}
+	LogisticRegression_softmax(this, p_y_given_x)
+	
+	for i := 0; i < this.n_out; i++ {
+		dy[i] = float64(y[i]) - p_y_given_x[i]
+		
+		for j := 0; j < this.n_in; j++ {
+			this.W[i][j] += lr * dy[i] * float64(x[j]) / float64(this.N)
+		}
+
+		this.b[i] += lr * dy[i] / float64(this.N)
+	}
+	
 }
 
 func LogisticRegression_softmax(this *LogisticRegression, x []float64) {
-  var (
-    max float64
-    sum float64
-  )
-
-  for i := 0; i < this.n_out; i++ { if max < x[i] {max = x[i]} }
-  for i := 0; i < this.n_out; i++ {
-    x[i] = math.Exp(x[i] - max)
-    sum += x[i]
-  }
-
-  for i := 0; i < this.n_out; i++ { x[i] /= sum }
+	var (
+		max float64
+		sum float64
+	)
+
+	for i := 0; i < this.n_out; i++ { if max < x[i] {max = x[i]} }
+	for i := 0; i < this.n_out; i++ {
+		x[i] = math.Exp(x[i] - max)
+		sum += x[i]
+	}
+
+	for i := 0; i < this.n_out; i++ { x[i] /= sum }
 }
 
 func LogisticRegression_predict(this *LogisticRegression, x []int, y []float64) {
-  for i := 0; i < this.n_out; i++ {
-    y[i] = 0
-    for j := 0; j < this.n_in; j++ {
-      y[i] += this.W[i][j] * float64(x[j])
-    }
-    y[i] += this.b[i]
-  }
-
-  LogisticRegression_softmax(this, y)
+	for i := 0; i < this.n_out; i++ {
+		y[i] = 0
+		for j := 0; j < this.n_in; j++ {
+			y[i] += this.W[i][j] * float64(x[j])
+		}
+		y[i] += this.b[i]
+	}
+
+	LogisticRegression_softmax(this, y)
 }
 
 
 
 func test_lr() {
-  
-  learning_rate := 0.1
-  n_epochs := 500
-
-  train_N := 6
-  test_N := 2
-  n_in := 6
-  n_out := 2
-
-  
-  // training data
-  train_X := [][]int {
-    {1, 1, 1, 0, 0, 0},
-    {1, 0, 1, 0, 0, 0},
-    {1, 1, 1, 0, 0, 0},
-    {0, 0, 1, 1, 1, 0},
-    {0, 0, 1, 1, 0, 0},
-    {0, 0, 1, 1, 1, 0},
-  }
-
-
-  train_Y := [][]int {
-    {1, 0},
-    {1, 0},
-    {1, 0},
-    {0, 1},
-    {0, 1},
-    {0, 1},
-  }
-
-  
-  // construct LogisticRegression
-  var classifier LogisticRegression
-  LogisticRegression__construct(&classifier, train_N, n_in, n_out)
-
-  // train
-  for epoch := 0; epoch < n_epochs; epoch++ {
-    for i := 0; i < train_N; i++ {
-      LogisticRegression_train(&classifier, train_X[i], train_Y[i], learning_rate)
-    }
-  }
-  
-  // test data
-  test_X := [][]int {
-    {1, 0, 1, 0, 0, 0},
-    {0, 0, 1, 1, 1, 0},
-  }
-  
-  test_Y := make([][]float64, test_N)
-  for i := 0; i < test_N; i++ { test_Y[i] = make([]float64, n_out) }
-
-
-  // test
-  for i := 0; i < test_N; i++ {
-    LogisticRegression_predict(&classifier, test_X[i], test_Y[i])
-    for j := 0; j < n_out; j++ {
-      fmt.Printf("%f ", test_Y[i][j])
-    }
-    fmt.Printf("\n")
-  }
-  
+	
+	learning_rate := 0.1
+	n_epochs := 500
+
+	train_N := 6
+	test_N := 2
+	n_in := 6
+	n_out := 2
+
+	
+	// training data
+	train_X := [][]int {
+		{1, 1, 1, 0, 0, 0},
+		{1, 0, 1, 0, 0, 0},
+		{1, 1, 1, 0, 0, 0},
+		{0, 0, 1, 1, 1, 0},
+		{0, 0, 1, 1, 0, 0},
+		{0, 0, 1, 1, 1, 0},
+	}
+
+
+	train_Y := [][]int {
+		{1, 0},
+		{1, 0},
+		{1, 0},
+		{0, 1},
+		{0, 1},
+		{0, 1},
+	}
+
+	
+	// construct LogisticRegression
+	var classifier LogisticRegression
+	LogisticRegression__construct(&classifier, train_N, n_in, n_out)
+
+	// train
+	for epoch := 0; epoch < n_epochs; epoch++ {
+		for i := 0; i < train_N; i++ {
+			LogisticRegression_train(&classifier, train_X[i], train_Y[i], learning_rate)
+		}
+	}
+	
+	// test data
+	test_X := [][]int {
+		{1, 0, 1, 0, 0, 0},
+		{0, 0, 1, 1, 1, 0},
+	}
+	
+	test_Y := make([][]float64, test_N)
+	for i := 0; i < test_N; i++ { test_Y[i] = make([]float64, n_out) }
+
+
+	// test
+	for i := 0; i < test_N; i++ {
+		LogisticRegression_predict(&classifier, test_X[i], test_Y[i])
+		for j := 0; j < n_out; j++ {
+			fmt.Printf("%f ", test_Y[i][j])
+		}
+		fmt.Printf("\n")
+	}
+	
 }
 
 
 func main() {
-  test_lr()
+	test_lr()
 }
 

From d2774d6f80a610c37a4b458dd13c8c7589fed25e Mon Sep 17 00:00:00 2001
From: Yusuke Sugomori <me@yusugomori.com>
Date: Sun, 7 Dec 2014 12:32:12 +0900
Subject: [PATCH 16/45] RBM.go

---
 go/RBM.go         | 200 ++++++++++++++++++++++++++++++++++++++++++++++
 go/utils/utils.go |  28 +++++++
 2 files changed, 228 insertions(+)
 create mode 100644 go/RBM.go
 create mode 100644 go/utils/utils.go

diff --git a/go/RBM.go b/go/RBM.go
new file mode 100644
index 0000000..6369da5
--- /dev/null
+++ b/go/RBM.go
@@ -0,0 +1,200 @@
+package main
+
+import (
+	"fmt"
+	"math/rand"
+	u "./utils"
+)
+
+type RBM struct {
+	N int
+	n_visible int
+	n_hidden int
+	W [][]float64
+	hbias []float64
+	vbias []float64
+}
+
+
+func RBM__construct(this *RBM, N int, n_visible int, n_hidden int, W [][]float64, hbias []float64, vbias []float64) {
+	a := 1.0 / float64(n_visible)
+
+	this.N = N
+	this.n_visible = n_visible
+	this.n_hidden = n_hidden
+
+	if W == nil {
+		this.W = make([][]float64, n_hidden)
+		for i := 0; i < n_hidden; i++ { this.W[i] = make([]float64, n_visible) }
+
+		for i := 0; i < n_hidden; i++ {
+			for j := 0; j < n_visible; j++ {
+				this.W[i][j] = u.Uniform(-a, a)
+			}
+		}
+	} else {
+		this.W = W
+	}
+
+	if hbias == nil {
+		this.hbias = make([]float64, n_hidden)
+	} else {
+		this.hbias = hbias
+	}
+
+	if vbias == nil {
+		this.vbias = make([]float64, n_visible)
+	} else {
+		this.vbias = vbias
+	}
+}
+
+func RBM_contrastive_divergence(this *RBM, input []int, lr float64, k int) {
+	ph_mean := make([]float64, this.n_hidden)
+	ph_sample := make([]int, this.n_hidden)
+	nv_means := make([]float64, this.n_visible)
+	nv_samples := make([]int, this.n_visible)
+	nh_means := make([]float64, this.n_hidden)
+	nh_samples := make([]int, this.n_hidden)
+
+	/* CD-k */
+	RBM_sample_h_given_v(this, input, ph_mean, ph_sample)
+
+	for step := 0; step < k; step++ {
+		if step == 0 {
+			RBM_gibbs_hvh(this, ph_sample, nv_means, nv_samples, nh_means, nh_samples)
+		} else {
+			RBM_gibbs_hvh(this, nh_samples, nv_means, nv_samples, nh_means, nh_samples)
+		}
+	}
+
+	for i := 0; i < this.n_hidden; i++ {
+		for j := 0; j < this.n_visible; j++ {
+			this.W[i][j] += lr * (ph_mean[i] * float64(input[j]) - nh_means[i] * float64(nv_samples[j])) / float64(this.N)
+		}
+		this.hbias[i] += lr * (float64(ph_sample[i]) - nh_means[i]) / float64(this.N)
+	}
+
+	for i := 0; i < this.n_visible; i++ {
+		this.vbias[i] += lr * float64(input[i] - nv_samples[i]) / float64(this.N)
+	}
+}
+
+func RBM_sample_h_given_v(this *RBM, v0_sample []int, mean []float64, sample []int) {
+	for i := 0; i < this.n_hidden; i++ {
+		mean[i] = RBM_propup(this, v0_sample, this.W[i], this.hbias[i])
+		sample[i] = u.Binomial(1, mean[i])
+	}
+}
+
+func RBM_sample_v_given_h(this *RBM, h0_sample []int, mean []float64, sample []int) {
+	for i := 0; i < this.n_visible; i++ {
+		mean[i] = RBM_propdown(this, h0_sample, i, this.vbias[i])
+		sample[i] = u.Binomial(1, mean[i])
+	}
+}
+
+func RBM_propup(this *RBM, v []int, w []float64, b float64) float64 {
+	pre_sigmoid_activation := 0.0
+	
+	for j := 0; j < this.n_visible; j++ {
+		pre_sigmoid_activation += w[j] * float64(v[j])
+	}
+	pre_sigmoid_activation += b
+	
+	return u.Sigmoid(pre_sigmoid_activation)
+}
+
+func RBM_propdown(this *RBM,	h []int, i int, b float64) float64 {
+	pre_sigmoid_activation := 0.0
+	
+	for j := 0; j < this.n_hidden; j++ {
+		pre_sigmoid_activation += this.W[j][i] * float64(h[j])
+	}
+	pre_sigmoid_activation += b
+
+	return u.Sigmoid(pre_sigmoid_activation)
+}
+
+func RBM_gibbs_hvh(this *RBM, h0_sample []int, nv_means []float64, nv_samples []int, nh_means []float64, nh_samples []int) {
+	RBM_sample_v_given_h(this, h0_sample, nv_means, nv_samples)
+	RBM_sample_h_given_v(this, nv_samples, nh_means, nh_samples)
+}
+
+func RBM_reconstruct(this *RBM, v []int, reconstructed_v []float64) {
+	h := make([]float64, this.n_hidden)
+	var pre_sigmoid_activation float64
+
+	for i := 0; i < this.n_hidden; i++ {
+		h[i] = RBM_propup(this, v, this.W[i], this.hbias[i])
+	}
+
+	for i := 0; i < this.n_visible; i++ {
+		pre_sigmoid_activation = 0.0
+		for j := 0; j < this.n_hidden; j++ {
+			pre_sigmoid_activation += this.W[j][i] * h[j]
+		}
+		pre_sigmoid_activation += this.vbias[i]
+
+		reconstructed_v[i] = u.Sigmoid(pre_sigmoid_activation)
+	}
+}
+
+
+func test_rbm() {
+	rand.Seed(0)
+
+	learning_rate := 0.1
+	training_epochs := 1000
+	k := 1
+	
+	train_N := 6
+	test_N := 2
+	n_visible := 6
+	n_hidden := 3
+
+	// training data
+	train_X := [][]int {
+		{1, 1, 1, 0, 0, 0},
+		{1, 0, 1, 0, 0, 0},
+		{1, 1, 1, 0, 0, 0},
+		{0, 0, 1, 1, 1, 0},
+		{0, 0, 1, 0, 1, 0},
+		{0, 0, 1, 1, 1, 0},
+	}
+	
+
+	// construct RBM
+	var rbm RBM
+	RBM__construct(&rbm, train_N, n_visible, n_hidden, nil, nil, nil)
+
+	// train
+	for epoch := 0; epoch < training_epochs; epoch++ {
+		for i := 0; i < train_N; i++ {
+			RBM_contrastive_divergence(&rbm, train_X[i], learning_rate, k)
+		}
+	}
+
+	// test data
+	test_X := [][]int {
+		{1, 1, 0, 0, 0, 0},
+		{0, 0, 0, 1, 1, 0},
+	}
+	reconstructed_X := make([][]float64, test_N)
+	for i := 0; i < test_N; i++ { reconstructed_X[i] = make([]float64, n_visible)}
+
+
+	// test
+	for i := 0;  i < test_N; i++ {
+		RBM_reconstruct(&rbm, test_X[i], reconstructed_X[i])
+		for j := 0; j < n_visible; j++ {
+			fmt.Printf("%.5f ", reconstructed_X[i][j])
+		}
+		fmt.Printf("\n")
+	}
+}
+
+
+func main() {
+	test_rbm()
+}
diff --git a/go/utils/utils.go b/go/utils/utils.go
new file mode 100644
index 0000000..44b3af2
--- /dev/null
+++ b/go/utils/utils.go
@@ -0,0 +1,28 @@
+package utils
+
+import (
+	"math"
+	"math/rand"
+)
+
+func Uniform(min float64, max float64) float64 {
+	return rand.Float64() * (max - min) + min
+}
+
+func Binomial(n int, p float64) int {
+	if p < 0 || p > 1 { return 0 }
+
+	c := 0
+	var r float64
+	
+	for i := 0; i < n; i++ {
+		r = rand.Float64()		
+		if r < p { c++ }
+	}
+
+	return c
+}
+
+func Sigmoid(x float64) float64 {
+	return 1.0 / (1.0 + math.Exp(-x))
+}
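
Note on the test output: RBM_reconstruct performs one deterministic up-down pass with mean activations (propup for every hidden unit, then propdown for every visible unit), so the printed values are reconstruction probabilities rather than samples. The same computation in Scala terms (hypothetical sketch):

    def reconstruct(v: Array[Int], W: Array[Array[Double]],
                    hbias: Array[Double], vbias: Array[Double]): Array[Double] = {
      def sigmoid(x: Double) = 1.0 / (1.0 + math.exp(-x))
      // up: h(i) = sigmoid(sum_j W(i)(j) * v(j) + hbias(i))
      val h = Array.tabulate(hbias.length) { i =>
        sigmoid((0 until v.length).map(j => W(i)(j) * v(j)).sum + hbias(i))
      }
      // down: v'(j) = sigmoid(sum_i W(i)(j) * h(i) + vbias(j))
      Array.tabulate(vbias.length) { j =>
        sigmoid((0 until h.length).map(i => W(i)(j) * h(i)).sum + vbias(j))
      }
    }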

From 76a32007bfe02b607f1ecf278b9f0c6623f2b834 Mon Sep 17 00:00:00 2001
From: Yusuke Sugomori <me@yusugomori.com>
Date: Sun, 7 Dec 2014 15:21:19 +0900
Subject: [PATCH 17/45] DBN.go

---
 go/DBN.go                                   | 237 ++++++++++++++++++++
 go/HiddenLayer/HiddenLayer.go               |  60 +++++
 go/LogisticRegression/LogisticRegression.go |  77 +++++++
 go/RBM/RBM.go                               | 139 ++++++++++++
 4 files changed, 513 insertions(+)
 create mode 100644 go/DBN.go
 create mode 100644 go/HiddenLayer/HiddenLayer.go
 create mode 100644 go/LogisticRegression/LogisticRegression.go
 create mode 100644 go/RBM/RBM.go

diff --git a/go/DBN.go b/go/DBN.go
new file mode 100644
index 0000000..498b473
--- /dev/null
+++ b/go/DBN.go
@@ -0,0 +1,237 @@
+package main
+
+import (
+	"fmt"
+	"math/rand"
+	u "./utils"
+	H "./HiddenLayer"
+	R "./RBM"
+	L "./LogisticRegression"
+)
+
+type DBN struct {
+	N int
+	n_ins int
+	hidden_layer_sizes []int
+	n_outs int
+	n_layers int
+	sigmoid_layers []H.HiddenLayer
+	rbm_layers []R.RBM
+	log_layer L.LogisticRegression
+}
+
+
+func DBN__construct(this *DBN, N int, n_ins int, hidden_layer_sizes []int, n_outs int, n_layers int) {
+	var input_size int
+	
+	this.N = N
+	this.n_ins = n_ins
+	this.hidden_layer_sizes = hidden_layer_sizes
+	this.n_outs = n_outs
+	this.n_layers = n_layers
+
+	this.sigmoid_layers = make([]H.HiddenLayer, n_layers)
+	this.rbm_layers = make([]R.RBM, n_layers)
+	
+	// construct multi-layer
+	for i := 0; i < n_layers; i++ {
+		if i == 0 {
+			input_size = n_ins
+		} else {
+			input_size = hidden_layer_sizes[i-1]
+		}
+
+		// construct sigmoid_layer
+		H.HiddenLayer__construct(&(this.sigmoid_layers[i]), N, input_size, hidden_layer_sizes[i], nil, nil)
+
+		// construct rbm_layer
+		R.RBM__construct(&(this.rbm_layers[i]), N, input_size, hidden_layer_sizes[i], this.sigmoid_layers[i].W, this.sigmoid_layers[i].B, nil)
+	}
+
+	// layer for output using LogisticRegression
+	L.LogisticRegression__construct(&(this.log_layer), N, hidden_layer_sizes[n_layers-1], n_outs)
+}
+
+func DBN_pretrain(this *DBN, train_X [][]int, lr float64, k int, epochs int){
+	var (
+		layer_input []int
+		prev_layer_input_size int
+		prev_layer_input []int
+	)
+
+
+	for i := 0; i < this.n_layers; i++ {	// layer-wise
+		for epoch := 0; epoch < epochs; epoch++ {	 // training epochs
+			for n := 0; n < this.N; n++ {	 // input x1...xN
+
+				// layer input
+				for l := 0; l <= i; l++ {
+					if l == 0 {
+						layer_input = make([]int, this.n_ins)
+						for j := 0; j < this.n_ins; j++ { layer_input[j] = train_X[n][j] }
+					} else {
+						if l == 1 {
+							prev_layer_input_size = this.n_ins
+						} else {
+							prev_layer_input_size = this.hidden_layer_sizes[l-2]
+						}
+
+						prev_layer_input = make([]int, prev_layer_input_size)
+						for j := 0; j < prev_layer_input_size; j++ { prev_layer_input[j] = layer_input[j] }
+
+						layer_input = make([]int, this.hidden_layer_sizes[l-1])
+
+						H.HiddenLayer_sample_h_given_v(&(this.sigmoid_layers[l-1]), prev_layer_input, layer_input)
+					}
+				}
+
+				R.RBM_contrastive_divergence(&(this.rbm_layers[i]), layer_input, lr, k)
+			}
+		}
+	}
+}
+
+func DBN_finetune(this *DBN, train_X [][]int, train_Y [][]int, lr float64, epochs int) {
+	var (
+		layer_input []int
+		prev_layer_input []int
+	)
+
+	for epoch := 0; epoch < epochs; epoch++ {
+		for n := 0; n < this.N; n++ {	 // input x1...xN
+
+			// layer input
+			for i := 0; i < this.n_layers; i++ {
+				if i == 0 {
+					prev_layer_input = make([]int, this.n_ins)
+					for j := 0; j < this.n_ins; j++ { prev_layer_input[j] = train_X[n][j] }
+				} else {
+					prev_layer_input = make([]int, this.hidden_layer_sizes[i-1])
+					for j:= 0; j < this.hidden_layer_sizes[i-1]; j++ { prev_layer_input[j] = layer_input[j] }
+				}
+
+				layer_input = make([]int, this.hidden_layer_sizes[i])
+				H.HiddenLayer_sample_h_given_v(&(this.sigmoid_layers[i]), prev_layer_input, layer_input)
+			}
+
+			L.LogisticRegression_train(&(this.log_layer), layer_input, train_Y[n], lr)
+		}
+		// lr *= 0.95
+	}
+}
+
+func DBN_predict(this *DBN, x []int, y []float64) {
+	var (
+		layer_input []float64
+	)	
+	prev_layer_input := make([]float64, this.n_ins)
+	for j := 0; j < this.n_ins; j++ { prev_layer_input[j] = float64(x[j]) }
+
+	
+	// layer activation
+	for i := 0; i < this.n_layers; i++ {
+		layer_input = make([]float64, this.sigmoid_layers[i].N_out)
+
+		for k := 0; k < this.sigmoid_layers[i].N_out; k++ {
+			linear_output := 0.0
+
+			for j := 0; j < this.sigmoid_layers[i].N_in; j++ {
+				linear_output += this.sigmoid_layers[i].W[k][j] * prev_layer_input[j]
+			}
+			linear_output += this.sigmoid_layers[i].B[k]
+			layer_input[k] = u.Sigmoid(linear_output)
+		}
+
+		if i < this.n_layers-1 {
+			prev_layer_input = make([]float64, this.sigmoid_layers[i].N_out)
+
+			for j := 0; j < this.sigmoid_layers[i].N_out; j++ {
+				prev_layer_input[j] = layer_input[j]
+			}
+		}
+	}
+
+	for i := 0; i < this.log_layer.N_out; i++ {
+		y[i] = 0
+		for j := 0; j < this.log_layer.N_in; j++ {
+			y[i] += this.log_layer.W[i][j] * layer_input[j]
+		}
+		y[i] += this.log_layer.B[i]
+	}
+
+	L.LogisticRegression_softmax(&(this.log_layer), y)
+}
+
+
+func test_dbn() {
+	rand.Seed(0)
+
+	pretrain_lr := 0.1
+	pretraining_epochs := 1000
+	k := 1
+	fintune_lr := 0.1
+	fintune_epochs := 500
+
+	train_N := 6
+	test_N := 4
+	n_ins := 6
+	n_outs := 2
+	hidden_layer_sizes := []int {3, 3}
+	n_layers := len(hidden_layer_sizes)
+
+
+	// training data
+	train_X := [][]int {
+		{1, 1, 1, 0, 0, 0},
+		{1, 0, 1, 0, 0, 0},
+		{1, 1, 1, 0, 0, 0},
+		{0, 0, 1, 1, 1, 0},
+		{0, 0, 1, 1, 0, 0},
+		{0, 0, 1, 1, 1, 0},
+	}
+
+	train_Y := [][]int {
+		{1, 0},
+		{1, 0},
+		{1, 0},
+		{0, 1},
+		{0, 1},
+		{0, 1},
+	}
+
+	// construct DBN
+	var dbn DBN
+	DBN__construct(&dbn, train_N, n_ins, hidden_layer_sizes, n_outs, n_layers)
+
+	// pretrain
+	DBN_pretrain(&dbn, train_X, pretrain_lr, k, pretraining_epochs)
+
+	// finetune
+	DBN_finetune(&dbn, train_X, train_Y, fintune_lr, fintune_epochs)
+
+	// test data
+	test_X := [][]int {
+		{1, 1, 0, 0, 0, 0},
+		{1, 1, 1, 1, 0, 0},
+		{0, 0, 0, 1, 1, 0},
+		{0, 0, 1, 1, 1, 0},
+	}
+
+	test_Y := make([][]float64, test_N)
+	for i := 0; i < test_N; i++ { test_Y[i] = make([]float64, n_outs)}
+
+	// test
+	for i := 0; i < test_N; i++ {
+		DBN_predict(&dbn, test_X[i], test_Y[i])
+		for j := 0; j < n_outs; j++ {
+			fmt.Printf("%.5f ", test_Y[i][j])
+		}
+		fmt.Printf("\n")
+	}
+}
+
+
+
+func main() {
+	test_dbn()
+}
diff --git a/go/HiddenLayer/HiddenLayer.go b/go/HiddenLayer/HiddenLayer.go
new file mode 100644
index 0000000..995ca44
--- /dev/null
+++ b/go/HiddenLayer/HiddenLayer.go
@@ -0,0 +1,60 @@
+package HiddenLayer
+
+import (
+	u "../utils"
+)
+
+
+type HiddenLayer struct {
+	N int
+	N_in int
+	N_out int
+	W [][]float64
+	B []float64
+}
+
+
+// HiddenLayer
+func HiddenLayer__construct(this *HiddenLayer, N int, n_in int, n_out int, W [][]float64, b []float64) {
+	a := 1.0 / float64(n_in)
+
+	this.N = N
+	this.N_in = n_in
+	this.N_out = n_out
+
+	if W == nil {
+		this.W = make([][]float64, n_out)
+		for i := 0; i < n_out; i++ { this.W[i] = make([]float64, n_in) }
+		
+		for i := 0; i < n_out; i++ {
+			for j := 0; j < n_in; j++ {
+				this.W[i][j] = u.Uniform(-a, a)
+			}
+		}
+	} else {
+		this.W = W
+	}
+
+	if b == nil {
+		this.B = make([]float64, n_out)
+	} else {
+		this.B = b
+	}
+}
+
+func HiddenLayer_output(this *HiddenLayer, input []int, w []float64, b float64) float64 {
+	linear_output := 0.0
+
+	for j := 0; j < this.N_in; j++ {
+		linear_output += w[j] * float64(input[j])
+	}
+	linear_output += b
+
+	return u.Sigmoid(linear_output)
+}
+
+func HiddenLayer_sample_h_given_v(this *HiddenLayer, input []int, sample []int) {
+	for i := 0; i < this.N_out; i++ {
+		sample[i] = u.Binomial(1, HiddenLayer_output(this, input, this.W[i], this.B[i]))
+	}
+}
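HiddenLayer_sample_h_given_v binarizes the layer: each unit's sigmoid output is treated as a probability and a 0/1 state is drawn with Binomial(1, p). A minimal sketch of that sampling step, with made-up probabilities standing in for real activations:

package main

import (
	"fmt"
	"math/rand"
)

// binomial1 returns 1 with probability p, mirroring utils.Binomial with n == 1.
func binomial1(p float64) int {
	if rand.Float64() < p {
		return 1
	}
	return 0
}

func main() {
	rand.Seed(0)
	means := []float64{0.9, 0.1, 0.5} // hypothetical sigmoid outputs of three hidden units
	sample := make([]int, len(means))
	for i, p := range means {
		sample[i] = binomial1(p) // each unit becomes 0 or 1, biased by its mean
	}
	fmt.Println(sample)
}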
diff --git a/go/LogisticRegression/LogisticRegression.go b/go/LogisticRegression/LogisticRegression.go
new file mode 100644
index 0000000..2f68ef5
--- /dev/null
+++ b/go/LogisticRegression/LogisticRegression.go
@@ -0,0 +1,77 @@
+package LogisticRegression
+
+import (
+	"math"
+)
+
+type LogisticRegression struct {
+	N int
+	N_in int
+	N_out int
+	W [][]float64
+	B []float64
+}
+
+
+func LogisticRegression__construct(this *LogisticRegression, N int, n_in int, n_out int) {
+	this.N = N
+	this.N_in = n_in
+	this.N_out = n_out
+
+	this.W = make([][]float64, n_out)
+	for i := 0; i < n_out; i++ { this.W[i] = make([]float64, n_in) }
+	
+	this.B = make([]float64, n_out)
+}
+
+func LogisticRegression_train(this *LogisticRegression, x []int, y []int, lr float64) {
+	p_y_given_x := make([]float64, this.N_out)
+	dy := make([]float64, this.N_out)
+	
+	for i := 0; i < this.N_out; i++ {
+		p_y_given_x[i] = 0
+		for j := 0; j < this.N_in; j++ {
+			p_y_given_x[i] += this.W[i][j] * float64(x[j])
+		}
+		p_y_given_x[i] += this.B[i]
+	}
+	LogisticRegression_softmax(this, p_y_given_x)
+	
+	for i := 0; i < this.N_out; i++ {
+		dy[i] = float64(y[i]) - p_y_given_x[i]
+		
+		for j := 0; j < this.N_in; j++ {
+			this.W[i][j] += lr * dy[i] * float64(x[j]) / float64(this.N)
+		}
+
+		this.B[i] += lr * dy[i] / float64(this.N)
+	}
+	
+}
+
+func LogisticRegression_softmax(this *LogisticRegression, x []float64) {
+	var (
+		max float64
+		sum float64
+	)
+
+	for i := 0; i < this.N_out; i++ { if max < x[i] {max = x[i]} }
+	for i := 0; i < this.N_out; i++ {
+		x[i] = math.Exp(x[i] - max)
+		sum += x[i]
+	}
+
+	for i := 0; i < this.N_out; i++ { x[i] /= sum }
+}
+
+func LogisticRegression_predict(this *LogisticRegression, x []int, y []float64) {
+	for i := 0; i < this.N_out; i++ {
+		y[i] = 0
+		for j := 0; j < this.N_in; j++ {
+			y[i] += this.W[i][j] * float64(x[j])
+		}
+		y[i] += this.B[i]
+	}
+
+	LogisticRegression_softmax(this, y)
+}
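LogisticRegression_softmax subtracts the running maximum before exponentiating so large activations cannot overflow. A small numeric check of that routine, copied locally so it runs standalone (like the version above, max starts at 0 rather than at the first element, which still prevents overflow):

package main

import (
	"fmt"
	"math"
)

// same steps as LogisticRegression_softmax: subtract the max, exponentiate, normalize
func softmax(x []float64) {
	max, sum := 0.0, 0.0
	for _, v := range x {
		if max < v {
			max = v
		}
	}
	for i := range x {
		x[i] = math.Exp(x[i] - max)
		sum += x[i]
	}
	for i := range x {
		x[i] /= sum
	}
}

func main() {
	x := []float64{2.0, 1.0}
	softmax(x)
	// exp(0) / (exp(0) + exp(-1)) is about 0.731, the other term about 0.269
	fmt.Printf("%.3f %.3f\n", x[0], x[1])
}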
diff --git a/go/RBM/RBM.go b/go/RBM/RBM.go
new file mode 100644
index 0000000..708f8b7
--- /dev/null
+++ b/go/RBM/RBM.go
@@ -0,0 +1,139 @@
+package RBM
+
+import (
+	u "../utils"
+)
+
+type RBM struct {
+	N int
+	n_visible int
+	n_hidden int
+	W [][]float64
+	hbias []float64
+	vbias []float64
+}
+
+
+func RBM__construct(this *RBM, N int, n_visible int, n_hidden int, W [][]float64, hbias []float64, vbias []float64) {
+	a := 1.0 / float64(n_visible)
+
+	this.N = N
+	this.n_visible = n_visible
+	this.n_hidden = n_hidden
+
+	if W == nil {
+		this.W = make([][]float64, n_hidden)
+		for i := 0; i < n_hidden; i++ { this.W[i] = make([]float64, n_visible) }
+
+		for i := 0; i < n_hidden; i++ {
+			for j := 0; j < n_visible; j++ {
+				this.W[i][j] = u.Uniform(-a, a)
+			}
+		}
+	} else {
+		this.W = W
+	}
+
+	if hbias == nil {
+		this.hbias = make([]float64, n_hidden)
+	} else {
+		this.hbias = hbias
+	}
+
+	if vbias == nil {
+		this.vbias = make([]float64, n_visible)
+	} else {
+		this.vbias = vbias
+	}
+}
+
+func RBM_contrastive_divergence(this *RBM, input []int, lr float64, k int) {
+	ph_mean := make([]float64, this.n_hidden)
+	ph_sample := make([]int, this.n_hidden)
+	nv_means := make([]float64, this.n_visible)
+	nv_samples := make([]int, this.n_visible)
+	nh_means := make([]float64, this.n_hidden)
+	nh_samples := make([]int, this.n_hidden)
+
+	/* CD-k */
+	RBM_sample_h_given_v(this, input, ph_mean, ph_sample)
+
+	for step := 0; step < k; step++ {
+		if step == 0 {
+			RBM_gibbs_hvh(this, ph_sample, nv_means, nv_samples, nh_means, nh_samples)
+		} else {
+			RBM_gibbs_hvh(this, nh_samples, nv_means, nv_samples, nh_means, nh_samples)
+		}
+	}
+
+	for i := 0; i < this.n_hidden; i++ {
+		for j := 0; j < this.n_visible; j++ {
+			this.W[i][j] += lr * (ph_mean[i] * float64(input[j]) - nh_means[i] * float64(nv_samples[j])) / float64(this.N)
+		}
+		this.hbias[i] += lr * (float64(ph_sample[i]) - nh_means[i]) / float64(this.N)
+	}
+
+	for i := 0; i < this.n_visible; i++ {
+		this.vbias[i] += lr * float64(input[i] - nv_samples[i]) / float64(this.N)
+	}
+}
+
+func RBM_sample_h_given_v(this *RBM, v0_sample []int, mean []float64, sample []int) {
+	for i := 0; i < this.n_hidden; i++ {
+		mean[i] = RBM_propup(this, v0_sample, this.W[i], this.hbias[i])
+		sample[i] = u.Binomial(1, mean[i])
+	}
+}
+
+func RBM_sample_v_given_h(this *RBM, h0_sample []int, mean []float64, sample []int) {
+	for i := 0; i < this.n_visible; i++ {
+		mean[i] = RBM_propdown(this, h0_sample, i, this.vbias[i])
+		sample[i] = u.Binomial(1, mean[i])
+	}
+}
+
+func RBM_propup(this *RBM, v []int, w []float64, b float64) float64 {
+	pre_sigmoid_activation := 0.0
+	
+	for j := 0; j < this.n_visible; j++ {
+		pre_sigmoid_activation += w[j] * float64(v[j])
+	}
+	pre_sigmoid_activation += b
+	
+	return u.Sigmoid(pre_sigmoid_activation)
+}
+
+func RBM_propdown(this *RBM, h []int, i int, b float64) float64 {
+	pre_sigmoid_activation := 0.0
+	
+	for j := 0; j < this.n_hidden; j++ {
+		pre_sigmoid_activation += this.W[j][i] * float64(h[j])
+	}
+	pre_sigmoid_activation += b
+
+	return u.Sigmoid(pre_sigmoid_activation)
+}
+
+func RBM_gibbs_hvh(this *RBM, h0_sample []int, nv_means []float64, nv_samples []int, nh_means []float64, nh_samples []int) {
+	RBM_sample_v_given_h(this, h0_sample, nv_means, nv_samples)
+	RBM_sample_h_given_v(this, nv_samples, nh_means, nh_samples)
+}
+
+func RBM_reconstruct(this *RBM, v []int, reconstructed_v []float64) {
+	h := make([]float64, this.n_hidden)
+	var pre_sigmoid_activation float64
+
+	for i := 0; i < this.n_hidden; i++ {
+		h[i] = RBM_propup(this, v, this.W[i], this.hbias[i])
+	}
+
+	for i := 0; i < this.n_visible; i++ {
+		pre_sigmoid_activation = 0.0
+		for j := 0; j < this.n_hidden; j++ {
+			pre_sigmoid_activation += this.W[j][i] * h[j]
+		}
+		pre_sigmoid_activation += this.vbias[i]
+
+		reconstructed_v[i] = u.Sigmoid(pre_sigmoid_activation)
+	}
+}
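The least obvious part of DBN_pretrain is the inner l-loop: for layer i it replays the training example through layers 0..i-1, sampling each already-built HiddenLayer in turn, and only the resulting binary vector is handed to rbm_layers[i] for contrastive divergence. A stripped-down sketch of just that data flow, with a deterministic stand-in for the sampler (the names and the truncation trick are illustrative only, not part of the patch):

package main

import "fmt"

// sampleLayer stands in for HiddenLayer_sample_h_given_v: it maps a binary input
// to a binary output of the layer's size; only the data flow matters here.
func sampleLayer(in []int, size int) []int {
	out := make([]int, size)
	for i := 0; i < size && i < len(in); i++ {
		out[i] = in[i]
	}
	return out
}

func main() {
	nIns := 6
	hiddenLayerSizes := []int{3, 3}
	x := []int{1, 1, 1, 0, 0, 0}

	for i := range hiddenLayerSizes { // layer-wise, as in DBN_pretrain
		var layerInput []int
		for l := 0; l <= i; l++ {
			if l == 0 {
				layerInput = append([]int(nil), x[:nIns]...)
			} else {
				layerInput = sampleLayer(layerInput, hiddenLayerSizes[l-1])
			}
		}
		fmt.Printf("layer %d would be pretrained on %v\n", i, layerInput)
	}
}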

From daded50a016a085dcfe8f7c43b881c344ac4a342 Mon Sep 17 00:00:00 2001
From: Yusuke Sugomori <me@yusugomori.com>
Date: Sat, 13 Dec 2014 21:43:14 +0900
Subject: [PATCH 18/45] dA.go

---
 go/dA.go | 195 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 195 insertions(+)
 create mode 100644 go/dA.go

diff --git a/go/dA.go b/go/dA.go
new file mode 100644
index 0000000..3b14c6c
--- /dev/null
+++ b/go/dA.go
@@ -0,0 +1,195 @@
+package main
+
+import (
+	"os"
+	"fmt"
+	"math/rand"
+	u "./utils"
+)
+
+type dA struct {
+	N int
+	n_visible int
+	n_hidden int
+	W [][]float64
+	hbias []float64
+	vbias []float64
+}
+
+
+func dA__construct(this *dA, N int, n_visible int, n_hidden int, W [][]float64, hbias []float64, vbias []float64) {
+	a := 1.0 / float64(n_visible)
+	
+	this.N = N
+	this.n_visible = n_visible
+	this.n_hidden = n_hidden
+
+	if W == nil {
+		this.W = make([][]float64, n_hidden)
+		for i := 0; i < n_hidden; i++ { this.W[i] = make([]float64, n_visible) }
+
+		for i := 0; i < n_hidden; i++ {
+			for j := 0; j < n_visible; j++ {
+				this.W[i][j] = u.Uniform(-a, a)
+			}
+		}
+	} else {
+		this.W = W
+	}
+
+	if hbias == nil {
+		this.hbias = make([]float64, n_hidden)
+	} else {
+		this.hbias = hbias
+	}
+
+	if vbias == nil {
+		this.vbias = make([]float64, n_visible)
+	} else {
+		this.vbias = vbias
+	}
+}
+
+func dA_get_corrupted_input(this *dA, x []int, tilde_x []int, p float64) {
+	for i := 0; i < this.n_visible; i++ {
+		if x[i] == 0 {
+			tilde_x[i] = 0
+		} else {
+			tilde_x[i] = u.Binomial(1, p)
+		}
+	}
+}
+
+// Encode
+func dA_get_hidden_values(this *dA, x []int, y []float64) {
+	for i := 0; i < this.n_hidden; i++ {
+		y[i] = 0
+		for j := 0; j < this.n_visible; j++ {
+			y[i] += this.W[i][j] * float64(x[j])
+		}
+		y[i] += this.hbias[i]
+		y[i] = u.Sigmoid(y[i])
+	}
+}
+
+// Decode
+func dA_get_reconstructed_input(this *dA, y []float64, z []float64) {
+	for i := 0; i < this.n_visible; i++ {
+		z[i] = 0
+		for j := 0; j < this.n_hidden; j++ {
+			z[i] += this.W[j][i] * y[j]
+		}
+		z[i] += this.vbias[i]
+		z[i] = u.Sigmoid(z[i])
+	}
+}
+
+func dA_train(this *dA, x []int, lr float64, corruption_level float64) {
+	tilde_x := make([]int, this.n_visible)
+	y := make([]float64, this.n_hidden)
+	z := make([]float64, this.n_visible)
+
+	L_vbias := make([]float64, this.n_visible)
+	L_hbias := make([]float64, this.n_hidden)
+
+	p := 1 - corruption_level
+
+	dA_get_corrupted_input(this, x, tilde_x, p)
+	dA_get_hidden_values(this, tilde_x, y)
+	dA_get_reconstructed_input(this, y, z)
+
+	// vbias
+	for i := 0; i < this.n_visible; i++ {
+		L_vbias[i] = float64(x[i]) - z[i]
+		this.vbias[i] += lr * L_vbias[i] / float64(this.N)
+	}
+
+	// hbias
+	for i := 0; i < this.n_hidden; i++ {
+		L_hbias[i] = 0
+		for j := 0; j < this.n_visible; j++ {
+			L_hbias[i] += this.W[i][j] * L_vbias[j]
+		}
+		L_hbias[i] *= y[i] * (1- y[i])
+		this.hbias[i] += lr * L_hbias[i] / float64(this.N)
+	}
+
+	// W
+	for i := 0; i < this.n_hidden; i++ {
+		for j := 0; j < this.n_visible; j++ {
+			this.W[i][j] += lr * (L_hbias[i] * float64(tilde_x[j]) + L_vbias[j] * y[i]) / float64(this.N)
+		}
+	}
+}
+
+func dA_reconstruct(this *dA, x []int, z []float64) {
+	y := make([]float64, this.n_hidden)
+
+	dA_get_hidden_values(this, x, y)
+	dA_get_reconstructed_input(this, y, z)
+}
+
+
+
+
+func test_dA() {
+	rand.Seed(0)
+
+	learning_rate := 0.1
+	corruption_level := 0.3
+	training_epochs := 1000
+
+	train_N := 6
+	test_N := 2
+	n_visible := 20
+	n_hidden := 5
+
+	// training data
+	train_X := [][]int {
+		{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+		{1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+		{1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+		{1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+		{0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+		{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+		{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1},
+		{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1},
+		{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1},
+		{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0},
+	}
+
+	// construct dA
+	var da dA
+	dA__construct(&da, train_N, n_visible, n_hidden, nil, nil, nil)
+
+	// train
+	for epoch := 0; epoch < training_epochs; epoch++ {
+		for i := 0; i < train_N; i++ {
+			dA_train(&da, train_X[i], learning_rate, corruption_level)
+		}
+	}
+
+	// test data
+	test_X := [][]int {
+		{1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+		{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0},
+	}
+	reconstructed_X := make([][]float64, test_N)
+	for i := 0; i < test_N; i++ { reconstructed_X[i] = make([]float64, n_visible)}
+
+	
+	// test
+	for i := 0;  i < test_N; i++ {
+		dA_reconstruct(&da, test_X[i], reconstructed_X[i])
+		for j := 0; j < n_visible; j++ {
+			fmt.Printf("%.5f ", reconstructed_X[i][j])
+		}
+		fmt.Printf("\n")
+	}
+	os.Exit(0)
+}
+
+func main() {
+	test_dA()
+	
+}
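dA_get_corrupted_input applies masking noise: a unit that is already 0 stays 0, and a unit that is 1 survives with probability p = 1 - corruption_level (via Binomial(1, p)). A minimal sketch of that corruption step on a toy vector:

package main

import (
	"fmt"
	"math/rand"
)

// corrupt keeps each active unit with probability 1-level and never turns a 0 on,
// mirroring the masking noise in dA_get_corrupted_input.
func corrupt(x []int, level float64) []int {
	tilde := make([]int, len(x))
	for i, v := range x {
		if v == 1 && rand.Float64() < 1-level {
			tilde[i] = 1
		}
	}
	return tilde
}

func main() {
	rand.Seed(0)
	x := []int{1, 1, 1, 1, 0, 0, 0, 0}
	fmt.Println(corrupt(x, 0.3)) // on average about 30% of the 1s are zeroed out
}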

From 3dfb34a81b0b31f4a440634f6f5f5e4d8feccbc5 Mon Sep 17 00:00:00 2001
From: Yusuke Sugomori <me@yusugomori.com>
Date: Sat, 13 Dec 2014 21:44:26 +0900
Subject: [PATCH 19/45] minor fix

---
 go/dA.go | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/go/dA.go b/go/dA.go
index 3b14c6c..a36c226 100644
--- a/go/dA.go
+++ b/go/dA.go
@@ -1,7 +1,6 @@
 package main
 
 import (
-	"os"
 	"fmt"
 	"math/rand"
 	u "./utils"
@@ -186,10 +185,8 @@ func test_dA() {
 		}
 		fmt.Printf("\n")
 	}
-	os.Exit(0)
 }
 
 func main() {
 	test_dA()
-	
 }

From 50a09be13f219008bae939fb2ef173994044ca51 Mon Sep 17 00:00:00 2001
From: Yusuke Sugomori <me@yusugomori.com>
Date: Sun, 14 Dec 2014 16:55:49 +0900
Subject: [PATCH 20/45] SdA.go

---
 go/DBN.go   |   6 +-
 go/SdA.go   | 241 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 go/dA/dA.go | 128 ++++++++++++++++++++++++++++
 3 files changed, 372 insertions(+), 3 deletions(-)
 create mode 100644 go/SdA.go
 create mode 100644 go/dA/dA.go

diff --git a/go/DBN.go b/go/DBN.go
index 498b473..7962a5a 100644
--- a/go/DBN.go
+++ b/go/DBN.go
@@ -169,8 +169,8 @@ func test_dbn() {
 	pretrain_lr := 0.1
 	pretraining_epochs := 1000
 	k := 1
-	fintune_lr := 0.1
-	fintune_epochs := 500
+	finetune_lr := 0.1
+	finetune_epochs := 500
 
 	train_N := 6
 	test_N := 4
@@ -207,7 +207,7 @@ func test_dbn() {
 	DBN_pretrain(&dbn, train_X, pretrain_lr, k, pretraining_epochs)
 
 	// finetune
-	DBN_finetune(&dbn, train_X, train_Y, fintune_lr, fintune_epochs)
+	DBN_finetune(&dbn, train_X, train_Y, finstune_lr, finetune_epochs)
 
 	// test data
 	test_X := [][]int {
diff --git a/go/SdA.go b/go/SdA.go
new file mode 100644
index 0000000..27ccaf6
--- /dev/null
+++ b/go/SdA.go
@@ -0,0 +1,241 @@
+package main
+
+import (
+	"fmt"
+	"math/rand"
+	u "./utils"
+	H "./HiddenLayer"
+	D "./dA"
+	L "./LogisticRegression"
+)
+
+type SdA struct {
+	N int
+	n_ins int
+	hidden_layer_sizes []int
+	n_outs int
+	n_layers int
+	sigmoid_layers []H.HiddenLayer
+	dA_layers []D.DA
+	log_layer L.LogisticRegression
+}
+
+
+func SdA__construct(this *SdA, N int, n_ins int, hidden_layer_sizes []int, n_outs int, n_layers int) {
+	var input_size int
+
+	this.N = N
+	this.n_ins = n_ins
+	this.hidden_layer_sizes = hidden_layer_sizes
+	this.n_outs = n_outs
+	this.n_layers = n_layers
+
+	this.sigmoid_layers = make([]H.HiddenLayer, n_layers)
+	this.dA_layers = make([]D.DA, n_layers)
+
+	// construct multi-layer
+	for i := 0; i < n_layers; i++ {
+		if i == 0 {
+			input_size = n_ins
+		} else {
+			input_size = hidden_layer_sizes[i-1]
+		}
+
+		// construct sigmoid_layer
+		H.HiddenLayer__construct(&(this.sigmoid_layers[i]), N, input_size, hidden_layer_sizes[i], nil, nil)
+
+		// construct dA_layer
+		D.DA__construct(&(this.dA_layers[i]), N, input_size, hidden_layer_sizes[i], this.sigmoid_layers[i].W, this.sigmoid_layers[i].B, nil)
+	}
+
+	// layer for output using LogisticRegression
+	L.LogisticRegression__construct(&(this.log_layer), N, hidden_layer_sizes[n_layers-1], n_outs)
+}
+
+func SdA_pretrain(this *SdA, train_X [][]int, lr float64, corruption_level float64, epochs int) {
+	var (
+		layer_input []int
+		prev_layer_input_size int
+		prev_layer_input []int
+	)
+	for i := 0; i < this.n_layers; i++ {	// layer-wise
+		for epoch := 0; epoch < epochs; epoch++ {	 // training epochs
+			for n := 0; n < this.N; n++ {	 // input x1...xN
+
+				// layer input
+				for l := 0; l <= i; l++ {
+					if l == 0 {
+						layer_input = make([]int, this.n_ins)
+						for j := 0; j < this.n_ins; j++ { layer_input[j] = train_X[n][j] }
+					} else {
+						if l == 1 {
+							prev_layer_input_size = this.n_ins
+						} else {
+							prev_layer_input_size = this.hidden_layer_sizes[l-2]
+						}
+
+						prev_layer_input = make([]int, prev_layer_input_size)
+						for j := 0; j < prev_layer_input_size; j++ { prev_layer_input[j] = layer_input[j] }
+
+						layer_input = make([]int, this.hidden_layer_sizes[l-1])
+
+						H.HiddenLayer_sample_h_given_v(&(this.sigmoid_layers[l-1]), prev_layer_input, layer_input)
+					}
+				}
+
+				D.DA_train(&(this.dA_layers[i]), layer_input, lr, corruption_level)
+			}
+		}
+	}
+}
+
+func SdA_finetune(this *SdA, train_X [][]int, train_Y [][]int, lr float64, epochs int) {
+	var (
+		layer_input []int
+		prev_layer_input []int
+	)
+	
+	for epoch := 0; epoch < epochs; epoch++ {
+		for n := 0; n < this.N; n++ {	 // input x1...xN
+
+			// layer input
+			for i := 0; i < this.n_layers; i++ {
+				if i == 0 {
+					prev_layer_input = make([]int, this.n_ins)
+					for j := 0; j < this.n_ins; j++ { prev_layer_input[j] = train_X[n][j] }
+				} else {
+					prev_layer_input = make([]int, this.hidden_layer_sizes[i-1])
+					for j:= 0; j < this.hidden_layer_sizes[i-1]; j++ { prev_layer_input[j] = layer_input[j] }
+				}
+
+				layer_input = make([]int, this.hidden_layer_sizes[i])
+				H.HiddenLayer_sample_h_given_v(&(this.sigmoid_layers[i]), prev_layer_input, layer_input)
+			}
+
+			L.LogisticRegression_train(&(this.log_layer), layer_input, train_Y[n], lr)
+		}
+		// lr *= 0.95
+	}
+}
+
+func SdA_predict(this *SdA, x []int, y []float64) {
+	var (
+		layer_input []float64
+	)	
+	prev_layer_input := make([]float64, this.n_ins)
+	for j := 0; j < this.n_ins; j++ { prev_layer_input[j] = float64(x[j]) }
+
+	// layer activation
+	for i := 0; i < this.n_layers; i++ {
+		layer_input = make([]float64, this.sigmoid_layers[i].N_out)
+
+		for k := 0; k < this.sigmoid_layers[i].N_out; k++ {
+			linear_output := 0.0
+
+			for j := 0; j < this.sigmoid_layers[i].N_in; j++ {
+				linear_output += this.sigmoid_layers[i].W[k][j] * prev_layer_input[j]
+			}
+			linear_output += this.sigmoid_layers[i].B[k]
+			layer_input[k] = u.Sigmoid(linear_output)
+		}
+
+		if i < this.n_layers-1 {
+			prev_layer_input = make([]float64, this.sigmoid_layers[i].N_out)
+
+			for j := 0; j < this.sigmoid_layers[i].N_out; j++ {
+				prev_layer_input[j] = layer_input[j]
+			}
+		}
+	}
+
+	for i := 0; i < this.log_layer.N_out; i++ {
+		y[i] = 0
+		for j := 0; j < this.log_layer.N_in; j++ {
+			y[i] += this.log_layer.W[i][j] * layer_input[j]
+		}
+		y[i] += this.log_layer.B[i]
+	}
+
+	L.LogisticRegression_softmax(&(this.log_layer), y)
+}
+
+func test_SdA() {
+	rand.Seed(0)
+
+	pretrain_lr := 0.1
+	corruption_level := 0.3
+	pretraining_epochs := 1000
+	finetune_lr := 0.1
+	finetune_epochs := 500
+	
+	train_N := 10
+	test_N := 4
+	n_ins := 28
+	n_outs := 2
+	hidden_layer_sizes := []int {15, 15}
+	n_layers := len(hidden_layer_sizes)
+
+
+	// training data
+	train_X := [][]int {
+		{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+		{0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+		{1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+		{0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+		{1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+		{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+		{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1},
+		{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1},
+		{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1},
+		{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1},
+	}
+
+	train_Y := [][]int {
+		{1, 0},
+		{1, 0},
+		{1, 0},
+		{1, 0},
+		{1, 0},
+		{0, 1},
+		{0, 1},
+		{0, 1},
+		{0, 1},
+		{0, 1},
+	}
+
+	// construct SdA
+	var sda SdA
+	SdA__construct(&sda, train_N, n_ins, hidden_layer_sizes, n_outs, n_layers)
+
+	// pretrain
+	SdA_pretrain(&sda, train_X, pretrain_lr, corruption_level, pretraining_epochs)
+
+	// finetune
+	SdA_finetune(&sda, train_X, train_Y, finetune_lr, finetune_epochs)
+
+
+	// test data
+	test_X := [][]int {
+		{1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+		{1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+		{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1},
+		{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1},
+	}
+	
+	test_Y := make([][]float64, test_N)
+	for i := 0; i < test_N; i++ { test_Y[i] = make([]float64, n_outs)}
+
+	// test
+	for i := 0; i < test_N; i++ {
+		SdA_predict(&sda, test_X[i], test_Y[i])
+		for j := 0; j < n_outs; j++ {
+			fmt.Printf("%.5f ", test_Y[i][j])
+		}
+		fmt.Printf("\n")
+	}
+}
+
+
+func main() {
+	test_SdA()
+}
diff --git a/go/dA/dA.go b/go/dA/dA.go
new file mode 100644
index 0000000..b41d1ce
--- /dev/null
+++ b/go/dA/dA.go
@@ -0,0 +1,128 @@
+package dA
+
+import (
+	u "../utils"
+)
+
+
+type DA struct {
+	N int
+	n_visible int
+	n_hidden int
+	W [][]float64
+	hbias []float64
+	vbias []float64
+}
+
+
+func DA__construct(this *DA, N int, n_visible int, n_hidden int, W [][]float64, hbias []float64, vbias []float64) {
+	a := 1.0 / float64(n_visible)
+	
+	this.N = N
+	this.n_visible = n_visible
+	this.n_hidden = n_hidden
+
+	if W == nil {
+		this.W = make([][]float64, n_hidden)
+		for i := 0; i < n_hidden; i++ { this.W[i] = make([]float64, n_visible) }
+
+		for i := 0; i < n_hidden; i++ {
+			for j := 0; j < n_visible; j++ {
+				this.W[i][j] = u.Uniform(-a, a)
+			}
+		}
+	} else {
+		this.W = W
+	}
+
+	if hbias == nil {
+		this.hbias = make([]float64, n_hidden)
+	} else {
+		this.hbias = hbias
+	}
+
+	if vbias == nil {
+		this.vbias = make([]float64, n_visible)
+	} else {
+		this.vbias = vbias
+	}
+}
+
+func dA_get_corrupted_input(this *DA, x []int, tilde_x []int, p float64) {
+	for i := 0; i < this.n_visible; i++ {
+		if x[i] == 0 {
+			tilde_x[i] = 0
+		} else {
+			tilde_x[i] = u.Binomial(1, p)
+		}
+	}
+}
+
+// Encode
+func dA_get_hidden_values(this *DA, x []int, y []float64) {
+	for i := 0; i < this.n_hidden; i++ {
+		y[i] = 0
+		for j := 0; j < this.n_visible; j++ {
+			y[i] += this.W[i][j] * float64(x[j])
+		}
+		y[i] += this.hbias[i]
+		y[i] = u.Sigmoid(y[i])
+	}
+}
+
+// Decode
+func dA_get_reconstructed_input(this *DA, y []float64, z []float64) {
+	for i := 0; i < this.n_visible; i++ {
+		z[i] = 0
+		for j := 0; j < this.n_hidden; j++ {
+			z[i] += this.W[j][i] * y[j]
+		}
+		z[i] += this.vbias[i]
+		z[i] = u.Sigmoid(z[i])
+	}
+}
+
+func DA_train(this *DA, x []int, lr float64, corruption_level float64) {
+	tilde_x := make([]int, this.n_visible)
+	y := make([]float64, this.n_hidden)
+	z := make([]float64, this.n_visible)
+
+	L_vbias := make([]float64, this.n_visible)
+	L_hbias := make([]float64, this.n_hidden)
+
+	p := 1 - corruption_level
+
+	dA_get_corrupted_input(this, x, tilde_x, p)
+	dA_get_hidden_values(this, tilde_x, y)
+	dA_get_reconstructed_input(this, y, z)
+
+	// vbias
+	for i := 0; i < this.n_visible; i++ {
+		L_vbias[i] = float64(x[i]) - z[i]
+		this.vbias[i] += lr * L_vbias[i] / float64(this.N)
+	}
+
+	// hbias
+	for i := 0; i < this.n_hidden; i++ {
+		L_hbias[i] = 0
+		for j := 0; j < this.n_visible; j++ {
+			L_hbias[i] += this.W[i][j] * L_vbias[j]
+		}
+		L_hbias[i] *= y[i] * (1- y[i])
+		this.hbias[i] += lr * L_hbias[i] / float64(this.N)
+	}
+
+	// W
+	for i := 0; i < this.n_hidden; i++ {
+		for j := 0; j < this.n_visible; j++ {
+			this.W[i][j] += lr * (L_hbias[i] * float64(tilde_x[j]) + L_vbias[j] * y[i]) / float64(this.N)
+		}
+	}
+}
+
+func dA_reconstruct(this *DA, x []int, z []float64) {
+	y := make([]float64, this.n_hidden)
+
+	dA_get_hidden_values(this, x, y)
+	dA_get_reconstructed_input(this, y, z)
+}
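Prediction in SdA_predict (and in DBN_predict) is deterministic: the real-valued sigmoid outputs are fed forward without any Binomial sampling, and the log layer's softmax turns the top-level activations into class probabilities. A compact sketch of that forward pass with one hidden layer and hand-picked weights (all numbers hypothetical):

package main

import (
	"fmt"
	"math"
)

func sigmoid(x float64) float64 { return 1.0 / (1.0 + math.Exp(-x)) }

func main() {
	// one hidden layer of 2 units over a 3-dimensional input, then a 2-class softmax
	Wh := [][]float64{{0.6, -0.4, 0.2}, {-0.1, 0.3, 0.5}}
	bh := []float64{0.0, 0.1}
	Wo := [][]float64{{1.0, -1.0}, {-1.0, 1.0}}
	bo := []float64{0.0, 0.0}
	x := []float64{1, 0, 1}

	// hidden layer: deterministic sigmoid activations, no sampling at predict time
	h := make([]float64, len(Wh))
	for i := range Wh {
		a := bh[i]
		for j := range x {
			a += Wh[i][j] * x[j]
		}
		h[i] = sigmoid(a)
	}

	// output layer followed by a max-shifted softmax, as in LogisticRegression_softmax
	y := make([]float64, len(Wo))
	max := 0.0
	for i := range Wo {
		a := bo[i]
		for j := range h {
			a += Wo[i][j] * h[j]
		}
		y[i] = a
		if a > max {
			max = a
		}
	}
	sum := 0.0
	for i := range y {
		y[i] = math.Exp(y[i] - max)
		sum += y[i]
	}
	for i := range y {
		fmt.Printf("%.5f ", y[i]/sum)
	}
	fmt.Println()
}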

From 9a8f85cd32bc0a50747d56fc7e684a50bdf2c9f0 Mon Sep 17 00:00:00 2001
From: Yusuke Sugomori <me@yusugomori.com>
Date: Sun, 14 Dec 2014 16:56:52 +0900
Subject: [PATCH 21/45] fix typo

---
 go/DBN.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/go/DBN.go b/go/DBN.go
index 7962a5a..e5522a6 100644
--- a/go/DBN.go
+++ b/go/DBN.go
@@ -207,7 +207,7 @@ func test_dbn() {
 	DBN_pretrain(&dbn, train_X, pretrain_lr, k, pretraining_epochs)
 
 	// finetune
-	DBN_finetune(&dbn, train_X, train_Y, finstune_lr, finetune_epochs)
+	DBN_finetune(&dbn, train_X, train_Y, finetune_lr, finetune_epochs)
 
 	// test data
 	test_X := [][]int {

From c02375dfe8db3080075ded85fa7d6338812f025b Mon Sep 17 00:00:00 2001
From: Yusuke Sugomori <me@yusugomori.com>
Date: Thu, 25 Jun 2015 09:46:48 +0200
Subject: [PATCH 22/45] added MLP

---
 python/CDBN.py               | 23 +++-------
 python/CRBM.py               | 15 +------
 python/DBN.py                | 41 +++---------------
 python/HiddenLayer.py        | 60 +++++++++++++++-----------
 python/LogisticRegression.py | 21 +++-------
 python/MLP.py                | 81 ++++++++++++++++++++++++++++++++++++
 python/RBM.py                | 33 ++++-----------
 python/SdA.py                | 25 +++--------
 python/dA.py                 | 37 ++++------------
 python/utils.py              | 11 +++--
 10 files changed, 163 insertions(+), 184 deletions(-)
 create mode 100755 python/MLP.py

diff --git a/python/CDBN.py b/python/CDBN.py
index 4ac987a..dbf6648 100755
--- a/python/CDBN.py
+++ b/python/CDBN.py
@@ -1,16 +1,5 @@
-#!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
-'''
- DBN  w/ continuous-valued inputs (Linear Energy)
-
- References :
-   - Y. Bengio, P. Lamblin, D. Popovici, H. Larochelle: Greedy Layer-Wise
-   Training of Deep Networks, Advances in Neural Information Processing
-   Systems 19, 2007
-
-'''
-
 import sys
 import numpy
 from HiddenLayer import HiddenLayer
@@ -20,13 +9,11 @@
 from DBN import DBN
 from utils import *
 
-
-
  
 class CDBN(DBN):
     def __init__(self, input=None, label=None,\
                  n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,\
-                 numpy_rng=None):
+                 rng=None):
         
         self.x = input
         self.y = label
@@ -35,8 +22,8 @@ def __init__(self, input=None, label=None,\
         self.rbm_layers = []
         self.n_layers = len(hidden_layer_sizes)  # = len(self.rbm_layers)
 
-        if numpy_rng is None:
-            numpy_rng = numpy.random.RandomState(1234)
+        if rng is None:
+            rng = numpy.random.RandomState(1234)
 
         
         assert self.n_layers > 0
@@ -60,7 +47,7 @@ def __init__(self, input=None, label=None,\
             sigmoid_layer = HiddenLayer(input=layer_input,
                                         n_in=input_size,
                                         n_out=hidden_layer_sizes[i],
-                                        numpy_rng=numpy_rng,
+                                        rng=rng,
                                         activation=sigmoid)
             self.sigmoid_layers.append(sigmoid_layer)
 
@@ -113,7 +100,7 @@ def test_cdbn(pretrain_lr=0.1, pretraining_epochs=1000, k=1, \
     rng = numpy.random.RandomState(123)
 
     # construct DBN
-    dbn = CDBN(input=x, label=y, n_ins=6, hidden_layer_sizes=[5, 5], n_outs=2, numpy_rng=rng)
+    dbn = CDBN(input=x, label=y, n_ins=6, hidden_layer_sizes=[5, 5], n_outs=2, rng=rng)
 
     # pre-training (TrainUnsupervisedDBN)
     dbn.pretrain(lr=pretrain_lr, k=1, epochs=pretraining_epochs)
diff --git a/python/CRBM.py b/python/CRBM.py
index 0521883..e870047 100755
--- a/python/CRBM.py
+++ b/python/CRBM.py
@@ -1,16 +1,5 @@
-#!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
-"""
- RBM  w/ continuous-valued inputs (Linear Energy)
-
- References :
-   - Y. Bengio, P. Lamblin, D. Popovici, H. Larochelle: Greedy Layer-Wise
-   Training of Deep Networks, Advances in Neural Information Processing
-   Systems 19, 2007
-
-"""
-
 import sys
 import numpy
 from RBM import RBM
@@ -30,7 +19,7 @@ def sample_v_given_h(self, h0_sample):
         ep = numpy.exp(a_h)
 
         v1_mean = 1 / (1 - en) - 1 / a_h
-        U = numpy.array(self.numpy_rng.uniform(
+        U = numpy.array(self.rng.uniform(
             low=0,
             high=1,
             size=v1_mean.shape))
@@ -53,7 +42,7 @@ def test_crbm(learning_rate=0.1, k=1, training_epochs=1000):
     rng = numpy.random.RandomState(123)
 
     # construct CRBM
-    rbm = CRBM(input=data, n_visible=6, n_hidden=5, numpy_rng=rng)
+    rbm = CRBM(input=data, n_visible=6, n_hidden=5, rng=rng)
 
     # train
     for epoch in xrange(training_epochs):
diff --git a/python/DBN.py b/python/DBN.py
index f639823..b1b351b 100755
--- a/python/DBN.py
+++ b/python/DBN.py
@@ -1,20 +1,5 @@
-#!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
-'''
- Deep Belief Nets (DBN)
-
- References :
-   - Y. Bengio, P. Lamblin, D. Popovici, H. Larochelle: Greedy Layer-Wise
-   Training of Deep Networks, Advances in Neural Information Processing
-   Systems 19, 2007
-
-
-   - DeepLearningTutorials
-   https://github.com/lisa-lab/DeepLearningTutorials
-
-'''
-
 import sys
 import numpy
 from HiddenLayer import HiddenLayer
@@ -26,7 +11,7 @@
 class DBN(object):
     def __init__(self, input=None, label=None,\
                  n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,\
-                 numpy_rng=None):
+                 rng=None):
         
         self.x = input
         self.y = label
@@ -35,8 +20,8 @@ def __init__(self, input=None, label=None,\
         self.rbm_layers = []
         self.n_layers = len(hidden_layer_sizes)  # = len(self.rbm_layers)
 
-        if numpy_rng is None:
-            numpy_rng = numpy.random.RandomState(1234)
+        if rng is None:
+            rng = numpy.random.RandomState(1234)
 
         
         assert self.n_layers > 0
@@ -60,7 +45,7 @@ def __init__(self, input=None, label=None,\
             sigmoid_layer = HiddenLayer(input=layer_input,
                                         n_in=input_size,
                                         n_out=hidden_layer_sizes[i],
-                                        numpy_rng=numpy_rng,
+                                        rng=rng,
                                         activation=sigmoid)
             self.sigmoid_layers.append(sigmoid_layer)
 
@@ -100,21 +85,6 @@ def pretrain(self, lr=0.1, k=1, epochs=100):
                 # print >> sys.stderr, \
                 #        'Pre-training layer %d, epoch %d, cost ' %(i, epoch), cost
 
-    # def pretrain(self, lr=0.1, k=1, epochs=100):
-    #     # pre-train layer-wise
-    #     for i in xrange(self.n_layers):
-    #         rbm = self.rbm_layers[i]
-            
-    #         for epoch in xrange(epochs):
-    #             layer_input = self.x
-    #             for j in xrange(i):
-    #                 layer_input = self.sigmoid_layers[j].sample_h_given_v(layer_input)
-            
-    #             rbm.contrastive_divergence(lr=lr, k=k, input=layer_input)
-    #             # cost = rbm.get_reconstruction_cross_entropy()
-    #             # print >> sys.stderr, \
-    #             #        'Pre-training layer %d, epoch %d, cost ' %(i, epoch), cost
-
 
     def finetune(self, lr=0.1, epochs=100):
         layer_input = self.sigmoid_layers[-1].sample_h_given_v()
@@ -158,12 +128,11 @@ def test_dbn(pretrain_lr=0.1, pretraining_epochs=1000, k=1, \
                      [0, 1],
                      [0, 1],
                      [0, 1]])
-
     
     rng = numpy.random.RandomState(123)
 
     # construct DBN
-    dbn = DBN(input=x, label=y, n_ins=6, hidden_layer_sizes=[3, 3], n_outs=2, numpy_rng=rng)
+    dbn = DBN(input=x, label=y, n_ins=6, hidden_layer_sizes=[3, 3], n_outs=2, rng=rng)
 
     # pre-training (TrainUnsupervisedDBN)
     dbn.pretrain(lr=pretrain_lr, k=1, epochs=pretraining_epochs)
diff --git a/python/HiddenLayer.py b/python/HiddenLayer.py
index 72e51e1..4130b35 100755
--- a/python/HiddenLayer.py
+++ b/python/HiddenLayer.py
@@ -1,15 +1,5 @@
-#!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
-'''
- Hidden Layer
-
- References :
-   - DeepLearningTutorials
-   https://github.com/lisa-lab/DeepLearningTutorials
-
-'''
-
 import sys
 import numpy
 from utils import *
@@ -17,38 +7,42 @@
 
 class HiddenLayer(object):
     def __init__(self, input, n_in, n_out,\
-                 W=None, b=None, numpy_rng=None, activation=numpy.tanh):
+                 W=None, b=None, rng=None, activation=numpy.tanh):
         
-        if numpy_rng is None:
-            numpy_rng = numpy.random.RandomState(1234)
+        if rng is None:
+            rng = numpy.random.RandomState(1234)
 
         if W is None:
             a = 1. / n_in
-            initial_W = numpy.array(numpy_rng.uniform(  # initialize W uniformly
+            W = numpy.array(rng.uniform(  # initialize W uniformly
                 low=-a,
                 high=a,
                 size=(n_in, n_out)))
 
-            W = initial_W
-
         if b is None:
             b = numpy.zeros(n_out)  # initialize bias 0
 
-
-        self.numpy_rng = numpy_rng
-        self.input = input
+        self.rng = rng
+        self.x = input
         self.W = W
         self.b = b
 
+        if activation == numpy.tanh:
+            self.dactivation = dtanh
+        elif activation == sigmoid:
+            self.dactivation = dsigmoid
+        else:
+            raise ValueError('activation function not supported.')
+        
         self.activation = activation
+        
 
-        # self.params = [self.W, self.b]
 
     def output(self, input=None):
         if input is not None:
-            self.input = input
+            self.x = input
         
-        linear_output = numpy.dot(self.input, self.W) + self.b
+        linear_output = numpy.dot(self.x, self.W) + self.b
 
         return (linear_output if self.activation is None
                 else self.activation(linear_output))
@@ -56,10 +50,28 @@ def output(self, input=None):
 
     def sample_h_given_v(self, input=None):
         if input is not None:
-            self.input = input
+            self.x = input
 
         v_mean = self.output()
-        h_sample = self.numpy_rng.binomial(size=v_mean.shape,
+        h_sample = self.rng.binomial(size=v_mean.shape,
                                            n=1,
                                            p=v_mean)
         return h_sample
+
+
+
+    def forward(self, input=None):
+        return self.output(input=input)
+
+
+    def backward(self, prev_layer, lr=0.1, input=None):
+        if input is not None:
+            self.x = input
+
+        # d_y = (1 - prev_layer.x * prev_layer.x) * numpy.dot(prev_layer.d_y, prev_layer.W.T)
+        d_y = self.dactivation(prev_layer.x) * numpy.dot(prev_layer.d_y, prev_layer.W.T)
+
+        self.W += lr * numpy.dot(self.x.T, d_y)
+        self.b += lr * numpy.mean(d_y, axis=0)
+
+        self.d_y = d_y
diff --git a/python/LogisticRegression.py b/python/LogisticRegression.py
index a828a40..f1a686d 100755
--- a/python/LogisticRegression.py
+++ b/python/LogisticRegression.py
@@ -1,18 +1,5 @@
-#!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
-'''
- Logistic Regression
- 
- References :
-   - Jason Rennie: Logistic Regression,
-   http://qwone.com/~jason/writing/lr.pdf
- 
-   - DeepLearningTutorials
-   https://github.com/lisa-lab/DeepLearningTutorials
-
-'''
-
 import sys
 import numpy
 from utils import *
@@ -25,7 +12,6 @@ def __init__(self, input, label, n_in, n_out):
         self.W = numpy.zeros((n_in, n_out))  # initialize W 0
         self.b = numpy.zeros(n_out)          # initialize bias 0
 
-        # self.params = [self.W, self.b]
 
     def train(self, lr=0.1, input=None, L2_reg=0.00):
         if input is not None:
@@ -37,6 +23,8 @@ def train(self, lr=0.1, input=None, L2_reg=0.00):
         
         self.W += lr * numpy.dot(self.x.T, d_y) - lr * L2_reg * self.W
         self.b += lr * numpy.mean(d_y, axis=0)
+
+        self.d_y = d_y
         
         # cost = self.negative_log_likelihood()
         # return cost
@@ -57,6 +45,9 @@ def predict(self, x):
         # return sigmoid(numpy.dot(x, self.W) + self.b)
         return softmax(numpy.dot(x, self.W) + self.b)
 
+    def output(self, x):
+        return self.predict(x)
+
 
 def test_lr(learning_rate=0.01, n_epochs=200):
     # training data
@@ -90,7 +81,7 @@ def test_lr(learning_rate=0.01, n_epochs=200):
                      [0, 0, 0, 1, 1, 0],
                      [1, 1, 1, 1, 1, 0]])
 
-    print >> sys.stderr, classifier.predict(x)
+    print classifier.predict(x)
 
 
 if __name__ == "__main__":
diff --git a/python/MLP.py b/python/MLP.py
new file mode 100755
index 0000000..def6ab7
--- /dev/null
+++ b/python/MLP.py
@@ -0,0 +1,81 @@
+# -*- coding: utf-8 -*-
+
+import sys
+import numpy
+from HiddenLayer import HiddenLayer
+from LogisticRegression import LogisticRegression
+from utils import *
+
+
+class MLP(object):
+    def __init__(self, input, label, n_in, n_hidden, n_out, rng=None):
+
+        self.x = input
+        self.y = label
+
+        if rng is None:
+            rng = numpy.random.RandomState(1234)
+
+        # construct hidden_layer (tanh, sigmoid, etc...)
+        self.hidden_layer = HiddenLayer(input=self.x,
+                                        n_in=n_in,
+                                        n_out=n_hidden,
+                                        rng=rng,
+                                        activation=numpy.tanh)
+
+        # construct log_layer (softmax)
+        self.log_layer = LogisticRegression(input=self.hidden_layer.output,
+                                            label=self.y,
+                                            n_in=n_hidden,
+                                            n_out=n_out)
+
+    def train(self):
+        layer_input = self.hidden_layer.forward()
+        # print self.hidden_layer.W
+
+        self.log_layer.train(input=layer_input)
+        self.hidden_layer.backward(prev_layer=self.log_layer)
+        
+
+    def predict(self, x):
+        x = self.hidden_layer.output(x)
+        return self.log_layer.predict(x)
+
+
+def test_mlp(n_epochs=100):
+
+    x = numpy.array([[1,1,1,0,0,0],
+                     [1,0,1,0,0,0],
+                     [1,1,1,0,0,0],
+                     [0,0,1,1,1,0],
+                     [0,0,1,1,0,0],
+                     [0,0,1,1,1,0]])
+    y = numpy.array([[1, 0],
+                     [1, 0],
+                     [1, 0],
+                     [0, 1],
+                     [0, 1],
+                     [0, 1]])
+
+
+    rng = numpy.random.RandomState(123)
+
+
+    # construct MLP
+    classifier = MLP(input=x, label=y, n_in=6, n_hidden=15, n_out=2, rng=rng)
+
+    # train
+    for epoch in xrange(n_epochs):
+        classifier.train()
+
+
+    # test
+    x = numpy.array([[1, 1, 0, 0, 0, 0],
+                     [0, 0, 0, 1, 1, 0],
+                     [1, 1, 1, 1, 1, 0]])
+
+    print classifier.predict(x)
+        
+
+if __name__ == "__main__":
+    test_mlp()
diff --git a/python/RBM.py b/python/RBM.py
index 781241d..4a1be8e 100755
--- a/python/RBM.py
+++ b/python/RBM.py
@@ -1,38 +1,23 @@
-#!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
-"""
- Restricted Boltzmann Machine (RBM)
-
- References :
-   - Y. Bengio, P. Lamblin, D. Popovici, H. Larochelle: Greedy Layer-Wise
-   Training of Deep Networks, Advances in Neural Information Processing
-   Systems 19, 2007
-
-
-   - DeepLearningTutorials
-   https://github.com/lisa-lab/DeepLearningTutorials
-
-"""
-
 import sys
 import numpy
 from utils import *
 
 class RBM(object):
     def __init__(self, input=None, n_visible=2, n_hidden=3, \
-        W=None, hbias=None, vbias=None, numpy_rng=None):
+        W=None, hbias=None, vbias=None, rng=None):
         
         self.n_visible = n_visible  # num of units in visible (input) layer
         self.n_hidden = n_hidden    # num of units in hidden layer
 
-        if numpy_rng is None:
-            numpy_rng = numpy.random.RandomState(1234)
+        if rng is None:
+            rng = numpy.random.RandomState(1234)
 
 
         if W is None:
             a = 1. / n_visible
-            initial_W = numpy.array(numpy_rng.uniform(  # initialize W uniformly
+            initial_W = numpy.array(rng.uniform(  # initialize W uniformly
                 low=-a,
                 high=a,
                 size=(n_visible, n_hidden)))
@@ -46,14 +31,12 @@ def __init__(self, input=None, n_visible=2, n_hidden=3, \
             vbias = numpy.zeros(n_visible)  # initialize v bias 0
 
 
-        self.numpy_rng = numpy_rng
+        self.rng = rng
         self.input = input
         self.W = W
         self.hbias = hbias
         self.vbias = vbias
 
-        # self.params = [self.W, self.hbias, self.vbias]
-
 
     def contrastive_divergence(self, lr=0.1, k=1, input=None):
         if input is not None:
@@ -86,7 +69,7 @@ def contrastive_divergence(self, lr=0.1, k=1, input=None):
 
     def sample_h_given_v(self, v0_sample):
         h1_mean = self.propup(v0_sample)
-        h1_sample = self.numpy_rng.binomial(size=h1_mean.shape,   # discrete: binomial
+        h1_sample = self.rng.binomial(size=h1_mean.shape,   # discrete: binomial
                                        n=1,
                                        p=h1_mean)
 
@@ -95,7 +78,7 @@ def sample_h_given_v(self, v0_sample):
 
     def sample_v_given_h(self, h0_sample):
         v1_mean = self.propdown(h0_sample)
-        v1_sample = self.numpy_rng.binomial(size=v1_mean.shape,   # discrete: binomial
+        v1_sample = self.rng.binomial(size=v1_mean.shape,   # discrete: binomial
                                             n=1,
                                             p=v1_mean)
         
@@ -153,7 +136,7 @@ def test_rbm(learning_rate=0.1, k=1, training_epochs=1000):
     rng = numpy.random.RandomState(123)
 
     # construct RBM
-    rbm = RBM(input=data, n_visible=6, n_hidden=2, numpy_rng=rng)
+    rbm = RBM(input=data, n_visible=6, n_hidden=2, rng=rng)
 
     # train
     for epoch in xrange(training_epochs):
diff --git a/python/SdA.py b/python/SdA.py
index 4a0f45a..3d38d0f 100755
--- a/python/SdA.py
+++ b/python/SdA.py
@@ -1,20 +1,5 @@
-#!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
-
-"""
- Stacked Denoising Autoencoders (SdA)
-
- References :
-   - P. Vincent, H. Larochelle, Y. Bengio, P.A. Manzagol: Extracting and
-   Composing Robust Features with Denoising Autoencoders, ICML' 08, 1096-1103,
-   2008
-
-   - DeepLearningTutorials
-   https://github.com/lisa-lab/DeepLearningTutorials
-   
-"""
-
 import sys
 import numpy
 from HiddenLayer import HiddenLayer
@@ -26,7 +11,7 @@
 class SdA(object):
     def __init__(self, input=None, label=None,\
                  n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,\
-                 numpy_rng=None):
+                 rng=None):
         
         self.x = input
         self.y = label
@@ -35,8 +20,8 @@ def __init__(self, input=None, label=None,\
         self.dA_layers = []
         self.n_layers = len(hidden_layer_sizes)  # = len(self.rbm_layers)
 
-        if numpy_rng is None:
-            numpy_rng = numpy.random.RandomState(1234)
+        if rng is None:
+            rng = numpy.random.RandomState(1234)
 
         
         assert self.n_layers > 0
@@ -60,7 +45,7 @@ def __init__(self, input=None, label=None,\
             sigmoid_layer = HiddenLayer(input=layer_input,
                                         n_in=input_size,
                                         n_out=hidden_layer_sizes[i],
-                                        numpy_rng=numpy_rng,
+                                        rng=rng,
                                         activation=sigmoid)
             self.sigmoid_layers.append(sigmoid_layer)
 
@@ -152,7 +137,7 @@ def test_SdA(pretrain_lr=0.1, pretraining_epochs=1000, corruption_level=0.3, \
 
     # construct SdA
     sda = SdA(input=x, label=y, \
-              n_ins=20, hidden_layer_sizes=[15, 15], n_outs=2, numpy_rng=rng)
+              n_ins=20, hidden_layer_sizes=[15, 15], n_outs=2, rng=rng)
 
     # pre-training
     sda.pretrain(lr=pretrain_lr, corruption_level=corruption_level, epochs=pretraining_epochs)
diff --git a/python/dA.py b/python/dA.py
index 0b911eb..edbf6c7 100755
--- a/python/dA.py
+++ b/python/dA.py
@@ -1,23 +1,5 @@
-#!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
-"""
- Denoising Autoencoders (dA)
-
- References :
-   - P. Vincent, H. Larochelle, Y. Bengio, P.A. Manzagol: Extracting and
-   Composing Robust Features with Denoising Autoencoders, ICML'08, 1096-1103,
-   2008
-
-   - DeepLearningTutorials
-   https://github.com/lisa-lab/DeepLearningTutorials
-
-   - Yusuke Sugomori: Stochastic Gradient Descent for Denoising Autoencoders,
-   http://yusugomori.com/docs/SGD_DA.pdf
-
-"""
-
-
 import sys
 import numpy
 from utils import *
@@ -25,44 +7,39 @@
 
 class dA(object):
     def __init__(self, input=None, n_visible=2, n_hidden=3, \
-        W=None, hbias=None, vbias=None, numpy_rng=None):
+        W=None, hbias=None, vbias=None, rng=None):
 
         self.n_visible = n_visible  # num of units in visible (input) layer
         self.n_hidden = n_hidden    # num of units in hidden layer
 
-        if numpy_rng is None:
-            numpy_rng = numpy.random.RandomState(1234)
+        if rng is None:
+            rng = numpy.random.RandomState(1234)
             
         if W is None:
             a = 1. / n_visible
-            initial_W = numpy.array(numpy_rng.uniform(  # initialize W uniformly
+            W = numpy.array(rng.uniform(  # initialize W uniformly
                 low=-a,
                 high=a,
                 size=(n_visible, n_hidden)))
 
-            W = initial_W
-
         if hbias is None:
             hbias = numpy.zeros(n_hidden)  # initialize h bias 0
 
         if vbias is None:
             vbias = numpy.zeros(n_visible)  # initialize v bias 0
 
-        self.numpy_rng = numpy_rng
+        self.rng = rng
         self.x = input
         self.W = W
         self.W_prime = self.W.T
         self.hbias = hbias
         self.vbias = vbias
 
-        # self.params = [self.W, self.hbias, self.vbias]
-
-
         
     def get_corrupted_input(self, input, corruption_level):
         assert corruption_level < 1
 
-        return self.numpy_rng.binomial(size=input.shape,
+        return self.rng.binomial(size=input.shape,
                                        n=1,
                                        p=1-corruption_level) * input
 
@@ -133,7 +110,7 @@ def test_dA(learning_rate=0.1, corruption_level=0.3, training_epochs=50):
     rng = numpy.random.RandomState(123)
 
     # construct dA
-    da = dA(input=data, n_visible=20, n_hidden=5, numpy_rng=rng)
+    da = dA(input=data, n_visible=20, n_hidden=5, rng=rng)
 
     # train
     for epoch in xrange(training_epochs):
diff --git a/python/utils.py b/python/utils.py
index 5c4a748..e17f203 100755
--- a/python/utils.py
+++ b/python/utils.py
@@ -1,7 +1,5 @@
-''' '''
-import numpy
-
 
+import numpy
 numpy.seterr(all='ignore')
 
 
@@ -9,6 +7,13 @@ def sigmoid(x):
     return 1. / (1 + numpy.exp(-x))
 
 
+def dsigmoid(x):
+    return x * (1. - x)
+
+def dtanh(x):
+    return 1. - x * x
+
+
 def softmax(x):
     e = numpy.exp(x - numpy.max(x))  # prevent overflow
     if e.ndim == 1:

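The dsigmoid and dtanh helpers added to utils.py take the activation's output rather than its input: if y = tanh(x) then dy/dx = 1 - y*y, and if y = sigmoid(x) then dy/dx = y(1 - y). That is why HiddenLayer.backward applies dactivation to prev_layer.x, which holds the already-activated values of this layer. A quick finite-difference check of both identities, sketched in Go to match the rest of this repo's ports (the helper names simply mirror the Python ones):

package main

import (
	"fmt"
	"math"
)

// both helpers take the activation's output y, not its input x
func dtanh(y float64) float64    { return 1.0 - y*y }
func dsigmoid(y float64) float64 { return y * (1.0 - y) }

func sigmoid(x float64) float64 { return 1.0 / (1.0 + math.Exp(-x)) }

func main() {
	x, eps := 0.7, 1e-6

	yTanh := math.Tanh(x)
	numTanh := (math.Tanh(x+eps) - math.Tanh(x-eps)) / (2 * eps)
	fmt.Printf("tanh':    analytic %.6f  numeric %.6f\n", dtanh(yTanh), numTanh)

	ySig := sigmoid(x)
	numSig := (sigmoid(x+eps) - sigmoid(x-eps)) / (2 * eps)
	fmt.Printf("sigmoid': analytic %.6f  numeric %.6f\n", dsigmoid(ySig), numSig)
}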
From f4ef9e5a8c63c368fb9a99b130ae9657aaf1ed3f Mon Sep 17 00:00:00 2001
From: Yusuke Sugomori <me@yusugomori.com>
Date: Thu, 25 Jun 2015 09:48:36 +0200
Subject: [PATCH 23/45] minor fix MLP

---
 python/MLP.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/python/MLP.py b/python/MLP.py
index def6ab7..3ba7425 100755
--- a/python/MLP.py
+++ b/python/MLP.py
@@ -16,7 +16,7 @@ def __init__(self, input, label, n_in, n_hidden, n_out, rng=None):
         if rng is None:
             rng = numpy.random.RandomState(1234)
 
-        # construct hidden_layer (tanh, sigmoid, etc...)
+        # construct hidden_layer (tanh or sigmoid so far)
         self.hidden_layer = HiddenLayer(input=self.x,
                                         n_in=n_in,
                                         n_out=n_hidden,
@@ -31,8 +31,6 @@ def __init__(self, input, label, n_in, n_hidden, n_out, rng=None):
 
     def train(self):
         layer_input = self.hidden_layer.forward()
-        # print self.hidden_layer.W
-
         self.log_layer.train(input=layer_input)
         self.hidden_layer.backward(prev_layer=self.log_layer)
         

From 37aadf1d27ba356703716afd80c28410fd2d1c34 Mon Sep 17 00:00:00 2001
From: Yusuke Sugomori <me@yusugomori.com>
Date: Sun, 2 Aug 2015 21:45:34 -0400
Subject: [PATCH 24/45] add Dropout with ReLU

---
 python/Dropout.py     | 160 ++++++++++++++++++++++++++++++++++++++++++
 python/HiddenLayer.py |  26 ++++++-
 python/MLP.py         |   4 +-
 python/SdA.py         |   4 +-
 python/utils.py       |  11 ++-
 5 files changed, 197 insertions(+), 8 deletions(-)
 create mode 100755 python/Dropout.py

diff --git a/python/Dropout.py b/python/Dropout.py
new file mode 100755
index 0000000..3edf3fe
--- /dev/null
+++ b/python/Dropout.py
@@ -0,0 +1,160 @@
+# -*- coding: utf-8 -*-
+
+import sys
+import numpy
+from HiddenLayer import HiddenLayer
+from LogisticRegression import LogisticRegression
+from utils import *
+
+
+class Dropout(object):
+    def __init__(self, input, label,\
+                 n_in, hidden_layer_sizes, n_out,\
+                 rng=None, activation=ReLU):
+
+        self.x = input
+        self.y = label
+
+        self.hidden_layers = []
+        self.n_layers = len(hidden_layer_sizes)
+        
+        if rng is None:
+            rng = numpy.random.RandomState(1234)
+
+        assert self.n_layers > 0
+
+
+        # construct multi-layer 
+        for i in xrange(self.n_layers):
+
+            # layer_size
+            if i == 0:
+                input_size = n_in
+            else:
+                input_size = hidden_layer_sizes[i-1]
+
+            # layer_input
+            if i == 0:
+                layer_input = self.x
+
+            else:
+                layer_input = self.hidden_layers[-1].output()
+
+            # construct hidden_layer
+            hidden_layer = HiddenLayer(input=layer_input,
+                                       n_in=input_size,
+                                       n_out=hidden_layer_sizes[i],
+                                       rng=rng,
+                                       activation=activation)
+            
+            self.hidden_layers.append(hidden_layer)
+
+
+            # layer for output using Logistic Regression (softmax)
+            self.log_layer = LogisticRegression(input=self.hidden_layers[-1].output(),
+                                                label=self.y,
+                                                n_in=hidden_layer_sizes[-1],
+                                                n_out=n_out)
+
+
+    def train(self, epochs=500, dropout=True, p_dropout=0.5, rng=None):
+
+        for epoch in xrange(epochs):
+            dropout_masks = []  # create different masks in each training epoch
+
+            # forward hidden_layers
+            for i in xrange(self.n_layers):
+                if i == 0:
+                    layer_input = self.x
+
+                layer_input = self.hidden_layers[i].forward(input=layer_input)
+
+                if dropout == True:
+                    mask = self.hidden_layers[i].dropout(input=layer_input, p=p_dropout, rng=rng)
+                    layer_input *= mask
+
+                    dropout_masks.append(mask)
+
+
+            # forward & backward log_layer
+            self.log_layer.train(input=layer_input)
+
+
+            # backward hidden_layers
+            for i in reversed(xrange(0, self.n_layers)):
+                if i == self.n_layers-1:
+                    prev_layer = self.log_layer
+                else:
+                    prev_layer = self.hidden_layers[i+1]
+
+                self.hidden_layers[i].backward(prev_layer=prev_layer)
+                
+                if dropout == True:
+                    self.hidden_layers[i].d_y *= dropout_masks[i]  # also mask here
+
+
+    def predict(self, x, dropout=True, p_dropout=0.5):
+        layer_input = x
+
+        for i in xrange(self.n_layers):
+            if dropout == True:
+                self.hidden_layers[i].W = p_dropout * self.hidden_layers[i].W
+                self.hidden_layers[i].b = p_dropout * self.hidden_layers[i].b
+            
+            layer_input = self.hidden_layers[i].output(input=layer_input)
+
+        return self.log_layer.predict(layer_input)
+
+
+
+def test_dropout(n_epochs=500, dropout=True, p_dropout=0.5):
+
+    x = numpy.array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                     [1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                     [1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                     [1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                     [0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
+                     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1],
+                     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1],
+                     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1],
+                     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0]])
+
+    y = numpy.array([[1, 0],
+                     [1, 0],
+                     [1, 0],
+                     [1, 0],
+                     [1, 0],
+                     [0, 1],
+                     [0, 1],
+                     [0, 1],
+                     [0, 1],
+                     [0, 1]])
+
+
+    rng = numpy.random.RandomState(123)
+
+
+    # construct Dropout MLP
+    classifier = Dropout(input=x, label=y, \
+                         n_in=20, hidden_layer_sizes=[12, 12], n_out=2, \
+                         rng=rng, activation=ReLU)
+
+
+    # train
+    classifier.train(epochs=n_epochs, dropout=dropout, \
+                     p_dropout=p_dropout, rng=rng)
+
+
+    # test
+    x = numpy.array([[1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                     [1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1],
+                     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1]])
+
+    print classifier.predict(x)
+
+
+
+if __name__ == "__main__":
+    test_dropout()
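
In Dropout.train above, each hidden layer's forward output is multiplied by a fresh binary mask, and the same mask is applied to that layer's stored gradient d_y so dropped units do not pass error down to the layer below. A minimal sketch of that mask logic, assuming plain numpy arrays (illustration only, not part of the patch; the variable names are hypothetical):

import numpy

rng = numpy.random.RandomState(123)
p_dropout = 0.5

activations = numpy.array([[0.2, 0.0, 0.7, 1.3]])   # one hidden layer's forward output
mask = rng.binomial(size=activations.shape, n=1, p=1 - p_dropout)  # 1 = keep, 0 = drop
masked_activations = activations * mask              # forward pass with dropout

d_y = numpy.array([[0.1, -0.3, 0.05, 0.2]])          # gradient arriving at this layer
masked_d_y = d_y * mask                              # same mask reused after backward()

print(masked_activations)
print(masked_d_y)
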
diff --git a/python/HiddenLayer.py b/python/HiddenLayer.py
index 4130b35..f7a0731 100755
--- a/python/HiddenLayer.py
+++ b/python/HiddenLayer.py
@@ -7,7 +7,7 @@
 
 class HiddenLayer(object):
     def __init__(self, input, n_in, n_out,\
-                 W=None, b=None, rng=None, activation=numpy.tanh):
+                 W=None, b=None, rng=None, activation=tanh):
         
         if rng is None:
             rng = numpy.random.RandomState(1234)
@@ -24,15 +24,22 @@ def __init__(self, input, n_in, n_out,\
 
         self.rng = rng
         self.x = input
+
         self.W = W
         self.b = b
 
-        if activation == numpy.tanh:
+        if activation == tanh:
             self.dactivation = dtanh
+
         elif activation == sigmoid:
             self.dactivation = dsigmoid
+
+        elif activation == ReLU:
+            self.dactivation = dReLU
+
         else:
             raise ValueError('activation function not supported.')
+
         
         self.activation = activation
         
@@ -68,10 +75,23 @@ def backward(self, prev_layer, lr=0.1, input=None):
         if input is not None:
             self.x = input
 
-        # d_y = (1 - prev_layer.x * prev_layer.x) * numpy.dot(prev_layer.d_y, prev_layer.W.T)
         d_y = self.dactivation(prev_layer.x) * numpy.dot(prev_layer.d_y, prev_layer.W.T)
 
         self.W += lr * numpy.dot(self.x.T, d_y)
         self.b += lr * numpy.mean(d_y, axis=0)
 
         self.d_y = d_y
+
+
+    def dropout(self, input, p, rng=None):
+        if rng is None:
+            rng = numpy.random.RandomState(123)
+
+        mask = rng.binomial(size=input.shape,
+                            n=1,
+                            p=1-p)  # keep with probability 1-p (p is the drop probability)
+
+        return mask
+                     
+
+
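
backward() above multiplies the incoming error by self.dactivation(prev_layer.x), i.e. the derivative evaluated at this layer's output (which is the next layer's stored input). That works because dtanh, dsigmoid and dReLU are written as functions of the activated output y rather than the pre-activation. A small sketch of this convention, assuming the usual sigmoid definition (illustration only, not part of the patch):

import numpy

def sigmoid(x):
    return 1. / (1. + numpy.exp(-x))

def dsigmoid(y):
    return y * (1. - y)   # derivative expressed through the output y, as in utils.py

a = numpy.array([-1.0, 0.5, 2.0])   # hypothetical pre-activations
y = sigmoid(a)
# dsigmoid(y) equals sigmoid'(a) = sigmoid(a) * (1 - sigmoid(a))
print(dsigmoid(y))
print(sigmoid(a) * (1 - sigmoid(a)))
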
diff --git a/python/MLP.py b/python/MLP.py
index 3ba7425..0e02f2a 100755
--- a/python/MLP.py
+++ b/python/MLP.py
@@ -21,7 +21,7 @@ def __init__(self, input, label, n_in, n_hidden, n_out, rng=None):
                                         n_in=n_in,
                                         n_out=n_hidden,
                                         rng=rng,
-                                        activation=numpy.tanh)
+                                        activation=tanh)
 
         # construct log_layer (softmax)
         self.log_layer = LogisticRegression(input=self.hidden_layer.output,
@@ -36,7 +36,7 @@ def train(self):
         
 
     def predict(self, x):
-        x = self.hidden_layer.output(x)
+        x = self.hidden_layer.output(input=x)
         return self.log_layer.predict(x)
 
 
diff --git a/python/SdA.py b/python/SdA.py
index 3d38d0f..5f8de37 100755
--- a/python/SdA.py
+++ b/python/SdA.py
@@ -103,8 +103,8 @@ def predict(self, x):
             sigmoid_layer = self.sigmoid_layers[i]
             layer_input = sigmoid_layer.output(input=layer_input)
 
-        out = self.log_layer.predict(layer_input)
-        return out
+        return self.log_layer.predict(layer_input)
+
 
 
 
diff --git a/python/utils.py b/python/utils.py
index e17f203..7aca40d 100755
--- a/python/utils.py
+++ b/python/utils.py
@@ -10,10 +10,12 @@ def sigmoid(x):
 def dsigmoid(x):
     return x * (1. - x)
 
+def tanh(x):
+    return numpy.tanh(x)
+
 def dtanh(x):
     return 1. - x * x
 
-
 def softmax(x):
     e = numpy.exp(x - numpy.max(x))  # prevent overflow
     if e.ndim == 1:
@@ -22,6 +24,13 @@ def softmax(x):
         return e / numpy.array([numpy.sum(e, axis=1)]).T  # ndim = 2
 
 
+def ReLU(x):
+    return x * (x > 0)
+
+def dReLU(x):
+    return 1. * (x > 0)
+
+
 # # probability density for the Gaussian dist
 # def gaussian(x, mean=0.0, scale=1.0):
 #     s = 2 * numpy.power(scale, 2)
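
The new ReLU helpers rely on boolean broadcasting: x * (x > 0) zeroes out non-positive entries, and 1. * (x > 0) is the corresponding 0/1 derivative. Because a ReLU output is positive exactly when its pre-activation is positive, dReLU can also be applied to a layer's output, matching the convention used by dtanh and dsigmoid. A quick sanity check (illustration only, not part of the patch):

import numpy

def ReLU(x):
    return x * (x > 0)

def dReLU(x):
    return 1. * (x > 0)

x = numpy.array([-2.0, -0.5, 0.0, 0.5, 2.0])
print(ReLU(x))    # non-positive entries map to 0 (may print as -0.)
print(dReLU(x))   # 1 where x > 0, else 0
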

From 4aa12f1ca8cc58e0d232e721335110d604fce59d Mon Sep 17 00:00:00 2001
From: Yusuke Sugomori <yusugomori@users.noreply.github.com>
Date: Sun, 2 Aug 2015 21:47:59 -0400
Subject: [PATCH 25/45] Update README.md

---
 README.md | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 48c2f67..14e17c5 100644
--- a/README.md
+++ b/README.md
@@ -17,6 +17,10 @@
   - LogisticRegression: Logistic Regression
 
   - HiddenLayer: Hidden Layer of Neural Networks
+  
+  - MLP: Multi-Layer Perceptron
+
+  - Dropout: Dropout MLP
 
 
 
@@ -38,4 +42,4 @@
 
 
 
-### Bug reports are deeply welcome.
\ No newline at end of file
+### Bug reports are deeply welcome.

From d31d4f51cdead6e4d0ae0a24225c3f79b7fa7c5f Mon Sep 17 00:00:00 2001
From: Yusuke Sugomori <me@yusugomori.com>
Date: Mon, 3 Aug 2015 23:12:15 -0400
Subject: [PATCH 26/45] bug fix

---
 python/Dropout.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/python/Dropout.py b/python/Dropout.py
index 3edf3fe..feb594e 100755
--- a/python/Dropout.py
+++ b/python/Dropout.py
@@ -98,8 +98,7 @@ def predict(self, x, dropout=True, p_dropout=0.5):
 
         for i in xrange(self.n_layers):
             if dropout == True:
-                self.hidden_layers[i].W = p_dropout * self.hidden_layers[i].W
-                self.hidden_layers[i].b = p_dropout * self.hidden_layers[i].b
+                self.hidden_layers[i].W = (1 - p_dropout) * self.hidden_layers[i].W
             
             layer_input = self.hidden_layers[i].output(input=layer_input)
 

From bb903c6791c83851c725c6e8689edea83a0122df Mon Sep 17 00:00:00 2001
From: Yusuke Sugomori <me@yusugomori.com>
Date: Tue, 4 Aug 2015 22:09:01 -0400
Subject: [PATCH 27/45] clean up java

---
 .gitignore                                    |   4 +
 java/DBN/src/DBN.java                         | 223 -----------------
 java/DBN/src/HiddenLayer.java                 |  74 ------
 java/DBN/src/LogisticRegression.java          |  73 ------
 java/DBN/src/RBM.java                         | 164 -------------
 .../src/LogisticRegression.java               | 128 ----------
 java/RBM/src/RBM.java                         | 221 -----------------
 java/SdA/src/HiddenLayer.java                 |  74 ------
 java/SdA/src/LogisticRegression.java          |  73 ------
 java/SdA/src/SdA.java                         | 230 ------------------
 java/SdA/src/dA.java                          | 150 ------------
 java/dA/src/dA.java                           | 207 ----------------
 java/src/DeepLearning/DBN.java                | 222 +++++++++++++++++
 java/src/DeepLearning/HiddenLayer.java        |  54 ++++
 java/src/DeepLearning/LogisticRegression.java | 129 ++++++++++
 java/src/DeepLearning/RBM.java                | 203 ++++++++++++++++
 java/src/DeepLearning/SdA.java                | 230 ++++++++++++++++++
 java/src/DeepLearning/dA.java                 | 187 ++++++++++++++
 java/src/DeepLearning/utils.java              |  29 +++
 19 files changed, 1058 insertions(+), 1617 deletions(-)
 delete mode 100644 java/DBN/src/DBN.java
 delete mode 100644 java/DBN/src/HiddenLayer.java
 delete mode 100644 java/DBN/src/LogisticRegression.java
 delete mode 100644 java/DBN/src/RBM.java
 delete mode 100644 java/LogisticRegression/src/LogisticRegression.java
 delete mode 100644 java/RBM/src/RBM.java
 delete mode 100644 java/SdA/src/HiddenLayer.java
 delete mode 100644 java/SdA/src/LogisticRegression.java
 delete mode 100644 java/SdA/src/SdA.java
 delete mode 100644 java/SdA/src/dA.java
 delete mode 100644 java/dA/src/dA.java
 create mode 100644 java/src/DeepLearning/DBN.java
 create mode 100644 java/src/DeepLearning/HiddenLayer.java
 create mode 100644 java/src/DeepLearning/LogisticRegression.java
 create mode 100644 java/src/DeepLearning/RBM.java
 create mode 100644 java/src/DeepLearning/SdA.java
 create mode 100644 java/src/DeepLearning/dA.java
 create mode 100644 java/src/DeepLearning/utils.java

diff --git a/.gitignore b/.gitignore
index 190cc57..252ef14 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,3 +8,7 @@
 *.settings
 .project
 .metadata
+
+java/.idea/*
+java/java.iml
+java/out/*
\ No newline at end of file
diff --git a/java/DBN/src/DBN.java b/java/DBN/src/DBN.java
deleted file mode 100644
index 5b72e68..0000000
--- a/java/DBN/src/DBN.java
+++ /dev/null
@@ -1,223 +0,0 @@
-import java.util.Random;
-
-public class DBN {
-	public int N;
-	public int n_ins;
-	public int[] hidden_layer_sizes;
-	public int n_outs;
-	public int n_layers;
-	public HiddenLayer[] sigmoid_layers;
-	public RBM[] rbm_layers;
-	public LogisticRegression log_layer;
-	public Random rng;
-
-	public static double sigmoid(double x) {
-		return 1.0 / (1.0 + Math.pow(Math.E, -x));
-	}
-	
-	
-	public DBN(int N, int n_ins, int[] hidden_layer_sizes, int n_outs, int n_layers, Random rng) {
-		int input_size;
-		
-		this.N = N;
-		this.n_ins = n_ins;
-		this.hidden_layer_sizes = hidden_layer_sizes;
-		this.n_outs = n_outs;
-		this.n_layers = n_layers;
-		
-		this.sigmoid_layers = new HiddenLayer[n_layers];
-		this.rbm_layers = new RBM[n_layers];
-
-		if(rng == null)	this.rng = new Random(1234);
-		else this.rng = rng;		
-		
-		// construct multi-layer
-		for(int i=0; i<this.n_layers; i++) {
-			if(i == 0) {
-				input_size = this.n_ins;
-			} else {
-				input_size = this.hidden_layer_sizes[i-1];
-			}
-			
-			// construct sigmoid_layer
-			this.sigmoid_layers[i] = new HiddenLayer(this.N, input_size, this.hidden_layer_sizes[i], null, null, rng);
-			
-			// construct rbm_layer
-			this.rbm_layers[i] = new RBM(this.N, input_size, this.hidden_layer_sizes[i], this.sigmoid_layers[i].W, this.sigmoid_layers[i].b, null, rng);
-		}
-		
-		// layer for output using LogisticRegression
-		this.log_layer = new LogisticRegression(this.N, this.hidden_layer_sizes[this.n_layers-1], this.n_outs);
-	}
-	
-	public void pretrain(int[][] train_X, double lr, int k, int epochs) {
-		int[] layer_input = new int[0];
-		int prev_layer_input_size;
-		int[] prev_layer_input;
-				
-		for(int i=0; i<n_layers; i++) {  // layer-wise			
-			for(int epoch=0; epoch<epochs; epoch++) {  // training epochs
-				for(int n=0; n<N; n++) {  // input x1...xN
-					// layer input
-					for(int l=0; l<=i; l++) {
-						
-						if(l == 0) {
-							layer_input = new int[n_ins];
-							for(int j=0; j<n_ins; j++) layer_input[j] = train_X[n][j];
-						} else {
-							if(l == 1) prev_layer_input_size = n_ins;
-							else prev_layer_input_size = hidden_layer_sizes[l-2];
-							
-							prev_layer_input = new int[prev_layer_input_size];
-							for(int j=0; j<prev_layer_input_size; j++) prev_layer_input[j] = layer_input[j];
-							
-							layer_input = new int[hidden_layer_sizes[l-1]];
-							
-							sigmoid_layers[l-1].sample_h_given_v(prev_layer_input, layer_input);
-						}
-					}
-					
-					rbm_layers[i].contrastive_divergence(layer_input, lr, k);
-				}
-			}
-		}
-	}
-	
-	public void finetune(int[][] train_X, int[][] train_Y, double lr, int epochs) {
-		int[] layer_input = new int[0];
-		// int prev_layer_input_size;
-		int[] prev_layer_input = new int[0];
-		
-		for(int epoch=0; epoch<epochs; epoch++) {
-			for(int n=0; n<N; n++) {
-				
-				// layer input
-				for(int i=0; i<n_layers; i++) {
-					if(i == 0) {
-						prev_layer_input = new int[n_ins];
-						for(int j=0; j<n_ins; j++) prev_layer_input[j] = train_X[n][j];
-					} else {
-						prev_layer_input = new int[hidden_layer_sizes[i-1]];
-						for(int j=0; j<hidden_layer_sizes[i-1]; j++) prev_layer_input[j] = layer_input[j];
-					}
-					
-					layer_input = new int[hidden_layer_sizes[i]];
-					sigmoid_layers[i].sample_h_given_v(prev_layer_input, layer_input);
-				}
-				
-				log_layer.train(layer_input, train_Y[n], lr);
-			}
-			// lr *= 0.95;
-		}
-	}
-	
-	public void predict(int[] x, double[] y) {
-		double[] layer_input = new double[0];
-		// int prev_layer_input_size;
-		double[] prev_layer_input = new double[n_ins];
-		for(int j=0; j<n_ins; j++) prev_layer_input[j] = x[j];
-	
-		double linear_output;
-		
-		
-		// layer activation
-		for(int i=0; i<n_layers; i++) {
-			layer_input = new double[sigmoid_layers[i].n_out];
-			
-			for(int k=0; k<sigmoid_layers[i].n_out; k++) {
-				linear_output = 0.0;
-				
-				for(int j=0; j<sigmoid_layers[i].n_in; j++) {
-					linear_output += sigmoid_layers[i].W[k][j] * prev_layer_input[j];
-				}
-				linear_output += sigmoid_layers[i].b[k];
-				layer_input[k] = sigmoid(linear_output);
-			}
-			
-			if(i < n_layers-1) {
-				prev_layer_input = new double[sigmoid_layers[i].n_out];
-				for(int j=0; j<sigmoid_layers[i].n_out; j++) prev_layer_input[j] = layer_input[j];
-			}
-		}
-		
-		for(int i=0; i<log_layer.n_out; i++) {
-			y[i] = 0;
-			for(int j=0; j<log_layer.n_in; j++) {
-				y[i] += log_layer.W[i][j] * layer_input[j];
-			}
-			y[i] += log_layer.b[i];
-		}
-		
-		log_layer.softmax(y);
-	}
-	
-	private static void test_dbn() {
-		Random rng = new Random(123);
-		
-		double pretrain_lr = 0.1;
-		int pretraining_epochs = 1000;
-		int k = 1;
-		double finetune_lr = 0.1;
-		int finetune_epochs = 500;
-		
-		int train_N = 6;
-		int test_N = 4;
-		int n_ins = 6;
-		int n_outs = 2;
-		int[] hidden_layer_sizes = {3, 3};
-		int n_layers = hidden_layer_sizes.length;
-		
-		// training data
-		int[][] train_X = {
-			{1, 1, 1, 0, 0, 0},
-			{1, 0, 1, 0, 0, 0},
-			{1, 1, 1, 0, 0, 0},
-			{0, 0, 1, 1, 1, 0},
-			{0, 0, 1, 1, 0, 0},
-			{0, 0, 1, 1, 1, 0}
-		};
-		
-		int[][] train_Y = {
-			{1, 0},
-			{1, 0},
-			{1, 0},
-			{0, 1},
-			{0, 1},
-			{0, 1},
-		};
-		
-		
-		// construct DBN
-		DBN dbn = new DBN(train_N, n_ins, hidden_layer_sizes, n_outs, n_layers, rng);
-		
-		// pretrain
-		dbn.pretrain(train_X, pretrain_lr, k, pretraining_epochs);
-		
-		// finetune
-		dbn.finetune(train_X, train_Y, finetune_lr, finetune_epochs);
-		
-		
-		// test data
-		int[][] test_X = {
-			{1, 1, 0, 0, 0, 0},
-			{1, 1, 1, 1, 0, 0},
-			{0, 0, 0, 1, 1, 0},
-			{0, 0, 1, 1, 1, 0},
-		};
-		
-		double[][] test_Y = new double[test_N][n_outs];
-		
-		// test
-		for(int i=0; i<test_N; i++) {
-			dbn.predict(test_X[i], test_Y[i]);
-			for(int j=0; j<n_outs; j++) {
-				System.out.print(test_Y[i][j] + " ");
-			}
-			System.out.println();
-		}
-	}
-	
-	public static void main(String[] args) {
-		test_dbn();
-	}
-}
diff --git a/java/DBN/src/HiddenLayer.java b/java/DBN/src/HiddenLayer.java
deleted file mode 100644
index 588eea8..0000000
--- a/java/DBN/src/HiddenLayer.java
+++ /dev/null
@@ -1,74 +0,0 @@
-import java.util.Random;
-
-public class HiddenLayer {
-	public int N;
-	public int n_in;
-	public int n_out;
-	public double[][] W;
-	public double[] b;
-	public Random rng;
-	
-	public double uniform(double min, double max) {
-		return rng.nextDouble() * (max - min) + min;
-	}
-	
-	public int binomial(int n, double p) {
-		if(p < 0 || p > 1) return 0;
-		
-		int c = 0;
-		double r;
-		
-		for(int i=0; i<n; i++) {
-			r = rng.nextDouble();
-			if (r < p) c++;
-		}
-		
-		return c;
-	}
-	
-	public static double sigmoid(double x) {
-		return 1.0 / (1.0 + Math.pow(Math.E, -x));
-	}
-	
-	
-	
-	public HiddenLayer(int N, int n_in, int n_out, double[][] W, double[] b, Random rng) {
-		this.N = N;
-		this.n_in = n_in;
-		this.n_out = n_out;
-		
-		if(rng == null)	this.rng = new Random(1234);
-		else this.rng = rng;
-	
-		if(W == null) {
-			this.W = new double[n_out][n_in];
-			double a = 1.0 / this.n_in;
-			
-			for(int i=0; i<n_out; i++) {
-				for(int j=0; j<n_in; j++) {
-					this.W[i][j] = uniform(-a, a);
-				}
-			}
-		} else {
-			this.W = W;
-		}
-		
-		if(b == null) this.b = new double[n_out];
-		else this.b = b;
-	}
-	
-	public double output(int[] input, double[] w, double b) {
-		double linear_output = 0.0;
-		for(int j=0; j<n_in; j++) {
-			linear_output += w[j] * input[j];
-		}
-		linear_output += b;
-		return sigmoid(linear_output);
-	}
-	
-	public void sample_h_given_v(int[] input, int[] sample) {
-		for(int i=0; i<n_out; i++) {
-			sample[i] = binomial(1, output(input, W[i], b[i]));
-		}
-	}
-}
diff --git a/java/DBN/src/LogisticRegression.java b/java/DBN/src/LogisticRegression.java
deleted file mode 100644
index 09ba807..0000000
--- a/java/DBN/src/LogisticRegression.java
+++ /dev/null
@@ -1,73 +0,0 @@
-
-public class LogisticRegression {
-	public int N;
-	public int n_in;
-	public int n_out;
-	public double[][] W;
-	public double[] b;
-	
-	public LogisticRegression(int N, int n_in, int n_out) {
-		this.N = N;
-		this.n_in = n_in;
-		this.n_out = n_out;
-		
-		W = new double[this.n_out][this.n_in];
-		b = new double[this.n_out];
-	}
-	
-	public void train(int[] x, int[] y, double lr) {
-		double[] p_y_given_x = new double[n_out];
-		double[] dy = new double[n_out];
-		
-		for(int i=0; i<n_out; i++) {
-			p_y_given_x[i] = 0;
-			for(int j=0; j<n_in; j++) {
-				p_y_given_x[i] += W[i][j] * x[j];
-			}
-			p_y_given_x[i] += b[i];
-		}
-		softmax(p_y_given_x);
-		
-		for(int i=0; i<n_out; i++) {
-			dy[i] = y[i] - p_y_given_x[i];
-			
-			for(int j=0; j<n_in; j++) {
-				W[i][j] += lr * dy[i] * x[j] / N;
-			}
-			
-			b[i] += lr * dy[i] / N;
-		}
-	}
-	
-	public void softmax(double[] x) {
-		double max = 0.0;
-		double sum = 0.0;
-		
-		for(int i=0; i<n_out; i++) {
-			if(max < x[i]) {
-				max = x[i];
-			}
-		}
-		
-		for(int i=0; i<n_out; i++) {
-			x[i] = Math.exp(x[i] - max);
-			sum += x[i];
-		}
-		
-		for(int i=0; i<n_out; i++) {
-			x[i] /= sum;
-		}
-	}
-	
-	public void predict(int[] x, double[] y) {
-		for(int i=0; i<n_out; i++) {
-			y[i] = 0;
-			for(int j=0; j<n_in; j++) {
-				y[i] += W[i][j] * x[j];
-			}
-			y[i] += b[i];
-		}
-		
-		softmax(y);
-	}		
-}
diff --git a/java/DBN/src/RBM.java b/java/DBN/src/RBM.java
deleted file mode 100644
index 9ee3563..0000000
--- a/java/DBN/src/RBM.java
+++ /dev/null
@@ -1,164 +0,0 @@
-import java.util.Random;
-
-public class RBM {
-	public int N;
-	public int n_visible;
-	public int n_hidden;
-	public double[][] W;
-	public double[] hbias;
-	public double[] vbias;
-	public Random rng;
-	
-	public double uniform(double min, double max) {
-		return rng.nextDouble() * (max - min) + min;
-	}
-	
-	public int binomial(int n, double p) {
-		if(p < 0 || p > 1) return 0;
-		
-		int c = 0;
-		double r;
-		
-		for(int i=0; i<n; i++) {
-			r = rng.nextDouble();
-			if (r < p) c++;
-		}
-		
-		return c;
-	}
-	
-	public static double sigmoid(double x) {
-		return 1.0 / (1.0 + Math.pow(Math.E, -x));
-	}
-	
-	
-	public RBM(int N, int n_visible, int n_hidden, 
-			double[][] W, double[] hbias, double[] vbias, Random rng) {
-		this.N = N;
-		this.n_visible = n_visible;
-		this.n_hidden = n_hidden;
-	
-		if(rng == null)	this.rng = new Random(1234);
-		else this.rng = rng;
-		
-		if(W == null) {
-			this.W = new double[this.n_hidden][this.n_visible];
-			double a = 1.0 / this.n_visible;
-			
-			for(int i=0; i<this.n_hidden; i++) {
-				for(int j=0; j<this.n_visible; j++) {
-					this.W[i][j] = uniform(-a, a); 
-				}
-			}	
-		} else {
-			this.W = W;
-		}
-		
-		if(hbias == null) {
-			this.hbias = new double[this.n_hidden];
-			for(int i=0; i<this.n_hidden; i++) this.hbias[i] = 0;
-		} else {
-			this.hbias = hbias;
-		}
-		
-		if(vbias == null) {
-			this.vbias = new double[this.n_visible];
-			for(int i=0; i<this.n_visible; i++) this.vbias[i] = 0;
-		} else {
-			this.vbias = vbias;
-		}
-	}
-	
-	
-	public void contrastive_divergence(int[] input, double lr, int k) {
-		double[] ph_mean = new double[n_hidden];
-		int[] ph_sample = new int[n_hidden];
-		double[] nv_means = new double[n_visible];
-		int[] nv_samples = new int[n_visible];
-		double[] nh_means = new double[n_hidden];
-		int[] nh_samples = new int[n_hidden];
-		
-		/* CD-k */
-		sample_h_given_v(input, ph_mean, ph_sample);
-		
-		for(int step=0; step<k; step++) {
-			if(step == 0) {
-				gibbs_hvh(ph_sample, nv_means, nv_samples, nh_means, nh_samples);
-			} else {
-				gibbs_hvh(nh_samples, nv_means, nv_samples, nh_means, nh_samples);
-			}
-		}
-		
-		for(int i=0; i<n_hidden; i++) {
-			for(int j=0; j<n_visible; j++) {
-				// W[i][j] += lr *(ph_sample[i] * input[j] - nh_means[i] * nv_samples[j]) / N;
-				W[i][j] += lr *(ph_mean[i] * input[j] - nh_means[i] * nv_samples[j]) / N;
-			}
-			hbias[i] += lr * (ph_sample[i] - nh_means[i]) / N;
-		}
-		
-
-		for(int i=0; i<n_visible; i++) {
-			vbias[i] += lr * (input[i] - nv_samples[i]) / N;
-		}
-
-	}
-	
-	
-	public void sample_h_given_v(int[] v0_sample, double[] mean, int[] sample) {
-		for(int i=0; i<n_hidden; i++) {
-			mean[i] = propup(v0_sample, W[i], hbias[i]);
-			sample[i] = binomial(1, mean[i]);
-		}
-	}
-
-	public void sample_v_given_h(int[] h0_sample, double[] mean, int[] sample) {
-		for(int i=0; i<n_visible; i++) {
-			mean[i] = propdown(h0_sample, i, vbias[i]);
-			sample[i] = binomial(1, mean[i]);
-		}
-	}
-	
-	public double propup(int[] v, double[] w, double b) {
-		double pre_sigmoid_activation = 0.0;
-		for(int j=0; j<n_visible; j++) {
-			pre_sigmoid_activation += w[j] * v[j];
-		}
-		pre_sigmoid_activation += b;
-		return sigmoid(pre_sigmoid_activation);
-	}
-	
-	public double propdown(int[] h, int i, double b) {
-	  double pre_sigmoid_activation = 0.0;
-	  for(int j=0; j<n_hidden; j++) {
-	    pre_sigmoid_activation += W[j][i] * h[j];
-	  }
-	  pre_sigmoid_activation += b;
-	  return sigmoid(pre_sigmoid_activation);
-	}
-	
-	public void gibbs_hvh(int[] h0_sample, double[] nv_means, int[] nv_samples, double[] nh_means, int[] nh_samples) {
-	  sample_v_given_h(h0_sample, nv_means, nv_samples);
-	  sample_h_given_v(nv_samples, nh_means, nh_samples);
-	}
-
-
-	public void reconstruct(int[] v, double[] reconstructed_v) {
-	  double[] h = new double[n_hidden];
-	  double pre_sigmoid_activation;
-	
-	  for(int i=0; i<n_hidden; i++) {
-	    h[i] = propup(v, W[i], hbias[i]);
-	  }
-	
-	  for(int i=0; i<n_visible; i++) {
-	    pre_sigmoid_activation = 0.0;
-	    for(int j=0; j<n_hidden; j++) {
-	      pre_sigmoid_activation += W[j][i] * h[j];
-	    }
-	    pre_sigmoid_activation += vbias[i];
-	
-	    reconstructed_v[i] = sigmoid(pre_sigmoid_activation);
-	  }	
-	}	
-}
diff --git a/java/LogisticRegression/src/LogisticRegression.java b/java/LogisticRegression/src/LogisticRegression.java
deleted file mode 100644
index 8356c95..0000000
--- a/java/LogisticRegression/src/LogisticRegression.java
+++ /dev/null
@@ -1,128 +0,0 @@
-
-public class LogisticRegression {
-	public int N;
-	public int n_in;
-	public int n_out;
-	public double[][] W;
-	public double[] b;
-	
-	public LogisticRegression(int N, int n_in, int n_out) {
-		this.N = N;
-		this.n_in = n_in;
-		this.n_out = n_out;
-		
-		W = new double[this.n_out][this.n_in];
-		b = new double[this.n_out];
-	}
-	
-	public void train(int[] x, int[] y, double lr) {
-		double[] p_y_given_x = new double[n_out];
-		double[] dy = new double[n_out];
-		
-		for(int i=0; i<n_out; i++) {
-			p_y_given_x[i] = 0;
-			for(int j=0; j<n_in; j++) {
-				p_y_given_x[i] += W[i][j] * x[j];
-			}
-			p_y_given_x[i] += b[i];
-		}
-		softmax(p_y_given_x);
-		
-		for(int i=0; i<n_out; i++) {
-			dy[i] = y[i] - p_y_given_x[i];
-			
-			for(int j=0; j<n_in; j++) {
-				W[i][j] += lr * dy[i] * x[j] / N;
-			}
-			
-			b[i] += lr * dy[i] / N;
-		}
-	}
-	
-	public void softmax(double[] x) {
-		double max = 0.0;
-		double sum = 0.0;
-		
-		for(int i=0; i<n_out; i++) if(max < x[i]) max = x[i];
-		
-		for(int i=0; i<n_out; i++) {
-			x[i] = Math.exp(x[i] - max);
-			sum += x[i];
-		}
-		
-		for(int i=0; i<n_out; i++) x[i] /= sum;
-	}
-	
-	public void predict(int[] x, double[] y) {
-		for(int i=0; i<n_out; i++) {
-			y[i] = 0;
-			for(int j=0; j<n_in; j++) {
-				y[i] += W[i][j] * x[j];
-			}
-			y[i] += b[i];
-		}
-		
-		softmax(y);
-	}	
-	
-	private static void test_lr() {
-		double learning_rate = 0.1;
-		int n_epochs = 500;
-		
-		int train_N = 6;
-		int test_N = 2;
-		int n_in = 6;
-		int n_out = 2;
-		
-		int[][] train_X = {
-			{1, 1, 1, 0, 0, 0},
-			{1, 0, 1, 0, 0, 0},
-			{1, 1, 1, 0, 0, 0},
-			{0, 0, 1, 1, 1, 0},
-			{0, 0, 1, 1, 0, 0},
-			{0, 0, 1, 1, 1, 0}
-		};
-		
-		int[][] train_Y = {
-			{1, 0},
-			{1, 0},
-			{1, 0},
-			{0, 1},
-			{0, 1},
-			{0, 1}
-		}; 
-		
-		// construct
-		LogisticRegression classifier = new LogisticRegression(train_N, n_in, n_out);
-		
-		// train 
-		for(int epoch=0; epoch<n_epochs; epoch++) {
-			for(int i=0; i<train_N; i++) {
-				classifier.train(train_X[i], train_Y[i], learning_rate);
-			}
-			//learning_rate *= 0.95;
-		}
-		
-		// test data
-		int[][] test_X = {
-			{1, 0, 1, 0, 0, 0},
-			{0, 0, 1, 1, 1, 0}
-		};
-		
-		double[][] test_Y = new double[test_N][n_out];
-		
-		
-		// test
-		for(int i=0; i<test_N; i++) {
-			classifier.predict(test_X[i], test_Y[i]);
-			for(int j=0; j<n_out; j++) {
-				System.out.print(test_Y[i][j] + " ");
-			}
-			System.out.println();
-		}
-	}
-	
-	public static void main(String[] args) {
-		test_lr();
-	}
-}
diff --git a/java/RBM/src/RBM.java b/java/RBM/src/RBM.java
deleted file mode 100644
index 6f2e3b7..0000000
--- a/java/RBM/src/RBM.java
+++ /dev/null
@@ -1,221 +0,0 @@
-import java.util.Random;
-
-public class RBM {
-	public int N;
-	public int n_visible;
-	public int n_hidden;
-	public double[][] W;
-	public double[] hbias;
-	public double[] vbias;
-	public Random rng;
-	
-	public double uniform(double min, double max) {
-		return rng.nextDouble() * (max - min) + min;
-	}
-	
-	public int binomial(int n, double p) {
-		if(p < 0 || p > 1) return 0;
-		
-		int c = 0;
-		double r;
-		
-		for(int i=0; i<n; i++) {
-			r = rng.nextDouble();
-			if (r < p) c++;
-		}
-		
-		return c;
-	}
-	
-	public static double sigmoid(double x) {
-		return 1.0 / (1.0 + Math.pow(Math.E, -x));
-	}
-	
-	
-	public RBM(int N, int n_visible, int n_hidden, 
-			double[][] W, double[] hbias, double[] vbias, Random rng) {
-		this.N = N;
-		this.n_visible = n_visible;
-		this.n_hidden = n_hidden;
-	
-		if(rng == null)	this.rng = new Random(1234);
-		else this.rng = rng;
-		
-		if(W == null) {
-			this.W = new double[this.n_hidden][this.n_visible];
-			double a = 1.0 / this.n_visible;
-			
-			for(int i=0; i<this.n_hidden; i++) {
-				for(int j=0; j<this.n_visible; j++) {
-					this.W[i][j] = uniform(-a, a); 
-				}
-			}	
-		} else {
-			this.W = W;
-		}
-		
-		if(hbias == null) {
-			this.hbias = new double[this.n_hidden];
-			for(int i=0; i<this.n_hidden; i++) this.hbias[i] = 0;
-		} else {
-			this.hbias = hbias;
-		}
-		
-		if(vbias == null) {
-			this.vbias = new double[this.n_visible];
-			for(int i=0; i<this.n_visible; i++) this.vbias[i] = 0;
-		} else {
-			this.vbias = vbias;
-		}
-	}
-	
-	
-	public void contrastive_divergence(int[] input, double lr, int k) {
-		double[] ph_mean = new double[n_hidden];
-		int[] ph_sample = new int[n_hidden];
-		double[] nv_means = new double[n_visible];
-		int[] nv_samples = new int[n_visible];
-		double[] nh_means = new double[n_hidden];
-		int[] nh_samples = new int[n_hidden];
-		
-		/* CD-k */
-		sample_h_given_v(input, ph_mean, ph_sample);
-		
-		for(int step=0; step<k; step++) {
-			if(step == 0) {
-				gibbs_hvh(ph_sample, nv_means, nv_samples, nh_means, nh_samples);
-			} else {
-				gibbs_hvh(nh_samples, nv_means, nv_samples, nh_means, nh_samples);
-			}
-		}
-		
-		for(int i=0; i<n_hidden; i++) {
-			for(int j=0; j<n_visible; j++) {
-				// W[i][j] += lr * (ph_sample[i] * input[j] - nh_means[i] * nv_samples[j]) / N;
-				W[i][j] += lr * (ph_mean[i] * input[j] - nh_means[i] * nv_samples[j]) / N;
-			}
-			hbias[i] += lr * (ph_sample[i] - nh_means[i]) / N;
-		}
-		
-
-		for(int i=0; i<n_visible; i++) {
-			vbias[i] += lr * (input[i] - nv_samples[i]) / N;
-		}
-
-	}
-	
-	
-	public void sample_h_given_v(int[] v0_sample, double[] mean, int[] sample) {
-		for(int i=0; i<n_hidden; i++) {
-			mean[i] = propup(v0_sample, W[i], hbias[i]);
-			sample[i] = binomial(1, mean[i]);
-		}
-	}
-
-	public void sample_v_given_h(int[] h0_sample, double[] mean, int[] sample) {
-		for(int i=0; i<n_visible; i++) {
-			mean[i] = propdown(h0_sample, i, vbias[i]);
-			sample[i] = binomial(1, mean[i]);
-		}
-	}
-	
-	public double propup(int[] v, double[] w, double b) {
-		double pre_sigmoid_activation = 0.0;
-		for(int j=0; j<n_visible; j++) {
-			pre_sigmoid_activation += w[j] * v[j];
-		}
-		pre_sigmoid_activation += b;
-		return sigmoid(pre_sigmoid_activation);
-	}
-	
-	public double propdown(int[] h, int i, double b) {
-	  double pre_sigmoid_activation = 0.0;
-	  for(int j=0; j<n_hidden; j++) {
-	    pre_sigmoid_activation += W[j][i] * h[j];
-	  }
-	  pre_sigmoid_activation += b;
-	  return sigmoid(pre_sigmoid_activation);
-	}
-	
-	public void gibbs_hvh(int[] h0_sample, double[] nv_means, int[] nv_samples, double[] nh_means, int[] nh_samples) {
-	  sample_v_given_h(h0_sample, nv_means, nv_samples);
-	  sample_h_given_v(nv_samples, nh_means, nh_samples);
-	}
-
-
-	public void reconstruct(int[] v, double[] reconstructed_v) {
-	  double[] h = new double[n_hidden];
-	  double pre_sigmoid_activation;
-	
-	  for(int i=0; i<n_hidden; i++) {
-	    h[i] = propup(v, W[i], hbias[i]);
-	  }
-	
-	  for(int i=0; i<n_visible; i++) {
-	    pre_sigmoid_activation = 0.0;
-	    for(int j=0; j<n_hidden; j++) {
-	      pre_sigmoid_activation += W[j][i] * h[j];
-	    }
-	    pre_sigmoid_activation += vbias[i];
-	
-	    reconstructed_v[i] = sigmoid(pre_sigmoid_activation);
-	  }	
-	}
-
-	
-	
-	private static void test_rbm() {
-		Random rng = new Random(123);
-
-		double learning_rate = 0.1;
-		int training_epochs = 1000;
-		int k = 1;
-		  
-		int train_N = 6;
-		int test_N = 2;
-		int n_visible = 6;
-		int n_hidden = 3;
-
-		// training data
-		int[][] train_X = {
-			{1, 1, 1, 0, 0, 0},
-			{1, 0, 1, 0, 0, 0},
-			{1, 1, 1, 0, 0, 0},
-			{0, 0, 1, 1, 1, 0},
-			{0, 0, 1, 0, 1, 0},
-			{0, 0, 1, 1, 1, 0}
-		};
-
-		
-		
-		RBM rbm = new RBM(train_N, n_visible, n_hidden, null, null, null, rng);
-
-		// train
-		for(int epoch=0; epoch<training_epochs; epoch++) {
-			for(int i=0; i<train_N; i++) {
-				rbm.contrastive_divergence(train_X[i], learning_rate, k);
-			}
-		}
-
-		// test data
-		int[][] test_X = {
-			{1, 1, 0, 0, 0, 0},
-			{0, 0, 0, 1, 1, 0}
-		};
-		
-		double[][] reconstructed_X = new double[test_N][n_visible];
-
-		for(int i=0; i<test_N; i++) {
-			rbm.reconstruct(test_X[i], reconstructed_X[i]);
-			for(int j=0; j<n_visible; j++) {
-				System.out.printf("%.5f ", reconstructed_X[i][j]);
-			}
-			System.out.println();
-		}
-	}
-	
-	public static void main(String[] args) {
-		test_rbm();
-	}
-	
-}
diff --git a/java/SdA/src/HiddenLayer.java b/java/SdA/src/HiddenLayer.java
deleted file mode 100644
index 588eea8..0000000
--- a/java/SdA/src/HiddenLayer.java
+++ /dev/null
@@ -1,74 +0,0 @@
-import java.util.Random;
-
-public class HiddenLayer {
-	public int N;
-	public int n_in;
-	public int n_out;
-	public double[][] W;
-	public double[] b;
-	public Random rng;
-	
-	public double uniform(double min, double max) {
-		return rng.nextDouble() * (max - min) + min;
-	}
-	
-	public int binomial(int n, double p) {
-		if(p < 0 || p > 1) return 0;
-		
-		int c = 0;
-		double r;
-		
-		for(int i=0; i<n; i++) {
-			r = rng.nextDouble();
-			if (r < p) c++;
-		}
-		
-		return c;
-	}
-	
-	public static double sigmoid(double x) {
-		return 1.0 / (1.0 + Math.pow(Math.E, -x));
-	}
-	
-	
-	
-	public HiddenLayer(int N, int n_in, int n_out, double[][] W, double[] b, Random rng) {
-		this.N = N;
-		this.n_in = n_in;
-		this.n_out = n_out;
-		
-		if(rng == null)	this.rng = new Random(1234);
-		else this.rng = rng;
-	
-		if(W == null) {
-			this.W = new double[n_out][n_in];
-			double a = 1.0 / this.n_in;
-			
-			for(int i=0; i<n_out; i++) {
-				for(int j=0; j<n_in; j++) {
-					this.W[i][j] = uniform(-a, a);
-				}
-			}
-		} else {
-			this.W = W;
-		}
-		
-		if(b == null) this.b = new double[n_out];
-		else this.b = b;
-	}
-	
-	public double output(int[] input, double[] w, double b) {
-		double linear_output = 0.0;
-		for(int j=0; j<n_in; j++) {
-			linear_output += w[j] * input[j];
-		}
-		linear_output += b;
-		return sigmoid(linear_output);
-	}
-	
-	public void sample_h_given_v(int[] input, int[] sample) {
-		for(int i=0; i<n_out; i++) {
-			sample[i] = binomial(1, output(input, W[i], b[i]));
-		}
-	}
-}
diff --git a/java/SdA/src/LogisticRegression.java b/java/SdA/src/LogisticRegression.java
deleted file mode 100644
index 09ba807..0000000
--- a/java/SdA/src/LogisticRegression.java
+++ /dev/null
@@ -1,73 +0,0 @@
-
-public class LogisticRegression {
-	public int N;
-	public int n_in;
-	public int n_out;
-	public double[][] W;
-	public double[] b;
-	
-	public LogisticRegression(int N, int n_in, int n_out) {
-		this.N = N;
-		this.n_in = n_in;
-		this.n_out = n_out;
-		
-		W = new double[this.n_out][this.n_in];
-		b = new double[this.n_out];
-	}
-	
-	public void train(int[] x, int[] y, double lr) {
-		double[] p_y_given_x = new double[n_out];
-		double[] dy = new double[n_out];
-		
-		for(int i=0; i<n_out; i++) {
-			p_y_given_x[i] = 0;
-			for(int j=0; j<n_in; j++) {
-				p_y_given_x[i] += W[i][j] * x[j];
-			}
-			p_y_given_x[i] += b[i];
-		}
-		softmax(p_y_given_x);
-		
-		for(int i=0; i<n_out; i++) {
-			dy[i] = y[i] - p_y_given_x[i];
-			
-			for(int j=0; j<n_in; j++) {
-				W[i][j] += lr * dy[i] * x[j] / N;
-			}
-			
-			b[i] += lr * dy[i] / N;
-		}
-	}
-	
-	public void softmax(double[] x) {
-		double max = 0.0;
-		double sum = 0.0;
-		
-		for(int i=0; i<n_out; i++) {
-			if(max < x[i]) {
-				max = x[i];
-			}
-		}
-		
-		for(int i=0; i<n_out; i++) {
-			x[i] = Math.exp(x[i] - max);
-			sum += x[i];
-		}
-		
-		for(int i=0; i<n_out; i++) {
-			x[i] /= sum;
-		}
-	}
-	
-	public void predict(int[] x, double[] y) {
-		for(int i=0; i<n_out; i++) {
-			y[i] = 0;
-			for(int j=0; j<n_in; j++) {
-				y[i] += W[i][j] * x[j];
-			}
-			y[i] += b[i];
-		}
-		
-		softmax(y);
-	}		
-}
diff --git a/java/SdA/src/SdA.java b/java/SdA/src/SdA.java
deleted file mode 100644
index a626ad8..0000000
--- a/java/SdA/src/SdA.java
+++ /dev/null
@@ -1,230 +0,0 @@
-import java.util.Random;
-
-public class SdA {
-	public int N;
-	public int n_ins;
-	public int[] hidden_layer_sizes;
-	public int n_outs;
-	public int n_layers;
-	public HiddenLayer[] sigmoid_layers;
-	public dA[] dA_layers;
-	public LogisticRegression log_layer;
-	public Random rng;
-
-	public static double sigmoid(double x) {
-		return 1.0 / (1.0 + Math.pow(Math.E, -x));
-	}
-	
-	public SdA(int N, int n_ins, int[] hidden_layer_sizes, int n_outs, int n_layers, Random rng) {
-		int input_size;
-		
-		this.N = N;
-		this.n_ins = n_ins;
-		this.hidden_layer_sizes = hidden_layer_sizes;
-		this.n_outs = n_outs;
-		this.n_layers = n_layers;
-		
-		this.sigmoid_layers = new HiddenLayer[n_layers];
-		this.dA_layers = new dA[n_layers];
-
-		if(rng == null)	this.rng = new Random(1234);
-		else this.rng = rng;		
-		
-		// construct multi-layer
-		for(int i=0; i<this.n_layers; i++) {
-			if(i == 0) {
-				input_size = this.n_ins;
-			} else {
-				input_size = this.hidden_layer_sizes[i-1];
-			}
-			
-			// construct sigmoid_layer
-			this.sigmoid_layers[i] = new HiddenLayer(this.N, input_size, this.hidden_layer_sizes[i], null, null, rng);
-			
-			// construct dA_layer
-			this.dA_layers[i] = new dA(this.N, input_size, this.hidden_layer_sizes[i], this.sigmoid_layers[i].W, this.sigmoid_layers[i].b, null, rng);
-		}
-		
-		// layer for output using LogisticRegression
-		this.log_layer = new LogisticRegression(this.N, this.hidden_layer_sizes[this.n_layers-1], this.n_outs);
-	}
-	
-	public void pretrain(int[][] train_X, double lr, double corruption_level, int epochs) {
-		int[] layer_input = new int[0];
-		int prev_layer_input_size;
-		int[] prev_layer_input;
-				
-		for(int i=0; i<n_layers; i++) {  // layer-wise			
-			for(int epoch=0; epoch<epochs; epoch++) {  // training epochs
-				for(int n=0; n<N; n++) {  // input x1...xN
-					// layer input
-					for(int l=0; l<=i; l++) {
-						
-						if(l == 0) {
-							layer_input = new int[n_ins];
-							for(int j=0; j<n_ins; j++) layer_input[j] = train_X[n][j];
-						} else {
-							if(l == 1) prev_layer_input_size = n_ins;
-							else prev_layer_input_size = hidden_layer_sizes[l-2];
-							
-							prev_layer_input = new int[prev_layer_input_size];
-							for(int j=0; j<prev_layer_input_size; j++) prev_layer_input[j] = layer_input[j];
-							
-							layer_input = new int[hidden_layer_sizes[l-1]];
-							
-							sigmoid_layers[l-1].sample_h_given_v(prev_layer_input, layer_input);
-						}
-					}
-					
-					dA_layers[i].train(layer_input, lr, corruption_level);
-				}
-			}
-		}
-	}
-		
-	public void finetune(int[][] train_X, int[][] train_Y, double lr, int epochs) {
-		int[] layer_input = new int[0];
-		// int prev_layer_input_size;
-		int[] prev_layer_input = new int[0];
-		
-		for(int epoch=0; epoch<epochs; epoch++) {
-			for(int n=0; n<N; n++) {
-				
-				// layer input
-				for(int i=0; i<n_layers; i++) {
-					if(i == 0) {
-						prev_layer_input = new int[n_ins];
-						for(int j=0; j<n_ins; j++) prev_layer_input[j] = train_X[n][j];
-					} else {
-						prev_layer_input = new int[hidden_layer_sizes[i-1]];
-						for(int j=0; j<hidden_layer_sizes[i-1]; j++) prev_layer_input[j] = layer_input[j];
-					}
-					
-					layer_input = new int[hidden_layer_sizes[i]];
-					sigmoid_layers[i].sample_h_given_v(prev_layer_input, layer_input);
-				}
-				
-				log_layer.train(layer_input, train_Y[n], lr);
-			}
-			// lr *= 0.95;
-		}
-	}
-	
-	public void predict(int[] x, double[] y) {
-		double[] layer_input = new double[0];
-		// int prev_layer_input_size;
-		double[] prev_layer_input = new double[n_ins];
-		for(int j=0; j<n_ins; j++) prev_layer_input[j] = x[j];
-	
-		double linear_output;
-		
-		
-		// layer activation
-		for(int i=0; i<n_layers; i++) {
-			layer_input = new double[sigmoid_layers[i].n_out];
-			
-			for(int k=0; k<sigmoid_layers[i].n_out; k++) {
-				linear_output = 0.0;
-				
-				for(int j=0; j<sigmoid_layers[i].n_in; j++) {
-					linear_output += sigmoid_layers[i].W[k][j] * prev_layer_input[j];
-				}
-				linear_output += sigmoid_layers[i].b[k];
-				layer_input[k] = sigmoid(linear_output);
-			}
-			
-			if(i < n_layers-1) {
-				prev_layer_input = new double[sigmoid_layers[i].n_out];
-				for(int j=0; j<sigmoid_layers[i].n_out; j++) prev_layer_input[j] = layer_input[j];
-			}
-		}
-		
-		for(int i=0; i<log_layer.n_out; i++) {
-			y[i] = 0;
-			for(int j=0; j<log_layer.n_in; j++) {
-				y[i] += log_layer.W[i][j] * layer_input[j];
-			}
-			y[i] += log_layer.b[i];
-		}
-		
-		log_layer.softmax(y);
-	}
-	
-
-	private static void test_sda() {
-		Random rng = new Random(123);
-		
-		double pretrain_lr = 0.1;
-		double corruption_level = 0.3;
-		int pretraining_epochs = 1000;
-		double finetune_lr = 0.1;
-		int finetune_epochs = 500;
-
-		int train_N = 10;
-		int test_N = 4;
-		int n_ins = 28;
-		int n_outs = 2;
-		int[] hidden_layer_sizes = {15, 15};
-		int n_layers = hidden_layer_sizes.length;
-		
-		// training data
-		int[][] train_X = {
-			{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
-			{0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
-			{1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
-			{0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
-			{1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
-			{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
-			{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1},
-			{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1},
-			{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1},
-			{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1}
-		};
-
-		int[][] train_Y = {
-			{1, 0},
-			{1, 0},
-			{1, 0},
-			{1, 0},
-			{1, 0},
-			{0, 1},
-			{0, 1},
-			{0, 1},
-			{0, 1},
-			{0, 1}
-		};
-		
-		// construct SdA
-		SdA sda = new SdA(train_N, n_ins, hidden_layer_sizes, n_outs, n_layers, rng);
-		
-		// pretrain
-		sda.pretrain(train_X, pretrain_lr, corruption_level, pretraining_epochs);
-		
-		// finetune
-		sda.finetune(train_X, train_Y, finetune_lr, finetune_epochs);
-		
-
-		// test data
-		int[][] test_X = {
-			{1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
-			{1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
-			{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1},
-			{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1}
-		};
-		
-		double[][] test_Y = new double[test_N][n_outs];
-		
-		// test
-		for(int i=0; i<test_N; i++) {
-			sda.predict(test_X[i], test_Y[i]);
-			for(int j=0; j<n_outs; j++) {
-				System.out.print(test_Y[i][j] + " ");
-			}
-			System.out.println();
-		}
-	}
-	
-	public static void main(String[] args) {
-		test_sda();
-	}
-}
diff --git a/java/SdA/src/dA.java b/java/SdA/src/dA.java
deleted file mode 100644
index 3484350..0000000
--- a/java/SdA/src/dA.java
+++ /dev/null
@@ -1,150 +0,0 @@
-import java.util.Random;
-
-public class dA {
-	public int N;
-	public int n_visible;
-	public int n_hidden;
-	public double[][] W;
-	public double[] hbias;
-	public double[] vbias;
-	public Random rng;
-	
-	
-	public double uniform(double min, double max) {
-		return rng.nextDouble() * (max - min) + min;
-	}
-	
-	public int binomial(int n, double p) {
-		if(p < 0 || p > 1) return 0;
-		
-		int c = 0;
-		double r;
-		
-		for(int i=0; i<n; i++) {
-			r = rng.nextDouble();
-			if (r < p) c++;
-		}
-		
-		return c;
-	}
-	
-	public static double sigmoid(double x) {
-		return 1.0 / (1.0 + Math.pow(Math.E, -x));
-	}
-
-	public dA(int N, int n_visible, int n_hidden, 
-			double[][] W, double[] hbias, double[] vbias, Random rng) {
-		this.N = N;
-		this.n_visible = n_visible;
-		this.n_hidden = n_hidden;
-
-		if(rng == null)	this.rng = new Random(1234);
-		else this.rng = rng;
-				
-		if(W == null) {
-			this.W = new double[this.n_hidden][this.n_visible];
-			double a = 1.0 / this.n_visible;
-			
-			for(int i=0; i<this.n_hidden; i++) {
-				for(int j=0; j<this.n_visible; j++) {
-					this.W[i][j] = uniform(-a, a); 
-				}
-			}	
-		} else {
-			this.W = W;
-		}
-		
-		if(hbias == null) {
-			this.hbias = new double[this.n_hidden];
-			for(int i=0; i<this.n_hidden; i++) this.hbias[i] = 0;
-		} else {
-			this.hbias = hbias;
-		}
-		
-		if(vbias == null) {
-			this.vbias = new double[this.n_visible];
-			for(int i=0; i<this.n_visible; i++) this.vbias[i] = 0;
-		} else {
-			this.vbias = vbias;
-		}	
-	}
-	
-	public void get_corrupted_input(int[] x, int[] tilde_x, double p) {
-		for(int i=0; i<n_visible; i++) {
-			if(x[i] == 0) {
-				tilde_x[i] = 0;
-			} else {
-				tilde_x[i] = binomial(1, p);
-			}
-		}
-	}
-	
-	// Encode
-	public void get_hidden_values(int[] x, double[] y) {
-		for(int i=0; i<n_hidden; i++) {
-			y[i] = 0;
-			for(int j=0; j<n_visible; j++) {
-				y[i] += W[i][j] * x[j];
-			}
-			y[i] += hbias[i];
-			y[i] = sigmoid(y[i]);
-		}
-	}
-	
-	// Decode
-	public void get_reconstructed_input(double[] y, double[] z) {
-		for(int i=0; i<n_visible; i++) {
-			z[i] = 0;
-			for(int j=0; j<n_hidden; j++) {
-				z[i] += W[j][i] * y[j];
-			}
-			z[i] += vbias[i];
-			z[i] = sigmoid(z[i]);
-		}
-	}
-	
-	public void train(int[] x, double lr, double corruption_level) {
-		int[] tilde_x = new int[n_visible];
-		double[] y = new double[n_hidden];
-		double[] z = new double[n_visible];
-		
-		double[] L_vbias = new double[n_visible];
-		double[] L_hbias = new double[n_hidden];
-		
-		double p = 1 - corruption_level;
-		
-		get_corrupted_input(x, tilde_x, p);
-		get_hidden_values(tilde_x, y);
-		get_reconstructed_input(y, z);
-		
-		// vbias
-		for(int i=0; i<n_visible; i++) {
-			L_vbias[i] = x[i] - z[i];
-			vbias[i] += lr * L_vbias[i] / N;
-		}
-		
-		// hbias
-		for(int i=0; i<n_hidden; i++) {
-			L_hbias[i] = 0;
-			for(int j=0; j<n_visible; j++) {
-				L_hbias[i] += W[i][j] * L_vbias[j];
-			}
-			L_hbias[i] *= y[i] * (1 - y[i]);
-			hbias[i] += lr * L_hbias[i] / N;
-		}
-		
-		// W
-		for(int i=0; i<n_hidden; i++) {
-			for(int j=0; j<n_visible; j++) {
-				W[i][j] += lr * (L_hbias[i] * tilde_x[j] + L_vbias[j] * y[i]) / N;
-			}
-		}
-	}
-	
-	public void reconstruct(int[] x, double[] z) {
-		double[] y = new double[n_hidden];
-		
-		get_hidden_values(x, y);
-		get_reconstructed_input(y, z);
-	}	
-}
diff --git a/java/dA/src/dA.java b/java/dA/src/dA.java
deleted file mode 100644
index 7429724..0000000
--- a/java/dA/src/dA.java
+++ /dev/null
@@ -1,207 +0,0 @@
-import java.util.Random;
-
-public class dA {
-	public int N;
-	public int n_visible;
-	public int n_hidden;
-	public double[][] W;
-	public double[] hbias;
-	public double[] vbias;
-	public Random rng;
-	
-	
-	public double uniform(double min, double max) {
-		return rng.nextDouble() * (max - min) + min;
-	}
-	
-	public int binomial(int n, double p) {
-		if(p < 0 || p > 1) return 0;
-		
-		int c = 0;
-		double r;
-		
-		for(int i=0; i<n; i++) {
-			r = rng.nextDouble();
-			if (r < p) c++;
-		}
-		
-		return c;
-	}
-	
-	public static double sigmoid(double x) {
-		return 1.0 / (1.0 + Math.pow(Math.E, -x));
-	}
-
-	public dA(int N, int n_visible, int n_hidden, 
-			double[][] W, double[] hbias, double[] vbias, Random rng) {
-		this.N = N;
-		this.n_visible = n_visible;
-		this.n_hidden = n_hidden;
-
-		if(rng == null)	this.rng = new Random(1234);
-		else this.rng = rng;
-				
-		if(W == null) {
-			this.W = new double[this.n_hidden][this.n_visible];
-			double a = 1.0 / this.n_visible;
-			
-			for(int i=0; i<this.n_hidden; i++) {
-				for(int j=0; j<this.n_visible; j++) {
-					this.W[i][j] = uniform(-a, a); 
-				}
-			}	
-		} else {
-			this.W = W;
-		}
-		
-		if(hbias == null) {
-			this.hbias = new double[this.n_hidden];
-			for(int i=0; i<this.n_hidden; i++) this.hbias[i] = 0;
-		} else {
-			this.hbias = hbias;
-		}
-		
-		if(vbias == null) {
-			this.vbias = new double[this.n_visible];
-			for(int i=0; i<this.n_visible; i++) this.vbias[i] = 0;
-		} else {
-			this.vbias = vbias;
-		}	
-	}
-	
-	public void get_corrupted_input(int[] x, int[] tilde_x, double p) {
-		for(int i=0; i<n_visible; i++) {
-			if(x[i] == 0) {
-				tilde_x[i] = 0;
-			} else {
-				tilde_x[i] = binomial(1, p);
-			}
-		}
-	}
-	
-	// Encode
-	public void get_hidden_values(int[] x, double[] y) {
-		for(int i=0; i<n_hidden; i++) {
-			y[i] = 0;
-			for(int j=0; j<n_visible; j++) {
-				y[i] += W[i][j] * x[j];
-			}
-			y[i] += hbias[i];
-			y[i] = sigmoid(y[i]);
-		}
-	}
-	
-	// Decode
-	public void get_reconstructed_input(double[] y, double[] z) {
-		for(int i=0; i<n_visible; i++) {
-			z[i] = 0;
-			for(int j=0; j<n_hidden; j++) {
-				z[i] += W[j][i] * y[j];
-			}
-			z[i] += vbias[i];
-			z[i] = sigmoid(z[i]);
-		}
-	}
-	
-	public void train(int[] x, double lr, double corruption_level) {
-		int[] tilde_x = new int[n_visible];
-		double[] y = new double[n_hidden];
-		double[] z = new double[n_visible];
-		
-		double[] L_vbias = new double[n_visible];
-		double[] L_hbias = new double[n_hidden];
-		
-		double p = 1 - corruption_level;
-		
-		get_corrupted_input(x, tilde_x, p);
-		get_hidden_values(tilde_x, y);
-		get_reconstructed_input(y, z);
-		
-		// vbias
-		for(int i=0; i<n_visible; i++) {
-			L_vbias[i] = x[i] - z[i];
-			vbias[i] += lr * L_vbias[i] / N;
-		}
-		
-		// hbias
-		for(int i=0; i<n_hidden; i++) {
-			L_hbias[i] = 0;
-			for(int j=0; j<n_visible; j++) {
-				L_hbias[i] += W[i][j] * L_vbias[j];
-			}
-			L_hbias[i] *= y[i] * (1 - y[i]);
-			hbias[i] += lr * L_hbias[i] / N;
-		}
-		
-		// W
-		for(int i=0; i<n_hidden; i++) {
-			for(int j=0; j<n_visible; j++) {
-				W[i][j] += lr * (L_hbias[i] * tilde_x[j] + L_vbias[j] * y[i]) / N;
-			}
-		}
-	}
-	
-	public void reconstruct(int[] x, double[] z) {
-		double[] y = new double[n_hidden];
-		
-		get_hidden_values(x, y);
-		get_reconstructed_input(y, z);
-	}
-	
-	
-	private static void test_dA() {
-		Random rng = new Random(123);
-		
-		double learning_rate = 0.1;
-		double corruption_level = 0.3;
-		int training_epochs = 100;
-		
-		int train_N = 10;
-		int test_N = 2;
-		int n_visible = 20;
-		int n_hidden = 5;
-		
-		int[][] train_X = {
-			{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
-			{1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
-			{1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
-			{1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
-			{0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
-			{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
-			{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1},
-			{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1},
-			{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1},
-			{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0}		
-		};
-		
-		dA da = new dA(train_N, n_visible, n_hidden, null, null, null, rng);
-		
-		// train
-		for(int epoch=0; epoch<training_epochs; epoch++) {
-			for(int i=0; i<train_N; i++) {
-				da.train(train_X[i], learning_rate, corruption_level);
-			}
-		}
-		
-		// test data
-		int[][] test_X = {
-			{1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
-			{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0}				
-		};
-		
-		double[][] reconstructed_X = new double[test_N][n_visible];
-		
-		// test
-		for(int i=0; i<test_N; i++) {
-			da.reconstruct(test_X[i], reconstructed_X[i]);
-			for(int j=0; j<n_visible; j++) {
-				System.out.printf("%.5f ", reconstructed_X[i][j]);
-			}
-			System.out.println();
-		}
-	}
-	
-	public static void main(String[] args) {
-		test_dA();
-	}
-}
diff --git a/java/src/DeepLearning/DBN.java b/java/src/DeepLearning/DBN.java
new file mode 100644
index 0000000..39fb999
--- /dev/null
+++ b/java/src/DeepLearning/DBN.java
@@ -0,0 +1,222 @@
+package DeepLearning;
+
+import java.util.Random;
+import static DeepLearning.utils.*;
+
+public class DBN {
+    public int N;
+    public int n_ins;
+    public int[] hidden_layer_sizes;
+    public int n_outs;
+    public int n_layers;
+    public HiddenLayer[] sigmoid_layers;
+    public RBM[] rbm_layers;
+    public LogisticRegression log_layer;
+    public Random rng;
+
+
+    public DBN(int N, int n_ins, int[] hidden_layer_sizes, int n_outs, int n_layers, Random rng) {
+        int input_size;
+
+        this.N = N;
+        this.n_ins = n_ins;
+        this.hidden_layer_sizes = hidden_layer_sizes;
+        this.n_outs = n_outs;
+        this.n_layers = n_layers;
+
+        this.sigmoid_layers = new HiddenLayer[n_layers];
+        this.rbm_layers = new RBM[n_layers];
+
+        if(rng == null)	this.rng = new Random(1234);
+        else this.rng = rng;
+
+        // construct multi-layer
+        for(int i=0; i<this.n_layers; i++) {
+            if(i == 0) {
+                input_size = this.n_ins;
+            } else {
+                input_size = this.hidden_layer_sizes[i-1];
+            }
+
+            // construct sigmoid_layer
+            this.sigmoid_layers[i] = new HiddenLayer(this.N, input_size, this.hidden_layer_sizes[i], null, null, rng);
+
+            // construct rbm_layer
+            this.rbm_layers[i] = new RBM(this.N, input_size, this.hidden_layer_sizes[i], this.sigmoid_layers[i].W, this.sigmoid_layers[i].b, null, rng);
+        }
+
+        // layer for output using LogisticRegression
+        this.log_layer = new LogisticRegression(this.N, this.hidden_layer_sizes[this.n_layers-1], this.n_outs);
+    }
+
+    public void pretrain(int[][] train_X, double lr, int k, int epochs) {
+        int[] layer_input = new int[0];
+        int prev_layer_input_size;
+        int[] prev_layer_input;
+
+        for(int i=0; i<n_layers; i++) {  // layer-wise
+            for(int epoch=0; epoch<epochs; epoch++) {  // training epochs
+                for(int n=0; n<N; n++) {  // input x1...xN
+                    // layer input
+                    for(int l=0; l<=i; l++) {
+
+                        if(l == 0) {
+                            layer_input = new int[n_ins];
+                            for(int j=0; j<n_ins; j++) layer_input[j] = train_X[n][j];
+                        } else {
+                            if(l == 1) prev_layer_input_size = n_ins;
+                            else prev_layer_input_size = hidden_layer_sizes[l-2];
+
+                            prev_layer_input = new int[prev_layer_input_size];
+                            for(int j=0; j<prev_layer_input_size; j++) prev_layer_input[j] = layer_input[j];
+
+                            layer_input = new int[hidden_layer_sizes[l-1]];
+
+                            sigmoid_layers[l-1].sample_h_given_v(prev_layer_input, layer_input);
+                        }
+                    }
+
+                    rbm_layers[i].contrastive_divergence(layer_input, lr, k);
+                }
+            }
+        }
+    }
+
+    public void finetune(int[][] train_X, int[][] train_Y, double lr, int epochs) {
+        int[] layer_input = new int[0];
+        // int prev_layer_input_size;
+        int[] prev_layer_input = new int[0];
+
+        for(int epoch=0; epoch<epochs; epoch++) {
+            for(int n=0; n<N; n++) {
+
+                // layer input
+                for(int i=0; i<n_layers; i++) {
+                    if(i == 0) {
+                        prev_layer_input = new int[n_ins];
+                        for(int j=0; j<n_ins; j++) prev_layer_input[j] = train_X[n][j];
+                    } else {
+                        prev_layer_input = new int[hidden_layer_sizes[i-1]];
+                        for(int j=0; j<hidden_layer_sizes[i-1]; j++) prev_layer_input[j] = layer_input[j];
+                    }
+
+                    layer_input = new int[hidden_layer_sizes[i]];
+                    sigmoid_layers[i].sample_h_given_v(prev_layer_input, layer_input);
+                }
+
+                log_layer.train(layer_input, train_Y[n], lr);
+            }
+            // lr *= 0.95;
+        }
+    }
+
+    public void predict(int[] x, double[] y) {
+        double[] layer_input = new double[0];
+        // int prev_layer_input_size;
+        double[] prev_layer_input = new double[n_ins];
+        for(int j=0; j<n_ins; j++) prev_layer_input[j] = x[j];
+
+        double linear_output;
+
+
+        // layer activation
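+        // the forward pass uses the mean (sigmoid) activations rather than binary samples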
+        for(int i=0; i<n_layers; i++) {
+            layer_input = new double[sigmoid_layers[i].n_out];
+
+            for(int k=0; k<sigmoid_layers[i].n_out; k++) {
+                linear_output = 0.0;
+
+                for(int j=0; j<sigmoid_layers[i].n_in; j++) {
+                    linear_output += sigmoid_layers[i].W[k][j] * prev_layer_input[j];
+                }
+                linear_output += sigmoid_layers[i].b[k];
+                layer_input[k] = sigmoid(linear_output);
+            }
+
+            if(i < n_layers-1) {
+                prev_layer_input = new double[sigmoid_layers[i].n_out];
+                for(int j=0; j<sigmoid_layers[i].n_out; j++) prev_layer_input[j] = layer_input[j];
+            }
+        }
+
+        for(int i=0; i<log_layer.n_out; i++) {
+            y[i] = 0;
+            for(int j=0; j<log_layer.n_in; j++) {
+                y[i] += log_layer.W[i][j] * layer_input[j];
+            }
+            y[i] += log_layer.b[i];
+        }
+
+        log_layer.softmax(y);
+    }
+
+    private static void test_dbn() {
+        Random rng = new Random(123);
+
+        double pretrain_lr = 0.1;
+        int pretraining_epochs = 1000;
+        int k = 1;
+        double finetune_lr = 0.1;
+        int finetune_epochs = 500;
+
+        int train_N = 6;
+        int test_N = 4;
+        int n_ins = 6;
+        int n_outs = 2;
+        int[] hidden_layer_sizes = {3, 3};
+        int n_layers = hidden_layer_sizes.length;
+
+        // training data
+        int[][] train_X = {
+                {1, 1, 1, 0, 0, 0},
+                {1, 0, 1, 0, 0, 0},
+                {1, 1, 1, 0, 0, 0},
+                {0, 0, 1, 1, 1, 0},
+                {0, 0, 1, 1, 0, 0},
+                {0, 0, 1, 1, 1, 0}
+        };
+
+        int[][] train_Y = {
+                {1, 0},
+                {1, 0},
+                {1, 0},
+                {0, 1},
+                {0, 1},
+                {0, 1},
+        };
+
+
+        // construct DNN.DBN
+        DBN dbn = new DBN(train_N, n_ins, hidden_layer_sizes, n_outs, n_layers, rng);
+
+        // pretrain
+        dbn.pretrain(train_X, pretrain_lr, k, pretraining_epochs);
+
+        // finetune
+        dbn.finetune(train_X, train_Y, finetune_lr, finetune_epochs);
+
+
+        // test data
+        int[][] test_X = {
+                {1, 1, 0, 0, 0, 0},
+                {1, 1, 1, 1, 0, 0},
+                {0, 0, 0, 1, 1, 0},
+                {0, 0, 1, 1, 1, 0},
+        };
+
+        double[][] test_Y = new double[test_N][n_outs];
+
+        // test
+        for(int i=0; i<test_N; i++) {
+            dbn.predict(test_X[i], test_Y[i]);
+            for(int j=0; j<n_outs; j++) {
+                System.out.print(test_Y[i][j] + " ");
+            }
+            System.out.println();
+        }
+    }
+
+    public static void main(String[] args) {
+        test_dbn();
+    }
+}
diff --git a/java/src/DeepLearning/HiddenLayer.java b/java/src/DeepLearning/HiddenLayer.java
new file mode 100644
index 0000000..ca50104
--- /dev/null
+++ b/java/src/DeepLearning/HiddenLayer.java
@@ -0,0 +1,54 @@
+package DeepLearning;
+
+import java.util.Random;
+import static DeepLearning.utils.*;
+
+public class HiddenLayer {
+    public int N;
+    public int n_in;
+    public int n_out;
+    public double[][] W;
+    public double[] b;
+    public Random rng;
+
+
+    public HiddenLayer(int N, int n_in, int n_out, double[][] W, double[] b, Random rng) {
+        this.N = N;
+        this.n_in = n_in;
+        this.n_out = n_out;
+
+        if(rng == null)	this.rng = new Random(1234);
+        else this.rng = rng;
+
+        if(W == null) {
+            this.W = new double[n_out][n_in];
+            double a = 1.0 / this.n_in;
+
+            for(int i=0; i<n_out; i++) {
+                for(int j=0; j<n_in; j++) {
+                    this.W[i][j] = uniform(-a, a, this.rng);
+                }
+            }
+        } else {
+            this.W = W;
+        }
+
+        if(b == null) this.b = new double[n_out];
+        else this.b = b;
+    }
+
+    public double output(int[] input, double[] w, double b) {
+        double linear_output = 0.0;
+        for(int j=0; j<n_in; j++) {
+            linear_output += w[j] * input[j];
+        }
+        linear_output += b;
+        return sigmoid(linear_output);
+    }
+
+    public void sample_h_given_v(int[] input, int[] sample) {
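+        // draw a binary value for each output unit from its sigmoid activation probability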
+        for(int i=0; i<n_out; i++) {
+            sample[i] = binomial(1, output(input, W[i], b[i]), rng);
+        }
+    }
+}
diff --git a/java/src/DeepLearning/LogisticRegression.java b/java/src/DeepLearning/LogisticRegression.java
new file mode 100644
index 0000000..abe11a0
--- /dev/null
+++ b/java/src/DeepLearning/LogisticRegression.java
@@ -0,0 +1,129 @@
+package DeepLearning;
+
+public class LogisticRegression {
+    public int N;
+    public int n_in;
+    public int n_out;
+    public double[][] W;
+    public double[] b;
+
+    public LogisticRegression(int N, int n_in, int n_out) {
+        this.N = N;
+        this.n_in = n_in;
+        this.n_out = n_out;
+
+        W = new double[this.n_out][this.n_in];
+        b = new double[this.n_out];
+    }
+
+    public void train(int[] x, int[] y, double lr) {
+        double[] p_y_given_x = new double[n_out];
+        double[] dy = new double[n_out];
+
+        for(int i=0; i<n_out; i++) {
+            p_y_given_x[i] = 0;
+            for(int j=0; j<n_in; j++) {
+                p_y_given_x[i] += W[i][j] * x[j];
+            }
+            p_y_given_x[i] += b[i];
+        }
+        softmax(p_y_given_x);
+
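+        // dy = y - p(y|x) is the gradient of the log-likelihood w.r.t. the softmax pre-activation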
+        for(int i=0; i<n_out; i++) {
+            dy[i] = y[i] - p_y_given_x[i];
+
+            for(int j=0; j<n_in; j++) {
+                W[i][j] += lr * dy[i] * x[j] / N;
+            }
+
+            b[i] += lr * dy[i] / N;
+        }
+    }
+
+    public void softmax(double[] x) {
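+        // softmax with the largest (positive) activation subtracted before exponentiating, to avoid overflow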
+        double max = 0.0;
+        double sum = 0.0;
+
+        for(int i=0; i<n_out; i++) if(max < x[i]) max = x[i];
+
+        for(int i=0; i<n_out; i++) {
+            x[i] = Math.exp(x[i] - max);
+            sum += x[i];
+        }
+
+        for(int i=0; i<n_out; i++) x[i] /= sum;
+    }
+
+    public void predict(int[] x, double[] y) {
+        for(int i=0; i<n_out; i++) {
+            y[i] = 0;
+            for(int j=0; j<n_in; j++) {
+                y[i] += W[i][j] * x[j];
+            }
+            y[i] += b[i];
+        }
+
+        softmax(y);
+    }
+
+    private static void test_lr() {
+        double learning_rate = 0.1;
+        int n_epochs = 500;
+
+        int train_N = 6;
+        int test_N = 2;
+        int n_in = 6;
+        int n_out = 2;
+
+        int[][] train_X = {
+                {1, 1, 1, 0, 0, 0},
+                {1, 0, 1, 0, 0, 0},
+                {1, 1, 1, 0, 0, 0},
+                {0, 0, 1, 1, 1, 0},
+                {0, 0, 1, 1, 0, 0},
+                {0, 0, 1, 1, 1, 0}
+        };
+
+        int[][] train_Y = {
+                {1, 0},
+                {1, 0},
+                {1, 0},
+                {0, 1},
+                {0, 1},
+                {0, 1}
+        };
+
+        // construct
+        LogisticRegression classifier = new LogisticRegression(train_N, n_in, n_out);
+
+        // train
+        for(int epoch=0; epoch<n_epochs; epoch++) {
+            for(int i=0; i<train_N; i++) {
+                classifier.train(train_X[i], train_Y[i], learning_rate);
+            }
+            //learning_rate *= 0.95;
+        }
+
+        // test data
+        int[][] test_X = {
+                {1, 0, 1, 0, 0, 0},
+                {0, 0, 1, 1, 1, 0}
+        };
+
+        double[][] test_Y = new double[test_N][n_out];
+
+
+        // test
+        for(int i=0; i<test_N; i++) {
+            classifier.predict(test_X[i], test_Y[i]);
+            for(int j=0; j<n_out; j++) {
+                System.out.print(test_Y[i][j] + " ");
+            }
+            System.out.println();
+        }
+    }
+
+    public static void main(String[] args) {
+        test_lr();
+    }
+}
diff --git a/java/src/DeepLearning/RBM.java b/java/src/DeepLearning/RBM.java
new file mode 100644
index 0000000..d84e65c
--- /dev/null
+++ b/java/src/DeepLearning/RBM.java
@@ -0,0 +1,203 @@
+package DeepLearning;
+
+import java.util.Random;
+import static DeepLearning.utils.*;
+
+
+public class RBM {
+    public int N;
+    public int n_visible;
+    public int n_hidden;
+    public double[][] W;
+    public double[] hbias;
+    public double[] vbias;
+    public Random rng;
+
+
+    public RBM(int N, int n_visible, int n_hidden,
+               double[][] W, double[] hbias, double[] vbias, Random rng) {
+        this.N = N;
+        this.n_visible = n_visible;
+        this.n_hidden = n_hidden;
+
+        if(rng == null)	this.rng = new Random(1234);
+        else this.rng = rng;
+
+        if(W == null) {
+            this.W = new double[this.n_hidden][this.n_visible];
+            double a = 1.0 / this.n_visible;
+
+            for(int i=0; i<this.n_hidden; i++) {
+                for(int j=0; j<this.n_visible; j++) {
+                    this.W[i][j] = uniform(-a, a, this.rng);
+                }
+            }
+        } else {
+            this.W = W;
+        }
+
+        if(hbias == null) {
+            this.hbias = new double[this.n_hidden];
+            for(int i=0; i<this.n_hidden; i++) this.hbias[i] = 0;
+        } else {
+            this.hbias = hbias;
+        }
+
+        if(vbias == null) {
+            this.vbias = new double[this.n_visible];
+            for(int i=0; i<this.n_visible; i++) this.vbias[i] = 0;
+        } else {
+            this.vbias = vbias;
+        }
+    }
+
+
+    public void contrastive_divergence(int[] input, double lr, int k) {
+        double[] ph_mean = new double[n_hidden];
+        int[] ph_sample = new int[n_hidden];
+        double[] nv_means = new double[n_visible];
+        int[] nv_samples = new int[n_visible];
+        double[] nh_means = new double[n_hidden];
+        int[] nh_samples = new int[n_hidden];
+
+        /* CD-k */
+        sample_h_given_v(input, ph_mean, ph_sample);
+
+        for(int step=0; step<k; step++) {
+            if(step == 0) {
+                gibbs_hvh(ph_sample, nv_means, nv_samples, nh_means, nh_samples);
+            } else {
+                gibbs_hvh(nh_samples, nv_means, nv_samples, nh_means, nh_samples);
+            }
+        }
+
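+        // CD-k update: positive statistics from the data (ph_mean, input) minus negative
+        // statistics from the k-step reconstruction (nh_means, nv_samples), scaled by lr/N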
+        for(int i=0; i<n_hidden; i++) {
+            for(int j=0; j<n_visible; j++) {
+                // W[i][j] += lr * (ph_sample[i] * input[j] - nh_means[i] * nv_samples[j]) / N;
+                W[i][j] += lr * (ph_mean[i] * input[j] - nh_means[i] * nv_samples[j]) / N;
+            }
+            hbias[i] += lr * (ph_sample[i] - nh_means[i]) / N;
+        }
+
+
+        for(int i=0; i<n_visible; i++) {
+            vbias[i] += lr * (input[i] - nv_samples[i]) / N;
+        }
+
+    }
+
+
+    public void sample_h_given_v(int[] v0_sample, double[] mean, int[] sample) {
+        for(int i=0; i<n_hidden; i++) {
+            mean[i] = propup(v0_sample, W[i], hbias[i]);
+            sample[i] = binomial(1, mean[i], rng);
+        }
+    }
+
+    public void sample_v_given_h(int[] h0_sample, double[] mean, int[] sample) {
+        for(int i=0; i<n_visible; i++) {
+            mean[i] = propdown(h0_sample, i, vbias[i]);
+            sample[i] = binomial(1, mean[i], rng);
+        }
+    }
+
+    public double propup(int[] v, double[] w, double b) {
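+        // mean activation of one hidden unit given v: sigmoid(w . v + hidden bias)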
+        double pre_sigmoid_activation = 0.0;
+        for(int j=0; j<n_visible; j++) {
+            pre_sigmoid_activation += w[j] * v[j];
+        }
+        pre_sigmoid_activation += b;
+        return sigmoid(pre_sigmoid_activation);
+    }
+
+    public double propdown(int[] h, int i, double b) {
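+        // mean activation of visible unit i given h: sigmoid(column i of W . h + visible bias)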
+        double pre_sigmoid_activation = 0.0;
+        for(int j=0; j<n_hidden; j++) {
+            pre_sigmoid_activation += W[j][i] * h[j];
+        }
+        pre_sigmoid_activation += b;
+        return sigmoid(pre_sigmoid_activation);
+    }
+
+    public void gibbs_hvh(int[] h0_sample, double[] nv_means, int[] nv_samples, double[] nh_means, int[] nh_samples) {
+        sample_v_given_h(h0_sample, nv_means, nv_samples);
+        sample_h_given_v(nv_samples, nh_means, nh_samples);
+    }
+
+
+    public void reconstruct(int[] v, double[] reconstructed_v) {
+        double[] h = new double[n_hidden];
+        double pre_sigmoid_activation;
+
+        for(int i=0; i<n_hidden; i++) {
+            h[i] = propup(v, W[i], hbias[i]);
+        }
+
+        for(int i=0; i<n_visible; i++) {
+            pre_sigmoid_activation = 0.0;
+            for(int j=0; j<n_hidden; j++) {
+                pre_sigmoid_activation += W[j][i] * h[j];
+            }
+            pre_sigmoid_activation += vbias[i];
+
+            reconstructed_v[i] = sigmoid(pre_sigmoid_activation);
+        }
+    }
+
+
+
+    private static void test_rbm() {
+        Random rng = new Random(123);
+
+        double learning_rate = 0.1;
+        int training_epochs = 1000;
+        int k = 1;
+
+        int train_N = 6;
+        int test_N = 2;
+        int n_visible = 6;
+        int n_hidden = 3;
+
+        // training data
+        int[][] train_X = {
+                {1, 1, 1, 0, 0, 0},
+                {1, 0, 1, 0, 0, 0},
+                {1, 1, 1, 0, 0, 0},
+                {0, 0, 1, 1, 1, 0},
+                {0, 0, 1, 0, 1, 0},
+                {0, 0, 1, 1, 1, 0}
+        };
+
+
+
+        RBM rbm = new RBM(train_N, n_visible, n_hidden, null, null, null, rng);
+
+        // train
+        for(int epoch=0; epoch<training_epochs; epoch++) {
+            for(int i=0; i<train_N; i++) {
+                rbm.contrastive_divergence(train_X[i], learning_rate, k);
+            }
+        }
+
+        // test data
+        int[][] test_X = {
+                {1, 1, 0, 0, 0, 0},
+                {0, 0, 0, 1, 1, 0}
+        };
+
+        double[][] reconstructed_X = new double[test_N][n_visible];
+
+        for(int i=0; i<test_N; i++) {
+            rbm.reconstruct(test_X[i], reconstructed_X[i]);
+            for(int j=0; j<n_visible; j++) {
+                System.out.printf("%.5f ", reconstructed_X[i][j]);
+            }
+            System.out.println();
+        }
+    }
+
+    public static void main(String[] args) {
+        test_rbm();
+    }
+
+}
diff --git a/java/src/DeepLearning/SdA.java b/java/src/DeepLearning/SdA.java
new file mode 100644
index 0000000..962c335
--- /dev/null
+++ b/java/src/DeepLearning/SdA.java
@@ -0,0 +1,230 @@
+package DeepLearning;
+
+import java.util.Random;
+import static DeepLearning.utils.*;
+
+public class SdA {
+    public int N;
+    public int n_ins;
+    public int[] hidden_layer_sizes;
+    public int n_outs;
+    public int n_layers;
+    public HiddenLayer[] sigmoid_layers;
+    public dA[] dA_layers;
+    public LogisticRegression log_layer;
+    public Random rng;
+
+
+    public SdA(int N, int n_ins, int[] hidden_layer_sizes, int n_outs, int n_layers, Random rng) {
+        int input_size;
+
+        this.N = N;
+        this.n_ins = n_ins;
+        this.hidden_layer_sizes = hidden_layer_sizes;
+        this.n_outs = n_outs;
+        this.n_layers = n_layers;
+
+        this.sigmoid_layers = new HiddenLayer[n_layers];
+        this.dA_layers = new dA[n_layers];
+
+        if(rng == null)	this.rng = new Random(1234);
+        else this.rng = rng;
+
+        // construct multi-layer
+        for(int i=0; i<this.n_layers; i++) {
+            if(i == 0) {
+                input_size = this.n_ins;
+            } else {
+                input_size = this.hidden_layer_sizes[i-1];
+            }
+
+            // construct sigmoid_layer
+            this.sigmoid_layers[i] = new HiddenLayer(this.N, input_size, this.hidden_layer_sizes[i], null, null, rng);
+
+            // construct dA_layer
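+            // the dA shares W and b with its sigmoid layer, so denoising pretraining
+            // also initializes the feed-forward weights of the SdA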
+            this.dA_layers[i] = new dA(this.N, input_size, this.hidden_layer_sizes[i], this.sigmoid_layers[i].W, this.sigmoid_layers[i].b, null, rng);
+        }
+
+        // layer for output using DNN.LogisticRegression
+        this.log_layer = new LogisticRegression(this.N, this.hidden_layer_sizes[this.n_layers-1], this.n_outs);
+    }
+
+    public void pretrain(int[][] train_X, double lr, double corruption_level, int epochs) {
+        int[] layer_input = new int[0];
+        int prev_layer_input_size;
+        int[] prev_layer_input;
+
+        for(int i=0; i<n_layers; i++) {  // layer-wise
+            for(int epoch=0; epoch<epochs; epoch++) {  // training epochs
+                for(int n=0; n<N; n++) {  // input x1...xN
+                    // layer input
+                    for(int l=0; l<=i; l++) {
+
+                        if(l == 0) {
+                            layer_input = new int[n_ins];
+                            for(int j=0; j<n_ins; j++) layer_input[j] = train_X[n][j];
+                        } else {
+                            if(l == 1) prev_layer_input_size = n_ins;
+                            else prev_layer_input_size = hidden_layer_sizes[l-2];
+
+                            prev_layer_input = new int[prev_layer_input_size];
+                            for(int j=0; j<prev_layer_input_size; j++) prev_layer_input[j] = layer_input[j];
+
+                            layer_input = new int[hidden_layer_sizes[l-1]];
+
+                            sigmoid_layers[l-1].sample_h_given_v(prev_layer_input, layer_input);
+                        }
+                    }
+
+                    dA_layers[i].train(layer_input, lr, corruption_level);
+                }
+            }
+        }
+    }
+
+    public void finetune(int[][] train_X, int[][] train_Y, double lr, int epochs) {
+        int[] layer_input = new int[0];
+        // int prev_layer_input_size;
+        int[] prev_layer_input = new int[0];
+
+        for(int epoch=0; epoch<epochs; epoch++) {
+            for(int n=0; n<N; n++) {
+
+                // layer input
+                for(int i=0; i<n_layers; i++) {
+                    if(i == 0) {
+                        prev_layer_input = new int[n_ins];
+                        for(int j=0; j<n_ins; j++) prev_layer_input[j] = train_X[n][j];
+                    } else {
+                        prev_layer_input = new int[hidden_layer_sizes[i-1]];
+                        for(int j=0; j<hidden_layer_sizes[i-1]; j++) prev_layer_input[j] = layer_input[j];
+                    }
+
+                    layer_input = new int[hidden_layer_sizes[i]];
+                    sigmoid_layers[i].sample_h_given_v(prev_layer_input, layer_input);
+                }
+
+                log_layer.train(layer_input, train_Y[n], lr);
+            }
+            // lr *= 0.95;
+        }
+    }
+
+    public void predict(int[] x, double[] y) {
+        double[] layer_input = new double[0];
+        // int prev_layer_input_size;
+        double[] prev_layer_input = new double[n_ins];
+        for(int j=0; j<n_ins; j++) prev_layer_input[j] = x[j];
+
+        double linear_output;
+
+
+        // layer activation
+        for(int i=0; i<n_layers; i++) {
+            layer_input = new double[sigmoid_layers[i].n_out];
+
+            for(int k=0; k<sigmoid_layers[i].n_out; k++) {
+                linear_output = 0.0;
+
+                for(int j=0; j<sigmoid_layers[i].n_in; j++) {
+                    linear_output += sigmoid_layers[i].W[k][j] * prev_layer_input[j];
+                }
+                linear_output += sigmoid_layers[i].b[k];
+                layer_input[k] = sigmoid(linear_output);
+            }
+
+            if(i < n_layers-1) {
+                prev_layer_input = new double[sigmoid_layers[i].n_out];
+                for(int j=0; j<sigmoid_layers[i].n_out; j++) prev_layer_input[j] = layer_input[j];
+            }
+        }
+
+        for(int i=0; i<log_layer.n_out; i++) {
+            y[i] = 0;
+            for(int j=0; j<log_layer.n_in; j++) {
+                y[i] += log_layer.W[i][j] * layer_input[j];
+            }
+            y[i] += log_layer.b[i];
+        }
+
+        log_layer.softmax(y);
+    }
+
+
+    private static void test_sda() {
+        Random rng = new Random(123);
+
+        double pretrain_lr = 0.1;
+        double corruption_level = 0.3;
+        int pretraining_epochs = 1000;
+        double finetune_lr = 0.1;
+        int finetune_epochs = 500;
+
+        int train_N = 10;
+        int test_N = 4;
+        int n_ins = 28;
+        int n_outs = 2;
+        int[] hidden_layer_sizes = {15, 15};
+        int n_layers = hidden_layer_sizes.length;
+
+        // training data
+        int[][] train_X = {
+                {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                {0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                {1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                {0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                {1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+                {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1},
+                {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1},
+                {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1},
+                {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1}
+        };
+
+        int[][] train_Y = {
+                {1, 0},
+                {1, 0},
+                {1, 0},
+                {1, 0},
+                {1, 0},
+                {0, 1},
+                {0, 1},
+                {0, 1},
+                {0, 1},
+                {0, 1}
+        };
+
+        // construct SdA
+        SdA sda = new SdA(train_N, n_ins, hidden_layer_sizes, n_outs, n_layers, rng);
+
+        // pretrain
+        sda.pretrain(train_X, pretrain_lr, corruption_level, pretraining_epochs);
+
+        // finetune
+        sda.finetune(train_X, train_Y, finetune_lr, finetune_epochs);
+
+
+        // test data
+        int[][] test_X = {
+                {1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                {1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1},
+                {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1}
+        };
+
+        double[][] test_Y = new double[test_N][n_outs];
+
+        // test
+        for(int i=0; i<test_N; i++) {
+            sda.predict(test_X[i], test_Y[i]);
+            for(int j=0; j<n_outs; j++) {
+                System.out.print(test_Y[i][j] + " ");
+            }
+            System.out.println();
+        }
+    }
+
+    public static void main(String[] args) {
+        test_sda();
+    }
+}
diff --git a/java/src/DeepLearning/dA.java b/java/src/DeepLearning/dA.java
new file mode 100644
index 0000000..acf3b39
--- /dev/null
+++ b/java/src/DeepLearning/dA.java
@@ -0,0 +1,187 @@
+package DeepLearning;
+
+import java.util.Random;
+import static DeepLearning.utils.*;
+
+public class dA {
+    public int n_visible;
+    public int N;
+    public int n_hidden;
+    public double[][] W;
+    public double[] hbias;
+    public double[] vbias;
+    public Random rng;
+
+
+    public dA(int N, int n_visible, int n_hidden,
+              double[][] W, double[] hbias, double[] vbias, Random rng) {
+        this.N = N;
+        this.n_visible = n_visible;
+        this.n_hidden = n_hidden;
+
+        if(rng == null)	this.rng = new Random(1234);
+        else this.rng = rng;
+
+        if(W == null) {
+            this.W = new double[this.n_hidden][this.n_visible];
+            double a = 1.0 / this.n_visible;
+
+            for(int i=0; i<this.n_hidden; i++) {
+                for(int j=0; j<this.n_visible; j++) {
+                    this.W[i][j] = uniform(-a, a, this.rng);
+                }
+            }
+        } else {
+            this.W = W;
+        }
+
+        if(hbias == null) {
+            this.hbias = new double[this.n_hidden];
+            for(int i=0; i<this.n_hidden; i++) this.hbias[i] = 0;
+        } else {
+            this.hbias = hbias;
+        }
+
+        if(vbias == null) {
+            this.vbias = new double[this.n_visible];
+            for(int i=0; i<this.n_visible; i++) this.vbias[i] = 0;
+        } else {
+            this.vbias = vbias;
+        }
+    }
+
+    public void get_corrupted_input(int[] x, int[] tilde_x, double p) {
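+        // p is the probability of keeping an active input (1 - corruption level); zeros stay zero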
+        for(int i=0; i<n_visible; i++) {
+            if(x[i] == 0) {
+                tilde_x[i] = 0;
+            } else {
+                tilde_x[i] = binomial(1, p, rng);
+            }
+        }
+    }
+
+    // Encode
+    public void get_hidden_values(int[] x, double[] y) {
+        for(int i=0; i<n_hidden; i++) {
+            y[i] = 0;
+            for(int j=0; j<n_visible; j++) {
+                y[i] += W[i][j] * x[j];
+            }
+            y[i] += hbias[i];
+            y[i] = sigmoid(y[i]);
+        }
+    }
+
+    // Decode
+    public void get_reconstructed_input(double[] y, double[] z) {
+        for(int i=0; i<n_visible; i++) {
+            z[i] = 0;
+            for(int j=0; j<n_hidden; j++) {
+                z[i] += W[j][i] * y[j];
+            }
+            z[i] += vbias[i];
+            z[i] = sigmoid(z[i]);
+        }
+    }
+
+    public void train(int[] x, double lr, double corruption_level) {
+        int[] tilde_x = new int[n_visible];
+        double[] y = new double[n_hidden];
+        double[] z = new double[n_visible];
+
+        double[] L_vbias = new double[n_visible];
+        double[] L_hbias = new double[n_hidden];
+
+        double p = 1 - corruption_level;
+
+        get_corrupted_input(x, tilde_x, p);
+        get_hidden_values(tilde_x, y);
+        get_reconstructed_input(y, z);
+
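+        // the updates below follow the gradient of the cross-entropy reconstruction error, scaled by 1/N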
+        // vbias
+        for(int i=0; i<n_visible; i++) {
+            L_vbias[i] = x[i] - z[i];
+            vbias[i] += lr * L_vbias[i] / N;
+        }
+
+        // hbias
+        for(int i=0; i<n_hidden; i++) {
+            L_hbias[i] = 0;
+            for(int j=0; j<n_visible; j++) {
+                L_hbias[i] += W[i][j] * L_vbias[j];
+            }
+            L_hbias[i] *= y[i] * (1 - y[i]);
+            hbias[i] += lr * L_hbias[i] / N;
+        }
+
+        // W
+        for(int i=0; i<n_hidden; i++) {
+            for(int j=0; j<n_visible; j++) {
+                W[i][j] += lr * (L_hbias[i] * tilde_x[j] + L_vbias[j] * y[i]) / N;
+            }
+        }
+    }
+
+    public void reconstruct(int[] x, double[] z) {
+        double[] y = new double[n_hidden];
+
+        get_hidden_values(x, y);
+        get_reconstructed_input(y, z);
+    }
+
+    private static void test_dA() {
+        Random rng = new Random(123);
+
+        double learning_rate = 0.1;
+        double corruption_level = 0.3;
+        int training_epochs = 100;
+
+        int train_N = 10;
+        int test_N = 2;
+        int n_visible = 20;
+        int n_hidden = 5;
+
+        int[][] train_X = {
+                {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                {1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                {1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                {1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                {0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+                {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1},
+                {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1},
+                {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1},
+                {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0}
+        };
+
+        dA da = new dA(train_N, n_visible, n_hidden, null, null, null, rng);
+
+        // train
+        for(int epoch=0; epoch<training_epochs; epoch++) {
+            for(int i=0; i<train_N; i++) {
+                da.train(train_X[i], learning_rate, corruption_level);
+            }
+        }
+
+        // test data
+        int[][] test_X = {
+                {1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0}
+        };
+
+        double[][] reconstructed_X = new double[test_N][n_visible];
+
+        // test
+        for(int i=0; i<test_N; i++) {
+            da.reconstruct(test_X[i], reconstructed_X[i]);
+            for(int j=0; j<n_visible; j++) {
+                System.out.printf("%.5f ", reconstructed_X[i][j]);
+            }
+            System.out.println();
+        }
+    }
+
+    public static void main(String[] args) {
+        test_dA();
+    }
+}
diff --git a/java/src/DeepLearning/utils.java b/java/src/DeepLearning/utils.java
new file mode 100644
index 0000000..efce0d9
--- /dev/null
+++ b/java/src/DeepLearning/utils.java
@@ -0,0 +1,29 @@
+package DeepLearning;
+
+import java.util.Random;
+
+public class utils {
+    public static double uniform(double min, double max, Random rng) {
+        return rng.nextDouble() * (max - min) + min;
+    }
+
+    public static int binomial(int n, double p, Random rng) {
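+        // draw from Binomial(n, p) by summing n Bernoulli(p) trials; invalid p returns 0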
+        if(p < 0 || p > 1) return 0;
+
+        int c = 0;
+        double r;
+
+        for(int i=0; i<n; i++) {
+            r = rng.nextDouble();
+            if (r < p) c++;
+        }
+
+        return c;
+    }
+
+
+    public static double sigmoid(double x) {
+        return 1.0 / (1.0 + Math.exp(-x));
+    }
+
+}

From 773fa9bbeed94e314a4aeaab05a75822982ff94e Mon Sep 17 00:00:00 2001
From: Yusuke Sugomori <me@yusugomori.com>
Date: Tue, 4 Aug 2015 22:10:42 -0400
Subject: [PATCH 28/45] add gitkeep in java

---
 java/.gitkeep | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 java/.gitkeep

diff --git a/java/.gitkeep b/java/.gitkeep
new file mode 100644
index 0000000..e69de29

From 31688afa659ca3846a3347629594220a2fd62a89 Mon Sep 17 00:00:00 2001
From: Yusuke Sugomori <me@yusugomori.com>
Date: Sun, 9 Aug 2015 12:49:29 -0400
Subject: [PATCH 29/45] bug fix

---
 python/RBM.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/RBM.py b/python/RBM.py
index 4a1be8e..4496692 100755
--- a/python/RBM.py
+++ b/python/RBM.py
@@ -58,7 +58,7 @@ def contrastive_divergence(self, lr=0.1, k=1, input=None):
         # chain_end = nv_samples
 
 
-        self.W += lr * (numpy.dot(self.input.T, ph_sample)
+        self.W += lr * (numpy.dot(self.input.T, ph_mean)
                         - numpy.dot(nv_samples.T, nh_means))
         self.vbias += lr * numpy.mean(self.input - nv_samples, axis=0)
         self.hbias += lr * numpy.mean(ph_sample - nh_means, axis=0)

From 0ffb1fe8df894a43939eb590c70337a107daafcb Mon Sep 17 00:00:00 2001
From: Yusuke Sugomori <me@yusugomori.com>
Date: Sun, 9 Aug 2015 13:07:59 -0400
Subject: [PATCH 30/45] bug fix on MLP

---
 python/Dropout.py            | 53 ++++++++++---------------
 python/HiddenLayer.py        | 29 ++++++--------
 python/LogisticRegression.py | 75 +++++++++++++++++++-----------------
 python/MLP.py                | 46 +++++++++++-----------
 4 files changed, 97 insertions(+), 106 deletions(-)

diff --git a/python/Dropout.py b/python/Dropout.py
index feb594e..df7d646 100755
--- a/python/Dropout.py
+++ b/python/Dropout.py
@@ -50,11 +50,11 @@ def __init__(self, input, label,\
             self.hidden_layers.append(hidden_layer)
 
 
-            # layer for ouput using Logistic Regression (softmax)
-            self.log_layer = LogisticRegression(input=self.hidden_layers[-1].output(),
-                                                label=self.y,
-                                                n_in=hidden_layer_sizes[-1],
-                                                n_out=n_out)
+        # layer for ouput using Logistic Regression (softmax)
+        self.log_layer = LogisticRegression(input=self.hidden_layers[-1].output(),
+                                            label=self.y,
+                                            n_in=hidden_layer_sizes[-1],
+                                            n_out=n_out)
 
 
     def train(self, epochs=500, dropout=True, p_dropout=0.5, rng=None):
@@ -108,27 +108,16 @@ def predict(self, x, dropout=True, p_dropout=0.5):
 
 def test_dropout(n_epochs=500, dropout=True, p_dropout=0.5):
 
-    x = numpy.array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-                     [1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-                     [1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-                     [1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-                     [0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-                     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
-                     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1],
-                     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1],
-                     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1],
-                     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0]])
-
-    y = numpy.array([[1, 0],
-                     [1, 0],
-                     [1, 0],
-                     [1, 0],
-                     [1, 0],
-                     [0, 1],
-                     [0, 1],
-                     [0, 1],
-                     [0, 1],
-                     [0, 1]])
+    x = numpy.array([[-1, -1],
+                     [-1,  1],
+                     [ 1, -1],
+                     [ 1,  1]])
+
+
+    y = numpy.array([[1, -1],
+                     [1, -1],
+                     [-1, 1],
+                     [-1, 1]])
 
 
     rng = numpy.random.RandomState(123)
@@ -136,20 +125,20 @@ def test_dropout(n_epochs=500, dropout=True, p_dropout=0.5):
 
     # construct Dropout MLP
     classifier = Dropout(input=x, label=y, \
-                         n_in=20, hidden_layer_sizes=[12, 12], n_out=2, \
+                         n_in=2, hidden_layer_sizes=[3], n_out=2, \
                          rng=rng, activation=ReLU)
 
 
-    # train
+    # train XOR
     classifier.train(epochs=n_epochs, dropout=dropout, \
                      p_dropout=p_dropout, rng=rng)
 
 
     # test
-    x = numpy.array([[1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-                     [1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-                     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1],
-                     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1]])
+    x = numpy.array([[-1, -1],
+                     [-1,  1],
+                     [ 1, -1],
+                     [ 1,  1]])
 
     print classifier.predict(x)
 
diff --git a/python/HiddenLayer.py b/python/HiddenLayer.py
index f7a0731..166d24c 100755
--- a/python/HiddenLayer.py
+++ b/python/HiddenLayer.py
@@ -51,21 +51,7 @@ def output(self, input=None):
         
         linear_output = numpy.dot(self.x, self.W) + self.b
 
-        return (linear_output if self.activation is None
-                else self.activation(linear_output))
-
-
-    def sample_h_given_v(self, input=None):
-        if input is not None:
-            self.x = input
-
-        v_mean = self.output()
-        h_sample = self.rng.binomial(size=v_mean.shape,
-                                           n=1,
-                                           p=v_mean)
-        return h_sample
-
-
+        return self.activation(linear_output)
 
     def forward(self, input=None):
         return self.output(input=input)
@@ -75,11 +61,10 @@ def backward(self, prev_layer, lr=0.1, input=None):
         if input is not None:
             self.x = input
 
-        d_y = self.dactivation(prev_layer.x) * numpy.dot(prev_layer.d_y, prev_layer.W.T)
+        d_y = self.dactivation( prev_layer.x ) * numpy.dot( prev_layer.d_y, prev_layer.W.T )
 
         self.W += lr * numpy.dot(self.x.T, d_y)
         self.b += lr * numpy.mean(d_y, axis=0)
-
         self.d_y = d_y
 
 
@@ -94,4 +79,14 @@ def dropout(self, input, p, rng=None):
         return mask
                      
 
+    def sample_h_given_v(self, input=None):
+        if input is not None:
+            self.x = input
+
+        v_mean = self.output()
+        h_sample = self.rng.binomial(size=v_mean.shape,
+                                           n=1,
+                                           p=v_mean)
+        return h_sample
+
 
diff --git a/python/LogisticRegression.py b/python/LogisticRegression.py
index f1a686d..3f57889 100755
--- a/python/LogisticRegression.py
+++ b/python/LogisticRegression.py
@@ -9,25 +9,36 @@ class LogisticRegression(object):
     def __init__(self, input, label, n_in, n_out):
         self.x = input
         self.y = label
+
         self.W = numpy.zeros((n_in, n_out))  # initialize W 0
-        self.b = numpy.zeros(n_out)          # initialize bias 0
+        self.b = numpy.zeros(n_out)  # initialize bias 0
 
 
     def train(self, lr=0.1, input=None, L2_reg=0.00):
+        self.forward(input)
+        self.backward(lr, L2_reg)
+
+
+    def forward(self, input=None):
         if input is not None:
             self.x = input
 
-        # p_y_given_x = sigmoid(numpy.dot(self.x, self.W) + self.b)
-        p_y_given_x = softmax(numpy.dot(self.x, self.W) + self.b)
-        d_y = self.y - p_y_given_x
-        
-        self.W += lr * numpy.dot(self.x.T, d_y) - lr * L2_reg * self.W
-        self.b += lr * numpy.mean(d_y, axis=0)
+        p_y_given_x = self.output(self.x)
+        self.d_y = self.y - p_y_given_x
 
-        self.d_y = d_y
         
-        # cost = self.negative_log_likelihood()
-        # return cost
+    def backward(self, lr, L2_reg=0.00):
+        self.W += lr * numpy.dot(self.x.T, self.d_y) - lr * L2_reg * self.W
+        self.b += lr * numpy.mean(self.d_y, axis=0)
+
+
+    def output(self, x):
+        # return sigmoid(numpy.dot(x, self.W) + self.b)
+        return softmax(numpy.dot(x, self.W) + self.b)
+
+    def predict(self, x):
+        return self.output(x)
+
 
     def negative_log_likelihood(self):
         # sigmoid_activation = sigmoid(numpy.dot(self.x, self.W) + self.b)
@@ -41,47 +52,41 @@ def negative_log_likelihood(self):
         return cross_entropy
 
 
-    def predict(self, x):
-        # return sigmoid(numpy.dot(x, self.W) + self.b)
-        return softmax(numpy.dot(x, self.W) + self.b)
-
-    def output(self, x):
-        return self.predict(x)
+def test_lr(learning_rate=0.1, n_epochs=500):
 
+    rng = numpy.random.RandomState(123)
 
-def test_lr(learning_rate=0.01, n_epochs=200):
     # training data
-    x = numpy.array([[1,1,1,0,0,0],
-                     [1,0,1,0,0,0],
-                     [1,1,1,0,0,0],
-                     [0,0,1,1,1,0],
-                     [0,0,1,1,0,0],
-                     [0,0,1,1,1,0]])
-    y = numpy.array([[1, 0],
-                     [1, 0],
-                     [1, 0],
-                     [0, 1],
-                     [0, 1],
-                     [0, 1]])
+    d = 2
+    N = 10
+    x1 = rng.randn(N, d) + numpy.array([0, 0])
+    x2 = rng.randn(N, d) + numpy.array([20, 10])
+    y1 = [[1, 0] for i in xrange(N)]
+    y2 = [[0, 1] for i in xrange(N)]
+
+    x = numpy.r_[x1.astype(int), x2.astype(int)]
+    y = numpy.r_[y1, y2]
 
 
     # construct LogisticRegression
-    classifier = LogisticRegression(input=x, label=y, n_in=6, n_out=2)
+    classifier = LogisticRegression(input=x, label=y, n_in=d, n_out=2)
 
     # train
     for epoch in xrange(n_epochs):
         classifier.train(lr=learning_rate)
         # cost = classifier.negative_log_likelihood()
         # print >> sys.stderr, 'Training epoch %d, cost is ' % epoch, cost
-        learning_rate *= 0.95
+        learning_rate *= 0.995
 
 
     # test
-    x = numpy.array([[1, 1, 0, 0, 0, 0],
-                     [0, 0, 0, 1, 1, 0],
-                     [1, 1, 1, 1, 1, 0]])
+    result = classifier.predict(x)
+    for i in xrange(N):
+        print result[i]
+    print
+    for i in xrange(N):
+        print result[N+i]
 
-    print classifier.predict(x)
 
 
 if __name__ == "__main__":
diff --git a/python/MLP.py b/python/MLP.py
index 0e02f2a..f87e597 100755
--- a/python/MLP.py
+++ b/python/MLP.py
@@ -16,43 +16,49 @@ def __init__(self, input, label, n_in, n_hidden, n_out, rng=None):
         if rng is None:
             rng = numpy.random.RandomState(1234)
 
-        # construct hidden_layer (tanh or sigmoid so far)
+        # construct hidden_layer
         self.hidden_layer = HiddenLayer(input=self.x,
                                         n_in=n_in,
                                         n_out=n_hidden,
                                         rng=rng,
-                                        activation=tanh)
+                                        activation=sigmoid)
 
-        # construct log_layer (softmax)
+        # construct log_layer
         self.log_layer = LogisticRegression(input=self.hidden_layer.output,
                                             label=self.y,
                                             n_in=n_hidden,
                                             n_out=n_out)
 
     def train(self):
-        layer_input = self.hidden_layer.forward()
-        self.log_layer.train(input=layer_input)
-        self.hidden_layer.backward(prev_layer=self.log_layer)
-        
+        # forward hidden_layer
+        log_layer_input = self.hidden_layer.forward(input=self.x)
+
+        # forward log_layer
+        self.log_layer.forward(input=log_layer_input)
+
+        # backward hidden_layer
+        self.hidden_layer.backward(prev_layer=self.log_layer, lr=0.2, input=self.x)
+
+        # backward log_layer
+        self.log_layer.backward(lr=0.2)
+
+
 
     def predict(self, x):
         x = self.hidden_layer.output(input=x)
         return self.log_layer.predict(x)
 
 
-def test_mlp(n_epochs=100):
+def test_mlp(n_epochs=5000):
 
-    x = numpy.array([[1,1,1,0,0,0],
-                     [1,0,1,0,0,0],
-                     [1,1,1,0,0,0],
-                     [0,0,1,1,1,0],
-                     [0,0,1,1,0,0],
-                     [0,0,1,1,1,0]])
-    y = numpy.array([[1, 0],
+    x = numpy.array([[0,  0],
+                     [0,  1],
+                     [1,  0],
+                     [1,  1]])
+
+    y = numpy.array([[0, 1],
                      [1, 0],
                      [1, 0],
-                     [0, 1],
-                     [0, 1],
                      [0, 1]])
 
 
@@ -60,7 +66,7 @@ def test_mlp(n_epochs=100):
 
 
     # construct MLP
-    classifier = MLP(input=x, label=y, n_in=6, n_hidden=15, n_out=2, rng=rng)
+    classifier = MLP(input=x, label=y, n_in=2, n_hidden=3, n_out=2, rng=rng)
 
     # train
     for epoch in xrange(n_epochs):
@@ -68,10 +74,6 @@ def test_mlp(n_epochs=100):
 
 
     # test
-    x = numpy.array([[1, 1, 0, 0, 0, 0],
-                     [0, 0, 0, 1, 1, 0],
-                     [1, 1, 1, 1, 1, 0]])
-
     print classifier.predict(x)
         
 

From 18fa46dc43d6d8753401c4b34f8e00e546dfdbf8 Mon Sep 17 00:00:00 2001
From: Yusuke Sugomori <me@yusugomori.com>
Date: Sun, 9 Aug 2015 13:50:02 -0400
Subject: [PATCH 31/45] dropout

---
 python/Dropout.py            | 29 +++++++++++------------------
 python/HiddenLayer.py        |  4 ++--
 python/LogisticRegression.py | 30 ++++++++++++++++++++----------
 python/MLP.py                | 14 +++++++-------
 4 files changed, 40 insertions(+), 37 deletions(-)

diff --git a/python/Dropout.py b/python/Dropout.py
index df7d646..16c203f 100755
--- a/python/Dropout.py
+++ b/python/Dropout.py
@@ -57,7 +57,7 @@ def __init__(self, input, label,\
                                             n_out=n_out)
 
 
-    def train(self, epochs=500, dropout=True, p_dropout=0.5, rng=None):
+    def train(self, epochs=5000, dropout=True, p_dropout=0.5, rng=None):
 
         for epoch in xrange(epochs):
             dropout_masks = []  # create different masks in each training epoch
@@ -106,26 +106,24 @@ def predict(self, x, dropout=True, p_dropout=0.5):
 
 
 
-def test_dropout(n_epochs=500, dropout=True, p_dropout=0.5):
+def test_dropout(n_epochs=5000, dropout=True, p_dropout=0.5):
 
-    x = numpy.array([[-1, -1],
-                     [-1,  1],
-                     [ 1, -1],
-                     [ 1,  1]])
-
-
-    y = numpy.array([[1, -1],
-                     [1, -1],
-                     [-1, 1],
-                     [-1, 1]])
+    x = numpy.array([[0,  0],
+                     [0,  1],
+                     [1,  0],
+                     [1,  1]])
 
+    y = numpy.array([[0, 1],
+                     [1, 0],
+                     [1, 0],
+                     [0, 1]])
 
     rng = numpy.random.RandomState(123)
 
 
     # construct Dropout MLP
     classifier = Dropout(input=x, label=y, \
-                         n_in=2, hidden_layer_sizes=[3], n_out=2, \
+                         n_in=2, hidden_layer_sizes=[10, 10], n_out=2, \
                          rng=rng, activation=ReLU)
 
 
@@ -135,11 +133,6 @@ def test_dropout(n_epochs=500, dropout=True, p_dropout=0.5):
 
 
     # test
-    x = numpy.array([[-1, -1],
-                     [-1,  1],
-                     [ 1, -1],
-                     [ 1,  1]])
-
     print classifier.predict(x)
 
 
diff --git a/python/HiddenLayer.py b/python/HiddenLayer.py
index 166d24c..75f8ab2 100755
--- a/python/HiddenLayer.py
+++ b/python/HiddenLayer.py
@@ -50,9 +50,9 @@ def output(self, input=None):
             self.x = input
         
         linear_output = numpy.dot(self.x, self.W) + self.b
-
         return self.activation(linear_output)
 
+
     def forward(self, input=None):
         return self.output(input=input)
 
@@ -61,7 +61,7 @@ def backward(self, prev_layer, lr=0.1, input=None):
         if input is not None:
             self.x = input
 
-        d_y = self.dactivation( prev_layer.x ) * numpy.dot( prev_layer.d_y, prev_layer.W.T )
+        d_y = self.dactivation(prev_layer.x) * numpy.dot(prev_layer.d_y, prev_layer.W.T)
 
         self.W += lr * numpy.dot(self.x.T, d_y)
         self.b += lr * numpy.mean(d_y, axis=0)
diff --git a/python/LogisticRegression.py b/python/LogisticRegression.py
index 3f57889..708a1b3 100755
--- a/python/LogisticRegression.py
+++ b/python/LogisticRegression.py
@@ -14,22 +14,32 @@ def __init__(self, input, label, n_in, n_out):
         self.b = numpy.zeros(n_out)  # initialize bias 0
 
 
-    def train(self, lr=0.1, input=None, L2_reg=0.00):
-        self.forward(input)
-        self.backward(lr, L2_reg)
-
-
-    def forward(self, input=None):
+    def train(self, lr=0.1, input=None, L2_reg=0.00):        
         if input is not None:
             self.x = input
 
         p_y_given_x = self.output(self.x)
-        self.d_y = self.y - p_y_given_x
+        d_y = self.y - p_y_given_x
+
+        self.W += lr * numpy.dot(self.x.T, d_y) - lr * L2_reg * self.W
+        self.b += lr * numpy.mean(d_y, axis=0)
+        self.d_y = d_y
+        
+
+    # def train(self, lr=0.1, input=None, L2_reg=0.00):
+    #     self.forward(input)
+    #     self.backward(lr, L2_reg)
+
+    # def forward(self, input=None):
+    #     if input is not None:
+    #         self.x = input
 
+    #     p_y_given_x = self.output(self.x)
+    #     self.d_y = self.y - p_y_given_x
         
-    def backward(self, lr, L2_reg=0.00):
-        self.W += lr * numpy.dot(self.x.T, self.d_y) - lr * L2_reg * self.W
-        self.b += lr * numpy.mean(self.d_y, axis=0)
+    # def backward(self, lr=0.1, L2_reg=0.00):
+    #     self.W += lr * numpy.dot(self.x.T, self.d_y) - lr * L2_reg * self.W
+    #     self.b += lr * numpy.mean(self.d_y, axis=0)
 
 
     def output(self, x):
diff --git a/python/MLP.py b/python/MLP.py
index f87e597..e9ded0b 100755
--- a/python/MLP.py
+++ b/python/MLP.py
@@ -21,7 +21,7 @@ def __init__(self, input, label, n_in, n_hidden, n_out, rng=None):
                                         n_in=n_in,
                                         n_out=n_hidden,
                                         rng=rng,
-                                        activation=sigmoid)
+                                        activation=tanh)
 
         # construct log_layer
         self.log_layer = LogisticRegression(input=self.hidden_layer.output,
@@ -31,17 +31,17 @@ def __init__(self, input, label, n_in, n_hidden, n_out, rng=None):
 
     def train(self):
         # forward hidden_layer
-        log_layer_input = self.hidden_layer.forward(input=self.x)
+        layer_input = self.hidden_layer.forward()
 
-        # forward log_layer
-        self.log_layer.forward(input=log_layer_input)
+        # forward & backward log_layer
+        # self.log_layer.forward(input=layer_input)
+        self.log_layer.train(input=layer_input)
 
         # backward hidden_layer
-        self.hidden_layer.backward(prev_layer=self.log_layer, lr=0.2, input=self.x)
+        self.hidden_layer.backward(prev_layer=self.log_layer)
 
         # backward log_layer
-        self.log_layer.backward(lr=0.2)
-
+        # self.log_layer.backward()
 
 
     def predict(self, x):

From 3daf4b246f5ddfb991968536b1eb0987ac8ce291 Mon Sep 17 00:00:00 2001
From: Yusuke Sugomori <yusugomori@users.noreply.github.com>
Date: Mon, 17 Aug 2015 18:11:06 -0400
Subject: [PATCH 32/45] Update README.md

---
 README.md | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 14e17c5..4b11361 100644
--- a/README.md
+++ b/README.md
@@ -41,5 +41,10 @@
   http://yusugomori.com/docs/SGD_DA.pdf
 
 
+### Other projects : 
+  -  [DeepLearning.coffee](https://github.com/yusugomori/deeplearning.coffee) : Very simple implementation of deep learning by CoffeeScript
+   
+   
+  
+#### Bug reports are deeply welcome.
 
-### Bug reports are deeply welcome.

From d11bd59844fbb4f5f3921ec0f427afed32dafb64 Mon Sep 17 00:00:00 2001
From: Yusuke Sugomori <yusugomori@users.noreply.github.com>
Date: Wed, 19 Aug 2015 22:08:35 -0400
Subject: [PATCH 33/45] Update README.md

---
 README.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 4b11361..933b11a 100644
--- a/README.md
+++ b/README.md
@@ -42,9 +42,10 @@
 
 
 ### Other projects : 
-  -  [DeepLearning.coffee](https://github.com/yusugomori/deeplearning.coffee) : Very simple implementation of deep learning by CoffeeScript
+  -  My deep learning projects are [here](http://yusugomori.com/projects/deep-learning/)
    
    
   
-#### Bug reports are deeply welcome.
+##### Bug reports / contributions / donations are deeply welcome.
+Bitcoin wallet address: 1QAoYw5Y3opvah2APf4jVcpD6UAHyC3k7s
 

From 52f8752b9a9c4f5837c0ab59e5f3aea3a2031a6a Mon Sep 17 00:00:00 2001
From: Yusuke Sugomori <yusugomori@users.noreply.github.com>
Date: Wed, 19 Aug 2015 22:08:55 -0400
Subject: [PATCH 34/45] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 933b11a..26b41bd 100644
--- a/README.md
+++ b/README.md
@@ -42,7 +42,7 @@
 
 
 ### Other projects : 
-  -  My deep learning projects are [here](http://yusugomori.com/projects/deep-learning/)
+  -  My deep learning projects are [here](http://yusugomori.com/projects/deep-learning/).
    
    
   

From 8872adfcd46d77a778c5f37c66627a6526d2d2d7 Mon Sep 17 00:00:00 2001
From: Yusuke Sugomori <ysugomori@google.com>
Date: Thu, 20 Aug 2015 18:04:10 -0400
Subject: [PATCH 35/45] clean up cpp utils

---
 cpp/DBN.cpp         | 25 ++-----------------------
 cpp/HiddenLayer.cpp | 25 ++-----------------------
 cpp/RBM.cpp         | 24 ++----------------------
 cpp/SdA.cpp         | 25 +++----------------------
 cpp/dA.cpp          | 26 +++-----------------------
 cpp/utils.cpp       | 25 -------------------------
 cpp/utils.h         | 32 ++++++++++++++++++++++++++++++++
 7 files changed, 44 insertions(+), 138 deletions(-)
 delete mode 100644 cpp/utils.cpp
 create mode 100644 cpp/utils.h

diff --git a/cpp/DBN.cpp b/cpp/DBN.cpp
index f043e20..155e081 100644
--- a/cpp/DBN.cpp
+++ b/cpp/DBN.cpp
@@ -1,33 +1,12 @@
 #include <iostream>
 #include <math.h>
+#include "utils.h"
 #include "HiddenLayer.h"
 #include "RBM.h"
 #include "LogisticRegression.h"
 #include "DBN.h"
 using namespace std;
-
-
-double uniform(double min, double max) {
-  return rand() / (RAND_MAX + 1.0) * (max - min) + min;
-}
-
-int binomial(int n, double p) {
-  if(p < 0 || p > 1) return 0;
-  
-  int c = 0;
-  double r;
-  
-  for(int i=0; i<n; i++) {
-    r = rand() / (RAND_MAX + 1.0);
-    if (r < p) c++;
-  }
-
-  return c;
-}
-
-double sigmoid(double x) {
-  return 1.0 / (1.0 + exp(-x));
-}
+using namespace utils;
 
 
 // DBN
diff --git a/cpp/HiddenLayer.cpp b/cpp/HiddenLayer.cpp
index b1925fc..fb530c6 100644
--- a/cpp/HiddenLayer.cpp
+++ b/cpp/HiddenLayer.cpp
@@ -1,30 +1,9 @@
 #include <iostream>
 #include <math.h>
 #include "HiddenLayer.h"
+#include "utils.h"
 using namespace std;
-
-double uniform(double min, double max) {
-  return rand() / (RAND_MAX + 1.0) * (max - min) + min;
-}
-
-int binomial(int n, double p) {
-  if(p < 0 || p > 1) return 0;
-  
-  int c = 0;
-  double r;
-  
-  for(int i=0; i<n; i++) {
-    r = rand() / (RAND_MAX + 1.0);
-    if (r < p) c++;
-  }
-
-  return c;
-}
-
-double sigmoid(double x) {
-  return 1.0 / (1.0 + exp(-x));
-}
-
+using namespace utils;
 
 
 HiddenLayer::HiddenLayer(int size, int in, int out, double **w, double *bp) {
diff --git a/cpp/RBM.cpp b/cpp/RBM.cpp
index d64462c..1e606ee 100644
--- a/cpp/RBM.cpp
+++ b/cpp/RBM.cpp
@@ -1,29 +1,9 @@
 #include <iostream>
 #include <math.h>
+#include "utils.h"
 #include "RBM.h"
 using namespace std;
-
-double uniform(double min, double max) {
-  return rand() / (RAND_MAX + 1.0) * (max - min) + min;
-}
-
-int binomial(int n, double p) {
-  if(p < 0 || p > 1) return 0;
-  
-  int c = 0;
-  double r;
-  
-  for(int i=0; i<n; i++) {
-    r = rand() / (RAND_MAX + 1.0);
-    if (r < p) c++;
-  }
-
-  return c;
-}
-
-double sigmoid(double x) {
-  return 1.0 / (1.0 + exp(-x));
-}
+using namespace utils;
 
 
 RBM::RBM(int size, int n_v, int n_h, double **w, double *hb, double *vb) {
diff --git a/cpp/SdA.cpp b/cpp/SdA.cpp
index 39ac32e..5632306 100644
--- a/cpp/SdA.cpp
+++ b/cpp/SdA.cpp
@@ -1,32 +1,13 @@
 #include <iostream>
 #include <math.h>
+#include "utils.h"
+
 #include "HiddenLayer.h"
 #include "dA.h"
 #include "LogisticRegression.h"
 #include "SdA.h"
 using namespace std;
-
-double uniform(double min, double max) {
-  return rand() / (RAND_MAX + 1.0) * (max - min) + min;
-}
-
-int binomial(int n, double p) {
-  if(p < 0 || p > 1) return 0;
-  
-  int c = 0;
-  double r;
-  
-  for(int i=0; i<n; i++) {
-    r = rand() / (RAND_MAX + 1.0);
-    if (r < p) c++;
-  }
-
-  return c;
-}
-
-double sigmoid(double x) {
-  return 1.0 / (1.0 + exp(-x));
-}
+using namespace utils;
 
 
 // SdA
diff --git a/cpp/dA.cpp b/cpp/dA.cpp
index b5414e5..783327e 100644
--- a/cpp/dA.cpp
+++ b/cpp/dA.cpp
@@ -1,30 +1,10 @@
 #include <iostream>
 #include <math.h>
+#include "utils.h"
+
 #include "dA.h"
 using namespace std;
-
-
-double uniform(double min, double max) {
-  return rand() / (RAND_MAX + 1.0) * (max - min) + min;
-}
-
-int binomial(int n, double p) {
-  if(p < 0 || p > 1) return 0;
-  
-  int c = 0;
-  double r;
-  
-  for(int i=0; i<n; i++) {
-    r = rand() / (RAND_MAX + 1.0);
-    if (r < p) c++;
-  }
-
-  return c;
-}
-
-double sigmoid(double x) {
-  return 1.0 / (1.0 + exp(-x));
-}
+using namespace utils;
 
 
 dA::dA(int size, int n_v, int n_h, double **w, double *hb, double *vb) {
diff --git a/cpp/utils.cpp b/cpp/utils.cpp
deleted file mode 100644
index 96af323..0000000
--- a/cpp/utils.cpp
+++ /dev/null
@@ -1,25 +0,0 @@
-#include <iostream>
-#include <math.h>
-using namespace std;
-
-double uniform(double min, double max) {
-  return rand() / (RAND_MAX + 1.0) * (max - min) + min;
-}
-
-int binomial(int n, double p) {
-  if(p < 0 || p > 1) return 0;
-  
-  int c = 0;
-  double r;
-  
-  for(int i=0; i<n; i++) {
-    r = rand() / (RAND_MAX + 1.0);
-    if (r < p) c++;
-  }
-
-  return c;
-}
-
-double sigmoid(double x) {
-  return 1.0 / (1.0 + exp(-x));
-}
diff --git a/cpp/utils.h b/cpp/utils.h
new file mode 100644
index 0000000..78fb182
--- /dev/null
+++ b/cpp/utils.h
@@ -0,0 +1,32 @@
+#pragma once
+
+#include <iostream>
+#include <math.h>
+using namespace std;
+
+
+namespace utils {
+  
+  double uniform(double min, double max) {
+    return rand() / (RAND_MAX + 1.0) * (max - min) + min;
+  }
+
+  int binomial(int n, double p) {
+    if(p < 0 || p > 1) return 0;
+  
+    int c = 0;
+    double r;
+  
+    for(int i=0; i<n; i++) {
+      r = rand() / (RAND_MAX + 1.0);
+      if (r < p) c++;
+    }
+
+    return c;
+  }
+
+  double sigmoid(double x) {
+    return 1.0 / (1.0 + exp(-x));
+  }
+
+}

From 0911fa072463861ecc4f3b0734975b3f307d41b2 Mon Sep 17 00:00:00 2001
From: Yusuke Sugomori <me@yusugomori.com>
Date: Tue, 25 Aug 2015 20:59:56 -0400
Subject: [PATCH 36/45] MLP.java, Dropout.java

---
 java/src/DeepLearning/DBN.java                |  12 +-
 java/src/DeepLearning/Dropout.java            | 225 ++++++++++++++++++
 java/src/DeepLearning/HiddenLayer.java        |  70 +++++-
 .../src/DeepLearning/HiddenLayerDiscrete.java |  56 +++++
 java/src/DeepLearning/LogisticRegression.java |  32 +--
 .../LogisticRegressionDiscrete.java           | 117 +++++++++
 java/src/DeepLearning/MLP.java                | 124 ++++++++++
 java/src/DeepLearning/RBM.java                |   1 -
 java/src/DeepLearning/SdA.java                |  12 +-
 java/src/DeepLearning/utils.java              |  30 ++-
 10 files changed, 640 insertions(+), 39 deletions(-)
 create mode 100644 java/src/DeepLearning/Dropout.java
 create mode 100644 java/src/DeepLearning/HiddenLayerDiscrete.java
 create mode 100644 java/src/DeepLearning/LogisticRegressionDiscrete.java
 create mode 100644 java/src/DeepLearning/MLP.java

diff --git a/java/src/DeepLearning/DBN.java b/java/src/DeepLearning/DBN.java
index 39fb999..e070faf 100644
--- a/java/src/DeepLearning/DBN.java
+++ b/java/src/DeepLearning/DBN.java
@@ -9,9 +9,9 @@ public class DBN {
     public int[] hidden_layer_sizes;
     public int n_outs;
     public int n_layers;
-    public HiddenLayer[] sigmoid_layers;
+    public HiddenLayerDiscrete[] sigmoid_layers;
     public RBM[] rbm_layers;
-    public LogisticRegression log_layer;
+    public LogisticRegressionDiscrete log_layer;
     public Random rng;
 
 
@@ -24,7 +24,7 @@ public DBN(int N, int n_ins, int[] hidden_layer_sizes, int n_outs, int n_layers,
         this.n_outs = n_outs;
         this.n_layers = n_layers;
 
-        this.sigmoid_layers = new HiddenLayer[n_layers];
+        this.sigmoid_layers = new HiddenLayerDiscrete[n_layers];
         this.rbm_layers = new RBM[n_layers];
 
         if(rng == null)	this.rng = new Random(1234);
@@ -39,14 +39,14 @@ public DBN(int N, int n_ins, int[] hidden_layer_sizes, int n_outs, int n_layers,
             }
 
             // construct sigmoid_layer
-            this.sigmoid_layers[i] = new HiddenLayer(this.N, input_size, this.hidden_layer_sizes[i], null, null, rng);
+            this.sigmoid_layers[i] = new HiddenLayerDiscrete(this.N, input_size, this.hidden_layer_sizes[i], null, null, rng);
 
             // construct rbm_layer
             this.rbm_layers[i] = new RBM(this.N, input_size, this.hidden_layer_sizes[i], this.sigmoid_layers[i].W, this.sigmoid_layers[i].b, null, rng);
         }
 
-        // layer for output using DNN.LogisticRegression
-        this.log_layer = new LogisticRegression(this.N, this.hidden_layer_sizes[this.n_layers-1], this.n_outs);
+        // layer for output using Logistic Regression
+        this.log_layer = new LogisticRegressionDiscrete(this.N, this.hidden_layer_sizes[this.n_layers-1], this.n_outs);
     }
 
     public void pretrain(int[][] train_X, double lr, int k, int epochs) {
diff --git a/java/src/DeepLearning/Dropout.java b/java/src/DeepLearning/Dropout.java
new file mode 100644
index 0000000..b4717c6
--- /dev/null
+++ b/java/src/DeepLearning/Dropout.java
@@ -0,0 +1,225 @@
+package DeepLearning;
+
+import java.util.Random;
+import java.util.List;
+import java.util.ArrayList;
+
+public class Dropout {
+    public int N;
+    public int n_in;
+    public int[] hidden_layer_sizes;
+    public int n_out;
+    public int n_layers;
+    public HiddenLayer[] hiddenLayers;
+    public LogisticRegression logisticLayer;
+    public Random rng;
+
+
+    public Dropout(int N, int n_in, int[] hidden_layer_sizes, int n_out, Random rng, String activation) {
+        this.N = N;
+        this.n_in = n_in;
+        this.hidden_layer_sizes = hidden_layer_sizes;
+        this.n_layers = hidden_layer_sizes.length;
+        this.n_out = n_out;
+
+        this.hiddenLayers = new HiddenLayer[n_layers];
+
+        if (rng == null) rng = new Random(1234);
+        this.rng = rng;
+
+        if (activation == null) activation = "ReLU";
+
+        // construct multi-layer
+        int input_size;
+        for(int i=0; i<this.n_layers; i++) {
+            // layer_size
+            if(i == 0) {
+                input_size = n_in;
+            } else {
+                input_size = hidden_layer_sizes[i-1];
+            }
+
+            // construct hiddenLayer
+            this.hiddenLayers[i] = new HiddenLayer(N, input_size, hidden_layer_sizes[i], null, null, rng, activation);
+
+        }
+
+        // construct logisticLayer
+        this.logisticLayer = new LogisticRegression(N, hidden_layer_sizes[this.n_layers-1], n_out);
+
+    }
+
+    public void train(int epochs, double[][] train_X, int[][] train_Y, boolean dropout, double p_dropout, double lr) {
+        List<int[]> dropout_masks;
+        List<double[]> layer_inputs;
+        double[] layer_input;
+        double[] layer_output = new double[0];
+
+        for(int epoch=0; epoch<epochs; epoch++) {
+
+            for(int n=0; n<N; n++) {
+
+                dropout_masks = new ArrayList<>(n_layers);
+                layer_inputs = new ArrayList<>(n_layers+1);  // +1 for logistic layer
+
+                // forward hiddenLayers
+                for(int i=0; i<n_layers; i++) {
+
+                    if(i == 0) layer_input = train_X[n];
+                    else layer_input = layer_output.clone();
+
+                    layer_inputs.add(layer_input.clone());
+
+                    layer_output = new double[hidden_layer_sizes[i]];
+                    hiddenLayers[i].forward(layer_input, layer_output);
+
+                    if(dropout) {
+                        int[] mask;
+                        mask = hiddenLayers[i].dropout(layer_output.length, p_dropout, rng);
+                        for(int j=0; j<layer_output.length; j++) layer_output[j] *= mask[j];
+
+                        dropout_masks.add(mask.clone());
+                    }
+
+                }
+
+
+                // forward & backward logisticLayer
+                double[] logistic_layer_dy; // = new double[n_out];
+                logistic_layer_dy = logisticLayer.train(layer_output, train_Y[n], lr); //, logistic_layer_dy);
+                layer_inputs.add(layer_output.clone());
+
+                // backward hiddenLayers
+                double[] prev_dy = logistic_layer_dy;
+                double[][] prev_W;
+                double[] dy = new double[0];
+
+                for(int i=n_layers-1; i>=0; i--) {
+
+                    if(i == n_layers-1) {
+                        prev_W = logisticLayer.W;
+                    } else {
+                        prev_dy = dy.clone();
+                        prev_W = hiddenLayers[i+1].W;
+                    }
+
+                    dy = new double[hidden_layer_sizes[i]];
+                    hiddenLayers[i].backward(layer_inputs.get(i), dy, layer_inputs.get(i+1), prev_dy, prev_W, lr);
+
+                    if(dropout) {
+                        for(int j=0; j<dy.length; j++) {
+                            dy[j] *= dropout_masks.get(i)[j];
+                        }
+                    }
+                }
+
+            }
+        }
+    }
+
+
+    public void pretest(double p_dropout) {
+        for(int i=0; i<n_layers; i++) {
+            int in;
+            int out;
+
+            if (i == 0) in = n_in;
+            else in = hidden_layer_sizes[i];
+
+            if (i == n_layers - 1) out = n_out;
+            else out = hidden_layer_sizes[i+1];
+
+
+            for (int l = 0; l < out; l++) {
+                for (int m = 0; m < in; m++) {
+                    hiddenLayers[i].W[l][m] *= 1 - p_dropout;
+                }
+            }
+        }
+    }
+
+
+    public void predict(double[] x, double[] y) {
+        double[] layer_input;
+        double[] layer_output = new double[0];
+
+        for(int i=0; i<n_layers; i++) {
+
+            if(i == 0) layer_input = x;
+            else layer_input = layer_output.clone();
+
+            layer_output = new double[hidden_layer_sizes[i]];
+
+            hiddenLayers[i].forward(layer_input, layer_output);
+        }
+
+        logisticLayer.predict(layer_output, y);
+    }
+
+
+    private static void test_dropout() {
+        Random rng = new Random(123);
+
+        double learning_rate = 0.1;
+        int n_epochs = 5000;
+
+        int train_N = 4;
+        int test_N = 4;
+        int n_in = 2;
+        int[] hidden_layer_sizes = {10, 10};
+        int n_out = 2;
+
+        boolean dropout = true;
+        double p_dropout = 0.5;
+
+
+        double[][] train_X = {
+                {0., 0.},
+                {0., 1.},
+                {1., 0.},
+                {1., 1.},
+        };
+
+        int[][] train_Y = {
+                {0, 1},
+                {1, 0},
+                {1, 0},
+                {0, 1},
+        };
+
+        // construct Dropout
+        Dropout classifier = new Dropout(train_N, n_in, hidden_layer_sizes, n_out, rng, "ReLU");
+
+        // train
+        classifier.train(n_epochs, train_X, train_Y, dropout, p_dropout, learning_rate);
+
+        // pretest
+        if(dropout) classifier.pretest(p_dropout);
+
+
+        // test data
+        double[][] test_X = {
+                {0., 0.},
+                {0., 1.},
+                {1., 0.},
+                {1., 1.},
+        };
+
+        double[][] test_Y = new double[test_N][n_out];
+
+        // test
+        for(int i=0; i<test_N; i++) {
+            classifier.predict(test_X[i], test_Y[i]);
+            for(int j=0; j<n_out; j++) {
+                System.out.print(test_Y[i][j] + " ");
+            }
+            System.out.println();
+        }
+
+    }
+
+
+    public static void main(String[] args) {
+        test_dropout();
+    }
+}
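
Dropout.java above implements the usual train/test asymmetry of dropout: during training each hidden layer's output vector is multiplied elementwise by a fresh binary mask (drawn via binomial(1, p_dropout) per example), and before testing pretest() rescales the weights once so the expected input to each unit matches what it saw under dropout (with p_dropout = 0.5, as in test_dropout, the keep probability and the rescaling factor coincide). A minimal, self-contained sketch of that idea; this is illustrative only, not part of the patch, and keepProb and the array names are made up:

    import java.util.Random;

    class DropoutSketch {
        public static void main(String[] args) {
            Random rng = new Random(123);
            double keepProb = 0.5;                        // probability a unit survives
            double[] activations = {0.3, 0.8, 0.1, 0.9};

            // training: zero units at random, with a fresh mask per example
            int[] mask = new int[activations.length];
            for (int i = 0; i < mask.length; i++)
                mask[i] = rng.nextDouble() < keepProb ? 1 : 0;
            for (int i = 0; i < activations.length; i++)
                activations[i] *= mask[i];

            // test time: no mask; instead scale once by the keep probability
            // (Dropout.pretest() folds a factor like this into the weights)
            double[] testActivations = {0.3, 0.8, 0.1, 0.9};
            for (int i = 0; i < testActivations.length; i++)
                testActivations[i] *= keepProb;

            System.out.println(java.util.Arrays.toString(activations));
            System.out.println(java.util.Arrays.toString(testActivations));
        }
    }
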
diff --git a/java/src/DeepLearning/HiddenLayer.java b/java/src/DeepLearning/HiddenLayer.java
index ca50104..028727d 100644
--- a/java/src/DeepLearning/HiddenLayer.java
+++ b/java/src/DeepLearning/HiddenLayer.java
@@ -1,6 +1,7 @@
 package DeepLearning;
 
 import java.util.Random;
+import java.util.function.DoubleFunction;
 import static DeepLearning.utils.*;
 
 public class HiddenLayer {
@@ -10,17 +11,18 @@ public class HiddenLayer {
     public double[][] W;
     public double[] b;
     public Random rng;
+    public DoubleFunction<Double> activation;
+    public DoubleFunction<Double> dactivation;
 
-
-    public HiddenLayer(int N, int n_in, int n_out, double[][] W, double[] b, Random rng) {
+    public HiddenLayer(int N, int n_in, int n_out, double[][] W, double[] b, Random rng, String activation) {
         this.N = N;
         this.n_in = n_in;
         this.n_out = n_out;
 
-        if(rng == null)	this.rng = new Random(1234);
+        if (rng == null) this.rng = new Random(1234);
         else this.rng = rng;
 
-        if(W == null) {
+        if (W == null) {
             this.W = new double[n_out][n_in];
             double a = 1.0 / this.n_in;
 
@@ -33,22 +35,72 @@ public HiddenLayer(int N, int n_in, int n_out, double[][] W, double[] b, Random
             this.W = W;
         }
 
-        if(b == null) this.b = new double[n_out];
+        if (b == null) this.b = new double[n_out];
         else this.b = b;
+
+        if (activation == "sigmoid" || activation == null) {
+            this.activation = (double x) -> sigmoid(x);
+            this.dactivation = (double x) -> dsigmoid(x);
+
+        } else if (activation == "tanh") {
+            this.activation = (double x) -> tanh(x);
+            this.dactivation = (double x) -> dtanh(x);
+        } else if (activation == "ReLU") {
+            this.activation = (double x) -> ReLU(x);
+            this.dactivation = (double x) -> dReLU(x);
+        } else {
+            throw new IllegalArgumentException("activation function not supported");
+        }
+
     }
 
-    public double output(int[] input, double[] w, double b) {
+    public double output(double[] input, double[] w, double b) {
         double linear_output = 0.0;
         for(int j=0; j<n_in; j++) {
             linear_output += w[j] * input[j];
         }
         linear_output += b;
-        return sigmoid(linear_output);
+
+        return activation.apply(linear_output);
+    }
+
+
+    public void forward(double[] input, double[] output) {
+        for(int i=0; i<n_out; i++) {
+            output[i] = this.output(input, W[i], b[i]);
+        }
     }
 
-    public void sample_h_given_v(int[] input, int[] sample) {
+    public void backward(double[] input, double[] dy, double[] prev_layer_input, double[] prev_layer_dy, double[][] prev_layer_W, double lr) {
+        if(dy == null) dy = new double[n_out];
+
+        int prev_n_in = n_out;
+        int prev_n_out = prev_layer_dy.length;
+
+        for(int i=0; i<prev_n_in; i++) {
+            dy[i] = 0;
+            for(int j=0; j<prev_n_out; j++) {
+                dy[i] += prev_layer_dy[j] * prev_layer_W[j][i];
+            }
+
+            dy[i] *= dactivation.apply(prev_layer_input[i]);
+        }
+
         for(int i=0; i<n_out; i++) {
-            sample[i] = binomial(1, output(input, W[i], b[i]), rng);
+            for(int j=0; j<n_in; j++) {
+                W[i][j] += lr * dy[i] * input[j] / N;
+            }
+            b[i] += lr * dy[i] / N;
+        }
+    }
+
+    public int[] dropout(int size, double p, Random rng) {
+        int[] mask = new int[size];
+
+        for(int i=0; i<size; i++) {
+            mask[i] = binomial(1, p, rng);
         }
+
+        return mask;
     }
 }
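
HiddenLayer now stores the activation and its derivative as DoubleFunction<Double> fields chosen by name, so forward() and backward() are written once and work for sigmoid, tanh, and ReLU alike. One detail worth noting: the constructor compares the activation name with ==, which happens to work because MLP and Dropout pass string literals (and the Discrete subclasses pass null), but would not match a name built at runtime. A small sketch of the same dispatch comparing by value instead; illustrative only, not part of the patch:

    import java.util.function.DoubleFunction;

    class ActivationDispatchSketch {
        // same idea as the HiddenLayer constructor, but using switch-on-String,
        // which compares with equals() rather than reference identity
        static DoubleFunction<Double> pick(String name) {
            if (name == null) name = "sigmoid";
            switch (name) {
                case "sigmoid": return x -> 1. / (1. + Math.exp(-x));
                case "tanh":    return x -> Math.tanh(x);
                case "ReLU":    return x -> x > 0 ? x : 0.;
                default: throw new IllegalArgumentException("activation function not supported");
            }
        }

        public static void main(String[] args) {
            System.out.println(pick("ReLU").apply(-2.0));   // prints 0.0
        }
    }
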
diff --git a/java/src/DeepLearning/HiddenLayerDiscrete.java b/java/src/DeepLearning/HiddenLayerDiscrete.java
new file mode 100644
index 0000000..b399db9
--- /dev/null
+++ b/java/src/DeepLearning/HiddenLayerDiscrete.java
@@ -0,0 +1,56 @@
+package DeepLearning;
+
+import java.util.Random;
+import static DeepLearning.utils.*;
+
+public class HiddenLayerDiscrete extends HiddenLayer {
+    public int N;
+    public int n_in;
+    public int n_out;
+    public double[][] W;
+    public double[] b;
+    public Random rng;
+
+
+    public HiddenLayerDiscrete(int N, int n_in, int n_out, double[][] W, double[] b, Random rng) {
+        super(N, n_in, n_out, W, b, rng, null);
+
+        this.N = N;
+        this.n_in = n_in;
+        this.n_out = n_out;
+
+        if(rng == null)	this.rng = new Random(1234);
+        else this.rng = rng;
+
+        if(W == null) {
+            this.W = new double[n_out][n_in];
+            double a = 1.0 / this.n_in;
+
+            for(int i=0; i<n_out; i++) {
+                for(int j=0; j<n_in; j++) {
+                    this.W[i][j] = uniform(-a, a, rng);
+                }
+            }
+        } else {
+            this.W = W;
+        }
+
+        if(b == null) this.b = new double[n_out];
+        else this.b = b;
+    }
+
+    public double output(int[] input, double[] w, double b) {
+        double linear_output = 0.0;
+        for(int j=0; j<n_in; j++) {
+            linear_output += w[j] * input[j];
+        }
+        linear_output += b;
+        return sigmoid(linear_output);
+    }
+
+    public void sample_h_given_v(int[] input, int[] sample) {
+        for(int i=0; i<n_out; i++) {
+            sample[i] = binomial(1, output(input, W[i], b[i]), rng);
+        }
+    }
+}
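
HiddenLayerDiscrete (and LogisticRegressionDiscrete below) keep the original int[]-based interface so that DBN and SdA, which pretrain on binary inputs, continue to work unchanged, while the base HiddenLayer and LogisticRegression now take double[] for MLP and Dropout. Note that output(int[], ...) is an overload of, not an override of, the new output(double[], ...). A short usage sketch, assuming it lives in the same DeepLearning package as the classes from this patch (the class name DiscreteLayerSketch is made up):

    package DeepLearning;

    import java.util.Arrays;
    import java.util.Random;

    class DiscreteLayerSketch {
        public static void main(String[] args) {
            Random rng = new Random(1234);
            // N=6 samples, 6 visible units, 3 hidden units, random weights
            HiddenLayerDiscrete layer = new HiddenLayerDiscrete(6, 6, 3, null, null, rng);

            int[] v = {1, 1, 1, 0, 0, 0};   // binary input, as in DBN/SdA pretraining
            int[] h = new int[3];
            layer.sample_h_given_v(v, h);   // goes through the int[] overload of output()
            System.out.println(Arrays.toString(h));
        }
    }
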
diff --git a/java/src/DeepLearning/LogisticRegression.java b/java/src/DeepLearning/LogisticRegression.java
index abe11a0..af5a9a5 100644
--- a/java/src/DeepLearning/LogisticRegression.java
+++ b/java/src/DeepLearning/LogisticRegression.java
@@ -12,11 +12,11 @@ public LogisticRegression(int N, int n_in, int n_out) {
         this.n_in = n_in;
         this.n_out = n_out;
 
-        W = new double[this.n_out][this.n_in];
-        b = new double[this.n_out];
+        W = new double[n_out][n_in];
+        b = new double[n_out];
     }
 
-    public void train(int[] x, int[] y, double lr) {
+    public double[] train(double[] x, int[] y, double lr) {
         double[] p_y_given_x = new double[n_out];
         double[] dy = new double[n_out];
 
@@ -38,6 +38,8 @@ public void train(int[] x, int[] y, double lr) {
 
             b[i] += lr * dy[i] / N;
         }
+
+        return dy;
     }
 
     public void softmax(double[] x) {
@@ -54,9 +56,9 @@ public void softmax(double[] x) {
         for(int i=0; i<n_out; i++) x[i] /= sum;
     }
 
-    public void predict(int[] x, double[] y) {
+    public void predict(double[] x, double[] y) {
         for(int i=0; i<n_out; i++) {
-            y[i] = 0;
+            y[i] = 0.;
             for(int j=0; j<n_in; j++) {
                 y[i] += W[i][j] * x[j];
             }
@@ -75,13 +77,13 @@ private static void test_lr() {
         int n_in = 6;
         int n_out = 2;
 
-        int[][] train_X = {
-                {1, 1, 1, 0, 0, 0},
-                {1, 0, 1, 0, 0, 0},
-                {1, 1, 1, 0, 0, 0},
-                {0, 0, 1, 1, 1, 0},
-                {0, 0, 1, 1, 0, 0},
-                {0, 0, 1, 1, 1, 0}
+        double[][] train_X = {
+                {1., 1., 1., 0., 0., 0.},
+                {1., 0., 1., 0., 0., 0.},
+                {1., 1., 1., 0., 0., 0.},
+                {0., 0., 1., 1., 1., 0.},
+                {0., 0., 1., 1., 0., 0.},
+                {0., 0., 1., 1., 1., 0.}
         };
 
         int[][] train_Y = {
@@ -105,9 +107,9 @@ private static void test_lr() {
         }
 
         // test data
-        int[][] test_X = {
-                {1, 0, 1, 0, 0, 0},
-                {0, 0, 1, 1, 1, 0}
+        double[][] test_X = {
+                {1., 0., 1., 0., 0., 0.},
+                {0., 0., 1., 1., 1., 0.}
         };
 
         double[][] test_Y = new double[test_N][n_out];
diff --git a/java/src/DeepLearning/LogisticRegressionDiscrete.java b/java/src/DeepLearning/LogisticRegressionDiscrete.java
new file mode 100644
index 0000000..fce5172
--- /dev/null
+++ b/java/src/DeepLearning/LogisticRegressionDiscrete.java
@@ -0,0 +1,117 @@
+package DeepLearning;
+
+public class LogisticRegressionDiscrete extends LogisticRegression {
+    public int N;
+    public int n_in;
+    public int n_out;
+    public double[][] W;
+    public double[] b;
+
+    public LogisticRegressionDiscrete(int N, int n_in, int n_out) {
+        super(N, n_in, n_out);
+
+        this.N = N;
+        this.n_in = n_in;
+        this.n_out = n_out;
+
+        W = new double[this.n_out][this.n_in];
+        b = new double[this.n_out];
+    }
+
+    public void train(int[] x, int[] y, double lr) {
+        double[] p_y_given_x = new double[n_out];
+        double[] dy = new double[n_out];
+
+        for(int i=0; i<n_out; i++) {
+            p_y_given_x[i] = 0;
+            for(int j=0; j<n_in; j++) {
+                p_y_given_x[i] += W[i][j] * x[j];
+            }
+            p_y_given_x[i] += b[i];
+        }
+        softmax(p_y_given_x);
+
+        for(int i=0; i<n_out; i++) {
+            dy[i] = y[i] - p_y_given_x[i];
+
+            for(int j=0; j<n_in; j++) {
+                W[i][j] += lr * dy[i] * x[j] / N;
+            }
+
+            b[i] += lr * dy[i] / N;
+        }
+    }
+
+    public void predict(int[] x, double[] y) {
+        for(int i=0; i<n_out; i++) {
+            y[i] = 0;
+            for(int j=0; j<n_in; j++) {
+                y[i] += W[i][j] * x[j];
+            }
+            y[i] += b[i];
+        }
+
+        softmax(y);
+    }
+
+    private static void test_lr() {
+        double learning_rate = 0.1;
+        int n_epochs = 500;
+
+        int train_N = 6;
+        int test_N = 2;
+        int n_in = 6;
+        int n_out = 2;
+
+        int[][] train_X = {
+                {1, 1, 1, 0, 0, 0},
+                {1, 0, 1, 0, 0, 0},
+                {1, 1, 1, 0, 0, 0},
+                {0, 0, 1, 1, 1, 0},
+                {0, 0, 1, 1, 0, 0},
+                {0, 0, 1, 1, 1, 0}
+        };
+
+        int[][] train_Y = {
+                {1, 0},
+                {1, 0},
+                {1, 0},
+                {0, 1},
+                {0, 1},
+                {0, 1}
+        };
+
+        // construct
+        LogisticRegressionDiscrete classifier = new LogisticRegressionDiscrete(train_N, n_in, n_out);
+
+        // train
+        for(int epoch=0; epoch<n_epochs; epoch++) {
+            for(int i=0; i<train_N; i++) {
+                classifier.train(train_X[i], train_Y[i], learning_rate);
+            }
+            //learning_rate *= 0.95;
+        }
+
+        // test data
+        int[][] test_X = {
+                {1, 0, 1, 0, 0, 0},
+                {0, 0, 1, 1, 1, 0}
+        };
+
+        double[][] test_Y = new double[test_N][n_out];
+
+
+        // test
+        for(int i=0; i<test_N; i++) {
+            classifier.predict(test_X[i], test_Y[i]);
+            for(int j=0; j<n_out; j++) {
+                System.out.print(test_Y[i][j] + " ");
+            }
+            System.out.println();
+        }
+    }
+
+    public static void main(String[] args) {
+        test_lr();
+    }
+}
diff --git a/java/src/DeepLearning/MLP.java b/java/src/DeepLearning/MLP.java
new file mode 100644
index 0000000..7acece8
--- /dev/null
+++ b/java/src/DeepLearning/MLP.java
@@ -0,0 +1,124 @@
+package DeepLearning;
+
+import java.util.Random;
+
+public class MLP {
+    public int N;
+    public int n_in;
+    public int n_hidden;
+    public int n_out;
+    public HiddenLayer hiddenLayer;
+    public LogisticRegression logisticLayer;
+    public Random rng;
+
+
+    public MLP(int N, int n_in, int n_hidden, int n_out, Random rng) {
+
+        this.N = N;
+        this.n_in = n_in;
+        this.n_hidden = n_hidden;
+        this.n_out = n_out;
+
+        if (rng == null)rng = new Random(1234);
+        this.rng = rng;
+
+        // construct hiddenLayer
+        this.hiddenLayer = new HiddenLayer(N, n_in, n_hidden, null, null, rng, "tanh");
+
+        // construct logisticLayer
+        this.logisticLayer = new LogisticRegression(N, n_hidden, n_out);
+    }
+
+
+    public void train(double[][] train_X, int[][] train_Y, double lr) {
+        double[] hidden_layer_input;
+        double[] logistic_layer_input;
+        double[] dy;
+
+        for(int n=0; n<N; n++) {
+            hidden_layer_input = new double[n_in];
+            logistic_layer_input = new double[n_hidden];
+
+            for(int j=0; j<n_in; j++) hidden_layer_input[j] = train_X[n][j];
+
+            // forward hiddenLayer
+            hiddenLayer.forward(hidden_layer_input, logistic_layer_input);
+
+            // forward and backward logisticLayer
+            // dy = new double[n_out];  // define delta of y for backpropagation
+            dy = logisticLayer.train(logistic_layer_input, train_Y[n], lr); //, dy);
+
+            // backward hiddenLayer
+            hiddenLayer.backward(hidden_layer_input, null, logistic_layer_input, dy, logisticLayer.W, lr);
+
+        }
+    }
+
+    public void predict(double[] x, double[] y) {
+        double[] logistic_layer_input = new double[n_hidden];
+        hiddenLayer.forward(x, logistic_layer_input);
+        logisticLayer.predict(logistic_layer_input, y);
+    }
+
+
+
+    private static void test_mlp() {
+        Random rng = new Random(123);
+
+        double learning_rate = 0.1;
+        int n_epochs = 5000;
+
+        int train_N = 4;
+        int test_N = 4;
+        int n_in = 2;
+        int n_hidden = 3;
+        int n_out = 2;
+
+        double[][] train_X = {
+                {0., 0.},
+                {0., 1.},
+                {1., 0.},
+                {1., 1.},
+        };
+
+        int[][] train_Y = {
+                {0, 1},
+                {1, 0},
+                {1, 0},
+                {0, 1},
+        };
+
+        // construct MLP
+        MLP classifier = new MLP(train_N, n_in, n_hidden, n_out, rng);
+
+        // train
+        for(int epoch=0; epoch<n_epochs; epoch++) {
+            classifier.train(train_X, train_Y, learning_rate);
+        }
+
+        // test data
+        double[][] test_X = {
+                {0., 0.},
+                {0., 1.},
+                {1., 0.},
+                {1., 1.},
+        };
+
+        double[][] test_Y = new double[test_N][n_out];
+
+
+        // test
+        for(int i=0; i<test_N; i++) {
+            classifier.predict(test_X[i], test_Y[i]);
+            for(int j=0; j<n_out; j++) {
+                System.out.print(test_Y[i][j] + " ");
+            }
+            System.out.println();
+        }
+
+    }
+
+    public static void main(String[] args) {
+        test_mlp();
+    }
+}
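
The wiring between the two layers goes through the value returned by LogisticRegression.train: dy there is the output-layer error (target minus softmax output), and HiddenLayer.backward turns it into the hidden-layer error by the chain rule, roughly dy_hidden[i] = dtanh(hidden_output[i]) * sum_j( dy[j] * logisticLayer.W[j][i] ), before applying the same lr * dy * input / N update that the logistic layer uses.
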
diff --git a/java/src/DeepLearning/RBM.java b/java/src/DeepLearning/RBM.java
index d84e65c..f3268f8 100644
--- a/java/src/DeepLearning/RBM.java
+++ b/java/src/DeepLearning/RBM.java
@@ -3,7 +3,6 @@
 import java.util.Random;
 import static DeepLearning.utils.*;
 
-
 public class RBM {
     public int N;
     public int n_visible;
diff --git a/java/src/DeepLearning/SdA.java b/java/src/DeepLearning/SdA.java
index 962c335..af761e7 100644
--- a/java/src/DeepLearning/SdA.java
+++ b/java/src/DeepLearning/SdA.java
@@ -9,9 +9,9 @@ public class SdA {
     public int[] hidden_layer_sizes;
     public int n_outs;
     public int n_layers;
-    public HiddenLayer[] sigmoid_layers;
+    public HiddenLayerDiscrete[] sigmoid_layers;
     public dA[] dA_layers;
-    public LogisticRegression log_layer;
+    public LogisticRegressionDiscrete log_layer;
     public Random rng;
 
 
@@ -24,7 +24,7 @@ public SdA(int N, int n_ins, int[] hidden_layer_sizes, int n_outs, int n_layers,
         this.n_outs = n_outs;
         this.n_layers = n_layers;
 
-        this.sigmoid_layers = new HiddenLayer[n_layers];
+        this.sigmoid_layers = new HiddenLayerDiscrete[n_layers];
         this.dA_layers = new dA[n_layers];
 
         if(rng == null)	this.rng = new Random(1234);
@@ -39,14 +39,14 @@ public SdA(int N, int n_ins, int[] hidden_layer_sizes, int n_outs, int n_layers,
             }
 
             // construct sigmoid_layer
-            this.sigmoid_layers[i] = new HiddenLayer(this.N, input_size, this.hidden_layer_sizes[i], null, null, rng);
+            this.sigmoid_layers[i] = new HiddenLayerDiscrete(this.N, input_size, this.hidden_layer_sizes[i], null, null, rng);
 
             // construct dA_layer
             this.dA_layers[i] = new dA(this.N, input_size, this.hidden_layer_sizes[i], this.sigmoid_layers[i].W, this.sigmoid_layers[i].b, null, rng);
         }
 
-        // layer for output using DNN.LogisticRegression
-        this.log_layer = new LogisticRegression(this.N, this.hidden_layer_sizes[this.n_layers-1], this.n_outs);
+        // layer for output using Logistic Regression
+        this.log_layer = new LogisticRegressionDiscrete(this.N, this.hidden_layer_sizes[this.n_layers-1], this.n_outs);
     }
 
     public void pretrain(int[][] train_X, double lr, double corruption_level, int epochs) {
diff --git a/java/src/DeepLearning/utils.java b/java/src/DeepLearning/utils.java
index efce0d9..95d95f7 100644
--- a/java/src/DeepLearning/utils.java
+++ b/java/src/DeepLearning/utils.java
@@ -21,9 +21,35 @@ public static int binomial(int n, double p, Random rng) {
         return c;
     }
 
-
     public static double sigmoid(double x) {
-        return 1.0 / (1.0 + Math.pow(Math.E, -x));
+        return 1. / (1. + Math.pow(Math.E, -x));
+    }
+
+    public static double dsigmoid(double x) {
+        return x * (1. - x);
+    }
+
+    public static double tanh(double x) {
+        return Math.tanh(x);
+    }
+
+    public static double dtanh(double x) {
+        return 1. - x * x;
+    }
+
+    public static double ReLU(double x) {
+        if(x > 0) {
+            return x;
+        } else {
+            return 0.;
+        }
     }
 
+    public static double dReLU(double x) {
+        if(x > 0) {
+            return 1.;
+        } else {
+            return 0.;
+        }
+    }
 }
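
A convention worth spelling out: the new derivative helpers take the activation's output rather than its input, i.e. dsigmoid(y) = y * (1 - y) and dtanh(y) = 1 - y^2 assume y = sigmoid(x) and y = tanh(x). This matches HiddenLayer.backward, which applies dactivation to prev_layer_input, the stored output of the layer being updated. A tiny check of that convention; illustrative only, not part of the patch:

    class DerivativeConventionSketch {
        static double sigmoid(double x)  { return 1. / (1. + Math.exp(-x)); }
        static double dsigmoid(double y) { return y * (1. - y); }   // y is sigmoid(x), not x

        public static void main(String[] args) {
            double x = 0.3;
            double y = sigmoid(x);
            double analytic = dsigmoid(y);                                   // derivative from the output
            double numeric  = (sigmoid(x + 1e-6) - sigmoid(x - 1e-6)) / 2e-6;
            System.out.println(analytic + " vs " + numeric);                 // both about 0.2445
        }
    }
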

From ad217dbb8b993a9cff0d0e6cf3e6a9de5778ed33 Mon Sep 17 00:00:00 2001
From: Yusuke Sugomori <yusugomori@users.noreply.github.com>
Date: Thu, 27 Aug 2015 11:50:06 -0400
Subject: [PATCH 37/45] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 26b41bd..fc5b865 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-##  Deep Learning (Python, C/C++, Java, Scala, Go)
+##  Deep Learning (Python, C, C++, Java, Scala, Go)
 
 ### Classes :
 

From 03efd4a19dd5629cb85a8afdf5cb75d02ef07cc2 Mon Sep 17 00:00:00 2001
From: Yusuke Sugomori <me@yusugomori.com>
Date: Tue, 20 Oct 2015 00:53:17 +0900
Subject: [PATCH 38/45] bug fix

---
 .gitignore                         |  5 ++++-
 data/.gitkeep                      |  0
 java/src/DeepLearning/Dropout.java | 10 +++++-----
 python/RBM.py                      |  2 +-
 4 files changed, 10 insertions(+), 7 deletions(-)
 create mode 100644 data/.gitkeep

diff --git a/.gitignore b/.gitignore
index 252ef14..98c89ef 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,4 +11,7 @@
 
 java/.idea/*
 java/java.iml
-java/out/*
\ No newline at end of file
+java/out/*
+
+data/*
+!data/.gitkeep
\ No newline at end of file
diff --git a/data/.gitkeep b/data/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/java/src/DeepLearning/Dropout.java b/java/src/DeepLearning/Dropout.java
index b4717c6..bb6378f 100644
--- a/java/src/DeepLearning/Dropout.java
+++ b/java/src/DeepLearning/Dropout.java
@@ -103,14 +103,14 @@ public void train(int epochs, double[][] train_X, int[][] train_Y, boolean dropo
                         prev_W = hiddenLayers[i+1].W;
                     }
 
-                    dy = new double[hidden_layer_sizes[i]];
-                    hiddenLayers[i].backward(layer_inputs.get(i), dy, layer_inputs.get(i+1), prev_dy, prev_W, lr);
-
                     if(dropout) {
-                        for(int j=0; j<dy.length; j++) {
-                            dy[j] *= dropout_masks.get(i)[j];
+                        for(int j=0; j<prev_dy.length; j++) {
+                            prev_dy[j] *= dropout_masks.get(i)[j];
                         }
                     }
+
+                    dy = new double[hidden_layer_sizes[i]];
+                    hiddenLayers[i].backward(layer_inputs.get(i), dy, layer_inputs.get(i+1), prev_dy, prev_W, lr);
                 }
 
             }
diff --git a/python/RBM.py b/python/RBM.py
index 4496692..7a127d8 100755
--- a/python/RBM.py
+++ b/python/RBM.py
@@ -61,7 +61,7 @@ def contrastive_divergence(self, lr=0.1, k=1, input=None):
         self.W += lr * (numpy.dot(self.input.T, ph_mean)
                         - numpy.dot(nv_samples.T, nh_means))
         self.vbias += lr * numpy.mean(self.input - nv_samples, axis=0)
-        self.hbias += lr * numpy.mean(ph_sample - nh_means, axis=0)
+        self.hbias += lr * numpy.mean(ph_mean - nh_means, axis=0)
 
         # cost = self.get_reconstruction_cross_entropy()
         # return cost
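
The one-line change above makes the hidden-bias update use the mean hidden activations on the data (positive) side, hbias += lr * mean(ph_mean - nh_means), consistent with the weight update two lines earlier, which already contrasts input.T dot ph_mean with nv_samples.T dot nh_means. Using the probabilities p(h|v) rather than sampled hidden states for these statistics is the usual variance-reducing choice in contrastive divergence training.
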

From 5c9cfe02894a666343e5a4640586364e420e2458 Mon Sep 17 00:00:00 2001
From: Yusuke Sugomori <me@yusugomori.com>
Date: Tue, 20 Oct 2015 01:13:20 +0900
Subject: [PATCH 39/45] bug fix

---
 python/Dropout.py     | 7 ++++---
 python/HiddenLayer.py | 5 ++++-
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/python/Dropout.py b/python/Dropout.py
index 16c203f..ba99116 100755
--- a/python/Dropout.py
+++ b/python/Dropout.py
@@ -87,10 +87,11 @@ def train(self, epochs=5000, dropout=True, p_dropout=0.5, rng=None):
                 else:
                     prev_layer = self.hidden_layers[i+1]
 
-                self.hidden_layers[i].backward(prev_layer=prev_layer)
-                
                 if dropout == True:
-                    self.hidden_layers[i].d_y *= dropout_masks[i]  # also mask here
+                    self.hidden_layers[i].backward(prev_layer=prev_layer, dropout=True, mask=dropout_masks[i])
+                else:
+                    self.hidden_layers[i].backward(prev_layer=prev_layer)
+                
 
 
     def predict(self, x, dropout=True, p_dropout=0.5):
diff --git a/python/HiddenLayer.py b/python/HiddenLayer.py
index 75f8ab2..a97bc61 100755
--- a/python/HiddenLayer.py
+++ b/python/HiddenLayer.py
@@ -57,12 +57,15 @@ def forward(self, input=None):
         return self.output(input=input)
 
 
-    def backward(self, prev_layer, lr=0.1, input=None):
+    def backward(self, prev_layer, lr=0.1, input=None, dropout=False, mask=None):
         if input is not None:
             self.x = input
 
         d_y = self.dactivation(prev_layer.x) * numpy.dot(prev_layer.d_y, prev_layer.W.T)
 
+        if dropout == True:
+            d_y *= mask
+
         self.W += lr * numpy.dot(self.x.T, d_y)
         self.b += lr * numpy.mean(d_y, axis=0)
         self.d_y = d_y

From f4e32289fbe627cd2bcc5a1bf703b2df8499b55c Mon Sep 17 00:00:00 2001
From: Yusuke Sugomori <yusugomori@users.noreply.github.com>
Date: Fri, 30 Oct 2015 15:05:59 +0900
Subject: [PATCH 40/45] Update README.md

---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index fc5b865..ab89b7c 100644
--- a/README.md
+++ b/README.md
@@ -22,6 +22,8 @@
 
   - Dropout: Dropout MLP
 
+  - CNN: Convolutional Neural Networks (See [dev branch](https://github.com/yusugomori/DeepLearning/tree/dev).)
+
 
 
 ### References :

From 7a3596669bfe8e1e01b5137702d739155c4e4ba9 Mon Sep 17 00:00:00 2001
From: Yusuke Sugomori <yusugomori@users.noreply.github.com>
Date: Thu, 17 Dec 2015 19:23:14 +0900
Subject: [PATCH 41/45] Update README.md

---
 README.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/README.md b/README.md
index ab89b7c..0ece2c8 100644
--- a/README.md
+++ b/README.md
@@ -42,6 +42,10 @@
   - Yusuke Sugomori: Stochastic Gradient Descent for Denoising Autoencoders,
   http://yusugomori.com/docs/SGD_DA.pdf
 
+### Publication :
+  - More cleaner Java implementations are introduced in my book, Deep Learning with Java.
+
+    The book is now available to pre-order from [Packt Publishing](https://www.packtpub.com/big-data-and-business-intelligence/deep-learning-java) or [Amazon](http://www.amazon.com/Deep-Learning-Java-Yusuke-Sugomori/dp/1785282190/).
 
 ### Other projects : 
   -  My deep learning projects are [here](http://yusugomori.com/projects/deep-learning/).

From 8d371431bd4d80914a73d666c72ffc94182275c8 Mon Sep 17 00:00:00 2001
From: Yusuke Sugomori <yusugomori@users.noreply.github.com>
Date: Fri, 18 Dec 2015 10:27:27 +0900
Subject: [PATCH 42/45] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 0ece2c8..595890d 100644
--- a/README.md
+++ b/README.md
@@ -43,7 +43,7 @@
   http://yusugomori.com/docs/SGD_DA.pdf
 
 ### Publication :
-  - More cleaner Java implementations are introduced in my book, Deep Learning with Java.
+  - More detailed Java implementations are introduced in my book, Deep Learning with Java.
 
     The book is now available to pre-order from [Packt Publishing](https://www.packtpub.com/big-data-and-business-intelligence/deep-learning-java) or [Amazon](http://www.amazon.com/Deep-Learning-Java-Yusuke-Sugomori/dp/1785282190/).
 

From 73e2f448931d9a2d2117572ea041c2f76956fae4 Mon Sep 17 00:00:00 2001
From: Yusuke Sugomori <yusugomori@users.noreply.github.com>
Date: Tue, 31 May 2016 01:08:05 +0900
Subject: [PATCH 43/45] Update README.md

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 595890d..77ba574 100644
--- a/README.md
+++ b/README.md
@@ -43,9 +43,9 @@
   http://yusugomori.com/docs/SGD_DA.pdf
 
 ### Publication :
-  - More detailed Java implementations are introduced in my book, Deep Learning with Java.
+  - More detailed Java implementations are introduced in my book, Java Deep Learning Essentials.
 
-    The book is now available to pre-order from [Packt Publishing](https://www.packtpub.com/big-data-and-business-intelligence/deep-learning-java) or [Amazon](http://www.amazon.com/Deep-Learning-Java-Yusuke-Sugomori/dp/1785282190/).
+    The book is available from [Packt Publishing](https://www.packtpub.com/big-data-and-business-intelligence/java-deep-learning-essentials) or [Amazon](http://www.amazon.com/Deep-Learning-Java-Yusuke-Sugomori/dp/1785282190/).
 
 ### Other projects : 
   -  My deep learning projects are [here](http://yusugomori.com/projects/deep-learning/).

From 6b8af8891a94a1a25c15b5d5151f4bb59d0dab5f Mon Sep 17 00:00:00 2001
From: Yusuke Sugomori <yusuke_sugomori@micin.jp>
Date: Thu, 7 Sep 2017 21:35:24 +0900
Subject: [PATCH 44/45] add LICENSE

---
 LICENSE.txt | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)
 create mode 100644 LICENSE.txt

diff --git a/LICENSE.txt b/LICENSE.txt
new file mode 100644
index 0000000..92d495f
--- /dev/null
+++ b/LICENSE.txt
@@ -0,0 +1,22 @@
+Copyright (c) 2017 Yusuke Sugomori
+
+MIT License
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

From 739dfd1d7919c328e0d3b8129855c2ad71b80036 Mon Sep 17 00:00:00 2001
From: Yusuke Sugomori <yusuke_sugomori@micin.jp>
Date: Mon, 4 Dec 2017 15:01:44 +0900
Subject: [PATCH 45/45] update README

---
 README.md | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 77ba574..37f8767 100644
--- a/README.md
+++ b/README.md
@@ -47,11 +47,7 @@
 
     The book is available from [Packt Publishing](https://www.packtpub.com/big-data-and-business-intelligence/java-deep-learning-essentials) or [Amazon](http://www.amazon.com/Deep-Learning-Java-Yusuke-Sugomori/dp/1785282190/).
 
-### Other projects : 
-  -  My deep learning projects are [here](http://yusugomori.com/projects/deep-learning/).
-   
-   
   
 ##### Bug reports / contributions / donations are deeply welcome.
-Bitcoin wallet address: 1QAoYw5Y3opvah2APf4jVcpD6UAHyC3k7s
+Bitcoin wallet address: 34kZarc2uBU6BMCouUp2iudvZtbmZMPqrA