diff --git a/.gitignore b/.gitignore index 190cc57..98c89ef 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,10 @@ *.settings .project .metadata + +java/.idea/* +java/java.iml +java/out/* + +data/* +!data/.gitkeep \ No newline at end of file diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..92d495f --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,22 @@ +Copyright (c) 2017 Yusuke Sugomori + +MIT License + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/README.md b/README.md index 804e867..37f8767 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Deep Learning (Python, C/C++, Java) +## Deep Learning (Python, C, C++, Java, Scala, Go) ### Classes : @@ -17,6 +17,12 @@ - LogisticRegression: Logistic Regression - HiddenLayer: Hidden Layer of Neural Networks + + - MLP: Multiple Layer Perceptron + + - Dropout: Dropout MLP + + - CNN: Convolutional Neural Networks (See [dev branch](https://github.com/yusugomori/DeepLearning/tree/dev).) @@ -36,6 +42,12 @@ - Yusuke Sugomori: Stochastic Gradient Descent for Denoising Autoencoders, http://yusugomori.com/docs/SGD_DA.pdf +### Publication : + - More detailed Java implementations are introduced in my book, Java Deep Learning Essentials. + + The book is available from [Packt Publishing](https://www.packtpub.com/big-data-and-business-intelligence/java-deep-learning-essentials) or [Amazon](http://www.amazon.com/Deep-Learning-Java-Yusuke-Sugomori/dp/1785282190/). + +##### Bug reports / contributions / donations are deeply welcome. +Bitcoin wallet address: 34kZarc2uBU6BMCouUp2iudvZtbmZMPqrA -### Bug reports are deeply welcome. 
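The README section above adds MLP and Dropout to the class list. As a quick orientation, here is a condensed usage sketch drawn from the test_mlp() and test_dropout() functions that this patch adds under python/; it assumes a Python 2 interpreter (the repo code uses xrange and print statements) and that the new modules are on the import path.

```python
# Condensed from the new python/MLP.py and python/Dropout.py test functions
# added later in this patch; both examples learn the XOR mapping.
import numpy
from MLP import MLP
from Dropout import Dropout
from utils import ReLU

x = numpy.array([[0, 0], [0, 1], [1, 0], [1, 1]])   # inputs
y = numpy.array([[0, 1], [1, 0], [1, 0], [0, 1]])   # one-hot XOR labels
rng = numpy.random.RandomState(123)

# plain multi-layer perceptron
mlp = MLP(input=x, label=y, n_in=2, n_hidden=3, n_out=2, rng=rng)
for epoch in xrange(5000):
    mlp.train()
print mlp.predict(x)

# MLP with dropout and ReLU hidden units
clf = Dropout(input=x, label=y, n_in=2, hidden_layer_sizes=[10, 10],
              n_out=2, rng=rng, activation=ReLU)
clf.train(epochs=5000, dropout=True, p_dropout=0.5, rng=rng)
print clf.predict(x)
```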
\ No newline at end of file diff --git a/c/DBN.c b/c/DBN.c index 9423622..eee1e1c 100644 --- a/c/DBN.c +++ b/c/DBN.c @@ -190,8 +190,9 @@ void DBN_predict(DBN* this, int *x, double *y) { for(i=0; in_layers; i++) { layer_input = (double *)malloc(sizeof(double) * this->sigmoid_layers[i].n_out); - linear_output = 0.0; for(k=0; ksigmoid_layers[i].n_out; k++) { + linear_output = 0.0; + for(j=0; jsigmoid_layers[i].n_in; j++) { linear_output += this->sigmoid_layers[i].W[k][j] * prev_layer_input[j]; } @@ -346,7 +347,8 @@ void RBM_contrastive_divergence(RBM* this, int *input, double lr, int k) { for(i=0; in_hidden; i++) { for(j=0; jn_visible; j++) { - this->W[i][j] += lr * (ph_sample[i] * input[j] - nh_means[i] * nv_samples[j]) / this->N; + // this->W[i][j] += lr * (ph_sample[i] * input[j] - nh_means[i] * nv_samples[j]) / this->N; + this->W[i][j] += lr * (ph_mean[i] * input[j] - nh_means[i] * nv_samples[j]) / this->N; } this->hbias[i] += lr * (ph_sample[i] - nh_means[i]) / this->N; } diff --git a/c/LogisticRegression.c b/c/LogisticRegression.c index 0c0c04a..b55c707 100644 --- a/c/LogisticRegression.c +++ b/c/LogisticRegression.c @@ -94,7 +94,7 @@ void test_lr(void) { int i, j, epoch; double learning_rate = 0.1; - double n_epochs = 500; + int n_epochs = 500; int train_N = 6; int test_N = 2; diff --git a/c/RBM.c b/c/RBM.c index 9ea27b2..7e806bb 100644 --- a/c/RBM.c +++ b/c/RBM.c @@ -100,7 +100,8 @@ void RBM_contrastive_divergence(RBM* this, int *input, double lr, int k) { for(i=0; in_hidden; i++) { for(j=0; jn_visible; j++) { - this->W[i][j] += lr * (ph_sample[i] * input[j] - nh_means[i] * nv_samples[j]) / this->N; + // this->W[i][j] += lr * (ph_sample[i] * input[j] - nh_means[i] * nv_samples[j]) / this->N; + this->W[i][j] += lr * (ph_mean[i] * input[j] - nh_means[i] * nv_samples[j]) / this->N; } this->hbias[i] += lr * (ph_sample[i] - nh_means[i]) / this->N; } diff --git a/c/SdA.c b/c/SdA.c index 273d2b8..99170ee 100644 --- a/c/SdA.c +++ b/c/SdA.c @@ -188,8 +188,9 @@ void SdA_predict(SdA* this, int *x, double *y) { for(i=0; in_layers; i++) { layer_input = (double *)malloc(sizeof(double) * this->sigmoid_layers[i].n_out); - linear_output = 0.0; for(k=0; ksigmoid_layers[i].n_out; k++) { + linear_output = 0.0; + for(j=0; jsigmoid_layers[i].n_in; j++) { linear_output += this->sigmoid_layers[i].W[k][j] * prev_layer_input[j]; } diff --git a/cpp/DBN.cpp b/cpp/DBN.cpp index e92bad7..155e081 100644 --- a/cpp/DBN.cpp +++ b/cpp/DBN.cpp @@ -1,33 +1,12 @@ #include #include +#include "utils.h" #include "HiddenLayer.h" #include "RBM.h" #include "LogisticRegression.h" #include "DBN.h" using namespace std; - - -double uniform(double min, double max) { - return rand() / (RAND_MAX + 1.0) * (max - min) + min; -} - -int binomial(int n, double p) { - if(p < 0 || p > 1) return 0; - - int c = 0; - double r; - - for(int i=0; in_out]; - linear_output = 0.0; for(int k=0; kn_out; k++) { + linear_output = 0.0; + for(int j=0; jn_in; j++) { linear_output += sigmoid_layers[i]->W[k][j] * prev_layer_input[j]; } @@ -321,7 +301,8 @@ void RBM::contrastive_divergence(int *input, double lr, int k) { for(int i=0; i #include #include "HiddenLayer.h" +#include "utils.h" using namespace std; - -double uniform(double min, double max) { - return rand() / (RAND_MAX + 1.0) * (max - min) + min; -} - -int binomial(int n, double p) { - if(p < 0 || p > 1) return 0; - - int c = 0; - double r; - - for(int i=0; i #include +#include "utils.h" #include "RBM.h" using namespace std; - -double uniform(double min, double max) { - return rand() / (RAND_MAX + 
1.0) * (max - min) + min; -} - -int binomial(int n, double p) { - if(p < 0 || p > 1) return 0; - - int c = 0; - double r; - - for(int i=0; i #include +#include "utils.h" + #include "HiddenLayer.h" #include "dA.h" #include "LogisticRegression.h" #include "SdA.h" using namespace std; - -double uniform(double min, double max) { - return rand() / (RAND_MAX + 1.0) * (max - min) + min; -} - -int binomial(int n, double p) { - if(p < 0 || p > 1) return 0; - - int c = 0; - double r; - - for(int i=0; in_out]; - linear_output = 0.0; for(int k=0; kn_out; k++) { + linear_output = 0.0; + for(int j=0; jn_in; j++) { linear_output += sigmoid_layers[i]->W[k][j] * prev_layer_input[j]; } diff --git a/cpp/dA.cpp b/cpp/dA.cpp index b5414e5..783327e 100644 --- a/cpp/dA.cpp +++ b/cpp/dA.cpp @@ -1,30 +1,10 @@ #include #include +#include "utils.h" + #include "dA.h" using namespace std; - - -double uniform(double min, double max) { - return rand() / (RAND_MAX + 1.0) * (max - min) + min; -} - -int binomial(int n, double p) { - if(p < 0 || p > 1) return 0; - - int c = 0; - double r; - - for(int i=0; i -#include -using namespace std; - -double uniform(double min, double max) { - return rand() / (RAND_MAX + 1.0) * (max - min) + min; -} - -int binomial(int n, double p) { - if(p < 0 || p > 1) return 0; - - int c = 0; - double r; - - for(int i=0; i +#include +using namespace std; + + +namespace utils { + + double uniform(double min, double max) { + return rand() / (RAND_MAX + 1.0) * (max - min) + min; + } + + int binomial(int n, double p) { + if(p < 0 || p > 1) return 0; + + int c = 0; + double r; + + for(int i=0; i 1 { return 0 } + + c := 0 + var r float64 + + for i := 0; i < n; i++ { + r = rand.Float64() + if r < p { c++ } + } + + return c +} + +func Sigmoid(x float64) float64 { + return 1.0 / (1.0 + math.Exp(-x)) +} diff --git a/java/.gitkeep b/java/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/java/DBN/src/DBN.java b/java/DBN/src/DBN.java deleted file mode 100644 index 238d1ce..0000000 --- a/java/DBN/src/DBN.java +++ /dev/null @@ -1,222 +0,0 @@ -import java.util.Random; - -public class DBN { - public int N; - public int n_ins; - public int[] hidden_layer_sizes; - public int n_outs; - public int n_layers; - public HiddenLayer[] sigmoid_layers; - public RBM[] rbm_layers; - public LogisticRegression log_layer; - public Random rng; - - public static double sigmoid(double x) { - return 1.0 / (1.0 + Math.pow(Math.E, -x)); - } - - - public DBN(int N, int n_ins, int[] hidden_layer_sizes, int n_outs, int n_layers, Random rng) { - int input_size; - - this.N = N; - this.n_ins = n_ins; - this.hidden_layer_sizes = hidden_layer_sizes; - this.n_outs = n_outs; - this.n_layers = n_layers; - - this.sigmoid_layers = new HiddenLayer[n_layers]; - this.rbm_layers = new RBM[n_layers]; - - if(rng == null) this.rng = new Random(1234); - else this.rng = rng; - - // construct multi-layer - for(int i=0; i 1) return 0; - - int c = 0; - double r; - - for(int i=0; i 1) return 0; - - int c = 0; - double r; - - for(int i=0; i 1) return 0; - - int c = 0; - double r; - - for(int i=0; i 1) return 0; - - int c = 0; - double r; - - for(int i=0; i 1) return 0; - - int c = 0; - double r; - - for(int i=0; i 1) return 0; - - int c = 0; - double r; - - for(int i=0; i dropout_masks; + List layer_inputs; + double[] layer_input; + double[] layer_output = new double[0]; + + for(int epoch=0; epoch(n_layers); + layer_inputs = new ArrayList<>(n_layers+1); // +1 for logistic layer + + // forward hiddenLayers + for(int i=0; i=0; i--) { + + if(i == 
n_layers-1) { + prev_W = logisticLayer.W; + } else { + prev_dy = dy.clone(); + prev_W = hiddenLayers[i+1].W; + } + + if(dropout) { + for(int j=0; j activation; + public DoubleFunction dactivation; + + public HiddenLayer(int N, int n_in, int n_out, double[][] W, double[] b, Random rng, String activation) { + this.N = N; + this.n_in = n_in; + this.n_out = n_out; + + if (rng == null) this.rng = new Random(1234); + else this.rng = rng; + + if (W == null) { + this.W = new double[n_out][n_in]; + double a = 1.0 / this.n_in; + + for(int i=0; i sigmoid(x); + this.dactivation = (double x) -> dsigmoid(x); + + } else if (activation == "tanh") { + this.activation = (double x) -> tanh(x); + this.dactivation = (double x) -> dtanh(x); + } else if (activation == "ReLU") { + this.activation = (double x) -> ReLU(x); + this.dactivation = (double x) -> dReLU(x); + } else { + throw new IllegalArgumentException("activation function not supported"); + } + + } + + public double output(double[] input, double[] w, double b) { + double linear_output = 0.0; + for(int j=0; j 1) return 0; + + int c = 0; + double r; + + for(int i=0; i 0) { + return x; + } else { + return 0.; + } + } + + public static double dReLU(double x) { + if(x > 0) { + return 1.; + } else { + return 0.; + } + } +} diff --git a/python/CDBN.py b/python/CDBN.py index 4ac987a..dbf6648 100755 --- a/python/CDBN.py +++ b/python/CDBN.py @@ -1,16 +1,5 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- -''' - DBN w/ continuous-valued inputs (Linear Energy) - - References : - - Y. Bengio, P. Lamblin, D. Popovici, H. Larochelle: Greedy Layer-Wise - Training of Deep Networks, Advances in Neural Information Processing - Systems 19, 2007 - -''' - import sys import numpy from HiddenLayer import HiddenLayer @@ -20,13 +9,11 @@ from DBN import DBN from utils import * - - class CDBN(DBN): def __init__(self, input=None, label=None,\ n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,\ - numpy_rng=None): + rng=None): self.x = input self.y = label @@ -35,8 +22,8 @@ def __init__(self, input=None, label=None,\ self.rbm_layers = [] self.n_layers = len(hidden_layer_sizes) # = len(self.rbm_layers) - if numpy_rng is None: - numpy_rng = numpy.random.RandomState(1234) + if rng is None: + rng = numpy.random.RandomState(1234) assert self.n_layers > 0 @@ -60,7 +47,7 @@ def __init__(self, input=None, label=None,\ sigmoid_layer = HiddenLayer(input=layer_input, n_in=input_size, n_out=hidden_layer_sizes[i], - numpy_rng=numpy_rng, + rng=rng, activation=sigmoid) self.sigmoid_layers.append(sigmoid_layer) @@ -113,7 +100,7 @@ def test_cdbn(pretrain_lr=0.1, pretraining_epochs=1000, k=1, \ rng = numpy.random.RandomState(123) # construct DBN - dbn = CDBN(input=x, label=y, n_ins=6, hidden_layer_sizes=[5, 5], n_outs=2, numpy_rng=rng) + dbn = CDBN(input=x, label=y, n_ins=6, hidden_layer_sizes=[5, 5], n_outs=2, rng=rng) # pre-training (TrainUnsupervisedDBN) dbn.pretrain(lr=pretrain_lr, k=1, epochs=pretraining_epochs) diff --git a/python/CRBM.py b/python/CRBM.py index 0521883..e870047 100755 --- a/python/CRBM.py +++ b/python/CRBM.py @@ -1,16 +1,5 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- -""" - RBM w/ continuous-valued inputs (Linear Energy) - - References : - - Y. Bengio, P. Lamblin, D. Popovici, H. 
Larochelle: Greedy Layer-Wise - Training of Deep Networks, Advances in Neural Information Processing - Systems 19, 2007 - -""" - import sys import numpy from RBM import RBM @@ -30,7 +19,7 @@ def sample_v_given_h(self, h0_sample): ep = numpy.exp(a_h) v1_mean = 1 / (1 - en) - 1 / a_h - U = numpy.array(self.numpy_rng.uniform( + U = numpy.array(self.rng.uniform( low=0, high=1, size=v1_mean.shape)) @@ -53,7 +42,7 @@ def test_crbm(learning_rate=0.1, k=1, training_epochs=1000): rng = numpy.random.RandomState(123) # construct CRBM - rbm = CRBM(input=data, n_visible=6, n_hidden=5, numpy_rng=rng) + rbm = CRBM(input=data, n_visible=6, n_hidden=5, rng=rng) # train for epoch in xrange(training_epochs): diff --git a/python/DBN.py b/python/DBN.py index f639823..b1b351b 100755 --- a/python/DBN.py +++ b/python/DBN.py @@ -1,20 +1,5 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- -''' - Deep Belief Nets (DBN) - - References : - - Y. Bengio, P. Lamblin, D. Popovici, H. Larochelle: Greedy Layer-Wise - Training of Deep Networks, Advances in Neural Information Processing - Systems 19, 2007 - - - - DeepLearningTutorials - https://github.com/lisa-lab/DeepLearningTutorials - -''' - import sys import numpy from HiddenLayer import HiddenLayer @@ -26,7 +11,7 @@ class DBN(object): def __init__(self, input=None, label=None,\ n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,\ - numpy_rng=None): + rng=None): self.x = input self.y = label @@ -35,8 +20,8 @@ def __init__(self, input=None, label=None,\ self.rbm_layers = [] self.n_layers = len(hidden_layer_sizes) # = len(self.rbm_layers) - if numpy_rng is None: - numpy_rng = numpy.random.RandomState(1234) + if rng is None: + rng = numpy.random.RandomState(1234) assert self.n_layers > 0 @@ -60,7 +45,7 @@ def __init__(self, input=None, label=None,\ sigmoid_layer = HiddenLayer(input=layer_input, n_in=input_size, n_out=hidden_layer_sizes[i], - numpy_rng=numpy_rng, + rng=rng, activation=sigmoid) self.sigmoid_layers.append(sigmoid_layer) @@ -100,21 +85,6 @@ def pretrain(self, lr=0.1, k=1, epochs=100): # print >> sys.stderr, \ # 'Pre-training layer %d, epoch %d, cost ' %(i, epoch), cost - # def pretrain(self, lr=0.1, k=1, epochs=100): - # # pre-train layer-wise - # for i in xrange(self.n_layers): - # rbm = self.rbm_layers[i] - - # for epoch in xrange(epochs): - # layer_input = self.x - # for j in xrange(i): - # layer_input = self.sigmoid_layers[j].sample_h_given_v(layer_input) - - # rbm.contrastive_divergence(lr=lr, k=k, input=layer_input) - # # cost = rbm.get_reconstruction_cross_entropy() - # # print >> sys.stderr, \ - # # 'Pre-training layer %d, epoch %d, cost ' %(i, epoch), cost - def finetune(self, lr=0.1, epochs=100): layer_input = self.sigmoid_layers[-1].sample_h_given_v() @@ -158,12 +128,11 @@ def test_dbn(pretrain_lr=0.1, pretraining_epochs=1000, k=1, \ [0, 1], [0, 1], [0, 1]]) - rng = numpy.random.RandomState(123) # construct DBN - dbn = DBN(input=x, label=y, n_ins=6, hidden_layer_sizes=[3, 3], n_outs=2, numpy_rng=rng) + dbn = DBN(input=x, label=y, n_ins=6, hidden_layer_sizes=[3, 3], n_outs=2, rng=rng) # pre-training (TrainUnsupervisedDBN) dbn.pretrain(lr=pretrain_lr, k=1, epochs=pretraining_epochs) diff --git a/python/Dropout.py b/python/Dropout.py new file mode 100755 index 0000000..ba99116 --- /dev/null +++ b/python/Dropout.py @@ -0,0 +1,142 @@ +# -*- coding: utf-8 -*- + +import sys +import numpy +from HiddenLayer import HiddenLayer +from LogisticRegression import LogisticRegression +from utils import * + + +class Dropout(object): + def __init__(self, input, label,\ + 
n_in, hidden_layer_sizes, n_out,\ + rng=None, activation=ReLU): + + self.x = input + self.y = label + + self.hidden_layers = [] + self.n_layers = len(hidden_layer_sizes) + + if rng is None: + rng = numpy.random.RandomState(1234) + + assert self.n_layers > 0 + + + # construct multi-layer + for i in xrange(self.n_layers): + + # layer_size + if i == 0: + input_size = n_in + else: + input_size = hidden_layer_sizes[i-1] + + # layer_input + if i == 0: + layer_input = self.x + + else: + layer_input = self.hidden_layers[-1].output() + + # construct hidden_layer + hidden_layer = HiddenLayer(input=layer_input, + n_in=input_size, + n_out=hidden_layer_sizes[i], + rng=rng, + activation=activation) + + self.hidden_layers.append(hidden_layer) + + + # layer for ouput using Logistic Regression (softmax) + self.log_layer = LogisticRegression(input=self.hidden_layers[-1].output(), + label=self.y, + n_in=hidden_layer_sizes[-1], + n_out=n_out) + + + def train(self, epochs=5000, dropout=True, p_dropout=0.5, rng=None): + + for epoch in xrange(epochs): + dropout_masks = [] # create different masks in each training epoch + + # forward hidden_layers + for i in xrange(self.n_layers): + if i == 0: + layer_input = self.x + + layer_input = self.hidden_layers[i].forward(input=layer_input) + + if dropout == True: + mask = self.hidden_layers[i].dropout(input=layer_input, p=p_dropout, rng=rng) + layer_input *= mask + + dropout_masks.append(mask) + + + # forward & backward log_layer + self.log_layer.train(input=layer_input) + + + # backward hidden_layers + for i in reversed(xrange(0, self.n_layers)): + if i == self.n_layers-1: + prev_layer = self.log_layer + else: + prev_layer = self.hidden_layers[i+1] + + if dropout == True: + self.hidden_layers[i].backward(prev_layer=prev_layer, dropout=True, mask=dropout_masks[i]) + else: + self.hidden_layers[i].backward(prev_layer=prev_layer) + + + + def predict(self, x, dropout=True, p_dropout=0.5): + layer_input = x + + for i in xrange(self.n_layers): + if dropout == True: + self.hidden_layers[i].W = (1 - p_dropout) * self.hidden_layers[i].W + + layer_input = self.hidden_layers[i].output(input=layer_input) + + return self.log_layer.predict(layer_input) + + + +def test_dropout(n_epochs=5000, dropout=True, p_dropout=0.5): + + x = numpy.array([[0, 0], + [0, 1], + [1, 0], + [1, 1]]) + + y = numpy.array([[0, 1], + [1, 0], + [1, 0], + [0, 1]]) + + rng = numpy.random.RandomState(123) + + + # construct Dropout MLP + classifier = Dropout(input=x, label=y, \ + n_in=2, hidden_layer_sizes=[10, 10], n_out=2, \ + rng=rng, activation=ReLU) + + + # train XOR + classifier.train(epochs=n_epochs, dropout=dropout, \ + p_dropout=p_dropout, rng=rng) + + + # test + print classifier.predict(x) + + + +if __name__ == "__main__": + test_dropout() diff --git a/python/HiddenLayer.py b/python/HiddenLayer.py index 72e51e1..a97bc61 100755 --- a/python/HiddenLayer.py +++ b/python/HiddenLayer.py @@ -1,15 +1,5 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- -''' - Hidden Layer - - References : - - DeepLearningTutorials - https://github.com/lisa-lab/DeepLearningTutorials - -''' - import sys import numpy from utils import * @@ -17,49 +7,89 @@ class HiddenLayer(object): def __init__(self, input, n_in, n_out,\ - W=None, b=None, numpy_rng=None, activation=numpy.tanh): + W=None, b=None, rng=None, activation=tanh): - if numpy_rng is None: - numpy_rng = numpy.random.RandomState(1234) + if rng is None: + rng = numpy.random.RandomState(1234) if W is None: a = 1. 
/ n_in - initial_W = numpy.array(numpy_rng.uniform( # initialize W uniformly + W = numpy.array(rng.uniform( # initialize W uniformly low=-a, high=a, size=(n_in, n_out))) - W = initial_W - if b is None: b = numpy.zeros(n_out) # initialize bias 0 + self.rng = rng + self.x = input - self.numpy_rng = numpy_rng - self.input = input self.W = W self.b = b + if activation == tanh: + self.dactivation = dtanh + + elif activation == sigmoid: + self.dactivation = dsigmoid + + elif activation == ReLU: + self.dactivation = dReLU + + else: + raise ValueError('activation function not supported.') + + self.activation = activation + - # self.params = [self.W, self.b] def output(self, input=None): if input is not None: - self.input = input + self.x = input - linear_output = numpy.dot(self.input, self.W) + self.b + linear_output = numpy.dot(self.x, self.W) + self.b + return self.activation(linear_output) + - return (linear_output if self.activation is None - else self.activation(linear_output)) + def forward(self, input=None): + return self.output(input=input) + def backward(self, prev_layer, lr=0.1, input=None, dropout=False, mask=None): + if input is not None: + self.x = input + + d_y = self.dactivation(prev_layer.x) * numpy.dot(prev_layer.d_y, prev_layer.W.T) + + if dropout == True: + d_y *= mask + + self.W += lr * numpy.dot(self.x.T, d_y) + self.b += lr * numpy.mean(d_y, axis=0) + self.d_y = d_y + + + def dropout(self, input, p, rng=None): + if rng is None: + rng = numpy.random.RandomState(123) + + mask = rng.binomial(size=input.shape, + n=1, + p=1-p) # p is the prob of dropping + + return mask + + def sample_h_given_v(self, input=None): if input is not None: - self.input = input + self.x = input v_mean = self.output() - h_sample = self.numpy_rng.binomial(size=v_mean.shape, + h_sample = self.rng.binomial(size=v_mean.shape, n=1, p=v_mean) return h_sample + + diff --git a/python/LogisticRegression.py b/python/LogisticRegression.py index a828a40..708a1b3 100755 --- a/python/LogisticRegression.py +++ b/python/LogisticRegression.py @@ -1,18 +1,5 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- -''' - Logistic Regression - - References : - - Jason Rennie: Logistic Regression, - http://qwone.com/~jason/writing/lr.pdf - - - DeepLearningTutorials - https://github.com/lisa-lab/DeepLearningTutorials - -''' - import sys import numpy from utils import * @@ -22,24 +9,46 @@ class LogisticRegression(object): def __init__(self, input, label, n_in, n_out): self.x = input self.y = label + self.W = numpy.zeros((n_in, n_out)) # initialize W 0 - self.b = numpy.zeros(n_out) # initialize bias 0 + self.b = numpy.zeros(n_out) # initialize bias 0 - # self.params = [self.W, self.b] - def train(self, lr=0.1, input=None, L2_reg=0.00): + def train(self, lr=0.1, input=None, L2_reg=0.00): if input is not None: self.x = input - # p_y_given_x = sigmoid(numpy.dot(self.x, self.W) + self.b) - p_y_given_x = softmax(numpy.dot(self.x, self.W) + self.b) + p_y_given_x = self.output(self.x) d_y = self.y - p_y_given_x - + self.W += lr * numpy.dot(self.x.T, d_y) - lr * L2_reg * self.W self.b += lr * numpy.mean(d_y, axis=0) + self.d_y = d_y - # cost = self.negative_log_likelihood() - # return cost + + # def train(self, lr=0.1, input=None, L2_reg=0.00): + # self.forward(input) + # self.backward(lr, L2_reg) + + # def forward(self, input=None): + # if input is not None: + # self.x = input + + # p_y_given_x = self.output(self.x) + # self.d_y = self.y - p_y_given_x + + # def backward(self, lr=0.1, L2_reg=0.00): + # self.W += lr * numpy.dot(self.x.T, 
self.d_y) - lr * L2_reg * self.W + # self.b += lr * numpy.mean(self.d_y, axis=0) + + + def output(self, x): + # return sigmoid(numpy.dot(x, self.W) + self.b) + return softmax(numpy.dot(x, self.W) + self.b) + + def predict(self, x): + return self.output(x) + def negative_log_likelihood(self): # sigmoid_activation = sigmoid(numpy.dot(self.x, self.W) + self.b) @@ -53,44 +62,41 @@ def negative_log_likelihood(self): return cross_entropy - def predict(self, x): - # return sigmoid(numpy.dot(x, self.W) + self.b) - return softmax(numpy.dot(x, self.W) + self.b) +def test_lr(learning_rate=0.1, n_epochs=500): + rng = numpy.random.RandomState(123) -def test_lr(learning_rate=0.01, n_epochs=200): # training data - x = numpy.array([[1,1,1,0,0,0], - [1,0,1,0,0,0], - [1,1,1,0,0,0], - [0,0,1,1,1,0], - [0,0,1,1,0,0], - [0,0,1,1,1,0]]) - y = numpy.array([[1, 0], - [1, 0], - [1, 0], - [0, 1], - [0, 1], - [0, 1]]) + d = 2 + N = 10 + x1 = rng.randn(N, d) + numpy.array([0, 0]) + x2 = rng.randn(N, d) + numpy.array([20, 10]) + y1 = [[1, 0] for i in xrange(N)] + y2 = [[0, 1] for i in xrange(N)] + + x = numpy.r_[x1.astype(int), x2.astype(int)] + y = numpy.r_[y1, y2] # construct LogisticRegression - classifier = LogisticRegression(input=x, label=y, n_in=6, n_out=2) + classifier = LogisticRegression(input=x, label=y, n_in=d, n_out=2) # train for epoch in xrange(n_epochs): classifier.train(lr=learning_rate) # cost = classifier.negative_log_likelihood() # print >> sys.stderr, 'Training epoch %d, cost is ' % epoch, cost - learning_rate *= 0.95 + learning_rate *= 0.995 # test - x = numpy.array([[1, 1, 0, 0, 0, 0], - [0, 0, 0, 1, 1, 0], - [1, 1, 1, 1, 1, 0]]) + result = classifier.predict(x) + for i in xrange(N): + print result[i] + print + for i in xrange(N): + print result[N+i] - print >> sys.stderr, classifier.predict(x) if __name__ == "__main__": diff --git a/python/MLP.py b/python/MLP.py new file mode 100755 index 0000000..e9ded0b --- /dev/null +++ b/python/MLP.py @@ -0,0 +1,81 @@ +# -*- coding: utf-8 -*- + +import sys +import numpy +from HiddenLayer import HiddenLayer +from LogisticRegression import LogisticRegression +from utils import * + + +class MLP(object): + def __init__(self, input, label, n_in, n_hidden, n_out, rng=None): + + self.x = input + self.y = label + + if rng is None: + rng = numpy.random.RandomState(1234) + + # construct hidden_layer + self.hidden_layer = HiddenLayer(input=self.x, + n_in=n_in, + n_out=n_hidden, + rng=rng, + activation=tanh) + + # construct log_layer + self.log_layer = LogisticRegression(input=self.hidden_layer.output, + label=self.y, + n_in=n_hidden, + n_out=n_out) + + def train(self): + # forward hidden_layer + layer_input = self.hidden_layer.forward() + + # forward & backward log_layer + # self.log_layer.forward(input=layer_input) + self.log_layer.train(input=layer_input) + + # backward hidden_layer + self.hidden_layer.backward(prev_layer=self.log_layer) + + # backward log_layer + # self.log_layer.backward() + + + def predict(self, x): + x = self.hidden_layer.output(input=x) + return self.log_layer.predict(x) + + +def test_mlp(n_epochs=5000): + + x = numpy.array([[0, 0], + [0, 1], + [1, 0], + [1, 1]]) + + y = numpy.array([[0, 1], + [1, 0], + [1, 0], + [0, 1]]) + + + rng = numpy.random.RandomState(123) + + + # construct MLP + classifier = MLP(input=x, label=y, n_in=2, n_hidden=3, n_out=2, rng=rng) + + # train + for epoch in xrange(n_epochs): + classifier.train() + + + # test + print classifier.predict(x) + + +if __name__ == "__main__": + test_mlp() diff --git a/python/RBM.py 
b/python/RBM.py index 781241d..7a127d8 100755 --- a/python/RBM.py +++ b/python/RBM.py @@ -1,38 +1,23 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- -""" - Restricted Boltzmann Machine (RBM) - - References : - - Y. Bengio, P. Lamblin, D. Popovici, H. Larochelle: Greedy Layer-Wise - Training of Deep Networks, Advances in Neural Information Processing - Systems 19, 2007 - - - - DeepLearningTutorials - https://github.com/lisa-lab/DeepLearningTutorials - -""" - import sys import numpy from utils import * class RBM(object): def __init__(self, input=None, n_visible=2, n_hidden=3, \ - W=None, hbias=None, vbias=None, numpy_rng=None): + W=None, hbias=None, vbias=None, rng=None): self.n_visible = n_visible # num of units in visible (input) layer self.n_hidden = n_hidden # num of units in hidden layer - if numpy_rng is None: - numpy_rng = numpy.random.RandomState(1234) + if rng is None: + rng = numpy.random.RandomState(1234) if W is None: a = 1. / n_visible - initial_W = numpy.array(numpy_rng.uniform( # initialize W uniformly + initial_W = numpy.array(rng.uniform( # initialize W uniformly low=-a, high=a, size=(n_visible, n_hidden))) @@ -46,14 +31,12 @@ def __init__(self, input=None, n_visible=2, n_hidden=3, \ vbias = numpy.zeros(n_visible) # initialize v bias 0 - self.numpy_rng = numpy_rng + self.rng = rng self.input = input self.W = W self.hbias = hbias self.vbias = vbias - # self.params = [self.W, self.hbias, self.vbias] - def contrastive_divergence(self, lr=0.1, k=1, input=None): if input is not None: @@ -75,10 +58,10 @@ def contrastive_divergence(self, lr=0.1, k=1, input=None): # chain_end = nv_samples - self.W += lr * (numpy.dot(self.input.T, ph_sample) + self.W += lr * (numpy.dot(self.input.T, ph_mean) - numpy.dot(nv_samples.T, nh_means)) self.vbias += lr * numpy.mean(self.input - nv_samples, axis=0) - self.hbias += lr * numpy.mean(ph_sample - nh_means, axis=0) + self.hbias += lr * numpy.mean(ph_mean - nh_means, axis=0) # cost = self.get_reconstruction_cross_entropy() # return cost @@ -86,7 +69,7 @@ def contrastive_divergence(self, lr=0.1, k=1, input=None): def sample_h_given_v(self, v0_sample): h1_mean = self.propup(v0_sample) - h1_sample = self.numpy_rng.binomial(size=h1_mean.shape, # discrete: binomial + h1_sample = self.rng.binomial(size=h1_mean.shape, # discrete: binomial n=1, p=h1_mean) @@ -95,7 +78,7 @@ def sample_h_given_v(self, v0_sample): def sample_v_given_h(self, h0_sample): v1_mean = self.propdown(h0_sample) - v1_sample = self.numpy_rng.binomial(size=v1_mean.shape, # discrete: binomial + v1_sample = self.rng.binomial(size=v1_mean.shape, # discrete: binomial n=1, p=v1_mean) @@ -153,7 +136,7 @@ def test_rbm(learning_rate=0.1, k=1, training_epochs=1000): rng = numpy.random.RandomState(123) # construct RBM - rbm = RBM(input=data, n_visible=6, n_hidden=2, numpy_rng=rng) + rbm = RBM(input=data, n_visible=6, n_hidden=2, rng=rng) # train for epoch in xrange(training_epochs): diff --git a/python/SdA.py b/python/SdA.py index 4a0f45a..5f8de37 100755 --- a/python/SdA.py +++ b/python/SdA.py @@ -1,20 +1,5 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- - -""" - Stacked Denoising Autoencoders (SdA) - - References : - - P. Vincent, H. Larochelle, Y. Bengio, P.A. 
Manzagol: Extracting and - Composing Robust Features with Denoising Autoencoders, ICML' 08, 1096-1103, - 2008 - - - DeepLearningTutorials - https://github.com/lisa-lab/DeepLearningTutorials - -""" - import sys import numpy from HiddenLayer import HiddenLayer @@ -26,7 +11,7 @@ class SdA(object): def __init__(self, input=None, label=None,\ n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,\ - numpy_rng=None): + rng=None): self.x = input self.y = label @@ -35,8 +20,8 @@ def __init__(self, input=None, label=None,\ self.dA_layers = [] self.n_layers = len(hidden_layer_sizes) # = len(self.rbm_layers) - if numpy_rng is None: - numpy_rng = numpy.random.RandomState(1234) + if rng is None: + rng = numpy.random.RandomState(1234) assert self.n_layers > 0 @@ -60,7 +45,7 @@ def __init__(self, input=None, label=None,\ sigmoid_layer = HiddenLayer(input=layer_input, n_in=input_size, n_out=hidden_layer_sizes[i], - numpy_rng=numpy_rng, + rng=rng, activation=sigmoid) self.sigmoid_layers.append(sigmoid_layer) @@ -118,8 +103,8 @@ def predict(self, x): sigmoid_layer = self.sigmoid_layers[i] layer_input = sigmoid_layer.output(input=layer_input) - out = self.log_layer.predict(layer_input) - return out + return self.log_layer.predict(layer_input) + @@ -152,7 +137,7 @@ def test_SdA(pretrain_lr=0.1, pretraining_epochs=1000, corruption_level=0.3, \ # construct SdA sda = SdA(input=x, label=y, \ - n_ins=20, hidden_layer_sizes=[15, 15], n_outs=2, numpy_rng=rng) + n_ins=20, hidden_layer_sizes=[15, 15], n_outs=2, rng=rng) # pre-training sda.pretrain(lr=pretrain_lr, corruption_level=corruption_level, epochs=pretraining_epochs) diff --git a/python/dA.py b/python/dA.py index 0b911eb..edbf6c7 100755 --- a/python/dA.py +++ b/python/dA.py @@ -1,23 +1,5 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- -""" - Denoising Autoencoders (dA) - - References : - - P. Vincent, H. Larochelle, Y. Bengio, P.A. Manzagol: Extracting and - Composing Robust Features with Denoising Autoencoders, ICML'08, 1096-1103, - 2008 - - - DeepLearningTutorials - https://github.com/lisa-lab/DeepLearningTutorials - - - Yusuke Sugomori: Stochastic Gradient Descent for Denoising Autoencoders, - http://yusugomori.com/docs/SGD_DA.pdf - -""" - - import sys import numpy from utils import * @@ -25,44 +7,39 @@ class dA(object): def __init__(self, input=None, n_visible=2, n_hidden=3, \ - W=None, hbias=None, vbias=None, numpy_rng=None): + W=None, hbias=None, vbias=None, rng=None): self.n_visible = n_visible # num of units in visible (input) layer self.n_hidden = n_hidden # num of units in hidden layer - if numpy_rng is None: - numpy_rng = numpy.random.RandomState(1234) + if rng is None: + rng = numpy.random.RandomState(1234) if W is None: a = 1. 
/ n_visible - initial_W = numpy.array(numpy_rng.uniform( # initialize W uniformly + W = numpy.array(rng.uniform( # initialize W uniformly low=-a, high=a, size=(n_visible, n_hidden))) - W = initial_W - if hbias is None: hbias = numpy.zeros(n_hidden) # initialize h bias 0 if vbias is None: vbias = numpy.zeros(n_visible) # initialize v bias 0 - self.numpy_rng = numpy_rng + self.rng = rng self.x = input self.W = W self.W_prime = self.W.T self.hbias = hbias self.vbias = vbias - # self.params = [self.W, self.hbias, self.vbias] - - def get_corrupted_input(self, input, corruption_level): assert corruption_level < 1 - return self.numpy_rng.binomial(size=input.shape, + return self.rng.binomial(size=input.shape, n=1, p=1-corruption_level) * input @@ -133,7 +110,7 @@ def test_dA(learning_rate=0.1, corruption_level=0.3, training_epochs=50): rng = numpy.random.RandomState(123) # construct dA - da = dA(input=data, n_visible=20, n_hidden=5, numpy_rng=rng) + da = dA(input=data, n_visible=20, n_hidden=5, rng=rng) # train for epoch in xrange(training_epochs): diff --git a/python/utils.py b/python/utils.py index 5c4a748..7aca40d 100755 --- a/python/utils.py +++ b/python/utils.py @@ -1,7 +1,5 @@ -''' ''' -import numpy - +import numpy numpy.seterr(all='ignore') @@ -9,6 +7,15 @@ def sigmoid(x): return 1. / (1 + numpy.exp(-x)) +def dsigmoid(x): + return x * (1. - x) + +def tanh(x): + return numpy.tanh(x) + +def dtanh(x): + return 1. - x * x + def softmax(x): e = numpy.exp(x - numpy.max(x)) # prevent overflow if e.ndim == 1: @@ -17,6 +24,13 @@ def softmax(x): return e / numpy.array([numpy.sum(e, axis=1)]).T # ndim = 2 +def ReLU(x): + return x * (x > 0) + +def dReLU(x): + return 1. * (x > 0) + + # # probability density for the Gaussian dist # def gaussian(x, mean=0.0, scale=1.0): # s = 2 * numpy.power(scale, 2) diff --git a/scala/DBN.scala b/scala/DBN.scala new file mode 100644 index 0000000..1b8df68 --- /dev/null +++ b/scala/DBN.scala @@ -0,0 +1,231 @@ +import scala.util.Random +import scala.math + +class DBN(val N: Int, val n_ins: Int, hidden_layer_sizes: Array[Int], val n_outs: Int, val n_layers: Int, var rng: Random=null) { + + def sigmoid(x: Double): Double = { + return 1.0 / (1.0 + math.pow(math.E, -x)) + } + + + var input_size: Int = 0 + + val sigmoid_layers: Array[HiddenLayer] = new Array[HiddenLayer](n_layers) + val rbm_layers: Array[RBM] = new Array[RBM](n_layers) + + if(rng == null) rng = new Random(1234) + + var i: Int = 0 + // construct multi-layer + for(i <- 0 until n_layers) { + if(i == 0) { + input_size = n_ins + } else { + input_size = hidden_layer_sizes(i-1) + } + + // construct sigmoid_layer + sigmoid_layers(i) = new HiddenLayer(N, input_size, hidden_layer_sizes(i), null, null, rng) + + // construct rbm_layer + rbm_layers(i) = new RBM(N, input_size, hidden_layer_sizes(i), sigmoid_layers(i).W, sigmoid_layers(i).b, null, rng) + + } + + // layer for output using LogisticRegression + val log_layer: LogisticRegression = new LogisticRegression(N, hidden_layer_sizes(n_layers-1), n_outs) + + + def pretrain(train_X: Array[Array[Int]], lr: Double, k: Int, epochs: Int) { + var layer_input: Array[Int] = new Array[Int](0) + var prev_layer_input_size: Int = 0 + var prev_layer_input: Array[Int] = new Array[Int](0) + + var i: Int = 0 + var j: Int = 0 + var epoch: Int = 0 + var n: Int = 0 + var l: Int = 0 + + for(i <- 0 until n_layers) { // layer-wise + for(epoch <- 0 until epochs) { // training epochs + for(n <- 0 until N) { // input x1...xN + // layer input + for(l <- 0 to i) { + if(l == 0) { + layer_input = 
new Array[Int](n_ins) + for(j <- 0 until n_ins) layer_input(j) = train_X(n)(j) + + } else { + if(l == 1) prev_layer_input_size = n_ins + else prev_layer_input_size = hidden_layer_sizes(l-2) + + prev_layer_input = new Array[Int](prev_layer_input_size) + for(j <- 0 until prev_layer_input_size) prev_layer_input(j) = layer_input(j) + + layer_input = new Array[Int](hidden_layer_sizes(l-1)) + sigmoid_layers(l-1).sample_h_given_v(prev_layer_input, layer_input) + } + } + + rbm_layers(i).contrastive_divergence(layer_input, lr, k) + } + } + } + } + + + def finetune(train_X: Array[Array[Int]], train_Y: Array[Array[Int]], lr: Double, epochs: Int) { + var layer_input: Array[Int] = new Array[Int](0) + var prev_layer_input: Array[Int] = new Array[Int](0) + + var epoch: Int = 0 + var n: Int = 0 + var i: Int = 0 + var j: Int = 0 + + for(epoch <- 0 until epochs) { + for(n <- 0 until N) { + + // layer input + for(i <- 0 until n_layers) { + if(i == 0) { + prev_layer_input = new Array[Int](n_ins) + for(j <- 0 until n_ins) prev_layer_input(j) = train_X(n)(j) + } else { + prev_layer_input = new Array[Int](hidden_layer_sizes(i-1)) + for(j <- 0 until hidden_layer_sizes(i-1)) prev_layer_input(j) = layer_input(j) + } + + layer_input = new Array[Int](hidden_layer_sizes(i)) + sigmoid_layers(i).sample_h_given_v(prev_layer_input, layer_input) + } + + log_layer.train(layer_input, train_Y(n), lr) + } + // lr *= 0.95 + } + } + + def predict(x: Array[Int], y: Array[Double]) { + var layer_input: Array[Double] = new Array[Double](0) + var prev_layer_input: Array[Double] = new Array[Double](n_ins) + + var i: Int = 0 + var j: Int = 0 + var k: Int = 0 + + for(j <- 0 until n_ins) prev_layer_input(j) = x(j) + + var linear_outoput: Double = 0 + + // layer activation + for(i <- 0 until n_layers) { + layer_input = new Array[Double](sigmoid_layers(i).n_out) + + for(k <- 0 until sigmoid_layers(i).n_out) { + linear_outoput = 0.0 + + for(j <- 0 until sigmoid_layers(i).n_in) { + linear_outoput += sigmoid_layers(i).W(k)(j) * prev_layer_input(j) + } + linear_outoput += sigmoid_layers(i).b(k) + layer_input(k) = sigmoid(linear_outoput) + } + + if(i < n_layers-1) { + prev_layer_input = new Array[Double](sigmoid_layers(i).n_out) + for(j <- 0 until sigmoid_layers(i).n_out) prev_layer_input(j) = layer_input(j) + } + } + + for(i <- 0 until log_layer.n_out) { + y(i) = 0 + for(j <- 0 until log_layer.n_in) { + y(i) += log_layer.W(i)(j) * layer_input(j) + } + y(i) += log_layer.b(i) + } + + log_layer.softmax(y) + } + +} + + +object DBN { + def test_dbn() { + val rng: Random = new Random(123) + + val pretrain_lr: Double = 0.1 + val pretraining_epochs: Int = 1000 + val k: Int = 1 + val finetune_lr: Double = 0.1 + val finetune_epochs: Int = 500 + + val train_N: Int = 6 + val test_N: Int = 4 + val n_ins: Int = 6 + val n_outs: Int = 2 + val hidden_layer_sizes: Array[Int] = Array(3, 3) + val n_layers = hidden_layer_sizes.length + + + // training data + val train_X: Array[Array[Int]] = Array( + Array(1, 1, 1, 0, 0, 0), + Array(1, 0, 1, 0, 0, 0), + Array(1, 1, 1, 0, 0, 0), + Array(0, 0, 1, 1, 1, 0), + Array(0, 0, 1, 1, 0, 0), + Array(0, 0, 1, 1, 1, 0) + ) + + val train_Y: Array[Array[Int]] = Array( + Array(1, 0), + Array(1, 0), + Array(1, 0), + Array(0, 1), + Array(0, 1), + Array(0, 1) + ) + + // construct DBN + val dbn: DBN = new DBN(train_N, n_ins, hidden_layer_sizes, n_outs, n_layers, rng) + + // pretrain + dbn.pretrain(train_X, pretrain_lr, k, pretraining_epochs); + + // finetune + dbn.finetune(train_X, train_Y, finetune_lr, finetune_epochs); + + + // test 
data + val test_X: Array[Array[Int]] = Array( + Array(1, 1, 0, 0, 0, 0), + Array(1, 1, 1, 1, 0, 0), + Array(0, 0, 0, 1, 1, 0), + Array(0, 0, 1, 1, 1, 0) + ) + + val test_Y: Array[Array[Double]] = Array.ofDim[Double](test_N, n_outs) + + var i: Int = 0 + var j: Int = 0 + + // test + for(i <- 0 until test_N) { + dbn.predict(test_X(i), test_Y(i)) + for(j <- 0 until n_outs) { + print(test_Y(i)(j) + " ") + } + println() + } + + } + + + def main(args: Array[String]) { + test_dbn() + } +} diff --git a/scala/HiddenLayer.scala b/scala/HiddenLayer.scala new file mode 100644 index 0000000..118bc5e --- /dev/null +++ b/scala/HiddenLayer.scala @@ -0,0 +1,73 @@ +import scala.util.Random +import scala.math + +class HiddenLayer(val N: Int, val n_in: Int, val n_out: Int, _W: Array[Array[Double]], _b: Array[Double], var rng: Random=null) { + + + def uniform(min: Double, max: Double): Double = { + return rng.nextDouble() * (max - min) + min + } + + def binomial(n: Int, p: Double): Int = { + if(p < 0 || p > 1) return 0 + + var c: Int = 0 + var r: Double = 0.0 + + var i: Int = 0 + + for(i <- 0 until n) { + r = rng.nextDouble() + if(r < p) c += 1 + } + + return c + } + + def sigmoid(x: Double): Double = { + return 1.0 / (1.0 + math.pow(math.E, -x)) + } + + + if(rng == null) rng = new Random(1234) + + var a: Double = 0.0 + var W: Array[Array[Double]] = Array.ofDim[Double](n_out, n_in) + var b: Array[Double] = new Array[Double](n_out) + + var i: Int = 0 + if(_W == null) { + a = 1.0 / n_in + + for(i <- 0 until n_out) { + for(j <- 0 until n_in) { + W(i)(j) = uniform(-a, a) + } + } + } else { + W = _W + } + + if(_b != null) b = _b + + + def output(input: Array[Int], w: Array[Double], b: Double): Double = { + var linear_output: Double = 0.0 + + var j: Int = 0 + for(j <- 0 until n_in) { + linear_output += w(j) * input(j) + } + linear_output += b + + return sigmoid(linear_output) + } + + def sample_h_given_v(input: Array[Int], sample: Array[Int]) { + var i: Int = 0 + + for(i <- 0 until n_out) { + sample(i) = binomial(1, output(input, W(i), b(i))) + } + } +} diff --git a/scala/LogisticRegression.scala b/scala/LogisticRegression.scala new file mode 100644 index 0000000..bf9b229 --- /dev/null +++ b/scala/LogisticRegression.scala @@ -0,0 +1,133 @@ +// $ scalac LogisticRegression.scala +// $ scala LogisticRegression + +import scala.math + +class LogisticRegression(val N: Int, val n_in: Int, val n_out: Int) { + + val W: Array[Array[Double]] = Array.ofDim[Double](n_out, n_in) + val b: Array[Double] = new Array[Double](n_out) + + def train(x: Array[Int], y: Array[Int], lr: Double) { + val p_y_given_x: Array[Double] = new Array[Double](n_out) + val dy: Array[Double] = new Array[Double](n_out) + + var i: Int = 0 + var j: Int = 0 + for(i <- 0 until n_out) { + p_y_given_x(i) = 0 + for(j <- 0 until n_in) { + p_y_given_x(i) += W(i)(j) * x(j) + } + p_y_given_x(i) += b(i) + } + softmax(p_y_given_x) + + for(i <- 0 until n_out) { + dy(i) = y(i) - p_y_given_x(i) + + for(j <- 0 until n_in) { + W(i)(j) += lr * dy(i) * x(j) / N + } + b(i) += lr * dy(i) / N + } + } + + + def softmax(x: Array[Double]) { + var max: Double = 0.0 + var sum: Double = 0.0 + + var i: Int = 0 + for(i <- 0 until n_out) if(max < x(i)) max = x(i) + + for(i <- 0 until n_out) { + x(i) = math.exp(x(i) - max) + sum += x(i) + } + + for(i <- 0 until n_out) x(i) /= sum + } + + + def predict(x: Array[Int], y: Array[Double]) { + var i: Int = 0 + var j: Int = 0 + for(i <- 0 until n_out) { + y(i) = 0 + for(j <- 0 until n_in) { + y(i) += W(i)(j) * x(j) + } + y(i) += b(i) + } + 
softmax(y) + } + +} + + +object LogisticRegression { + def test_lr() { + val learning_rate: Double = 0.1 + val n_epochs: Int = 500 + + val train_N: Int = 6 + val test_N: Int = 2 + val n_in: Int = 6 + val n_out: Int = 2 + + val train_X: Array[Array[Int]] = Array( + Array(1, 1, 1, 0, 0, 0), + Array(1, 0, 1, 0, 0, 0), + Array(1, 1, 1, 0, 0, 0), + Array(0, 0, 1, 1, 1, 0), + Array(0, 0, 1, 0, 1, 0), + Array(0, 0, 1, 1, 1, 0) + ) + + val train_Y: Array[Array[Int]] = Array( + Array(1, 0), + Array(1, 0), + Array(1, 0), + Array(0, 1), + Array(0, 1), + Array(0, 1) + ) + + // construct + val classifier = new LogisticRegression(train_N, n_in, n_out) + + // train + var epoch: Int = 0 + var i: Int = 0 + for(epoch <- 0 until n_epochs) { + for(i <- 0 until train_N) { + classifier.train(train_X(i), train_Y(i), learning_rate) + } + // learning_rate *= 0.95 + } + + // test data + val test_X: Array[Array[Int]] = Array( + Array(1, 0, 1, 0, 0, 0), + Array(0, 0, 1, 1, 1, 0) + ) + + val test_Y: Array[Array[Double]] = Array.ofDim[Double](test_N, n_out) + + // test + var j: Int = 0 + for(i <- 0 until test_N) { + classifier.predict(test_X(i), test_Y(i)) + for(j <- 0 until n_out) { + printf("%.5f ", test_Y(i)(j)) + } + println() + } + } + + def main(args: Array[String]) { + test_lr() + } + +} diff --git a/scala/RBM.scala b/scala/RBM.scala index 79ae124..ad760fb 100644 --- a/scala/RBM.scala +++ b/scala/RBM.scala @@ -86,7 +86,8 @@ class RBM(val N: Int, val n_visible: Int, val n_hidden: Int, var j: Int = 0 for(i <- 0 until n_hidden) { for(j <- 0 until n_visible) { - W(i)(j) += lr * (ph_sample(i) * input(j) - nh_means(i) * nv_samples(j)) / N + // W(i)(j) += lr * (ph_sample(i) * input(j) - nh_means(i) * nv_samples(j)) / N + W(i)(j) += lr * (ph_mean(i) * input(j) - nh_means(i) * nv_samples(j)) / N } hbias(i) += lr * (ph_sample(i) - nh_means(i)) / N } diff --git a/scala/SdA.scala b/scala/SdA.scala new file mode 100644 index 0000000..3f897e5 --- /dev/null +++ b/scala/SdA.scala @@ -0,0 +1,236 @@ +import scala.util.Random +import scala.math + +class SdA(val N: Int, val n_ins: Int, hidden_layer_sizes: Array[Int], val n_outs: Int, val n_layers:Int, var rng: Random=null) { + + def sigmoid(x: Double): Double = { + return 1.0 / (1.0 + math.pow(math.E, -x)) + } + + var input_size: Int = 0 + + // var hidden_layer_sizes: Array[Int] = new Array[Int](n_layers) + var sigmoid_layers: Array[HiddenLayer] = new Array[HiddenLayer](n_layers) + var dA_layers: Array[dA] = new Array[dA](n_layers) + + if(rng == null) rng = new Random(1234) + + + var i: Int = 0 + + // construct multi-layer + for(i <- 0 until n_layers) { + if(i == 0) { + input_size = n_ins + } else { + input_size = hidden_layer_sizes(i-1) + } + + // construct sigmoid_layer + sigmoid_layers(i) = new HiddenLayer(N, input_size, hidden_layer_sizes(i), null, null, rng) + + // construct dA_layer + dA_layers(i) = new dA(N, input_size, hidden_layer_sizes(i), sigmoid_layers(i).W, sigmoid_layers(i).b, null, rng) + } + + // layer for output using LogisticRegression + val log_layer = new LogisticRegression(N, hidden_layer_sizes(n_layers-1), n_outs) + + + def pretrain(train_X: Array[Array[Int]], lr: Double, corruption_level: Double, epochs: Int) { + var layer_input: Array[Int] = new Array[Int](0) + var prev_layer_input_size: Int = 0 + var prev_layer_input: Array[Int] = new Array[Int](0) + + var i: Int = 0 + var j: Int = 0 + var epoch: Int = 0 + var n: Int = 0 + var l: Int = 0 + + for(i <- 0 until n_layers) { // layer-wise + for(epoch <- 0 until epochs) { // training epochs + for(n <- 0 until N) 
{ // input x1...xN + // layer input + for(l <- 0 to i) { + if(l == 0) { + layer_input = new Array[Int](n_ins) + for(j <- 0 until n_ins) layer_input(j) = train_X(n)(j) + } else { + if(l == 1) prev_layer_input_size = n_ins + else prev_layer_input_size = hidden_layer_sizes(l-2) + + prev_layer_input = new Array[Int](prev_layer_input_size) + for(j <- 0 until prev_layer_input_size) prev_layer_input(j) = layer_input(j) + + layer_input = new Array[Int](hidden_layer_sizes(l-1)) + + sigmoid_layers(l-1).sample_h_given_v(prev_layer_input, layer_input) + } + } + + dA_layers(i).train(layer_input, lr, corruption_level) + } + } + } + + } + + + def finetune(train_X: Array[Array[Int]], train_Y: Array[Array[Int]], lr: Double, epochs: Int) { + var layer_input: Array[Int] = new Array[Int](0) + var prev_layer_input: Array[Int] = new Array[Int](0) + + var epoch: Int = 0 + var n: Int = 0 + + + for(epoch <- 0 until epochs) { + for(n <- 0 until N) { + + // layer input + for(i <- 0 until n_layers) { + if(i == 0) { + prev_layer_input = new Array[Int](n_ins) + for(j <- 0 until n_ins) prev_layer_input(j) = train_X(n)(j) + } else { + prev_layer_input = new Array[Int](hidden_layer_sizes(i-1)) + for(j <- 0 until hidden_layer_sizes(i-1)) prev_layer_input(j) = layer_input(j) + } + + layer_input = new Array[Int](hidden_layer_sizes(i)) + sigmoid_layers(i).sample_h_given_v(prev_layer_input, layer_input) + } + + log_layer.train(layer_input, train_Y(n), lr) + } + // lr *= 0.95 + } + } + + def predict(x: Array[Int], y: Array[Double]) { + var layer_input: Array[Double] = new Array[Double](0) + var prev_layer_input: Array[Double] = new Array[Double](n_ins) + + var j: Int = 0 + for(j <- 0 until n_ins) prev_layer_input(j) = x(j) + + var linear_output: Double = 0.0 + + // layer activation + var i: Int = 0 + var k: Int = 0 + + for(i <- 0 until n_layers) { + layer_input = new Array[Double](sigmoid_layers(i).n_out) + + for(k <- 0 until sigmoid_layers(i).n_out) { + linear_output = 0.0 + + for(j <- 0 until sigmoid_layers(i).n_in) { + linear_output += sigmoid_layers(i).W(k)(j) * prev_layer_input(j) + } + linear_output += sigmoid_layers(i).b(k) + layer_input(k) = sigmoid(linear_output) + } + + if(i < n_layers-1) { + prev_layer_input = new Array[Double](sigmoid_layers(i).n_out) + for(j <- 0 until sigmoid_layers(i).n_out) prev_layer_input(j) = layer_input(j) + } + } + + for(i <- 0 until log_layer.n_out) { + y(i) = 0 + for(j <- 0 until log_layer.n_in) { + y(i) += log_layer.W(i)(j) * layer_input(j) + } + y(i) += log_layer.b(i) + } + + log_layer.softmax(y) + } + +} + + +object SdA { + def test_sda() { + val rng: Random = new Random(123) + + val pretrain_lr: Double = 0.1 + val corruption_level: Double = 0.3 + val pretraining_epochs: Int = 1000 + val finetune_lr: Double = 0.1 + val finetune_epochs: Int = 500 + + val train_N: Int = 10 + val test_N: Int = 4 + val n_ins: Int = 28 + val n_outs: Int = 2 + val hidden_layer_sizes: Array[Int] = Array(15, 15) + val n_layers: Int = hidden_layer_sizes.length + + // training data + val train_X: Array[Array[Int]] = Array( + Array(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + Array(0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + Array(1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + Array(0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + Array(1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + Array(0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), + Array(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1), + Array(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1), + Array(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1), + Array(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1) + ) + + val train_Y: Array[Array[Int]] = Array( + Array(1, 0), + Array(1, 0), + Array(1, 0), + Array(1, 0), + Array(1, 0), + Array(0, 1), + Array(0, 1), + Array(0, 1), + Array(0, 1), + Array(0, 1) + ) + + // construct SdA + val sda:SdA = new SdA(train_N, n_ins, hidden_layer_sizes, n_outs, n_layers, rng) + + // pretrain + sda.pretrain(train_X, pretrain_lr, corruption_level, pretraining_epochs) + + // finetune + sda.finetune(train_X, train_Y, finetune_lr, finetune_epochs) + + // test data + val test_X: Array[Array[Int]] = Array( + Array(1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + Array(1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + Array(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1), + Array(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) + ) + + val test_Y: Array[Array[Double]] = Array.ofDim[Double](test_N, n_outs) + + // test + var i: Int = 0 + var j: Int = 0 + + for(i <- 0 until test_N) { + sda.predict(test_X(i), test_Y(i)) + for(j <- 0 until n_outs) { + print(test_Y(i)(j) + " ") + } + println() + } + } + + def main(args: Array[String]) { + test_sda() + } +}
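Two of the recurring fixes in this patch are worth spelling out. First, DBN_predict and SdA_predict (c/DBN.c, c/SdA.c, and their C++ counterparts) now reset linear_output inside the loop over output units; previously unit k inherited the accumulated sum of units 0..k-1. A minimal sketch of the corrected per-layer forward pass, written here in Python for brevity (sigmoid is the helper from python/utils.py):

```python
# Sketch of the corrected layer forward pass: the accumulator is cleared for
# every output unit k, so each unit sums only its own row of W.
from utils import sigmoid

def layer_forward(W, b, prev_input):          # W: n_out x n_in, b: n_out
    out = []
    for k in range(len(W)):
        linear_output = 0.0                   # reset per output unit (the fix)
        for j in range(len(prev_input)):
            linear_output += W[k][j] * prev_input[j]
        linear_output += b[k]
        out.append(sigmoid(linear_output))
    return out
```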
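Second, the contrastive-divergence weight update in the C, C++, Scala, and Python RBMs now uses the hidden probabilities ph_mean rather than the sampled states ph_sample for the positive statistics, which reduces the variance of the gradient estimate. A vectorized sketch of the corrected CD-1 step, mirroring the updated python/RBM.py (it assumes the sample_h_given_v / sample_v_given_h helpers return [mean, sample] pairs, as in that file):

```python
import numpy

def contrastive_divergence_1(rbm, v0, lr=0.1):
    ph_mean, ph_sample = rbm.sample_h_given_v(v0)             # positive phase
    nv_means, nv_samples = rbm.sample_v_given_h(ph_sample)    # reconstruct visibles
    nh_means, nh_samples = rbm.sample_h_given_v(nv_samples)   # negative phase

    # positive statistics use ph_mean (the fix); negative statistics unchanged
    rbm.W += lr * (numpy.dot(v0.T, ph_mean) - numpy.dot(nv_samples.T, nh_means))
    rbm.vbias += lr * numpy.mean(v0 - nv_samples, axis=0)
    rbm.hbias += lr * numpy.mean(ph_mean - nh_means, axis=0)
```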
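One convention worth noting about the new helpers in python/utils.py (and the matching static methods in the Java HiddenLayer): dsigmoid, dtanh, and dReLU take the activation's output, not its pre-activation input, which is how the new backward passes apply them. A small numerical check, offered as a sketch:

```python
# Sketch: verifies that dsigmoid(y) = y(1 - y) and dtanh(y) = 1 - y^2 match
# the true derivatives when evaluated at the activation's output y = f(z).
from utils import sigmoid, dsigmoid, tanh, dtanh

z = 0.3                                                  # arbitrary pre-activation
eps = 1e-6
for f, df in [(sigmoid, dsigmoid), (tanh, dtanh)]:
    numerical = (f(z + eps) - f(z - eps)) / (2 * eps)    # d f(z) / dz
    analytic = df(f(z))                                  # derivative from the output
    assert abs(numerical - analytic) < 1e-8
```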