From 5e4203f357f718038d116b1c123c164d78a45749 Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori <restinthenest@gmail.com> Date: Wed, 1 May 2013 20:33:54 +0900 Subject: [PATCH 01/45] LogisticRegression.scala --- scala/LogisticRegression.scala | 133 +++++++++++++++++++++++++++++++++ 1 file changed, 133 insertions(+) create mode 100644 scala/LogisticRegression.scala diff --git a/scala/LogisticRegression.scala b/scala/LogisticRegression.scala new file mode 100644 index 0000000..2386f5e --- /dev/null +++ b/scala/LogisticRegression.scala @@ -0,0 +1,133 @@ +// $ scalac LogisticRegression.scala +// $ scala LogisticRegression + +import scala.math + +class LogisticRegression(val N: Int, val n_in: Int, val n_out: Int) { + + val W: Array[Array[Double]] = Array.ofDim[Double](n_out, n_in) + val b: Array[Double] = new Array[Double](n_out) + + def train(x: Array[Int], y: Array[Int], lr: Double) { + val p_y_given_x: Array[Double] = new Array[Double](n_out) + val dy: Array[Double] = new Array[Double](n_out) + + var i: Int = 0 + var j: Int = 0 + for(i <- 0 until n_out) { + p_y_given_x(i) = 0 + for(j <- 0 until n_in) { + p_y_given_x(i) += W(i)(j) * x(j) + } + p_y_given_x(i) += b(i) + } + softmax(p_y_given_x) + + for(i <- 0 until n_out) { + dy(i) = y(i) - p_y_given_x(i) + + for(j <- 0 until n_in) { + W(i)(j) += lr * dy(i) * x(j) / N + } + b(i) += lr * dy(i) / N + } + } + + + def softmax(x: Array[Double]) { + var max: Double = 0.0 + var sum: Double = 0.0 + + var i: Int = 0 + for(i <- 0 until n_out) if(max < x(i)) max = x(i) + + for(i <- 0 until n_out) { + x(i) = math.exp(x(i) - max) + sum += x(i) + } + + for(i <- 0 until n_out) x(i) /= sum + } + + + def predict(x: Array[Int], y: Array[Double]) { + var i: Int = 0 + var j: Int = 0 + for(i <- 0 until n_out) { + y(i) = 0 + for(j <- 0 until n_in) { + y(i) += W(i)(j) * x(j) + } + y(i) += b(i) + } + softmax(y) + } + +} + + +object LogisticRegression { + def test_lr() { + val learning_rate: Double = 0.1 + val n_epochs: Int = 500 + + val train_N: Int = 6 + val test_N: Int = 2 + val n_in: Int = 6 + val n_out: Int = 2 + + val train_X: Array[Array[Int]] = Array( + Array(1, 1, 1, 0, 0, 0), + Array(1, 0, 1, 0, 0, 0), + Array(1, 1, 1, 0, 0, 0), + Array(0, 0, 1, 1, 1, 0), + Array(0, 0, 1, 0, 1, 0), + Array(0, 0, 1, 1, 1, 0) + ) + + val train_Y: Array[Array[Int]] = Array( + Array(1, 0), + Array(1, 0), + Array(1, 0), + Array(0, 1), + Array(0, 1), + Array(0, 1) + ) + + // construct + val classifier = new LogisticRegression(train_N, n_in, n_out) + + // train + var epoch: Int = 0 + var i: Int = 0 + for(epoch <- 0 until n_epochs) { + for(i <- 0 until train_N) { + classifier.train(train_X(i), train_Y(i), learning_rate) + } + // learning_rate *= 0.95 + } + + // test data + val test_X: Array[Array[Int]] = Array( + Array(1, 0, 1, 0, 0, 0), + Array(0, 0, 1, 1, 1, 0) + ) + + val test_Y: Array[Array[Double]] = Array.ofDim[Double](test_N, n_in) + + // test + var j: Int = 0 + for(i <- 0 until test_N) { + classifier.predict(test_X(i), test_Y(i)) + for(j <- 0 until n_out) { + printf("%.5f ", test_Y(i)(j)) + } + println() + } + } + + def main(args: Array[String]) { + test_lr() + } + +} From cbd78335c4041618e071c11b280c5c1b7ffec6e7 Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori <restinthenest@gmail.com> Date: Mon, 24 Jun 2013 00:53:17 +0900 Subject: [PATCH 02/45] Issue #1 bug fix on C C++ Java --- c/DBN.c | 3 ++- c/SdA.c | 3 ++- cpp/DBN.cpp | 3 ++- cpp/SdA.cpp | 3 ++- java/DBN/src/DBN.java | 3 ++- java/SdA/src/SdA.java | 3 ++- 6 files changed, 12 insertions(+), 6 deletions(-) diff --git a/c/DBN.c 
b/c/DBN.c index 9423622..7899678 100644 --- a/c/DBN.c +++ b/c/DBN.c @@ -190,8 +190,9 @@ void DBN_predict(DBN* this, int *x, double *y) { for(i=0; i<this->n_layers; i++) { layer_input = (double *)malloc(sizeof(double) * this->sigmoid_layers[i].n_out); - linear_output = 0.0; for(k=0; k<this->sigmoid_layers[i].n_out; k++) { + linear_output = 0.0; + for(j=0; j<this->sigmoid_layers[i].n_in; j++) { linear_output += this->sigmoid_layers[i].W[k][j] * prev_layer_input[j]; } diff --git a/c/SdA.c b/c/SdA.c index 273d2b8..99170ee 100644 --- a/c/SdA.c +++ b/c/SdA.c @@ -188,8 +188,9 @@ void SdA_predict(SdA* this, int *x, double *y) { for(i=0; i<this->n_layers; i++) { layer_input = (double *)malloc(sizeof(double) * this->sigmoid_layers[i].n_out); - linear_output = 0.0; for(k=0; k<this->sigmoid_layers[i].n_out; k++) { + linear_output = 0.0; + for(j=0; j<this->sigmoid_layers[i].n_in; j++) { linear_output += this->sigmoid_layers[i].W[k][j] * prev_layer_input[j]; } diff --git a/cpp/DBN.cpp b/cpp/DBN.cpp index e92bad7..4f9b9c3 100644 --- a/cpp/DBN.cpp +++ b/cpp/DBN.cpp @@ -176,8 +176,9 @@ void DBN::predict(int *x, double *y) { for(int i=0; i<n_layers; i++) { layer_input = new double[sigmoid_layers[i]->n_out]; - linear_output = 0.0; for(int k=0; k<sigmoid_layers[i]->n_out; k++) { + linear_output = 0.0; + for(int j=0; j<sigmoid_layers[i]->n_in; j++) { linear_output += sigmoid_layers[i]->W[k][j] * prev_layer_input[j]; } diff --git a/cpp/SdA.cpp b/cpp/SdA.cpp index 787bc6c..39ac32e 100644 --- a/cpp/SdA.cpp +++ b/cpp/SdA.cpp @@ -174,8 +174,9 @@ void SdA::predict(int *x, double *y) { for(int i=0; i<n_layers; i++) { layer_input = new double[sigmoid_layers[i]->n_out]; - linear_output = 0.0; for(int k=0; k<sigmoid_layers[i]->n_out; k++) { + linear_output = 0.0; + for(int j=0; j<sigmoid_layers[i]->n_in; j++) { linear_output += sigmoid_layers[i]->W[k][j] * prev_layer_input[j]; } diff --git a/java/DBN/src/DBN.java b/java/DBN/src/DBN.java index 238d1ce..5b72e68 100644 --- a/java/DBN/src/DBN.java +++ b/java/DBN/src/DBN.java @@ -124,8 +124,9 @@ public void predict(int[] x, double[] y) { for(int i=0; i<n_layers; i++) { layer_input = new double[sigmoid_layers[i].n_out]; - linear_output = 0.0; for(int k=0; k<sigmoid_layers[i].n_out; k++) { + linear_output = 0.0; + for(int j=0; j<sigmoid_layers[i].n_in; j++) { linear_output += sigmoid_layers[i].W[k][j] * prev_layer_input[j]; } diff --git a/java/SdA/src/SdA.java b/java/SdA/src/SdA.java index 4c7a749..a626ad8 100644 --- a/java/SdA/src/SdA.java +++ b/java/SdA/src/SdA.java @@ -123,8 +123,9 @@ public void predict(int[] x, double[] y) { for(int i=0; i<n_layers; i++) { layer_input = new double[sigmoid_layers[i].n_out]; - linear_output = 0.0; for(int k=0; k<sigmoid_layers[i].n_out; k++) { + linear_output = 0.0; + for(int j=0; j<sigmoid_layers[i].n_in; j++) { linear_output += sigmoid_layers[i].W[k][j] * prev_layer_input[j]; } From 2d9d06de1052442993413cd14ebea5867ec90433 Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori <restinthenest@gmail.com> Date: Thu, 18 Jul 2013 23:45:29 +0900 Subject: [PATCH 03/45] 07/18/2013 #1 fix C C++ Java Scala --- c/DBN.c | 3 ++- c/RBM.c | 3 ++- cpp/DBN.cpp | 3 ++- cpp/RBM.cpp | 3 ++- java/DBN/src/RBM.java | 3 ++- java/RBM/src/RBM.java | 3 ++- scala/RBM.scala | 3 ++- 7 files changed, 14 insertions(+), 7 deletions(-) diff --git a/c/DBN.c b/c/DBN.c index 7899678..eee1e1c 100644 --- a/c/DBN.c +++ b/c/DBN.c @@ -347,7 +347,8 @@ void RBM_contrastive_divergence(RBM* this, int *input, double lr, int k) { for(i=0; i<this->n_hidden; i++) { for(j=0; j<this->n_visible; 
j++) { - this->W[i][j] += lr * (ph_sample[i] * input[j] - nh_means[i] * nv_samples[j]) / this->N; + // this->W[i][j] += lr * (ph_sample[i] * input[j] - nh_means[i] * nv_samples[j]) / this->N; + this->W[i][j] += lr * (ph_mean[i] * input[j] - nh_means[i] * nv_samples[j]) / this->N; } this->hbias[i] += lr * (ph_sample[i] - nh_means[i]) / this->N; } diff --git a/c/RBM.c b/c/RBM.c index 9ea27b2..7e806bb 100644 --- a/c/RBM.c +++ b/c/RBM.c @@ -100,7 +100,8 @@ void RBM_contrastive_divergence(RBM* this, int *input, double lr, int k) { for(i=0; i<this->n_hidden; i++) { for(j=0; j<this->n_visible; j++) { - this->W[i][j] += lr * (ph_sample[i] * input[j] - nh_means[i] * nv_samples[j]) / this->N; + // this->W[i][j] += lr * (ph_sample[i] * input[j] - nh_means[i] * nv_samples[j]) / this->N; + this->W[i][j] += lr * (ph_mean[i] * input[j] - nh_means[i] * nv_samples[j]) / this->N; } this->hbias[i] += lr * (ph_sample[i] - nh_means[i]) / this->N; } diff --git a/cpp/DBN.cpp b/cpp/DBN.cpp index 4f9b9c3..f043e20 100644 --- a/cpp/DBN.cpp +++ b/cpp/DBN.cpp @@ -322,7 +322,8 @@ void RBM::contrastive_divergence(int *input, double lr, int k) { for(int i=0; i<n_hidden; i++) { for(int j=0; j<n_visible; j++) { - W[i][j] += lr * (ph_sample[i] * input[j] - nh_means[i] * nv_samples[j]) / N; + // W[i][j] += lr * (ph_sample[i] * input[j] - nh_means[i] * nv_samples[j]) / N; + W[i][j] += lr * (ph_mean[i] * input[j] - nh_means[i] * nv_samples[j]) / N; } hbias[i] += lr * (ph_sample[i] - nh_means[i]) / N; } diff --git a/cpp/RBM.cpp b/cpp/RBM.cpp index 590199a..d64462c 100644 --- a/cpp/RBM.cpp +++ b/cpp/RBM.cpp @@ -89,7 +89,8 @@ void RBM::contrastive_divergence(int *input, double lr, int k) { for(int i=0; i<n_hidden; i++) { for(int j=0; j<n_visible; j++) { - W[i][j] += lr * (ph_sample[i] * input[j] - nh_means[i] * nv_samples[j]) / N; + // W[i][j] += lr * (ph_sample[i] * input[j] - nh_means[i] * nv_samples[j]) / N; + W[i][j] += lr * (ph_mean[i] * input[j] - nh_means[i] * nv_samples[j]) / N; } hbias[i] += lr * (ph_sample[i] - nh_means[i]) / N; } diff --git a/java/DBN/src/RBM.java b/java/DBN/src/RBM.java index ed1b4c5..9ee3563 100644 --- a/java/DBN/src/RBM.java +++ b/java/DBN/src/RBM.java @@ -91,7 +91,8 @@ public void contrastive_divergence(int[] input, double lr, int k) { for(int i=0; i<n_hidden; i++) { for(int j=0; j<n_visible; j++) { - W[i][j] += lr *(ph_sample[i] * input[j] - nh_means[i] * nv_samples[j]) / N; + // W[i][j] += lr *(ph_sample[i] * input[j] - nh_means[i] * nv_samples[j]) / N; + W[i][j] += lr *(ph_mean[i] * input[j] - nh_means[i] * nv_samples[j]) / N; } hbias[i] += lr * (ph_sample[i] - nh_means[i]) / N; } diff --git a/java/RBM/src/RBM.java b/java/RBM/src/RBM.java index 3100dd0..6f2e3b7 100644 --- a/java/RBM/src/RBM.java +++ b/java/RBM/src/RBM.java @@ -91,7 +91,8 @@ public void contrastive_divergence(int[] input, double lr, int k) { for(int i=0; i<n_hidden; i++) { for(int j=0; j<n_visible; j++) { - W[i][j] += lr * (ph_sample[i] * input[j] - nh_means[i] * nv_samples[j]) / N; + // W[i][j] += lr * (ph_sample[i] * input[j] - nh_means[i] * nv_samples[j]) / N; + W[i][j] += lr * (ph_mean[i] * input[j] - nh_means[i] * nv_samples[j]) / N; } hbias[i] += lr * (ph_sample[i] - nh_means[i]) / N; } diff --git a/scala/RBM.scala b/scala/RBM.scala index 79ae124..ad760fb 100644 --- a/scala/RBM.scala +++ b/scala/RBM.scala @@ -86,7 +86,8 @@ class RBM(val N: Int, val n_visible: Int, val n_hidden: Int, var j: Int = 0 for(i <- 0 until n_hidden) { for(j <- 0 until n_visible) { - W(i)(j) += lr * (ph_sample(i) * input(j) - nh_means(i) * 
nv_samples(j)) / N + // W(i)(j) += lr * (ph_sample(i) * input(j) - nh_means(i) * nv_samples(j)) / N + W(i)(j) += lr * (ph_mean(i) * input(j) - nh_means(i) * nv_samples(j)) / N } hbias(i) += lr * (ph_sample(i) - nh_means(i)) / N } From 8acec2f6a6af831f8b992eefad18dc53afbeee49 Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori <restinthenest@gmail.com> Date: Fri, 6 Sep 2013 09:01:18 +0900 Subject: [PATCH 04/45] scala sda --- scala/HiddenLayer.scala | 73 +++++++++++++ scala/SdA.scala | 236 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 309 insertions(+) create mode 100644 scala/HiddenLayer.scala create mode 100644 scala/SdA.scala diff --git a/scala/HiddenLayer.scala b/scala/HiddenLayer.scala new file mode 100644 index 0000000..118bc5e --- /dev/null +++ b/scala/HiddenLayer.scala @@ -0,0 +1,73 @@ +import scala.util.Random +import scala.math + +class HiddenLayer(val N: Int, val n_in: Int, val n_out: Int, _W: Array[Array[Double]], _b: Array[Double], var rng: Random=null) { + + + def uniform(min: Double, max: Double): Double = { + return rng.nextDouble() * (max - min) + min + } + + def binomial(n: Int, p: Double): Int = { + if(p < 0 || p > 1) return 0 + + var c: Int = 0 + var r: Double = 0.0 + + var i: Int = 0 + + for(i <- 0 until n) { + r = rng.nextDouble() + if(r < p) c += 1 + } + + return c + } + + def sigmoid(x: Double): Double = { + return 1.0 / (1.0 + math.pow(math.E, -x)) + } + + + if(rng == null) rng = new Random(1234) + + var a: Double = 0.0 + var W: Array[Array[Double]] = Array.ofDim[Double](n_out, n_in) + var b: Array[Double] = new Array[Double](n_out) + + var i: Int = 0 + if(_W == null) { + a = 1.0 / n_in + + for(i <- 0 until n_out) { + for(j <- 0 until n_in) { + W(i)(j) = uniform(-a, a) + } + } + } else { + W = _W + } + + if(_b != null) b = _b + + + def output(input: Array[Int], w: Array[Double], b: Double): Double = { + var linear_output: Double = 0.0 + + var j: Int = 0 + for(j <- 0 until n_in) { + linear_output += w(j) * input(j) + } + linear_output += b + + return sigmoid(linear_output) + } + + def sample_h_given_v(input: Array[Int], sample: Array[Int]) { + var i: Int = 0 + + for(i <- 0 until n_out) { + sample(i) = binomial(1, output(input, W(i), b(i))) + } + } +} diff --git a/scala/SdA.scala b/scala/SdA.scala new file mode 100644 index 0000000..3f897e5 --- /dev/null +++ b/scala/SdA.scala @@ -0,0 +1,236 @@ +import scala.util.Random +import scala.math + +class SdA(val N: Int, val n_ins: Int, hidden_layer_sizes: Array[Int], val n_outs: Int, val n_layers:Int, var rng: Random=null) { + + def sigmoid(x: Double): Double = { + return 1.0 / (1.0 + math.pow(math.E, -x)) + } + + var input_size: Int = 0 + + // var hidden_layer_sizes: Array[Int] = new Array[Int](n_layers) + var sigmoid_layers: Array[HiddenLayer] = new Array[HiddenLayer](n_layers) + var dA_layers: Array[dA] = new Array[dA](n_layers) + + if(rng == null) rng = new Random(1234) + + + var i: Int = 0 + + // construct multi-layer + for(i <- 0 until n_layers) { + if(i == 0) { + input_size = n_ins + } else { + input_size = hidden_layer_sizes(i-1) + } + + // construct sigmoid_layer + sigmoid_layers(i) = new HiddenLayer(N, input_size, hidden_layer_sizes(i), null, null, rng) + + // construct dA_layer + dA_layers(i) = new dA(N, input_size, hidden_layer_sizes(i), sigmoid_layers(i).W, sigmoid_layers(i).b, null, rng) + } + + // layer for output using LogisticRegression + val log_layer = new LogisticRegression(N, hidden_layer_sizes(n_layers-1), n_outs) + + + def pretrain(train_X: Array[Array[Int]], lr: Double, corruption_level: Double, 
epochs: Int) { + var layer_input: Array[Int] = new Array[Int](0) + var prev_layer_input_size: Int = 0 + var prev_layer_input: Array[Int] = new Array[Int](0) + + var i: Int = 0 + var j: Int = 0 + var epoch: Int = 0 + var n: Int = 0 + var l: Int = 0 + + for(i <- 0 until n_layers) { // layer-wise + for(epoch <- 0 until epochs) { // training epochs + for(n <- 0 until N) { // input x1...xN + // layer input + for(l <- 0 to i) { + if(l == 0) { + layer_input = new Array[Int](n_ins) + for(j <- 0 until n_ins) layer_input(j) = train_X(n)(j) + } else { + if(l == 1) prev_layer_input_size = n_ins + else prev_layer_input_size = hidden_layer_sizes(l-2) + + prev_layer_input = new Array[Int](prev_layer_input_size) + for(j <- 0 until prev_layer_input_size) prev_layer_input(j) = layer_input(j) + + layer_input = new Array[Int](hidden_layer_sizes(l-1)) + + sigmoid_layers(l-1).sample_h_given_v(prev_layer_input, layer_input) + } + } + + dA_layers(i).train(layer_input, lr, corruption_level) + } + } + } + + } + + + def finetune(train_X: Array[Array[Int]], train_Y: Array[Array[Int]], lr: Double, epochs: Int) { + var layer_input: Array[Int] = new Array[Int](0) + var prev_layer_input: Array[Int] = new Array[Int](0) + + var epoch: Int = 0 + var n: Int = 0 + + + for(epoch <- 0 until epochs) { + for(n <- 0 until N) { + + // layer input + for(i <- 0 until n_layers) { + if(i == 0) { + prev_layer_input = new Array[Int](n_ins) + for(j <- 0 until n_ins) prev_layer_input(j) = train_X(n)(j) + } else { + prev_layer_input = new Array[Int](hidden_layer_sizes(i-1)) + for(j <- 0 until hidden_layer_sizes(i-1)) prev_layer_input(j) = layer_input(j) + } + + layer_input = new Array[Int](hidden_layer_sizes(i)) + sigmoid_layers(i).sample_h_given_v(prev_layer_input, layer_input) + } + + log_layer.train(layer_input, train_Y(n), lr) + } + // lr *= 0.95 + } + } + + def predict(x: Array[Int], y: Array[Double]) { + var layer_input: Array[Double] = new Array[Double](0) + var prev_layer_input: Array[Double] = new Array[Double](n_ins) + + var j: Int = 0 + for(j <- 0 until n_ins) prev_layer_input(j) = x(j) + + var linear_output: Double = 0.0 + + // layer activation + var i: Int = 0 + var k: Int = 0 + + for(i <- 0 until n_layers) { + layer_input = new Array[Double](sigmoid_layers(i).n_out) + + for(k <- 0 until sigmoid_layers(i).n_out) { + linear_output = 0.0 + + for(j <- 0 until sigmoid_layers(i).n_in) { + linear_output += sigmoid_layers(i).W(k)(j) * prev_layer_input(j) + } + linear_output += sigmoid_layers(i).b(k) + layer_input(k) = sigmoid(linear_output) + } + + if(i < n_layers-1) { + prev_layer_input = new Array[Double](sigmoid_layers(i).n_out) + for(j <- 0 until sigmoid_layers(i).n_out) prev_layer_input(j) = layer_input(j) + } + } + + for(i <- 0 until log_layer.n_out) { + y(i) = 0 + for(j <- 0 until log_layer.n_in) { + y(i) += log_layer.W(i)(j) * layer_input(j) + } + y(i) += log_layer.b(i) + } + + log_layer.softmax(y) + } + +} + + +object SdA { + def test_sda() { + val rng: Random = new Random(123) + + val pretrain_lr: Double = 0.1 + val corruption_level: Double = 0.3 + val pretraining_epochs: Int = 1000 + val finetune_lr: Double = 0.1 + val finetune_epochs: Int = 500 + + val train_N: Int = 10 + val test_N: Int = 4 + val n_ins: Int = 28 + val n_outs: Int = 2 + val hidden_layer_sizes: Array[Int] = Array(15, 15) + val n_layers: Int = hidden_layer_sizes.length + + // training data + val train_X: Array[Array[Int]] = Array( + Array(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + Array(0, 1, 1, 1, 1, 0, 1, 1, 1, 
1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + Array(1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + Array(0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + Array(1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + Array(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), + Array(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1), + Array(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1), + Array(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1), + Array(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1) + ) + + val train_Y: Array[Array[Int]] = Array( + Array(1, 0), + Array(1, 0), + Array(1, 0), + Array(1, 0), + Array(1, 0), + Array(0, 1), + Array(0, 1), + Array(0, 1), + Array(0, 1), + Array(0, 1) + ) + + // construct SdA + val sda:SdA = new SdA(train_N, n_ins, hidden_layer_sizes, n_outs, n_layers, rng) + + // pretrain + sda.pretrain(train_X, pretrain_lr, corruption_level, pretraining_epochs) + + // finetune + sda.finetune(train_X, train_Y, finetune_lr, finetune_epochs) + + // test data + val test_X: Array[Array[Int]] = Array( + Array(1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + Array(1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + Array(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1), + Array(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) + ) + + val test_Y: Array[Array[Double]] = Array.ofDim[Double](test_N, n_outs) + + // test + var i: Int = 0 + var j: Int = 0 + + for(i <- 0 until test_N) { + sda.predict(test_X(i), test_Y(i)) + for(j <- 0 until n_outs) { + print(test_Y(i)(j) + " ") + } + println() + } + } + + def main(args: Array[String]) { + test_sda() + } +} From c11bd27b3a675dcc7a170c1f60313a882a6c96a8 Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori <restinthenest@gmail.com> Date: Sat, 7 Sep 2013 12:16:27 +0900 Subject: [PATCH 05/45] dbn scala --- scala/DBN.scala | 231 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 231 insertions(+) create mode 100644 scala/DBN.scala diff --git a/scala/DBN.scala b/scala/DBN.scala new file mode 100644 index 0000000..1b8df68 --- /dev/null +++ b/scala/DBN.scala @@ -0,0 +1,231 @@ +import scala.util.Random +import scala.math + +class DBN(val N: Int, val n_ins: Int, hidden_layer_sizes: Array[Int], val n_outs: Int, val n_layers: Int, var rng: Random=null) { + + def sigmoid(x: Double): Double = { + return 1.0 / (1.0 + math.pow(math.E, -x)) + } + + + var input_size: Int = 0 + + val sigmoid_layers: Array[HiddenLayer] = new Array[HiddenLayer](n_layers) + val rbm_layers: Array[RBM] = new Array[RBM](n_layers) + + if(rng == null) rng = new Random(1234) + + var i: Int = 0 + // construct multi-layer + for(i <- 0 until n_layers) { + if(i == 0) { + input_size = n_ins + } else { + input_size = hidden_layer_sizes(i-1) + } + + // construct sigmoid_layer + sigmoid_layers(i) = new HiddenLayer(N, input_size, hidden_layer_sizes(i), null, null, rng) + + // construct rbm_layer + rbm_layers(i) = new RBM(N, input_size, hidden_layer_sizes(i), sigmoid_layers(i).W, sigmoid_layers(i).b, null, rng) + + } + + // layer for output using LogisticRegression + val log_layer: LogisticRegression = new LogisticRegression(N, 
hidden_layer_sizes(n_layers-1), n_outs) + + + def pretrain(train_X: Array[Array[Int]], lr: Double, k: Int, epochs: Int) { + var layer_input: Array[Int] = new Array[Int](0) + var prev_layer_input_size: Int = 0 + var prev_layer_input: Array[Int] = new Array[Int](0) + + var i: Int = 0 + var j: Int = 0 + var epoch: Int = 0 + var n: Int = 0 + var l: Int = 0 + + for(i <- 0 until n_layers) { // layer-wise + for(epoch <- 0 until epochs) { // training epochs + for(n <- 0 until N) { // input x1...xN + // layer input + for(l <- 0 to i) { + if(l == 0) { + layer_input = new Array[Int](n_ins) + for(j <- 0 until n_ins) layer_input(j) = train_X(n)(j) + + } else { + if(l == 1) prev_layer_input_size = n_ins + else prev_layer_input_size = hidden_layer_sizes(l-2) + + prev_layer_input = new Array[Int](prev_layer_input_size) + for(j <- 0 until prev_layer_input_size) prev_layer_input(j) = layer_input(j) + + layer_input = new Array[Int](hidden_layer_sizes(l-1)) + sigmoid_layers(l-1).sample_h_given_v(prev_layer_input, layer_input) + } + } + + rbm_layers(i).contrastive_divergence(layer_input, lr, k) + } + } + } + } + + + def finetune(train_X: Array[Array[Int]], train_Y: Array[Array[Int]], lr: Double, epochs: Int) { + var layer_input: Array[Int] = new Array[Int](0) + var prev_layer_input: Array[Int] = new Array[Int](0) + + var epoch: Int = 0 + var n: Int = 0 + var i: Int = 0 + var j: Int = 0 + + for(epoch <- 0 until epochs) { + for(n <- 0 until N) { + + // layer input + for(i <- 0 until n_layers) { + if(i == 0) { + prev_layer_input = new Array[Int](n_ins) + for(j <- 0 until n_ins) prev_layer_input(j) = train_X(n)(j) + } else { + prev_layer_input = new Array[Int](hidden_layer_sizes(i-1)) + for(j <- 0 until hidden_layer_sizes(i-1)) prev_layer_input(j) = layer_input(j) + } + + layer_input = new Array[Int](hidden_layer_sizes(i)) + sigmoid_layers(i).sample_h_given_v(prev_layer_input, layer_input) + } + + log_layer.train(layer_input, train_Y(n), lr) + } + // lr *= 0.95 + } + } + + def predict(x: Array[Int], y: Array[Double]) { + var layer_input: Array[Double] = new Array[Double](0) + var prev_layer_input: Array[Double] = new Array[Double](n_ins) + + var i: Int = 0 + var j: Int = 0 + var k: Int = 0 + + for(j <- 0 until n_ins) prev_layer_input(j) = x(j) + + var linear_outoput: Double = 0 + + // layer activation + for(i <- 0 until n_layers) { + layer_input = new Array[Double](sigmoid_layers(i).n_out) + + for(k <- 0 until sigmoid_layers(i).n_out) { + linear_outoput = 0.0 + + for(j <- 0 until sigmoid_layers(i).n_in) { + linear_outoput += sigmoid_layers(i).W(k)(j) * prev_layer_input(j) + } + linear_outoput += sigmoid_layers(i).b(k) + layer_input(k) = sigmoid(linear_outoput) + } + + if(i < n_layers-1) { + prev_layer_input = new Array[Double](sigmoid_layers(i).n_out) + for(j <- 0 until sigmoid_layers(i).n_out) prev_layer_input(j) = layer_input(j) + } + } + + for(i <- 0 until log_layer.n_out) { + y(i) = 0 + for(j <- 0 until log_layer.n_in) { + y(i) += log_layer.W(i)(j) * layer_input(j) + } + y(i) += log_layer.b(i) + } + + log_layer.softmax(y) + } + +} + + +object DBN { + def test_dbn() { + val rng: Random = new Random(123) + + val pretrain_lr: Double = 0.1 + val pretraining_epochs: Int = 1000 + val k: Int = 1 + val finetune_lr: Double = 0.1 + val finetune_epochs: Int = 500 + + val train_N: Int = 6 + val test_N: Int = 4 + val n_ins: Int = 6 + val n_outs: Int = 2 + val hidden_layer_sizes: Array[Int] = Array(3, 3) + val n_layers = hidden_layer_sizes.length + + + // training data + val train_X: Array[Array[Int]] = Array( + Array(1, 1, 
1, 0, 0, 0), + Array(1, 0, 1, 0, 0, 0), + Array(1, 1, 1, 0, 0, 0), + Array(0, 0, 1, 1, 1, 0), + Array(0, 0, 1, 1, 0, 0), + Array(0, 0, 1, 1, 1, 0) + ) + + val train_Y: Array[Array[Int]] = Array( + Array(1, 0), + Array(1, 0), + Array(1, 0), + Array(0, 1), + Array(0, 1), + Array(0, 1) + ) + + // construct DBN + val dbn: DBN = new DBN(train_N, n_ins, hidden_layer_sizes, n_outs, n_layers, rng) + + // pretrain + dbn.pretrain(train_X, pretrain_lr, k, pretraining_epochs); + + // finetune + dbn.finetune(train_X, train_Y, finetune_lr, finetune_epochs); + + + // test data + val test_X: Array[Array[Int]] = Array( + Array(1, 1, 0, 0, 0, 0), + Array(1, 1, 1, 1, 0, 0), + Array(0, 0, 0, 1, 1, 0), + Array(0, 0, 1, 1, 1, 0) + ) + + val test_Y: Array[Array[Double]] = Array.ofDim[Double](test_N, n_outs) + + var i: Int = 0 + var j: Int = 0 + + // test + for(i <- 0 until test_N) { + dbn.predict(test_X(i), test_Y(i)) + for(j <- 0 until n_outs) { + print(test_Y(i)(j) + " ") + } + println() + } + + } + + + def main(args: Array[String]) { + test_dbn() + } +} From 58ef84bb4d21f4b95cd2719a2e5fa9e85512614c Mon Sep 17 00:00:00 2001 From: N011077 <n011077@N011077.local> Date: Wed, 9 Oct 2013 08:57:14 +0900 Subject: [PATCH 06/45] readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 804e867..c73cd3e 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Deep Learning (Python, C/C++, Java) +# Deep Learning (Python, C/C++, Java, Scala) ### Classes : From 84fd62a7dcead5f0a2ac46a4487362098fd108db Mon Sep 17 00:00:00 2001 From: N011077 <me@yusugomori.com> Date: Thu, 10 Oct 2013 13:33:15 +0900 Subject: [PATCH 07/45] minor chg --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index c73cd3e..c5ff02d 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ -# Deep Learning (Python, C/C++, Java, Scala) +# Deep Learning +## (Python, C/C++, Java, Scala) ### Classes : From 3f818eef4668ad158f174bdf900a4e5be5316140 Mon Sep 17 00:00:00 2001 From: N011077 <me@yusugomori.com> Date: Thu, 10 Oct 2013 13:36:53 +0900 Subject: [PATCH 08/45] minor chg --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index c5ff02d..6bc0a32 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,4 @@ -# Deep Learning -## (Python, C/C++, Java, Scala) +## Deep Learning (Python, C/C++, Java, Scala) ### Classes : From 38f3c9b43ff265889ba1890adca1b3da0fbf846a Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori <me@yusugomori.com> Date: Thu, 10 Oct 2013 13:37:53 +0900 Subject: [PATCH 09/45] minor chg --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6bc0a32..735e5d0 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -## Deep Learning (Python, C/C++, Java, Scala) +## Deep Learning (Python, C/C++, Java, Scala) ### Classes : From 78611962735f3a9bb41d6dcc04d9f9423557fd1f Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori <me@yusugomori.com> Date: Tue, 25 Mar 2014 21:04:01 +0900 Subject: [PATCH 10/45] minor bug fix --- java/LogisticRegression/src/LogisticRegression.java | 2 +- scala/LogisticRegression.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/java/LogisticRegression/src/LogisticRegression.java b/java/LogisticRegression/src/LogisticRegression.java index 8a13407..21e8f22 100644 --- a/java/LogisticRegression/src/LogisticRegression.java +++ b/java/LogisticRegression/src/LogisticRegression.java @@ -109,7 +109,7 @@ private static 
void test_lr() { {0, 0, 1, 1, 1, 0} }; - double[][] test_Y = new double[test_N][n_in]; + double[][] test_Y = new double[test_N][n_out]; // test diff --git a/scala/LogisticRegression.scala b/scala/LogisticRegression.scala index 2386f5e..bf9b229 100644 --- a/scala/LogisticRegression.scala +++ b/scala/LogisticRegression.scala @@ -113,7 +113,7 @@ object LogisticRegression { Array(0, 0, 1, 1, 1, 0) ) - val test_Y: Array[Array[Double]] = Array.ofDim[Double](test_N, n_in) + val test_Y: Array[Array[Double]] = Array.ofDim[Double](test_N, n_out) // test var j: Int = 0 From 15241eb8a1fa7ca3a487b83b458c4f4d77a80d0c Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori <me@yusugomori.com> Date: Sun, 7 Dec 2014 02:22:43 +0900 Subject: [PATCH 11/45] go --- c/LogisticRegression.c | 2 +- cpp/LogisticRegression.cpp | 20 +-- go/LogisticRegression.go | 150 ++++++++++++++++++ .../src/LogisticRegression.java | 2 +- 4 files changed, 153 insertions(+), 21 deletions(-) create mode 100644 go/LogisticRegression.go diff --git a/c/LogisticRegression.c b/c/LogisticRegression.c index 0c0c04a..b55c707 100644 --- a/c/LogisticRegression.c +++ b/c/LogisticRegression.c @@ -94,7 +94,7 @@ void test_lr(void) { int i, j, epoch; double learning_rate = 0.1; - double n_epochs = 500; + int n_epochs = 500; int train_N = 6; int test_N = 2; diff --git a/cpp/LogisticRegression.cpp b/cpp/LogisticRegression.cpp index 9eb8f24..6eca566 100644 --- a/cpp/LogisticRegression.cpp +++ b/cpp/LogisticRegression.cpp @@ -86,30 +86,12 @@ void test_lr() { srand(0); double learning_rate = 0.1; - double n_epochs = 500; + int n_epochs = 500; int train_N = 6; int test_N = 2; int n_in = 6; int n_out = 2; - // int **train_X; - // int **train_Y; - // int **test_X; - // double **test_Y; - - // train_X = new int*[train_N]; - // train_Y = new int*[train_N]; - // for(i=0; i<train_N; i++){ - // train_X[i] = new int[n_in]; - // train_Y[i] = new int[n_out]; - // }; - - // test_X = new int*[test_N]; - // test_Y = new double*[test_N]; - // for(i=0; i<test_N; i++){ - // test_X[i] = new int[n_in]; - // test_Y[i] = new double[n_out]; - // } // training data diff --git a/go/LogisticRegression.go b/go/LogisticRegression.go new file mode 100644 index 0000000..00b8768 --- /dev/null +++ b/go/LogisticRegression.go @@ -0,0 +1,150 @@ +package main + +import ( + "fmt" + "math" +) + +type LogisticRegression struct { + N int + n_in int + n_out int + W [][]float64 + b []float64 +} + + +func LogisticRegression__construct(this *LogisticRegression, N int, n_in int, n_out int) { + this.N = N + this.n_in = n_in + this.n_out = n_out + + this.W = make([][]float64, n_out) + for i := 0; i < n_out; i++ { this.W[i] = make([]float64, n_in) } + + this.b = make([]float64, n_out) +} + +func LogisticRegression_train(this *LogisticRegression, x []int, y []int, lr float64) { + p_y_given_x := make([]float64, this.n_out) + dy := make([]float64, this.n_out) + + for i := 0; i < this.n_out; i++ { + p_y_given_x[i] = 0 + for j := 0; j < this.n_in; j++ { + p_y_given_x[i] += this.W[i][j] * float64(x[j]) + } + p_y_given_x[i] += this.b[i] + } + LogisticRegression_softmax(this, p_y_given_x) + + for i := 0; i < this.n_out; i++ { + dy[i] = float64(y[i]) - p_y_given_x[i] + + for j := 0; j < this.n_in; j++ { + this.W[i][j] += lr * dy[i] * float64(x[j]) / float64(this.N) + } + + this.b[i] += lr * dy[i] / float64(this.N) + } + +} + +func LogisticRegression_softmax(this *LogisticRegression, x []float64) { + var ( + max float64 + sum float64 + ) + + for i := 0; i < this.n_out; i++ { if max < x[i] {max = x[i]} } + for i := 0; i 
< this.n_out; i++ { + x[i] = math.Exp(x[i] - max) + sum += x[i] + } + + for i := 0; i < this.n_out; i++ { x[i] /= sum } +} + +func LogisticRegression_predict(this *LogisticRegression, x []int, y []float64) { + for i := 0; i < this.n_out; i++ { + y[i] = 0 + for j := 0; j < this.n_in; j++ { + y[i] += this.W[i][j] * float64(x[j]) + } + y[i] += this.b[i] + } + + LogisticRegression_softmax(this, y) +} + + + +func test_lr() { + + learning_rate := 0.1 + n_epochs := 500 + + train_N := 6 + test_N := 2 + n_in := 6 + n_out := 2 + + + // training data + train_X := [][]int { + {1, 1, 1, 0, 0, 0}, + {1, 0, 1, 0, 0, 0}, + {1, 1, 1, 0, 0, 0}, + {0, 0, 1, 1, 1, 0}, + {0, 0, 1, 1, 0, 0}, + {0, 0, 1, 1, 1, 0}, + } + + + train_Y := [][]int { + {1, 0}, + {1, 0}, + {1, 0}, + {0, 1}, + {0, 1}, + {0, 1}, + } + + + // construct LogisticRegression + var classifier LogisticRegression + LogisticRegression__construct(&classifier, train_N, n_in, n_out) + + // train + for epoch := 0; epoch < n_epochs; epoch++ { + for i := 0; i < train_N; i++ { + LogisticRegression_train(&classifier, train_X[i], train_Y[i], learning_rate) + } + } + + // test data + test_X := [][]int { + {1, 0, 1, 0, 0, 0}, + {0, 0, 1, 1, 1, 0}, + } + + test_Y := make([][]float64, test_N) + for i := 0; i < test_N; i++ { test_Y[i] = make([]float64, n_out) } + + + // test + for i := 0; i < test_N; i++ { + LogisticRegression_predict(&classifier, test_X[i], test_Y[i]) + for j := 0; j < n_out; j++ { + fmt.Printf("%f ", test_Y[i][j]) + } + fmt.Printf("\n") + } + +} + + +func main() { + test_lr() +} + diff --git a/java/LogisticRegression/src/LogisticRegression.java b/java/LogisticRegression/src/LogisticRegression.java index 21e8f22..8356c95 100644 --- a/java/LogisticRegression/src/LogisticRegression.java +++ b/java/LogisticRegression/src/LogisticRegression.java @@ -67,7 +67,7 @@ public void predict(int[] x, double[] y) { private static void test_lr() { double learning_rate = 0.1; - double n_epochs = 500; + int n_epochs = 500; int train_N = 6; int test_N = 2; From 0cd1a5db9a1bc66e77dad085f95262ff743304e7 Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori <me@yusugomori.com> Date: Sun, 7 Dec 2014 02:23:48 +0900 Subject: [PATCH 12/45] add go to readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 735e5d0..48c2f67 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -## Deep Learning (Python, C/C++, Java, Scala) +## Deep Learning (Python, C/C++, Java, Scala, Go) ### Classes : From a636ca8a2cd98a8a344b4f7ebeb3792a442e87a0 Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori <me@yusugomori.com> Date: Sun, 7 Dec 2014 02:34:12 +0900 Subject: [PATCH 13/45] untabify go --- go/LogisticRegression.go | 54 ++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/go/LogisticRegression.go b/go/LogisticRegression.go index 00b8768..1103cff 100644 --- a/go/LogisticRegression.go +++ b/go/LogisticRegression.go @@ -113,38 +113,38 @@ func test_lr() { // construct LogisticRegression var classifier LogisticRegression - LogisticRegression__construct(&classifier, train_N, n_in, n_out) - - // train - for epoch := 0; epoch < n_epochs; epoch++ { - for i := 0; i < train_N; i++ { - LogisticRegression_train(&classifier, train_X[i], train_Y[i], learning_rate) - } - } - - // test data - test_X := [][]int { + LogisticRegression__construct(&classifier, train_N, n_in, n_out) + + // train + for epoch := 0; epoch < n_epochs; epoch++ { + for i := 0; i < train_N; i++ { + 
LogisticRegression_train(&classifier, train_X[i], train_Y[i], learning_rate) + } + } + + // test data + test_X := [][]int { {1, 0, 1, 0, 0, 0}, {0, 0, 1, 1, 1, 0}, - } - - test_Y := make([][]float64, test_N) - for i := 0; i < test_N; i++ { test_Y[i] = make([]float64, n_out) } - - - // test - for i := 0; i < test_N; i++ { - LogisticRegression_predict(&classifier, test_X[i], test_Y[i]) - for j := 0; j < n_out; j++ { - fmt.Printf("%f ", test_Y[i][j]) - } - fmt.Printf("\n") - } - + } + + test_Y := make([][]float64, test_N) + for i := 0; i < test_N; i++ { test_Y[i] = make([]float64, n_out) } + + + // test + for i := 0; i < test_N; i++ { + LogisticRegression_predict(&classifier, test_X[i], test_Y[i]) + for j := 0; j < n_out; j++ { + fmt.Printf("%f ", test_Y[i][j]) + } + fmt.Printf("\n") + } + } func main() { - test_lr() + test_lr() } From 9a09bc974637ef244bb743666e31795372b2fe70 Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori <me@yusugomori.com> Date: Sun, 7 Dec 2014 02:37:50 +0900 Subject: [PATCH 14/45] untabify go --- go/LogisticRegression.go | 152 +++++++++++++++++++-------------------- 1 file changed, 76 insertions(+), 76 deletions(-) diff --git a/go/LogisticRegression.go b/go/LogisticRegression.go index 1103cff..b1e9a65 100644 --- a/go/LogisticRegression.go +++ b/go/LogisticRegression.go @@ -1,118 +1,118 @@ package main import ( - "fmt" - "math" + "fmt" + "math" ) type LogisticRegression struct { - N int - n_in int - n_out int - W [][]float64 - b []float64 + N int + n_in int + n_out int + W [][]float64 + b []float64 } func LogisticRegression__construct(this *LogisticRegression, N int, n_in int, n_out int) { - this.N = N - this.n_in = n_in - this.n_out = n_out - - this.W = make([][]float64, n_out) - for i := 0; i < n_out; i++ { this.W[i] = make([]float64, n_in) } - - this.b = make([]float64, n_out) + this.N = N + this.n_in = n_in + this.n_out = n_out + + this.W = make([][]float64, n_out) + for i := 0; i < n_out; i++ { this.W[i] = make([]float64, n_in) } + + this.b = make([]float64, n_out) } func LogisticRegression_train(this *LogisticRegression, x []int, y []int, lr float64) { - p_y_given_x := make([]float64, this.n_out) - dy := make([]float64, this.n_out) - - for i := 0; i < this.n_out; i++ { - p_y_given_x[i] = 0 - for j := 0; j < this.n_in; j++ { - p_y_given_x[i] += this.W[i][j] * float64(x[j]) - } - p_y_given_x[i] += this.b[i] - } - LogisticRegression_softmax(this, p_y_given_x) - - for i := 0; i < this.n_out; i++ { - dy[i] = float64(y[i]) - p_y_given_x[i] - - for j := 0; j < this.n_in; j++ { - this.W[i][j] += lr * dy[i] * float64(x[j]) / float64(this.N) - } - - this.b[i] += lr * dy[i] / float64(this.N) - } - + p_y_given_x := make([]float64, this.n_out) + dy := make([]float64, this.n_out) + + for i := 0; i < this.n_out; i++ { + p_y_given_x[i] = 0 + for j := 0; j < this.n_in; j++ { + p_y_given_x[i] += this.W[i][j] * float64(x[j]) + } + p_y_given_x[i] += this.b[i] + } + LogisticRegression_softmax(this, p_y_given_x) + + for i := 0; i < this.n_out; i++ { + dy[i] = float64(y[i]) - p_y_given_x[i] + + for j := 0; j < this.n_in; j++ { + this.W[i][j] += lr * dy[i] * float64(x[j]) / float64(this.N) + } + + this.b[i] += lr * dy[i] / float64(this.N) + } + } func LogisticRegression_softmax(this *LogisticRegression, x []float64) { - var ( - max float64 - sum float64 - ) - - for i := 0; i < this.n_out; i++ { if max < x[i] {max = x[i]} } - for i := 0; i < this.n_out; i++ { - x[i] = math.Exp(x[i] - max) - sum += x[i] - } - - for i := 0; i < this.n_out; i++ { x[i] /= sum } + var ( + max float64 + sum float64 
+ ) + + for i := 0; i < this.n_out; i++ { if max < x[i] {max = x[i]} } + for i := 0; i < this.n_out; i++ { + x[i] = math.Exp(x[i] - max) + sum += x[i] + } + + for i := 0; i < this.n_out; i++ { x[i] /= sum } } func LogisticRegression_predict(this *LogisticRegression, x []int, y []float64) { - for i := 0; i < this.n_out; i++ { - y[i] = 0 - for j := 0; j < this.n_in; j++ { - y[i] += this.W[i][j] * float64(x[j]) - } - y[i] += this.b[i] - } - - LogisticRegression_softmax(this, y) + for i := 0; i < this.n_out; i++ { + y[i] = 0 + for j := 0; j < this.n_in; j++ { + y[i] += this.W[i][j] * float64(x[j]) + } + y[i] += this.b[i] + } + + LogisticRegression_softmax(this, y) } func test_lr() { - - learning_rate := 0.1 - n_epochs := 500 - - train_N := 6 - test_N := 2 - n_in := 6 - n_out := 2 - - - // training data - train_X := [][]int { + + learning_rate := 0.1 + n_epochs := 500 + + train_N := 6 + test_N := 2 + n_in := 6 + n_out := 2 + + + // training data + train_X := [][]int { {1, 1, 1, 0, 0, 0}, {1, 0, 1, 0, 0, 0}, {1, 1, 1, 0, 0, 0}, {0, 0, 1, 1, 1, 0}, {0, 0, 1, 1, 0, 0}, {0, 0, 1, 1, 1, 0}, - } + } - train_Y := [][]int { + train_Y := [][]int { {1, 0}, {1, 0}, {1, 0}, {0, 1}, {0, 1}, {0, 1}, - } + } - - // construct LogisticRegression - var classifier LogisticRegression + + // construct LogisticRegression + var classifier LogisticRegression LogisticRegression__construct(&classifier, train_N, n_in, n_out) // train From ee7e7b6c208fa14089e9cf66b090036baf69d474 Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori <me@yusugomori.com> Date: Sun, 7 Dec 2014 02:39:18 +0900 Subject: [PATCH 15/45] tabify go --- go/LogisticRegression.go | 240 +++++++++++++++++++-------------------- 1 file changed, 120 insertions(+), 120 deletions(-) diff --git a/go/LogisticRegression.go b/go/LogisticRegression.go index b1e9a65..cbc7e0e 100644 --- a/go/LogisticRegression.go +++ b/go/LogisticRegression.go @@ -1,150 +1,150 @@ package main import ( - "fmt" - "math" + "fmt" + "math" ) type LogisticRegression struct { - N int - n_in int - n_out int - W [][]float64 - b []float64 + N int + n_in int + n_out int + W [][]float64 + b []float64 } func LogisticRegression__construct(this *LogisticRegression, N int, n_in int, n_out int) { - this.N = N - this.n_in = n_in - this.n_out = n_out - - this.W = make([][]float64, n_out) - for i := 0; i < n_out; i++ { this.W[i] = make([]float64, n_in) } - - this.b = make([]float64, n_out) + this.N = N + this.n_in = n_in + this.n_out = n_out + + this.W = make([][]float64, n_out) + for i := 0; i < n_out; i++ { this.W[i] = make([]float64, n_in) } + + this.b = make([]float64, n_out) } func LogisticRegression_train(this *LogisticRegression, x []int, y []int, lr float64) { - p_y_given_x := make([]float64, this.n_out) - dy := make([]float64, this.n_out) - - for i := 0; i < this.n_out; i++ { - p_y_given_x[i] = 0 - for j := 0; j < this.n_in; j++ { - p_y_given_x[i] += this.W[i][j] * float64(x[j]) - } - p_y_given_x[i] += this.b[i] - } - LogisticRegression_softmax(this, p_y_given_x) - - for i := 0; i < this.n_out; i++ { - dy[i] = float64(y[i]) - p_y_given_x[i] - - for j := 0; j < this.n_in; j++ { - this.W[i][j] += lr * dy[i] * float64(x[j]) / float64(this.N) - } - - this.b[i] += lr * dy[i] / float64(this.N) - } - + p_y_given_x := make([]float64, this.n_out) + dy := make([]float64, this.n_out) + + for i := 0; i < this.n_out; i++ { + p_y_given_x[i] = 0 + for j := 0; j < this.n_in; j++ { + p_y_given_x[i] += this.W[i][j] * float64(x[j]) + } + p_y_given_x[i] += this.b[i] + } + LogisticRegression_softmax(this, p_y_given_x) + 
+ for i := 0; i < this.n_out; i++ { + dy[i] = float64(y[i]) - p_y_given_x[i] + + for j := 0; j < this.n_in; j++ { + this.W[i][j] += lr * dy[i] * float64(x[j]) / float64(this.N) + } + + this.b[i] += lr * dy[i] / float64(this.N) + } + } func LogisticRegression_softmax(this *LogisticRegression, x []float64) { - var ( - max float64 - sum float64 - ) - - for i := 0; i < this.n_out; i++ { if max < x[i] {max = x[i]} } - for i := 0; i < this.n_out; i++ { - x[i] = math.Exp(x[i] - max) - sum += x[i] - } - - for i := 0; i < this.n_out; i++ { x[i] /= sum } + var ( + max float64 + sum float64 + ) + + for i := 0; i < this.n_out; i++ { if max < x[i] {max = x[i]} } + for i := 0; i < this.n_out; i++ { + x[i] = math.Exp(x[i] - max) + sum += x[i] + } + + for i := 0; i < this.n_out; i++ { x[i] /= sum } } func LogisticRegression_predict(this *LogisticRegression, x []int, y []float64) { - for i := 0; i < this.n_out; i++ { - y[i] = 0 - for j := 0; j < this.n_in; j++ { - y[i] += this.W[i][j] * float64(x[j]) - } - y[i] += this.b[i] - } - - LogisticRegression_softmax(this, y) + for i := 0; i < this.n_out; i++ { + y[i] = 0 + for j := 0; j < this.n_in; j++ { + y[i] += this.W[i][j] * float64(x[j]) + } + y[i] += this.b[i] + } + + LogisticRegression_softmax(this, y) } func test_lr() { - - learning_rate := 0.1 - n_epochs := 500 - - train_N := 6 - test_N := 2 - n_in := 6 - n_out := 2 - - - // training data - train_X := [][]int { - {1, 1, 1, 0, 0, 0}, - {1, 0, 1, 0, 0, 0}, - {1, 1, 1, 0, 0, 0}, - {0, 0, 1, 1, 1, 0}, - {0, 0, 1, 1, 0, 0}, - {0, 0, 1, 1, 1, 0}, - } - - - train_Y := [][]int { - {1, 0}, - {1, 0}, - {1, 0}, - {0, 1}, - {0, 1}, - {0, 1}, - } - - - // construct LogisticRegression - var classifier LogisticRegression - LogisticRegression__construct(&classifier, train_N, n_in, n_out) - - // train - for epoch := 0; epoch < n_epochs; epoch++ { - for i := 0; i < train_N; i++ { - LogisticRegression_train(&classifier, train_X[i], train_Y[i], learning_rate) - } - } - - // test data - test_X := [][]int { - {1, 0, 1, 0, 0, 0}, - {0, 0, 1, 1, 1, 0}, - } - - test_Y := make([][]float64, test_N) - for i := 0; i < test_N; i++ { test_Y[i] = make([]float64, n_out) } - - - // test - for i := 0; i < test_N; i++ { - LogisticRegression_predict(&classifier, test_X[i], test_Y[i]) - for j := 0; j < n_out; j++ { - fmt.Printf("%f ", test_Y[i][j]) - } - fmt.Printf("\n") - } - + + learning_rate := 0.1 + n_epochs := 500 + + train_N := 6 + test_N := 2 + n_in := 6 + n_out := 2 + + + // training data + train_X := [][]int { + {1, 1, 1, 0, 0, 0}, + {1, 0, 1, 0, 0, 0}, + {1, 1, 1, 0, 0, 0}, + {0, 0, 1, 1, 1, 0}, + {0, 0, 1, 1, 0, 0}, + {0, 0, 1, 1, 1, 0}, + } + + + train_Y := [][]int { + {1, 0}, + {1, 0}, + {1, 0}, + {0, 1}, + {0, 1}, + {0, 1}, + } + + + // construct LogisticRegression + var classifier LogisticRegression + LogisticRegression__construct(&classifier, train_N, n_in, n_out) + + // train + for epoch := 0; epoch < n_epochs; epoch++ { + for i := 0; i < train_N; i++ { + LogisticRegression_train(&classifier, train_X[i], train_Y[i], learning_rate) + } + } + + // test data + test_X := [][]int { + {1, 0, 1, 0, 0, 0}, + {0, 0, 1, 1, 1, 0}, + } + + test_Y := make([][]float64, test_N) + for i := 0; i < test_N; i++ { test_Y[i] = make([]float64, n_out) } + + + // test + for i := 0; i < test_N; i++ { + LogisticRegression_predict(&classifier, test_X[i], test_Y[i]) + for j := 0; j < n_out; j++ { + fmt.Printf("%f ", test_Y[i][j]) + } + fmt.Printf("\n") + } + } func main() { - test_lr() + test_lr() } From d2774d6f80a610c37a4b458dd13c8c7589fed25e Mon Sep 
17 00:00:00 2001 From: Yusuke Sugomori <me@yusugomori.com> Date: Sun, 7 Dec 2014 12:32:12 +0900 Subject: [PATCH 16/45] RBM.go --- go/RBM.go | 200 ++++++++++++++++++++++++++++++++++++++++++++++ go/utils/utils.go | 28 +++++++ 2 files changed, 228 insertions(+) create mode 100644 go/RBM.go create mode 100644 go/utils/utils.go diff --git a/go/RBM.go b/go/RBM.go new file mode 100644 index 0000000..6369da5 --- /dev/null +++ b/go/RBM.go @@ -0,0 +1,200 @@ +package main + +import ( + "fmt" + "math/rand" + u "./utils" +) + +type RBM struct { + N int + n_visible int + n_hidden int + W [][]float64 + hbias []float64 + vbias []float64 +} + + +func RBM__construct(this *RBM, N int, n_visible int, n_hidden int, W [][]float64, hbias []float64, vbias []float64) { + a := 1.0 / float64(n_visible) + + this.N = N + this.n_visible = n_visible + this.n_hidden = n_hidden + + if W == nil { + this.W = make([][]float64, n_hidden) + for i := 0; i < n_hidden; i++ { this.W[i] = make([]float64, n_visible) } + + for i := 0; i < n_hidden; i++ { + for j := 0; j < n_visible; j++ { + this.W[i][j] = u.Uniform(-a, a) + } + } + } else { + this.W = W + } + + if hbias == nil { + this.hbias = make([]float64, n_hidden) + } else { + this.hbias = hbias + } + + if vbias == nil { + this.vbias = make([]float64, n_visible) + } else { + this.vbias = vbias + } +} + +func RBM_contrastive_divergence(this *RBM, input []int, lr float64, k int) { + ph_mean := make([]float64, this.n_hidden) + ph_sample := make([]int, this.n_hidden) + nv_means := make([]float64, this.n_visible) + nv_samples := make([]int, this.n_visible) + nh_means := make([]float64, this.n_hidden) + nh_samples := make([]int, this.n_hidden) + + /* CD-k */ + RBM_sample_h_given_v(this, input, ph_mean, ph_sample) + + for step := 0; step < k; step++ { + if step == 0 { + RBM_gibbs_hvh(this, ph_sample, nv_means, nv_samples, nh_means, nh_samples) + } else { + RBM_gibbs_hvh(this, nh_samples, nv_means, nv_samples, nh_means, nh_samples) + } + } + + for i := 0; i < this.n_hidden; i++ { + for j := 0; j < this.n_visible; j++ { + this.W[i][j] += lr * (ph_mean[i] * float64(input[j]) - nh_means[i] * float64(nv_samples[j])) / float64(this.N) + } + this.hbias[i] += lr * (float64(ph_sample[i]) - nh_means[i]) / float64(this.N) + } + + for i := 0; i < this.n_visible; i++ { + this.vbias[i] += lr * float64(input[i] - nv_samples[i]) / float64(this.N) + } +} + +func RBM_sample_h_given_v(this *RBM, v0_sample []int, mean []float64, sample []int) { + for i := 0; i < this.n_hidden; i++ { + mean[i] = RBM_propup(this, v0_sample, this.W[i], this.hbias[i]) + sample[i] = u.Binomial(1, mean[i]) + } +} + +func RBM_sample_v_given_h(this *RBM, h0_sample []int, mean []float64, sample []int) { + for i := 0; i < this.n_visible; i++ { + mean[i] = RBM_propdown(this, h0_sample, i, this.vbias[i]) + sample[i] = u.Binomial(1, mean[i]) + } +} + +func RBM_propup(this *RBM, v []int, w []float64, b float64) float64 { + pre_sigmoid_activation := 0.0 + + for j := 0; j < this.n_visible; j++ { + pre_sigmoid_activation += w[j] * float64(v[j]) + } + pre_sigmoid_activation += b + + return u.Sigmoid(pre_sigmoid_activation) +} + +func RBM_propdown(this *RBM, h []int, i int, b float64) float64 { + pre_sigmoid_activation := 0.0 + + for j := 0; j < this.n_hidden; j++ { + pre_sigmoid_activation += this.W[j][i] * float64(h[j]) + } + pre_sigmoid_activation += b + + return u.Sigmoid(pre_sigmoid_activation) +} + +func RBM_gibbs_hvh(this *RBM, h0_sample []int, nv_means []float64, nv_samples []int, nh_means []float64, nh_samples []int) { + 
RBM_sample_v_given_h(this, h0_sample, nv_means, nv_samples) + RBM_sample_h_given_v(this, nv_samples, nh_means, nh_samples) +} + +func RBM_reconstruct(this *RBM, v []int, reconstructed_v []float64) { + h := make([]float64, this.n_hidden) + var pre_sigmoid_activation float64 + + for i := 0; i < this.n_hidden; i++ { + h[i] = RBM_propup(this, v, this.W[i], this.hbias[i]) + } + + for i := 0; i < this.n_visible; i++ { + pre_sigmoid_activation = 0.0 + for j := 0; j < this.n_hidden; j++ { + pre_sigmoid_activation += this.W[j][i] * h[j] + } + pre_sigmoid_activation += this.vbias[i] + + reconstructed_v[i] = u.Sigmoid(pre_sigmoid_activation) + } +} + + +func test_rbm() { + rand.Seed(0) + + learning_rate := 0.1 + training_epochs := 1000 + k := 1 + + train_N := 6 + test_N := 2 + n_visible := 6 + n_hidden := 3 + + // training data + train_X := [][]int { + {1, 1, 1, 0, 0, 0}, + {1, 0, 1, 0, 0, 0}, + {1, 1, 1, 0, 0, 0}, + {0, 0, 1, 1, 1, 0}, + {0, 0, 1, 0, 1, 0}, + {0, 0, 1, 1, 1, 0}, + } + + + // construct RBM + var rbm RBM + RBM__construct(&rbm, train_N, n_visible, n_hidden, nil, nil, nil) + + // train + for epoch := 0; epoch < training_epochs; epoch++ { + for i := 0; i < train_N; i++ { + RBM_contrastive_divergence(&rbm, train_X[i], learning_rate, k) + } + } + + // test data + test_X := [][]int { + {1, 1, 0, 0, 0, 0}, + {0, 0, 0, 1, 1, 0}, + } + reconstructed_X := make([][]float64, test_N) + for i := 0; i < test_N; i++ { reconstructed_X[i] = make([]float64, n_visible)} + + + // test + for i := 0; i < test_N; i++ { + RBM_reconstruct(&rbm, test_X[i], reconstructed_X[i]) + for j := 0; j < n_visible; j++ { + fmt.Printf("%.5f ", reconstructed_X[i][j]) + } + fmt.Printf("\n") + } +} + + +func main() { + test_rbm() +} diff --git a/go/utils/utils.go b/go/utils/utils.go new file mode 100644 index 0000000..44b3af2 --- /dev/null +++ b/go/utils/utils.go @@ -0,0 +1,28 @@ +package utils + +import ( + "math" + "math/rand" +) + +func Uniform(min float64, max float64) float64 { + return rand.Float64() * (max - min) + min +} + +func Binomial(n int, p float64) int { + if p < 0 || p > 1 { return 0 } + + c := 0 + var r float64 + + for i := 0; i < n; i++ { + r = rand.Float64() + if r < p { c++ } + } + + return c +} + +func Sigmoid(x float64) float64 { + return 1.0 / (1.0 + math.Exp(-x)) +} From 76a32007bfe02b607f1ecf278b9f0c6623f2b834 Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori <me@yusugomori.com> Date: Sun, 7 Dec 2014 15:21:19 +0900 Subject: [PATCH 17/45] DBN.go --- go/DBN.go | 237 ++++++++++++++++++++ go/HiddenLayer/HiddenLayer.go | 60 +++++ go/LogisticRegression/LogisticRegression.go | 77 +++++++ go/RBM/RBM.go | 139 ++++++++++++ 4 files changed, 513 insertions(+) create mode 100644 go/DBN.go create mode 100644 go/HiddenLayer/HiddenLayer.go create mode 100644 go/LogisticRegression/LogisticRegression.go create mode 100644 go/RBM/RBM.go diff --git a/go/DBN.go b/go/DBN.go new file mode 100644 index 0000000..498b473 --- /dev/null +++ b/go/DBN.go @@ -0,0 +1,237 @@ +package main + +import ( + "fmt" + "math/rand" + u "./utils" + H "./HiddenLayer" + R "./RBM" + L "./LogisticRegression" +) + +type DBN struct { + N int + n_ins int + hidden_layer_sizes []int + n_outs int + n_layers int + sigmoid_layers []H.HiddenLayer + rbm_layers []R.RBM + log_layer L.LogisticRegression +} + + +func DBN__construct(this *DBN, N int, n_ins int, hidden_layer_sizes []int, n_outs int, n_layers int) { + var input_size int + + this.N = N + this.n_ins = n_ins + this.hidden_layer_sizes = hidden_layer_sizes + this.n_outs = n_outs + this.n_layers = n_layers 
+ + this.sigmoid_layers = make([]H.HiddenLayer, n_layers) + this.rbm_layers = make([]R.RBM, n_layers) + + // construct multi-layer + for i := 0; i < n_layers; i++ { + if i == 0 { + input_size = n_ins + } else { + input_size = hidden_layer_sizes[i-1] + } + + // construct sigmoid_layer + H.HiddenLayer__construct(&(this.sigmoid_layers[i]), N, input_size, hidden_layer_sizes[i], nil, nil) + + // construct rbm_layer + R.RBM__construct(&(this.rbm_layers[i]), N, input_size, hidden_layer_sizes[i], this.sigmoid_layers[i].W, this.sigmoid_layers[i].B, nil) + } + + // layer for output using LogisticRegression + L.LogisticRegression__construct(&(this.log_layer), N, hidden_layer_sizes[n_layers-1], n_outs) +} + +func DBN_pretrain(this *DBN, train_X [][]int, lr float64, k int, epochs int){ + var ( + layer_input []int + prev_layer_input_size int + prev_layer_input []int + ) + + + for i := 0; i < this.n_layers; i++ { // layer-wise + for epoch := 0; epoch < epochs; epoch++ { // training epochs + for n := 0; n < this.N; n++ { // input x1...xN + + // layer input + for l := 0; l <= i; l++ { + if l == 0 { + layer_input = make([]int, this.n_ins) + for j := 0; j < this.n_ins; j++ { layer_input[j] = train_X[n][j] } + } else { + if l == 1 { + prev_layer_input_size = this.n_ins + } else { + prev_layer_input_size = this.hidden_layer_sizes[l-2] + } + + prev_layer_input = make([]int, prev_layer_input_size) + for j := 0; j < prev_layer_input_size; j++ { prev_layer_input[j] = layer_input[j] } + + layer_input = make([]int, this.hidden_layer_sizes[l-1]) + + H.HiddenLayer_sample_h_given_v(&(this.sigmoid_layers[l-1]), prev_layer_input, layer_input) + } + } + + R.RBM_contrastive_divergence(&(this.rbm_layers[i]), layer_input, lr, k) + } + } + } +} + +func DBN_finetune(this *DBN, train_X [][]int, train_Y [][]int, lr float64, epochs int) { + var ( + layer_input []int + prev_layer_input []int + ) + + for epoch := 0; epoch < epochs; epoch++ { + for n := 0; n < this.N; n++ { // input x1...xN + + // layer input + for i := 0; i < this.n_layers; i++ { + if i == 0 { + prev_layer_input = make([]int, this.n_ins) + for j := 0; j < this.n_ins; j++ { prev_layer_input[j] = train_X[n][j] } + } else { + prev_layer_input = make([]int, this.hidden_layer_sizes[i-1]) + for j:= 0; j < this.hidden_layer_sizes[i-1]; j++ { prev_layer_input[j] = layer_input[j] } + } + + layer_input = make([]int, this.hidden_layer_sizes[i]) + H.HiddenLayer_sample_h_given_v(&(this.sigmoid_layers[i]), prev_layer_input, layer_input) + } + + L.LogisticRegression_train(&(this.log_layer), layer_input, train_Y[n], lr) + } + // lr *= 0.95 + } +} + +func DBN_predict(this *DBN, x []int, y []float64) { + var ( + layer_input []float64 + ) + prev_layer_input := make([]float64, this.n_ins) + for j := 0; j < this.n_ins; j++ { prev_layer_input[j] = float64(x[j]) } + + + // layer activation + for i := 0; i < this.n_layers; i++ { + layer_input = make([]float64, this.sigmoid_layers[i].N_out) + + for k := 0; k < this.sigmoid_layers[i].N_out; k++ { + linear_outuput := 0.0 + + for j := 0; j < this.sigmoid_layers[i].N_in; j++ { + linear_outuput += this.sigmoid_layers[i].W[k][j] * prev_layer_input[j] + } + linear_outuput += this.sigmoid_layers[i].B[k] + layer_input[k] = u.Sigmoid(linear_outuput) + } + + if i < this.n_layers-1 { + prev_layer_input = make([]float64, this.sigmoid_layers[i].N_out) + + for j := 0; j < this.sigmoid_layers[i].N_out; j++ { + prev_layer_input[j] = layer_input[j] + } + } + } + + for i := 0; i < this.log_layer.N_out; i++ { + y[i] = 0 + for j := 0; j < this.log_layer.N_in; 
j++ { + y[i] += this.log_layer.W[i][j] * layer_input[j] + } + y[i] += this.log_layer.B[i] + } + + L.LogisticRegression_softmax(&(this.log_layer), y) +} + + +func test_dbn() { + rand.Seed(0) + + pretrain_lr := 0.1 + pretraining_epochs := 1000 + k := 1 + fintune_lr := 0.1 + fintune_epochs := 500 + + train_N := 6 + test_N := 4 + n_ins := 6 + n_outs := 2 + hidden_layer_sizes := []int {3, 3} + n_layers := len(hidden_layer_sizes) + + + // training data + train_X := [][]int { + {1, 1, 1, 0, 0, 0}, + {1, 0, 1, 0, 0, 0}, + {1, 1, 1, 0, 0, 0}, + {0, 0, 1, 1, 1, 0}, + {0, 0, 1, 1, 0, 0}, + {0, 0, 1, 1, 1, 0}, + } + + train_Y := [][]int { + {1, 0}, + {1, 0}, + {1, 0}, + {0, 1}, + {0, 1}, + {0, 1}, + } + + // construct DBN + var dbn DBN + DBN__construct(&dbn, train_N, n_ins, hidden_layer_sizes, n_outs, n_layers) + + // pretrain + DBN_pretrain(&dbn, train_X, pretrain_lr, k, pretraining_epochs) + + // finetune + DBN_finetune(&dbn, train_X, train_Y, fintune_lr, fintune_epochs) + + // test data + test_X := [][]int { + {1, 1, 0, 0, 0, 0}, + {1, 1, 1, 1, 0, 0}, + {0, 0, 0, 1, 1, 0}, + {0, 0, 1, 1, 1, 0}, + } + + test_Y := make([][]float64, test_N) + for i := 0; i < test_N; i++ { test_Y[i] = make([]float64, n_outs)} + + // test + for i := 0; i < test_N; i++ { + DBN_predict(&dbn, test_X[i], test_Y[i]) + for j := 0; j < n_outs; j++ { + fmt.Printf("%.5f ", test_Y[i][j]) + } + fmt.Printf("\n") + } +} + + + +func main() { + test_dbn() +} diff --git a/go/HiddenLayer/HiddenLayer.go b/go/HiddenLayer/HiddenLayer.go new file mode 100644 index 0000000..995ca44 --- /dev/null +++ b/go/HiddenLayer/HiddenLayer.go @@ -0,0 +1,60 @@ +package HiddenLayer + +import ( + u "../utils" +) + + +type HiddenLayer struct { + N int + N_in int + N_out int + W [][]float64 + B []float64 +} + + +// HiddenLayer +func HiddenLayer__construct(this *HiddenLayer, N int, n_in int, n_out int, W [][]float64, b []float64) { + a := 1.0 / float64(n_in) + + this.N = N + this.N_in = n_in + this.N_out = n_out + + if W == nil { + this.W = make([][]float64, n_out) + for i := 0; i < n_out; i++ { this.W[i] = make([]float64, n_in) } + + for i := 0; i < n_out; i++ { + for j := 0; j < n_in; j++ { + this.W[i][j] = u.Uniform(-a, a) + } + } + } else { + this.W = W + } + + if b == nil { + this.B = make([]float64, n_out) + } else { + this.B = b + } +} + +func HiddenLayer_output(this *HiddenLayer, input []int, w []float64, b float64) float64 { + linear_output := 0.0 + + for j := 0; j < this.N_in; j++ { + linear_output += w[j] * float64(input[j]) + } + linear_output += b + + return u.Sigmoid(linear_output) +} + +func HiddenLayer_sample_h_given_v(this *HiddenLayer, input []int, sample []int) { + for i := 0; i < this.N_out; i++ { + sample[i] = u.Binomial(1, HiddenLayer_output(this, input, this.W[i], this.B[i])) + } +} diff --git a/go/LogisticRegression/LogisticRegression.go b/go/LogisticRegression/LogisticRegression.go new file mode 100644 index 0000000..2f68ef5 --- /dev/null +++ b/go/LogisticRegression/LogisticRegression.go @@ -0,0 +1,77 @@ +package LogisticRegression + +import ( + "math" +) + +type LogisticRegression struct { + N int + N_in int + N_out int + W [][]float64 + B []float64 +} + + +func LogisticRegression__construct(this *LogisticRegression, N int, n_in int, n_out int) { + this.N = N + this.N_in = n_in + this.N_out = n_out + + this.W = make([][]float64, n_out) + for i := 0; i < n_out; i++ { this.W[i] = make([]float64, n_in) } + + this.B = make([]float64, n_out) +} + +func LogisticRegression_train(this *LogisticRegression, x []int, y []int, lr float64) { + 
p_y_given_x := make([]float64, this.N_out) + dy := make([]float64, this.N_out) + + for i := 0; i < this.N_out; i++ { + p_y_given_x[i] = 0 + for j := 0; j < this.N_in; j++ { + p_y_given_x[i] += this.W[i][j] * float64(x[j]) + } + p_y_given_x[i] += this.B[i] + } + LogisticRegression_softmax(this, p_y_given_x) + + for i := 0; i < this.N_out; i++ { + dy[i] = float64(y[i]) - p_y_given_x[i] + + for j := 0; j < this.N_in; j++ { + this.W[i][j] += lr * dy[i] * float64(x[j]) / float64(this.N) + } + + this.B[i] += lr * dy[i] / float64(this.N) + } + +} + +func LogisticRegression_softmax(this *LogisticRegression, x []float64) { + var ( + max float64 + sum float64 + ) + + for i := 0; i < this.N_out; i++ { if max < x[i] {max = x[i]} } + for i := 0; i < this.N_out; i++ { + x[i] = math.Exp(x[i] - max) + sum += x[i] + } + + for i := 0; i < this.N_out; i++ { x[i] /= sum } +} + +func LogisticRegression_predict(this *LogisticRegression, x []int, y []float64) { + for i := 0; i < this.N_out; i++ { + y[i] = 0 + for j := 0; j < this.N_in; j++ { + y[i] += this.W[i][j] * float64(x[j]) + } + y[i] += this.B[i] + } + + LogisticRegression_softmax(this, y) +} diff --git a/go/RBM/RBM.go b/go/RBM/RBM.go new file mode 100644 index 0000000..708f8b7 --- /dev/null +++ b/go/RBM/RBM.go @@ -0,0 +1,139 @@ +package RBM + +import ( + u "../utils" +) + +type RBM struct { + N int + n_visible int + n_hidden int + W [][]float64 + hbias []float64 + vbias []float64 +} + + +func RBM__construct(this *RBM, N int, n_visible int, n_hidden int, W [][]float64, hbias []float64, vbias []float64) { + a := 1.0 / float64(n_visible) + + this.N = N + this.n_visible = n_visible + this.n_hidden = n_hidden + + if W == nil { + this.W = make([][]float64, n_hidden) + for i := 0; i < n_hidden; i++ { this.W[i] = make([]float64, n_visible) } + + for i := 0; i < n_hidden; i++ { + for j := 0; j < n_visible; j++ { + this.W[i][j] = u.Uniform(-a, a) + } + } + } else { + this.W = W + } + + if hbias == nil { + this.hbias = make([]float64, n_hidden) + } else { + this.hbias = hbias + } + + if vbias == nil { + this.vbias = make([]float64, n_visible) + } else { + this.vbias = vbias + } +} + +func RBM_contrastive_divergence(this *RBM, input []int, lr float64, k int) { + ph_mean := make([]float64, this.n_hidden) + ph_sample := make([]int, this.n_hidden) + nv_means := make([]float64, this.n_visible) + nv_samples := make([]int, this.n_visible) + nh_means := make([]float64, this.n_hidden) + nh_samples := make([]int, this.n_hidden) + + /* CD-k */ + RBM_sample_h_given_v(this, input, ph_mean, ph_sample) + + for step := 0; step < k; step++ { + if step == 0 { + RBM_gibbs_hvh(this, ph_sample, nv_means, nv_samples, nh_means, nh_samples) + } else { + RBM_gibbs_hvh(this, nh_samples, nv_means, nv_samples, nh_means, nh_samples) + } + } + + for i := 0; i < this.n_hidden; i++ { + for j := 0; j < this.n_visible; j++ { + this.W[i][j] += lr * (ph_mean[i] * float64(input[j]) - nh_means[i] * float64(nv_samples[j])) / float64(this.N) + } + this.hbias[i] += lr * (float64(ph_sample[i]) - nh_means[i]) / float64(this.N) + } + + for i := 0; i < this.n_visible; i++ { + this.vbias[i] += lr * float64(input[i] - nv_samples[i]) / float64(this.N) + } +} + +func RBM_sample_h_given_v(this *RBM, v0_sample []int, mean []float64, sample []int) { + for i := 0; i < this.n_hidden; i++ { + mean[i] = RBM_propup(this, v0_sample, this.W[i], this.hbias[i]) + sample[i] = u.Binomial(1, mean[i]) + } +} + +func RBM_sample_v_given_h(this *RBM, h0_sample []int, mean []float64, sample []int) { + for i := 0; i < this.n_visible; 
i++ { + mean[i] = RBM_propdown(this, h0_sample, i, this.vbias[i]) + sample[i] = u.Binomial(1, mean[i]) + } +} + +func RBM_propup(this *RBM, v []int, w []float64, b float64) float64 { + pre_sigmoid_activation := 0.0 + + for j := 0; j < this.n_visible; j++ { + pre_sigmoid_activation += w[j] * float64(v[j]) + } + pre_sigmoid_activation += b + + return u.Sigmoid(pre_sigmoid_activation) +} + +func RBM_propdown(this *RBM, h []int, i int, b float64) float64 { + pre_sigmoid_activation := 0.0 + + for j := 0; j < this.n_hidden; j++ { + pre_sigmoid_activation += this.W[j][i] * float64(h[j]) + } + pre_sigmoid_activation += b + + return u.Sigmoid(pre_sigmoid_activation) +} + +func RBM_gibbs_hvh(this *RBM, h0_sample []int, nv_means []float64, nv_samples []int, nh_means []float64, nh_samples []int) { + RBM_sample_v_given_h(this, h0_sample, nv_means, nv_samples) + RBM_sample_h_given_v(this, nv_samples, nh_means, nh_samples) +} + +func RBM_reconstruct(this *RBM, v []int, reconstructed_v []float64) { + h := make([]float64, this.n_hidden) + var pre_sigmoid_activation float64 + + for i := 0; i < this.n_hidden; i++ { + h[i] = RBM_propup(this, v, this.W[i], this.hbias[i]) + } + + for i := 0; i < this.n_visible; i++ { + pre_sigmoid_activation = 0.0 + for j := 0; j < this.n_hidden; j++ { + pre_sigmoid_activation += this.W[j][i] * h[j] + } + pre_sigmoid_activation += this.vbias[i] + + reconstructed_v[i] = u.Sigmoid(pre_sigmoid_activation) + } +} From daded50a016a085dcfe8f7c43b881c344ac4a342 Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori <me@yusugomori.com> Date: Sat, 13 Dec 2014 21:43:14 +0900 Subject: [PATCH 18/45] dA.go --- go/dA.go | 195 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 195 insertions(+) create mode 100644 go/dA.go diff --git a/go/dA.go b/go/dA.go new file mode 100644 index 0000000..3b14c6c --- /dev/null +++ b/go/dA.go @@ -0,0 +1,195 @@ +package main + +import ( + "os" + "fmt" + "math/rand" + u "./utils" +) + +type dA struct { + N int + n_visible int + n_hidden int + W [][]float64 + hbias []float64 + vbias []float64 +} + + +func dA__construct(this *dA, N int, n_visible int, n_hidden int, W [][]float64, hbias []float64, vbias []float64) { + a := 1.0 / float64(n_visible) + + this.N = N + this.n_visible = n_visible + this.n_hidden = n_hidden + + if W == nil { + this.W = make([][]float64, n_hidden) + for i := 0; i < n_hidden; i++ { this.W[i] = make([]float64, n_visible) } + + for i := 0; i < n_hidden; i++ { + for j := 0; j < n_visible; j++ { + this.W[i][j] = u.Uniform(-a, a) + } + } + } else { + this.W = W + } + + if hbias == nil { + this.hbias = make([]float64, n_hidden) + } else { + this.hbias = hbias + } + + if vbias == nil { + this.vbias = make([]float64, n_visible) + } else { + this.vbias = vbias + } +} + +func dA_get_corrupted_input(this *dA, x []int, tilde_x []int, p float64) { + for i := 0; i < this.n_visible; i++ { + if x[i] == 0 { + tilde_x[i] = 0 + } else { + tilde_x[i] = u.Binomial(1, p) + } + } +} + +// Encode +func dA_get_hidden_values(this *dA, x []int, y []float64) { + for i := 0; i < this.n_hidden; i++ { + y[i] = 0 + for j := 0; j < this.n_visible; j++ { + y[i] += this.W[i][j] * float64(x[j]) + } + y[i] += this.hbias[i] + y[i] = u.Sigmoid(y[i]) + } +} + +// Decode +func dA_get_reconstructed_input(this *dA, y []float64, z []float64) { + for i := 0; i < this.n_visible; i++ { + z[i] = 0 + for j := 0; j < this.n_hidden; j++ { + z[i] += this.W[j][i] * y[j] + } + z[i] += this.vbias[i] + z[i] = u.Sigmoid(z[i]) + } +} + +func dA_train(this *dA, x []int, lr float64, 
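go/RBM/RBM.go is a library package with no test function of its own; in this series it is only driven through DBN.go. A hypothetical standalone driver, assuming the relative-import layout used by DBN.go and the utils package sketched above, would train a single RBM with CD-1 and then reconstruct an input:

package main

import (
    "fmt"

    R "./RBM"
)

func main() {
    train_X := [][]int{
        {1, 1, 1, 0, 0, 0},
        {1, 0, 1, 0, 0, 0},
        {0, 0, 1, 1, 1, 0},
        {0, 0, 1, 1, 0, 0},
    }

    // 4 samples, 6 visible units, 3 hidden units; passing nil lets the
    // package initialize W uniformly and the biases to zero.
    var rbm R.RBM
    R.RBM__construct(&rbm, len(train_X), 6, 3, nil, nil, nil)

    // CD-1 training with learning rate 0.1.
    for epoch := 0; epoch < 1000; epoch++ {
        for i := 0; i < len(train_X); i++ {
            R.RBM_contrastive_divergence(&rbm, train_X[i], 0.1, 1)
        }
    }

    // Reconstruct the first training vector through the learned weights.
    reconstructed := make([]float64, 6)
    R.RBM_reconstruct(&rbm, train_X[0], reconstructed)
    fmt.Println(reconstructed)
}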
corruption_level float64) { + tilde_x := make([]int, this.n_visible) + y := make([]float64, this.n_hidden) + z := make([]float64, this.n_visible) + + L_vbias := make([]float64, this.n_visible) + L_hbias := make([]float64, this.n_hidden) + + p := 1 - corruption_level + + dA_get_corrupted_input(this, x, tilde_x, p) + dA_get_hidden_values(this, tilde_x, y) + dA_get_reconstructed_input(this, y, z) + + // vbias + for i := 0; i < this.n_visible; i++ { + L_vbias[i] = float64(x[i]) - z[i] + this.vbias[i] += lr * L_vbias[i] / float64(this.N) + } + + // hbias + for i := 0; i < this.n_hidden; i++ { + L_hbias[i] = 0 + for j := 0; j < this.n_visible; j++ { + L_hbias[i] += this.W[i][j] * L_vbias[j] + } + L_hbias[i] *= y[i] * (1- y[i]) + this.hbias[i] += lr * L_hbias[i] / float64(this.N) + } + + // W + for i := 0; i < this.n_hidden; i++ { + for j := 0; j < this.n_visible; j++ { + this.W[i][j] += lr * (L_hbias[i] * float64(tilde_x[j]) + L_vbias[j] * y[i]) / float64(this.N) + } + } +} + +func dA_reconstruct(this *dA, x []int, z []float64) { + y := make([]float64, this.n_hidden) + + dA_get_hidden_values(this, x, y) + dA_get_reconstructed_input(this, y, z) +} + + + + +func test_dA() { + rand.Seed(0) + + learning_rate := 0.1 + corruption_level := 0.3 + training_epochs := 1000 + + train_N := 6 + test_N := 2 + n_visible := 20 + n_hidden := 5 + + // training data + train_X := [][]int { + {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0}, + } + + // construct dA + var da dA + dA__construct(&da, train_N, n_visible, n_hidden, nil, nil, nil) + + // train + for epoch := 0; epoch < training_epochs; epoch++ { + for i := 0; i < train_N; i++ { + dA_train(&da, train_X[i], learning_rate, corruption_level) + } + } + + // test data + test_X := [][]int { + {1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0}, + } + reconstructed_X := make([][]float64, test_N) + for i := 0; i < test_N; i++ { reconstructed_X[i] = make([]float64, n_visible)} + + + // test + for i := 0; i < test_N; i++ { + dA_reconstruct(&da, test_X[i], reconstructed_X[i]) + for j := 0; j < n_visible; j++ { + fmt.Printf("%.5f ", reconstructed_X[i][j]) + } + fmt.Printf("\n") + } + os.Exit(0) +} + +func main() { + test_dA() + +} From 3dfb34a81b0b31f4a440634f6f5f5e4d8feccbc5 Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori <me@yusugomori.com> Date: Sat, 13 Dec 2014 21:44:26 +0900 Subject: [PATCH 19/45] minor fix --- go/dA.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/go/dA.go b/go/dA.go index 3b14c6c..a36c226 100644 --- a/go/dA.go +++ b/go/dA.go @@ -1,7 +1,6 @@ package main import ( - "os" "fmt" "math/rand" u "./utils" @@ -186,10 +185,8 @@ func test_dA() { } fmt.Printf("\n") } - os.Exit(0) } func main() { test_dA() - } From 50a09be13f219008bae939fb2ef173994044ca51 Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori <me@yusugomori.com> Date: Sun, 14 Dec 2014 16:55:49 +0900 Subject: [PATCH 20/45] SdA.go --- go/DBN.go | 
6 +- go/SdA.go | 241 ++++++++++++++++++++++++++++++++++++++++++++++++++++ go/dA/dA.go | 128 ++++++++++++++++++++++++++++ 3 files changed, 372 insertions(+), 3 deletions(-) create mode 100644 go/SdA.go create mode 100644 go/dA/dA.go diff --git a/go/DBN.go b/go/DBN.go index 498b473..7962a5a 100644 --- a/go/DBN.go +++ b/go/DBN.go @@ -169,8 +169,8 @@ func test_dbn() { pretrain_lr := 0.1 pretraining_epochs := 1000 k := 1 - fintune_lr := 0.1 - fintune_epochs := 500 + finetune_lr := 0.1 + finetune_epochs := 500 train_N := 6 test_N := 4 @@ -207,7 +207,7 @@ func test_dbn() { DBN_pretrain(&dbn, train_X, pretrain_lr, k, pretraining_epochs) // finetune - DBN_finetune(&dbn, train_X, train_Y, fintune_lr, fintune_epochs) + DBN_finetune(&dbn, train_X, train_Y, finstune_lr, finetune_epochs) // test data test_X := [][]int { diff --git a/go/SdA.go b/go/SdA.go new file mode 100644 index 0000000..27ccaf6 --- /dev/null +++ b/go/SdA.go @@ -0,0 +1,241 @@ +package main + +import ( + "fmt" + "math/rand" + u "./utils" + H "./HiddenLayer" + D "./dA" + L "./LogisticRegression" +) + +type SdA struct { + N int + n_ins int + hidden_layer_sizes []int + n_outs int + n_layers int + sigmoid_layers []H.HiddenLayer + dA_layers []D.DA + log_layer L.LogisticRegression +} + + +func SdA__construct(this *SdA, N int, n_ins int, hidden_layer_sizes []int, n_outs int, n_layers int) { + var input_size int + + this.N = N + this.n_ins = n_ins + this.hidden_layer_sizes = hidden_layer_sizes + this.n_outs = n_outs + this.n_layers = n_layers + + this.sigmoid_layers = make([]H.HiddenLayer, n_layers) + this.dA_layers = make([]D.DA, n_layers) + + // construct multi-layer + for i := 0; i < n_layers; i++ { + if i == 0 { + input_size = n_ins + } else { + input_size = hidden_layer_sizes[i-1] + } + + // construct sigmoid_layer + H.HiddenLayer__construct(&(this.sigmoid_layers[i]), N, input_size, hidden_layer_sizes[i], nil, nil) + + // construct dA_layer + D.DA__construct(&(this.dA_layers[i]), N, input_size, hidden_layer_sizes[i], this.sigmoid_layers[i].W, this.sigmoid_layers[i].B, nil) + } + + // layer for output using LogisticRegression + L.LogisticRegression__construct(&(this.log_layer), N, hidden_layer_sizes[n_layers-1], n_outs) +} + +func SdA_pretrain(this *SdA, train_X [][]int, lr float64, corruption_level float64, epochs int) { + var ( + layer_input []int + prev_layer_input_size int + prev_layer_input []int + ) + for i := 0; i < this.n_layers; i++ { // layer-wise + for epoch := 0; epoch < epochs; epoch++ { // training epochs + for n := 0; n < this.N; n++ { // input x1...xN + + // layer input + for l := 0; l <= i; l++ { + if l == 0 { + layer_input = make([]int, this.n_ins) + for j := 0; j < this.n_ins; j++ { layer_input[j] = train_X[n][j] } + } else { + if l == 1 { + prev_layer_input_size = this.n_ins + } else { + prev_layer_input_size = this.hidden_layer_sizes[l-2] + } + + prev_layer_input = make([]int, prev_layer_input_size) + for j := 0; j < prev_layer_input_size; j++ { prev_layer_input[j] = layer_input[j] } + + layer_input = make([]int, this.hidden_layer_sizes[l-1]) + + H.HiddenLayer_sample_h_given_v(&(this.sigmoid_layers[l-1]), prev_layer_input, layer_input) + } + } + + D.DA_train(&(this.dA_layers[i]), layer_input, lr, corruption_level) + } + } + } +} + +func SdA_finetune(this *SdA, train_X [][]int, train_Y [][]int, lr float64, epochs int) { + var ( + layer_input []int + prev_layer_input []int + ) + + for epoch := 0; epoch < epochs; epoch++ { + for n := 0; n < this.N; n++ { // input x1...xN + + // layer input + for i := 0; i < this.n_layers; 
i++ { + if i == 0 { + prev_layer_input = make([]int, this.n_ins) + for j := 0; j < this.n_ins; j++ { prev_layer_input[j] = train_X[n][j] } + } else { + prev_layer_input = make([]int, this.hidden_layer_sizes[i-1]) + for j:= 0; j < this.hidden_layer_sizes[i-1]; j++ { prev_layer_input[j] = layer_input[j] } + } + + layer_input = make([]int, this.hidden_layer_sizes[i]) + H.HiddenLayer_sample_h_given_v(&(this.sigmoid_layers[i]), prev_layer_input, layer_input) + } + + L.LogisticRegression_train(&(this.log_layer), layer_input, train_Y[n], lr) + } + // lr *= 0.95 + } +} + +func SdA_predict(this *SdA, x []int, y []float64) { + var ( + layer_input []float64 + ) + prev_layer_input := make([]float64, this.n_ins) + for j := 0; j < this.n_ins; j++ { prev_layer_input[j] = float64(x[j]) } + + // layer activation + for i := 0; i < this.n_layers; i++ { + layer_input = make([]float64, this.sigmoid_layers[i].N_out) + + for k := 0; k < this.sigmoid_layers[i].N_out; k++ { + linear_outuput := 0.0 + + for j := 0; j < this.sigmoid_layers[i].N_in; j++ { + linear_outuput += this.sigmoid_layers[i].W[k][j] * prev_layer_input[j] + } + linear_outuput += this.sigmoid_layers[i].B[k] + layer_input[k] = u.Sigmoid(linear_outuput) + } + + if i < this.n_layers-1 { + prev_layer_input = make([]float64, this.sigmoid_layers[i].N_out) + + for j := 0; j < this.sigmoid_layers[i].N_out; j++ { + prev_layer_input[j] = layer_input[j] + } + } + } + + for i := 0; i < this.log_layer.N_out; i++ { + y[i] = 0 + for j := 0; j < this.log_layer.N_in; j++ { + y[i] += this.log_layer.W[i][j] * layer_input[j] + } + y[i] += this.log_layer.B[i] + } + + L.LogisticRegression_softmax(&(this.log_layer), y) +} + +func test_SdA() { + rand.Seed(0) + + pretrain_lr := 0.1 + corruption_level := 0.3 + pretraining_epochs := 1000 + finetune_lr := 0.1 + finetune_epochs := 500 + + train_N := 10 + test_N := 4 + n_ins := 28 + n_outs := 2 + hidden_layer_sizes := []int {15, 15} + n_layers := len(hidden_layer_sizes) + + + // training data + train_X := [][]int { + {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1}, + } + + train_Y := [][]int { + {1, 0}, + {1, 0}, + {1, 0}, + {1, 0}, + {1, 0}, + {0, 1}, + {0, 1}, + {0, 1}, + {0, 1}, + {0, 1}, + } + + // construct SdA + var sda SdA + SdA__construct(&sda, train_N, n_ins, hidden_layer_sizes, n_outs, n_layers) + + // pretrain + SdA_pretrain(&sda, train_X, pretrain_lr, corruption_level, pretraining_epochs) + + // finetune + SdA_finetune(&sda, train_X, train_Y, finetune_lr, finetune_epochs) + + + // test data + test_X := [][]int { + {1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1}, + } + + test_Y := make([][]float64, test_N) + for i := 0; i < test_N; i++ { test_Y[i] = make([]float64, n_outs)} + + // test + for i := 0; i < test_N; i++ { + SdA_predict(&sda, test_X[i], test_Y[i]) + for j := 0; j < n_outs; j++ { + fmt.Printf("%.5f ", test_Y[i][j]) + } + fmt.Printf("\n") + } +} + + +func main() { + test_SdA() +} diff --git a/go/dA/dA.go b/go/dA/dA.go new file mode 100644 index 0000000..b41d1ce --- /dev/null +++ b/go/dA/dA.go @@ -0,0 +1,128 @@ +package dA + +import ( + u "../utils" +) + + +type DA struct { + N int + n_visible int + n_hidden int + W [][]float64 + hbias []float64 + vbias []float64 +} + + +func DA__construct(this *DA, N int, n_visible int, n_hidden int, W [][]float64, hbias []float64, vbias []float64) { + a := 1.0 / float64(n_visible) + + this.N = N + this.n_visible = n_visible + this.n_hidden = n_hidden + + if W == nil { + this.W = make([][]float64, n_hidden) + for i := 0; i < n_hidden; i++ { this.W[i] = make([]float64, n_visible) } + + for i := 0; i < n_hidden; i++ { + for j := 0; j < n_visible; j++ { + this.W[i][j] = u.Uniform(-a, a) + } + } + } else { + this.W = W + } + + if hbias == nil { + this.hbias = make([]float64, n_hidden) + } else { + this.hbias = hbias + } + + if vbias == nil { + this.vbias = make([]float64, n_visible) + } else { + this.vbias = vbias + } +} + +func dA_get_corrupted_input(this *DA, x []int, tilde_x []int, p float64) { + for i := 0; i < this.n_visible; i++ { + if x[i] == 0 { + tilde_x[i] = 0 + } else { + tilde_x[i] = u.Binomial(1, p) + } + } +} + +// Encode +func dA_get_hidden_values(this *DA, x []int, y []float64) { + for i := 0; i < this.n_hidden; i++ { + y[i] = 0 + for j := 0; j < this.n_visible; j++ { + y[i] += this.W[i][j] * float64(x[j]) + } + y[i] += this.hbias[i] + y[i] = u.Sigmoid(y[i]) + } +} + +// Decode +func dA_get_reconstructed_input(this *DA, y []float64, z []float64) { + for i := 0; i < this.n_visible; i++ { + z[i] = 0 + for j := 0; j < this.n_hidden; j++ { + z[i] += this.W[j][i] * y[j] + } + z[i] += this.vbias[i] + z[i] = u.Sigmoid(z[i]) + } +} + +func DA_train(this *DA, x []int, lr float64, corruption_level float64) { + tilde_x := make([]int, this.n_visible) + y := make([]float64, this.n_hidden) + z := make([]float64, this.n_visible) + + L_vbias := make([]float64, this.n_visible) + L_hbias := make([]float64, this.n_hidden) + + p := 1 - corruption_level + + dA_get_corrupted_input(this, x, tilde_x, p) + dA_get_hidden_values(this, tilde_x, y) + dA_get_reconstructed_input(this, y, z) + + // vbias + for i := 0; i < this.n_visible; i++ { + L_vbias[i] = float64(x[i]) - z[i] + this.vbias[i] += lr * L_vbias[i] / float64(this.N) + } + + // hbias + for i := 0; i < this.n_hidden; i++ { + L_hbias[i] = 0 + for j := 0; j < this.n_visible; j++ { + L_hbias[i] += this.W[i][j] * L_vbias[j] + } + L_hbias[i] *= y[i] * (1- y[i]) + this.hbias[i] += lr * L_hbias[i] / float64(this.N) + } + + // W + for i := 0; i < this.n_hidden; i++ { + for j := 0; j < this.n_visible; j++ { + this.W[i][j] += lr * (L_hbias[i] * float64(tilde_x[j]) + L_vbias[j] * y[i]) / float64(this.N) + } + } +} + +func dA_reconstruct(this *DA, x []int, z []float64) { + y := make([]float64, this.n_hidden) + + dA_get_hidden_values(this, x, y) + dA_get_reconstructed_input(this, y, z) +} From 9a8f85cd32bc0a50747d56fc7e684a50bdf2c9f0 Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori <me@yusugomori.com> Date: Sun, 14 Dec 
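Note that go/dA/dA.go introduced in PATCH 20 exports only the DA struct, DA__construct and DA_train; dA_reconstruct and the corruption/encode/decode helpers keep lowercase, package-private names, so SdA.go can pretrain a layer but cannot request reconstructions from it. A hypothetical driver restricted to that exported surface, with the same assumed relative-import layout as SdA.go:

package main

import (
    "fmt"

    D "./dA"
)

func main() {
    train_X := [][]int{
        {1, 1, 1, 0, 0, 0},
        {0, 0, 1, 1, 1, 0},
    }

    // 2 samples, 6 visible units, 3 hidden units; nil W and biases are
    // initialized inside the package.
    var da D.DA
    D.DA__construct(&da, len(train_X), 6, 3, nil, nil, nil)

    // Denoising pretraining: learning rate 0.1, 30% masking noise.
    for epoch := 0; epoch < 100; epoch++ {
        for i := 0; i < len(train_X); i++ {
            D.DA_train(&da, train_X[i], 0.1, 0.3)
        }
    }

    // The learned weight matrix is an exported field and can be inspected.
    fmt.Println(da.W[0])
}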
2014 16:56:52 +0900 Subject: [PATCH 21/45] fix typo --- go/DBN.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/DBN.go b/go/DBN.go index 7962a5a..e5522a6 100644 --- a/go/DBN.go +++ b/go/DBN.go @@ -207,7 +207,7 @@ func test_dbn() { DBN_pretrain(&dbn, train_X, pretrain_lr, k, pretraining_epochs) // finetune - DBN_finetune(&dbn, train_X, train_Y, finstune_lr, finetune_epochs) + DBN_finetune(&dbn, train_X, train_Y, finetune_lr, finetune_epochs) // test data test_X := [][]int { From c02375dfe8db3080075ded85fa7d6338812f025b Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori <me@yusugomori.com> Date: Thu, 25 Jun 2015 09:46:48 +0200 Subject: [PATCH 22/45] added MLP --- python/CDBN.py | 23 +++------- python/CRBM.py | 15 +------ python/DBN.py | 41 +++--------------- python/HiddenLayer.py | 60 +++++++++++++++----------- python/LogisticRegression.py | 21 +++------- python/MLP.py | 81 ++++++++++++++++++++++++++++++++++++ python/RBM.py | 33 ++++----------- python/SdA.py | 25 +++-------- python/dA.py | 37 ++++------------ python/utils.py | 11 +++-- 10 files changed, 163 insertions(+), 184 deletions(-) create mode 100755 python/MLP.py diff --git a/python/CDBN.py b/python/CDBN.py index 4ac987a..dbf6648 100755 --- a/python/CDBN.py +++ b/python/CDBN.py @@ -1,16 +1,5 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- -''' - DBN w/ continuous-valued inputs (Linear Energy) - - References : - - Y. Bengio, P. Lamblin, D. Popovici, H. Larochelle: Greedy Layer-Wise - Training of Deep Networks, Advances in Neural Information Processing - Systems 19, 2007 - -''' - import sys import numpy from HiddenLayer import HiddenLayer @@ -20,13 +9,11 @@ from DBN import DBN from utils import * - - class CDBN(DBN): def __init__(self, input=None, label=None,\ n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,\ - numpy_rng=None): + rng=None): self.x = input self.y = label @@ -35,8 +22,8 @@ def __init__(self, input=None, label=None,\ self.rbm_layers = [] self.n_layers = len(hidden_layer_sizes) # = len(self.rbm_layers) - if numpy_rng is None: - numpy_rng = numpy.random.RandomState(1234) + if rng is None: + rng = numpy.random.RandomState(1234) assert self.n_layers > 0 @@ -60,7 +47,7 @@ def __init__(self, input=None, label=None,\ sigmoid_layer = HiddenLayer(input=layer_input, n_in=input_size, n_out=hidden_layer_sizes[i], - numpy_rng=numpy_rng, + rng=rng, activation=sigmoid) self.sigmoid_layers.append(sigmoid_layer) @@ -113,7 +100,7 @@ def test_cdbn(pretrain_lr=0.1, pretraining_epochs=1000, k=1, \ rng = numpy.random.RandomState(123) # construct DBN - dbn = CDBN(input=x, label=y, n_ins=6, hidden_layer_sizes=[5, 5], n_outs=2, numpy_rng=rng) + dbn = CDBN(input=x, label=y, n_ins=6, hidden_layer_sizes=[5, 5], n_outs=2, rng=rng) # pre-training (TrainUnsupervisedDBN) dbn.pretrain(lr=pretrain_lr, k=1, epochs=pretraining_epochs) diff --git a/python/CRBM.py b/python/CRBM.py index 0521883..e870047 100755 --- a/python/CRBM.py +++ b/python/CRBM.py @@ -1,16 +1,5 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- -""" - RBM w/ continuous-valued inputs (Linear Energy) - - References : - - Y. Bengio, P. Lamblin, D. Popovici, H. 
Larochelle: Greedy Layer-Wise - Training of Deep Networks, Advances in Neural Information Processing - Systems 19, 2007 - -""" - import sys import numpy from RBM import RBM @@ -30,7 +19,7 @@ def sample_v_given_h(self, h0_sample): ep = numpy.exp(a_h) v1_mean = 1 / (1 - en) - 1 / a_h - U = numpy.array(self.numpy_rng.uniform( + U = numpy.array(self.rng.uniform( low=0, high=1, size=v1_mean.shape)) @@ -53,7 +42,7 @@ def test_crbm(learning_rate=0.1, k=1, training_epochs=1000): rng = numpy.random.RandomState(123) # construct CRBM - rbm = CRBM(input=data, n_visible=6, n_hidden=5, numpy_rng=rng) + rbm = CRBM(input=data, n_visible=6, n_hidden=5, rng=rng) # train for epoch in xrange(training_epochs): diff --git a/python/DBN.py b/python/DBN.py index f639823..b1b351b 100755 --- a/python/DBN.py +++ b/python/DBN.py @@ -1,20 +1,5 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- -''' - Deep Belief Nets (DBN) - - References : - - Y. Bengio, P. Lamblin, D. Popovici, H. Larochelle: Greedy Layer-Wise - Training of Deep Networks, Advances in Neural Information Processing - Systems 19, 2007 - - - - DeepLearningTutorials - https://github.com/lisa-lab/DeepLearningTutorials - -''' - import sys import numpy from HiddenLayer import HiddenLayer @@ -26,7 +11,7 @@ class DBN(object): def __init__(self, input=None, label=None,\ n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,\ - numpy_rng=None): + rng=None): self.x = input self.y = label @@ -35,8 +20,8 @@ def __init__(self, input=None, label=None,\ self.rbm_layers = [] self.n_layers = len(hidden_layer_sizes) # = len(self.rbm_layers) - if numpy_rng is None: - numpy_rng = numpy.random.RandomState(1234) + if rng is None: + rng = numpy.random.RandomState(1234) assert self.n_layers > 0 @@ -60,7 +45,7 @@ def __init__(self, input=None, label=None,\ sigmoid_layer = HiddenLayer(input=layer_input, n_in=input_size, n_out=hidden_layer_sizes[i], - numpy_rng=numpy_rng, + rng=rng, activation=sigmoid) self.sigmoid_layers.append(sigmoid_layer) @@ -100,21 +85,6 @@ def pretrain(self, lr=0.1, k=1, epochs=100): # print >> sys.stderr, \ # 'Pre-training layer %d, epoch %d, cost ' %(i, epoch), cost - # def pretrain(self, lr=0.1, k=1, epochs=100): - # # pre-train layer-wise - # for i in xrange(self.n_layers): - # rbm = self.rbm_layers[i] - - # for epoch in xrange(epochs): - # layer_input = self.x - # for j in xrange(i): - # layer_input = self.sigmoid_layers[j].sample_h_given_v(layer_input) - - # rbm.contrastive_divergence(lr=lr, k=k, input=layer_input) - # # cost = rbm.get_reconstruction_cross_entropy() - # # print >> sys.stderr, \ - # # 'Pre-training layer %d, epoch %d, cost ' %(i, epoch), cost - def finetune(self, lr=0.1, epochs=100): layer_input = self.sigmoid_layers[-1].sample_h_given_v() @@ -158,12 +128,11 @@ def test_dbn(pretrain_lr=0.1, pretraining_epochs=1000, k=1, \ [0, 1], [0, 1], [0, 1]]) - rng = numpy.random.RandomState(123) # construct DBN - dbn = DBN(input=x, label=y, n_ins=6, hidden_layer_sizes=[3, 3], n_outs=2, numpy_rng=rng) + dbn = DBN(input=x, label=y, n_ins=6, hidden_layer_sizes=[3, 3], n_outs=2, rng=rng) # pre-training (TrainUnsupervisedDBN) dbn.pretrain(lr=pretrain_lr, k=1, epochs=pretraining_epochs) diff --git a/python/HiddenLayer.py b/python/HiddenLayer.py index 72e51e1..4130b35 100755 --- a/python/HiddenLayer.py +++ b/python/HiddenLayer.py @@ -1,15 +1,5 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- -''' - Hidden Layer - - References : - - DeepLearningTutorials - https://github.com/lisa-lab/DeepLearningTutorials - -''' - import sys import numpy from utils import * @@ 
-17,38 +7,42 @@ class HiddenLayer(object): def __init__(self, input, n_in, n_out,\ - W=None, b=None, numpy_rng=None, activation=numpy.tanh): + W=None, b=None, rng=None, activation=numpy.tanh): - if numpy_rng is None: - numpy_rng = numpy.random.RandomState(1234) + if rng is None: + rng = numpy.random.RandomState(1234) if W is None: a = 1. / n_in - initial_W = numpy.array(numpy_rng.uniform( # initialize W uniformly + W = numpy.array(rng.uniform( # initialize W uniformly low=-a, high=a, size=(n_in, n_out))) - W = initial_W - if b is None: b = numpy.zeros(n_out) # initialize bias 0 - - self.numpy_rng = numpy_rng - self.input = input + self.rng = rng + self.x = input self.W = W self.b = b + if activation == numpy.tanh: + self.dactivation = dtanh + elif activation == sigmoid: + self.dactivation = dsigmoid + else: + raise ValueError('activation function not supported.') + self.activation = activation + - # self.params = [self.W, self.b] def output(self, input=None): if input is not None: - self.input = input + self.x = input - linear_output = numpy.dot(self.input, self.W) + self.b + linear_output = numpy.dot(self.x, self.W) + self.b return (linear_output if self.activation is None else self.activation(linear_output)) @@ -56,10 +50,28 @@ def output(self, input=None): def sample_h_given_v(self, input=None): if input is not None: - self.input = input + self.x = input v_mean = self.output() - h_sample = self.numpy_rng.binomial(size=v_mean.shape, + h_sample = self.rng.binomial(size=v_mean.shape, n=1, p=v_mean) return h_sample + + + + def forward(self, input=None): + return self.output(input=input) + + + def backward(self, prev_layer, lr=0.1, input=None): + if input is not None: + self.x = input + + # d_y = (1 - prev_layer.x * prev_layer.x) * numpy.dot(prev_layer.d_y, prev_layer.W.T) + d_y = self.dactivation(prev_layer.x) * numpy.dot(prev_layer.d_y, prev_layer.W.T) + + self.W += lr * numpy.dot(self.x.T, d_y) + self.b += lr * numpy.mean(d_y, axis=0) + + self.d_y = d_y diff --git a/python/LogisticRegression.py b/python/LogisticRegression.py index a828a40..f1a686d 100755 --- a/python/LogisticRegression.py +++ b/python/LogisticRegression.py @@ -1,18 +1,5 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- -''' - Logistic Regression - - References : - - Jason Rennie: Logistic Regression, - http://qwone.com/~jason/writing/lr.pdf - - - DeepLearningTutorials - https://github.com/lisa-lab/DeepLearningTutorials - -''' - import sys import numpy from utils import * @@ -25,7 +12,6 @@ def __init__(self, input, label, n_in, n_out): self.W = numpy.zeros((n_in, n_out)) # initialize W 0 self.b = numpy.zeros(n_out) # initialize bias 0 - # self.params = [self.W, self.b] def train(self, lr=0.1, input=None, L2_reg=0.00): if input is not None: @@ -37,6 +23,8 @@ def train(self, lr=0.1, input=None, L2_reg=0.00): self.W += lr * numpy.dot(self.x.T, d_y) - lr * L2_reg * self.W self.b += lr * numpy.mean(d_y, axis=0) + + self.d_y = d_y # cost = self.negative_log_likelihood() # return cost @@ -57,6 +45,9 @@ def predict(self, x): # return sigmoid(numpy.dot(x, self.W) + self.b) return softmax(numpy.dot(x, self.W) + self.b) + def output(self, x): + return self.predict(x) + def test_lr(learning_rate=0.01, n_epochs=200): # training data @@ -90,7 +81,7 @@ def test_lr(learning_rate=0.01, n_epochs=200): [0, 0, 0, 1, 1, 0], [1, 1, 1, 1, 1, 0]]) - print >> sys.stderr, classifier.predict(x) + print classifier.predict(x) if __name__ == "__main__": diff --git a/python/MLP.py b/python/MLP.py new file mode 100755 index 0000000..def6ab7 --- /dev/null 
+++ b/python/MLP.py @@ -0,0 +1,81 @@ +# -*- coding: utf-8 -*- + +import sys +import numpy +from HiddenLayer import HiddenLayer +from LogisticRegression import LogisticRegression +from utils import * + + +class MLP(object): + def __init__(self, input, label, n_in, n_hidden, n_out, rng=None): + + self.x = input + self.y = label + + if rng is None: + rng = numpy.random.RandomState(1234) + + # construct hidden_layer (tanh, sigmoid, etc...) + self.hidden_layer = HiddenLayer(input=self.x, + n_in=n_in, + n_out=n_hidden, + rng=rng, + activation=numpy.tanh) + + # construct log_layer (softmax) + self.log_layer = LogisticRegression(input=self.hidden_layer.output, + label=self.y, + n_in=n_hidden, + n_out=n_out) + + def train(self): + layer_input = self.hidden_layer.forward() + # print self.hidden_layer.W + + self.log_layer.train(input=layer_input) + self.hidden_layer.backward(prev_layer=self.log_layer) + + + def predict(self, x): + x = self.hidden_layer.output(x) + return self.log_layer.predict(x) + + +def test_mlp(n_epochs=100): + + x = numpy.array([[1,1,1,0,0,0], + [1,0,1,0,0,0], + [1,1,1,0,0,0], + [0,0,1,1,1,0], + [0,0,1,1,0,0], + [0,0,1,1,1,0]]) + y = numpy.array([[1, 0], + [1, 0], + [1, 0], + [0, 1], + [0, 1], + [0, 1]]) + + + rng = numpy.random.RandomState(123) + + + # construct MLP + classifier = MLP(input=x, label=y, n_in=6, n_hidden=15, n_out=2, rng=rng) + + # train + for epoch in xrange(n_epochs): + classifier.train() + + + # test + x = numpy.array([[1, 1, 0, 0, 0, 0], + [0, 0, 0, 1, 1, 0], + [1, 1, 1, 1, 1, 0]]) + + print classifier.predict(x) + + +if __name__ == "__main__": + test_mlp() diff --git a/python/RBM.py b/python/RBM.py index 781241d..4a1be8e 100755 --- a/python/RBM.py +++ b/python/RBM.py @@ -1,38 +1,23 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- -""" - Restricted Boltzmann Machine (RBM) - - References : - - Y. Bengio, P. Lamblin, D. Popovici, H. Larochelle: Greedy Layer-Wise - Training of Deep Networks, Advances in Neural Information Processing - Systems 19, 2007 - - - - DeepLearningTutorials - https://github.com/lisa-lab/DeepLearningTutorials - -""" - import sys import numpy from utils import * class RBM(object): def __init__(self, input=None, n_visible=2, n_hidden=3, \ - W=None, hbias=None, vbias=None, numpy_rng=None): + W=None, hbias=None, vbias=None, rng=None): self.n_visible = n_visible # num of units in visible (input) layer self.n_hidden = n_hidden # num of units in hidden layer - if numpy_rng is None: - numpy_rng = numpy.random.RandomState(1234) + if rng is None: + rng = numpy.random.RandomState(1234) if W is None: a = 1. 
/ n_visible - initial_W = numpy.array(numpy_rng.uniform( # initialize W uniformly + initial_W = numpy.array(rng.uniform( # initialize W uniformly low=-a, high=a, size=(n_visible, n_hidden))) @@ -46,14 +31,12 @@ def __init__(self, input=None, n_visible=2, n_hidden=3, \ vbias = numpy.zeros(n_visible) # initialize v bias 0 - self.numpy_rng = numpy_rng + self.rng = rng self.input = input self.W = W self.hbias = hbias self.vbias = vbias - # self.params = [self.W, self.hbias, self.vbias] - def contrastive_divergence(self, lr=0.1, k=1, input=None): if input is not None: @@ -86,7 +69,7 @@ def contrastive_divergence(self, lr=0.1, k=1, input=None): def sample_h_given_v(self, v0_sample): h1_mean = self.propup(v0_sample) - h1_sample = self.numpy_rng.binomial(size=h1_mean.shape, # discrete: binomial + h1_sample = self.rng.binomial(size=h1_mean.shape, # discrete: binomial n=1, p=h1_mean) @@ -95,7 +78,7 @@ def sample_h_given_v(self, v0_sample): def sample_v_given_h(self, h0_sample): v1_mean = self.propdown(h0_sample) - v1_sample = self.numpy_rng.binomial(size=v1_mean.shape, # discrete: binomial + v1_sample = self.rng.binomial(size=v1_mean.shape, # discrete: binomial n=1, p=v1_mean) @@ -153,7 +136,7 @@ def test_rbm(learning_rate=0.1, k=1, training_epochs=1000): rng = numpy.random.RandomState(123) # construct RBM - rbm = RBM(input=data, n_visible=6, n_hidden=2, numpy_rng=rng) + rbm = RBM(input=data, n_visible=6, n_hidden=2, rng=rng) # train for epoch in xrange(training_epochs): diff --git a/python/SdA.py b/python/SdA.py index 4a0f45a..3d38d0f 100755 --- a/python/SdA.py +++ b/python/SdA.py @@ -1,20 +1,5 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- - -""" - Stacked Denoising Autoencoders (SdA) - - References : - - P. Vincent, H. Larochelle, Y. Bengio, P.A. Manzagol: Extracting and - Composing Robust Features with Denoising Autoencoders, ICML' 08, 1096-1103, - 2008 - - - DeepLearningTutorials - https://github.com/lisa-lab/DeepLearningTutorials - -""" - import sys import numpy from HiddenLayer import HiddenLayer @@ -26,7 +11,7 @@ class SdA(object): def __init__(self, input=None, label=None,\ n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,\ - numpy_rng=None): + rng=None): self.x = input self.y = label @@ -35,8 +20,8 @@ def __init__(self, input=None, label=None,\ self.dA_layers = [] self.n_layers = len(hidden_layer_sizes) # = len(self.rbm_layers) - if numpy_rng is None: - numpy_rng = numpy.random.RandomState(1234) + if rng is None: + rng = numpy.random.RandomState(1234) assert self.n_layers > 0 @@ -60,7 +45,7 @@ def __init__(self, input=None, label=None,\ sigmoid_layer = HiddenLayer(input=layer_input, n_in=input_size, n_out=hidden_layer_sizes[i], - numpy_rng=numpy_rng, + rng=rng, activation=sigmoid) self.sigmoid_layers.append(sigmoid_layer) @@ -152,7 +137,7 @@ def test_SdA(pretrain_lr=0.1, pretraining_epochs=1000, corruption_level=0.3, \ # construct SdA sda = SdA(input=x, label=y, \ - n_ins=20, hidden_layer_sizes=[15, 15], n_outs=2, numpy_rng=rng) + n_ins=20, hidden_layer_sizes=[15, 15], n_outs=2, rng=rng) # pre-training sda.pretrain(lr=pretrain_lr, corruption_level=corruption_level, epochs=pretraining_epochs) diff --git a/python/dA.py b/python/dA.py index 0b911eb..edbf6c7 100755 --- a/python/dA.py +++ b/python/dA.py @@ -1,23 +1,5 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- -""" - Denoising Autoencoders (dA) - - References : - - P. Vincent, H. Larochelle, Y. Bengio, P.A. 
Manzagol: Extracting and - Composing Robust Features with Denoising Autoencoders, ICML'08, 1096-1103, - 2008 - - - DeepLearningTutorials - https://github.com/lisa-lab/DeepLearningTutorials - - - Yusuke Sugomori: Stochastic Gradient Descent for Denoising Autoencoders, - http://yusugomori.com/docs/SGD_DA.pdf - -""" - - import sys import numpy from utils import * @@ -25,44 +7,39 @@ class dA(object): def __init__(self, input=None, n_visible=2, n_hidden=3, \ - W=None, hbias=None, vbias=None, numpy_rng=None): + W=None, hbias=None, vbias=None, rng=None): self.n_visible = n_visible # num of units in visible (input) layer self.n_hidden = n_hidden # num of units in hidden layer - if numpy_rng is None: - numpy_rng = numpy.random.RandomState(1234) + if rng is None: + rng = numpy.random.RandomState(1234) if W is None: a = 1. / n_visible - initial_W = numpy.array(numpy_rng.uniform( # initialize W uniformly + W = numpy.array(rng.uniform( # initialize W uniformly low=-a, high=a, size=(n_visible, n_hidden))) - W = initial_W - if hbias is None: hbias = numpy.zeros(n_hidden) # initialize h bias 0 if vbias is None: vbias = numpy.zeros(n_visible) # initialize v bias 0 - self.numpy_rng = numpy_rng + self.rng = rng self.x = input self.W = W self.W_prime = self.W.T self.hbias = hbias self.vbias = vbias - # self.params = [self.W, self.hbias, self.vbias] - - def get_corrupted_input(self, input, corruption_level): assert corruption_level < 1 - return self.numpy_rng.binomial(size=input.shape, + return self.rng.binomial(size=input.shape, n=1, p=1-corruption_level) * input @@ -133,7 +110,7 @@ def test_dA(learning_rate=0.1, corruption_level=0.3, training_epochs=50): rng = numpy.random.RandomState(123) # construct dA - da = dA(input=data, n_visible=20, n_hidden=5, numpy_rng=rng) + da = dA(input=data, n_visible=20, n_hidden=5, rng=rng) # train for epoch in xrange(training_epochs): diff --git a/python/utils.py b/python/utils.py index 5c4a748..e17f203 100755 --- a/python/utils.py +++ b/python/utils.py @@ -1,7 +1,5 @@ -''' ''' -import numpy - +import numpy numpy.seterr(all='ignore') @@ -9,6 +7,13 @@ def sigmoid(x): return 1. / (1 + numpy.exp(-x)) +def dsigmoid(x): + return x * (1. - x) + +def dtanh(x): + return 1. - x * x + + def softmax(x): e = numpy.exp(x - numpy.max(x)) # prevent overflow if e.ndim == 1: From f4ef9e5a8c63c368fb9a99b130ae9657aaf1ed3f Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori <me@yusugomori.com> Date: Thu, 25 Jun 2015 09:48:36 +0200 Subject: [PATCH 23/45] minor fix MLP --- python/MLP.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/python/MLP.py b/python/MLP.py index def6ab7..3ba7425 100755 --- a/python/MLP.py +++ b/python/MLP.py @@ -16,7 +16,7 @@ def __init__(self, input, label, n_in, n_hidden, n_out, rng=None): if rng is None: rng = numpy.random.RandomState(1234) - # construct hidden_layer (tanh, sigmoid, etc...) 
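The dsigmoid and dtanh helpers added to python/utils.py in PATCH 22 take the activation's output, not its pre-activation, as argument: for y = tanh(x) the derivative is 1 - y^2, and for y = sigmoid(x) it is y * (1 - y). That is why HiddenLayer.backward evaluates self.dactivation on prev_layer.x, which already holds activated values, rather than on the raw linear output.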
+ # construct hidden_layer (tanh or sigmoid so far) self.hidden_layer = HiddenLayer(input=self.x, n_in=n_in, n_out=n_hidden, @@ -31,8 +31,6 @@ def __init__(self, input, label, n_in, n_hidden, n_out, rng=None): def train(self): layer_input = self.hidden_layer.forward() - # print self.hidden_layer.W - self.log_layer.train(input=layer_input) self.hidden_layer.backward(prev_layer=self.log_layer) From 37aadf1d27ba356703716afd80c28410fd2d1c34 Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori <me@yusugomori.com> Date: Sun, 2 Aug 2015 21:45:34 -0400 Subject: [PATCH 24/45] add Dropout with ReLU --- python/Dropout.py | 160 ++++++++++++++++++++++++++++++++++++++++++ python/HiddenLayer.py | 26 ++++++- python/MLP.py | 4 +- python/SdA.py | 4 +- python/utils.py | 11 ++- 5 files changed, 197 insertions(+), 8 deletions(-) create mode 100755 python/Dropout.py diff --git a/python/Dropout.py b/python/Dropout.py new file mode 100755 index 0000000..3edf3fe --- /dev/null +++ b/python/Dropout.py @@ -0,0 +1,160 @@ +# -*- coding: utf-8 -*- + +import sys +import numpy +from HiddenLayer import HiddenLayer +from LogisticRegression import LogisticRegression +from utils import * + + +class Dropout(object): + def __init__(self, input, label,\ + n_in, hidden_layer_sizes, n_out,\ + rng=None, activation=ReLU): + + self.x = input + self.y = label + + self.hidden_layers = [] + self.n_layers = len(hidden_layer_sizes) + + if rng is None: + rng = numpy.random.RandomState(1234) + + assert self.n_layers > 0 + + + # construct multi-layer + for i in xrange(self.n_layers): + + # layer_size + if i == 0: + input_size = n_in + else: + input_size = hidden_layer_sizes[i-1] + + # layer_input + if i == 0: + layer_input = self.x + + else: + layer_input = self.hidden_layers[-1].output() + + # construct hidden_layer + hidden_layer = HiddenLayer(input=layer_input, + n_in=input_size, + n_out=hidden_layer_sizes[i], + rng=rng, + activation=activation) + + self.hidden_layers.append(hidden_layer) + + + # layer for ouput using Logistic Regression (softmax) + self.log_layer = LogisticRegression(input=self.hidden_layers[-1].output(), + label=self.y, + n_in=hidden_layer_sizes[-1], + n_out=n_out) + + + def train(self, epochs=500, dropout=True, p_dropout=0.5, rng=None): + + for epoch in xrange(epochs): + dropout_masks = [] # create different masks in each training epoch + + # forward hidden_layers + for i in xrange(self.n_layers): + if i == 0: + layer_input = self.x + + layer_input = self.hidden_layers[i].forward(input=layer_input) + + if dropout == True: + mask = self.hidden_layers[i].dropout(input=layer_input, p=p_dropout, rng=rng) + layer_input *= mask + + dropout_masks.append(mask) + + + # forward & backward log_layer + self.log_layer.train(input=layer_input) + + + # backward hidden_layers + for i in reversed(xrange(0, self.n_layers)): + if i == self.n_layers-1: + prev_layer = self.log_layer + else: + prev_layer = self.hidden_layers[i+1] + + self.hidden_layers[i].backward(prev_layer=prev_layer) + + if dropout == True: + self.hidden_layers[i].d_y *= dropout_masks[i] # also mask here + + + def predict(self, x, dropout=True, p_dropout=0.5): + layer_input = x + + for i in xrange(self.n_layers): + if dropout == True: + self.hidden_layers[i].W = p_dropout * self.hidden_layers[i].W + self.hidden_layers[i].b = p_dropout * self.hidden_layers[i].b + + layer_input = self.hidden_layers[i].output(input=layer_input) + + return self.log_layer.predict(layer_input) + + + +def test_dropout(n_epochs=500, dropout=True, p_dropout=0.5): + + x = numpy.array([[1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0]]) + + y = numpy.array([[1, 0], + [1, 0], + [1, 0], + [1, 0], + [1, 0], + [0, 1], + [0, 1], + [0, 1], + [0, 1], + [0, 1]]) + + + rng = numpy.random.RandomState(123) + + + # construct Dropout MLP + classifier = Dropout(input=x, label=y, \ + n_in=20, hidden_layer_sizes=[12, 12], n_out=2, \ + rng=rng, activation=ReLU) + + + # train + classifier.train(epochs=n_epochs, dropout=dropout, \ + p_dropout=p_dropout, rng=rng) + + + # test + x = numpy.array([[1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1]]) + + print classifier.predict(x) + + + +if __name__ == "__main__": + test_dropout() diff --git a/python/HiddenLayer.py b/python/HiddenLayer.py index 4130b35..f7a0731 100755 --- a/python/HiddenLayer.py +++ b/python/HiddenLayer.py @@ -7,7 +7,7 @@ class HiddenLayer(object): def __init__(self, input, n_in, n_out,\ - W=None, b=None, rng=None, activation=numpy.tanh): + W=None, b=None, rng=None, activation=tanh): if rng is None: rng = numpy.random.RandomState(1234) @@ -24,15 +24,22 @@ def __init__(self, input, n_in, n_out,\ self.rng = rng self.x = input + self.W = W self.b = b - if activation == numpy.tanh: + if activation == tanh: self.dactivation = dtanh + elif activation == sigmoid: self.dactivation = dsigmoid + + elif activation == ReLU: + self.dactivation = dReLU + else: raise ValueError('activation function not supported.') + self.activation = activation @@ -68,10 +75,23 @@ def backward(self, prev_layer, lr=0.1, input=None): if input is not None: self.x = input - # d_y = (1 - prev_layer.x * prev_layer.x) * numpy.dot(prev_layer.d_y, prev_layer.W.T) d_y = self.dactivation(prev_layer.x) * numpy.dot(prev_layer.d_y, prev_layer.W.T) self.W += lr * numpy.dot(self.x.T, d_y) self.b += lr * numpy.mean(d_y, axis=0) self.d_y = d_y + + + def dropout(self, input, p, rng=None): + if rng is None: + rng = numpy.random.RandomState(123) + + mask = rng.binomial(size=input.shape, + n=1, + p=1-p) # p is the prob of dropping + + return mask + + + diff --git a/python/MLP.py b/python/MLP.py index 3ba7425..0e02f2a 100755 --- a/python/MLP.py +++ b/python/MLP.py @@ -21,7 +21,7 @@ def __init__(self, input, label, n_in, n_hidden, n_out, rng=None): n_in=n_in, n_out=n_hidden, rng=rng, - activation=numpy.tanh) + activation=tanh) # construct log_layer (softmax) self.log_layer = LogisticRegression(input=self.hidden_layer.output, @@ -36,7 +36,7 @@ def train(self): def predict(self, x): - x = self.hidden_layer.output(x) + x = self.hidden_layer.output(input=x) return self.log_layer.predict(x) diff --git a/python/SdA.py b/python/SdA.py index 3d38d0f..5f8de37 100755 --- a/python/SdA.py +++ b/python/SdA.py @@ -103,8 +103,8 @@ def predict(self, x): sigmoid_layer = self.sigmoid_layers[i] layer_input = 
sigmoid_layer.output(input=layer_input) - out = self.log_layer.predict(layer_input) - return out + return self.log_layer.predict(layer_input) + diff --git a/python/utils.py b/python/utils.py index e17f203..7aca40d 100755 --- a/python/utils.py +++ b/python/utils.py @@ -10,10 +10,12 @@ def sigmoid(x): def dsigmoid(x): return x * (1. - x) +def tanh(x): + return numpy.tanh(x) + def dtanh(x): return 1. - x * x - def softmax(x): e = numpy.exp(x - numpy.max(x)) # prevent overflow if e.ndim == 1: @@ -22,6 +24,13 @@ def softmax(x): return e / numpy.array([numpy.sum(e, axis=1)]).T # ndim = 2 +def ReLU(x): + return x * (x > 0) + +def dReLU(x): + return 1. * (x > 0) + + # # probability density for the Gaussian dist # def gaussian(x, mean=0.0, scale=1.0): # s = 2 * numpy.power(scale, 2) From 4aa12f1ca8cc58e0d232e721335110d604fce59d Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori <yusugomori@users.noreply.github.com> Date: Sun, 2 Aug 2015 21:47:59 -0400 Subject: [PATCH 25/45] Update README.md --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 48c2f67..14e17c5 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,10 @@ - LogisticRegression: Logistic Regression - HiddenLayer: Hidden Layer of Neural Networks + + - MLP: Multiple Layer Perceptron + + - Dropout: Dropout MLP @@ -38,4 +42,4 @@ -### Bug reports are deeply welcome. \ No newline at end of file +### Bug reports are deeply welcome. From d31d4f51cdead6e4d0ae0a24225c3f79b7fa7c5f Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori <me@yusugomori.com> Date: Mon, 3 Aug 2015 23:12:15 -0400 Subject: [PATCH 26/45] bug fix --- python/Dropout.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/Dropout.py b/python/Dropout.py index 3edf3fe..feb594e 100755 --- a/python/Dropout.py +++ b/python/Dropout.py @@ -98,8 +98,7 @@ def predict(self, x, dropout=True, p_dropout=0.5): for i in xrange(self.n_layers): if dropout == True: - self.hidden_layers[i].W = p_dropout * self.hidden_layers[i].W - self.hidden_layers[i].b = p_dropout * self.hidden_layers[i].b + self.hidden_layers[i].W = (1 - p_dropout) * self.hidden_layers[i].W layer_input = self.hidden_layers[i].output(input=layer_input) From bb903c6791c83851c725c6e8689edea83a0122df Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori <me@yusugomori.com> Date: Tue, 4 Aug 2015 22:09:01 -0400 Subject: [PATCH 27/45] clean up java --- .gitignore | 4 + java/DBN/src/DBN.java | 223 ----------------- java/DBN/src/HiddenLayer.java | 74 ------ java/DBN/src/LogisticRegression.java | 73 ------ java/DBN/src/RBM.java | 164 ------------- .../src/LogisticRegression.java | 128 ---------- java/RBM/src/RBM.java | 221 ----------------- java/SdA/src/HiddenLayer.java | 74 ------ java/SdA/src/LogisticRegression.java | 73 ------ java/SdA/src/SdA.java | 230 ------------------ java/SdA/src/dA.java | 150 ------------ java/dA/src/dA.java | 207 ---------------- java/src/DeepLearning/DBN.java | 222 +++++++++++++++++ java/src/DeepLearning/HiddenLayer.java | 54 ++++ java/src/DeepLearning/LogisticRegression.java | 129 ++++++++++ java/src/DeepLearning/RBM.java | 203 ++++++++++++++++ java/src/DeepLearning/SdA.java | 230 ++++++++++++++++++ java/src/DeepLearning/dA.java | 187 ++++++++++++++ java/src/DeepLearning/utils.java | 29 +++ 19 files changed, 1058 insertions(+), 1617 deletions(-) delete mode 100644 java/DBN/src/DBN.java delete mode 100644 java/DBN/src/HiddenLayer.java delete mode 100644 java/DBN/src/LogisticRegression.java delete mode 100644 java/DBN/src/RBM.java delete mode 
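The one-line fix in PATCH 26 changes the test-time rescaling in Dropout.predict from p_dropout to 1 - p_dropout. During training, HiddenLayer.dropout draws its mask with keep probability 1 - p (the comment notes that p is the probability of dropping), so a hidden activation's expected value under the mask is (1 - p) times its undropped value. The usual weight-scaling rule therefore multiplies the weights by the keep probability 1 - p at prediction time, which is what the corrected line does; scaling by p itself would only agree with this for p = 0.5.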
100644 java/LogisticRegression/src/LogisticRegression.java delete mode 100644 java/RBM/src/RBM.java delete mode 100644 java/SdA/src/HiddenLayer.java delete mode 100644 java/SdA/src/LogisticRegression.java delete mode 100644 java/SdA/src/SdA.java delete mode 100644 java/SdA/src/dA.java delete mode 100644 java/dA/src/dA.java create mode 100644 java/src/DeepLearning/DBN.java create mode 100644 java/src/DeepLearning/HiddenLayer.java create mode 100644 java/src/DeepLearning/LogisticRegression.java create mode 100644 java/src/DeepLearning/RBM.java create mode 100644 java/src/DeepLearning/SdA.java create mode 100644 java/src/DeepLearning/dA.java create mode 100644 java/src/DeepLearning/utils.java diff --git a/.gitignore b/.gitignore index 190cc57..252ef14 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,7 @@ *.settings .project .metadata + +java/.idea/* +java/java.iml +java/out/* \ No newline at end of file diff --git a/java/DBN/src/DBN.java b/java/DBN/src/DBN.java deleted file mode 100644 index 5b72e68..0000000 --- a/java/DBN/src/DBN.java +++ /dev/null @@ -1,223 +0,0 @@ -import java.util.Random; - -public class DBN { - public int N; - public int n_ins; - public int[] hidden_layer_sizes; - public int n_outs; - public int n_layers; - public HiddenLayer[] sigmoid_layers; - public RBM[] rbm_layers; - public LogisticRegression log_layer; - public Random rng; - - public static double sigmoid(double x) { - return 1.0 / (1.0 + Math.pow(Math.E, -x)); - } - - - public DBN(int N, int n_ins, int[] hidden_layer_sizes, int n_outs, int n_layers, Random rng) { - int input_size; - - this.N = N; - this.n_ins = n_ins; - this.hidden_layer_sizes = hidden_layer_sizes; - this.n_outs = n_outs; - this.n_layers = n_layers; - - this.sigmoid_layers = new HiddenLayer[n_layers]; - this.rbm_layers = new RBM[n_layers]; - - if(rng == null) this.rng = new Random(1234); - else this.rng = rng; - - // construct multi-layer - for(int i=0; i<this.n_layers; i++) { - if(i == 0) { - input_size = this.n_ins; - } else { - input_size = this.hidden_layer_sizes[i-1]; - } - - // construct sigmoid_layer - this.sigmoid_layers[i] = new HiddenLayer(this.N, input_size, this.hidden_layer_sizes[i], null, null, rng); - - // construct rbm_layer - this.rbm_layers[i] = new RBM(this.N, input_size, this.hidden_layer_sizes[i], this.sigmoid_layers[i].W, this.sigmoid_layers[i].b, null, rng); - } - - // layer for output using LogisticRegression - this.log_layer = new LogisticRegression(this.N, this.hidden_layer_sizes[this.n_layers-1], this.n_outs); - } - - public void pretrain(int[][] train_X, double lr, int k, int epochs) { - int[] layer_input = new int[0]; - int prev_layer_input_size; - int[] prev_layer_input; - - for(int i=0; i<n_layers; i++) { // layer-wise - for(int epoch=0; epoch<epochs; epoch++) { // training epochs - for(int n=0; n<N; n++) { // input x1...xN - // layer input - for(int l=0; l<=i; l++) { - - if(l == 0) { - layer_input = new int[n_ins]; - for(int j=0; j<n_ins; j++) layer_input[j] = train_X[n][j]; - } else { - if(l == 1) prev_layer_input_size = n_ins; - else prev_layer_input_size = hidden_layer_sizes[l-2]; - - prev_layer_input = new int[prev_layer_input_size]; - for(int j=0; j<prev_layer_input_size; j++) prev_layer_input[j] = layer_input[j]; - - layer_input = new int[hidden_layer_sizes[l-1]]; - - sigmoid_layers[l-1].sample_h_given_v(prev_layer_input, layer_input); - } - } - - rbm_layers[i].contrastive_divergence(layer_input, lr, k); - } - } - } - } - - public void finetune(int[][] train_X, int[][] train_Y, double lr, int epochs) { - 
int[] layer_input = new int[0]; - // int prev_layer_input_size; - int[] prev_layer_input = new int[0]; - - for(int epoch=0; epoch<epochs; epoch++) { - for(int n=0; n<N; n++) { - - // layer input - for(int i=0; i<n_layers; i++) { - if(i == 0) { - prev_layer_input = new int[n_ins]; - for(int j=0; j<n_ins; j++) prev_layer_input[j] = train_X[n][j]; - } else { - prev_layer_input = new int[hidden_layer_sizes[i-1]]; - for(int j=0; j<hidden_layer_sizes[i-1]; j++) prev_layer_input[j] = layer_input[j]; - } - - layer_input = new int[hidden_layer_sizes[i]]; - sigmoid_layers[i].sample_h_given_v(prev_layer_input, layer_input); - } - - log_layer.train(layer_input, train_Y[n], lr); - } - // lr *= 0.95; - } - } - - public void predict(int[] x, double[] y) { - double[] layer_input = new double[0]; - // int prev_layer_input_size; - double[] prev_layer_input = new double[n_ins]; - for(int j=0; j<n_ins; j++) prev_layer_input[j] = x[j]; - - double linear_output; - - - // layer activation - for(int i=0; i<n_layers; i++) { - layer_input = new double[sigmoid_layers[i].n_out]; - - for(int k=0; k<sigmoid_layers[i].n_out; k++) { - linear_output = 0.0; - - for(int j=0; j<sigmoid_layers[i].n_in; j++) { - linear_output += sigmoid_layers[i].W[k][j] * prev_layer_input[j]; - } - linear_output += sigmoid_layers[i].b[k]; - layer_input[k] = sigmoid(linear_output); - } - - if(i < n_layers-1) { - prev_layer_input = new double[sigmoid_layers[i].n_out]; - for(int j=0; j<sigmoid_layers[i].n_out; j++) prev_layer_input[j] = layer_input[j]; - } - } - - for(int i=0; i<log_layer.n_out; i++) { - y[i] = 0; - for(int j=0; j<log_layer.n_in; j++) { - y[i] += log_layer.W[i][j] * layer_input[j]; - } - y[i] += log_layer.b[i]; - } - - log_layer.softmax(y); - } - - private static void test_dbn() { - Random rng = new Random(123); - - double pretrain_lr = 0.1; - int pretraining_epochs = 1000; - int k = 1; - double finetune_lr = 0.1; - int finetune_epochs = 500; - - int train_N = 6; - int test_N = 4; - int n_ins = 6; - int n_outs = 2; - int[] hidden_layer_sizes = {3, 3}; - int n_layers = hidden_layer_sizes.length; - - // training data - int[][] train_X = { - {1, 1, 1, 0, 0, 0}, - {1, 0, 1, 0, 0, 0}, - {1, 1, 1, 0, 0, 0}, - {0, 0, 1, 1, 1, 0}, - {0, 0, 1, 1, 0, 0}, - {0, 0, 1, 1, 1, 0} - }; - - int[][] train_Y = { - {1, 0}, - {1, 0}, - {1, 0}, - {0, 1}, - {0, 1}, - {0, 1}, - }; - - - // construct DBN - DBN dbn = new DBN(train_N, n_ins, hidden_layer_sizes, n_outs, n_layers, rng); - - // pretrain - dbn.pretrain(train_X, pretrain_lr, k, pretraining_epochs); - - // finetune - dbn.finetune(train_X, train_Y, finetune_lr, finetune_epochs); - - - // test data - int[][] test_X = { - {1, 1, 0, 0, 0, 0}, - {1, 1, 1, 1, 0, 0}, - {0, 0, 0, 1, 1, 0}, - {0, 0, 1, 1, 1, 0}, - }; - - double[][] test_Y = new double[test_N][n_outs]; - - // test - for(int i=0; i<test_N; i++) { - dbn.predict(test_X[i], test_Y[i]); - for(int j=0; j<n_outs; j++) { - System.out.print(test_Y[i][j] + " "); - } - System.out.println(); - } - } - - public static void main(String[] args) { - test_dbn(); - } -} diff --git a/java/DBN/src/HiddenLayer.java b/java/DBN/src/HiddenLayer.java deleted file mode 100644 index 588eea8..0000000 --- a/java/DBN/src/HiddenLayer.java +++ /dev/null @@ -1,74 +0,0 @@ -import java.util.Random; - -public class HiddenLayer { - public int N; - public int n_in; - public int n_out; - public double[][] W; - public double[] b; - public Random rng; - - public double uniform(double min, double max) { - return rng.nextDouble() * (max - min) + min; - } - - public int 
binomial(int n, double p) { - if(p < 0 || p > 1) return 0; - - int c = 0; - double r; - - for(int i=0; i<n; i++) { - r = rng.nextDouble(); - if (r < p) c++; - } - - return c; - } - - public static double sigmoid(double x) { - return 1.0 / (1.0 + Math.pow(Math.E, -x)); - } - - - - public HiddenLayer(int N, int n_in, int n_out, double[][] W, double[] b, Random rng) { - this.N = N; - this.n_in = n_in; - this.n_out = n_out; - - if(rng == null) this.rng = new Random(1234); - else this.rng = rng; - - if(W == null) { - this.W = new double[n_out][n_in]; - double a = 1.0 / this.n_in; - - for(int i=0; i<n_out; i++) { - for(int j=0; j<n_in; j++) { - this.W[i][j] = uniform(-a, a); - } - } - } else { - this.W = W; - } - - if(b == null) this.b = new double[n_out]; - else this.b = b; - } - - public double output(int[] input, double[] w, double b) { - double linear_output = 0.0; - for(int j=0; j<n_in; j++) { - linear_output += w[j] * input[j]; - } - linear_output += b; - return sigmoid(linear_output); - } - - public void sample_h_given_v(int[] input, int[] sample) { - for(int i=0; i<n_out; i++) { - sample[i] = binomial(1, output(input, W[i], b[i])); - } - } -} diff --git a/java/DBN/src/LogisticRegression.java b/java/DBN/src/LogisticRegression.java deleted file mode 100644 index 09ba807..0000000 --- a/java/DBN/src/LogisticRegression.java +++ /dev/null @@ -1,73 +0,0 @@ - -public class LogisticRegression { - public int N; - public int n_in; - public int n_out; - public double[][] W; - public double[] b; - - public LogisticRegression(int N, int n_in, int n_out) { - this.N = N; - this.n_in = n_in; - this.n_out = n_out; - - W = new double[this.n_out][this.n_in]; - b = new double[this.n_out]; - } - - public void train(int[] x, int[] y, double lr) { - double[] p_y_given_x = new double[n_out]; - double[] dy = new double[n_out]; - - for(int i=0; i<n_out; i++) { - p_y_given_x[i] = 0; - for(int j=0; j<n_in; j++) { - p_y_given_x[i] += W[i][j] * x[j]; - } - p_y_given_x[i] += b[i]; - } - softmax(p_y_given_x); - - for(int i=0; i<n_out; i++) { - dy[i] = y[i] - p_y_given_x[i]; - - for(int j=0; j<n_in; j++) { - W[i][j] += lr * dy[i] * x[j] / N; - } - - b[i] += lr * dy[i] / N; - } - } - - public void softmax(double[] x) { - double max = 0.0; - double sum = 0.0; - - for(int i=0; i<n_out; i++) { - if(max < x[i]) { - max = x[i]; - } - } - - for(int i=0; i<n_out; i++) { - x[i] = Math.exp(x[i] - max); - sum += x[i]; - } - - for(int i=0; i<n_out; i++) { - x[i] /= sum; - } - } - - public void predict(int[] x, double[] y) { - for(int i=0; i<n_out; i++) { - y[i] = 0; - for(int j=0; j<n_in; j++) { - y[i] += W[i][j] * x[j]; - } - y[i] += b[i]; - } - - softmax(y); - } -} diff --git a/java/DBN/src/RBM.java b/java/DBN/src/RBM.java deleted file mode 100644 index 9ee3563..0000000 --- a/java/DBN/src/RBM.java +++ /dev/null @@ -1,164 +0,0 @@ -import java.util.Random; - -public class RBM { - public int N; - public int n_visible; - public int n_hidden; - public double[][] W; - public double[] hbias; - public double[] vbias; - public Random rng; - - public double uniform(double min, double max) { - return rng.nextDouble() * (max - min) + min; - } - - public int binomial(int n, double p) { - if(p < 0 || p > 1) return 0; - - int c = 0; - double r; - - for(int i=0; i<n; i++) { - r = rng.nextDouble(); - if (r < p) c++; - } - - return c; - } - - public static double sigmoid(double x) { - return 1.0 / (1.0 + Math.pow(Math.E, -x)); - } - - - public RBM(int N, int n_visible, int n_hidden, - double[][] W, double[] hbias, double[] vbias, Random rng) { - 
this.N = N; - this.n_visible = n_visible; - this.n_hidden = n_hidden; - - if(rng == null) this.rng = new Random(1234); - else this.rng = rng; - - if(W == null) { - this.W = new double[this.n_hidden][this.n_visible]; - double a = 1.0 / this.n_visible; - - for(int i=0; i<this.n_hidden; i++) { - for(int j=0; j<this.n_visible; j++) { - this.W[i][j] = uniform(-a, a); - } - } - } else { - this.W = W; - } - - if(hbias == null) { - this.hbias = new double[this.n_hidden]; - for(int i=0; i<this.n_hidden; i++) this.hbias[i] = 0; - } else { - this.hbias = hbias; - } - - if(vbias == null) { - this.vbias = new double[this.n_visible]; - for(int i=0; i<this.n_visible; i++) this.vbias[i] = 0; - } else { - this.vbias = vbias; - } - } - - - public void contrastive_divergence(int[] input, double lr, int k) { - double[] ph_mean = new double[n_hidden]; - int[] ph_sample = new int[n_hidden]; - double[] nv_means = new double[n_visible]; - int[] nv_samples = new int[n_visible]; - double[] nh_means = new double[n_hidden]; - int[] nh_samples = new int[n_hidden]; - - /* CD-k */ - sample_h_given_v(input, ph_mean, ph_sample); - - for(int step=0; step<k; step++) { - if(step == 0) { - gibbs_hvh(ph_sample, nv_means, nv_samples, nh_means, nh_samples); - } else { - gibbs_hvh(nh_samples, nv_means, nv_samples, nh_means, nh_samples); - } - } - - for(int i=0; i<n_hidden; i++) { - for(int j=0; j<n_visible; j++) { - // W[i][j] += lr *(ph_sample[i] * input[j] - nh_means[i] * nv_samples[j]) / N; - W[i][j] += lr *(ph_mean[i] * input[j] - nh_means[i] * nv_samples[j]) / N; - } - hbias[i] += lr * (ph_sample[i] - nh_means[i]) / N; - } - - - for(int i=0; i<n_visible; i++) { - vbias[i] += lr * (input[i] - nv_samples[i]) / N; - } - - } - - - public void sample_h_given_v(int[] v0_sample, double[] mean, int[] sample) { - for(int i=0; i<n_hidden; i++) { - mean[i] = propup(v0_sample, W[i], hbias[i]); - sample[i] = binomial(1, mean[i]); - } - } - - public void sample_v_given_h(int[] h0_sample, double[] mean, int[] sample) { - for(int i=0; i<n_visible; i++) { - mean[i] = propdown(h0_sample, i, vbias[i]); - sample[i] = binomial(1, mean[i]); - } - } - - public double propup(int[] v, double[] w, double b) { - double pre_sigmoid_activation = 0.0; - for(int j=0; j<n_visible; j++) { - pre_sigmoid_activation += w[j] * v[j]; - } - pre_sigmoid_activation += b; - return sigmoid(pre_sigmoid_activation); - } - - public double propdown(int[] h, int i, double b) { - double pre_sigmoid_activation = 0.0; - for(int j=0; j<n_hidden; j++) { - pre_sigmoid_activation += W[j][i] * h[j]; - } - pre_sigmoid_activation += b; - return sigmoid(pre_sigmoid_activation); - } - - public void gibbs_hvh(int[] h0_sample, double[] nv_means, int[] nv_samples, double[] nh_means, int[] nh_samples) { - sample_v_given_h(h0_sample, nv_means, nv_samples); - sample_h_given_v(nv_samples, nh_means, nh_samples); - } - - - public void reconstruct(int[] v, double[] reconstructed_v) { - double[] h = new double[n_hidden]; - double pre_sigmoid_activation; - - for(int i=0; i<n_hidden; i++) { - h[i] = propup(v, W[i], hbias[i]); - } - - for(int i=0; i<n_visible; i++) { - pre_sigmoid_activation = 0.0; - for(int j=0; j<n_hidden; j++) { - pre_sigmoid_activation += W[j][i] * h[j]; - } - pre_sigmoid_activation += vbias[i]; - - reconstructed_v[i] = sigmoid(pre_sigmoid_activation); - } - } -} diff --git a/java/LogisticRegression/src/LogisticRegression.java b/java/LogisticRegression/src/LogisticRegression.java deleted file mode 100644 index 8356c95..0000000 --- 
a/java/LogisticRegression/src/LogisticRegression.java +++ /dev/null @@ -1,128 +0,0 @@ - -public class LogisticRegression { - public int N; - public int n_in; - public int n_out; - public double[][] W; - public double[] b; - - public LogisticRegression(int N, int n_in, int n_out) { - this.N = N; - this.n_in = n_in; - this.n_out = n_out; - - W = new double[this.n_out][this.n_in]; - b = new double[this.n_out]; - } - - public void train(int[] x, int[] y, double lr) { - double[] p_y_given_x = new double[n_out]; - double[] dy = new double[n_out]; - - for(int i=0; i<n_out; i++) { - p_y_given_x[i] = 0; - for(int j=0; j<n_in; j++) { - p_y_given_x[i] += W[i][j] * x[j]; - } - p_y_given_x[i] += b[i]; - } - softmax(p_y_given_x); - - for(int i=0; i<n_out; i++) { - dy[i] = y[i] - p_y_given_x[i]; - - for(int j=0; j<n_in; j++) { - W[i][j] += lr * dy[i] * x[j] / N; - } - - b[i] += lr * dy[i] / N; - } - } - - public void softmax(double[] x) { - double max = 0.0; - double sum = 0.0; - - for(int i=0; i<n_out; i++) if(max < x[i]) max = x[i]; - - for(int i=0; i<n_out; i++) { - x[i] = Math.exp(x[i] - max); - sum += x[i]; - } - - for(int i=0; i<n_out; i++) x[i] /= sum; - } - - public void predict(int[] x, double[] y) { - for(int i=0; i<n_out; i++) { - y[i] = 0; - for(int j=0; j<n_in; j++) { - y[i] += W[i][j] * x[j]; - } - y[i] += b[i]; - } - - softmax(y); - } - - private static void test_lr() { - double learning_rate = 0.1; - int n_epochs = 500; - - int train_N = 6; - int test_N = 2; - int n_in = 6; - int n_out = 2; - - int[][] train_X = { - {1, 1, 1, 0, 0, 0}, - {1, 0, 1, 0, 0, 0}, - {1, 1, 1, 0, 0, 0}, - {0, 0, 1, 1, 1, 0}, - {0, 0, 1, 1, 0, 0}, - {0, 0, 1, 1, 1, 0} - }; - - int[][] train_Y = { - {1, 0}, - {1, 0}, - {1, 0}, - {0, 1}, - {0, 1}, - {0, 1} - }; - - // construct - LogisticRegression classifier = new LogisticRegression(train_N, n_in, n_out); - - // train - for(int epoch=0; epoch<n_epochs; epoch++) { - for(int i=0; i<train_N; i++) { - classifier.train(train_X[i], train_Y[i], learning_rate); - } - //learning_rate *= 0.95; - } - - // test data - int[][] test_X = { - {1, 0, 1, 0, 0, 0}, - {0, 0, 1, 1, 1, 0} - }; - - double[][] test_Y = new double[test_N][n_out]; - - - // test - for(int i=0; i<test_N; i++) { - classifier.predict(test_X[i], test_Y[i]); - for(int j=0; j<n_out; j++) { - System.out.print(test_Y[i][j] + " "); - } - System.out.println(); - } - } - - public static void main(String[] args) { - test_lr(); - } -} diff --git a/java/RBM/src/RBM.java b/java/RBM/src/RBM.java deleted file mode 100644 index 6f2e3b7..0000000 --- a/java/RBM/src/RBM.java +++ /dev/null @@ -1,221 +0,0 @@ -import java.util.Random; - -public class RBM { - public int N; - public int n_visible; - public int n_hidden; - public double[][] W; - public double[] hbias; - public double[] vbias; - public Random rng; - - public double uniform(double min, double max) { - return rng.nextDouble() * (max - min) + min; - } - - public int binomial(int n, double p) { - if(p < 0 || p > 1) return 0; - - int c = 0; - double r; - - for(int i=0; i<n; i++) { - r = rng.nextDouble(); - if (r < p) c++; - } - - return c; - } - - public static double sigmoid(double x) { - return 1.0 / (1.0 + Math.pow(Math.E, -x)); - } - - - public RBM(int N, int n_visible, int n_hidden, - double[][] W, double[] hbias, double[] vbias, Random rng) { - this.N = N; - this.n_visible = n_visible; - this.n_hidden = n_hidden; - - if(rng == null) this.rng = new Random(1234); - else this.rng = rng; - - if(W == null) { - this.W = new double[this.n_hidden][this.n_visible]; - double a 
= 1.0 / this.n_visible; - - for(int i=0; i<this.n_hidden; i++) { - for(int j=0; j<this.n_visible; j++) { - this.W[i][j] = uniform(-a, a); - } - } - } else { - this.W = W; - } - - if(hbias == null) { - this.hbias = new double[this.n_hidden]; - for(int i=0; i<this.n_hidden; i++) this.hbias[i] = 0; - } else { - this.hbias = hbias; - } - - if(vbias == null) { - this.vbias = new double[this.n_visible]; - for(int i=0; i<this.n_visible; i++) this.vbias[i] = 0; - } else { - this.vbias = vbias; - } - } - - - public void contrastive_divergence(int[] input, double lr, int k) { - double[] ph_mean = new double[n_hidden]; - int[] ph_sample = new int[n_hidden]; - double[] nv_means = new double[n_visible]; - int[] nv_samples = new int[n_visible]; - double[] nh_means = new double[n_hidden]; - int[] nh_samples = new int[n_hidden]; - - /* CD-k */ - sample_h_given_v(input, ph_mean, ph_sample); - - for(int step=0; step<k; step++) { - if(step == 0) { - gibbs_hvh(ph_sample, nv_means, nv_samples, nh_means, nh_samples); - } else { - gibbs_hvh(nh_samples, nv_means, nv_samples, nh_means, nh_samples); - } - } - - for(int i=0; i<n_hidden; i++) { - for(int j=0; j<n_visible; j++) { - // W[i][j] += lr * (ph_sample[i] * input[j] - nh_means[i] * nv_samples[j]) / N; - W[i][j] += lr * (ph_mean[i] * input[j] - nh_means[i] * nv_samples[j]) / N; - } - hbias[i] += lr * (ph_sample[i] - nh_means[i]) / N; - } - - - for(int i=0; i<n_visible; i++) { - vbias[i] += lr * (input[i] - nv_samples[i]) / N; - } - - } - - - public void sample_h_given_v(int[] v0_sample, double[] mean, int[] sample) { - for(int i=0; i<n_hidden; i++) { - mean[i] = propup(v0_sample, W[i], hbias[i]); - sample[i] = binomial(1, mean[i]); - } - } - - public void sample_v_given_h(int[] h0_sample, double[] mean, int[] sample) { - for(int i=0; i<n_visible; i++) { - mean[i] = propdown(h0_sample, i, vbias[i]); - sample[i] = binomial(1, mean[i]); - } - } - - public double propup(int[] v, double[] w, double b) { - double pre_sigmoid_activation = 0.0; - for(int j=0; j<n_visible; j++) { - pre_sigmoid_activation += w[j] * v[j]; - } - pre_sigmoid_activation += b; - return sigmoid(pre_sigmoid_activation); - } - - public double propdown(int[] h, int i, double b) { - double pre_sigmoid_activation = 0.0; - for(int j=0; j<n_hidden; j++) { - pre_sigmoid_activation += W[j][i] * h[j]; - } - pre_sigmoid_activation += b; - return sigmoid(pre_sigmoid_activation); - } - - public void gibbs_hvh(int[] h0_sample, double[] nv_means, int[] nv_samples, double[] nh_means, int[] nh_samples) { - sample_v_given_h(h0_sample, nv_means, nv_samples); - sample_h_given_v(nv_samples, nh_means, nh_samples); - } - - - public void reconstruct(int[] v, double[] reconstructed_v) { - double[] h = new double[n_hidden]; - double pre_sigmoid_activation; - - for(int i=0; i<n_hidden; i++) { - h[i] = propup(v, W[i], hbias[i]); - } - - for(int i=0; i<n_visible; i++) { - pre_sigmoid_activation = 0.0; - for(int j=0; j<n_hidden; j++) { - pre_sigmoid_activation += W[j][i] * h[j]; - } - pre_sigmoid_activation += vbias[i]; - - reconstructed_v[i] = sigmoid(pre_sigmoid_activation); - } - } - - - - private static void test_rbm() { - Random rng = new Random(123); - - double learning_rate = 0.1; - int training_epochs = 1000; - int k = 1; - - int train_N = 6; - int test_N = 2; - int n_visible = 6; - int n_hidden = 3; - - // training data - int[][] train_X = { - {1, 1, 1, 0, 0, 0}, - {1, 0, 1, 0, 0, 0}, - {1, 1, 1, 0, 0, 0}, - {0, 0, 1, 1, 1, 0}, - {0, 0, 1, 0, 1, 0}, - {0, 0, 1, 1, 1, 0} - }; - - - - RBM rbm = new RBM(train_N, 
n_visible, n_hidden, null, null, null, rng); - - // train - for(int epoch=0; epoch<training_epochs; epoch++) { - for(int i=0; i<train_N; i++) { - rbm.contrastive_divergence(train_X[i], learning_rate, k); - } - } - - // test data - int[][] test_X = { - {1, 1, 0, 0, 0, 0}, - {0, 0, 0, 1, 1, 0} - }; - - double[][] reconstructed_X = new double[test_N][n_visible]; - - for(int i=0; i<test_N; i++) { - rbm.reconstruct(test_X[i], reconstructed_X[i]); - for(int j=0; j<n_visible; j++) { - System.out.printf("%.5f ", reconstructed_X[i][j]); - } - System.out.println(); - } - } - - public static void main(String[] args) { - test_rbm(); - } - -} diff --git a/java/SdA/src/HiddenLayer.java b/java/SdA/src/HiddenLayer.java deleted file mode 100644 index 588eea8..0000000 --- a/java/SdA/src/HiddenLayer.java +++ /dev/null @@ -1,74 +0,0 @@ -import java.util.Random; - -public class HiddenLayer { - public int N; - public int n_in; - public int n_out; - public double[][] W; - public double[] b; - public Random rng; - - public double uniform(double min, double max) { - return rng.nextDouble() * (max - min) + min; - } - - public int binomial(int n, double p) { - if(p < 0 || p > 1) return 0; - - int c = 0; - double r; - - for(int i=0; i<n; i++) { - r = rng.nextDouble(); - if (r < p) c++; - } - - return c; - } - - public static double sigmoid(double x) { - return 1.0 / (1.0 + Math.pow(Math.E, -x)); - } - - - - public HiddenLayer(int N, int n_in, int n_out, double[][] W, double[] b, Random rng) { - this.N = N; - this.n_in = n_in; - this.n_out = n_out; - - if(rng == null) this.rng = new Random(1234); - else this.rng = rng; - - if(W == null) { - this.W = new double[n_out][n_in]; - double a = 1.0 / this.n_in; - - for(int i=0; i<n_out; i++) { - for(int j=0; j<n_in; j++) { - this.W[i][j] = uniform(-a, a); - } - } - } else { - this.W = W; - } - - if(b == null) this.b = new double[n_out]; - else this.b = b; - } - - public double output(int[] input, double[] w, double b) { - double linear_output = 0.0; - for(int j=0; j<n_in; j++) { - linear_output += w[j] * input[j]; - } - linear_output += b; - return sigmoid(linear_output); - } - - public void sample_h_given_v(int[] input, int[] sample) { - for(int i=0; i<n_out; i++) { - sample[i] = binomial(1, output(input, W[i], b[i])); - } - } -} diff --git a/java/SdA/src/LogisticRegression.java b/java/SdA/src/LogisticRegression.java deleted file mode 100644 index 09ba807..0000000 --- a/java/SdA/src/LogisticRegression.java +++ /dev/null @@ -1,73 +0,0 @@ - -public class LogisticRegression { - public int N; - public int n_in; - public int n_out; - public double[][] W; - public double[] b; - - public LogisticRegression(int N, int n_in, int n_out) { - this.N = N; - this.n_in = n_in; - this.n_out = n_out; - - W = new double[this.n_out][this.n_in]; - b = new double[this.n_out]; - } - - public void train(int[] x, int[] y, double lr) { - double[] p_y_given_x = new double[n_out]; - double[] dy = new double[n_out]; - - for(int i=0; i<n_out; i++) { - p_y_given_x[i] = 0; - for(int j=0; j<n_in; j++) { - p_y_given_x[i] += W[i][j] * x[j]; - } - p_y_given_x[i] += b[i]; - } - softmax(p_y_given_x); - - for(int i=0; i<n_out; i++) { - dy[i] = y[i] - p_y_given_x[i]; - - for(int j=0; j<n_in; j++) { - W[i][j] += lr * dy[i] * x[j] / N; - } - - b[i] += lr * dy[i] / N; - } - } - - public void softmax(double[] x) { - double max = 0.0; - double sum = 0.0; - - for(int i=0; i<n_out; i++) { - if(max < x[i]) { - max = x[i]; - } - } - - for(int i=0; i<n_out; i++) { - x[i] = Math.exp(x[i] - max); - sum += x[i]; - } - - 
for(int i=0; i<n_out; i++) { - x[i] /= sum; - } - } - - public void predict(int[] x, double[] y) { - for(int i=0; i<n_out; i++) { - y[i] = 0; - for(int j=0; j<n_in; j++) { - y[i] += W[i][j] * x[j]; - } - y[i] += b[i]; - } - - softmax(y); - } -} diff --git a/java/SdA/src/SdA.java b/java/SdA/src/SdA.java deleted file mode 100644 index a626ad8..0000000 --- a/java/SdA/src/SdA.java +++ /dev/null @@ -1,230 +0,0 @@ -import java.util.Random; - -public class SdA { - public int N; - public int n_ins; - public int[] hidden_layer_sizes; - public int n_outs; - public int n_layers; - public HiddenLayer[] sigmoid_layers; - public dA[] dA_layers; - public LogisticRegression log_layer; - public Random rng; - - public static double sigmoid(double x) { - return 1.0 / (1.0 + Math.pow(Math.E, -x)); - } - - public SdA(int N, int n_ins, int[] hidden_layer_sizes, int n_outs, int n_layers, Random rng) { - int input_size; - - this.N = N; - this.n_ins = n_ins; - this.hidden_layer_sizes = hidden_layer_sizes; - this.n_outs = n_outs; - this.n_layers = n_layers; - - this.sigmoid_layers = new HiddenLayer[n_layers]; - this.dA_layers = new dA[n_layers]; - - if(rng == null) this.rng = new Random(1234); - else this.rng = rng; - - // construct multi-layer - for(int i=0; i<this.n_layers; i++) { - if(i == 0) { - input_size = this.n_ins; - } else { - input_size = this.hidden_layer_sizes[i-1]; - } - - // construct sigmoid_layer - this.sigmoid_layers[i] = new HiddenLayer(this.N, input_size, this.hidden_layer_sizes[i], null, null, rng); - - // construct dA_layer - this.dA_layers[i] = new dA(this.N, input_size, this.hidden_layer_sizes[i], this.sigmoid_layers[i].W, this.sigmoid_layers[i].b, null, rng); - } - - // layer for output using LogisticRegression - this.log_layer = new LogisticRegression(this.N, this.hidden_layer_sizes[this.n_layers-1], this.n_outs); - } - - public void pretrain(int[][] train_X, double lr, double corruption_level, int epochs) { - int[] layer_input = new int[0]; - int prev_layer_input_size; - int[] prev_layer_input; - - for(int i=0; i<n_layers; i++) { // layer-wise - for(int epoch=0; epoch<epochs; epoch++) { // training epochs - for(int n=0; n<N; n++) { // input x1...xN - // layer input - for(int l=0; l<=i; l++) { - - if(l == 0) { - layer_input = new int[n_ins]; - for(int j=0; j<n_ins; j++) layer_input[j] = train_X[n][j]; - } else { - if(l == 1) prev_layer_input_size = n_ins; - else prev_layer_input_size = hidden_layer_sizes[l-2]; - - prev_layer_input = new int[prev_layer_input_size]; - for(int j=0; j<prev_layer_input_size; j++) prev_layer_input[j] = layer_input[j]; - - layer_input = new int[hidden_layer_sizes[l-1]]; - - sigmoid_layers[l-1].sample_h_given_v(prev_layer_input, layer_input); - } - } - - dA_layers[i].train(layer_input, lr, corruption_level); - } - } - } - } - - public void finetune(int[][] train_X, int[][] train_Y, double lr, int epochs) { - int[] layer_input = new int[0]; - // int prev_layer_input_size; - int[] prev_layer_input = new int[0]; - - for(int epoch=0; epoch<epochs; epoch++) { - for(int n=0; n<N; n++) { - - // layer input - for(int i=0; i<n_layers; i++) { - if(i == 0) { - prev_layer_input = new int[n_ins]; - for(int j=0; j<n_ins; j++) prev_layer_input[j] = train_X[n][j]; - } else { - prev_layer_input = new int[hidden_layer_sizes[i-1]]; - for(int j=0; j<hidden_layer_sizes[i-1]; j++) prev_layer_input[j] = layer_input[j]; - } - - layer_input = new int[hidden_layer_sizes[i]]; - sigmoid_layers[i].sample_h_given_v(prev_layer_input, layer_input); - } - - log_layer.train(layer_input, 
train_Y[n], lr); - } - // lr *= 0.95; - } - } - - public void predict(int[] x, double[] y) { - double[] layer_input = new double[0]; - // int prev_layer_input_size; - double[] prev_layer_input = new double[n_ins]; - for(int j=0; j<n_ins; j++) prev_layer_input[j] = x[j]; - - double linear_output; - - - // layer activation - for(int i=0; i<n_layers; i++) { - layer_input = new double[sigmoid_layers[i].n_out]; - - for(int k=0; k<sigmoid_layers[i].n_out; k++) { - linear_output = 0.0; - - for(int j=0; j<sigmoid_layers[i].n_in; j++) { - linear_output += sigmoid_layers[i].W[k][j] * prev_layer_input[j]; - } - linear_output += sigmoid_layers[i].b[k]; - layer_input[k] = sigmoid(linear_output); - } - - if(i < n_layers-1) { - prev_layer_input = new double[sigmoid_layers[i].n_out]; - for(int j=0; j<sigmoid_layers[i].n_out; j++) prev_layer_input[j] = layer_input[j]; - } - } - - for(int i=0; i<log_layer.n_out; i++) { - y[i] = 0; - for(int j=0; j<log_layer.n_in; j++) { - y[i] += log_layer.W[i][j] * layer_input[j]; - } - y[i] += log_layer.b[i]; - } - - log_layer.softmax(y); - } - - - private static void test_sda() { - Random rng = new Random(123); - - double pretrain_lr = 0.1; - double corruption_level = 0.3; - int pretraining_epochs = 1000; - double finetune_lr = 0.1; - int finetune_epochs = 500; - - int train_N = 10; - int test_N = 4; - int n_ins = 28; - int n_outs = 2; - int[] hidden_layer_sizes = {15, 15}; - int n_layers = hidden_layer_sizes.length; - - // training data - int[][] train_X = { - {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1} - }; - - int[][] train_Y = { - {1, 0}, - {1, 0}, - {1, 0}, - {1, 0}, - {1, 0}, - {0, 1}, - {0, 1}, - {0, 1}, - {0, 1}, - {0, 1} - }; - - // construct SdA - SdA sda = new SdA(train_N, n_ins, hidden_layer_sizes, n_outs, n_layers, rng); - - // pretrain - sda.pretrain(train_X, pretrain_lr, corruption_level, pretraining_epochs); - - // finetune - sda.finetune(train_X, train_Y, finetune_lr, finetune_epochs); - - - // test data - int[][] test_X = { - {1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1} - }; - - double[][] test_Y = new double[test_N][n_outs]; - - // test - for(int i=0; i<test_N; i++) { - sda.predict(test_X[i], test_Y[i]); - for(int j=0; j<n_outs; j++) { - System.out.print(test_Y[i][j] + " "); - } - System.out.println(); - } - } - - public static void main(String[] args) { - test_sda(); - } -} diff --git a/java/SdA/src/dA.java b/java/SdA/src/dA.java 
deleted file mode 100644 index 3484350..0000000 --- a/java/SdA/src/dA.java +++ /dev/null @@ -1,150 +0,0 @@ -import java.util.Random; - -public class dA { - public int N; - public int n_visible; - public int n_hidden; - public double[][] W; - public double[] hbias; - public double[] vbias; - public Random rng; - - - public double uniform(double min, double max) { - return rng.nextDouble() * (max - min) + min; - } - - public int binomial(int n, double p) { - if(p < 0 || p > 1) return 0; - - int c = 0; - double r; - - for(int i=0; i<n; i++) { - r = rng.nextDouble(); - if (r < p) c++; - } - - return c; - } - - public static double sigmoid(double x) { - return 1.0 / (1.0 + Math.pow(Math.E, -x)); - } - - public dA(int N, int n_visible, int n_hidden, - double[][] W, double[] hbias, double[] vbias, Random rng) { - this.N = N; - this.n_visible = n_visible; - this.n_hidden = n_hidden; - - if(rng == null) this.rng = new Random(1234); - else this.rng = rng; - - if(W == null) { - this.W = new double[this.n_hidden][this.n_visible]; - double a = 1.0 / this.n_visible; - - for(int i=0; i<this.n_hidden; i++) { - for(int j=0; j<this.n_visible; j++) { - this.W[i][j] = uniform(-a, a); - } - } - } else { - this.W = W; - } - - if(hbias == null) { - this.hbias = new double[this.n_hidden]; - for(int i=0; i<this.n_hidden; i++) this.hbias[i] = 0; - } else { - this.hbias = hbias; - } - - if(vbias == null) { - this.vbias = new double[this.n_visible]; - for(int i=0; i<this.n_visible; i++) this.vbias[i] = 0; - } else { - this.vbias = vbias; - } - } - - public void get_corrupted_input(int[] x, int[] tilde_x, double p) { - for(int i=0; i<n_visible; i++) { - if(x[i] == 0) { - tilde_x[i] = 0; - } else { - tilde_x[i] = binomial(1, p); - } - } - } - - // Encode - public void get_hidden_values(int[] x, double[] y) { - for(int i=0; i<n_hidden; i++) { - y[i] = 0; - for(int j=0; j<n_visible; j++) { - y[i] += W[i][j] * x[j]; - } - y[i] += hbias[i]; - y[i] = sigmoid(y[i]); - } - } - - // Decode - public void get_reconstructed_input(double[] y, double[] z) { - for(int i=0; i<n_visible; i++) { - z[i] = 0; - for(int j=0; j<n_hidden; j++) { - z[i] += W[j][i] * y[j]; - } - z[i] += vbias[i]; - z[i] = sigmoid(z[i]); - } - } - - public void train(int[] x, double lr, double corruption_level) { - int[] tilde_x = new int[n_visible]; - double[] y = new double[n_hidden]; - double[] z = new double[n_visible]; - - double[] L_vbias = new double[n_visible]; - double[] L_hbias = new double[n_hidden]; - - double p = 1 - corruption_level; - - get_corrupted_input(x, tilde_x, p); - get_hidden_values(tilde_x, y); - get_reconstructed_input(y, z); - - // vbias - for(int i=0; i<n_visible; i++) { - L_vbias[i] = x[i] - z[i]; - vbias[i] += lr * L_vbias[i] / N; - } - - // hbias - for(int i=0; i<n_hidden; i++) { - L_hbias[i] = 0; - for(int j=0; j<n_visible; j++) { - L_hbias[i] += W[i][j] * L_vbias[j]; - } - L_hbias[i] *= y[i] * (1 - y[i]); - hbias[i] += lr * L_hbias[i] / N; - } - - // W - for(int i=0; i<n_hidden; i++) { - for(int j=0; j<n_visible; j++) { - W[i][j] += lr * (L_hbias[i] * tilde_x[j] + L_vbias[j] * y[i]) / N; - } - } - } - - public void reconstruct(int[] x, double[] z) { - double[] y = new double[n_hidden]; - - get_hidden_values(x, y); - get_reconstructed_input(y, z); - } -} diff --git a/java/dA/src/dA.java b/java/dA/src/dA.java deleted file mode 100644 index 7429724..0000000 --- a/java/dA/src/dA.java +++ /dev/null @@ -1,207 +0,0 @@ -import java.util.Random; - -public class dA { - public int N; - public int n_visible; - public int n_hidden; - public 
double[][] W; - public double[] hbias; - public double[] vbias; - public Random rng; - - - public double uniform(double min, double max) { - return rng.nextDouble() * (max - min) + min; - } - - public int binomial(int n, double p) { - if(p < 0 || p > 1) return 0; - - int c = 0; - double r; - - for(int i=0; i<n; i++) { - r = rng.nextDouble(); - if (r < p) c++; - } - - return c; - } - - public static double sigmoid(double x) { - return 1.0 / (1.0 + Math.pow(Math.E, -x)); - } - - public dA(int N, int n_visible, int n_hidden, - double[][] W, double[] hbias, double[] vbias, Random rng) { - this.N = N; - this.n_visible = n_visible; - this.n_hidden = n_hidden; - - if(rng == null) this.rng = new Random(1234); - else this.rng = rng; - - if(W == null) { - this.W = new double[this.n_hidden][this.n_visible]; - double a = 1.0 / this.n_visible; - - for(int i=0; i<this.n_hidden; i++) { - for(int j=0; j<this.n_visible; j++) { - this.W[i][j] = uniform(-a, a); - } - } - } else { - this.W = W; - } - - if(hbias == null) { - this.hbias = new double[this.n_hidden]; - for(int i=0; i<this.n_hidden; i++) this.hbias[i] = 0; - } else { - this.hbias = hbias; - } - - if(vbias == null) { - this.vbias = new double[this.n_visible]; - for(int i=0; i<this.n_visible; i++) this.vbias[i] = 0; - } else { - this.vbias = vbias; - } - } - - public void get_corrupted_input(int[] x, int[] tilde_x, double p) { - for(int i=0; i<n_visible; i++) { - if(x[i] == 0) { - tilde_x[i] = 0; - } else { - tilde_x[i] = binomial(1, p); - } - } - } - - // Encode - public void get_hidden_values(int[] x, double[] y) { - for(int i=0; i<n_hidden; i++) { - y[i] = 0; - for(int j=0; j<n_visible; j++) { - y[i] += W[i][j] * x[j]; - } - y[i] += hbias[i]; - y[i] = sigmoid(y[i]); - } - } - - // Decode - public void get_reconstructed_input(double[] y, double[] z) { - for(int i=0; i<n_visible; i++) { - z[i] = 0; - for(int j=0; j<n_hidden; j++) { - z[i] += W[j][i] * y[j]; - } - z[i] += vbias[i]; - z[i] = sigmoid(z[i]); - } - } - - public void train(int[] x, double lr, double corruption_level) { - int[] tilde_x = new int[n_visible]; - double[] y = new double[n_hidden]; - double[] z = new double[n_visible]; - - double[] L_vbias = new double[n_visible]; - double[] L_hbias = new double[n_hidden]; - - double p = 1 - corruption_level; - - get_corrupted_input(x, tilde_x, p); - get_hidden_values(tilde_x, y); - get_reconstructed_input(y, z); - - // vbias - for(int i=0; i<n_visible; i++) { - L_vbias[i] = x[i] - z[i]; - vbias[i] += lr * L_vbias[i] / N; - } - - // hbias - for(int i=0; i<n_hidden; i++) { - L_hbias[i] = 0; - for(int j=0; j<n_visible; j++) { - L_hbias[i] += W[i][j] * L_vbias[j]; - } - L_hbias[i] *= y[i] * (1 - y[i]); - hbias[i] += lr * L_hbias[i] / N; - } - - // W - for(int i=0; i<n_hidden; i++) { - for(int j=0; j<n_visible; j++) { - W[i][j] += lr * (L_hbias[i] * tilde_x[j] + L_vbias[j] * y[i]) / N; - } - } - } - - public void reconstruct(int[] x, double[] z) { - double[] y = new double[n_hidden]; - - get_hidden_values(x, y); - get_reconstructed_input(y, z); - } - - - private static void test_dA() { - Random rng = new Random(123); - - double learning_rate = 0.1; - double corruption_level = 0.3; - int training_epochs = 100; - - int train_N = 10; - int test_N = 2; - int n_visible = 20; - int n_hidden = 5; - - int[][] train_X = { - {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0}, - {0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0} - }; - - dA da = new dA(train_N, n_visible, n_hidden, null, null, null, rng); - - // train - for(int epoch=0; epoch<training_epochs; epoch++) { - for(int i=0; i<train_N; i++) { - da.train(train_X[i], learning_rate, corruption_level); - } - } - - // test data - int[][] test_X = { - {1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0} - }; - - double[][] reconstructed_X = new double[test_N][n_visible]; - - // test - for(int i=0; i<test_N; i++) { - da.reconstruct(test_X[i], reconstructed_X[i]); - for(int j=0; j<n_visible; j++) { - System.out.printf("%.5f ", reconstructed_X[i][j]); - } - System.out.println(); - } - } - - public static void main(String[] args) { - test_dA(); - } -} diff --git a/java/src/DeepLearning/DBN.java b/java/src/DeepLearning/DBN.java new file mode 100644 index 0000000..39fb999 --- /dev/null +++ b/java/src/DeepLearning/DBN.java @@ -0,0 +1,222 @@ +package DeepLearning; + +import java.util.Random; +import static DeepLearning.utils.*; + +public class DBN { + public int N; + public int n_ins; + public int[] hidden_layer_sizes; + public int n_outs; + public int n_layers; + public HiddenLayer[] sigmoid_layers; + public RBM[] rbm_layers; + public LogisticRegression log_layer; + public Random rng; + + + public DBN(int N, int n_ins, int[] hidden_layer_sizes, int n_outs, int n_layers, Random rng) { + int input_size; + + this.N = N; + this.n_ins = n_ins; + this.hidden_layer_sizes = hidden_layer_sizes; + this.n_outs = n_outs; + this.n_layers = n_layers; + + this.sigmoid_layers = new HiddenLayer[n_layers]; + this.rbm_layers = new RBM[n_layers]; + + if(rng == null) this.rng = new Random(1234); + else this.rng = rng; + + // construct multi-layer + for(int i=0; i<this.n_layers; i++) { + if(i == 0) { + input_size = this.n_ins; + } else { + input_size = this.hidden_layer_sizes[i-1]; + } + + // construct sigmoid_layer + this.sigmoid_layers[i] = new HiddenLayer(this.N, input_size, this.hidden_layer_sizes[i], null, null, rng); + + // construct rbm_layer + this.rbm_layers[i] = new RBM(this.N, input_size, this.hidden_layer_sizes[i], this.sigmoid_layers[i].W, this.sigmoid_layers[i].b, null, rng); + } + + // layer for output using DNN.LogisticRegression + this.log_layer = new LogisticRegression(this.N, this.hidden_layer_sizes[this.n_layers-1], this.n_outs); + } + + public void pretrain(int[][] train_X, double lr, int k, int epochs) { + int[] layer_input = new int[0]; + int prev_layer_input_size; + int[] prev_layer_input; + + for(int i=0; i<n_layers; i++) { // layer-wise + for(int epoch=0; epoch<epochs; epoch++) { // training epochs + for(int n=0; n<N; n++) { // input x1...xN + // layer input + for(int l=0; l<=i; l++) { + + if(l == 0) { + layer_input = new int[n_ins]; + for(int j=0; j<n_ins; j++) layer_input[j] = train_X[n][j]; + } else { + if(l == 1) prev_layer_input_size = n_ins; + else prev_layer_input_size = hidden_layer_sizes[l-2]; + + prev_layer_input = new int[prev_layer_input_size]; + for(int j=0; j<prev_layer_input_size; j++) prev_layer_input[j] = layer_input[j]; + + layer_input = new int[hidden_layer_sizes[l-1]]; + + 
sigmoid_layers[l-1].sample_h_given_v(prev_layer_input, layer_input); + } + } + + rbm_layers[i].contrastive_divergence(layer_input, lr, k); + } + } + } + } + + public void finetune(int[][] train_X, int[][] train_Y, double lr, int epochs) { + int[] layer_input = new int[0]; + // int prev_layer_input_size; + int[] prev_layer_input = new int[0]; + + for(int epoch=0; epoch<epochs; epoch++) { + for(int n=0; n<N; n++) { + + // layer input + for(int i=0; i<n_layers; i++) { + if(i == 0) { + prev_layer_input = new int[n_ins]; + for(int j=0; j<n_ins; j++) prev_layer_input[j] = train_X[n][j]; + } else { + prev_layer_input = new int[hidden_layer_sizes[i-1]]; + for(int j=0; j<hidden_layer_sizes[i-1]; j++) prev_layer_input[j] = layer_input[j]; + } + + layer_input = new int[hidden_layer_sizes[i]]; + sigmoid_layers[i].sample_h_given_v(prev_layer_input, layer_input); + } + + log_layer.train(layer_input, train_Y[n], lr); + } + // lr *= 0.95; + } + } + + public void predict(int[] x, double[] y) { + double[] layer_input = new double[0]; + // int prev_layer_input_size; + double[] prev_layer_input = new double[n_ins]; + for(int j=0; j<n_ins; j++) prev_layer_input[j] = x[j]; + + double linear_output; + + + // layer activation + for(int i=0; i<n_layers; i++) { + layer_input = new double[sigmoid_layers[i].n_out]; + + for(int k=0; k<sigmoid_layers[i].n_out; k++) { + linear_output = 0.0; + + for(int j=0; j<sigmoid_layers[i].n_in; j++) { + linear_output += sigmoid_layers[i].W[k][j] * prev_layer_input[j]; + } + linear_output += sigmoid_layers[i].b[k]; + layer_input[k] = sigmoid(linear_output); + } + + if(i < n_layers-1) { + prev_layer_input = new double[sigmoid_layers[i].n_out]; + for(int j=0; j<sigmoid_layers[i].n_out; j++) prev_layer_input[j] = layer_input[j]; + } + } + + for(int i=0; i<log_layer.n_out; i++) { + y[i] = 0; + for(int j=0; j<log_layer.n_in; j++) { + y[i] += log_layer.W[i][j] * layer_input[j]; + } + y[i] += log_layer.b[i]; + } + + log_layer.softmax(y); + } + + private static void test_dbn() { + Random rng = new Random(123); + + double pretrain_lr = 0.1; + int pretraining_epochs = 1000; + int k = 1; + double finetune_lr = 0.1; + int finetune_epochs = 500; + + int train_N = 6; + int test_N = 4; + int n_ins = 6; + int n_outs = 2; + int[] hidden_layer_sizes = {3, 3}; + int n_layers = hidden_layer_sizes.length; + + // training data + int[][] train_X = { + {1, 1, 1, 0, 0, 0}, + {1, 0, 1, 0, 0, 0}, + {1, 1, 1, 0, 0, 0}, + {0, 0, 1, 1, 1, 0}, + {0, 0, 1, 1, 0, 0}, + {0, 0, 1, 1, 1, 0} + }; + + int[][] train_Y = { + {1, 0}, + {1, 0}, + {1, 0}, + {0, 1}, + {0, 1}, + {0, 1}, + }; + + + // construct DNN.DBN + DBN dbn = new DBN(train_N, n_ins, hidden_layer_sizes, n_outs, n_layers, rng); + + // pretrain + dbn.pretrain(train_X, pretrain_lr, k, pretraining_epochs); + + // finetune + dbn.finetune(train_X, train_Y, finetune_lr, finetune_epochs); + + + // test data + int[][] test_X = { + {1, 1, 0, 0, 0, 0}, + {1, 1, 1, 1, 0, 0}, + {0, 0, 0, 1, 1, 0}, + {0, 0, 1, 1, 1, 0}, + }; + + double[][] test_Y = new double[test_N][n_outs]; + + // test + for(int i=0; i<test_N; i++) { + dbn.predict(test_X[i], test_Y[i]); + for(int j=0; j<n_outs; j++) { + System.out.print(test_Y[i][j] + " "); + } + System.out.println(); + } + } + + public static void main(String[] args) { + test_dbn(); + } +} diff --git a/java/src/DeepLearning/HiddenLayer.java b/java/src/DeepLearning/HiddenLayer.java new file mode 100644 index 0000000..ca50104 --- /dev/null +++ b/java/src/DeepLearning/HiddenLayer.java @@ -0,0 +1,54 @@ +package DeepLearning; + +import 
java.util.Random; +import static DeepLearning.utils.*; + +public class HiddenLayer { + public int N; + public int n_in; + public int n_out; + public double[][] W; + public double[] b; + public Random rng; + + + public HiddenLayer(int N, int n_in, int n_out, double[][] W, double[] b, Random rng) { + this.N = N; + this.n_in = n_in; + this.n_out = n_out; + + if(rng == null) this.rng = new Random(1234); + else this.rng = rng; + + if(W == null) { + this.W = new double[n_out][n_in]; + double a = 1.0 / this.n_in; + + for(int i=0; i<n_out; i++) { + for(int j=0; j<n_in; j++) { + this.W[i][j] = uniform(-a, a, rng); + } + } + } else { + this.W = W; + } + + if(b == null) this.b = new double[n_out]; + else this.b = b; + } + + public double output(int[] input, double[] w, double b) { + double linear_output = 0.0; + for(int j=0; j<n_in; j++) { + linear_output += w[j] * input[j]; + } + linear_output += b; + return sigmoid(linear_output); + } + + public void sample_h_given_v(int[] input, int[] sample) { + for(int i=0; i<n_out; i++) { + sample[i] = binomial(1, output(input, W[i], b[i]), rng); + } + } +} diff --git a/java/src/DeepLearning/LogisticRegression.java b/java/src/DeepLearning/LogisticRegression.java new file mode 100644 index 0000000..abe11a0 --- /dev/null +++ b/java/src/DeepLearning/LogisticRegression.java @@ -0,0 +1,129 @@ +package DeepLearning; + +public class LogisticRegression { + public int N; + public int n_in; + public int n_out; + public double[][] W; + public double[] b; + + public LogisticRegression(int N, int n_in, int n_out) { + this.N = N; + this.n_in = n_in; + this.n_out = n_out; + + W = new double[this.n_out][this.n_in]; + b = new double[this.n_out]; + } + + public void train(int[] x, int[] y, double lr) { + double[] p_y_given_x = new double[n_out]; + double[] dy = new double[n_out]; + + for(int i=0; i<n_out; i++) { + p_y_given_x[i] = 0; + for(int j=0; j<n_in; j++) { + p_y_given_x[i] += W[i][j] * x[j]; + } + p_y_given_x[i] += b[i]; + } + softmax(p_y_given_x); + + for(int i=0; i<n_out; i++) { + dy[i] = y[i] - p_y_given_x[i]; + + for(int j=0; j<n_in; j++) { + W[i][j] += lr * dy[i] * x[j] / N; + } + + b[i] += lr * dy[i] / N; + } + } + + public void softmax(double[] x) { + double max = 0.0; + double sum = 0.0; + + for(int i=0; i<n_out; i++) if(max < x[i]) max = x[i]; + + for(int i=0; i<n_out; i++) { + x[i] = Math.exp(x[i] - max); + sum += x[i]; + } + + for(int i=0; i<n_out; i++) x[i] /= sum; + } + + public void predict(int[] x, double[] y) { + for(int i=0; i<n_out; i++) { + y[i] = 0; + for(int j=0; j<n_in; j++) { + y[i] += W[i][j] * x[j]; + } + y[i] += b[i]; + } + + softmax(y); + } + + private static void test_lr() { + double learning_rate = 0.1; + int n_epochs = 500; + + int train_N = 6; + int test_N = 2; + int n_in = 6; + int n_out = 2; + + int[][] train_X = { + {1, 1, 1, 0, 0, 0}, + {1, 0, 1, 0, 0, 0}, + {1, 1, 1, 0, 0, 0}, + {0, 0, 1, 1, 1, 0}, + {0, 0, 1, 1, 0, 0}, + {0, 0, 1, 1, 1, 0} + }; + + int[][] train_Y = { + {1, 0}, + {1, 0}, + {1, 0}, + {0, 1}, + {0, 1}, + {0, 1} + }; + + // construct + LogisticRegression classifier = new LogisticRegression(train_N, n_in, n_out); + + // train + for(int epoch=0; epoch<n_epochs; epoch++) { + for(int i=0; i<train_N; i++) { + classifier.train(train_X[i], train_Y[i], learning_rate); + } + //learning_rate *= 0.95; + } + + // test data + int[][] test_X = { + {1, 0, 1, 0, 0, 0}, + {0, 0, 1, 1, 1, 0} + }; + + double[][] test_Y = new double[test_N][n_out]; + + + // test + for(int i=0; i<test_N; i++) { + classifier.predict(test_X[i], test_Y[i]); + 
for(int j=0; j<n_out; j++) { + System.out.print(test_Y[i][j] + " "); + } + System.out.println(); + } + } + + public static void main(String[] args) { + test_lr(); + } +} diff --git a/java/src/DeepLearning/RBM.java b/java/src/DeepLearning/RBM.java new file mode 100644 index 0000000..d84e65c --- /dev/null +++ b/java/src/DeepLearning/RBM.java @@ -0,0 +1,203 @@ +package DeepLearning; + +import java.util.Random; +import static DeepLearning.utils.*; + + +public class RBM { + public int N; + public int n_visible; + public int n_hidden; + public double[][] W; + public double[] hbias; + public double[] vbias; + public Random rng; + + + public RBM(int N, int n_visible, int n_hidden, + double[][] W, double[] hbias, double[] vbias, Random rng) { + this.N = N; + this.n_visible = n_visible; + this.n_hidden = n_hidden; + + if(rng == null) this.rng = new Random(1234); + else this.rng = rng; + + if(W == null) { + this.W = new double[this.n_hidden][this.n_visible]; + double a = 1.0 / this.n_visible; + + for(int i=0; i<this.n_hidden; i++) { + for(int j=0; j<this.n_visible; j++) { + this.W[i][j] = uniform(-a, a, rng); + } + } + } else { + this.W = W; + } + + if(hbias == null) { + this.hbias = new double[this.n_hidden]; + for(int i=0; i<this.n_hidden; i++) this.hbias[i] = 0; + } else { + this.hbias = hbias; + } + + if(vbias == null) { + this.vbias = new double[this.n_visible]; + for(int i=0; i<this.n_visible; i++) this.vbias[i] = 0; + } else { + this.vbias = vbias; + } + } + + + public void contrastive_divergence(int[] input, double lr, int k) { + double[] ph_mean = new double[n_hidden]; + int[] ph_sample = new int[n_hidden]; + double[] nv_means = new double[n_visible]; + int[] nv_samples = new int[n_visible]; + double[] nh_means = new double[n_hidden]; + int[] nh_samples = new int[n_hidden]; + + /* CD-k */ + sample_h_given_v(input, ph_mean, ph_sample); + + for(int step=0; step<k; step++) { + if(step == 0) { + gibbs_hvh(ph_sample, nv_means, nv_samples, nh_means, nh_samples); + } else { + gibbs_hvh(nh_samples, nv_means, nv_samples, nh_means, nh_samples); + } + } + + for(int i=0; i<n_hidden; i++) { + for(int j=0; j<n_visible; j++) { + // W[i][j] += lr * (ph_sample[i] * input[j] - nh_means[i] * nv_samples[j]) / N; + W[i][j] += lr * (ph_mean[i] * input[j] - nh_means[i] * nv_samples[j]) / N; + } + hbias[i] += lr * (ph_sample[i] - nh_means[i]) / N; + } + + + for(int i=0; i<n_visible; i++) { + vbias[i] += lr * (input[i] - nv_samples[i]) / N; + } + + } + + + public void sample_h_given_v(int[] v0_sample, double[] mean, int[] sample) { + for(int i=0; i<n_hidden; i++) { + mean[i] = propup(v0_sample, W[i], hbias[i]); + sample[i] = binomial(1, mean[i], rng); + } + } + + public void sample_v_given_h(int[] h0_sample, double[] mean, int[] sample) { + for(int i=0; i<n_visible; i++) { + mean[i] = propdown(h0_sample, i, vbias[i]); + sample[i] = binomial(1, mean[i], rng); + } + } + + public double propup(int[] v, double[] w, double b) { + double pre_sigmoid_activation = 0.0; + for(int j=0; j<n_visible; j++) { + pre_sigmoid_activation += w[j] * v[j]; + } + pre_sigmoid_activation += b; + return sigmoid(pre_sigmoid_activation); + } + + public double propdown(int[] h, int i, double b) { + double pre_sigmoid_activation = 0.0; + for(int j=0; j<n_hidden; j++) { + pre_sigmoid_activation += W[j][i] * h[j]; + } + pre_sigmoid_activation += b; + return sigmoid(pre_sigmoid_activation); + } + + public void gibbs_hvh(int[] h0_sample, double[] nv_means, int[] nv_samples, double[] nh_means, int[] nh_samples) { + sample_v_given_h(h0_sample, 
nv_means, nv_samples); + sample_h_given_v(nv_samples, nh_means, nh_samples); + } + + + public void reconstruct(int[] v, double[] reconstructed_v) { + double[] h = new double[n_hidden]; + double pre_sigmoid_activation; + + for(int i=0; i<n_hidden; i++) { + h[i] = propup(v, W[i], hbias[i]); + } + + for(int i=0; i<n_visible; i++) { + pre_sigmoid_activation = 0.0; + for(int j=0; j<n_hidden; j++) { + pre_sigmoid_activation += W[j][i] * h[j]; + } + pre_sigmoid_activation += vbias[i]; + + reconstructed_v[i] = sigmoid(pre_sigmoid_activation); + } + } + + + + private static void test_rbm() { + Random rng = new Random(123); + + double learning_rate = 0.1; + int training_epochs = 1000; + int k = 1; + + int train_N = 6; + int test_N = 2; + int n_visible = 6; + int n_hidden = 3; + + // training data + int[][] train_X = { + {1, 1, 1, 0, 0, 0}, + {1, 0, 1, 0, 0, 0}, + {1, 1, 1, 0, 0, 0}, + {0, 0, 1, 1, 1, 0}, + {0, 0, 1, 0, 1, 0}, + {0, 0, 1, 1, 1, 0} + }; + + + + RBM rbm = new RBM(train_N, n_visible, n_hidden, null, null, null, rng); + + // train + for(int epoch=0; epoch<training_epochs; epoch++) { + for(int i=0; i<train_N; i++) { + rbm.contrastive_divergence(train_X[i], learning_rate, k); + } + } + + // test data + int[][] test_X = { + {1, 1, 0, 0, 0, 0}, + {0, 0, 0, 1, 1, 0} + }; + + double[][] reconstructed_X = new double[test_N][n_visible]; + + for(int i=0; i<test_N; i++) { + rbm.reconstruct(test_X[i], reconstructed_X[i]); + for(int j=0; j<n_visible; j++) { + System.out.printf("%.5f ", reconstructed_X[i][j]); + } + System.out.println(); + } + } + + public static void main(String[] args) { + test_rbm(); + } + +} diff --git a/java/src/DeepLearning/SdA.java b/java/src/DeepLearning/SdA.java new file mode 100644 index 0000000..962c335 --- /dev/null +++ b/java/src/DeepLearning/SdA.java @@ -0,0 +1,230 @@ +package DeepLearning; + +import java.util.Random; +import static DeepLearning.utils.*; + +public class SdA { + public int N; + public int n_ins; + public int[] hidden_layer_sizes; + public int n_outs; + public int n_layers; + public HiddenLayer[] sigmoid_layers; + public dA[] dA_layers; + public LogisticRegression log_layer; + public Random rng; + + + public SdA(int N, int n_ins, int[] hidden_layer_sizes, int n_outs, int n_layers, Random rng) { + int input_size; + + this.N = N; + this.n_ins = n_ins; + this.hidden_layer_sizes = hidden_layer_sizes; + this.n_outs = n_outs; + this.n_layers = n_layers; + + this.sigmoid_layers = new HiddenLayer[n_layers]; + this.dA_layers = new dA[n_layers]; + + if(rng == null) this.rng = new Random(1234); + else this.rng = rng; + + // construct multi-layer + for(int i=0; i<this.n_layers; i++) { + if(i == 0) { + input_size = this.n_ins; + } else { + input_size = this.hidden_layer_sizes[i-1]; + } + + // construct sigmoid_layer + this.sigmoid_layers[i] = new HiddenLayer(this.N, input_size, this.hidden_layer_sizes[i], null, null, rng); + + // construct dA_layer + this.dA_layers[i] = new dA(this.N, input_size, this.hidden_layer_sizes[i], this.sigmoid_layers[i].W, this.sigmoid_layers[i].b, null, rng); + } + + // layer for output using DNN.LogisticRegression + this.log_layer = new LogisticRegression(this.N, this.hidden_layer_sizes[this.n_layers-1], this.n_outs); + } + + public void pretrain(int[][] train_X, double lr, double corruption_level, int epochs) { + int[] layer_input = new int[0]; + int prev_layer_input_size; + int[] prev_layer_input; + + for(int i=0; i<n_layers; i++) { // layer-wise + for(int epoch=0; epoch<epochs; epoch++) { // training epochs + for(int n=0; n<N; n++) { // 
input x1...xN + // layer input + for(int l=0; l<=i; l++) { + + if(l == 0) { + layer_input = new int[n_ins]; + for(int j=0; j<n_ins; j++) layer_input[j] = train_X[n][j]; + } else { + if(l == 1) prev_layer_input_size = n_ins; + else prev_layer_input_size = hidden_layer_sizes[l-2]; + + prev_layer_input = new int[prev_layer_input_size]; + for(int j=0; j<prev_layer_input_size; j++) prev_layer_input[j] = layer_input[j]; + + layer_input = new int[hidden_layer_sizes[l-1]]; + + sigmoid_layers[l-1].sample_h_given_v(prev_layer_input, layer_input); + } + } + + dA_layers[i].train(layer_input, lr, corruption_level); + } + } + } + } + + public void finetune(int[][] train_X, int[][] train_Y, double lr, int epochs) { + int[] layer_input = new int[0]; + // int prev_layer_input_size; + int[] prev_layer_input = new int[0]; + + for(int epoch=0; epoch<epochs; epoch++) { + for(int n=0; n<N; n++) { + + // layer input + for(int i=0; i<n_layers; i++) { + if(i == 0) { + prev_layer_input = new int[n_ins]; + for(int j=0; j<n_ins; j++) prev_layer_input[j] = train_X[n][j]; + } else { + prev_layer_input = new int[hidden_layer_sizes[i-1]]; + for(int j=0; j<hidden_layer_sizes[i-1]; j++) prev_layer_input[j] = layer_input[j]; + } + + layer_input = new int[hidden_layer_sizes[i]]; + sigmoid_layers[i].sample_h_given_v(prev_layer_input, layer_input); + } + + log_layer.train(layer_input, train_Y[n], lr); + } + // lr *= 0.95; + } + } + + public void predict(int[] x, double[] y) { + double[] layer_input = new double[0]; + // int prev_layer_input_size; + double[] prev_layer_input = new double[n_ins]; + for(int j=0; j<n_ins; j++) prev_layer_input[j] = x[j]; + + double linear_output; + + + // layer activation + for(int i=0; i<n_layers; i++) { + layer_input = new double[sigmoid_layers[i].n_out]; + + for(int k=0; k<sigmoid_layers[i].n_out; k++) { + linear_output = 0.0; + + for(int j=0; j<sigmoid_layers[i].n_in; j++) { + linear_output += sigmoid_layers[i].W[k][j] * prev_layer_input[j]; + } + linear_output += sigmoid_layers[i].b[k]; + layer_input[k] = sigmoid(linear_output); + } + + if(i < n_layers-1) { + prev_layer_input = new double[sigmoid_layers[i].n_out]; + for(int j=0; j<sigmoid_layers[i].n_out; j++) prev_layer_input[j] = layer_input[j]; + } + } + + for(int i=0; i<log_layer.n_out; i++) { + y[i] = 0; + for(int j=0; j<log_layer.n_in; j++) { + y[i] += log_layer.W[i][j] * layer_input[j]; + } + y[i] += log_layer.b[i]; + } + + log_layer.softmax(y); + } + + + private static void test_sda() { + Random rng = new Random(123); + + double pretrain_lr = 0.1; + double corruption_level = 0.3; + int pretraining_epochs = 1000; + double finetune_lr = 0.1; + int finetune_epochs = 500; + + int train_N = 10; + int test_N = 4; + int n_ins = 28; + int n_outs = 2; + int[] hidden_layer_sizes = {15, 15}; + int n_layers = hidden_layer_sizes.length; + + // training data + int[][] train_X = { + {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 
1, 0, 1, 1, 0, 1, 1}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1} + }; + + int[][] train_Y = { + {1, 0}, + {1, 0}, + {1, 0}, + {1, 0}, + {1, 0}, + {0, 1}, + {0, 1}, + {0, 1}, + {0, 1}, + {0, 1} + }; + + // construct SdA + SdA sda = new SdA(train_N, n_ins, hidden_layer_sizes, n_outs, n_layers, rng); + + // pretrain + sda.pretrain(train_X, pretrain_lr, corruption_level, pretraining_epochs); + + // finetune + sda.finetune(train_X, train_Y, finetune_lr, finetune_epochs); + + + // test data + int[][] test_X = { + {1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1} + }; + + double[][] test_Y = new double[test_N][n_outs]; + + // test + for(int i=0; i<test_N; i++) { + sda.predict(test_X[i], test_Y[i]); + for(int j=0; j<n_outs; j++) { + System.out.print(test_Y[i][j] + " "); + } + System.out.println(); + } + } + + public static void main(String[] args) { + test_sda(); + } +} diff --git a/java/src/DeepLearning/dA.java b/java/src/DeepLearning/dA.java new file mode 100644 index 0000000..acf3b39 --- /dev/null +++ b/java/src/DeepLearning/dA.java @@ -0,0 +1,187 @@ +package DeepLearning; + +import java.util.Random; +import static DeepLearning.utils.*; + +public class dA { + public int n_visible; + public int N; + public int n_hidden; + public double[][] W; + public double[] hbias; + public double[] vbias; + public Random rng; + + + public dA(int N, int n_visible, int n_hidden, + double[][] W, double[] hbias, double[] vbias, Random rng) { + this.N = N; + this.n_visible = n_visible; + this.n_hidden = n_hidden; + + if(rng == null) this.rng = new Random(1234); + else this.rng = rng; + + if(W == null) { + this.W = new double[this.n_hidden][this.n_visible]; + double a = 1.0 / this.n_visible; + + for(int i=0; i<this.n_hidden; i++) { + for(int j=0; j<this.n_visible; j++) { + this.W[i][j] = uniform(-a, a, rng); + } + } + } else { + this.W = W; + } + + if(hbias == null) { + this.hbias = new double[this.n_hidden]; + for(int i=0; i<this.n_hidden; i++) this.hbias[i] = 0; + } else { + this.hbias = hbias; + } + + if(vbias == null) { + this.vbias = new double[this.n_visible]; + for(int i=0; i<this.n_visible; i++) this.vbias[i] = 0; + } else { + this.vbias = vbias; + } + } + + public void get_corrupted_input(int[] x, int[] tilde_x, double p) { + for(int i=0; i<n_visible; i++) { + if(x[i] == 0) { + tilde_x[i] = 0; + } else { + tilde_x[i] = binomial(1, p, rng); + } + } + } + + // Encode + public void get_hidden_values(int[] x, double[] y) { + for(int i=0; i<n_hidden; i++) { + y[i] = 0; + for(int j=0; j<n_visible; j++) { + y[i] += W[i][j] * x[j]; + } + y[i] += hbias[i]; + y[i] = sigmoid(y[i]); + } + } + + // Decode + public void get_reconstructed_input(double[] y, double[] z) { + for(int i=0; i<n_visible; i++) { + z[i] = 0; + for(int j=0; j<n_hidden; j++) { + z[i] += W[j][i] * y[j]; + } + z[i] += vbias[i]; + z[i] = sigmoid(z[i]); + } + } + + public void train(int[] x, double lr, double corruption_level) { + int[] tilde_x = new int[n_visible]; + double[] y = new double[n_hidden]; + double[] z = new double[n_visible]; + + double[] L_vbias = new double[n_visible]; + double[] L_hbias = new double[n_hidden]; + 
+ double p = 1 - corruption_level; + + get_corrupted_input(x, tilde_x, p); + get_hidden_values(tilde_x, y); + get_reconstructed_input(y, z); + + // vbias + for(int i=0; i<n_visible; i++) { + L_vbias[i] = x[i] - z[i]; + vbias[i] += lr * L_vbias[i] / N; + } + + // hbias + for(int i=0; i<n_hidden; i++) { + L_hbias[i] = 0; + for(int j=0; j<n_visible; j++) { + L_hbias[i] += W[i][j] * L_vbias[j]; + } + L_hbias[i] *= y[i] * (1 - y[i]); + hbias[i] += lr * L_hbias[i] / N; + } + + // W + for(int i=0; i<n_hidden; i++) { + for(int j=0; j<n_visible; j++) { + W[i][j] += lr * (L_hbias[i] * tilde_x[j] + L_vbias[j] * y[i]) / N; + } + } + } + + public void reconstruct(int[] x, double[] z) { + double[] y = new double[n_hidden]; + + get_hidden_values(x, y); + get_reconstructed_input(y, z); + } + + private static void test_dA() { + Random rng = new Random(123); + + double learning_rate = 0.1; + double corruption_level = 0.3; + int training_epochs = 100; + + int train_N = 10; + int test_N = 2; + int n_visible = 20; + int n_hidden = 5; + + int[][] train_X = { + {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0} + }; + + dA da = new dA(train_N, n_visible, n_hidden, null, null, null, rng); + + // train + for(int epoch=0; epoch<training_epochs; epoch++) { + for(int i=0; i<train_N; i++) { + da.train(train_X[i], learning_rate, corruption_level); + } + } + + // test data + int[][] test_X = { + {1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0} + }; + + double[][] reconstructed_X = new double[test_N][n_visible]; + + // test + for(int i=0; i<test_N; i++) { + da.reconstruct(test_X[i], reconstructed_X[i]); + for(int j=0; j<n_visible; j++) { + System.out.printf("%.5f ", reconstructed_X[i][j]); + } + System.out.println(); + } + } + + public static void main(String[] args) { + test_dA(); + } +} diff --git a/java/src/DeepLearning/utils.java b/java/src/DeepLearning/utils.java new file mode 100644 index 0000000..efce0d9 --- /dev/null +++ b/java/src/DeepLearning/utils.java @@ -0,0 +1,29 @@ +package DeepLearning; + +import java.util.Random; + +public class utils { + public static double uniform(double min, double max, Random rng) { + return rng.nextDouble() * (max - min) + min; + } + + public static int binomial(int n, double p, Random rng) { + if(p < 0 || p > 1) return 0; + + int c = 0; + double r; + + for(int i=0; i<n; i++) { + r = rng.nextDouble(); + if (r < p) c++; + } + + return c; + } + + + public static double sigmoid(double x) { + return 1.0 / (1.0 + Math.pow(Math.E, -x)); + } + +} From 773fa9bbeed94e314a4aeaab05a75822982ff94e Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori <me@yusugomori.com> Date: Tue, 4 Aug 2015 22:10:42 -0400 Subject: [PATCH 28/45] add gitkeep in java --- java/.gitkeep | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 java/.gitkeep diff --git a/java/.gitkeep b/java/.gitkeep new file mode 100644 index 0000000..e69de29 From 
31688afa659ca3846a3347629594220a2fd62a89 Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori <me@yusugomori.com> Date: Sun, 9 Aug 2015 12:49:29 -0400 Subject: [PATCH 29/45] bug fix --- python/RBM.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/RBM.py b/python/RBM.py index 4a1be8e..4496692 100755 --- a/python/RBM.py +++ b/python/RBM.py @@ -58,7 +58,7 @@ def contrastive_divergence(self, lr=0.1, k=1, input=None): # chain_end = nv_samples - self.W += lr * (numpy.dot(self.input.T, ph_sample) + self.W += lr * (numpy.dot(self.input.T, ph_mean) - numpy.dot(nv_samples.T, nh_means)) self.vbias += lr * numpy.mean(self.input - nv_samples, axis=0) self.hbias += lr * numpy.mean(ph_sample - nh_means, axis=0) From 0ffb1fe8df894a43939eb590c70337a107daafcb Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori <me@yusugomori.com> Date: Sun, 9 Aug 2015 13:07:59 -0400 Subject: [PATCH 30/45] bug fix on MLP --- python/Dropout.py | 53 ++++++++++--------------- python/HiddenLayer.py | 29 ++++++-------- python/LogisticRegression.py | 75 +++++++++++++++++++----------------- python/MLP.py | 46 +++++++++++----------- 4 files changed, 97 insertions(+), 106 deletions(-) diff --git a/python/Dropout.py b/python/Dropout.py index feb594e..df7d646 100755 --- a/python/Dropout.py +++ b/python/Dropout.py @@ -50,11 +50,11 @@ def __init__(self, input, label,\ self.hidden_layers.append(hidden_layer) - # layer for ouput using Logistic Regression (softmax) - self.log_layer = LogisticRegression(input=self.hidden_layers[-1].output(), - label=self.y, - n_in=hidden_layer_sizes[-1], - n_out=n_out) + # layer for ouput using Logistic Regression (softmax) + self.log_layer = LogisticRegression(input=self.hidden_layers[-1].output(), + label=self.y, + n_in=hidden_layer_sizes[-1], + n_out=n_out) def train(self, epochs=500, dropout=True, p_dropout=0.5, rng=None): @@ -108,27 +108,16 @@ def predict(self, x, dropout=True, p_dropout=0.5): def test_dropout(n_epochs=500, dropout=True, p_dropout=0.5): - x = numpy.array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0]]) - - y = numpy.array([[1, 0], - [1, 0], - [1, 0], - [1, 0], - [1, 0], - [0, 1], - [0, 1], - [0, 1], - [0, 1], - [0, 1]]) + x = numpy.array([[-1, -1], + [-1, 1], + [ 1, -1], + [ 1, 1]]) + + + y = numpy.array([[1, -1], + [1, -1], + [-1, 1], + [-1, 1]]) rng = numpy.random.RandomState(123) @@ -136,20 +125,20 @@ def test_dropout(n_epochs=500, dropout=True, p_dropout=0.5): # construct Dropout MLP classifier = Dropout(input=x, label=y, \ - n_in=20, hidden_layer_sizes=[12, 12], n_out=2, \ + n_in=2, hidden_layer_sizes=[3], n_out=2, \ rng=rng, activation=ReLU) - # train + # train XOR classifier.train(epochs=n_epochs, dropout=dropout, \ p_dropout=p_dropout, rng=rng) # test - x = numpy.array([[1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1], - [0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1]]) + x = numpy.array([[-1, -1], + [-1, 1], + [ 1, -1], + [ 1, 1]]) print classifier.predict(x) diff --git a/python/HiddenLayer.py b/python/HiddenLayer.py index f7a0731..166d24c 100755 --- a/python/HiddenLayer.py +++ b/python/HiddenLayer.py @@ -51,21 +51,7 @@ def output(self, input=None): linear_output = numpy.dot(self.x, self.W) + self.b - return (linear_output if self.activation is None - else self.activation(linear_output)) - - - def sample_h_given_v(self, input=None): - if input is not None: - self.x = input - - v_mean = self.output() - h_sample = self.rng.binomial(size=v_mean.shape, - n=1, - p=v_mean) - return h_sample - - + return self.activation(linear_output) def forward(self, input=None): return self.output(input=input) @@ -75,11 +61,10 @@ def backward(self, prev_layer, lr=0.1, input=None): if input is not None: self.x = input - d_y = self.dactivation(prev_layer.x) * numpy.dot(prev_layer.d_y, prev_layer.W.T) + d_y = self.dactivation( prev_layer.x ) * numpy.dot( prev_layer.d_y, prev_layer.W.T ) self.W += lr * numpy.dot(self.x.T, d_y) self.b += lr * numpy.mean(d_y, axis=0) - self.d_y = d_y @@ -94,4 +79,14 @@ def dropout(self, input, p, rng=None): return mask + def sample_h_given_v(self, input=None): + if input is not None: + self.x = input + + v_mean = self.output() + h_sample = self.rng.binomial(size=v_mean.shape, + n=1, + p=v_mean) + return h_sample + diff --git a/python/LogisticRegression.py b/python/LogisticRegression.py index f1a686d..3f57889 100755 --- a/python/LogisticRegression.py +++ b/python/LogisticRegression.py @@ -9,25 +9,36 @@ class LogisticRegression(object): def __init__(self, input, label, n_in, n_out): self.x = input self.y = label + self.W = numpy.zeros((n_in, n_out)) # initialize W 0 - self.b = numpy.zeros(n_out) # initialize bias 0 + self.b = numpy.zeros(n_out) # initialize bias 0 def train(self, lr=0.1, input=None, L2_reg=0.00): + self.forward(input) + self.backward(lr, L2_reg) + + + def forward(self, input=None): if input is not None: self.x = input - # p_y_given_x = sigmoid(numpy.dot(self.x, self.W) + self.b) - p_y_given_x = softmax(numpy.dot(self.x, self.W) + self.b) - d_y = self.y - p_y_given_x - - self.W += lr * numpy.dot(self.x.T, d_y) - lr * L2_reg * self.W - self.b += lr * numpy.mean(d_y, axis=0) + p_y_given_x = self.output(self.x) + self.d_y = self.y - p_y_given_x - self.d_y = d_y - # cost = self.negative_log_likelihood() - # return cost + def backward(self, lr, L2_reg=0.00): + self.W += lr * numpy.dot(self.x.T, self.d_y) - lr * L2_reg * self.W + self.b += lr * numpy.mean(self.d_y, axis=0) + + + def output(self, x): + # return sigmoid(numpy.dot(x, self.W) + self.b) + return softmax(numpy.dot(x, self.W) + self.b) + + def predict(self, x): + return self.output(x) + def negative_log_likelihood(self): # sigmoid_activation = sigmoid(numpy.dot(self.x, self.W) + self.b) @@ -41,47 +52,41 @@ def negative_log_likelihood(self): return cross_entropy - def predict(self, x): - # return sigmoid(numpy.dot(x, self.W) + self.b) - return softmax(numpy.dot(x, self.W) + self.b) - - def output(self, x): - return self.predict(x) +def test_lr(learning_rate=0.1, n_epochs=500): + rng = numpy.random.RandomState(123) -def test_lr(learning_rate=0.01, n_epochs=200): # training data - x = numpy.array([[1,1,1,0,0,0], - [1,0,1,0,0,0], - [1,1,1,0,0,0], - [0,0,1,1,1,0], - [0,0,1,1,0,0], - [0,0,1,1,1,0]]) - y = numpy.array([[1, 0], - [1, 0], - [1, 0], - [0, 1], - [0, 1], - [0, 1]]) + d = 2 + N = 10 + x1 = rng.randn(N, d) + numpy.array([0, 0]) + x2 = 
rng.randn(N, d) + numpy.array([20, 10]) + y1 = [[1, 0] for i in xrange(N)] + y2 = [[0, 1] for i in xrange(N)] + + x = numpy.r_[x1.astype(int), x2.astype(int)] + y = numpy.r_[y1, y2] # construct LogisticRegression - classifier = LogisticRegression(input=x, label=y, n_in=6, n_out=2) + classifier = LogisticRegression(input=x, label=y, n_in=d, n_out=2) # train for epoch in xrange(n_epochs): classifier.train(lr=learning_rate) # cost = classifier.negative_log_likelihood() # print >> sys.stderr, 'Training epoch %d, cost is ' % epoch, cost - learning_rate *= 0.95 + learning_rate *= 0.995 # test - x = numpy.array([[1, 1, 0, 0, 0, 0], - [0, 0, 0, 1, 1, 0], - [1, 1, 1, 1, 1, 0]]) + result = classifier.predict(x) + for i in xrange(N): + print result[i] + print + for i in xrange(N): + print result[N+i] - print classifier.predict(x) if __name__ == "__main__": diff --git a/python/MLP.py b/python/MLP.py index 0e02f2a..f87e597 100755 --- a/python/MLP.py +++ b/python/MLP.py @@ -16,43 +16,49 @@ def __init__(self, input, label, n_in, n_hidden, n_out, rng=None): if rng is None: rng = numpy.random.RandomState(1234) - # construct hidden_layer (tanh or sigmoid so far) + # construct hidden_layer self.hidden_layer = HiddenLayer(input=self.x, n_in=n_in, n_out=n_hidden, rng=rng, - activation=tanh) + activation=sigmoid) - # construct log_layer (softmax) + # construct log_layer self.log_layer = LogisticRegression(input=self.hidden_layer.output, label=self.y, n_in=n_hidden, n_out=n_out) def train(self): - layer_input = self.hidden_layer.forward() - self.log_layer.train(input=layer_input) - self.hidden_layer.backward(prev_layer=self.log_layer) - + # forward hidden_layer + log_layer_input = self.hidden_layer.forward(input=self.x) + + # forward log_layer + self.log_layer.forward(input=log_layer_input) + + # backward hidden_layer + self.hidden_layer.backward(prev_layer=self.log_layer, lr=0.2, input=self.x) + + # backward log_layer + self.log_layer.backward(lr=0.2) + + def predict(self, x): x = self.hidden_layer.output(input=x) return self.log_layer.predict(x) -def test_mlp(n_epochs=100): +def test_mlp(n_epochs=5000): - x = numpy.array([[1,1,1,0,0,0], - [1,0,1,0,0,0], - [1,1,1,0,0,0], - [0,0,1,1,1,0], - [0,0,1,1,0,0], - [0,0,1,1,1,0]]) - y = numpy.array([[1, 0], + x = numpy.array([[0, 0], + [0, 1], + [1, 0], + [1, 1]]) + + y = numpy.array([[0, 1], [1, 0], [1, 0], - [0, 1], - [0, 1], [0, 1]]) @@ -60,7 +66,7 @@ def test_mlp(n_epochs=100): # construct MLP - classifier = MLP(input=x, label=y, n_in=6, n_hidden=15, n_out=2, rng=rng) + classifier = MLP(input=x, label=y, n_in=2, n_hidden=3, n_out=2, rng=rng) # train for epoch in xrange(n_epochs): @@ -68,10 +74,6 @@ def test_mlp(n_epochs=100): # test - x = numpy.array([[1, 1, 0, 0, 0, 0], - [0, 0, 0, 1, 1, 0], - [1, 1, 1, 1, 1, 0]]) - print classifier.predict(x) From 18fa46dc43d6d8753401c4b34f8e00e546dfdbf8 Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori <me@yusugomori.com> Date: Sun, 9 Aug 2015 13:50:02 -0400 Subject: [PATCH 31/45] dropout --- python/Dropout.py | 29 +++++++++++------------------ python/HiddenLayer.py | 4 ++-- python/LogisticRegression.py | 30 ++++++++++++++++++++---------- python/MLP.py | 14 +++++++------- 4 files changed, 40 insertions(+), 37 deletions(-) diff --git a/python/Dropout.py b/python/Dropout.py index df7d646..16c203f 100755 --- a/python/Dropout.py +++ b/python/Dropout.py @@ -57,7 +57,7 @@ def __init__(self, input, label,\ n_out=n_out) - def train(self, epochs=500, dropout=True, p_dropout=0.5, rng=None): + def train(self, epochs=5000, dropout=True, 
p_dropout=0.5, rng=None): for epoch in xrange(epochs): dropout_masks = [] # create different masks in each training epoch @@ -106,26 +106,24 @@ def predict(self, x, dropout=True, p_dropout=0.5): -def test_dropout(n_epochs=500, dropout=True, p_dropout=0.5): +def test_dropout(n_epochs=5000, dropout=True, p_dropout=0.5): - x = numpy.array([[-1, -1], - [-1, 1], - [ 1, -1], - [ 1, 1]]) - - - y = numpy.array([[1, -1], - [1, -1], - [-1, 1], - [-1, 1]]) + x = numpy.array([[0, 0], + [0, 1], + [1, 0], + [1, 1]]) + y = numpy.array([[0, 1], + [1, 0], + [1, 0], + [0, 1]]) rng = numpy.random.RandomState(123) # construct Dropout MLP classifier = Dropout(input=x, label=y, \ - n_in=2, hidden_layer_sizes=[3], n_out=2, \ + n_in=2, hidden_layer_sizes=[10, 10], n_out=2, \ rng=rng, activation=ReLU) @@ -135,11 +133,6 @@ def test_dropout(n_epochs=500, dropout=True, p_dropout=0.5): # test - x = numpy.array([[-1, -1], - [-1, 1], - [ 1, -1], - [ 1, 1]]) - print classifier.predict(x) diff --git a/python/HiddenLayer.py b/python/HiddenLayer.py index 166d24c..75f8ab2 100755 --- a/python/HiddenLayer.py +++ b/python/HiddenLayer.py @@ -50,9 +50,9 @@ def output(self, input=None): self.x = input linear_output = numpy.dot(self.x, self.W) + self.b - return self.activation(linear_output) + def forward(self, input=None): return self.output(input=input) @@ -61,7 +61,7 @@ def backward(self, prev_layer, lr=0.1, input=None): if input is not None: self.x = input - d_y = self.dactivation( prev_layer.x ) * numpy.dot( prev_layer.d_y, prev_layer.W.T ) + d_y = self.dactivation(prev_layer.x) * numpy.dot(prev_layer.d_y, prev_layer.W.T) self.W += lr * numpy.dot(self.x.T, d_y) self.b += lr * numpy.mean(d_y, axis=0) diff --git a/python/LogisticRegression.py b/python/LogisticRegression.py index 3f57889..708a1b3 100755 --- a/python/LogisticRegression.py +++ b/python/LogisticRegression.py @@ -14,22 +14,32 @@ def __init__(self, input, label, n_in, n_out): self.b = numpy.zeros(n_out) # initialize bias 0 - def train(self, lr=0.1, input=None, L2_reg=0.00): - self.forward(input) - self.backward(lr, L2_reg) - - - def forward(self, input=None): + def train(self, lr=0.1, input=None, L2_reg=0.00): if input is not None: self.x = input p_y_given_x = self.output(self.x) - self.d_y = self.y - p_y_given_x + d_y = self.y - p_y_given_x + + self.W += lr * numpy.dot(self.x.T, d_y) - lr * L2_reg * self.W + self.b += lr * numpy.mean(d_y, axis=0) + self.d_y = d_y + + + # def train(self, lr=0.1, input=None, L2_reg=0.00): + # self.forward(input) + # self.backward(lr, L2_reg) + + # def forward(self, input=None): + # if input is not None: + # self.x = input + # p_y_given_x = self.output(self.x) + # self.d_y = self.y - p_y_given_x - def backward(self, lr, L2_reg=0.00): - self.W += lr * numpy.dot(self.x.T, self.d_y) - lr * L2_reg * self.W - self.b += lr * numpy.mean(self.d_y, axis=0) + # def backward(self, lr=0.1, L2_reg=0.00): + # self.W += lr * numpy.dot(self.x.T, self.d_y) - lr * L2_reg * self.W + # self.b += lr * numpy.mean(self.d_y, axis=0) def output(self, x): diff --git a/python/MLP.py b/python/MLP.py index f87e597..e9ded0b 100755 --- a/python/MLP.py +++ b/python/MLP.py @@ -21,7 +21,7 @@ def __init__(self, input, label, n_in, n_hidden, n_out, rng=None): n_in=n_in, n_out=n_hidden, rng=rng, - activation=sigmoid) + activation=tanh) # construct log_layer self.log_layer = LogisticRegression(input=self.hidden_layer.output, @@ -31,17 +31,17 @@ def __init__(self, input, label, n_in, n_hidden, n_out, rng=None): def train(self): # forward hidden_layer - log_layer_input = 
self.hidden_layer.forward(input=self.x) + layer_input = self.hidden_layer.forward() - # forward log_layer - self.log_layer.forward(input=log_layer_input) + # forward & backward log_layer + # self.log_layer.forward(input=layer_input) + self.log_layer.train(input=layer_input) # backward hidden_layer - self.hidden_layer.backward(prev_layer=self.log_layer, lr=0.2, input=self.x) + self.hidden_layer.backward(prev_layer=self.log_layer) # backward log_layer - self.log_layer.backward(lr=0.2) - + # self.log_layer.backward() def predict(self, x): From 3daf4b246f5ddfb991968536b1eb0987ac8ce291 Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori <yusugomori@users.noreply.github.com> Date: Mon, 17 Aug 2015 18:11:06 -0400 Subject: [PATCH 32/45] Update README.md --- README.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 14e17c5..4b11361 100644 --- a/README.md +++ b/README.md @@ -41,5 +41,10 @@ http://yusugomori.com/docs/SGD_DA.pdf +### Other projects : + - [DeepLearning.coffee](https://github.com/yusugomori/deeplearning.coffee) : Very simple implementation of deep learning by CoffeeScript + + + +#### Bug reports are deeply welcome. -### Bug reports are deeply welcome. From d11bd59844fbb4f5f3921ec0f427afed32dafb64 Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori <yusugomori@users.noreply.github.com> Date: Wed, 19 Aug 2015 22:08:35 -0400 Subject: [PATCH 33/45] Update README.md --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 4b11361..933b11a 100644 --- a/README.md +++ b/README.md @@ -42,9 +42,10 @@ ### Other projects : - - [DeepLearning.coffee](https://github.com/yusugomori/deeplearning.coffee) : Very simple implementation of deep learning by CoffeeScript + - My deep learning projects are [here](http://yusugomori.com/projects/deep-learning/) -#### Bug reports are deeply welcome. +##### Bug reports / contributions / donations are deeply welcome. +Bitcoin wallet address: 1QAoYw5Y3opvah2APf4jVcpD6UAHyC3k7s From 52f8752b9a9c4f5837c0ab59e5f3aea3a2031a6a Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori <yusugomori@users.noreply.github.com> Date: Wed, 19 Aug 2015 22:08:55 -0400 Subject: [PATCH 34/45] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 933b11a..26b41bd 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,7 @@ ### Other projects : - - My deep learning projects are [here](http://yusugomori.com/projects/deep-learning/) + - My deep learning projects are [here](http://yusugomori.com/projects/deep-learning/). 
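A note on the one-line RBM change in PATCH 29 above: in contrastive divergence the positive-phase statistic of the weight gradient is taken from the mean hidden activations p(h|v0) rather than from the binary hidden sample, which is why ph_sample was replaced by ph_mean. Below is a minimal, hypothetical sketch of a CD-1 update in standalone numpy; it is not the repository's RBM class, the function and variable names are illustrative only, and the analogous hbias change shown here only lands later in this series (PATCH 38).

import numpy

def sigmoid(x):
    return 1.0 / (1.0 + numpy.exp(-x))

def cd1_update(v0, W, hbias, vbias, lr, rng):
    # W has shape (n_visible, n_hidden), matching the convention in python/RBM.py
    ph_mean = sigmoid(numpy.dot(v0, W) + hbias)                    # p(h=1 | v0), positive phase
    ph_sample = rng.binomial(size=ph_mean.shape, n=1, p=ph_mean)   # binary hidden states
    nv_mean = sigmoid(numpy.dot(ph_sample, W.T) + vbias)           # reconstructed visible probabilities
    nv_sample = rng.binomial(size=nv_mean.shape, n=1, p=nv_mean)   # chain end after one Gibbs step
    nh_mean = sigmoid(numpy.dot(nv_sample, W) + hbias)             # p(h=1 | v1), negative phase
    # positive phase uses ph_mean (the PATCH 29 fix); negative phase uses the chain end
    W += lr * (numpy.dot(v0.T, ph_mean) - numpy.dot(nv_sample.T, nh_mean))
    vbias += lr * numpy.mean(v0 - nv_sample, axis=0)
    hbias += lr * numpy.mean(ph_mean - nh_mean, axis=0)
    return W, hbias, vbias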
From 8872adfcd46d77a778c5f37c66627a6526d2d2d7 Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori <ysugomori@google.com> Date: Thu, 20 Aug 2015 18:04:10 -0400 Subject: [PATCH 35/45] clean up cpp utils --- cpp/DBN.cpp | 25 ++----------------------- cpp/HiddenLayer.cpp | 25 ++----------------------- cpp/RBM.cpp | 24 ++---------------------- cpp/SdA.cpp | 25 +++---------------------- cpp/dA.cpp | 26 +++----------------------- cpp/utils.cpp | 25 ------------------------- cpp/utils.h | 32 ++++++++++++++++++++++++++++++++ 7 files changed, 44 insertions(+), 138 deletions(-) delete mode 100644 cpp/utils.cpp create mode 100644 cpp/utils.h diff --git a/cpp/DBN.cpp b/cpp/DBN.cpp index f043e20..155e081 100644 --- a/cpp/DBN.cpp +++ b/cpp/DBN.cpp @@ -1,33 +1,12 @@ #include <iostream> #include <math.h> +#include "utils.h" #include "HiddenLayer.h" #include "RBM.h" #include "LogisticRegression.h" #include "DBN.h" using namespace std; - - -double uniform(double min, double max) { - return rand() / (RAND_MAX + 1.0) * (max - min) + min; -} - -int binomial(int n, double p) { - if(p < 0 || p > 1) return 0; - - int c = 0; - double r; - - for(int i=0; i<n; i++) { - r = rand() / (RAND_MAX + 1.0); - if (r < p) c++; - } - - return c; -} - -double sigmoid(double x) { - return 1.0 / (1.0 + exp(-x)); -} +using namespace utils; // DBN diff --git a/cpp/HiddenLayer.cpp b/cpp/HiddenLayer.cpp index b1925fc..fb530c6 100644 --- a/cpp/HiddenLayer.cpp +++ b/cpp/HiddenLayer.cpp @@ -1,30 +1,9 @@ #include <iostream> #include <math.h> #include "HiddenLayer.h" +#include "utils.h" using namespace std; - -double uniform(double min, double max) { - return rand() / (RAND_MAX + 1.0) * (max - min) + min; -} - -int binomial(int n, double p) { - if(p < 0 || p > 1) return 0; - - int c = 0; - double r; - - for(int i=0; i<n; i++) { - r = rand() / (RAND_MAX + 1.0); - if (r < p) c++; - } - - return c; -} - -double sigmoid(double x) { - return 1.0 / (1.0 + exp(-x)); -} - +using namespace utils; HiddenLayer::HiddenLayer(int size, int in, int out, double **w, double *bp) { diff --git a/cpp/RBM.cpp b/cpp/RBM.cpp index d64462c..1e606ee 100644 --- a/cpp/RBM.cpp +++ b/cpp/RBM.cpp @@ -1,29 +1,9 @@ #include <iostream> #include <math.h> +#include "utils.h" #include "RBM.h" using namespace std; - -double uniform(double min, double max) { - return rand() / (RAND_MAX + 1.0) * (max - min) + min; -} - -int binomial(int n, double p) { - if(p < 0 || p > 1) return 0; - - int c = 0; - double r; - - for(int i=0; i<n; i++) { - r = rand() / (RAND_MAX + 1.0); - if (r < p) c++; - } - - return c; -} - -double sigmoid(double x) { - return 1.0 / (1.0 + exp(-x)); -} +using namespace utils; RBM::RBM(int size, int n_v, int n_h, double **w, double *hb, double *vb) { diff --git a/cpp/SdA.cpp b/cpp/SdA.cpp index 39ac32e..5632306 100644 --- a/cpp/SdA.cpp +++ b/cpp/SdA.cpp @@ -1,32 +1,13 @@ #include <iostream> #include <math.h> +#include "utils.h" + #include "HiddenLayer.h" #include "dA.h" #include "LogisticRegression.h" #include "SdA.h" using namespace std; - -double uniform(double min, double max) { - return rand() / (RAND_MAX + 1.0) * (max - min) + min; -} - -int binomial(int n, double p) { - if(p < 0 || p > 1) return 0; - - int c = 0; - double r; - - for(int i=0; i<n; i++) { - r = rand() / (RAND_MAX + 1.0); - if (r < p) c++; - } - - return c; -} - -double sigmoid(double x) { - return 1.0 / (1.0 + exp(-x)); -} +using namespace utils; // SdA diff --git a/cpp/dA.cpp b/cpp/dA.cpp index b5414e5..783327e 100644 --- a/cpp/dA.cpp +++ b/cpp/dA.cpp @@ -1,30 +1,10 @@ #include <iostream> 
#include <math.h> +#include "utils.h" + #include "dA.h" using namespace std; - - -double uniform(double min, double max) { - return rand() / (RAND_MAX + 1.0) * (max - min) + min; -} - -int binomial(int n, double p) { - if(p < 0 || p > 1) return 0; - - int c = 0; - double r; - - for(int i=0; i<n; i++) { - r = rand() / (RAND_MAX + 1.0); - if (r < p) c++; - } - - return c; -} - -double sigmoid(double x) { - return 1.0 / (1.0 + exp(-x)); -} +using namespace utils; dA::dA(int size, int n_v, int n_h, double **w, double *hb, double *vb) { diff --git a/cpp/utils.cpp b/cpp/utils.cpp deleted file mode 100644 index 96af323..0000000 --- a/cpp/utils.cpp +++ /dev/null @@ -1,25 +0,0 @@ -#include <iostream> -#include <math.h> -using namespace std; - -double uniform(double min, double max) { - return rand() / (RAND_MAX + 1.0) * (max - min) + min; -} - -int binomial(int n, double p) { - if(p < 0 || p > 1) return 0; - - int c = 0; - double r; - - for(int i=0; i<n; i++) { - r = rand() / (RAND_MAX + 1.0); - if (r < p) c++; - } - - return c; -} - -double sigmoid(double x) { - return 1.0 / (1.0 + exp(-x)); -} diff --git a/cpp/utils.h b/cpp/utils.h new file mode 100644 index 0000000..78fb182 --- /dev/null +++ b/cpp/utils.h @@ -0,0 +1,32 @@ +#pragma once + +#include <iostream> +#include <math.h> +using namespace std; + + +namespace utils { + + double uniform(double min, double max) { + return rand() / (RAND_MAX + 1.0) * (max - min) + min; + } + + int binomial(int n, double p) { + if(p < 0 || p > 1) return 0; + + int c = 0; + double r; + + for(int i=0; i<n; i++) { + r = rand() / (RAND_MAX + 1.0); + if (r < p) c++; + } + + return c; + } + + double sigmoid(double x) { + return 1.0 / (1.0 + exp(-x)); + } + +} From 0911fa072463861ecc4f3b0734975b3f307d41b2 Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori <me@yusugomori.com> Date: Tue, 25 Aug 2015 20:59:56 -0400 Subject: [PATCH 36/45] MLP.java, Dropout.java --- java/src/DeepLearning/DBN.java | 12 +- java/src/DeepLearning/Dropout.java | 225 ++++++++++++++++++ java/src/DeepLearning/HiddenLayer.java | 70 +++++- .../src/DeepLearning/HiddenLayerDiscrete.java | 56 +++++ java/src/DeepLearning/LogisticRegression.java | 32 +-- .../LogisticRegressionDiscrete.java | 117 +++++++++ java/src/DeepLearning/MLP.java | 124 ++++++++++ java/src/DeepLearning/RBM.java | 1 - java/src/DeepLearning/SdA.java | 12 +- java/src/DeepLearning/utils.java | 30 ++- 10 files changed, 640 insertions(+), 39 deletions(-) create mode 100644 java/src/DeepLearning/Dropout.java create mode 100644 java/src/DeepLearning/HiddenLayerDiscrete.java create mode 100644 java/src/DeepLearning/LogisticRegressionDiscrete.java create mode 100644 java/src/DeepLearning/MLP.java diff --git a/java/src/DeepLearning/DBN.java b/java/src/DeepLearning/DBN.java index 39fb999..e070faf 100644 --- a/java/src/DeepLearning/DBN.java +++ b/java/src/DeepLearning/DBN.java @@ -9,9 +9,9 @@ public class DBN { public int[] hidden_layer_sizes; public int n_outs; public int n_layers; - public HiddenLayer[] sigmoid_layers; + public HiddenLayerDiscrete[] sigmoid_layers; public RBM[] rbm_layers; - public LogisticRegression log_layer; + public LogisticRegressionDiscrete log_layer; public Random rng; @@ -24,7 +24,7 @@ public DBN(int N, int n_ins, int[] hidden_layer_sizes, int n_outs, int n_layers, this.n_outs = n_outs; this.n_layers = n_layers; - this.sigmoid_layers = new HiddenLayer[n_layers]; + this.sigmoid_layers = new HiddenLayerDiscrete[n_layers]; this.rbm_layers = new RBM[n_layers]; if(rng == null) this.rng = new Random(1234); @@ -39,14 +39,14 @@ 
public DBN(int N, int n_ins, int[] hidden_layer_sizes, int n_outs, int n_layers, } // construct sigmoid_layer - this.sigmoid_layers[i] = new HiddenLayer(this.N, input_size, this.hidden_layer_sizes[i], null, null, rng); + this.sigmoid_layers[i] = new HiddenLayerDiscrete(this.N, input_size, this.hidden_layer_sizes[i], null, null, rng); // construct rbm_layer this.rbm_layers[i] = new RBM(this.N, input_size, this.hidden_layer_sizes[i], this.sigmoid_layers[i].W, this.sigmoid_layers[i].b, null, rng); } - // layer for output using DNN.LogisticRegression - this.log_layer = new LogisticRegression(this.N, this.hidden_layer_sizes[this.n_layers-1], this.n_outs); + // layer for output using Logistic Regression + this.log_layer = new LogisticRegressionDiscrete(this.N, this.hidden_layer_sizes[this.n_layers-1], this.n_outs); } public void pretrain(int[][] train_X, double lr, int k, int epochs) { diff --git a/java/src/DeepLearning/Dropout.java b/java/src/DeepLearning/Dropout.java new file mode 100644 index 0000000..b4717c6 --- /dev/null +++ b/java/src/DeepLearning/Dropout.java @@ -0,0 +1,225 @@ +package DeepLearning; + +import java.util.Random; +import java.util.List; +import java.util.ArrayList; + +public class Dropout { + public int N; + public int n_in; + public int[] hidden_layer_sizes; + public int n_out; + public int n_layers; + public HiddenLayer[] hiddenLayers; + public LogisticRegression logisticLayer; + public Random rng; + + + public Dropout(int N, int n_in, int[] hidden_layer_sizes, int n_out, Random rng, String activation) { + this.N = N; + this.n_in = n_in; + this.hidden_layer_sizes = hidden_layer_sizes; + this.n_layers = hidden_layer_sizes.length; + this.n_out = n_out; + + this.hiddenLayers = new HiddenLayer[n_layers]; + + if (rng == null) rng = new Random(1234); + this.rng = rng; + + if (activation == null) activation = "ReLU"; + + // construct multi-layer + int input_size; + for(int i=0; i<this.n_layers; i++) { + // layer_size + if(i == 0) { + input_size = n_in; + } else { + input_size = hidden_layer_sizes[i-1]; + } + + // construct hiddenLayer + this.hiddenLayers[i] = new HiddenLayer(N, input_size, hidden_layer_sizes[i], null, null, rng, activation); + + } + + // construct logisticLayer + this.logisticLayer = new LogisticRegression(N, hidden_layer_sizes[this.n_layers-1], n_out); + + } + + public void train(int epochs, double[][] train_X, int[][] train_Y, boolean dropout, double p_dropout, double lr) { + List<int[]> dropout_masks; + List<double[]> layer_inputs; + double[] layer_input; + double[] layer_output = new double[0]; + + for(int epoch=0; epoch<epochs; epoch++) { + + for(int n=0; n<N; n++) { + + dropout_masks = new ArrayList<>(n_layers); + layer_inputs = new ArrayList<>(n_layers+1); // +1 for logistic layer + + // forward hiddenLayers + for(int i=0; i<n_layers; i++) { + + if(i == 0) layer_input = train_X[n]; + else layer_input = layer_output.clone(); + + layer_inputs.add(layer_input.clone()); + + layer_output = new double[hidden_layer_sizes[i]]; + hiddenLayers[i].forward(layer_input, layer_output); + + if(dropout) { + int[] mask; + mask = hiddenLayers[i].dropout(layer_output.length, p_dropout, rng); + for(int j=0; j<layer_output.length; j++) layer_output[j] *= mask[j]; + + dropout_masks.add(mask.clone()); + } + + } + + + // forward & backward logisticLayer + double[] logistic_layer_dy; // = new double[n_out]; + logistic_layer_dy = logisticLayer.train(layer_output, train_Y[n], lr); //, logistic_layer_dy); + layer_inputs.add(layer_output.clone()); + + // backward hiddenLayers + 
double[] prev_dy = logistic_layer_dy; + double[][] prev_W; + double[] dy = new double[0]; + + for(int i=n_layers-1; i>=0; i--) { + + if(i == n_layers-1) { + prev_W = logisticLayer.W; + } else { + prev_dy = dy.clone(); + prev_W = hiddenLayers[i+1].W; + } + + dy = new double[hidden_layer_sizes[i]]; + hiddenLayers[i].backward(layer_inputs.get(i), dy, layer_inputs.get(i+1), prev_dy, prev_W, lr); + + if(dropout) { + for(int j=0; j<dy.length; j++) { + dy[j] *= dropout_masks.get(i)[j]; + } + } + } + + } + } + } + + + public void pretest(double p_dropout) { + for(int i=0; i<n_layers; i++) { + int in; + int out; + + if (i == 0) in = n_in; + else in = hidden_layer_sizes[i]; + + if (i == n_layers - 1) out = n_out; + else out = hidden_layer_sizes[i+1]; + + + for (int l = 0; l < out; l++) { + for (int m = 0; m < in; m++) { + hiddenLayers[i].W[l][m] *= 1 - p_dropout; + } + } + } + } + + + public void predict(double[] x, double[] y) { + double[] layer_input; + double[] layer_output = new double[0]; + + for(int i=0; i<n_layers; i++) { + + if(i == 0) layer_input = x; + else layer_input = layer_output.clone(); + + layer_output = new double[hidden_layer_sizes[i]]; + + hiddenLayers[i].forward(layer_input, layer_output); + } + + logisticLayer.predict(layer_output, y); + } + + + private static void test_dropout() { + Random rng = new Random(123); + + double learning_rate = 0.1; + int n_epochs = 5000; + + int train_N = 4; + int test_N = 4; + int n_in = 2; + int[] hidden_layer_sizes = {10, 10}; + int n_out = 2; + + boolean dropout = true; + double p_dropout = 0.5; + + + double[][] train_X = { + {0., 0.}, + {0., 1.}, + {1., 0.}, + {1., 1.}, + }; + + int[][] train_Y = { + {0, 1}, + {1, 0}, + {1, 0}, + {0, 1}, + }; + + // construct Dropout + Dropout classifier = new Dropout(train_N, n_in, hidden_layer_sizes, n_out, rng, "ReLU"); + + // train + classifier.train(n_epochs, train_X, train_Y, dropout, p_dropout, learning_rate); + + // pretest + if(dropout) classifier.pretest(p_dropout); + + + // test data + double[][] test_X = { + {0., 0.}, + {0., 1.}, + {1., 0.}, + {1., 1.}, + }; + + double[][] test_Y = new double[test_N][n_out]; + + // test + for(int i=0; i<test_N; i++) { + classifier.predict(test_X[i], test_Y[i]); + for(int j=0; j<n_out; j++) { + System.out.print(test_Y[i][j] + " "); + } + System.out.println(); + } + + } + + + public static void main(String[] args) { + test_dropout(); + } +} diff --git a/java/src/DeepLearning/HiddenLayer.java b/java/src/DeepLearning/HiddenLayer.java index ca50104..028727d 100644 --- a/java/src/DeepLearning/HiddenLayer.java +++ b/java/src/DeepLearning/HiddenLayer.java @@ -1,6 +1,7 @@ package DeepLearning; import java.util.Random; +import java.util.function.DoubleFunction; import static DeepLearning.utils.*; public class HiddenLayer { @@ -10,17 +11,18 @@ public class HiddenLayer { public double[][] W; public double[] b; public Random rng; + public DoubleFunction<Double> activation; + public DoubleFunction<Double> dactivation; - - public HiddenLayer(int N, int n_in, int n_out, double[][] W, double[] b, Random rng) { + public HiddenLayer(int N, int n_in, int n_out, double[][] W, double[] b, Random rng, String activation) { this.N = N; this.n_in = n_in; this.n_out = n_out; - if(rng == null) this.rng = new Random(1234); + if (rng == null) this.rng = new Random(1234); else this.rng = rng; - if(W == null) { + if (W == null) { this.W = new double[n_out][n_in]; double a = 1.0 / this.n_in; @@ -33,22 +35,72 @@ public HiddenLayer(int N, int n_in, int n_out, double[][] W, double[] b, Random this.W 
= W; } - if(b == null) this.b = new double[n_out]; + if (b == null) this.b = new double[n_out]; else this.b = b; + + if (activation == "sigmoid" || activation == null) { + this.activation = (double x) -> sigmoid(x); + this.dactivation = (double x) -> dsigmoid(x); + + } else if (activation == "tanh") { + this.activation = (double x) -> tanh(x); + this.dactivation = (double x) -> dtanh(x); + } else if (activation == "ReLU") { + this.activation = (double x) -> ReLU(x); + this.dactivation = (double x) -> dReLU(x); + } else { + throw new IllegalArgumentException("activation function not supported"); + } + } - public double output(int[] input, double[] w, double b) { + public double output(double[] input, double[] w, double b) { double linear_output = 0.0; for(int j=0; j<n_in; j++) { linear_output += w[j] * input[j]; } linear_output += b; - return sigmoid(linear_output); + + return activation.apply(linear_output); + } + + + public void forward(double[] input, double[] output) { + for(int i=0; i<n_out; i++) { + output[i] = this.output(input, W[i], b[i]); + } } - public void sample_h_given_v(int[] input, int[] sample) { + public void backward(double[] input, double[] dy, double[] prev_layer_input, double[] prev_layer_dy, double[][] prev_layer_W, double lr) { + if(dy == null) dy = new double[n_out]; + + int prev_n_in = n_out; + int prev_n_out = prev_layer_dy.length; + + for(int i=0; i<prev_n_in; i++) { + dy[i] = 0; + for(int j=0; j<prev_n_out; j++) { + dy[i] += prev_layer_dy[j] * prev_layer_W[j][i]; + } + + dy[i] *= dactivation.apply(prev_layer_input[i]); + } + for(int i=0; i<n_out; i++) { - sample[i] = binomial(1, output(input, W[i], b[i]), rng); + for(int j=0; j<n_in; j++) { + W[i][j] += lr * dy[i] * input[j] / N; + } + b[i] += lr * dy[i] / N; + } + } + + public int[] dropout(int size, double p, Random rng) { + int[] mask = new int[size]; + + for(int i=0; i<size; i++) { + mask[i] = binomial(1, p, rng); } + + return mask; } } diff --git a/java/src/DeepLearning/HiddenLayerDiscrete.java b/java/src/DeepLearning/HiddenLayerDiscrete.java new file mode 100644 index 0000000..b399db9 --- /dev/null +++ b/java/src/DeepLearning/HiddenLayerDiscrete.java @@ -0,0 +1,56 @@ +package DeepLearning; + +import java.util.Random; +import static DeepLearning.utils.*; + +public class HiddenLayerDiscrete extends HiddenLayer { + public int N; + public int n_in; + public int n_out; + public double[][] W; + public double[] b; + public Random rng; + + + public HiddenLayerDiscrete(int N, int n_in, int n_out, double[][] W, double[] b, Random rng) { + super(N, n_in, n_out, W, b, rng, null); + + this.N = N; + this.n_in = n_in; + this.n_out = n_out; + + if(rng == null) this.rng = new Random(1234); + else this.rng = rng; + + if(W == null) { + this.W = new double[n_out][n_in]; + double a = 1.0 / this.n_in; + + for(int i=0; i<n_out; i++) { + for(int j=0; j<n_in; j++) { + this.W[i][j] = uniform(-a, a, rng); + } + } + } else { + this.W = W; + } + + if(b == null) this.b = new double[n_out]; + else this.b = b; + } + + public double output(int[] input, double[] w, double b) { + double linear_output = 0.0; + for(int j=0; j<n_in; j++) { + linear_output += w[j] * input[j]; + } + linear_output += b; + return sigmoid(linear_output); + } + + public void sample_h_given_v(int[] input, int[] sample) { + for(int i=0; i<n_out; i++) { + sample[i] = binomial(1, output(input, W[i], b[i]), rng); + } + } +} diff --git a/java/src/DeepLearning/LogisticRegression.java b/java/src/DeepLearning/LogisticRegression.java index abe11a0..af5a9a5 100644 --- 
a/java/src/DeepLearning/LogisticRegression.java +++ b/java/src/DeepLearning/LogisticRegression.java @@ -12,11 +12,11 @@ public LogisticRegression(int N, int n_in, int n_out) { this.n_in = n_in; this.n_out = n_out; - W = new double[this.n_out][this.n_in]; - b = new double[this.n_out]; + W = new double[n_out][n_in]; + b = new double[n_out]; } - public void train(int[] x, int[] y, double lr) { + public double[] train(double[] x, int[] y, double lr) { double[] p_y_given_x = new double[n_out]; double[] dy = new double[n_out]; @@ -38,6 +38,8 @@ public void train(int[] x, int[] y, double lr) { b[i] += lr * dy[i] / N; } + + return dy; } public void softmax(double[] x) { @@ -54,9 +56,9 @@ public void softmax(double[] x) { for(int i=0; i<n_out; i++) x[i] /= sum; } - public void predict(int[] x, double[] y) { + public void predict(double[] x, double[] y) { for(int i=0; i<n_out; i++) { - y[i] = 0; + y[i] = 0.; for(int j=0; j<n_in; j++) { y[i] += W[i][j] * x[j]; } @@ -75,13 +77,13 @@ private static void test_lr() { int n_in = 6; int n_out = 2; - int[][] train_X = { - {1, 1, 1, 0, 0, 0}, - {1, 0, 1, 0, 0, 0}, - {1, 1, 1, 0, 0, 0}, - {0, 0, 1, 1, 1, 0}, - {0, 0, 1, 1, 0, 0}, - {0, 0, 1, 1, 1, 0} + double[][] train_X = { + {1., 1., 1., 0., 0., 0.}, + {1., 0., 1., 0., 0., 0.}, + {1., 1., 1., 0., 0., 0.}, + {0., 0., 1., 1., 1., 0.}, + {0., 0., 1., 1., 0., 0.}, + {0., 0., 1., 1., 1., 0.} }; int[][] train_Y = { @@ -105,9 +107,9 @@ private static void test_lr() { } // test data - int[][] test_X = { - {1, 0, 1, 0, 0, 0}, - {0, 0, 1, 1, 1, 0} + double[][] test_X = { + {1., 0., 1., 0., 0., 0.}, + {0., 0., 1., 1., 1., 0.} }; double[][] test_Y = new double[test_N][n_out]; diff --git a/java/src/DeepLearning/LogisticRegressionDiscrete.java b/java/src/DeepLearning/LogisticRegressionDiscrete.java new file mode 100644 index 0000000..fce5172 --- /dev/null +++ b/java/src/DeepLearning/LogisticRegressionDiscrete.java @@ -0,0 +1,117 @@ +package DeepLearning; + +public class LogisticRegressionDiscrete extends LogisticRegression { + public int N; + public int n_in; + public int n_out; + public double[][] W; + public double[] b; + + public LogisticRegressionDiscrete(int N, int n_in, int n_out) { + super(N, n_in, n_out); + + this.N = N; + this.n_in = n_in; + this.n_out = n_out; + + W = new double[this.n_out][this.n_in]; + b = new double[this.n_out]; + } + + public void train(int[] x, int[] y, double lr) { + double[] p_y_given_x = new double[n_out]; + double[] dy = new double[n_out]; + + for(int i=0; i<n_out; i++) { + p_y_given_x[i] = 0; + for(int j=0; j<n_in; j++) { + p_y_given_x[i] += W[i][j] * x[j]; + } + p_y_given_x[i] += b[i]; + } + softmax(p_y_given_x); + + for(int i=0; i<n_out; i++) { + dy[i] = y[i] - p_y_given_x[i]; + + for(int j=0; j<n_in; j++) { + W[i][j] += lr * dy[i] * x[j] / N; + } + + b[i] += lr * dy[i] / N; + } + } + + public void predict(int[] x, double[] y) { + for(int i=0; i<n_out; i++) { + y[i] = 0; + for(int j=0; j<n_in; j++) { + y[i] += W[i][j] * x[j]; + } + y[i] += b[i]; + } + + softmax(y); + } + + private static void test_lr() { + double learning_rate = 0.1; + int n_epochs = 500; + + int train_N = 6; + int test_N = 2; + int n_in = 6; + int n_out = 2; + + int[][] train_X = { + {1, 1, 1, 0, 0, 0}, + {1, 0, 1, 0, 0, 0}, + {1, 1, 1, 0, 0, 0}, + {0, 0, 1, 1, 1, 0}, + {0, 0, 1, 1, 0, 0}, + {0, 0, 1, 1, 1, 0} + }; + + int[][] train_Y = { + {1, 0}, + {1, 0}, + {1, 0}, + {0, 1}, + {0, 1}, + {0, 1} + }; + + // construct + LogisticRegressionDiscrete classifier = new LogisticRegressionDiscrete(train_N, n_in, n_out); + 
+ // train + for(int epoch=0; epoch<n_epochs; epoch++) { + for(int i=0; i<train_N; i++) { + classifier.train(train_X[i], train_Y[i], learning_rate); + } + //learning_rate *= 0.95; + } + + // test data + int[][] test_X = { + {1, 0, 1, 0, 0, 0}, + {0, 0, 1, 1, 1, 0} + }; + + double[][] test_Y = new double[test_N][n_out]; + + + // test + for(int i=0; i<test_N; i++) { + classifier.predict(test_X[i], test_Y[i]); + for(int j=0; j<n_out; j++) { + System.out.print(test_Y[i][j] + " "); + } + System.out.println(); + } + } + + public static void main(String[] args) { + test_lr(); + } +} diff --git a/java/src/DeepLearning/MLP.java b/java/src/DeepLearning/MLP.java new file mode 100644 index 0000000..7acece8 --- /dev/null +++ b/java/src/DeepLearning/MLP.java @@ -0,0 +1,124 @@ +package DeepLearning; + +import java.util.Random; + +public class MLP { + public int N; + public int n_in; + public int n_hidden; + public int n_out; + public HiddenLayer hiddenLayer; + public LogisticRegression logisticLayer; + public Random rng; + + + public MLP(int N, int n_in, int n_hidden, int n_out, Random rng) { + + this.N = N; + this.n_in = n_in; + this.n_hidden = n_hidden; + this.n_out = n_out; + + if (rng == null)rng = new Random(1234); + this.rng = rng; + + // construct hiddenLayer + this.hiddenLayer = new HiddenLayer(N, n_in, n_hidden, null, null, rng, "tanh"); + + // construct logisticLayer + this.logisticLayer = new LogisticRegression(N, n_hidden, n_out); + } + + + public void train(double[][] train_X, int[][] train_Y, double lr) { + double[] hidden_layer_input; + double[] logistic_layer_input; + double[] dy; + + for(int n=0; n<N; n++) { + hidden_layer_input = new double[n_in]; + logistic_layer_input = new double[n_hidden]; + + for(int j=0; j<n_in; j++) hidden_layer_input[j] = train_X[n][j]; + + // forward hiddenLayer + hiddenLayer.forward(hidden_layer_input, logistic_layer_input); + + // forward and backward logisticLayer + // dy = new double[n_out]; // define delta of y for backpropagation + dy = logisticLayer.train(logistic_layer_input, train_Y[n], lr); //, dy); + + // backward hiddenLayer + hiddenLayer.backward(hidden_layer_input, null, logistic_layer_input, dy, logisticLayer.W, lr); + + } + } + + public void predict(double[] x, double[] y) { + double[] logistic_layer_input = new double[n_hidden]; + hiddenLayer.forward(x, logistic_layer_input); + logisticLayer.predict(logistic_layer_input, y); + } + + + + private static void test_mlp() { + Random rng = new Random(123); + + double learning_rate = 0.1; + int n_epochs = 5000; + + int train_N = 4; + int test_N = 4; + int n_in = 2; + int n_hidden = 3; + int n_out = 2; + + double[][] train_X = { + {0., 0.}, + {0., 1.}, + {1., 0.}, + {1., 1.}, + }; + + int[][] train_Y = { + {0, 1}, + {1, 0}, + {1, 0}, + {0, 1}, + }; + + // construct MLP + MLP classifier = new MLP(train_N, n_in, n_hidden, n_out, rng); + + // train + for(int epoch=0; epoch<n_epochs; epoch++) { + classifier.train(train_X, train_Y, learning_rate); + } + + // test data + double[][] test_X = { + {0., 0.}, + {0., 1.}, + {1., 0.}, + {1., 1.}, + }; + + double[][] test_Y = new double[test_N][n_out]; + + + // test + for(int i=0; i<test_N; i++) { + classifier.predict(test_X[i], test_Y[i]); + for(int j=0; j<n_out; j++) { + System.out.print(test_Y[i][j] + " "); + } + System.out.println(); + } + + } + + public static void main(String[] args) { + test_mlp(); + } +} diff --git a/java/src/DeepLearning/RBM.java b/java/src/DeepLearning/RBM.java index d84e65c..f3268f8 100644 --- a/java/src/DeepLearning/RBM.java +++ 
b/java/src/DeepLearning/RBM.java @@ -3,7 +3,6 @@ import java.util.Random; import static DeepLearning.utils.*; - public class RBM { public int N; public int n_visible; diff --git a/java/src/DeepLearning/SdA.java b/java/src/DeepLearning/SdA.java index 962c335..af761e7 100644 --- a/java/src/DeepLearning/SdA.java +++ b/java/src/DeepLearning/SdA.java @@ -9,9 +9,9 @@ public class SdA { public int[] hidden_layer_sizes; public int n_outs; public int n_layers; - public HiddenLayer[] sigmoid_layers; + public HiddenLayerDiscrete[] sigmoid_layers; public dA[] dA_layers; - public LogisticRegression log_layer; + public LogisticRegressionDiscrete log_layer; public Random rng; @@ -24,7 +24,7 @@ public SdA(int N, int n_ins, int[] hidden_layer_sizes, int n_outs, int n_layers, this.n_outs = n_outs; this.n_layers = n_layers; - this.sigmoid_layers = new HiddenLayer[n_layers]; + this.sigmoid_layers = new HiddenLayerDiscrete[n_layers]; this.dA_layers = new dA[n_layers]; if(rng == null) this.rng = new Random(1234); @@ -39,14 +39,14 @@ public SdA(int N, int n_ins, int[] hidden_layer_sizes, int n_outs, int n_layers, } // construct sigmoid_layer - this.sigmoid_layers[i] = new HiddenLayer(this.N, input_size, this.hidden_layer_sizes[i], null, null, rng); + this.sigmoid_layers[i] = new HiddenLayerDiscrete(this.N, input_size, this.hidden_layer_sizes[i], null, null, rng); // construct dA_layer this.dA_layers[i] = new dA(this.N, input_size, this.hidden_layer_sizes[i], this.sigmoid_layers[i].W, this.sigmoid_layers[i].b, null, rng); } - // layer for output using DNN.LogisticRegression - this.log_layer = new LogisticRegression(this.N, this.hidden_layer_sizes[this.n_layers-1], this.n_outs); + // layer for output using Logistic Regression + this.log_layer = new LogisticRegressionDiscrete(this.N, this.hidden_layer_sizes[this.n_layers-1], this.n_outs); } public void pretrain(int[][] train_X, double lr, double corruption_level, int epochs) { diff --git a/java/src/DeepLearning/utils.java b/java/src/DeepLearning/utils.java index efce0d9..95d95f7 100644 --- a/java/src/DeepLearning/utils.java +++ b/java/src/DeepLearning/utils.java @@ -21,9 +21,35 @@ public static int binomial(int n, double p, Random rng) { return c; } - public static double sigmoid(double x) { - return 1.0 / (1.0 + Math.pow(Math.E, -x)); + return 1. / (1. + Math.pow(Math.E, -x)); + } + + public static double dsigmoid(double x) { + return x * (1. - x); + } + + public static double tanh(double x) { + return Math.tanh(x); + } + + public static double dtanh(double x) { + return 1. 
- x * x; + } + + public static double ReLU(double x) { + if(x > 0) { + return x; + } else { + return 0.; + } } + public static double dReLU(double x) { + if(x > 0) { + return 1.; + } else { + return 0.; + } + } } From ad217dbb8b993a9cff0d0e6cf3e6a9de5778ed33 Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori <yusugomori@users.noreply.github.com> Date: Thu, 27 Aug 2015 11:50:06 -0400 Subject: [PATCH 37/45] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 26b41bd..fc5b865 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -## Deep Learning (Python, C/C++, Java, Scala, Go) +## Deep Learning (Python, C, C++, Java, Scala, Go) ### Classes : From 03efd4a19dd5629cb85a8afdf5cb75d02ef07cc2 Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori <me@yusugomori.com> Date: Tue, 20 Oct 2015 00:53:17 +0900 Subject: [PATCH 38/45] bug fix --- .gitignore | 5 ++++- data/.gitkeep | 0 java/src/DeepLearning/Dropout.java | 10 +++++----- python/RBM.py | 2 +- 4 files changed, 10 insertions(+), 7 deletions(-) create mode 100644 data/.gitkeep diff --git a/.gitignore b/.gitignore index 252ef14..98c89ef 100644 --- a/.gitignore +++ b/.gitignore @@ -11,4 +11,7 @@ java/.idea/* java/java.iml -java/out/* \ No newline at end of file +java/out/* + +data/* +!data/.gitkeep \ No newline at end of file diff --git a/data/.gitkeep b/data/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/java/src/DeepLearning/Dropout.java b/java/src/DeepLearning/Dropout.java index b4717c6..bb6378f 100644 --- a/java/src/DeepLearning/Dropout.java +++ b/java/src/DeepLearning/Dropout.java @@ -103,14 +103,14 @@ public void train(int epochs, double[][] train_X, int[][] train_Y, boolean dropo prev_W = hiddenLayers[i+1].W; } - dy = new double[hidden_layer_sizes[i]]; - hiddenLayers[i].backward(layer_inputs.get(i), dy, layer_inputs.get(i+1), prev_dy, prev_W, lr); - if(dropout) { - for(int j=0; j<dy.length; j++) { - dy[j] *= dropout_masks.get(i)[j]; + for(int j=0; j<prev_dy.length; j++) { + prev_dy[j] *= dropout_masks.get(i)[j]; } } + + dy = new double[hidden_layer_sizes[i]]; + hiddenLayers[i].backward(layer_inputs.get(i), dy, layer_inputs.get(i+1), prev_dy, prev_W, lr); } } diff --git a/python/RBM.py b/python/RBM.py index 4496692..7a127d8 100755 --- a/python/RBM.py +++ b/python/RBM.py @@ -61,7 +61,7 @@ def contrastive_divergence(self, lr=0.1, k=1, input=None): self.W += lr * (numpy.dot(self.input.T, ph_mean) - numpy.dot(nv_samples.T, nh_means)) self.vbias += lr * numpy.mean(self.input - nv_samples, axis=0) - self.hbias += lr * numpy.mean(ph_sample - nh_means, axis=0) + self.hbias += lr * numpy.mean(ph_mean - nh_means, axis=0) # cost = self.get_reconstruction_cross_entropy() # return cost From 5c9cfe02894a666343e5a4640586364e420e2458 Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori <me@yusugomori.com> Date: Tue, 20 Oct 2015 01:13:20 +0900 Subject: [PATCH 39/45] bug fix --- python/Dropout.py | 7 ++++--- python/HiddenLayer.py | 5 ++++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/python/Dropout.py b/python/Dropout.py index 16c203f..ba99116 100755 --- a/python/Dropout.py +++ b/python/Dropout.py @@ -87,10 +87,11 @@ def train(self, epochs=5000, dropout=True, p_dropout=0.5, rng=None): else: prev_layer = self.hidden_layers[i+1] - self.hidden_layers[i].backward(prev_layer=prev_layer) - if dropout == True: - self.hidden_layers[i].d_y *= dropout_masks[i] # also mask here + self.hidden_layers[i].backward(prev_layer=prev_layer, dropout=True, mask=dropout_masks[i]) + else: + 
self.hidden_layers[i].backward(prev_layer=prev_layer) + def predict(self, x, dropout=True, p_dropout=0.5): diff --git a/python/HiddenLayer.py b/python/HiddenLayer.py index 75f8ab2..a97bc61 100755 --- a/python/HiddenLayer.py +++ b/python/HiddenLayer.py @@ -57,12 +57,15 @@ def forward(self, input=None): return self.output(input=input) - def backward(self, prev_layer, lr=0.1, input=None): + def backward(self, prev_layer, lr=0.1, input=None, dropout=False, mask=None): if input is not None: self.x = input d_y = self.dactivation(prev_layer.x) * numpy.dot(prev_layer.d_y, prev_layer.W.T) + if dropout == True: + d_y *= mask + self.W += lr * numpy.dot(self.x.T, d_y) self.b += lr * numpy.mean(d_y, axis=0) self.d_y = d_y From f4e32289fbe627cd2bcc5a1bf703b2df8499b55c Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori <yusugomori@users.noreply.github.com> Date: Fri, 30 Oct 2015 15:05:59 +0900 Subject: [PATCH 40/45] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index fc5b865..ab89b7c 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,8 @@ - Dropout: Dropout MLP + - CNN: Convolutional Neural Networks (See [dev branch](https://github.com/yusugomori/DeepLearning/tree/dev).) + ### References : From 7a3596669bfe8e1e01b5137702d739155c4e4ba9 Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori <yusugomori@users.noreply.github.com> Date: Thu, 17 Dec 2015 19:23:14 +0900 Subject: [PATCH 41/45] Update README.md --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index ab89b7c..0ece2c8 100644 --- a/README.md +++ b/README.md @@ -42,6 +42,10 @@ - Yusuke Sugomori: Stochastic Gradient Descent for Denoising Autoencoders, http://yusugomori.com/docs/SGD_DA.pdf +### Publication : + - More cleaner Java implementations are introduced in my book, Deep Learning with Java. + + The book is now available to pre-order from [Packt Publishing](https://www.packtpub.com/big-data-and-business-intelligence/deep-learning-java) or [Amazon](http://www.amazon.com/Deep-Learning-Java-Yusuke-Sugomori/dp/1785282190/). ### Other projects : - My deep learning projects are [here](http://yusugomori.com/projects/deep-learning/). From 8d371431bd4d80914a73d666c72ffc94182275c8 Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori <yusugomori@users.noreply.github.com> Date: Fri, 18 Dec 2015 10:27:27 +0900 Subject: [PATCH 42/45] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0ece2c8..595890d 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ http://yusugomori.com/docs/SGD_DA.pdf ### Publication : - - More cleaner Java implementations are introduced in my book, Deep Learning with Java. + - More detailed Java implementations are introduced in my book, Deep Learning with Java. The book is now available to pre-order from [Packt Publishing](https://www.packtpub.com/big-data-and-business-intelligence/deep-learning-java) or [Amazon](http://www.amazon.com/Deep-Learning-Java-Yusuke-Sugomori/dp/1785282190/). 
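Regarding the Dropout backpropagation fix in PATCH 39 above: the binary mask a hidden layer drew in the forward pass must gate that layer's delta before its weights are updated, not after, so dropped units receive no gradient at all. A minimal, hypothetical sketch of the intended backward step in standalone numpy (not the repository's HiddenLayer class; argument names are illustrative):

import numpy

def hidden_backward_with_dropout(x, W, b, prev_W, prev_d_y, dact_out, mask, lr=0.1):
    # x:        input to this layer, shape (N, n_in)
    # W, b:     this layer's parameters; W has shape (n_in, n_out)
    # prev_W:   weights of the layer above, shape (n_out, prev_n_out)
    # prev_d_y: delta of the layer above, shape (N, prev_n_out)
    # dact_out: activation derivative evaluated at this layer's forward output, shape (N, n_out)
    # mask:     dropout mask drawn for this layer in the forward pass, shape (N, n_out)
    d_y = dact_out * numpy.dot(prev_d_y, prev_W.T)
    d_y *= mask                                  # gate the delta with the forward-pass mask (PATCH 39)
    W += lr * numpy.dot(x.T, d_y)
    b += lr * numpy.mean(d_y, axis=0)
    return d_y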
From 73e2f448931d9a2d2117572ea041c2f76956fae4 Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori <yusugomori@users.noreply.github.com> Date: Tue, 31 May 2016 01:08:05 +0900 Subject: [PATCH 43/45] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 595890d..77ba574 100644 --- a/README.md +++ b/README.md @@ -43,9 +43,9 @@ http://yusugomori.com/docs/SGD_DA.pdf ### Publication : - - More detailed Java implementations are introduced in my book, Deep Learning with Java. + - More detailed Java implementations are introduced in my book, Java Deep Learning Essentials. - The book is now available to pre-order from [Packt Publishing](https://www.packtpub.com/big-data-and-business-intelligence/deep-learning-java) or [Amazon](http://www.amazon.com/Deep-Learning-Java-Yusuke-Sugomori/dp/1785282190/). + The book is available from [Packt Publishing](https://www.packtpub.com/big-data-and-business-intelligence/java-deep-learning-essentials) or [Amazon](http://www.amazon.com/Deep-Learning-Java-Yusuke-Sugomori/dp/1785282190/). ### Other projects : - My deep learning projects are [here](http://yusugomori.com/projects/deep-learning/). From 6b8af8891a94a1a25c15b5d5151f4bb59d0dab5f Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori <yusuke_sugomori@micin.jp> Date: Thu, 7 Sep 2017 21:35:24 +0900 Subject: [PATCH 44/45] add LICENSE --- LICENSE.txt | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 LICENSE.txt diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..92d495f --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,22 @@ +Copyright (c) 2017 Yusuke Sugomori + +MIT License + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. From 739dfd1d7919c328e0d3b8129855c2ad71b80036 Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori <yusuke_sugomori@micin.jp> Date: Mon, 4 Dec 2017 15:01:44 +0900 Subject: [PATCH 45/45] update README --- README.md | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/README.md b/README.md index 77ba574..37f8767 100644 --- a/README.md +++ b/README.md @@ -47,11 +47,7 @@ The book is available from [Packt Publishing](https://www.packtpub.com/big-data-and-business-intelligence/java-deep-learning-essentials) or [Amazon](http://www.amazon.com/Deep-Learning-Java-Yusuke-Sugomori/dp/1785282190/). -### Other projects : - - My deep learning projects are [here](http://yusugomori.com/projects/deep-learning/). - - ##### Bug reports / contributions / donations are deeply welcome. 
-Bitcoin wallet address: 1QAoYw5Y3opvah2APf4jVcpD6UAHyC3k7s +Bitcoin wallet address: 34kZarc2uBU6BMCouUp2iudvZtbmZMPqrA