From 78611962735f3a9bb41d6dcc04d9f9423557fd1f Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori Date: Tue, 25 Mar 2014 21:04:01 +0900 Subject: [PATCH 01/36] minor bug fix --- java/LogisticRegression/src/LogisticRegression.java | 2 +- scala/LogisticRegression.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/java/LogisticRegression/src/LogisticRegression.java b/java/LogisticRegression/src/LogisticRegression.java index 8a13407..21e8f22 100644 --- a/java/LogisticRegression/src/LogisticRegression.java +++ b/java/LogisticRegression/src/LogisticRegression.java @@ -109,7 +109,7 @@ private static void test_lr() { {0, 0, 1, 1, 1, 0} }; - double[][] test_Y = new double[test_N][n_in]; + double[][] test_Y = new double[test_N][n_out]; // test diff --git a/scala/LogisticRegression.scala b/scala/LogisticRegression.scala index 2386f5e..bf9b229 100644 --- a/scala/LogisticRegression.scala +++ b/scala/LogisticRegression.scala @@ -113,7 +113,7 @@ object LogisticRegression { Array(0, 0, 1, 1, 1, 0) ) - val test_Y: Array[Array[Double]] = Array.ofDim[Double](test_N, n_in) + val test_Y: Array[Array[Double]] = Array.ofDim[Double](test_N, n_out) // test var j: Int = 0 From 15241eb8a1fa7ca3a487b83b458c4f4d77a80d0c Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori Date: Sun, 7 Dec 2014 02:22:43 +0900 Subject: [PATCH 02/36] go --- c/LogisticRegression.c | 2 +- cpp/LogisticRegression.cpp | 20 +-- go/LogisticRegression.go | 150 ++++++++++++++++++ .../src/LogisticRegression.java | 2 +- 4 files changed, 153 insertions(+), 21 deletions(-) create mode 100644 go/LogisticRegression.go diff --git a/c/LogisticRegression.c b/c/LogisticRegression.c index 0c0c04a..b55c707 100644 --- a/c/LogisticRegression.c +++ b/c/LogisticRegression.c @@ -94,7 +94,7 @@ void test_lr(void) { int i, j, epoch; double learning_rate = 0.1; - double n_epochs = 500; + int n_epochs = 500; int train_N = 6; int test_N = 2; diff --git a/cpp/LogisticRegression.cpp b/cpp/LogisticRegression.cpp index 9eb8f24..6eca566 100644 --- a/cpp/LogisticRegression.cpp +++ b/cpp/LogisticRegression.cpp @@ -86,30 +86,12 @@ void test_lr() { srand(0); double learning_rate = 0.1; - double n_epochs = 500; + int n_epochs = 500; int train_N = 6; int test_N = 2; int n_in = 6; int n_out = 2; - // int **train_X; - // int **train_Y; - // int **test_X; - // double **test_Y; - - // train_X = new int*[train_N]; - // train_Y = new int*[train_N]; - // for(i=0; i Date: Sun, 7 Dec 2014 02:23:48 +0900 Subject: [PATCH 03/36] add go to readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 735e5d0..48c2f67 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -## Deep Learning (Python, C/C++, Java, Scala) +## Deep Learning (Python, C/C++, Java, Scala, Go) ### Classes : From a636ca8a2cd98a8a344b4f7ebeb3792a442e87a0 Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori Date: Sun, 7 Dec 2014 02:34:12 +0900 Subject: [PATCH 04/36] untabify go --- go/LogisticRegression.go | 54 ++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/go/LogisticRegression.go b/go/LogisticRegression.go index 00b8768..1103cff 100644 --- a/go/LogisticRegression.go +++ b/go/LogisticRegression.go @@ -113,38 +113,38 @@ func test_lr() { // construct LogisticRegression var classifier LogisticRegression - LogisticRegression__construct(&classifier, train_N, n_in, n_out) - - // train - for epoch := 0; epoch < n_epochs; epoch++ { - for i := 0; i < train_N; i++ { - LogisticRegression_train(&classifier, train_X[i], 
train_Y[i], learning_rate) - } - } - - // test data - test_X := [][]int { + LogisticRegression__construct(&classifier, train_N, n_in, n_out) + + // train + for epoch := 0; epoch < n_epochs; epoch++ { + for i := 0; i < train_N; i++ { + LogisticRegression_train(&classifier, train_X[i], train_Y[i], learning_rate) + } + } + + // test data + test_X := [][]int { {1, 0, 1, 0, 0, 0}, {0, 0, 1, 1, 1, 0}, - } - - test_Y := make([][]float64, test_N) - for i := 0; i < test_N; i++ { test_Y[i] = make([]float64, n_out) } - - - // test - for i := 0; i < test_N; i++ { - LogisticRegression_predict(&classifier, test_X[i], test_Y[i]) - for j := 0; j < n_out; j++ { - fmt.Printf("%f ", test_Y[i][j]) - } - fmt.Printf("\n") - } - + } + + test_Y := make([][]float64, test_N) + for i := 0; i < test_N; i++ { test_Y[i] = make([]float64, n_out) } + + + // test + for i := 0; i < test_N; i++ { + LogisticRegression_predict(&classifier, test_X[i], test_Y[i]) + for j := 0; j < n_out; j++ { + fmt.Printf("%f ", test_Y[i][j]) + } + fmt.Printf("\n") + } + } func main() { - test_lr() + test_lr() } From 9a09bc974637ef244bb743666e31795372b2fe70 Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori Date: Sun, 7 Dec 2014 02:37:50 +0900 Subject: [PATCH 05/36] untabify go --- go/LogisticRegression.go | 152 +++++++++++++++++++-------------------- 1 file changed, 76 insertions(+), 76 deletions(-) diff --git a/go/LogisticRegression.go b/go/LogisticRegression.go index 1103cff..b1e9a65 100644 --- a/go/LogisticRegression.go +++ b/go/LogisticRegression.go @@ -1,118 +1,118 @@ package main import ( - "fmt" - "math" + "fmt" + "math" ) type LogisticRegression struct { - N int - n_in int - n_out int - W [][]float64 - b []float64 + N int + n_in int + n_out int + W [][]float64 + b []float64 } func LogisticRegression__construct(this *LogisticRegression, N int, n_in int, n_out int) { - this.N = N - this.n_in = n_in - this.n_out = n_out - - this.W = make([][]float64, n_out) - for i := 0; i < n_out; i++ { this.W[i] = make([]float64, n_in) } - - this.b = make([]float64, n_out) + this.N = N + this.n_in = n_in + this.n_out = n_out + + this.W = make([][]float64, n_out) + for i := 0; i < n_out; i++ { this.W[i] = make([]float64, n_in) } + + this.b = make([]float64, n_out) } func LogisticRegression_train(this *LogisticRegression, x []int, y []int, lr float64) { - p_y_given_x := make([]float64, this.n_out) - dy := make([]float64, this.n_out) - - for i := 0; i < this.n_out; i++ { - p_y_given_x[i] = 0 - for j := 0; j < this.n_in; j++ { - p_y_given_x[i] += this.W[i][j] * float64(x[j]) - } - p_y_given_x[i] += this.b[i] - } - LogisticRegression_softmax(this, p_y_given_x) - - for i := 0; i < this.n_out; i++ { - dy[i] = float64(y[i]) - p_y_given_x[i] - - for j := 0; j < this.n_in; j++ { - this.W[i][j] += lr * dy[i] * float64(x[j]) / float64(this.N) - } - - this.b[i] += lr * dy[i] / float64(this.N) - } - + p_y_given_x := make([]float64, this.n_out) + dy := make([]float64, this.n_out) + + for i := 0; i < this.n_out; i++ { + p_y_given_x[i] = 0 + for j := 0; j < this.n_in; j++ { + p_y_given_x[i] += this.W[i][j] * float64(x[j]) + } + p_y_given_x[i] += this.b[i] + } + LogisticRegression_softmax(this, p_y_given_x) + + for i := 0; i < this.n_out; i++ { + dy[i] = float64(y[i]) - p_y_given_x[i] + + for j := 0; j < this.n_in; j++ { + this.W[i][j] += lr * dy[i] * float64(x[j]) / float64(this.N) + } + + this.b[i] += lr * dy[i] / float64(this.N) + } + } func LogisticRegression_softmax(this *LogisticRegression, x []float64) { - var ( - max float64 - sum float64 - ) - - for i := 0; i < 
this.n_out; i++ { if max < x[i] {max = x[i]} } - for i := 0; i < this.n_out; i++ { - x[i] = math.Exp(x[i] - max) - sum += x[i] - } - - for i := 0; i < this.n_out; i++ { x[i] /= sum } + var ( + max float64 + sum float64 + ) + + for i := 0; i < this.n_out; i++ { if max < x[i] {max = x[i]} } + for i := 0; i < this.n_out; i++ { + x[i] = math.Exp(x[i] - max) + sum += x[i] + } + + for i := 0; i < this.n_out; i++ { x[i] /= sum } } func LogisticRegression_predict(this *LogisticRegression, x []int, y []float64) { - for i := 0; i < this.n_out; i++ { - y[i] = 0 - for j := 0; j < this.n_in; j++ { - y[i] += this.W[i][j] * float64(x[j]) - } - y[i] += this.b[i] - } - - LogisticRegression_softmax(this, y) + for i := 0; i < this.n_out; i++ { + y[i] = 0 + for j := 0; j < this.n_in; j++ { + y[i] += this.W[i][j] * float64(x[j]) + } + y[i] += this.b[i] + } + + LogisticRegression_softmax(this, y) } func test_lr() { - - learning_rate := 0.1 - n_epochs := 500 - - train_N := 6 - test_N := 2 - n_in := 6 - n_out := 2 - - - // training data - train_X := [][]int { + + learning_rate := 0.1 + n_epochs := 500 + + train_N := 6 + test_N := 2 + n_in := 6 + n_out := 2 + + + // training data + train_X := [][]int { {1, 1, 1, 0, 0, 0}, {1, 0, 1, 0, 0, 0}, {1, 1, 1, 0, 0, 0}, {0, 0, 1, 1, 1, 0}, {0, 0, 1, 1, 0, 0}, {0, 0, 1, 1, 1, 0}, - } + } - train_Y := [][]int { + train_Y := [][]int { {1, 0}, {1, 0}, {1, 0}, {0, 1}, {0, 1}, {0, 1}, - } + } - - // construct LogisticRegression - var classifier LogisticRegression + + // construct LogisticRegression + var classifier LogisticRegression LogisticRegression__construct(&classifier, train_N, n_in, n_out) // train From ee7e7b6c208fa14089e9cf66b090036baf69d474 Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori Date: Sun, 7 Dec 2014 02:39:18 +0900 Subject: [PATCH 06/36] tabify go --- go/LogisticRegression.go | 240 +++++++++++++++++++-------------------- 1 file changed, 120 insertions(+), 120 deletions(-) diff --git a/go/LogisticRegression.go b/go/LogisticRegression.go index b1e9a65..cbc7e0e 100644 --- a/go/LogisticRegression.go +++ b/go/LogisticRegression.go @@ -1,150 +1,150 @@ package main import ( - "fmt" - "math" + "fmt" + "math" ) type LogisticRegression struct { - N int - n_in int - n_out int - W [][]float64 - b []float64 + N int + n_in int + n_out int + W [][]float64 + b []float64 } func LogisticRegression__construct(this *LogisticRegression, N int, n_in int, n_out int) { - this.N = N - this.n_in = n_in - this.n_out = n_out - - this.W = make([][]float64, n_out) - for i := 0; i < n_out; i++ { this.W[i] = make([]float64, n_in) } - - this.b = make([]float64, n_out) + this.N = N + this.n_in = n_in + this.n_out = n_out + + this.W = make([][]float64, n_out) + for i := 0; i < n_out; i++ { this.W[i] = make([]float64, n_in) } + + this.b = make([]float64, n_out) } func LogisticRegression_train(this *LogisticRegression, x []int, y []int, lr float64) { - p_y_given_x := make([]float64, this.n_out) - dy := make([]float64, this.n_out) - - for i := 0; i < this.n_out; i++ { - p_y_given_x[i] = 0 - for j := 0; j < this.n_in; j++ { - p_y_given_x[i] += this.W[i][j] * float64(x[j]) - } - p_y_given_x[i] += this.b[i] - } - LogisticRegression_softmax(this, p_y_given_x) - - for i := 0; i < this.n_out; i++ { - dy[i] = float64(y[i]) - p_y_given_x[i] - - for j := 0; j < this.n_in; j++ { - this.W[i][j] += lr * dy[i] * float64(x[j]) / float64(this.N) - } - - this.b[i] += lr * dy[i] / float64(this.N) - } - + p_y_given_x := make([]float64, this.n_out) + dy := make([]float64, this.n_out) + + for i := 0; i < this.n_out; 
i++ { + p_y_given_x[i] = 0 + for j := 0; j < this.n_in; j++ { + p_y_given_x[i] += this.W[i][j] * float64(x[j]) + } + p_y_given_x[i] += this.b[i] + } + LogisticRegression_softmax(this, p_y_given_x) + + for i := 0; i < this.n_out; i++ { + dy[i] = float64(y[i]) - p_y_given_x[i] + + for j := 0; j < this.n_in; j++ { + this.W[i][j] += lr * dy[i] * float64(x[j]) / float64(this.N) + } + + this.b[i] += lr * dy[i] / float64(this.N) + } + } func LogisticRegression_softmax(this *LogisticRegression, x []float64) { - var ( - max float64 - sum float64 - ) - - for i := 0; i < this.n_out; i++ { if max < x[i] {max = x[i]} } - for i := 0; i < this.n_out; i++ { - x[i] = math.Exp(x[i] - max) - sum += x[i] - } - - for i := 0; i < this.n_out; i++ { x[i] /= sum } + var ( + max float64 + sum float64 + ) + + for i := 0; i < this.n_out; i++ { if max < x[i] {max = x[i]} } + for i := 0; i < this.n_out; i++ { + x[i] = math.Exp(x[i] - max) + sum += x[i] + } + + for i := 0; i < this.n_out; i++ { x[i] /= sum } } func LogisticRegression_predict(this *LogisticRegression, x []int, y []float64) { - for i := 0; i < this.n_out; i++ { - y[i] = 0 - for j := 0; j < this.n_in; j++ { - y[i] += this.W[i][j] * float64(x[j]) - } - y[i] += this.b[i] - } - - LogisticRegression_softmax(this, y) + for i := 0; i < this.n_out; i++ { + y[i] = 0 + for j := 0; j < this.n_in; j++ { + y[i] += this.W[i][j] * float64(x[j]) + } + y[i] += this.b[i] + } + + LogisticRegression_softmax(this, y) } func test_lr() { - - learning_rate := 0.1 - n_epochs := 500 - - train_N := 6 - test_N := 2 - n_in := 6 - n_out := 2 - - - // training data - train_X := [][]int { - {1, 1, 1, 0, 0, 0}, - {1, 0, 1, 0, 0, 0}, - {1, 1, 1, 0, 0, 0}, - {0, 0, 1, 1, 1, 0}, - {0, 0, 1, 1, 0, 0}, - {0, 0, 1, 1, 1, 0}, - } - - - train_Y := [][]int { - {1, 0}, - {1, 0}, - {1, 0}, - {0, 1}, - {0, 1}, - {0, 1}, - } - - - // construct LogisticRegression - var classifier LogisticRegression - LogisticRegression__construct(&classifier, train_N, n_in, n_out) - - // train - for epoch := 0; epoch < n_epochs; epoch++ { - for i := 0; i < train_N; i++ { - LogisticRegression_train(&classifier, train_X[i], train_Y[i], learning_rate) - } - } - - // test data - test_X := [][]int { - {1, 0, 1, 0, 0, 0}, - {0, 0, 1, 1, 1, 0}, - } - - test_Y := make([][]float64, test_N) - for i := 0; i < test_N; i++ { test_Y[i] = make([]float64, n_out) } - - - // test - for i := 0; i < test_N; i++ { - LogisticRegression_predict(&classifier, test_X[i], test_Y[i]) - for j := 0; j < n_out; j++ { - fmt.Printf("%f ", test_Y[i][j]) - } - fmt.Printf("\n") - } - + + learning_rate := 0.1 + n_epochs := 500 + + train_N := 6 + test_N := 2 + n_in := 6 + n_out := 2 + + + // training data + train_X := [][]int { + {1, 1, 1, 0, 0, 0}, + {1, 0, 1, 0, 0, 0}, + {1, 1, 1, 0, 0, 0}, + {0, 0, 1, 1, 1, 0}, + {0, 0, 1, 1, 0, 0}, + {0, 0, 1, 1, 1, 0}, + } + + + train_Y := [][]int { + {1, 0}, + {1, 0}, + {1, 0}, + {0, 1}, + {0, 1}, + {0, 1}, + } + + + // construct LogisticRegression + var classifier LogisticRegression + LogisticRegression__construct(&classifier, train_N, n_in, n_out) + + // train + for epoch := 0; epoch < n_epochs; epoch++ { + for i := 0; i < train_N; i++ { + LogisticRegression_train(&classifier, train_X[i], train_Y[i], learning_rate) + } + } + + // test data + test_X := [][]int { + {1, 0, 1, 0, 0, 0}, + {0, 0, 1, 1, 1, 0}, + } + + test_Y := make([][]float64, test_N) + for i := 0; i < test_N; i++ { test_Y[i] = make([]float64, n_out) } + + + // test + for i := 0; i < test_N; i++ { + LogisticRegression_predict(&classifier, test_X[i], 
test_Y[i]) + for j := 0; j < n_out; j++ { + fmt.Printf("%f ", test_Y[i][j]) + } + fmt.Printf("\n") + } + } func main() { - test_lr() + test_lr() } From d2774d6f80a610c37a4b458dd13c8c7589fed25e Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori Date: Sun, 7 Dec 2014 12:32:12 +0900 Subject: [PATCH 07/36] RBM.go --- go/RBM.go | 200 ++++++++++++++++++++++++++++++++++++++++++++++ go/utils/utils.go | 28 +++++++ 2 files changed, 228 insertions(+) create mode 100644 go/RBM.go create mode 100644 go/utils/utils.go diff --git a/go/RBM.go b/go/RBM.go new file mode 100644 index 0000000..6369da5 --- /dev/null +++ b/go/RBM.go @@ -0,0 +1,200 @@ +package main + +import ( + "fmt" + "math/rand" + u "./utils" +) + +type RBM struct { + N int + n_visible int + n_hidden int + W [][]float64 + hbias []float64 + vbias []float64 +} + + +func RBM__construct(this *RBM, N int, n_visible int, n_hidden int, W [][]float64, hbias []float64, vbias []float64) { + a := 1.0 / float64(n_visible) + + this.N = N + this.n_visible = n_visible + this.n_hidden = n_hidden + + if W == nil { + this.W = make([][]float64, n_hidden) + for i := 0; i < n_hidden; i++ { this.W[i] = make([]float64, n_visible) } + + for i := 0; i < n_hidden; i++ { + for j := 0; j < n_visible; j++ { + this.W[i][j] = u.Uniform(-a, a) + } + } + } else { + this.W = W + } + + if hbias == nil { + this.hbias = make([]float64, n_hidden) + } else { + this.hbias = hbias + } + + if vbias == nil { + this.vbias = make([]float64, n_visible) + } else { + this.vbias = vbias + } +} + +func RBM_contrastive_divergence(this *RBM, input []int, lr float64, k int) { + ph_mean := make([]float64, this.n_hidden) + ph_sample := make([]int, this.n_hidden) + nv_means := make([]float64, this.n_visible) + nv_samples := make([]int, this.n_visible) + nh_means := make([]float64, this.n_hidden) + nh_samples := make([]int, this.n_hidden) + + /* CD-k */ + RBM_sample_h_given_v(this, input, ph_mean, ph_sample) + + for step := 0; step < k; step++ { + if step == 0 { + RBM_gibbs_hvh(this, ph_sample, nv_means, nv_samples, nh_means, nh_samples) + } else { + RBM_gibbs_hvh(this, nh_samples, nv_means, nv_samples, nh_means, nh_samples) + } + } + + for i := 0; i < this.n_hidden; i++ { + for j := 0; j < this.n_visible; j++ { + this.W[i][j] += lr * (ph_mean[i] * float64(input[j]) - nh_means[i] * float64(nv_samples[j])) / float64(this.N) + } + this.hbias[i] += lr * (float64(ph_sample[i]) - nh_means[i]) / float64(this.N) + } + + for i := 0; i < this.n_visible; i++ { + this.vbias[i] += lr * float64(input[i] - nv_samples[i]) / float64(this.N) + } +} + +func RBM_sample_h_given_v(this *RBM, v0_sample []int, mean []float64, sample []int) { + for i := 0; i < this.n_hidden; i++ { + mean[i] = RBM_propup(this, v0_sample, this.W[i], this.hbias[i]) + sample[i] = u.Binomial(1, mean[i]) + } +} + +func RBM_sample_v_given_h(this *RBM, h0_sample []int, mean []float64, sample []int) { + for i := 0; i < this.n_visible; i++ { + mean[i] = RBM_propdown(this, h0_sample, i, this.vbias[i]) + sample[i] = u.Binomial(1, mean[i]) + } +} + +func RBM_propup(this *RBM, v []int, w []float64, b float64) float64 { + pre_sigmoid_activation := 0.0 + + for j := 0; j < this.n_visible; j++ { + pre_sigmoid_activation += w[j] * float64(v[j]) + } + pre_sigmoid_activation += b + + return u.Sigmoid(pre_sigmoid_activation) +} + +func RBM_propdown(this *RBM, h []int, i int, b float64) float64 { + pre_sigmoid_activation := 0.0 + + for j := 0; j < this.n_hidden; j++ { + pre_sigmoid_activation += this.W[j][i] * float64(h[j]) + } + pre_sigmoid_activation += b + + 
return u.Sigmoid(pre_sigmoid_activation) +} + +func RBM_gibbs_hvh(this *RBM, h0_sample []int, nv_means []float64, nv_samples []int, nh_means []float64, nh_samples []int) { + RBM_sample_v_given_h(this, h0_sample, nv_means, nv_samples) + RBM_sample_h_given_v(this, nv_samples, nh_means, nh_samples) +} + +func RBM_reconstruct(this *RBM, v []int, reconstructed_v []float64) { + h := make([]float64, this.n_hidden) + var pre_sigmoid_activation float64 + + for i := 0; i < this.n_hidden; i++ { + h[i] = RBM_propup(this, v, this.W[i], this.hbias[i]) + } + + for i := 0; i < this.n_visible; i++ { + pre_sigmoid_activation = 0.0 + for j := 0; j < this.n_hidden; j++ { + pre_sigmoid_activation += this.W[j][i] * h[j] + } + pre_sigmoid_activation += this.vbias[i] + + reconstructed_v[i] = u.Sigmoid(pre_sigmoid_activation) + } +} + + +func test_rbm() { + rand.Seed(0) + + learning_rate := 0.1 + training_epochs := 1000 + k := 1 + + train_N := 6 + test_N := 2 + n_visible := 6 + n_hidden := 3 + + // training data + train_X := [][]int { + {1, 1, 1, 0, 0, 0}, + {1, 0, 1, 0, 0, 0}, + {1, 1, 1, 0, 0, 0}, + {0, 0, 1, 1, 1, 0}, + {0, 0, 1, 0, 1, 0}, + {0, 0, 1, 1, 1, 0}, + } + + + // construct RBM + var rbm RBM + RBM__construct(&rbm, train_N, n_visible, n_hidden, nil, nil, nil) + + // train + for epoch := 0; epoch < training_epochs; epoch++ { + for i := 0; i < train_N; i++ { + RBM_contrastive_divergence(&rbm, train_X[i], learning_rate, k) + } + } + + // test data + test_X := [][]int { + {1, 1, 0, 0, 0, 0}, + {0, 0, 0, 1, 1, 0}, + } + reconstructed_X := make([][]float64, test_N) + for i := 0; i < test_N; i++ { reconstructed_X[i] = make([]float64, n_visible)} + + + // test + for i := 0; i < test_N; i++ { + RBM_reconstruct(&rbm, test_X[i], reconstructed_X[i]) + for j := 0; j < n_visible; j++ { + fmt.Printf("%.5f ", reconstructed_X[i][j]) + } + fmt.Printf("\n") + } +} + + +func main() { + test_rbm() +} diff --git a/go/utils/utils.go b/go/utils/utils.go new file mode 100644 index 0000000..44b3af2 --- /dev/null +++ b/go/utils/utils.go @@ -0,0 +1,28 @@ +package utils + +import ( + "math" + "math/rand" +) + +func Uniform(min float64, max float64) float64 { + return rand.Float64() * (max - min) + min +} + +func Binomial(n int, p float64) int { + if p < 0 || p > 1 { return 0 } + + c := 0 + var r float64 + + for i := 0; i < n; i++ { + r = rand.Float64() + if r < p { c++ } + } + + return c +} + +func Sigmoid(x float64) float64 { + return 1.0 / (1.0 + math.Exp(-x)) +} From 76a32007bfe02b607f1ecf278b9f0c6623f2b834 Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori Date: Sun, 7 Dec 2014 15:21:19 +0900 Subject: [PATCH 08/36] DBN.go --- go/DBN.go | 237 ++++++++++++++++++++ go/HiddenLayer/HiddenLayer.go | 60 +++++ go/LogisticRegression/LogisticRegression.go | 77 +++++++ go/RBM/RBM.go | 139 ++++++++++++ 4 files changed, 513 insertions(+) create mode 100644 go/DBN.go create mode 100644 go/HiddenLayer/HiddenLayer.go create mode 100644 go/LogisticRegression/LogisticRegression.go create mode 100644 go/RBM/RBM.go diff --git a/go/DBN.go b/go/DBN.go new file mode 100644 index 0000000..498b473 --- /dev/null +++ b/go/DBN.go @@ -0,0 +1,237 @@ +package main + +import ( + "fmt" + "math/rand" + u "./utils" + H "./HiddenLayer" + R "./RBM" + L "./LogisticRegression" +) + +type DBN struct { + N int + n_ins int + hidden_layer_sizes []int + n_outs int + n_layers int + sigmoid_layers []H.HiddenLayer + rbm_layers []R.RBM + log_layer L.LogisticRegression +} + + +func DBN__construct(this *DBN, N int, n_ins int, hidden_layer_sizes []int, n_outs int, n_layers int) { 
+ var input_size int + + this.N = N + this.n_ins = n_ins + this.hidden_layer_sizes = hidden_layer_sizes + this.n_outs = n_outs + this.n_layers = n_layers + + this.sigmoid_layers = make([]H.HiddenLayer, n_layers) + this.rbm_layers = make([]R.RBM, n_layers) + + // construct multi-layer + for i := 0; i < n_layers; i++ { + if i == 0 { + input_size = n_ins + } else { + input_size = hidden_layer_sizes[i-1] + } + + // construct sigmoid_layer + H.HiddenLayer__construct(&(this.sigmoid_layers[i]), N, input_size, hidden_layer_sizes[i], nil, nil) + + // construct rbm_layer + R.RBM__construct(&(this.rbm_layers[i]), N, input_size, hidden_layer_sizes[i], this.sigmoid_layers[i].W, this.sigmoid_layers[i].B, nil) + } + + // layer for output using LogisticRegression + L.LogisticRegression__construct(&(this.log_layer), N, hidden_layer_sizes[n_layers-1], n_outs) +} + +func DBN_pretrain(this *DBN, train_X [][]int, lr float64, k int, epochs int){ + var ( + layer_input []int + prev_layer_input_size int + prev_layer_input []int + ) + + + for i := 0; i < this.n_layers; i++ { // layer-wise + for epoch := 0; epoch < epochs; epoch++ { // training epochs + for n := 0; n < this.N; n++ { // input x1...xN + + // layer input + for l := 0; l <= i; l++ { + if l == 0 { + layer_input = make([]int, this.n_ins) + for j := 0; j < this.n_ins; j++ { layer_input[j] = train_X[n][j] } + } else { + if l == 1 { + prev_layer_input_size = this.n_ins + } else { + prev_layer_input_size = this.hidden_layer_sizes[l-2] + } + + prev_layer_input = make([]int, prev_layer_input_size) + for j := 0; j < prev_layer_input_size; j++ { prev_layer_input[j] = layer_input[j] } + + layer_input = make([]int, this.hidden_layer_sizes[l-1]) + + H.HiddenLayer_sample_h_given_v(&(this.sigmoid_layers[l-1]), prev_layer_input, layer_input) + } + } + + R.RBM_contrastive_divergence(&(this.rbm_layers[i]), layer_input, lr, k) + } + } + } +} + +func DBN_finetune(this *DBN, train_X [][]int, train_Y [][]int, lr float64, epochs int) { + var ( + layer_input []int + prev_layer_input []int + ) + + for epoch := 0; epoch < epochs; epoch++ { + for n := 0; n < this.N; n++ { // input x1...xN + + // layer input + for i := 0; i < this.n_layers; i++ { + if i == 0 { + prev_layer_input = make([]int, this.n_ins) + for j := 0; j < this.n_ins; j++ { prev_layer_input[j] = train_X[n][j] } + } else { + prev_layer_input = make([]int, this.hidden_layer_sizes[i-1]) + for j:= 0; j < this.hidden_layer_sizes[i-1]; j++ { prev_layer_input[j] = layer_input[j] } + } + + layer_input = make([]int, this.hidden_layer_sizes[i]) + H.HiddenLayer_sample_h_given_v(&(this.sigmoid_layers[i]), prev_layer_input, layer_input) + } + + L.LogisticRegression_train(&(this.log_layer), layer_input, train_Y[n], lr) + } + // lr *= 0.95 + } +} + +func DBN_predict(this *DBN, x []int, y []float64) { + var ( + layer_input []float64 + ) + prev_layer_input := make([]float64, this.n_ins) + for j := 0; j < this.n_ins; j++ { prev_layer_input[j] = float64(x[j]) } + + + // layer activation + for i := 0; i < this.n_layers; i++ { + layer_input = make([]float64, this.sigmoid_layers[i].N_out) + + for k := 0; k < this.sigmoid_layers[i].N_out; k++ { + linear_outuput := 0.0 + + for j := 0; j < this.sigmoid_layers[i].N_in; j++ { + linear_outuput += this.sigmoid_layers[i].W[k][j] * prev_layer_input[j] + } + linear_outuput += this.sigmoid_layers[i].B[k] + layer_input[k] = u.Sigmoid(linear_outuput) + } + + if i < this.n_layers-1 { + prev_layer_input = make([]float64, this.sigmoid_layers[i].N_out) + + for j := 0; j < this.sigmoid_layers[i].N_out; 
j++ { + prev_layer_input[j] = layer_input[j] + } + } + } + + for i := 0; i < this.log_layer.N_out; i++ { + y[i] = 0 + for j := 0; j < this.log_layer.N_in; j++ { + y[i] += this.log_layer.W[i][j] * layer_input[j] + } + y[i] += this.log_layer.B[i] + } + + L.LogisticRegression_softmax(&(this.log_layer), y) +} + + +func test_dbn() { + rand.Seed(0) + + pretrain_lr := 0.1 + pretraining_epochs := 1000 + k := 1 + fintune_lr := 0.1 + fintune_epochs := 500 + + train_N := 6 + test_N := 4 + n_ins := 6 + n_outs := 2 + hidden_layer_sizes := []int {3, 3} + n_layers := len(hidden_layer_sizes) + + + // training data + train_X := [][]int { + {1, 1, 1, 0, 0, 0}, + {1, 0, 1, 0, 0, 0}, + {1, 1, 1, 0, 0, 0}, + {0, 0, 1, 1, 1, 0}, + {0, 0, 1, 1, 0, 0}, + {0, 0, 1, 1, 1, 0}, + } + + train_Y := [][]int { + {1, 0}, + {1, 0}, + {1, 0}, + {0, 1}, + {0, 1}, + {0, 1}, + } + + // construct DBN + var dbn DBN + DBN__construct(&dbn, train_N, n_ins, hidden_layer_sizes, n_outs, n_layers) + + // pretrain + DBN_pretrain(&dbn, train_X, pretrain_lr, k, pretraining_epochs) + + // finetune + DBN_finetune(&dbn, train_X, train_Y, fintune_lr, fintune_epochs) + + // test data + test_X := [][]int { + {1, 1, 0, 0, 0, 0}, + {1, 1, 1, 1, 0, 0}, + {0, 0, 0, 1, 1, 0}, + {0, 0, 1, 1, 1, 0}, + } + + test_Y := make([][]float64, test_N) + for i := 0; i < test_N; i++ { test_Y[i] = make([]float64, n_outs)} + + // test + for i := 0; i < test_N; i++ { + DBN_predict(&dbn, test_X[i], test_Y[i]) + for j := 0; j < n_outs; j++ { + fmt.Printf("%.5f ", test_Y[i][j]) + } + fmt.Printf("\n") + } +} + + + +func main() { + test_dbn() +} diff --git a/go/HiddenLayer/HiddenLayer.go b/go/HiddenLayer/HiddenLayer.go new file mode 100644 index 0000000..995ca44 --- /dev/null +++ b/go/HiddenLayer/HiddenLayer.go @@ -0,0 +1,60 @@ +package HiddenLayer + +import ( + u "../utils" +) + + +type HiddenLayer struct { + N int + N_in int + N_out int + W [][]float64 + B []float64 +} + + +// HiddenLayer +func HiddenLayer__construct(this *HiddenLayer, N int, n_in int, n_out int, W [][]float64, b []float64) { + a := 1.0 / float64(n_in) + + this.N = N + this.N_in = n_in + this.N_out = n_out + + if W == nil { + this.W = make([][]float64, n_out) + for i := 0; i < n_out; i++ { this.W[i] = make([]float64, n_in) } + + for i := 0; i < n_out; i++ { + for j := 0; j < n_in; j++ { + this.W[i][j] = u.Uniform(-a, a) + } + } + } else { + this.W = W + } + + if b == nil { + this.B = make([]float64, n_out) + } else { + this.B = b + } +} + +func HiddenLayer_output(this *HiddenLayer, input []int, w []float64, b float64) float64 { + linear_output := 0.0 + + for j := 0; j < this.N_in; j++ { + linear_output += w[j] * float64(input[j]) + } + linear_output += b + + return u.Sigmoid(linear_output) +} + +func HiddenLayer_sample_h_given_v(this *HiddenLayer, input []int, sample []int) { + for i := 0; i < this.N_out; i++ { + sample[i] = u.Binomial(1, HiddenLayer_output(this, input, this.W[i], this.B[i])) + } +} diff --git a/go/LogisticRegression/LogisticRegression.go b/go/LogisticRegression/LogisticRegression.go new file mode 100644 index 0000000..2f68ef5 --- /dev/null +++ b/go/LogisticRegression/LogisticRegression.go @@ -0,0 +1,77 @@ +package LogisticRegression + +import ( + "math" +) + +type LogisticRegression struct { + N int + N_in int + N_out int + W [][]float64 + B []float64 +} + + +func LogisticRegression__construct(this *LogisticRegression, N int, n_in int, n_out int) { + this.N = N + this.N_in = n_in + this.N_out = n_out + + this.W = make([][]float64, n_out) + for i := 0; i < n_out; i++ { this.W[i] = 
make([]float64, n_in) } + + this.B = make([]float64, n_out) +} + +func LogisticRegression_train(this *LogisticRegression, x []int, y []int, lr float64) { + p_y_given_x := make([]float64, this.N_out) + dy := make([]float64, this.N_out) + + for i := 0; i < this.N_out; i++ { + p_y_given_x[i] = 0 + for j := 0; j < this.N_in; j++ { + p_y_given_x[i] += this.W[i][j] * float64(x[j]) + } + p_y_given_x[i] += this.B[i] + } + LogisticRegression_softmax(this, p_y_given_x) + + for i := 0; i < this.N_out; i++ { + dy[i] = float64(y[i]) - p_y_given_x[i] + + for j := 0; j < this.N_in; j++ { + this.W[i][j] += lr * dy[i] * float64(x[j]) / float64(this.N) + } + + this.B[i] += lr * dy[i] / float64(this.N) + } + +} + +func LogisticRegression_softmax(this *LogisticRegression, x []float64) { + var ( + max float64 + sum float64 + ) + + for i := 0; i < this.N_out; i++ { if max < x[i] {max = x[i]} } + for i := 0; i < this.N_out; i++ { + x[i] = math.Exp(x[i] - max) + sum += x[i] + } + + for i := 0; i < this.N_out; i++ { x[i] /= sum } +} + +func LogisticRegression_predict(this *LogisticRegression, x []int, y []float64) { + for i := 0; i < this.N_out; i++ { + y[i] = 0 + for j := 0; j < this.N_in; j++ { + y[i] += this.W[i][j] * float64(x[j]) + } + y[i] += this.B[i] + } + + LogisticRegression_softmax(this, y) +} diff --git a/go/RBM/RBM.go b/go/RBM/RBM.go new file mode 100644 index 0000000..708f8b7 --- /dev/null +++ b/go/RBM/RBM.go @@ -0,0 +1,139 @@ +package RBM + +import ( + u "../utils" +) + +type RBM struct { + N int + n_visible int + n_hidden int + W [][]float64 + hbias []float64 + vbias []float64 +} + + +func RBM__construct(this *RBM, N int, n_visible int, n_hidden int, W [][]float64, hbias []float64, vbias []float64) { + a := 1.0 / float64(n_visible) + + this.N = N + this.n_visible = n_visible + this.n_hidden = n_hidden + + if W == nil { + this.W = make([][]float64, n_hidden) + for i := 0; i < n_hidden; i++ { this.W[i] = make([]float64, n_visible) } + + for i := 0; i < n_hidden; i++ { + for j := 0; j < n_visible; j++ { + this.W[i][j] = u.Uniform(-a, a) + } + } + } else { + this.W = W + } + + if hbias == nil { + this.hbias = make([]float64, n_hidden) + } else { + this.hbias = hbias + } + + if vbias == nil { + this.vbias = make([]float64, n_visible) + } else { + this.vbias = vbias + } +} + +func RBM_contrastive_divergence(this *RBM, input []int, lr float64, k int) { + ph_mean := make([]float64, this.n_hidden) + ph_sample := make([]int, this.n_hidden) + nv_means := make([]float64, this.n_visible) + nv_samples := make([]int, this.n_visible) + nh_means := make([]float64, this.n_hidden) + nh_samples := make([]int, this.n_hidden) + + /* CD-k */ + RBM_sample_h_given_v(this, input, ph_mean, ph_sample) + + for step := 0; step < k; step++ { + if step == 0 { + RBM_gibbs_hvh(this, ph_sample, nv_means, nv_samples, nh_means, nh_samples) + } else { + RBM_gibbs_hvh(this, nh_samples, nv_means, nv_samples, nh_means, nh_samples) + } + } + + for i := 0; i < this.n_hidden; i++ { + for j := 0; j < this.n_visible; j++ { + this.W[i][j] += lr * (ph_mean[i] * float64(input[j]) - nh_means[i] * float64(nv_samples[j])) / float64(this.N) + } + this.hbias[i] += lr * (float64(ph_sample[i]) - nh_means[i]) / float64(this.N) + } + + for i := 0; i < this.n_visible; i++ { + this.vbias[i] += lr * float64(input[i] - nv_samples[i]) / float64(this.N) + } +} + +func RBM_sample_h_given_v(this *RBM, v0_sample []int, mean []float64, sample []int) { + for i := 0; i < this.n_hidden; i++ { + mean[i] = RBM_propup(this, v0_sample, this.W[i], this.hbias[i]) + 
sample[i] = u.Binomial(1, mean[i]) + } +} + +func RBM_sample_v_given_h(this *RBM, h0_sample []int, mean []float64, sample []int) { + for i := 0; i < this.n_visible; i++ { + mean[i] = RBM_propdown(this, h0_sample, i, this.vbias[i]) + sample[i] = u.Binomial(1, mean[i]) + } +} + +func RBM_propup(this *RBM, v []int, w []float64, b float64) float64 { + pre_sigmoid_activation := 0.0 + + for j := 0; j < this.n_visible; j++ { + pre_sigmoid_activation += w[j] * float64(v[j]) + } + pre_sigmoid_activation += b + + return u.Sigmoid(pre_sigmoid_activation) +} + +func RBM_propdown(this *RBM, h []int, i int, b float64) float64 { + pre_sigmoid_activation := 0.0 + + for j := 0; j < this.n_hidden; j++ { + pre_sigmoid_activation += this.W[j][i] * float64(h[j]) + } + pre_sigmoid_activation += b + + return u.Sigmoid(pre_sigmoid_activation) +} + +func RBM_gibbs_hvh(this *RBM, h0_sample []int, nv_means []float64, nv_samples []int, nh_means []float64, nh_samples []int) { + RBM_sample_v_given_h(this, h0_sample, nv_means, nv_samples) + RBM_sample_h_given_v(this, nv_samples, nh_means, nh_samples) +} + +func RBM_reconstruct(this *RBM, v []int, reconstructed_v []float64) { + h := make([]float64, this.n_hidden) + var pre_sigmoid_activation float64 + + for i := 0; i < this.n_hidden; i++ { + h[i] = RBM_propup(this, v, this.W[i], this.hbias[i]) + } + + for i := 0; i < this.n_visible; i++ { + pre_sigmoid_activation = 0.0 + for j := 0; j < this.n_hidden; j++ { + pre_sigmoid_activation += this.W[j][i] * h[j] + } + pre_sigmoid_activation += this.vbias[i] + + reconstructed_v[i] = u.Sigmoid(pre_sigmoid_activation) + } +} From daded50a016a085dcfe8f7c43b881c344ac4a342 Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori Date: Sat, 13 Dec 2014 21:43:14 +0900 Subject: [PATCH 09/36] dA.go --- go/dA.go | 195 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 195 insertions(+) create mode 100644 go/dA.go diff --git a/go/dA.go b/go/dA.go new file mode 100644 index 0000000..3b14c6c --- /dev/null +++ b/go/dA.go @@ -0,0 +1,195 @@ +package main + +import ( + "os" + "fmt" + "math/rand" + u "./utils" +) + +type dA struct { + N int + n_visible int + n_hidden int + W [][]float64 + hbias []float64 + vbias []float64 +} + + +func dA__construct(this *dA, N int, n_visible int, n_hidden int, W [][]float64, hbias []float64, vbias []float64) { + a := 1.0 / float64(n_visible) + + this.N = N + this.n_visible = n_visible + this.n_hidden = n_hidden + + if W == nil { + this.W = make([][]float64, n_hidden) + for i := 0; i < n_hidden; i++ { this.W[i] = make([]float64, n_visible) } + + for i := 0; i < n_hidden; i++ { + for j := 0; j < n_visible; j++ { + this.W[i][j] = u.Uniform(-a, a) + } + } + } else { + this.W = W + } + + if hbias == nil { + this.hbias = make([]float64, n_hidden) + } else { + this.hbias = hbias + } + + if vbias == nil { + this.vbias = make([]float64, n_visible) + } else { + this.vbias = vbias + } +} + +func dA_get_corrupted_input(this *dA, x []int, tilde_x []int, p float64) { + for i := 0; i < this.n_visible; i++ { + if x[i] == 0 { + tilde_x[i] = 0 + } else { + tilde_x[i] = u.Binomial(1, p) + } + } +} + +// Encode +func dA_get_hidden_values(this *dA, x []int, y []float64) { + for i := 0; i < this.n_hidden; i++ { + y[i] = 0 + for j := 0; j < this.n_visible; j++ { + y[i] += this.W[i][j] * float64(x[j]) + } + y[i] += this.hbias[i] + y[i] = u.Sigmoid(y[i]) + } +} + +// Decode +func dA_get_reconstructed_input(this *dA, y []float64, z []float64) { + for i := 0; i < this.n_visible; i++ { + z[i] = 0 + for j := 0; j < this.n_hidden; 
j++ { + z[i] += this.W[j][i] * y[j] + } + z[i] += this.vbias[i] + z[i] = u.Sigmoid(z[i]) + } +} + +func dA_train(this *dA, x []int, lr float64, corruption_level float64) { + tilde_x := make([]int, this.n_visible) + y := make([]float64, this.n_hidden) + z := make([]float64, this.n_visible) + + L_vbias := make([]float64, this.n_visible) + L_hbias := make([]float64, this.n_hidden) + + p := 1 - corruption_level + + dA_get_corrupted_input(this, x, tilde_x, p) + dA_get_hidden_values(this, tilde_x, y) + dA_get_reconstructed_input(this, y, z) + + // vbias + for i := 0; i < this.n_visible; i++ { + L_vbias[i] = float64(x[i]) - z[i] + this.vbias[i] += lr * L_vbias[i] / float64(this.N) + } + + // hbias + for i := 0; i < this.n_hidden; i++ { + L_hbias[i] = 0 + for j := 0; j < this.n_visible; j++ { + L_hbias[i] += this.W[i][j] * L_vbias[j] + } + L_hbias[i] *= y[i] * (1- y[i]) + this.hbias[i] += lr * L_hbias[i] / float64(this.N) + } + + // W + for i := 0; i < this.n_hidden; i++ { + for j := 0; j < this.n_visible; j++ { + this.W[i][j] += lr * (L_hbias[i] * float64(tilde_x[j]) + L_vbias[j] * y[i]) / float64(this.N) + } + } +} + +func dA_reconstruct(this *dA, x []int, z []float64) { + y := make([]float64, this.n_hidden) + + dA_get_hidden_values(this, x, y) + dA_get_reconstructed_input(this, y, z) +} + + + + +func test_dA() { + rand.Seed(0) + + learning_rate := 0.1 + corruption_level := 0.3 + training_epochs := 1000 + + train_N := 6 + test_N := 2 + n_visible := 20 + n_hidden := 5 + + // training data + train_X := [][]int { + {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0}, + } + + // construct dA + var da dA + dA__construct(&da, train_N, n_visible, n_hidden, nil, nil, nil) + + // train + for epoch := 0; epoch < training_epochs; epoch++ { + for i := 0; i < train_N; i++ { + dA_train(&da, train_X[i], learning_rate, corruption_level) + } + } + + // test data + test_X := [][]int { + {1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0}, + } + reconstructed_X := make([][]float64, test_N) + for i := 0; i < test_N; i++ { reconstructed_X[i] = make([]float64, n_visible)} + + + // test + for i := 0; i < test_N; i++ { + dA_reconstruct(&da, test_X[i], reconstructed_X[i]) + for j := 0; j < n_visible; j++ { + fmt.Printf("%.5f ", reconstructed_X[i][j]) + } + fmt.Printf("\n") + } + os.Exit(0) +} + +func main() { + test_dA() + +} From 3dfb34a81b0b31f4a440634f6f5f5e4d8feccbc5 Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori Date: Sat, 13 Dec 2014 21:44:26 +0900 Subject: [PATCH 10/36] minor fix --- go/dA.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/go/dA.go b/go/dA.go index 3b14c6c..a36c226 100644 --- a/go/dA.go +++ b/go/dA.go @@ -1,7 +1,6 @@ package main import ( - "os" "fmt" "math/rand" u "./utils" @@ -186,10 +185,8 @@ func test_dA() { } fmt.Printf("\n") } - os.Exit(0) } func main() { test_dA() - } From 50a09be13f219008bae939fb2ef173994044ca51 Mon Sep 17 00:00:00 2001 
From: Yusuke Sugomori Date: Sun, 14 Dec 2014 16:55:49 +0900 Subject: [PATCH 11/36] SdA.go --- go/DBN.go | 6 +- go/SdA.go | 241 ++++++++++++++++++++++++++++++++++++++++++++++++++++ go/dA/dA.go | 128 ++++++++++++++++++++++++++++ 3 files changed, 372 insertions(+), 3 deletions(-) create mode 100644 go/SdA.go create mode 100644 go/dA/dA.go diff --git a/go/DBN.go b/go/DBN.go index 498b473..7962a5a 100644 --- a/go/DBN.go +++ b/go/DBN.go @@ -169,8 +169,8 @@ func test_dbn() { pretrain_lr := 0.1 pretraining_epochs := 1000 k := 1 - fintune_lr := 0.1 - fintune_epochs := 500 + finetune_lr := 0.1 + finetune_epochs := 500 train_N := 6 test_N := 4 @@ -207,7 +207,7 @@ func test_dbn() { DBN_pretrain(&dbn, train_X, pretrain_lr, k, pretraining_epochs) // finetune - DBN_finetune(&dbn, train_X, train_Y, fintune_lr, fintune_epochs) + DBN_finetune(&dbn, train_X, train_Y, finstune_lr, finetune_epochs) // test data test_X := [][]int { diff --git a/go/SdA.go b/go/SdA.go new file mode 100644 index 0000000..27ccaf6 --- /dev/null +++ b/go/SdA.go @@ -0,0 +1,241 @@ +package main + +import ( + "fmt" + "math/rand" + u "./utils" + H "./HiddenLayer" + D "./dA" + L "./LogisticRegression" +) + +type SdA struct { + N int + n_ins int + hidden_layer_sizes []int + n_outs int + n_layers int + sigmoid_layers []H.HiddenLayer + dA_layers []D.DA + log_layer L.LogisticRegression +} + + +func SdA__construct(this *SdA, N int, n_ins int, hidden_layer_sizes []int, n_outs int, n_layers int) { + var input_size int + + this.N = N + this.n_ins = n_ins + this.hidden_layer_sizes = hidden_layer_sizes + this.n_outs = n_outs + this.n_layers = n_layers + + this.sigmoid_layers = make([]H.HiddenLayer, n_layers) + this.dA_layers = make([]D.DA, n_layers) + + // construct multi-layer + for i := 0; i < n_layers; i++ { + if i == 0 { + input_size = n_ins + } else { + input_size = hidden_layer_sizes[i-1] + } + + // construct sigmoid_layer + H.HiddenLayer__construct(&(this.sigmoid_layers[i]), N, input_size, hidden_layer_sizes[i], nil, nil) + + // construct dA_layer + D.DA__construct(&(this.dA_layers[i]), N, input_size, hidden_layer_sizes[i], this.sigmoid_layers[i].W, this.sigmoid_layers[i].B, nil) + } + + // layer for output using LogisticRegression + L.LogisticRegression__construct(&(this.log_layer), N, hidden_layer_sizes[n_layers-1], n_outs) +} + +func SdA_pretrain(this *SdA, train_X [][]int, lr float64, corruption_level float64, epochs int) { + var ( + layer_input []int + prev_layer_input_size int + prev_layer_input []int + ) + for i := 0; i < this.n_layers; i++ { // layer-wise + for epoch := 0; epoch < epochs; epoch++ { // training epochs + for n := 0; n < this.N; n++ { // input x1...xN + + // layer input + for l := 0; l <= i; l++ { + if l == 0 { + layer_input = make([]int, this.n_ins) + for j := 0; j < this.n_ins; j++ { layer_input[j] = train_X[n][j] } + } else { + if l == 1 { + prev_layer_input_size = this.n_ins + } else { + prev_layer_input_size = this.hidden_layer_sizes[l-2] + } + + prev_layer_input = make([]int, prev_layer_input_size) + for j := 0; j < prev_layer_input_size; j++ { prev_layer_input[j] = layer_input[j] } + + layer_input = make([]int, this.hidden_layer_sizes[l-1]) + + H.HiddenLayer_sample_h_given_v(&(this.sigmoid_layers[l-1]), prev_layer_input, layer_input) + } + } + + D.DA_train(&(this.dA_layers[i]), layer_input, lr, corruption_level) + } + } + } +} + +func SdA_finetune(this *SdA, train_X [][]int, train_Y [][]int, lr float64, epochs int) { + var ( + layer_input []int + prev_layer_input []int + ) + + for epoch := 0; epoch < epochs; 
epoch++ { + for n := 0; n < this.N; n++ { // input x1...xN + + // layer input + for i := 0; i < this.n_layers; i++ { + if i == 0 { + prev_layer_input = make([]int, this.n_ins) + for j := 0; j < this.n_ins; j++ { prev_layer_input[j] = train_X[n][j] } + } else { + prev_layer_input = make([]int, this.hidden_layer_sizes[i-1]) + for j:= 0; j < this.hidden_layer_sizes[i-1]; j++ { prev_layer_input[j] = layer_input[j] } + } + + layer_input = make([]int, this.hidden_layer_sizes[i]) + H.HiddenLayer_sample_h_given_v(&(this.sigmoid_layers[i]), prev_layer_input, layer_input) + } + + L.LogisticRegression_train(&(this.log_layer), layer_input, train_Y[n], lr) + } + // lr *= 0.95 + } +} + +func SdA_predict(this *SdA, x []int, y []float64) { + var ( + layer_input []float64 + ) + prev_layer_input := make([]float64, this.n_ins) + for j := 0; j < this.n_ins; j++ { prev_layer_input[j] = float64(x[j]) } + + // layer activation + for i := 0; i < this.n_layers; i++ { + layer_input = make([]float64, this.sigmoid_layers[i].N_out) + + for k := 0; k < this.sigmoid_layers[i].N_out; k++ { + linear_outuput := 0.0 + + for j := 0; j < this.sigmoid_layers[i].N_in; j++ { + linear_outuput += this.sigmoid_layers[i].W[k][j] * prev_layer_input[j] + } + linear_outuput += this.sigmoid_layers[i].B[k] + layer_input[k] = u.Sigmoid(linear_outuput) + } + + if i < this.n_layers-1 { + prev_layer_input = make([]float64, this.sigmoid_layers[i].N_out) + + for j := 0; j < this.sigmoid_layers[i].N_out; j++ { + prev_layer_input[j] = layer_input[j] + } + } + } + + for i := 0; i < this.log_layer.N_out; i++ { + y[i] = 0 + for j := 0; j < this.log_layer.N_in; j++ { + y[i] += this.log_layer.W[i][j] * layer_input[j] + } + y[i] += this.log_layer.B[i] + } + + L.LogisticRegression_softmax(&(this.log_layer), y) +} + +func test_SdA() { + rand.Seed(0) + + pretrain_lr := 0.1 + corruption_level := 0.3 + pretraining_epochs := 1000 + finetune_lr := 0.1 + finetune_epochs := 500 + + train_N := 10 + test_N := 4 + n_ins := 28 + n_outs := 2 + hidden_layer_sizes := []int {15, 15} + n_layers := len(hidden_layer_sizes) + + + // training data + train_X := [][]int { + {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1}, + } + + train_Y := [][]int { + {1, 0}, + {1, 0}, + {1, 0}, + {1, 0}, + {1, 0}, + {0, 1}, + {0, 1}, + {0, 1}, + {0, 1}, + {0, 1}, + } + + // construct SdA + var sda SdA + SdA__construct(&sda, train_N, n_ins, hidden_layer_sizes, n_outs, n_layers) + + // pretrain + SdA_pretrain(&sda, train_X, pretrain_lr, corruption_level, pretraining_epochs) + + // finetune + SdA_finetune(&sda, train_X, train_Y, finetune_lr, finetune_epochs) + + + // test data + test_X := [][]int { + {1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0}, + {1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1}, + } + + test_Y := make([][]float64, test_N) + for i := 0; i < test_N; i++ { test_Y[i] = make([]float64, n_outs)} + + // test + for i := 0; i < test_N; i++ { + SdA_predict(&sda, test_X[i], test_Y[i]) + for j := 0; j < n_outs; j++ { + fmt.Printf("%.5f ", test_Y[i][j]) + } + fmt.Printf("\n") + } +} + + +func main() { + test_SdA() +} diff --git a/go/dA/dA.go b/go/dA/dA.go new file mode 100644 index 0000000..b41d1ce --- /dev/null +++ b/go/dA/dA.go @@ -0,0 +1,128 @@ +package dA + +import ( + u "../utils" +) + + +type DA struct { + N int + n_visible int + n_hidden int + W [][]float64 + hbias []float64 + vbias []float64 +} + + +func DA__construct(this *DA, N int, n_visible int, n_hidden int, W [][]float64, hbias []float64, vbias []float64) { + a := 1.0 / float64(n_visible) + + this.N = N + this.n_visible = n_visible + this.n_hidden = n_hidden + + if W == nil { + this.W = make([][]float64, n_hidden) + for i := 0; i < n_hidden; i++ { this.W[i] = make([]float64, n_visible) } + + for i := 0; i < n_hidden; i++ { + for j := 0; j < n_visible; j++ { + this.W[i][j] = u.Uniform(-a, a) + } + } + } else { + this.W = W + } + + if hbias == nil { + this.hbias = make([]float64, n_hidden) + } else { + this.hbias = hbias + } + + if vbias == nil { + this.vbias = make([]float64, n_visible) + } else { + this.vbias = vbias + } +} + +func dA_get_corrupted_input(this *DA, x []int, tilde_x []int, p float64) { + for i := 0; i < this.n_visible; i++ { + if x[i] == 0 { + tilde_x[i] = 0 + } else { + tilde_x[i] = u.Binomial(1, p) + } + } +} + +// Encode +func dA_get_hidden_values(this *DA, x []int, y []float64) { + for i := 0; i < this.n_hidden; i++ { + y[i] = 0 + for j := 0; j < this.n_visible; j++ { + y[i] += this.W[i][j] * float64(x[j]) + } + y[i] += this.hbias[i] + y[i] = u.Sigmoid(y[i]) + } +} + +// Decode +func dA_get_reconstructed_input(this *DA, y []float64, z []float64) { + for i := 0; i < this.n_visible; i++ { + z[i] = 0 + for j := 0; j < this.n_hidden; j++ { + z[i] += this.W[j][i] * y[j] + } + z[i] += this.vbias[i] + z[i] = u.Sigmoid(z[i]) + } +} + +func DA_train(this *DA, x []int, lr float64, corruption_level float64) { + tilde_x := make([]int, this.n_visible) + y := make([]float64, this.n_hidden) + z := make([]float64, this.n_visible) + + L_vbias := make([]float64, this.n_visible) + L_hbias := make([]float64, this.n_hidden) + + p := 1 - corruption_level + + dA_get_corrupted_input(this, x, tilde_x, p) + dA_get_hidden_values(this, tilde_x, y) + dA_get_reconstructed_input(this, y, z) + + // vbias + for i := 0; i < this.n_visible; i++ { + L_vbias[i] = float64(x[i]) - z[i] + this.vbias[i] += lr * L_vbias[i] / float64(this.N) + } + + // hbias + for i := 0; i < this.n_hidden; i++ { + L_hbias[i] = 0 + for j := 0; j < this.n_visible; j++ { + L_hbias[i] += this.W[i][j] * L_vbias[j] + } + L_hbias[i] *= y[i] * (1- y[i]) + this.hbias[i] += lr * L_hbias[i] / float64(this.N) + } + + // W + for i := 0; i < this.n_hidden; i++ { + for j := 0; j < this.n_visible; j++ { + this.W[i][j] += lr * (L_hbias[i] * float64(tilde_x[j]) + L_vbias[j] * y[i]) / float64(this.N) + } + } +} + +func dA_reconstruct(this *DA, x []int, z []float64) { + y := make([]float64, this.n_hidden) + + dA_get_hidden_values(this, x, y) + dA_get_reconstructed_input(this, y, z) +} From 
9a8f85cd32bc0a50747d56fc7e684a50bdf2c9f0 Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori Date: Sun, 14 Dec 2014 16:56:52 +0900 Subject: [PATCH 12/36] fix typo --- go/DBN.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/DBN.go b/go/DBN.go index 7962a5a..e5522a6 100644 --- a/go/DBN.go +++ b/go/DBN.go @@ -207,7 +207,7 @@ func test_dbn() { DBN_pretrain(&dbn, train_X, pretrain_lr, k, pretraining_epochs) // finetune - DBN_finetune(&dbn, train_X, train_Y, finstune_lr, finetune_epochs) + DBN_finetune(&dbn, train_X, train_Y, finetune_lr, finetune_epochs) // test data test_X := [][]int { From c02375dfe8db3080075ded85fa7d6338812f025b Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori Date: Thu, 25 Jun 2015 09:46:48 +0200 Subject: [PATCH 13/36] added MLP --- python/CDBN.py | 23 +++------- python/CRBM.py | 15 +------ python/DBN.py | 41 +++--------------- python/HiddenLayer.py | 60 +++++++++++++++----------- python/LogisticRegression.py | 21 +++------- python/MLP.py | 81 ++++++++++++++++++++++++++++++++++++ python/RBM.py | 33 ++++----------- python/SdA.py | 25 +++-------- python/dA.py | 37 ++++------------ python/utils.py | 11 +++-- 10 files changed, 163 insertions(+), 184 deletions(-) create mode 100755 python/MLP.py diff --git a/python/CDBN.py b/python/CDBN.py index 4ac987a..dbf6648 100755 --- a/python/CDBN.py +++ b/python/CDBN.py @@ -1,16 +1,5 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- -''' - DBN w/ continuous-valued inputs (Linear Energy) - - References : - - Y. Bengio, P. Lamblin, D. Popovici, H. Larochelle: Greedy Layer-Wise - Training of Deep Networks, Advances in Neural Information Processing - Systems 19, 2007 - -''' - import sys import numpy from HiddenLayer import HiddenLayer @@ -20,13 +9,11 @@ from DBN import DBN from utils import * - - class CDBN(DBN): def __init__(self, input=None, label=None,\ n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,\ - numpy_rng=None): + rng=None): self.x = input self.y = label @@ -35,8 +22,8 @@ def __init__(self, input=None, label=None,\ self.rbm_layers = [] self.n_layers = len(hidden_layer_sizes) # = len(self.rbm_layers) - if numpy_rng is None: - numpy_rng = numpy.random.RandomState(1234) + if rng is None: + rng = numpy.random.RandomState(1234) assert self.n_layers > 0 @@ -60,7 +47,7 @@ def __init__(self, input=None, label=None,\ sigmoid_layer = HiddenLayer(input=layer_input, n_in=input_size, n_out=hidden_layer_sizes[i], - numpy_rng=numpy_rng, + rng=rng, activation=sigmoid) self.sigmoid_layers.append(sigmoid_layer) @@ -113,7 +100,7 @@ def test_cdbn(pretrain_lr=0.1, pretraining_epochs=1000, k=1, \ rng = numpy.random.RandomState(123) # construct DBN - dbn = CDBN(input=x, label=y, n_ins=6, hidden_layer_sizes=[5, 5], n_outs=2, numpy_rng=rng) + dbn = CDBN(input=x, label=y, n_ins=6, hidden_layer_sizes=[5, 5], n_outs=2, rng=rng) # pre-training (TrainUnsupervisedDBN) dbn.pretrain(lr=pretrain_lr, k=1, epochs=pretraining_epochs) diff --git a/python/CRBM.py b/python/CRBM.py index 0521883..e870047 100755 --- a/python/CRBM.py +++ b/python/CRBM.py @@ -1,16 +1,5 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- -""" - RBM w/ continuous-valued inputs (Linear Energy) - - References : - - Y. Bengio, P. Lamblin, D. Popovici, H. 
Larochelle: Greedy Layer-Wise - Training of Deep Networks, Advances in Neural Information Processing - Systems 19, 2007 - -""" - import sys import numpy from RBM import RBM @@ -30,7 +19,7 @@ def sample_v_given_h(self, h0_sample): ep = numpy.exp(a_h) v1_mean = 1 / (1 - en) - 1 / a_h - U = numpy.array(self.numpy_rng.uniform( + U = numpy.array(self.rng.uniform( low=0, high=1, size=v1_mean.shape)) @@ -53,7 +42,7 @@ def test_crbm(learning_rate=0.1, k=1, training_epochs=1000): rng = numpy.random.RandomState(123) # construct CRBM - rbm = CRBM(input=data, n_visible=6, n_hidden=5, numpy_rng=rng) + rbm = CRBM(input=data, n_visible=6, n_hidden=5, rng=rng) # train for epoch in xrange(training_epochs): diff --git a/python/DBN.py b/python/DBN.py index f639823..b1b351b 100755 --- a/python/DBN.py +++ b/python/DBN.py @@ -1,20 +1,5 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- -''' - Deep Belief Nets (DBN) - - References : - - Y. Bengio, P. Lamblin, D. Popovici, H. Larochelle: Greedy Layer-Wise - Training of Deep Networks, Advances in Neural Information Processing - Systems 19, 2007 - - - - DeepLearningTutorials - https://github.com/lisa-lab/DeepLearningTutorials - -''' - import sys import numpy from HiddenLayer import HiddenLayer @@ -26,7 +11,7 @@ class DBN(object): def __init__(self, input=None, label=None,\ n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,\ - numpy_rng=None): + rng=None): self.x = input self.y = label @@ -35,8 +20,8 @@ def __init__(self, input=None, label=None,\ self.rbm_layers = [] self.n_layers = len(hidden_layer_sizes) # = len(self.rbm_layers) - if numpy_rng is None: - numpy_rng = numpy.random.RandomState(1234) + if rng is None: + rng = numpy.random.RandomState(1234) assert self.n_layers > 0 @@ -60,7 +45,7 @@ def __init__(self, input=None, label=None,\ sigmoid_layer = HiddenLayer(input=layer_input, n_in=input_size, n_out=hidden_layer_sizes[i], - numpy_rng=numpy_rng, + rng=rng, activation=sigmoid) self.sigmoid_layers.append(sigmoid_layer) @@ -100,21 +85,6 @@ def pretrain(self, lr=0.1, k=1, epochs=100): # print >> sys.stderr, \ # 'Pre-training layer %d, epoch %d, cost ' %(i, epoch), cost - # def pretrain(self, lr=0.1, k=1, epochs=100): - # # pre-train layer-wise - # for i in xrange(self.n_layers): - # rbm = self.rbm_layers[i] - - # for epoch in xrange(epochs): - # layer_input = self.x - # for j in xrange(i): - # layer_input = self.sigmoid_layers[j].sample_h_given_v(layer_input) - - # rbm.contrastive_divergence(lr=lr, k=k, input=layer_input) - # # cost = rbm.get_reconstruction_cross_entropy() - # # print >> sys.stderr, \ - # # 'Pre-training layer %d, epoch %d, cost ' %(i, epoch), cost - def finetune(self, lr=0.1, epochs=100): layer_input = self.sigmoid_layers[-1].sample_h_given_v() @@ -158,12 +128,11 @@ def test_dbn(pretrain_lr=0.1, pretraining_epochs=1000, k=1, \ [0, 1], [0, 1], [0, 1]]) - rng = numpy.random.RandomState(123) # construct DBN - dbn = DBN(input=x, label=y, n_ins=6, hidden_layer_sizes=[3, 3], n_outs=2, numpy_rng=rng) + dbn = DBN(input=x, label=y, n_ins=6, hidden_layer_sizes=[3, 3], n_outs=2, rng=rng) # pre-training (TrainUnsupervisedDBN) dbn.pretrain(lr=pretrain_lr, k=1, epochs=pretraining_epochs) diff --git a/python/HiddenLayer.py b/python/HiddenLayer.py index 72e51e1..4130b35 100755 --- a/python/HiddenLayer.py +++ b/python/HiddenLayer.py @@ -1,15 +1,5 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- -''' - Hidden Layer - - References : - - DeepLearningTutorials - https://github.com/lisa-lab/DeepLearningTutorials - -''' - import sys import numpy from utils import * @@ 
-17,38 +7,42 @@ class HiddenLayer(object): def __init__(self, input, n_in, n_out,\ - W=None, b=None, numpy_rng=None, activation=numpy.tanh): + W=None, b=None, rng=None, activation=numpy.tanh): - if numpy_rng is None: - numpy_rng = numpy.random.RandomState(1234) + if rng is None: + rng = numpy.random.RandomState(1234) if W is None: a = 1. / n_in - initial_W = numpy.array(numpy_rng.uniform( # initialize W uniformly + W = numpy.array(rng.uniform( # initialize W uniformly low=-a, high=a, size=(n_in, n_out))) - W = initial_W - if b is None: b = numpy.zeros(n_out) # initialize bias 0 - - self.numpy_rng = numpy_rng - self.input = input + self.rng = rng + self.x = input self.W = W self.b = b + if activation == numpy.tanh: + self.dactivation = dtanh + elif activation == sigmoid: + self.dactivation = dsigmoid + else: + raise ValueError('activation function not supported.') + self.activation = activation + - # self.params = [self.W, self.b] def output(self, input=None): if input is not None: - self.input = input + self.x = input - linear_output = numpy.dot(self.input, self.W) + self.b + linear_output = numpy.dot(self.x, self.W) + self.b return (linear_output if self.activation is None else self.activation(linear_output)) @@ -56,10 +50,28 @@ def output(self, input=None): def sample_h_given_v(self, input=None): if input is not None: - self.input = input + self.x = input v_mean = self.output() - h_sample = self.numpy_rng.binomial(size=v_mean.shape, + h_sample = self.rng.binomial(size=v_mean.shape, n=1, p=v_mean) return h_sample + + + + def forward(self, input=None): + return self.output(input=input) + + + def backward(self, prev_layer, lr=0.1, input=None): + if input is not None: + self.x = input + + # d_y = (1 - prev_layer.x * prev_layer.x) * numpy.dot(prev_layer.d_y, prev_layer.W.T) + d_y = self.dactivation(prev_layer.x) * numpy.dot(prev_layer.d_y, prev_layer.W.T) + + self.W += lr * numpy.dot(self.x.T, d_y) + self.b += lr * numpy.mean(d_y, axis=0) + + self.d_y = d_y diff --git a/python/LogisticRegression.py b/python/LogisticRegression.py index a828a40..f1a686d 100755 --- a/python/LogisticRegression.py +++ b/python/LogisticRegression.py @@ -1,18 +1,5 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- -''' - Logistic Regression - - References : - - Jason Rennie: Logistic Regression, - http://qwone.com/~jason/writing/lr.pdf - - - DeepLearningTutorials - https://github.com/lisa-lab/DeepLearningTutorials - -''' - import sys import numpy from utils import * @@ -25,7 +12,6 @@ def __init__(self, input, label, n_in, n_out): self.W = numpy.zeros((n_in, n_out)) # initialize W 0 self.b = numpy.zeros(n_out) # initialize bias 0 - # self.params = [self.W, self.b] def train(self, lr=0.1, input=None, L2_reg=0.00): if input is not None: @@ -37,6 +23,8 @@ def train(self, lr=0.1, input=None, L2_reg=0.00): self.W += lr * numpy.dot(self.x.T, d_y) - lr * L2_reg * self.W self.b += lr * numpy.mean(d_y, axis=0) + + self.d_y = d_y # cost = self.negative_log_likelihood() # return cost @@ -57,6 +45,9 @@ def predict(self, x): # return sigmoid(numpy.dot(x, self.W) + self.b) return softmax(numpy.dot(x, self.W) + self.b) + def output(self, x): + return self.predict(x) + def test_lr(learning_rate=0.01, n_epochs=200): # training data @@ -90,7 +81,7 @@ def test_lr(learning_rate=0.01, n_epochs=200): [0, 0, 0, 1, 1, 0], [1, 1, 1, 1, 1, 0]]) - print >> sys.stderr, classifier.predict(x) + print classifier.predict(x) if __name__ == "__main__": diff --git a/python/MLP.py b/python/MLP.py new file mode 100755 index 0000000..def6ab7 --- /dev/null 
+++ b/python/MLP.py @@ -0,0 +1,81 @@ +# -*- coding: utf-8 -*- + +import sys +import numpy +from HiddenLayer import HiddenLayer +from LogisticRegression import LogisticRegression +from utils import * + + +class MLP(object): + def __init__(self, input, label, n_in, n_hidden, n_out, rng=None): + + self.x = input + self.y = label + + if rng is None: + rng = numpy.random.RandomState(1234) + + # construct hidden_layer (tanh, sigmoid, etc...) + self.hidden_layer = HiddenLayer(input=self.x, + n_in=n_in, + n_out=n_hidden, + rng=rng, + activation=numpy.tanh) + + # construct log_layer (softmax) + self.log_layer = LogisticRegression(input=self.hidden_layer.output, + label=self.y, + n_in=n_hidden, + n_out=n_out) + + def train(self): + layer_input = self.hidden_layer.forward() + # print self.hidden_layer.W + + self.log_layer.train(input=layer_input) + self.hidden_layer.backward(prev_layer=self.log_layer) + + + def predict(self, x): + x = self.hidden_layer.output(x) + return self.log_layer.predict(x) + + +def test_mlp(n_epochs=100): + + x = numpy.array([[1,1,1,0,0,0], + [1,0,1,0,0,0], + [1,1,1,0,0,0], + [0,0,1,1,1,0], + [0,0,1,1,0,0], + [0,0,1,1,1,0]]) + y = numpy.array([[1, 0], + [1, 0], + [1, 0], + [0, 1], + [0, 1], + [0, 1]]) + + + rng = numpy.random.RandomState(123) + + + # construct MLP + classifier = MLP(input=x, label=y, n_in=6, n_hidden=15, n_out=2, rng=rng) + + # train + for epoch in xrange(n_epochs): + classifier.train() + + + # test + x = numpy.array([[1, 1, 0, 0, 0, 0], + [0, 0, 0, 1, 1, 0], + [1, 1, 1, 1, 1, 0]]) + + print classifier.predict(x) + + +if __name__ == "__main__": + test_mlp() diff --git a/python/RBM.py b/python/RBM.py index 781241d..4a1be8e 100755 --- a/python/RBM.py +++ b/python/RBM.py @@ -1,38 +1,23 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- -""" - Restricted Boltzmann Machine (RBM) - - References : - - Y. Bengio, P. Lamblin, D. Popovici, H. Larochelle: Greedy Layer-Wise - Training of Deep Networks, Advances in Neural Information Processing - Systems 19, 2007 - - - - DeepLearningTutorials - https://github.com/lisa-lab/DeepLearningTutorials - -""" - import sys import numpy from utils import * class RBM(object): def __init__(self, input=None, n_visible=2, n_hidden=3, \ - W=None, hbias=None, vbias=None, numpy_rng=None): + W=None, hbias=None, vbias=None, rng=None): self.n_visible = n_visible # num of units in visible (input) layer self.n_hidden = n_hidden # num of units in hidden layer - if numpy_rng is None: - numpy_rng = numpy.random.RandomState(1234) + if rng is None: + rng = numpy.random.RandomState(1234) if W is None: a = 1. 
/ n_visible - initial_W = numpy.array(numpy_rng.uniform( # initialize W uniformly + initial_W = numpy.array(rng.uniform( # initialize W uniformly low=-a, high=a, size=(n_visible, n_hidden))) @@ -46,14 +31,12 @@ def __init__(self, input=None, n_visible=2, n_hidden=3, \ vbias = numpy.zeros(n_visible) # initialize v bias 0 - self.numpy_rng = numpy_rng + self.rng = rng self.input = input self.W = W self.hbias = hbias self.vbias = vbias - # self.params = [self.W, self.hbias, self.vbias] - def contrastive_divergence(self, lr=0.1, k=1, input=None): if input is not None: @@ -86,7 +69,7 @@ def contrastive_divergence(self, lr=0.1, k=1, input=None): def sample_h_given_v(self, v0_sample): h1_mean = self.propup(v0_sample) - h1_sample = self.numpy_rng.binomial(size=h1_mean.shape, # discrete: binomial + h1_sample = self.rng.binomial(size=h1_mean.shape, # discrete: binomial n=1, p=h1_mean) @@ -95,7 +78,7 @@ def sample_h_given_v(self, v0_sample): def sample_v_given_h(self, h0_sample): v1_mean = self.propdown(h0_sample) - v1_sample = self.numpy_rng.binomial(size=v1_mean.shape, # discrete: binomial + v1_sample = self.rng.binomial(size=v1_mean.shape, # discrete: binomial n=1, p=v1_mean) @@ -153,7 +136,7 @@ def test_rbm(learning_rate=0.1, k=1, training_epochs=1000): rng = numpy.random.RandomState(123) # construct RBM - rbm = RBM(input=data, n_visible=6, n_hidden=2, numpy_rng=rng) + rbm = RBM(input=data, n_visible=6, n_hidden=2, rng=rng) # train for epoch in xrange(training_epochs): diff --git a/python/SdA.py b/python/SdA.py index 4a0f45a..3d38d0f 100755 --- a/python/SdA.py +++ b/python/SdA.py @@ -1,20 +1,5 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- - -""" - Stacked Denoising Autoencoders (SdA) - - References : - - P. Vincent, H. Larochelle, Y. Bengio, P.A. Manzagol: Extracting and - Composing Robust Features with Denoising Autoencoders, ICML' 08, 1096-1103, - 2008 - - - DeepLearningTutorials - https://github.com/lisa-lab/DeepLearningTutorials - -""" - import sys import numpy from HiddenLayer import HiddenLayer @@ -26,7 +11,7 @@ class SdA(object): def __init__(self, input=None, label=None,\ n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,\ - numpy_rng=None): + rng=None): self.x = input self.y = label @@ -35,8 +20,8 @@ def __init__(self, input=None, label=None,\ self.dA_layers = [] self.n_layers = len(hidden_layer_sizes) # = len(self.rbm_layers) - if numpy_rng is None: - numpy_rng = numpy.random.RandomState(1234) + if rng is None: + rng = numpy.random.RandomState(1234) assert self.n_layers > 0 @@ -60,7 +45,7 @@ def __init__(self, input=None, label=None,\ sigmoid_layer = HiddenLayer(input=layer_input, n_in=input_size, n_out=hidden_layer_sizes[i], - numpy_rng=numpy_rng, + rng=rng, activation=sigmoid) self.sigmoid_layers.append(sigmoid_layer) @@ -152,7 +137,7 @@ def test_SdA(pretrain_lr=0.1, pretraining_epochs=1000, corruption_level=0.3, \ # construct SdA sda = SdA(input=x, label=y, \ - n_ins=20, hidden_layer_sizes=[15, 15], n_outs=2, numpy_rng=rng) + n_ins=20, hidden_layer_sizes=[15, 15], n_outs=2, rng=rng) # pre-training sda.pretrain(lr=pretrain_lr, corruption_level=corruption_level, epochs=pretraining_epochs) diff --git a/python/dA.py b/python/dA.py index 0b911eb..edbf6c7 100755 --- a/python/dA.py +++ b/python/dA.py @@ -1,23 +1,5 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- -""" - Denoising Autoencoders (dA) - - References : - - P. Vincent, H. Larochelle, Y. Bengio, P.A. 
Manzagol: Extracting and - Composing Robust Features with Denoising Autoencoders, ICML'08, 1096-1103, - 2008 - - - DeepLearningTutorials - https://github.com/lisa-lab/DeepLearningTutorials - - - Yusuke Sugomori: Stochastic Gradient Descent for Denoising Autoencoders, - http://yusugomori.com/docs/SGD_DA.pdf - -""" - - import sys import numpy from utils import * @@ -25,44 +7,39 @@ class dA(object): def __init__(self, input=None, n_visible=2, n_hidden=3, \ - W=None, hbias=None, vbias=None, numpy_rng=None): + W=None, hbias=None, vbias=None, rng=None): self.n_visible = n_visible # num of units in visible (input) layer self.n_hidden = n_hidden # num of units in hidden layer - if numpy_rng is None: - numpy_rng = numpy.random.RandomState(1234) + if rng is None: + rng = numpy.random.RandomState(1234) if W is None: a = 1. / n_visible - initial_W = numpy.array(numpy_rng.uniform( # initialize W uniformly + W = numpy.array(rng.uniform( # initialize W uniformly low=-a, high=a, size=(n_visible, n_hidden))) - W = initial_W - if hbias is None: hbias = numpy.zeros(n_hidden) # initialize h bias 0 if vbias is None: vbias = numpy.zeros(n_visible) # initialize v bias 0 - self.numpy_rng = numpy_rng + self.rng = rng self.x = input self.W = W self.W_prime = self.W.T self.hbias = hbias self.vbias = vbias - # self.params = [self.W, self.hbias, self.vbias] - - def get_corrupted_input(self, input, corruption_level): assert corruption_level < 1 - return self.numpy_rng.binomial(size=input.shape, + return self.rng.binomial(size=input.shape, n=1, p=1-corruption_level) * input @@ -133,7 +110,7 @@ def test_dA(learning_rate=0.1, corruption_level=0.3, training_epochs=50): rng = numpy.random.RandomState(123) # construct dA - da = dA(input=data, n_visible=20, n_hidden=5, numpy_rng=rng) + da = dA(input=data, n_visible=20, n_hidden=5, rng=rng) # train for epoch in xrange(training_epochs): diff --git a/python/utils.py b/python/utils.py index 5c4a748..e17f203 100755 --- a/python/utils.py +++ b/python/utils.py @@ -1,7 +1,5 @@ -''' ''' -import numpy - +import numpy numpy.seterr(all='ignore') @@ -9,6 +7,13 @@ def sigmoid(x): return 1. / (1 + numpy.exp(-x)) +def dsigmoid(x): + return x * (1. - x) + +def dtanh(x): + return 1. - x * x + + def softmax(x): e = numpy.exp(x - numpy.max(x)) # prevent overflow if e.ndim == 1: From f4ef9e5a8c63c368fb9a99b130ae9657aaf1ed3f Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori Date: Thu, 25 Jun 2015 09:48:36 +0200 Subject: [PATCH 14/36] minor fix MLP --- python/MLP.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/python/MLP.py b/python/MLP.py index def6ab7..3ba7425 100755 --- a/python/MLP.py +++ b/python/MLP.py @@ -16,7 +16,7 @@ def __init__(self, input, label, n_in, n_hidden, n_out, rng=None): if rng is None: rng = numpy.random.RandomState(1234) - # construct hidden_layer (tanh, sigmoid, etc...) 
+ # construct hidden_layer (tanh or sigmoid so far) self.hidden_layer = HiddenLayer(input=self.x, n_in=n_in, n_out=n_hidden, @@ -31,8 +31,6 @@ def __init__(self, input, label, n_in, n_hidden, n_out, rng=None): def train(self): layer_input = self.hidden_layer.forward() - # print self.hidden_layer.W - self.log_layer.train(input=layer_input) self.hidden_layer.backward(prev_layer=self.log_layer) From 37aadf1d27ba356703716afd80c28410fd2d1c34 Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori Date: Sun, 2 Aug 2015 21:45:34 -0400 Subject: [PATCH 15/36] add Dropout with ReLU --- python/Dropout.py | 160 ++++++++++++++++++++++++++++++++++++++++++ python/HiddenLayer.py | 26 ++++++- python/MLP.py | 4 +- python/SdA.py | 4 +- python/utils.py | 11 ++- 5 files changed, 197 insertions(+), 8 deletions(-) create mode 100755 python/Dropout.py diff --git a/python/Dropout.py b/python/Dropout.py new file mode 100755 index 0000000..3edf3fe --- /dev/null +++ b/python/Dropout.py @@ -0,0 +1,160 @@ +# -*- coding: utf-8 -*- + +import sys +import numpy +from HiddenLayer import HiddenLayer +from LogisticRegression import LogisticRegression +from utils import * + + +class Dropout(object): + def __init__(self, input, label,\ + n_in, hidden_layer_sizes, n_out,\ + rng=None, activation=ReLU): + + self.x = input + self.y = label + + self.hidden_layers = [] + self.n_layers = len(hidden_layer_sizes) + + if rng is None: + rng = numpy.random.RandomState(1234) + + assert self.n_layers > 0 + + + # construct multi-layer + for i in xrange(self.n_layers): + + # layer_size + if i == 0: + input_size = n_in + else: + input_size = hidden_layer_sizes[i-1] + + # layer_input + if i == 0: + layer_input = self.x + + else: + layer_input = self.hidden_layers[-1].output() + + # construct hidden_layer + hidden_layer = HiddenLayer(input=layer_input, + n_in=input_size, + n_out=hidden_layer_sizes[i], + rng=rng, + activation=activation) + + self.hidden_layers.append(hidden_layer) + + + # layer for ouput using Logistic Regression (softmax) + self.log_layer = LogisticRegression(input=self.hidden_layers[-1].output(), + label=self.y, + n_in=hidden_layer_sizes[-1], + n_out=n_out) + + + def train(self, epochs=500, dropout=True, p_dropout=0.5, rng=None): + + for epoch in xrange(epochs): + dropout_masks = [] # create different masks in each training epoch + + # forward hidden_layers + for i in xrange(self.n_layers): + if i == 0: + layer_input = self.x + + layer_input = self.hidden_layers[i].forward(input=layer_input) + + if dropout == True: + mask = self.hidden_layers[i].dropout(input=layer_input, p=p_dropout, rng=rng) + layer_input *= mask + + dropout_masks.append(mask) + + + # forward & backward log_layer + self.log_layer.train(input=layer_input) + + + # backward hidden_layers + for i in reversed(xrange(0, self.n_layers)): + if i == self.n_layers-1: + prev_layer = self.log_layer + else: + prev_layer = self.hidden_layers[i+1] + + self.hidden_layers[i].backward(prev_layer=prev_layer) + + if dropout == True: + self.hidden_layers[i].d_y *= dropout_masks[i] # also mask here + + + def predict(self, x, dropout=True, p_dropout=0.5): + layer_input = x + + for i in xrange(self.n_layers): + if dropout == True: + self.hidden_layers[i].W = p_dropout * self.hidden_layers[i].W + self.hidden_layers[i].b = p_dropout * self.hidden_layers[i].b + + layer_input = self.hidden_layers[i].output(input=layer_input) + + return self.log_layer.predict(layer_input) + + + +def test_dropout(n_epochs=500, dropout=True, p_dropout=0.5): + + x = numpy.array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0], + [1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0]]) + + y = numpy.array([[1, 0], + [1, 0], + [1, 0], + [1, 0], + [1, 0], + [0, 1], + [0, 1], + [0, 1], + [0, 1], + [0, 1]]) + + + rng = numpy.random.RandomState(123) + + + # construct Dropout MLP + classifier = Dropout(input=x, label=y, \ + n_in=20, hidden_layer_sizes=[12, 12], n_out=2, \ + rng=rng, activation=ReLU) + + + # train + classifier.train(epochs=n_epochs, dropout=dropout, \ + p_dropout=p_dropout, rng=rng) + + + # test + x = numpy.array([[1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1]]) + + print classifier.predict(x) + + + +if __name__ == "__main__": + test_dropout() diff --git a/python/HiddenLayer.py b/python/HiddenLayer.py index 4130b35..f7a0731 100755 --- a/python/HiddenLayer.py +++ b/python/HiddenLayer.py @@ -7,7 +7,7 @@ class HiddenLayer(object): def __init__(self, input, n_in, n_out,\ - W=None, b=None, rng=None, activation=numpy.tanh): + W=None, b=None, rng=None, activation=tanh): if rng is None: rng = numpy.random.RandomState(1234) @@ -24,15 +24,22 @@ def __init__(self, input, n_in, n_out,\ self.rng = rng self.x = input + self.W = W self.b = b - if activation == numpy.tanh: + if activation == tanh: self.dactivation = dtanh + elif activation == sigmoid: self.dactivation = dsigmoid + + elif activation == ReLU: + self.dactivation = dReLU + else: raise ValueError('activation function not supported.') + self.activation = activation @@ -68,10 +75,23 @@ def backward(self, prev_layer, lr=0.1, input=None): if input is not None: self.x = input - # d_y = (1 - prev_layer.x * prev_layer.x) * numpy.dot(prev_layer.d_y, prev_layer.W.T) d_y = self.dactivation(prev_layer.x) * numpy.dot(prev_layer.d_y, prev_layer.W.T) self.W += lr * numpy.dot(self.x.T, d_y) self.b += lr * numpy.mean(d_y, axis=0) self.d_y = d_y + + + def dropout(self, input, p, rng=None): + if rng is None: + rng = numpy.random.RandomState(123) + + mask = rng.binomial(size=input.shape, + n=1, + p=1-p) # p is the prob of dropping + + return mask + + + diff --git a/python/MLP.py b/python/MLP.py index 3ba7425..0e02f2a 100755 --- a/python/MLP.py +++ b/python/MLP.py @@ -21,7 +21,7 @@ def __init__(self, input, label, n_in, n_hidden, n_out, rng=None): n_in=n_in, n_out=n_hidden, rng=rng, - activation=numpy.tanh) + activation=tanh) # construct log_layer (softmax) self.log_layer = LogisticRegression(input=self.hidden_layer.output, @@ -36,7 +36,7 @@ def train(self): def predict(self, x): - x = self.hidden_layer.output(x) + x = self.hidden_layer.output(input=x) return self.log_layer.predict(x) diff --git a/python/SdA.py b/python/SdA.py index 3d38d0f..5f8de37 100755 --- a/python/SdA.py +++ b/python/SdA.py @@ -103,8 +103,8 @@ def predict(self, x): sigmoid_layer = self.sigmoid_layers[i] layer_input = sigmoid_layer.output(input=layer_input) - out = 
self.log_layer.predict(layer_input) - return out + return self.log_layer.predict(layer_input) + diff --git a/python/utils.py b/python/utils.py index e17f203..7aca40d 100755 --- a/python/utils.py +++ b/python/utils.py @@ -10,10 +10,12 @@ def sigmoid(x): def dsigmoid(x): return x * (1. - x) +def tanh(x): + return numpy.tanh(x) + def dtanh(x): return 1. - x * x - def softmax(x): e = numpy.exp(x - numpy.max(x)) # prevent overflow if e.ndim == 1: @@ -22,6 +24,13 @@ def softmax(x): return e / numpy.array([numpy.sum(e, axis=1)]).T # ndim = 2 +def ReLU(x): + return x * (x > 0) + +def dReLU(x): + return 1. * (x > 0) + + # # probability density for the Gaussian dist # def gaussian(x, mean=0.0, scale=1.0): # s = 2 * numpy.power(scale, 2) From 4aa12f1ca8cc58e0d232e721335110d604fce59d Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori Date: Sun, 2 Aug 2015 21:47:59 -0400 Subject: [PATCH 16/36] Update README.md --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 48c2f67..14e17c5 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,10 @@ - LogisticRegression: Logistic Regression - HiddenLayer: Hidden Layer of Neural Networks + + - MLP: Multiple Layer Perceptron + + - Dropout: Dropout MLP @@ -38,4 +42,4 @@ -### Bug reports are deeply welcome. \ No newline at end of file +### Bug reports are deeply welcome. From d31d4f51cdead6e4d0ae0a24225c3f79b7fa7c5f Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori Date: Mon, 3 Aug 2015 23:12:15 -0400 Subject: [PATCH 17/36] bug fix --- python/Dropout.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/Dropout.py b/python/Dropout.py index 3edf3fe..feb594e 100755 --- a/python/Dropout.py +++ b/python/Dropout.py @@ -98,8 +98,7 @@ def predict(self, x, dropout=True, p_dropout=0.5): for i in xrange(self.n_layers): if dropout == True: - self.hidden_layers[i].W = p_dropout * self.hidden_layers[i].W - self.hidden_layers[i].b = p_dropout * self.hidden_layers[i].b + self.hidden_layers[i].W = (1 - p_dropout) * self.hidden_layers[i].W layer_input = self.hidden_layers[i].output(input=layer_input) From bb903c6791c83851c725c6e8689edea83a0122df Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori Date: Tue, 4 Aug 2015 22:09:01 -0400 Subject: [PATCH 18/36] clean up java --- .gitignore | 4 + java/DBN/src/DBN.java | 223 ----------------- java/DBN/src/HiddenLayer.java | 74 ------ java/DBN/src/LogisticRegression.java | 73 ------ java/DBN/src/RBM.java | 164 ------------- .../src/LogisticRegression.java | 128 ---------- java/RBM/src/RBM.java | 221 ----------------- java/SdA/src/HiddenLayer.java | 74 ------ java/SdA/src/LogisticRegression.java | 73 ------ java/SdA/src/SdA.java | 230 ------------------ java/SdA/src/dA.java | 150 ------------ java/dA/src/dA.java | 207 ---------------- java/src/DeepLearning/DBN.java | 222 +++++++++++++++++ java/src/DeepLearning/HiddenLayer.java | 54 ++++ java/src/DeepLearning/LogisticRegression.java | 129 ++++++++++ java/src/DeepLearning/RBM.java | 203 ++++++++++++++++ java/src/DeepLearning/SdA.java | 230 ++++++++++++++++++ java/src/DeepLearning/dA.java | 187 ++++++++++++++ java/src/DeepLearning/utils.java | 29 +++ 19 files changed, 1058 insertions(+), 1617 deletions(-) delete mode 100644 java/DBN/src/DBN.java delete mode 100644 java/DBN/src/HiddenLayer.java delete mode 100644 java/DBN/src/LogisticRegression.java delete mode 100644 java/DBN/src/RBM.java delete mode 100644 java/LogisticRegression/src/LogisticRegression.java delete mode 100644 java/RBM/src/RBM.java delete mode 100644 
java/SdA/src/HiddenLayer.java delete mode 100644 java/SdA/src/LogisticRegression.java delete mode 100644 java/SdA/src/SdA.java delete mode 100644 java/SdA/src/dA.java delete mode 100644 java/dA/src/dA.java create mode 100644 java/src/DeepLearning/DBN.java create mode 100644 java/src/DeepLearning/HiddenLayer.java create mode 100644 java/src/DeepLearning/LogisticRegression.java create mode 100644 java/src/DeepLearning/RBM.java create mode 100644 java/src/DeepLearning/SdA.java create mode 100644 java/src/DeepLearning/dA.java create mode 100644 java/src/DeepLearning/utils.java diff --git a/.gitignore b/.gitignore index 190cc57..252ef14 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,7 @@ *.settings .project .metadata + +java/.idea/* +java/java.iml +java/out/* \ No newline at end of file diff --git a/java/DBN/src/DBN.java b/java/DBN/src/DBN.java deleted file mode 100644 index 5b72e68..0000000 --- a/java/DBN/src/DBN.java +++ /dev/null @@ -1,223 +0,0 @@ -import java.util.Random; - -public class DBN { - public int N; - public int n_ins; - public int[] hidden_layer_sizes; - public int n_outs; - public int n_layers; - public HiddenLayer[] sigmoid_layers; - public RBM[] rbm_layers; - public LogisticRegression log_layer; - public Random rng; - - public static double sigmoid(double x) { - return 1.0 / (1.0 + Math.pow(Math.E, -x)); - } - - - public DBN(int N, int n_ins, int[] hidden_layer_sizes, int n_outs, int n_layers, Random rng) { - int input_size; - - this.N = N; - this.n_ins = n_ins; - this.hidden_layer_sizes = hidden_layer_sizes; - this.n_outs = n_outs; - this.n_layers = n_layers; - - this.sigmoid_layers = new HiddenLayer[n_layers]; - this.rbm_layers = new RBM[n_layers]; - - if(rng == null) this.rng = new Random(1234); - else this.rng = rng; - - // construct multi-layer - for(int i=0; i 1) return 0; - - int c = 0; - double r; - - for(int i=0; i 1) return 0; - - int c = 0; - double r; - - for(int i=0; i 1) return 0; - - int c = 0; - double r; - - for(int i=0; i 1) return 0; - - int c = 0; - double r; - - for(int i=0; i 1) return 0; - - int c = 0; - double r; - - for(int i=0; i 1) return 0; - - int c = 0; - double r; - - for(int i=0; i 1) return 0; + + int c = 0; + double r; + + for(int i=0; i Date: Tue, 4 Aug 2015 22:10:42 -0400 Subject: [PATCH 19/36] add gitkeep in java --- java/.gitkeep | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 java/.gitkeep diff --git a/java/.gitkeep b/java/.gitkeep new file mode 100644 index 0000000..e69de29 From 31688afa659ca3846a3347629594220a2fd62a89 Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori Date: Sun, 9 Aug 2015 12:49:29 -0400 Subject: [PATCH 20/36] bug fix --- python/RBM.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/RBM.py b/python/RBM.py index 4a1be8e..4496692 100755 --- a/python/RBM.py +++ b/python/RBM.py @@ -58,7 +58,7 @@ def contrastive_divergence(self, lr=0.1, k=1, input=None): # chain_end = nv_samples - self.W += lr * (numpy.dot(self.input.T, ph_sample) + self.W += lr * (numpy.dot(self.input.T, ph_mean) - numpy.dot(nv_samples.T, nh_means)) self.vbias += lr * numpy.mean(self.input - nv_samples, axis=0) self.hbias += lr * numpy.mean(ph_sample - nh_means, axis=0) From 0ffb1fe8df894a43939eb590c70337a107daafcb Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori Date: Sun, 9 Aug 2015 13:07:59 -0400 Subject: [PATCH 21/36] bug fix on MLP --- python/Dropout.py | 53 ++++++++++--------------- python/HiddenLayer.py | 29 ++++++-------- python/LogisticRegression.py | 75 +++++++++++++++++++----------------- 
python/MLP.py | 46 +++++++++++----------- 4 files changed, 97 insertions(+), 106 deletions(-) diff --git a/python/Dropout.py b/python/Dropout.py index feb594e..df7d646 100755 --- a/python/Dropout.py +++ b/python/Dropout.py @@ -50,11 +50,11 @@ def __init__(self, input, label,\ self.hidden_layers.append(hidden_layer) - # layer for ouput using Logistic Regression (softmax) - self.log_layer = LogisticRegression(input=self.hidden_layers[-1].output(), - label=self.y, - n_in=hidden_layer_sizes[-1], - n_out=n_out) + # layer for ouput using Logistic Regression (softmax) + self.log_layer = LogisticRegression(input=self.hidden_layers[-1].output(), + label=self.y, + n_in=hidden_layer_sizes[-1], + n_out=n_out) def train(self, epochs=500, dropout=True, p_dropout=0.5, rng=None): @@ -108,27 +108,16 @@ def predict(self, x, dropout=True, p_dropout=0.5): def test_dropout(n_epochs=500, dropout=True, p_dropout=0.5): - x = numpy.array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0]]) - - y = numpy.array([[1, 0], - [1, 0], - [1, 0], - [1, 0], - [1, 0], - [0, 1], - [0, 1], - [0, 1], - [0, 1], - [0, 1]]) + x = numpy.array([[-1, -1], + [-1, 1], + [ 1, -1], + [ 1, 1]]) + + + y = numpy.array([[1, -1], + [1, -1], + [-1, 1], + [-1, 1]]) rng = numpy.random.RandomState(123) @@ -136,20 +125,20 @@ def test_dropout(n_epochs=500, dropout=True, p_dropout=0.5): # construct Dropout MLP classifier = Dropout(input=x, label=y, \ - n_in=20, hidden_layer_sizes=[12, 12], n_out=2, \ + n_in=2, hidden_layer_sizes=[3], n_out=2, \ rng=rng, activation=ReLU) - # train + # train XOR classifier.train(epochs=n_epochs, dropout=dropout, \ p_dropout=p_dropout, rng=rng) # test - x = numpy.array([[1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1]]) + x = numpy.array([[-1, -1], + [-1, 1], + [ 1, -1], + [ 1, 1]]) print classifier.predict(x) diff --git a/python/HiddenLayer.py b/python/HiddenLayer.py index f7a0731..166d24c 100755 --- a/python/HiddenLayer.py +++ b/python/HiddenLayer.py @@ -51,21 +51,7 @@ def output(self, input=None): linear_output = numpy.dot(self.x, self.W) + self.b - return (linear_output if self.activation is None - else self.activation(linear_output)) - - - def sample_h_given_v(self, input=None): - if input is not None: - self.x = input - - v_mean = self.output() - h_sample = self.rng.binomial(size=v_mean.shape, - n=1, - p=v_mean) - return h_sample - - + return self.activation(linear_output) def forward(self, input=None): return self.output(input=input) @@ -75,11 +61,10 @@ def backward(self, prev_layer, lr=0.1, input=None): if input is not None: self.x = input - d_y = self.dactivation(prev_layer.x) * numpy.dot(prev_layer.d_y, prev_layer.W.T) + d_y = self.dactivation( prev_layer.x ) * numpy.dot( prev_layer.d_y, prev_layer.W.T ) self.W += lr * 
numpy.dot(self.x.T, d_y) self.b += lr * numpy.mean(d_y, axis=0) - self.d_y = d_y @@ -94,4 +79,14 @@ def dropout(self, input, p, rng=None): return mask + def sample_h_given_v(self, input=None): + if input is not None: + self.x = input + + v_mean = self.output() + h_sample = self.rng.binomial(size=v_mean.shape, + n=1, + p=v_mean) + return h_sample + diff --git a/python/LogisticRegression.py b/python/LogisticRegression.py index f1a686d..3f57889 100755 --- a/python/LogisticRegression.py +++ b/python/LogisticRegression.py @@ -9,25 +9,36 @@ class LogisticRegression(object): def __init__(self, input, label, n_in, n_out): self.x = input self.y = label + self.W = numpy.zeros((n_in, n_out)) # initialize W 0 - self.b = numpy.zeros(n_out) # initialize bias 0 + self.b = numpy.zeros(n_out) # initialize bias 0 def train(self, lr=0.1, input=None, L2_reg=0.00): + self.forward(input) + self.backward(lr, L2_reg) + + + def forward(self, input=None): if input is not None: self.x = input - # p_y_given_x = sigmoid(numpy.dot(self.x, self.W) + self.b) - p_y_given_x = softmax(numpy.dot(self.x, self.W) + self.b) - d_y = self.y - p_y_given_x - - self.W += lr * numpy.dot(self.x.T, d_y) - lr * L2_reg * self.W - self.b += lr * numpy.mean(d_y, axis=0) + p_y_given_x = self.output(self.x) + self.d_y = self.y - p_y_given_x - self.d_y = d_y - # cost = self.negative_log_likelihood() - # return cost + def backward(self, lr, L2_reg=0.00): + self.W += lr * numpy.dot(self.x.T, self.d_y) - lr * L2_reg * self.W + self.b += lr * numpy.mean(self.d_y, axis=0) + + + def output(self, x): + # return sigmoid(numpy.dot(x, self.W) + self.b) + return softmax(numpy.dot(x, self.W) + self.b) + + def predict(self, x): + return self.output(x) + def negative_log_likelihood(self): # sigmoid_activation = sigmoid(numpy.dot(self.x, self.W) + self.b) @@ -41,47 +52,41 @@ def negative_log_likelihood(self): return cross_entropy - def predict(self, x): - # return sigmoid(numpy.dot(x, self.W) + self.b) - return softmax(numpy.dot(x, self.W) + self.b) - - def output(self, x): - return self.predict(x) +def test_lr(learning_rate=0.1, n_epochs=500): + rng = numpy.random.RandomState(123) -def test_lr(learning_rate=0.01, n_epochs=200): # training data - x = numpy.array([[1,1,1,0,0,0], - [1,0,1,0,0,0], - [1,1,1,0,0,0], - [0,0,1,1,1,0], - [0,0,1,1,0,0], - [0,0,1,1,1,0]]) - y = numpy.array([[1, 0], - [1, 0], - [1, 0], - [0, 1], - [0, 1], - [0, 1]]) + d = 2 + N = 10 + x1 = rng.randn(N, d) + numpy.array([0, 0]) + x2 = rng.randn(N, d) + numpy.array([20, 10]) + y1 = [[1, 0] for i in xrange(N)] + y2 = [[0, 1] for i in xrange(N)] + + x = numpy.r_[x1.astype(int), x2.astype(int)] + y = numpy.r_[y1, y2] # construct LogisticRegression - classifier = LogisticRegression(input=x, label=y, n_in=6, n_out=2) + classifier = LogisticRegression(input=x, label=y, n_in=d, n_out=2) # train for epoch in xrange(n_epochs): classifier.train(lr=learning_rate) # cost = classifier.negative_log_likelihood() # print >> sys.stderr, 'Training epoch %d, cost is ' % epoch, cost - learning_rate *= 0.95 + learning_rate *= 0.995 # test - x = numpy.array([[1, 1, 0, 0, 0, 0], - [0, 0, 0, 1, 1, 0], - [1, 1, 1, 1, 1, 0]]) + result = classifier.predict(x) + for i in xrange(N): + print result[i] + print + for i in xrange(N): + print result[N+i] - print classifier.predict(x) if __name__ == "__main__": diff --git a/python/MLP.py b/python/MLP.py index 0e02f2a..f87e597 100755 --- a/python/MLP.py +++ b/python/MLP.py @@ -16,43 +16,49 @@ def __init__(self, input, label, n_in, n_hidden, n_out, rng=None): if rng is 
None: rng = numpy.random.RandomState(1234) - # construct hidden_layer (tanh or sigmoid so far) + # construct hidden_layer self.hidden_layer = HiddenLayer(input=self.x, n_in=n_in, n_out=n_hidden, rng=rng, - activation=tanh) + activation=sigmoid) - # construct log_layer (softmax) + # construct log_layer self.log_layer = LogisticRegression(input=self.hidden_layer.output, label=self.y, n_in=n_hidden, n_out=n_out) def train(self): - layer_input = self.hidden_layer.forward() - self.log_layer.train(input=layer_input) - self.hidden_layer.backward(prev_layer=self.log_layer) - + # forward hidden_layer + log_layer_input = self.hidden_layer.forward(input=self.x) + + # forward log_layer + self.log_layer.forward(input=log_layer_input) + + # backward hidden_layer + self.hidden_layer.backward(prev_layer=self.log_layer, lr=0.2, input=self.x) + + # backward log_layer + self.log_layer.backward(lr=0.2) + + def predict(self, x): x = self.hidden_layer.output(input=x) return self.log_layer.predict(x) -def test_mlp(n_epochs=100): +def test_mlp(n_epochs=5000): - x = numpy.array([[1,1,1,0,0,0], - [1,0,1,0,0,0], - [1,1,1,0,0,0], - [0,0,1,1,1,0], - [0,0,1,1,0,0], - [0,0,1,1,1,0]]) - y = numpy.array([[1, 0], + x = numpy.array([[0, 0], + [0, 1], + [1, 0], + [1, 1]]) + + y = numpy.array([[0, 1], [1, 0], [1, 0], - [0, 1], - [0, 1], [0, 1]]) @@ -60,7 +66,7 @@ def test_mlp(n_epochs=100): # construct MLP - classifier = MLP(input=x, label=y, n_in=6, n_hidden=15, n_out=2, rng=rng) + classifier = MLP(input=x, label=y, n_in=2, n_hidden=3, n_out=2, rng=rng) # train for epoch in xrange(n_epochs): @@ -68,10 +74,6 @@ def test_mlp(n_epochs=100): # test - x = numpy.array([[1, 1, 0, 0, 0, 0], - [0, 0, 0, 1, 1, 0], - [1, 1, 1, 1, 1, 0]]) - print classifier.predict(x) From 18fa46dc43d6d8753401c4b34f8e00e546dfdbf8 Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori Date: Sun, 9 Aug 2015 13:50:02 -0400 Subject: [PATCH 22/36] dropout --- python/Dropout.py | 29 +++++++++++------------------ python/HiddenLayer.py | 4 ++-- python/LogisticRegression.py | 30 ++++++++++++++++++++---------- python/MLP.py | 14 +++++++------- 4 files changed, 40 insertions(+), 37 deletions(-) diff --git a/python/Dropout.py b/python/Dropout.py index df7d646..16c203f 100755 --- a/python/Dropout.py +++ b/python/Dropout.py @@ -57,7 +57,7 @@ def __init__(self, input, label,\ n_out=n_out) - def train(self, epochs=500, dropout=True, p_dropout=0.5, rng=None): + def train(self, epochs=5000, dropout=True, p_dropout=0.5, rng=None): for epoch in xrange(epochs): dropout_masks = [] # create different masks in each training epoch @@ -106,26 +106,24 @@ def predict(self, x, dropout=True, p_dropout=0.5): -def test_dropout(n_epochs=500, dropout=True, p_dropout=0.5): +def test_dropout(n_epochs=5000, dropout=True, p_dropout=0.5): - x = numpy.array([[-1, -1], - [-1, 1], - [ 1, -1], - [ 1, 1]]) - - - y = numpy.array([[1, -1], - [1, -1], - [-1, 1], - [-1, 1]]) + x = numpy.array([[0, 0], + [0, 1], + [1, 0], + [1, 1]]) + y = numpy.array([[0, 1], + [1, 0], + [1, 0], + [0, 1]]) rng = numpy.random.RandomState(123) # construct Dropout MLP classifier = Dropout(input=x, label=y, \ - n_in=2, hidden_layer_sizes=[3], n_out=2, \ + n_in=2, hidden_layer_sizes=[10, 10], n_out=2, \ rng=rng, activation=ReLU) @@ -135,11 +133,6 @@ def test_dropout(n_epochs=500, dropout=True, p_dropout=0.5): # test - x = numpy.array([[-1, -1], - [-1, 1], - [ 1, -1], - [ 1, 1]]) - print classifier.predict(x) diff --git a/python/HiddenLayer.py b/python/HiddenLayer.py index 166d24c..75f8ab2 100755 --- a/python/HiddenLayer.py +++ 
b/python/HiddenLayer.py @@ -50,9 +50,9 @@ def output(self, input=None): self.x = input linear_output = numpy.dot(self.x, self.W) + self.b - return self.activation(linear_output) + def forward(self, input=None): return self.output(input=input) @@ -61,7 +61,7 @@ def backward(self, prev_layer, lr=0.1, input=None): if input is not None: self.x = input - d_y = self.dactivation( prev_layer.x ) * numpy.dot( prev_layer.d_y, prev_layer.W.T ) + d_y = self.dactivation(prev_layer.x) * numpy.dot(prev_layer.d_y, prev_layer.W.T) self.W += lr * numpy.dot(self.x.T, d_y) self.b += lr * numpy.mean(d_y, axis=0) diff --git a/python/LogisticRegression.py b/python/LogisticRegression.py index 3f57889..708a1b3 100755 --- a/python/LogisticRegression.py +++ b/python/LogisticRegression.py @@ -14,22 +14,32 @@ def __init__(self, input, label, n_in, n_out): self.b = numpy.zeros(n_out) # initialize bias 0 - def train(self, lr=0.1, input=None, L2_reg=0.00): - self.forward(input) - self.backward(lr, L2_reg) - - - def forward(self, input=None): + def train(self, lr=0.1, input=None, L2_reg=0.00): if input is not None: self.x = input p_y_given_x = self.output(self.x) - self.d_y = self.y - p_y_given_x + d_y = self.y - p_y_given_x + + self.W += lr * numpy.dot(self.x.T, d_y) - lr * L2_reg * self.W + self.b += lr * numpy.mean(d_y, axis=0) + self.d_y = d_y + + + # def train(self, lr=0.1, input=None, L2_reg=0.00): + # self.forward(input) + # self.backward(lr, L2_reg) + + # def forward(self, input=None): + # if input is not None: + # self.x = input + # p_y_given_x = self.output(self.x) + # self.d_y = self.y - p_y_given_x - def backward(self, lr, L2_reg=0.00): - self.W += lr * numpy.dot(self.x.T, self.d_y) - lr * L2_reg * self.W - self.b += lr * numpy.mean(self.d_y, axis=0) + # def backward(self, lr=0.1, L2_reg=0.00): + # self.W += lr * numpy.dot(self.x.T, self.d_y) - lr * L2_reg * self.W + # self.b += lr * numpy.mean(self.d_y, axis=0) def output(self, x): diff --git a/python/MLP.py b/python/MLP.py index f87e597..e9ded0b 100755 --- a/python/MLP.py +++ b/python/MLP.py @@ -21,7 +21,7 @@ def __init__(self, input, label, n_in, n_hidden, n_out, rng=None): n_in=n_in, n_out=n_hidden, rng=rng, - activation=sigmoid) + activation=tanh) # construct log_layer self.log_layer = LogisticRegression(input=self.hidden_layer.output, @@ -31,17 +31,17 @@ def __init__(self, input, label, n_in, n_hidden, n_out, rng=None): def train(self): # forward hidden_layer - log_layer_input = self.hidden_layer.forward(input=self.x) + layer_input = self.hidden_layer.forward() - # forward log_layer - self.log_layer.forward(input=log_layer_input) + # forward & backward log_layer + # self.log_layer.forward(input=layer_input) + self.log_layer.train(input=layer_input) # backward hidden_layer - self.hidden_layer.backward(prev_layer=self.log_layer, lr=0.2, input=self.x) + self.hidden_layer.backward(prev_layer=self.log_layer) # backward log_layer - self.log_layer.backward(lr=0.2) - + # self.log_layer.backward() def predict(self, x): From 3daf4b246f5ddfb991968536b1eb0987ac8ce291 Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori Date: Mon, 17 Aug 2015 18:11:06 -0400 Subject: [PATCH 23/36] Update README.md --- README.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 14e17c5..4b11361 100644 --- a/README.md +++ b/README.md @@ -41,5 +41,10 @@ http://yusugomori.com/docs/SGD_DA.pdf +### Other projects : + - [DeepLearning.coffee](https://github.com/yusugomori/deeplearning.coffee) : Very simple implementation of deep learning by 
CoffeeScript +  +  + +#### Bug reports are deeply welcome. -### Bug reports are deeply welcome. From d11bd59844fbb4f5f3921ec0f427afed32dafb64 Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori Date: Wed, 19 Aug 2015 22:08:35 -0400 Subject: [PATCH 24/36] Update README.md --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 4b11361..933b11a 100644 --- a/README.md +++ b/README.md @@ -42,9 +42,10 @@ ### Other projects : - - [DeepLearning.coffee](https://github.com/yusugomori/deeplearning.coffee) : Very simple implementation of deep learning by CoffeeScript + - My deep learning projects are [here](http://yusugomori.com/projects/deep-learning/)     -#### Bug reports are deeply welcome. +##### Bug reports / contributions / donations are deeply welcome. +Bitcoin wallet address: 1QAoYw5Y3opvah2APf4jVcpD6UAHyC3k7s From 52f8752b9a9c4f5837c0ab59e5f3aea3a2031a6a Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori Date: Wed, 19 Aug 2015 22:08:55 -0400 Subject: [PATCH 25/36] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 933b11a..26b41bd 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,7 @@ ### Other projects : - - My deep learning projects are [here](http://yusugomori.com/projects/deep-learning/) + - My deep learning projects are [here](http://yusugomori.com/projects/deep-learning/).     From 8872adfcd46d77a778c5f37c66627a6526d2d2d7 Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori Date: Thu, 20 Aug 2015 18:04:10 -0400 Subject: [PATCH 26/36] clean up cpp utils --- cpp/DBN.cpp | 25 ++----------------------- cpp/HiddenLayer.cpp | 25 ++----------------------- cpp/RBM.cpp | 24 ++---------------------- cpp/SdA.cpp | 25 +++---------------------- cpp/dA.cpp | 26 +++----------------------- cpp/utils.cpp | 25 ------------------------- cpp/utils.h | 32 ++++++++++++++++++++++++++++++++ 7 files changed, 44 insertions(+), 138 deletions(-) delete mode 100644 cpp/utils.cpp create mode 100644 cpp/utils.h diff --git a/cpp/DBN.cpp b/cpp/DBN.cpp index f043e20..155e081 100644 --- a/cpp/DBN.cpp +++ b/cpp/DBN.cpp @@ -1,33 +1,12 @@ #include #include +#include "utils.h" #include "HiddenLayer.h" #include "RBM.h" #include "LogisticRegression.h" #include "DBN.h" using namespace std; - - -double uniform(double min, double max) { - return rand() / (RAND_MAX + 1.0) * (max - min) + min; -} - -int binomial(int n, double p) { - if(p < 0 || p > 1) return 0; - - int c = 0; - double r; - - for(int i=0; i #include #include "HiddenLayer.h" +#include "utils.h" using namespace std; - -double uniform(double min, double max) { - return rand() / (RAND_MAX + 1.0) * (max - min) + min; -} - -int binomial(int n, double p) { - if(p < 0 || p > 1) return 0; - - int c = 0; - double r; - - for(int i=0; i #include +#include "utils.h" #include "RBM.h" using namespace std; - -double uniform(double min, double max) { - return rand() / (RAND_MAX + 1.0) * (max - min) + min; -} - -int binomial(int n, double p) { - if(p < 0 || p > 1) return 0; - - int c = 0; - double r; - - for(int i=0; i #include +#include "utils.h" + #include "HiddenLayer.h" #include "dA.h" #include "LogisticRegression.h" #include "SdA.h" using namespace std; - -double uniform(double min, double max) { - return rand() / (RAND_MAX + 1.0) * (max - min) + min; -} - -int binomial(int n, double p) { - if(p < 0 || p > 1) return 0; - - int c = 0; - double r; - - for(int i=0; i #include +#include "utils.h" + #include "dA.h" using namespace std; - - -double uniform(double 
min, double max) { - return rand() / (RAND_MAX + 1.0) * (max - min) + min; -} - -int binomial(int n, double p) { - if(p < 0 || p > 1) return 0; - - int c = 0; - double r; - - for(int i=0; i -#include -using namespace std; - -double uniform(double min, double max) { - return rand() / (RAND_MAX + 1.0) * (max - min) + min; -} - -int binomial(int n, double p) { - if(p < 0 || p > 1) return 0; - - int c = 0; - double r; - - for(int i=0; i +#include +using namespace std; + + +namespace utils { + + double uniform(double min, double max) { + return rand() / (RAND_MAX + 1.0) * (max - min) + min; + } + + int binomial(int n, double p) { + if(p < 0 || p > 1) return 0; + + int c = 0; + double r; + + for(int i=0; i Date: Tue, 25 Aug 2015 20:59:56 -0400 Subject: [PATCH 27/36] MLP.java, Dropout.java --- java/src/DeepLearning/DBN.java | 12 +- java/src/DeepLearning/Dropout.java | 225 ++++++++++++++++++ java/src/DeepLearning/HiddenLayer.java | 70 +++++- .../src/DeepLearning/HiddenLayerDiscrete.java | 56 +++++ java/src/DeepLearning/LogisticRegression.java | 32 +-- .../LogisticRegressionDiscrete.java | 117 +++++++++ java/src/DeepLearning/MLP.java | 124 ++++++++++ java/src/DeepLearning/RBM.java | 1 - java/src/DeepLearning/SdA.java | 12 +- java/src/DeepLearning/utils.java | 30 ++- 10 files changed, 640 insertions(+), 39 deletions(-) create mode 100644 java/src/DeepLearning/Dropout.java create mode 100644 java/src/DeepLearning/HiddenLayerDiscrete.java create mode 100644 java/src/DeepLearning/LogisticRegressionDiscrete.java create mode 100644 java/src/DeepLearning/MLP.java diff --git a/java/src/DeepLearning/DBN.java b/java/src/DeepLearning/DBN.java index 39fb999..e070faf 100644 --- a/java/src/DeepLearning/DBN.java +++ b/java/src/DeepLearning/DBN.java @@ -9,9 +9,9 @@ public class DBN { public int[] hidden_layer_sizes; public int n_outs; public int n_layers; - public HiddenLayer[] sigmoid_layers; + public HiddenLayerDiscrete[] sigmoid_layers; public RBM[] rbm_layers; - public LogisticRegression log_layer; + public LogisticRegressionDiscrete log_layer; public Random rng; @@ -24,7 +24,7 @@ public DBN(int N, int n_ins, int[] hidden_layer_sizes, int n_outs, int n_layers, this.n_outs = n_outs; this.n_layers = n_layers; - this.sigmoid_layers = new HiddenLayer[n_layers]; + this.sigmoid_layers = new HiddenLayerDiscrete[n_layers]; this.rbm_layers = new RBM[n_layers]; if(rng == null) this.rng = new Random(1234); @@ -39,14 +39,14 @@ public DBN(int N, int n_ins, int[] hidden_layer_sizes, int n_outs, int n_layers, } // construct sigmoid_layer - this.sigmoid_layers[i] = new HiddenLayer(this.N, input_size, this.hidden_layer_sizes[i], null, null, rng); + this.sigmoid_layers[i] = new HiddenLayerDiscrete(this.N, input_size, this.hidden_layer_sizes[i], null, null, rng); // construct rbm_layer this.rbm_layers[i] = new RBM(this.N, input_size, this.hidden_layer_sizes[i], this.sigmoid_layers[i].W, this.sigmoid_layers[i].b, null, rng); } - // layer for output using DNN.LogisticRegression - this.log_layer = new LogisticRegression(this.N, this.hidden_layer_sizes[this.n_layers-1], this.n_outs); + // layer for output using Logistic Regression + this.log_layer = new LogisticRegressionDiscrete(this.N, this.hidden_layer_sizes[this.n_layers-1], this.n_outs); } public void pretrain(int[][] train_X, double lr, int k, int epochs) { diff --git a/java/src/DeepLearning/Dropout.java b/java/src/DeepLearning/Dropout.java new file mode 100644 index 0000000..b4717c6 --- /dev/null +++ b/java/src/DeepLearning/Dropout.java @@ -0,0 +1,225 @@ +package 
DeepLearning; + +import java.util.Random; +import java.util.List; +import java.util.ArrayList; + +public class Dropout { + public int N; + public int n_in; + public int[] hidden_layer_sizes; + public int n_out; + public int n_layers; + public HiddenLayer[] hiddenLayers; + public LogisticRegression logisticLayer; + public Random rng; + + + public Dropout(int N, int n_in, int[] hidden_layer_sizes, int n_out, Random rng, String activation) { + this.N = N; + this.n_in = n_in; + this.hidden_layer_sizes = hidden_layer_sizes; + this.n_layers = hidden_layer_sizes.length; + this.n_out = n_out; + + this.hiddenLayers = new HiddenLayer[n_layers]; + + if (rng == null) rng = new Random(1234); + this.rng = rng; + + if (activation == null) activation = "ReLU"; + + // construct multi-layer + int input_size; + for(int i=0; i dropout_masks; + List layer_inputs; + double[] layer_input; + double[] layer_output = new double[0]; + + for(int epoch=0; epoch(n_layers); + layer_inputs = new ArrayList<>(n_layers+1); // +1 for logistic layer + + // forward hiddenLayers + for(int i=0; i=0; i--) { + + if(i == n_layers-1) { + prev_W = logisticLayer.W; + } else { + prev_dy = dy.clone(); + prev_W = hiddenLayers[i+1].W; + } + + dy = new double[hidden_layer_sizes[i]]; + hiddenLayers[i].backward(layer_inputs.get(i), dy, layer_inputs.get(i+1), prev_dy, prev_W, lr); + + if(dropout) { + for(int j=0; j activation; + public DoubleFunction dactivation; - - public HiddenLayer(int N, int n_in, int n_out, double[][] W, double[] b, Random rng) { + public HiddenLayer(int N, int n_in, int n_out, double[][] W, double[] b, Random rng, String activation) { this.N = N; this.n_in = n_in; this.n_out = n_out; - if(rng == null) this.rng = new Random(1234); + if (rng == null) this.rng = new Random(1234); else this.rng = rng; - if(W == null) { + if (W == null) { this.W = new double[n_out][n_in]; double a = 1.0 / this.n_in; @@ -33,22 +35,72 @@ public HiddenLayer(int N, int n_in, int n_out, double[][] W, double[] b, Random this.W = W; } - if(b == null) this.b = new double[n_out]; + if (b == null) this.b = new double[n_out]; else this.b = b; + + if (activation == "sigmoid" || activation == null) { + this.activation = (double x) -> sigmoid(x); + this.dactivation = (double x) -> dsigmoid(x); + + } else if (activation == "tanh") { + this.activation = (double x) -> tanh(x); + this.dactivation = (double x) -> dtanh(x); + } else if (activation == "ReLU") { + this.activation = (double x) -> ReLU(x); + this.dactivation = (double x) -> dReLU(x); + } else { + throw new IllegalArgumentException("activation function not supported"); + } + } - public double output(int[] input, double[] w, double b) { + public double output(double[] input, double[] w, double b) { double linear_output = 0.0; for(int j=0; j 0) { + return x; + } else { + return 0.; + } } + public static double dReLU(double x) { + if(x > 0) { + return 1.; + } else { + return 0.; + } + } } From ad217dbb8b993a9cff0d0e6cf3e6a9de5778ed33 Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori Date: Thu, 27 Aug 2015 11:50:06 -0400 Subject: [PATCH 28/36] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 26b41bd..fc5b865 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -## Deep Learning (Python, C/C++, Java, Scala, Go) +## Deep Learning (Python, C, C++, Java, Scala, Go) ### Classes : From 03efd4a19dd5629cb85a8afdf5cb75d02ef07cc2 Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori Date: Tue, 20 Oct 2015 00:53:17 +0900 Subject: [PATCH 29/36] 
bug fix --- .gitignore | 5 ++++- data/.gitkeep | 0 java/src/DeepLearning/Dropout.java | 10 +++++----- python/RBM.py | 2 +- 4 files changed, 10 insertions(+), 7 deletions(-) create mode 100644 data/.gitkeep diff --git a/.gitignore b/.gitignore index 252ef14..98c89ef 100644 --- a/.gitignore +++ b/.gitignore @@ -11,4 +11,7 @@ java/.idea/* java/java.iml -java/out/* \ No newline at end of file +java/out/* + +data/* +!data/.gitkeep \ No newline at end of file diff --git a/data/.gitkeep b/data/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/java/src/DeepLearning/Dropout.java b/java/src/DeepLearning/Dropout.java index b4717c6..bb6378f 100644 --- a/java/src/DeepLearning/Dropout.java +++ b/java/src/DeepLearning/Dropout.java @@ -103,14 +103,14 @@ public void train(int epochs, double[][] train_X, int[][] train_Y, boolean dropo prev_W = hiddenLayers[i+1].W; } - dy = new double[hidden_layer_sizes[i]]; - hiddenLayers[i].backward(layer_inputs.get(i), dy, layer_inputs.get(i+1), prev_dy, prev_W, lr); - if(dropout) { - for(int j=0; j Date: Tue, 20 Oct 2015 01:13:20 +0900 Subject: [PATCH 30/36] bug fix --- python/Dropout.py | 7 ++++--- python/HiddenLayer.py | 5 ++++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/python/Dropout.py b/python/Dropout.py index 16c203f..ba99116 100755 --- a/python/Dropout.py +++ b/python/Dropout.py @@ -87,10 +87,11 @@ def train(self, epochs=5000, dropout=True, p_dropout=0.5, rng=None): else: prev_layer = self.hidden_layers[i+1] - self.hidden_layers[i].backward(prev_layer=prev_layer) - if dropout == True: - self.hidden_layers[i].d_y *= dropout_masks[i] # also mask here + self.hidden_layers[i].backward(prev_layer=prev_layer, dropout=True, mask=dropout_masks[i]) + else: + self.hidden_layers[i].backward(prev_layer=prev_layer) + def predict(self, x, dropout=True, p_dropout=0.5): diff --git a/python/HiddenLayer.py b/python/HiddenLayer.py index 75f8ab2..a97bc61 100755 --- a/python/HiddenLayer.py +++ b/python/HiddenLayer.py @@ -57,12 +57,15 @@ def forward(self, input=None): return self.output(input=input) - def backward(self, prev_layer, lr=0.1, input=None): + def backward(self, prev_layer, lr=0.1, input=None, dropout=False, mask=None): if input is not None: self.x = input d_y = self.dactivation(prev_layer.x) * numpy.dot(prev_layer.d_y, prev_layer.W.T) + if dropout == True: + d_y *= mask + self.W += lr * numpy.dot(self.x.T, d_y) self.b += lr * numpy.mean(d_y, axis=0) self.d_y = d_y From f4e32289fbe627cd2bcc5a1bf703b2df8499b55c Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori Date: Fri, 30 Oct 2015 15:05:59 +0900 Subject: [PATCH 31/36] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index fc5b865..ab89b7c 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,8 @@ - Dropout: Dropout MLP + - CNN: Convolutional Neural Networks (See [dev branch](https://github.com/yusugomori/DeepLearning/tree/dev).) + ### References : From 7a3596669bfe8e1e01b5137702d739155c4e4ba9 Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori Date: Thu, 17 Dec 2015 19:23:14 +0900 Subject: [PATCH 32/36] Update README.md --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index ab89b7c..0ece2c8 100644 --- a/README.md +++ b/README.md @@ -42,6 +42,10 @@ - Yusuke Sugomori: Stochastic Gradient Descent for Denoising Autoencoders, http://yusugomori.com/docs/SGD_DA.pdf +### Publication : + - More cleaner Java implementations are introduced in my book, Deep Learning with Java. 
+ + The book is now available to pre-order from [Packt Publishing](https://www.packtpub.com/big-data-and-business-intelligence/deep-learning-java) or [Amazon](http://www.amazon.com/Deep-Learning-Java-Yusuke-Sugomori/dp/1785282190/). ### Other projects : - My deep learning projects are [here](http://yusugomori.com/projects/deep-learning/). From 8d371431bd4d80914a73d666c72ffc94182275c8 Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori Date: Fri, 18 Dec 2015 10:27:27 +0900 Subject: [PATCH 33/36] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0ece2c8..595890d 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ http://yusugomori.com/docs/SGD_DA.pdf ### Publication : - - More cleaner Java implementations are introduced in my book, Deep Learning with Java. + - More detailed Java implementations are introduced in my book, Deep Learning with Java. The book is now available to pre-order from [Packt Publishing](https://www.packtpub.com/big-data-and-business-intelligence/deep-learning-java) or [Amazon](http://www.amazon.com/Deep-Learning-Java-Yusuke-Sugomori/dp/1785282190/). From 73e2f448931d9a2d2117572ea041c2f76956fae4 Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori Date: Tue, 31 May 2016 01:08:05 +0900 Subject: [PATCH 34/36] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 595890d..77ba574 100644 --- a/README.md +++ b/README.md @@ -43,9 +43,9 @@ http://yusugomori.com/docs/SGD_DA.pdf ### Publication : - - More detailed Java implementations are introduced in my book, Deep Learning with Java. + - More detailed Java implementations are introduced in my book, Java Deep Learning Essentials. - The book is now available to pre-order from [Packt Publishing](https://www.packtpub.com/big-data-and-business-intelligence/deep-learning-java) or [Amazon](http://www.amazon.com/Deep-Learning-Java-Yusuke-Sugomori/dp/1785282190/). + The book is available from [Packt Publishing](https://www.packtpub.com/big-data-and-business-intelligence/java-deep-learning-essentials) or [Amazon](http://www.amazon.com/Deep-Learning-Java-Yusuke-Sugomori/dp/1785282190/). ### Other projects : - My deep learning projects are [here](http://yusugomori.com/projects/deep-learning/). From 6b8af8891a94a1a25c15b5d5151f4bb59d0dab5f Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori Date: Thu, 7 Sep 2017 21:35:24 +0900 Subject: [PATCH 35/36] add LICENSE --- LICENSE.txt | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 LICENSE.txt diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..92d495f --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,22 @@ +Copyright (c) 2017 Yusuke Sugomori + +MIT License + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. From 739dfd1d7919c328e0d3b8129855c2ad71b80036 Mon Sep 17 00:00:00 2001 From: Yusuke Sugomori Date: Mon, 4 Dec 2017 15:01:44 +0900 Subject: [PATCH 36/36] update README --- README.md | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/README.md b/README.md index 77ba574..37f8767 100644 --- a/README.md +++ b/README.md @@ -47,11 +47,7 @@ The book is available from [Packt Publishing](https://www.packtpub.com/big-data-and-business-intelligence/java-deep-learning-essentials) or [Amazon](http://www.amazon.com/Deep-Learning-Java-Yusuke-Sugomori/dp/1785282190/). -### Other projects : - - My deep learning projects are [here](http://yusugomori.com/projects/deep-learning/). -  -  ##### Bug reports / contributions / donations are deeply welcome. -Bitcoin wallet address: 1QAoYw5Y3opvah2APf4jVcpD6UAHyC3k7s +Bitcoin wallet address: 34kZarc2uBU6BMCouUp2iudvZtbmZMPqrA
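The Python patches above (HiddenLayer.py, LogisticRegression.py, MLP.py, utils.py) converge on one backpropagation convention: the derivative helpers in utils.py take the already-activated output as their argument (dsigmoid(y) = y * (1 - y), dtanh(y) = 1 - y*y, dReLU(y) = 1 for y > 0), the output layer's delta is simply label minus softmax prediction, and HiddenLayer.backward multiplies the derivative of its own output by the next layer's delta projected back through that layer's weights. The listing below is a minimal stand-alone sketch of that convention on the XOR data used by test_mlp(); it is illustrative only — the single hidden layer and names such as W_h, W_o, b_h, b_o are assumptions made for the sketch, not code from the repository.

# Minimal sketch (illustrative, not repository code) of the update rule the
# patches above use. Assumption: derivative helpers take the *activated*
# output y, matching dsigmoid/dtanh/dReLU in utils.py.
import numpy

def tanh(x):
    return numpy.tanh(x)

def dtanh(y):
    return 1. - y * y                          # y is the tanh output, as in utils.py

def softmax(x):
    e = numpy.exp(x - numpy.max(x, axis=1, keepdims=True))   # prevent overflow
    return e / e.sum(axis=1, keepdims=True)

rng = numpy.random.RandomState(123)

# toy XOR data, as in test_mlp()
x = numpy.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=float)
y = numpy.array([[0, 1], [1, 0], [1, 0], [0, 1]], dtype=float)

n_in, n_hidden, lr = 2, 3, 0.1
W_h = rng.uniform(low=-1. / n_in, high=1. / n_in, size=(n_in, n_hidden))
b_h = numpy.zeros(n_hidden)
W_o = numpy.zeros((n_hidden, 2))               # softmax layer starts at zero
b_o = numpy.zeros(2)

for epoch in range(5000):
    h = tanh(numpy.dot(x, W_h) + b_h)          # hidden forward
    p = softmax(numpy.dot(h, W_o) + b_o)       # output forward

    d_o = y - p                                # output delta: label - prediction
    W_o += lr * numpy.dot(h.T, d_o)
    b_o += lr * numpy.mean(d_o, axis=0)

    d_h = dtanh(h) * numpy.dot(d_o, W_o.T)     # hidden delta via dtanh(hidden output)
    W_h += lr * numpy.dot(x.T, d_h)
    b_h += lr * numpy.mean(d_h, axis=0)

print(softmax(numpy.dot(tanh(numpy.dot(x, W_h) + b_h), W_o) + b_o))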
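The Dropout changes above (Dropout.py, HiddenLayer.py, Dropout.java) settle two details: during training each hidden layer's output is multiplied by a binary mask drawn with rng.binomial(n=1, p=1 - p_dropout), and the same mask is applied to that layer's delta in the backward pass; at prediction time no mask is used and the learned weights are instead scaled by (1 - p_dropout), as the later bug-fix commits do. The short sketch below illustrates only those two steps on a single ReLU layer; the shapes, the placeholder delta d_from_above, and the helper names are assumptions made for illustration, not repository code.

# Minimal sketch (illustrative) of the dropout mechanics in the patches above:
# mask activations and their deltas during training, scale the weights by
# (1 - p_dropout) at prediction time.
import numpy

def ReLU(z):
    return z * (z > 0)

def dReLU(y):
    return 1. * (y > 0)

rng = numpy.random.RandomState(123)
p_dropout = 0.5

x = numpy.array([[0., 1.], [1., 0.]])
W = rng.uniform(low=-0.5, high=0.5, size=(2, 10))   # one hidden layer, 2 -> 10 units
b = numpy.zeros(10)

# training-time forward pass: drop each unit with probability p_dropout
h = ReLU(numpy.dot(x, W) + b)
mask = rng.binomial(size=h.shape, n=1, p=1 - p_dropout)   # 1 = keep, 0 = drop
h_dropped = h * mask

# training-time backward pass: the same mask gates this layer's delta
d_from_above = 0.1 * rng.randn(*h.shape)   # placeholder for the next layer's contribution
d_h = dReLU(h) * d_from_above
d_h *= mask

# prediction-time: no masking; rescale the learned weights instead
W_pred = (1 - p_dropout) * W
h_pred = ReLU(numpy.dot(x, W_pred) + b)
print(h_pred)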