diff --git a/.gitignore b/.gitignore index 190cc57..98c89ef 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,10 @@ *.settings .project .metadata + +java/.idea/* +java/java.iml +java/out/* + +data/* +!data/.gitkeep \ No newline at end of file diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..92d495f --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,22 @@ +Copyright (c) 2017 Yusuke Sugomori + +MIT License + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/README.md b/README.md index 804e867..37f8767 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Deep Learning (Python, C/C++, Java) +## Deep Learning (Python, C, C++, Java, Scala, Go) ### Classes : @@ -17,6 +17,12 @@ - LogisticRegression: Logistic Regression - HiddenLayer: Hidden Layer of Neural Networks + + - MLP: Multiple Layer Perceptron + + - Dropout: Dropout MLP + + - CNN: Convolutional Neural Networks (See [dev branch](https://github.com/yusugomori/DeepLearning/tree/dev).) @@ -36,6 +42,12 @@ - Yusuke Sugomori: Stochastic Gradient Descent for Denoising Autoencoders, http://yusugomori.com/docs/SGD_DA.pdf +### Publication : + - More detailed Java implementations are introduced in my book, Java Deep Learning Essentials. + + The book is available from [Packt Publishing](https://www.packtpub.com/big-data-and-business-intelligence/java-deep-learning-essentials) or [Amazon](http://www.amazon.com/Deep-Learning-Java-Yusuke-Sugomori/dp/1785282190/). + +##### Bug reports / contributions / donations are deeply welcome. +Bitcoin wallet address: 34kZarc2uBU6BMCouUp2iudvZtbmZMPqrA -### Bug reports are deeply welcome. 
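The README section above adds MLP and Dropout to the class list. As a quick orientation, here is a condensed usage sketch drawn from the test_mlp() and test_dropout() functions that this patch adds under python/; it assumes a Python 2 interpreter (the repo code uses xrange and print statements) and that the new modules are on the import path.

```python
# Condensed from the new python/MLP.py and python/Dropout.py test functions
# added later in this patch; both examples learn the XOR mapping.
import numpy
from MLP import MLP
from Dropout import Dropout
from utils import ReLU

x = numpy.array([[0, 0], [0, 1], [1, 0], [1, 1]])   # inputs
y = numpy.array([[0, 1], [1, 0], [1, 0], [0, 1]])   # one-hot XOR labels
rng = numpy.random.RandomState(123)

# plain multi-layer perceptron
mlp = MLP(input=x, label=y, n_in=2, n_hidden=3, n_out=2, rng=rng)
for epoch in xrange(5000):
    mlp.train()
print mlp.predict(x)

# MLP with dropout and ReLU hidden units
clf = Dropout(input=x, label=y, n_in=2, hidden_layer_sizes=[10, 10],
              n_out=2, rng=rng, activation=ReLU)
clf.train(epochs=5000, dropout=True, p_dropout=0.5, rng=rng)
print clf.predict(x)
```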
\ No newline at end of file diff --git a/c/DBN.c b/c/DBN.c index 9423622..eee1e1c 100644 --- a/c/DBN.c +++ b/c/DBN.c @@ -190,8 +190,9 @@ void DBN_predict(DBN* this, int *x, double *y) { for(i=0; in_layers; i++) { layer_input = (double *)malloc(sizeof(double) * this->sigmoid_layers[i].n_out); - linear_output = 0.0; for(k=0; ksigmoid_layers[i].n_out; k++) { + linear_output = 0.0; + for(j=0; jsigmoid_layers[i].n_in; j++) { linear_output += this->sigmoid_layers[i].W[k][j] * prev_layer_input[j]; } @@ -346,7 +347,8 @@ void RBM_contrastive_divergence(RBM* this, int *input, double lr, int k) { for(i=0; in_hidden; i++) { for(j=0; jn_visible; j++) { - this->W[i][j] += lr * (ph_sample[i] * input[j] - nh_means[i] * nv_samples[j]) / this->N; + // this->W[i][j] += lr * (ph_sample[i] * input[j] - nh_means[i] * nv_samples[j]) / this->N; + this->W[i][j] += lr * (ph_mean[i] * input[j] - nh_means[i] * nv_samples[j]) / this->N; } this->hbias[i] += lr * (ph_sample[i] - nh_means[i]) / this->N; } diff --git a/c/LogisticRegression.c b/c/LogisticRegression.c index 0c0c04a..b55c707 100644 --- a/c/LogisticRegression.c +++ b/c/LogisticRegression.c @@ -94,7 +94,7 @@ void test_lr(void) { int i, j, epoch; double learning_rate = 0.1; - double n_epochs = 500; + int n_epochs = 500; int train_N = 6; int test_N = 2; diff --git a/c/RBM.c b/c/RBM.c index 9ea27b2..7e806bb 100644 --- a/c/RBM.c +++ b/c/RBM.c @@ -100,7 +100,8 @@ void RBM_contrastive_divergence(RBM* this, int *input, double lr, int k) { for(i=0; in_hidden; i++) { for(j=0; jn_visible; j++) { - this->W[i][j] += lr * (ph_sample[i] * input[j] - nh_means[i] * nv_samples[j]) / this->N; + // this->W[i][j] += lr * (ph_sample[i] * input[j] - nh_means[i] * nv_samples[j]) / this->N; + this->W[i][j] += lr * (ph_mean[i] * input[j] - nh_means[i] * nv_samples[j]) / this->N; } this->hbias[i] += lr * (ph_sample[i] - nh_means[i]) / this->N; } diff --git a/c/SdA.c b/c/SdA.c index 273d2b8..99170ee 100644 --- a/c/SdA.c +++ b/c/SdA.c @@ -188,8 +188,9 @@ void SdA_predict(SdA* this, int *x, double *y) { for(i=0; in_layers; i++) { layer_input = (double *)malloc(sizeof(double) * this->sigmoid_layers[i].n_out); - linear_output = 0.0; for(k=0; ksigmoid_layers[i].n_out; k++) { + linear_output = 0.0; + for(j=0; jsigmoid_layers[i].n_in; j++) { linear_output += this->sigmoid_layers[i].W[k][j] * prev_layer_input[j]; } diff --git a/cpp/DBN.cpp b/cpp/DBN.cpp index e92bad7..155e081 100644 --- a/cpp/DBN.cpp +++ b/cpp/DBN.cpp @@ -1,33 +1,12 @@ #include #include +#include "utils.h" #include "HiddenLayer.h" #include "RBM.h" #include "LogisticRegression.h" #include "DBN.h" using namespace std; - - -double uniform(double min, double max) { - return rand() / (RAND_MAX + 1.0) * (max - min) + min; -} - -int binomial(int n, double p) { - if(p < 0 || p > 1) return 0; - - int c = 0; - double r; - - for(int i=0; in_out]; - linear_output = 0.0; for(int k=0; kn_out; k++) { + linear_output = 0.0; + for(int j=0; jn_in; j++) { linear_output += sigmoid_layers[i]->W[k][j] * prev_layer_input[j]; } @@ -321,7 +301,8 @@ void RBM::contrastive_divergence(int *input, double lr, int k) { for(int i=0; i #include #include "HiddenLayer.h" +#include "utils.h" using namespace std; - -double uniform(double min, double max) { - return rand() / (RAND_MAX + 1.0) * (max - min) + min; -} - -int binomial(int n, double p) { - if(p < 0 || p > 1) return 0; - - int c = 0; - double r; - - for(int i=0; i #include +#include "utils.h" #include "RBM.h" using namespace std; - -double uniform(double min, double max) { - return rand() / (RAND_MAX + 
1.0) * (max - min) + min; -} - -int binomial(int n, double p) { - if(p < 0 || p > 1) return 0; - - int c = 0; - double r; - - for(int i=0; i #include +#include "utils.h" + #include "HiddenLayer.h" #include "dA.h" #include "LogisticRegression.h" #include "SdA.h" using namespace std; - -double uniform(double min, double max) { - return rand() / (RAND_MAX + 1.0) * (max - min) + min; -} - -int binomial(int n, double p) { - if(p < 0 || p > 1) return 0; - - int c = 0; - double r; - - for(int i=0; in_out]; - linear_output = 0.0; for(int k=0; kn_out; k++) { + linear_output = 0.0; + for(int j=0; jn_in; j++) { linear_output += sigmoid_layers[i]->W[k][j] * prev_layer_input[j]; } diff --git a/cpp/dA.cpp b/cpp/dA.cpp index b5414e5..783327e 100644 --- a/cpp/dA.cpp +++ b/cpp/dA.cpp @@ -1,30 +1,10 @@ #include #include +#include "utils.h" + #include "dA.h" using namespace std; - - -double uniform(double min, double max) { - return rand() / (RAND_MAX + 1.0) * (max - min) + min; -} - -int binomial(int n, double p) { - if(p < 0 || p > 1) return 0; - - int c = 0; - double r; - - for(int i=0; i -#include -using namespace std; - -double uniform(double min, double max) { - return rand() / (RAND_MAX + 1.0) * (max - min) + min; -} - -int binomial(int n, double p) { - if(p < 0 || p > 1) return 0; - - int c = 0; - double r; - - for(int i=0; i +#include +using namespace std; + + +namespace utils { + + double uniform(double min, double max) { + return rand() / (RAND_MAX + 1.0) * (max - min) + min; + } + + int binomial(int n, double p) { + if(p < 0 || p > 1) return 0; + + int c = 0; + double r; + + for(int i=0; i 1 { return 0 } + + c := 0 + var r float64 + + for i := 0; i < n; i++ { + r = rand.Float64() + if r < p { c++ } + } + + return c +} + +func Sigmoid(x float64) float64 { + return 1.0 / (1.0 + math.Exp(-x)) +} diff --git a/java/.gitkeep b/java/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/java/DBN/src/DBN.java b/java/DBN/src/DBN.java deleted file mode 100644 index 238d1ce..0000000 --- a/java/DBN/src/DBN.java +++ /dev/null @@ -1,222 +0,0 @@ -import java.util.Random; - -public class DBN { - public int N; - public int n_ins; - public int[] hidden_layer_sizes; - public int n_outs; - public int n_layers; - public HiddenLayer[] sigmoid_layers; - public RBM[] rbm_layers; - public LogisticRegression log_layer; - public Random rng; - - public static double sigmoid(double x) { - return 1.0 / (1.0 + Math.pow(Math.E, -x)); - } - - - public DBN(int N, int n_ins, int[] hidden_layer_sizes, int n_outs, int n_layers, Random rng) { - int input_size; - - this.N = N; - this.n_ins = n_ins; - this.hidden_layer_sizes = hidden_layer_sizes; - this.n_outs = n_outs; - this.n_layers = n_layers; - - this.sigmoid_layers = new HiddenLayer[n_layers]; - this.rbm_layers = new RBM[n_layers]; - - if(rng == null) this.rng = new Random(1234); - else this.rng = rng; - - // construct multi-layer - for(int i=0; i 1) return 0; - - int c = 0; - double r; - - for(int i=0; i 1) return 0; - - int c = 0; - double r; - - for(int i=0; i 1) return 0; - - int c = 0; - double r; - - for(int i=0; i 1) return 0; - - int c = 0; - double r; - - for(int i=0; i 1) return 0; - - int c = 0; - double r; - - for(int i=0; i 1) return 0; - - int c = 0; - double r; - - for(int i=0; i dropout_masks; + List layer_inputs; + double[] layer_input; + double[] layer_output = new double[0]; + + for(int epoch=0; epoch(n_layers); + layer_inputs = new ArrayList<>(n_layers+1); // +1 for logistic layer + + // forward hiddenLayers + for(int i=0; i=0; i--) { + + if(i == 
n_layers-1) { + prev_W = logisticLayer.W; + } else { + prev_dy = dy.clone(); + prev_W = hiddenLayers[i+1].W; + } + + if(dropout) { + for(int j=0; j activation; + public DoubleFunction dactivation; + + public HiddenLayer(int N, int n_in, int n_out, double[][] W, double[] b, Random rng, String activation) { + this.N = N; + this.n_in = n_in; + this.n_out = n_out; + + if (rng == null) this.rng = new Random(1234); + else this.rng = rng; + + if (W == null) { + this.W = new double[n_out][n_in]; + double a = 1.0 / this.n_in; + + for(int i=0; i sigmoid(x); + this.dactivation = (double x) -> dsigmoid(x); + + } else if (activation == "tanh") { + this.activation = (double x) -> tanh(x); + this.dactivation = (double x) -> dtanh(x); + } else if (activation == "ReLU") { + this.activation = (double x) -> ReLU(x); + this.dactivation = (double x) -> dReLU(x); + } else { + throw new IllegalArgumentException("activation function not supported"); + } + + } + + public double output(double[] input, double[] w, double b) { + double linear_output = 0.0; + for(int j=0; j 1) return 0; + + int c = 0; + double r; + + for(int i=0; i 0) { + return x; + } else { + return 0.; + } + } + + public static double dReLU(double x) { + if(x > 0) { + return 1.; + } else { + return 0.; + } + } +} diff --git a/python/CDBN.py b/python/CDBN.py index 4ac987a..dbf6648 100755 --- a/python/CDBN.py +++ b/python/CDBN.py @@ -1,16 +1,5 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- -''' - DBN w/ continuous-valued inputs (Linear Energy) - - References : - - Y. Bengio, P. Lamblin, D. Popovici, H. Larochelle: Greedy Layer-Wise - Training of Deep Networks, Advances in Neural Information Processing - Systems 19, 2007 - -''' - import sys import numpy from HiddenLayer import HiddenLayer @@ -20,13 +9,11 @@ from DBN import DBN from utils import * - - class CDBN(DBN): def __init__(self, input=None, label=None,\ n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,\ - numpy_rng=None): + rng=None): self.x = input self.y = label @@ -35,8 +22,8 @@ def __init__(self, input=None, label=None,\ self.rbm_layers = [] self.n_layers = len(hidden_layer_sizes) # = len(self.rbm_layers) - if numpy_rng is None: - numpy_rng = numpy.random.RandomState(1234) + if rng is None: + rng = numpy.random.RandomState(1234) assert self.n_layers > 0 @@ -60,7 +47,7 @@ def __init__(self, input=None, label=None,\ sigmoid_layer = HiddenLayer(input=layer_input, n_in=input_size, n_out=hidden_layer_sizes[i], - numpy_rng=numpy_rng, + rng=rng, activation=sigmoid) self.sigmoid_layers.append(sigmoid_layer) @@ -113,7 +100,7 @@ def test_cdbn(pretrain_lr=0.1, pretraining_epochs=1000, k=1, \ rng = numpy.random.RandomState(123) # construct DBN - dbn = CDBN(input=x, label=y, n_ins=6, hidden_layer_sizes=[5, 5], n_outs=2, numpy_rng=rng) + dbn = CDBN(input=x, label=y, n_ins=6, hidden_layer_sizes=[5, 5], n_outs=2, rng=rng) # pre-training (TrainUnsupervisedDBN) dbn.pretrain(lr=pretrain_lr, k=1, epochs=pretraining_epochs) diff --git a/python/CRBM.py b/python/CRBM.py index 0521883..e870047 100755 --- a/python/CRBM.py +++ b/python/CRBM.py @@ -1,16 +1,5 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- -""" - RBM w/ continuous-valued inputs (Linear Energy) - - References : - - Y. Bengio, P. Lamblin, D. Popovici, H. 
Larochelle: Greedy Layer-Wise - Training of Deep Networks, Advances in Neural Information Processing - Systems 19, 2007 - -""" - import sys import numpy from RBM import RBM @@ -30,7 +19,7 @@ def sample_v_given_h(self, h0_sample): ep = numpy.exp(a_h) v1_mean = 1 / (1 - en) - 1 / a_h - U = numpy.array(self.numpy_rng.uniform( + U = numpy.array(self.rng.uniform( low=0, high=1, size=v1_mean.shape)) @@ -53,7 +42,7 @@ def test_crbm(learning_rate=0.1, k=1, training_epochs=1000): rng = numpy.random.RandomState(123) # construct CRBM - rbm = CRBM(input=data, n_visible=6, n_hidden=5, numpy_rng=rng) + rbm = CRBM(input=data, n_visible=6, n_hidden=5, rng=rng) # train for epoch in xrange(training_epochs): diff --git a/python/DBN.py b/python/DBN.py index f639823..b1b351b 100755 --- a/python/DBN.py +++ b/python/DBN.py @@ -1,20 +1,5 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- -''' - Deep Belief Nets (DBN) - - References : - - Y. Bengio, P. Lamblin, D. Popovici, H. Larochelle: Greedy Layer-Wise - Training of Deep Networks, Advances in Neural Information Processing - Systems 19, 2007 - - - - DeepLearningTutorials - https://github.com/lisa-lab/DeepLearningTutorials - -''' - import sys import numpy from HiddenLayer import HiddenLayer @@ -26,7 +11,7 @@ class DBN(object): def __init__(self, input=None, label=None,\ n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,\ - numpy_rng=None): + rng=None): self.x = input self.y = label @@ -35,8 +20,8 @@ def __init__(self, input=None, label=None,\ self.rbm_layers = [] self.n_layers = len(hidden_layer_sizes) # = len(self.rbm_layers) - if numpy_rng is None: - numpy_rng = numpy.random.RandomState(1234) + if rng is None: + rng = numpy.random.RandomState(1234) assert self.n_layers > 0 @@ -60,7 +45,7 @@ def __init__(self, input=None, label=None,\ sigmoid_layer = HiddenLayer(input=layer_input, n_in=input_size, n_out=hidden_layer_sizes[i], - numpy_rng=numpy_rng, + rng=rng, activation=sigmoid) self.sigmoid_layers.append(sigmoid_layer) @@ -100,21 +85,6 @@ def pretrain(self, lr=0.1, k=1, epochs=100): # print >> sys.stderr, \ # 'Pre-training layer %d, epoch %d, cost ' %(i, epoch), cost - # def pretrain(self, lr=0.1, k=1, epochs=100): - # # pre-train layer-wise - # for i in xrange(self.n_layers): - # rbm = self.rbm_layers[i] - - # for epoch in xrange(epochs): - # layer_input = self.x - # for j in xrange(i): - # layer_input = self.sigmoid_layers[j].sample_h_given_v(layer_input) - - # rbm.contrastive_divergence(lr=lr, k=k, input=layer_input) - # # cost = rbm.get_reconstruction_cross_entropy() - # # print >> sys.stderr, \ - # # 'Pre-training layer %d, epoch %d, cost ' %(i, epoch), cost - def finetune(self, lr=0.1, epochs=100): layer_input = self.sigmoid_layers[-1].sample_h_given_v() @@ -158,12 +128,11 @@ def test_dbn(pretrain_lr=0.1, pretraining_epochs=1000, k=1, \ [0, 1], [0, 1], [0, 1]]) - rng = numpy.random.RandomState(123) # construct DBN - dbn = DBN(input=x, label=y, n_ins=6, hidden_layer_sizes=[3, 3], n_outs=2, numpy_rng=rng) + dbn = DBN(input=x, label=y, n_ins=6, hidden_layer_sizes=[3, 3], n_outs=2, rng=rng) # pre-training (TrainUnsupervisedDBN) dbn.pretrain(lr=pretrain_lr, k=1, epochs=pretraining_epochs) diff --git a/python/Dropout.py b/python/Dropout.py new file mode 100755 index 0000000..ba99116 --- /dev/null +++ b/python/Dropout.py @@ -0,0 +1,142 @@ +# -*- coding: utf-8 -*- + +import sys +import numpy +from HiddenLayer import HiddenLayer +from LogisticRegression import LogisticRegression +from utils import * + + +class Dropout(object): + def __init__(self, input, label,\ + 
n_in, hidden_layer_sizes, n_out,\ + rng=None, activation=ReLU): + + self.x = input + self.y = label + + self.hidden_layers = [] + self.n_layers = len(hidden_layer_sizes) + + if rng is None: + rng = numpy.random.RandomState(1234) + + assert self.n_layers > 0 + + + # construct multi-layer + for i in xrange(self.n_layers): + + # layer_size + if i == 0: + input_size = n_in + else: + input_size = hidden_layer_sizes[i-1] + + # layer_input + if i == 0: + layer_input = self.x + + else: + layer_input = self.hidden_layers[-1].output() + + # construct hidden_layer + hidden_layer = HiddenLayer(input=layer_input, + n_in=input_size, + n_out=hidden_layer_sizes[i], + rng=rng, + activation=activation) + + self.hidden_layers.append(hidden_layer) + + + # layer for ouput using Logistic Regression (softmax) + self.log_layer = LogisticRegression(input=self.hidden_layers[-1].output(), + label=self.y, + n_in=hidden_layer_sizes[-1], + n_out=n_out) + + + def train(self, epochs=5000, dropout=True, p_dropout=0.5, rng=None): + + for epoch in xrange(epochs): + dropout_masks = [] # create different masks in each training epoch + + # forward hidden_layers + for i in xrange(self.n_layers): + if i == 0: + layer_input = self.x + + layer_input = self.hidden_layers[i].forward(input=layer_input) + + if dropout == True: + mask = self.hidden_layers[i].dropout(input=layer_input, p=p_dropout, rng=rng) + layer_input *= mask + + dropout_masks.append(mask) + + + # forward & backward log_layer + self.log_layer.train(input=layer_input) + + + # backward hidden_layers + for i in reversed(xrange(0, self.n_layers)): + if i == self.n_layers-1: + prev_layer = self.log_layer + else: + prev_layer = self.hidden_layers[i+1] + + if dropout == True: + self.hidden_layers[i].backward(prev_layer=prev_layer, dropout=True, mask=dropout_masks[i]) + else: + self.hidden_layers[i].backward(prev_layer=prev_layer) + + + + def predict(self, x, dropout=True, p_dropout=0.5): + layer_input = x + + for i in xrange(self.n_layers): + if dropout == True: + self.hidden_layers[i].W = (1 - p_dropout) * self.hidden_layers[i].W + + layer_input = self.hidden_layers[i].output(input=layer_input) + + return self.log_layer.predict(layer_input) + + + +def test_dropout(n_epochs=5000, dropout=True, p_dropout=0.5): + + x = numpy.array([[0, 0], + [0, 1], + [1, 0], + [1, 1]]) + + y = numpy.array([[0, 1], + [1, 0], + [1, 0], + [0, 1]]) + + rng = numpy.random.RandomState(123) + + + # construct Dropout MLP + classifier = Dropout(input=x, label=y, \ + n_in=2, hidden_layer_sizes=[10, 10], n_out=2, \ + rng=rng, activation=ReLU) + + + # train XOR + classifier.train(epochs=n_epochs, dropout=dropout, \ + p_dropout=p_dropout, rng=rng) + + + # test + print classifier.predict(x) + + + +if __name__ == "__main__": + test_dropout() diff --git a/python/HiddenLayer.py b/python/HiddenLayer.py index 72e51e1..a97bc61 100755 --- a/python/HiddenLayer.py +++ b/python/HiddenLayer.py @@ -1,15 +1,5 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- -''' - Hidden Layer - - References : - - DeepLearningTutorials - https://github.com/lisa-lab/DeepLearningTutorials - -''' - import sys import numpy from utils import * @@ -17,49 +7,89 @@ class HiddenLayer(object): def __init__(self, input, n_in, n_out,\ - W=None, b=None, numpy_rng=None, activation=numpy.tanh): + W=None, b=None, rng=None, activation=tanh): - if numpy_rng is None: - numpy_rng = numpy.random.RandomState(1234) + if rng is None: + rng = numpy.random.RandomState(1234) if W is None: a = 1. 
/ n_in - initial_W = numpy.array(numpy_rng.uniform( # initialize W uniformly + W = numpy.array(rng.uniform( # initialize W uniformly low=-a, high=a, size=(n_in, n_out))) - W = initial_W - if b is None: b = numpy.zeros(n_out) # initialize bias 0 + self.rng = rng + self.x = input - self.numpy_rng = numpy_rng - self.input = input self.W = W self.b = b + if activation == tanh: + self.dactivation = dtanh + + elif activation == sigmoid: + self.dactivation = dsigmoid + + elif activation == ReLU: + self.dactivation = dReLU + + else: + raise ValueError('activation function not supported.') + + self.activation = activation + - # self.params = [self.W, self.b] def output(self, input=None): if input is not None: - self.input = input + self.x = input - linear_output = numpy.dot(self.input, self.W) + self.b + linear_output = numpy.dot(self.x, self.W) + self.b + return self.activation(linear_output) + - return (linear_output if self.activation is None - else self.activation(linear_output)) + def forward(self, input=None): + return self.output(input=input) + def backward(self, prev_layer, lr=0.1, input=None, dropout=False, mask=None): + if input is not None: + self.x = input + + d_y = self.dactivation(prev_layer.x) * numpy.dot(prev_layer.d_y, prev_layer.W.T) + + if dropout == True: + d_y *= mask + + self.W += lr * numpy.dot(self.x.T, d_y) + self.b += lr * numpy.mean(d_y, axis=0) + self.d_y = d_y + + + def dropout(self, input, p, rng=None): + if rng is None: + rng = numpy.random.RandomState(123) + + mask = rng.binomial(size=input.shape, + n=1, + p=1-p) # p is the prob of dropping + + return mask + + def sample_h_given_v(self, input=None): if input is not None: - self.input = input + self.x = input v_mean = self.output() - h_sample = self.numpy_rng.binomial(size=v_mean.shape, + h_sample = self.rng.binomial(size=v_mean.shape, n=1, p=v_mean) return h_sample + + diff --git a/python/LogisticRegression.py b/python/LogisticRegression.py index a828a40..708a1b3 100755 --- a/python/LogisticRegression.py +++ b/python/LogisticRegression.py @@ -1,18 +1,5 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- -''' - Logistic Regression - - References : - - Jason Rennie: Logistic Regression, - http://qwone.com/~jason/writing/lr.pdf - - - DeepLearningTutorials - https://github.com/lisa-lab/DeepLearningTutorials - -''' - import sys import numpy from utils import * @@ -22,24 +9,46 @@ class LogisticRegression(object): def __init__(self, input, label, n_in, n_out): self.x = input self.y = label + self.W = numpy.zeros((n_in, n_out)) # initialize W 0 - self.b = numpy.zeros(n_out) # initialize bias 0 + self.b = numpy.zeros(n_out) # initialize bias 0 - # self.params = [self.W, self.b] - def train(self, lr=0.1, input=None, L2_reg=0.00): + def train(self, lr=0.1, input=None, L2_reg=0.00): if input is not None: self.x = input - # p_y_given_x = sigmoid(numpy.dot(self.x, self.W) + self.b) - p_y_given_x = softmax(numpy.dot(self.x, self.W) + self.b) + p_y_given_x = self.output(self.x) d_y = self.y - p_y_given_x - + self.W += lr * numpy.dot(self.x.T, d_y) - lr * L2_reg * self.W self.b += lr * numpy.mean(d_y, axis=0) + self.d_y = d_y - # cost = self.negative_log_likelihood() - # return cost + + # def train(self, lr=0.1, input=None, L2_reg=0.00): + # self.forward(input) + # self.backward(lr, L2_reg) + + # def forward(self, input=None): + # if input is not None: + # self.x = input + + # p_y_given_x = self.output(self.x) + # self.d_y = self.y - p_y_given_x + + # def backward(self, lr=0.1, L2_reg=0.00): + # self.W += lr * numpy.dot(self.x.T, 
self.d_y) - lr * L2_reg * self.W + # self.b += lr * numpy.mean(self.d_y, axis=0) + + + def output(self, x): + # return sigmoid(numpy.dot(x, self.W) + self.b) + return softmax(numpy.dot(x, self.W) + self.b) + + def predict(self, x): + return self.output(x) + def negative_log_likelihood(self): # sigmoid_activation = sigmoid(numpy.dot(self.x, self.W) + self.b) @@ -53,44 +62,41 @@ def negative_log_likelihood(self): return cross_entropy - def predict(self, x): - # return sigmoid(numpy.dot(x, self.W) + self.b) - return softmax(numpy.dot(x, self.W) + self.b) +def test_lr(learning_rate=0.1, n_epochs=500): + rng = numpy.random.RandomState(123) -def test_lr(learning_rate=0.01, n_epochs=200): # training data - x = numpy.array([[1,1,1,0,0,0], - [1,0,1,0,0,0], - [1,1,1,0,0,0], - [0,0,1,1,1,0], - [0,0,1,1,0,0], - [0,0,1,1,1,0]]) - y = numpy.array([[1, 0], - [1, 0], - [1, 0], - [0, 1], - [0, 1], - [0, 1]]) + d = 2 + N = 10 + x1 = rng.randn(N, d) + numpy.array([0, 0]) + x2 = rng.randn(N, d) + numpy.array([20, 10]) + y1 = [[1, 0] for i in xrange(N)] + y2 = [[0, 1] for i in xrange(N)] + + x = numpy.r_[x1.astype(int), x2.astype(int)] + y = numpy.r_[y1, y2] # construct LogisticRegression - classifier = LogisticRegression(input=x, label=y, n_in=6, n_out=2) + classifier = LogisticRegression(input=x, label=y, n_in=d, n_out=2) # train for epoch in xrange(n_epochs): classifier.train(lr=learning_rate) # cost = classifier.negative_log_likelihood() # print >> sys.stderr, 'Training epoch %d, cost is ' % epoch, cost - learning_rate *= 0.95 + learning_rate *= 0.995 # test - x = numpy.array([[1, 1, 0, 0, 0, 0], - [0, 0, 0, 1, 1, 0], - [1, 1, 1, 1, 1, 0]]) + result = classifier.predict(x) + for i in xrange(N): + print result[i] + print + for i in xrange(N): + print result[N+i] - print >> sys.stderr, classifier.predict(x) if __name__ == "__main__": diff --git a/python/MLP.py b/python/MLP.py new file mode 100755 index 0000000..e9ded0b --- /dev/null +++ b/python/MLP.py @@ -0,0 +1,81 @@ +# -*- coding: utf-8 -*- + +import sys +import numpy +from HiddenLayer import HiddenLayer +from LogisticRegression import LogisticRegression +from utils import * + + +class MLP(object): + def __init__(self, input, label, n_in, n_hidden, n_out, rng=None): + + self.x = input + self.y = label + + if rng is None: + rng = numpy.random.RandomState(1234) + + # construct hidden_layer + self.hidden_layer = HiddenLayer(input=self.x, + n_in=n_in, + n_out=n_hidden, + rng=rng, + activation=tanh) + + # construct log_layer + self.log_layer = LogisticRegression(input=self.hidden_layer.output, + label=self.y, + n_in=n_hidden, + n_out=n_out) + + def train(self): + # forward hidden_layer + layer_input = self.hidden_layer.forward() + + # forward & backward log_layer + # self.log_layer.forward(input=layer_input) + self.log_layer.train(input=layer_input) + + # backward hidden_layer + self.hidden_layer.backward(prev_layer=self.log_layer) + + # backward log_layer + # self.log_layer.backward() + + + def predict(self, x): + x = self.hidden_layer.output(input=x) + return self.log_layer.predict(x) + + +def test_mlp(n_epochs=5000): + + x = numpy.array([[0, 0], + [0, 1], + [1, 0], + [1, 1]]) + + y = numpy.array([[0, 1], + [1, 0], + [1, 0], + [0, 1]]) + + + rng = numpy.random.RandomState(123) + + + # construct MLP + classifier = MLP(input=x, label=y, n_in=2, n_hidden=3, n_out=2, rng=rng) + + # train + for epoch in xrange(n_epochs): + classifier.train() + + + # test + print classifier.predict(x) + + +if __name__ == "__main__": + test_mlp() diff --git a/python/RBM.py 
b/python/RBM.py index 781241d..7a127d8 100755 --- a/python/RBM.py +++ b/python/RBM.py @@ -1,38 +1,23 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- -""" - Restricted Boltzmann Machine (RBM) - - References : - - Y. Bengio, P. Lamblin, D. Popovici, H. Larochelle: Greedy Layer-Wise - Training of Deep Networks, Advances in Neural Information Processing - Systems 19, 2007 - - - - DeepLearningTutorials - https://github.com/lisa-lab/DeepLearningTutorials - -""" - import sys import numpy from utils import * class RBM(object): def __init__(self, input=None, n_visible=2, n_hidden=3, \ - W=None, hbias=None, vbias=None, numpy_rng=None): + W=None, hbias=None, vbias=None, rng=None): self.n_visible = n_visible # num of units in visible (input) layer self.n_hidden = n_hidden # num of units in hidden layer - if numpy_rng is None: - numpy_rng = numpy.random.RandomState(1234) + if rng is None: + rng = numpy.random.RandomState(1234) if W is None: a = 1. / n_visible - initial_W = numpy.array(numpy_rng.uniform( # initialize W uniformly + initial_W = numpy.array(rng.uniform( # initialize W uniformly low=-a, high=a, size=(n_visible, n_hidden))) @@ -46,14 +31,12 @@ def __init__(self, input=None, n_visible=2, n_hidden=3, \ vbias = numpy.zeros(n_visible) # initialize v bias 0 - self.numpy_rng = numpy_rng + self.rng = rng self.input = input self.W = W self.hbias = hbias self.vbias = vbias - # self.params = [self.W, self.hbias, self.vbias] - def contrastive_divergence(self, lr=0.1, k=1, input=None): if input is not None: @@ -75,10 +58,10 @@ def contrastive_divergence(self, lr=0.1, k=1, input=None): # chain_end = nv_samples - self.W += lr * (numpy.dot(self.input.T, ph_sample) + self.W += lr * (numpy.dot(self.input.T, ph_mean) - numpy.dot(nv_samples.T, nh_means)) self.vbias += lr * numpy.mean(self.input - nv_samples, axis=0) - self.hbias += lr * numpy.mean(ph_sample - nh_means, axis=0) + self.hbias += lr * numpy.mean(ph_mean - nh_means, axis=0) # cost = self.get_reconstruction_cross_entropy() # return cost @@ -86,7 +69,7 @@ def contrastive_divergence(self, lr=0.1, k=1, input=None): def sample_h_given_v(self, v0_sample): h1_mean = self.propup(v0_sample) - h1_sample = self.numpy_rng.binomial(size=h1_mean.shape, # discrete: binomial + h1_sample = self.rng.binomial(size=h1_mean.shape, # discrete: binomial n=1, p=h1_mean) @@ -95,7 +78,7 @@ def sample_h_given_v(self, v0_sample): def sample_v_given_h(self, h0_sample): v1_mean = self.propdown(h0_sample) - v1_sample = self.numpy_rng.binomial(size=v1_mean.shape, # discrete: binomial + v1_sample = self.rng.binomial(size=v1_mean.shape, # discrete: binomial n=1, p=v1_mean) @@ -153,7 +136,7 @@ def test_rbm(learning_rate=0.1, k=1, training_epochs=1000): rng = numpy.random.RandomState(123) # construct RBM - rbm = RBM(input=data, n_visible=6, n_hidden=2, numpy_rng=rng) + rbm = RBM(input=data, n_visible=6, n_hidden=2, rng=rng) # train for epoch in xrange(training_epochs): diff --git a/python/SdA.py b/python/SdA.py index 4a0f45a..5f8de37 100755 --- a/python/SdA.py +++ b/python/SdA.py @@ -1,20 +1,5 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- - -""" - Stacked Denoising Autoencoders (SdA) - - References : - - P. Vincent, H. Larochelle, Y. Bengio, P.A. 
Manzagol: Extracting and - Composing Robust Features with Denoising Autoencoders, ICML' 08, 1096-1103, - 2008 - - - DeepLearningTutorials - https://github.com/lisa-lab/DeepLearningTutorials - -""" - import sys import numpy from HiddenLayer import HiddenLayer @@ -26,7 +11,7 @@ class SdA(object): def __init__(self, input=None, label=None,\ n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,\ - numpy_rng=None): + rng=None): self.x = input self.y = label @@ -35,8 +20,8 @@ def __init__(self, input=None, label=None,\ self.dA_layers = [] self.n_layers = len(hidden_layer_sizes) # = len(self.rbm_layers) - if numpy_rng is None: - numpy_rng = numpy.random.RandomState(1234) + if rng is None: + rng = numpy.random.RandomState(1234) assert self.n_layers > 0 @@ -60,7 +45,7 @@ def __init__(self, input=None, label=None,\ sigmoid_layer = HiddenLayer(input=layer_input, n_in=input_size, n_out=hidden_layer_sizes[i], - numpy_rng=numpy_rng, + rng=rng, activation=sigmoid) self.sigmoid_layers.append(sigmoid_layer) @@ -118,8 +103,8 @@ def predict(self, x): sigmoid_layer = self.sigmoid_layers[i] layer_input = sigmoid_layer.output(input=layer_input) - out = self.log_layer.predict(layer_input) - return out + return self.log_layer.predict(layer_input) + @@ -152,7 +137,7 @@ def test_SdA(pretrain_lr=0.1, pretraining_epochs=1000, corruption_level=0.3, \ # construct SdA sda = SdA(input=x, label=y, \ - n_ins=20, hidden_layer_sizes=[15, 15], n_outs=2, numpy_rng=rng) + n_ins=20, hidden_layer_sizes=[15, 15], n_outs=2, rng=rng) # pre-training sda.pretrain(lr=pretrain_lr, corruption_level=corruption_level, epochs=pretraining_epochs) diff --git a/python/dA.py b/python/dA.py index 0b911eb..edbf6c7 100755 --- a/python/dA.py +++ b/python/dA.py @@ -1,23 +1,5 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- -""" - Denoising Autoencoders (dA) - - References : - - P. Vincent, H. Larochelle, Y. Bengio, P.A. Manzagol: Extracting and - Composing Robust Features with Denoising Autoencoders, ICML'08, 1096-1103, - 2008 - - - DeepLearningTutorials - https://github.com/lisa-lab/DeepLearningTutorials - - - Yusuke Sugomori: Stochastic Gradient Descent for Denoising Autoencoders, - http://yusugomori.com/docs/SGD_DA.pdf - -""" - - import sys import numpy from utils import * @@ -25,44 +7,39 @@ class dA(object): def __init__(self, input=None, n_visible=2, n_hidden=3, \ - W=None, hbias=None, vbias=None, numpy_rng=None): + W=None, hbias=None, vbias=None, rng=None): self.n_visible = n_visible # num of units in visible (input) layer self.n_hidden = n_hidden # num of units in hidden layer - if numpy_rng is None: - numpy_rng = numpy.random.RandomState(1234) + if rng is None: + rng = numpy.random.RandomState(1234) if W is None: a = 1. 
/ n_visible - initial_W = numpy.array(numpy_rng.uniform( # initialize W uniformly + W = numpy.array(rng.uniform( # initialize W uniformly low=-a, high=a, size=(n_visible, n_hidden))) - W = initial_W - if hbias is None: hbias = numpy.zeros(n_hidden) # initialize h bias 0 if vbias is None: vbias = numpy.zeros(n_visible) # initialize v bias 0 - self.numpy_rng = numpy_rng + self.rng = rng self.x = input self.W = W self.W_prime = self.W.T self.hbias = hbias self.vbias = vbias - # self.params = [self.W, self.hbias, self.vbias] - - def get_corrupted_input(self, input, corruption_level): assert corruption_level < 1 - return self.numpy_rng.binomial(size=input.shape, + return self.rng.binomial(size=input.shape, n=1, p=1-corruption_level) * input @@ -133,7 +110,7 @@ def test_dA(learning_rate=0.1, corruption_level=0.3, training_epochs=50): rng = numpy.random.RandomState(123) # construct dA - da = dA(input=data, n_visible=20, n_hidden=5, numpy_rng=rng) + da = dA(input=data, n_visible=20, n_hidden=5, rng=rng) # train for epoch in xrange(training_epochs): diff --git a/python/utils.py b/python/utils.py index 5c4a748..7aca40d 100755 --- a/python/utils.py +++ b/python/utils.py @@ -1,7 +1,5 @@ -''' ''' -import numpy - +import numpy numpy.seterr(all='ignore') @@ -9,6 +7,15 @@ def sigmoid(x): return 1. / (1 + numpy.exp(-x)) +def dsigmoid(x): + return x * (1. - x) + +def tanh(x): + return numpy.tanh(x) + +def dtanh(x): + return 1. - x * x + def softmax(x): e = numpy.exp(x - numpy.max(x)) # prevent overflow if e.ndim == 1: @@ -17,6 +24,13 @@ def softmax(x): return e / numpy.array([numpy.sum(e, axis=1)]).T # ndim = 2 +def ReLU(x): + return x * (x > 0) + +def dReLU(x): + return 1. * (x > 0) + + # # probability density for the Gaussian dist # def gaussian(x, mean=0.0, scale=1.0): # s = 2 * numpy.power(scale, 2) diff --git a/scala/DBN.scala b/scala/DBN.scala new file mode 100644 index 0000000..1b8df68 --- /dev/null +++ b/scala/DBN.scala @@ -0,0 +1,231 @@ +import scala.util.Random +import scala.math + +class DBN(val N: Int, val n_ins: Int, hidden_layer_sizes: Array[Int], val n_outs: Int, val n_layers: Int, var rng: Random=null) { + + def sigmoid(x: Double): Double = { + return 1.0 / (1.0 + math.pow(math.E, -x)) + } + + + var input_size: Int = 0 + + val sigmoid_layers: Array[HiddenLayer] = new Array[HiddenLayer](n_layers) + val rbm_layers: Array[RBM] = new Array[RBM](n_layers) + + if(rng == null) rng = new Random(1234) + + var i: Int = 0 + // construct multi-layer + for(i <- 0 until n_layers) { + if(i == 0) { + input_size = n_ins + } else { + input_size = hidden_layer_sizes(i-1) + } + + // construct sigmoid_layer + sigmoid_layers(i) = new HiddenLayer(N, input_size, hidden_layer_sizes(i), null, null, rng) + + // construct rbm_layer + rbm_layers(i) = new RBM(N, input_size, hidden_layer_sizes(i), sigmoid_layers(i).W, sigmoid_layers(i).b, null, rng) + + } + + // layer for output using LogisticRegression + val log_layer: LogisticRegression = new LogisticRegression(N, hidden_layer_sizes(n_layers-1), n_outs) + + + def pretrain(train_X: Array[Array[Int]], lr: Double, k: Int, epochs: Int) { + var layer_input: Array[Int] = new Array[Int](0) + var prev_layer_input_size: Int = 0 + var prev_layer_input: Array[Int] = new Array[Int](0) + + var i: Int = 0 + var j: Int = 0 + var epoch: Int = 0 + var n: Int = 0 + var l: Int = 0 + + for(i <- 0 until n_layers) { // layer-wise + for(epoch <- 0 until epochs) { // training epochs + for(n <- 0 until N) { // input x1...xN + // layer input + for(l <- 0 to i) { + if(l == 0) { + layer_input = 
new Array[Int](n_ins) + for(j <- 0 until n_ins) layer_input(j) = train_X(n)(j) + + } else { + if(l == 1) prev_layer_input_size = n_ins + else prev_layer_input_size = hidden_layer_sizes(l-2) + + prev_layer_input = new Array[Int](prev_layer_input_size) + for(j <- 0 until prev_layer_input_size) prev_layer_input(j) = layer_input(j) + + layer_input = new Array[Int](hidden_layer_sizes(l-1)) + sigmoid_layers(l-1).sample_h_given_v(prev_layer_input, layer_input) + } + } + + rbm_layers(i).contrastive_divergence(layer_input, lr, k) + } + } + } + } + + + def finetune(train_X: Array[Array[Int]], train_Y: Array[Array[Int]], lr: Double, epochs: Int) { + var layer_input: Array[Int] = new Array[Int](0) + var prev_layer_input: Array[Int] = new Array[Int](0) + + var epoch: Int = 0 + var n: Int = 0 + var i: Int = 0 + var j: Int = 0 + + for(epoch <- 0 until epochs) { + for(n <- 0 until N) { + + // layer input + for(i <- 0 until n_layers) { + if(i == 0) { + prev_layer_input = new Array[Int](n_ins) + for(j <- 0 until n_ins) prev_layer_input(j) = train_X(n)(j) + } else { + prev_layer_input = new Array[Int](hidden_layer_sizes(i-1)) + for(j <- 0 until hidden_layer_sizes(i-1)) prev_layer_input(j) = layer_input(j) + } + + layer_input = new Array[Int](hidden_layer_sizes(i)) + sigmoid_layers(i).sample_h_given_v(prev_layer_input, layer_input) + } + + log_layer.train(layer_input, train_Y(n), lr) + } + // lr *= 0.95 + } + } + + def predict(x: Array[Int], y: Array[Double]) { + var layer_input: Array[Double] = new Array[Double](0) + var prev_layer_input: Array[Double] = new Array[Double](n_ins) + + var i: Int = 0 + var j: Int = 0 + var k: Int = 0 + + for(j <- 0 until n_ins) prev_layer_input(j) = x(j) + + var linear_outoput: Double = 0 + + // layer activation + for(i <- 0 until n_layers) { + layer_input = new Array[Double](sigmoid_layers(i).n_out) + + for(k <- 0 until sigmoid_layers(i).n_out) { + linear_outoput = 0.0 + + for(j <- 0 until sigmoid_layers(i).n_in) { + linear_outoput += sigmoid_layers(i).W(k)(j) * prev_layer_input(j) + } + linear_outoput += sigmoid_layers(i).b(k) + layer_input(k) = sigmoid(linear_outoput) + } + + if(i < n_layers-1) { + prev_layer_input = new Array[Double](sigmoid_layers(i).n_out) + for(j <- 0 until sigmoid_layers(i).n_out) prev_layer_input(j) = layer_input(j) + } + } + + for(i <- 0 until log_layer.n_out) { + y(i) = 0 + for(j <- 0 until log_layer.n_in) { + y(i) += log_layer.W(i)(j) * layer_input(j) + } + y(i) += log_layer.b(i) + } + + log_layer.softmax(y) + } + +} + + +object DBN { + def test_dbn() { + val rng: Random = new Random(123) + + val pretrain_lr: Double = 0.1 + val pretraining_epochs: Int = 1000 + val k: Int = 1 + val finetune_lr: Double = 0.1 + val finetune_epochs: Int = 500 + + val train_N: Int = 6 + val test_N: Int = 4 + val n_ins: Int = 6 + val n_outs: Int = 2 + val hidden_layer_sizes: Array[Int] = Array(3, 3) + val n_layers = hidden_layer_sizes.length + + + // training data + val train_X: Array[Array[Int]] = Array( + Array(1, 1, 1, 0, 0, 0), + Array(1, 0, 1, 0, 0, 0), + Array(1, 1, 1, 0, 0, 0), + Array(0, 0, 1, 1, 1, 0), + Array(0, 0, 1, 1, 0, 0), + Array(0, 0, 1, 1, 1, 0) + ) + + val train_Y: Array[Array[Int]] = Array( + Array(1, 0), + Array(1, 0), + Array(1, 0), + Array(0, 1), + Array(0, 1), + Array(0, 1) + ) + + // construct DBN + val dbn: DBN = new DBN(train_N, n_ins, hidden_layer_sizes, n_outs, n_layers, rng) + + // pretrain + dbn.pretrain(train_X, pretrain_lr, k, pretraining_epochs); + + // finetune + dbn.finetune(train_X, train_Y, finetune_lr, finetune_epochs); + + + // test 
data + val test_X: Array[Array[Int]] = Array( + Array(1, 1, 0, 0, 0, 0), + Array(1, 1, 1, 1, 0, 0), + Array(0, 0, 0, 1, 1, 0), + Array(0, 0, 1, 1, 1, 0) + ) + + val test_Y: Array[Array[Double]] = Array.ofDim[Double](test_N, n_outs) + + var i: Int = 0 + var j: Int = 0 + + // test + for(i <- 0 until test_N) { + dbn.predict(test_X(i), test_Y(i)) + for(j <- 0 until n_outs) { + print(test_Y(i)(j) + " ") + } + println() + } + + } + + + def main(args: Array[String]) { + test_dbn() + } +} diff --git a/scala/HiddenLayer.scala b/scala/HiddenLayer.scala new file mode 100644 index 0000000..118bc5e --- /dev/null +++ b/scala/HiddenLayer.scala @@ -0,0 +1,73 @@ +import scala.util.Random +import scala.math + +class HiddenLayer(val N: Int, val n_in: Int, val n_out: Int, _W: Array[Array[Double]], _b: Array[Double], var rng: Random=null) { + + + def uniform(min: Double, max: Double): Double = { + return rng.nextDouble() * (max - min) + min + } + + def binomial(n: Int, p: Double): Int = { + if(p < 0 || p > 1) return 0 + + var c: Int = 0 + var r: Double = 0.0 + + var i: Int = 0 + + for(i <- 0 until n) { + r = rng.nextDouble() + if(r < p) c += 1 + } + + return c + } + + def sigmoid(x: Double): Double = { + return 1.0 / (1.0 + math.pow(math.E, -x)) + } + + + if(rng == null) rng = new Random(1234) + + var a: Double = 0.0 + var W: Array[Array[Double]] = Array.ofDim[Double](n_out, n_in) + var b: Array[Double] = new Array[Double](n_out) + + var i: Int = 0 + if(_W == null) { + a = 1.0 / n_in + + for(i <- 0 until n_out) { + for(j <- 0 until n_in) { + W(i)(j) = uniform(-a, a) + } + } + } else { + W = _W + } + + if(_b != null) b = _b + + + def output(input: Array[Int], w: Array[Double], b: Double): Double = { + var linear_output: Double = 0.0 + + var j: Int = 0 + for(j <- 0 until n_in) { + linear_output += w(j) * input(j) + } + linear_output += b + + return sigmoid(linear_output) + } + + def sample_h_given_v(input: Array[Int], sample: Array[Int]) { + var i: Int = 0 + + for(i <- 0 until n_out) { + sample(i) = binomial(1, output(input, W(i), b(i))) + } + } +} diff --git a/scala/LogisticRegression.scala b/scala/LogisticRegression.scala new file mode 100644 index 0000000..bf9b229 --- /dev/null +++ b/scala/LogisticRegression.scala @@ -0,0 +1,133 @@ +// $ scalac LogisticRegression.scala +// $ scala LogisticRegression + +import scala.math + +class LogisticRegression(val N: Int, val n_in: Int, val n_out: Int) { + + val W: Array[Array[Double]] = Array.ofDim[Double](n_out, n_in) + val b: Array[Double] = new Array[Double](n_out) + + def train(x: Array[Int], y: Array[Int], lr: Double) { + val p_y_given_x: Array[Double] = new Array[Double](n_out) + val dy: Array[Double] = new Array[Double](n_out) + + var i: Int = 0 + var j: Int = 0 + for(i <- 0 until n_out) { + p_y_given_x(i) = 0 + for(j <- 0 until n_in) { + p_y_given_x(i) += W(i)(j) * x(j) + } + p_y_given_x(i) += b(i) + } + softmax(p_y_given_x) + + for(i <- 0 until n_out) { + dy(i) = y(i) - p_y_given_x(i) + + for(j <- 0 until n_in) { + W(i)(j) += lr * dy(i) * x(j) / N + } + b(i) += lr * dy(i) / N + } + } + + + def softmax(x: Array[Double]) { + var max: Double = 0.0 + var sum: Double = 0.0 + + var i: Int = 0 + for(i <- 0 until n_out) if(max < x(i)) max = x(i) + + for(i <- 0 until n_out) { + x(i) = math.exp(x(i) - max) + sum += x(i) + } + + for(i <- 0 until n_out) x(i) /= sum + } + + + def predict(x: Array[Int], y: Array[Double]) { + var i: Int = 0 + var j: Int = 0 + for(i <- 0 until n_out) { + y(i) = 0 + for(j <- 0 until n_in) { + y(i) += W(i)(j) * x(j) + } + y(i) += b(i) + } + 
softmax(y) + } + +} + + +object LogisticRegression { + def test_lr() { + val learning_rate: Double = 0.1 + val n_epochs: Int = 500 + + val train_N: Int = 6 + val test_N: Int = 2 + val n_in: Int = 6 + val n_out: Int = 2 + + val train_X: Array[Array[Int]] = Array( + Array(1, 1, 1, 0, 0, 0), + Array(1, 0, 1, 0, 0, 0), + Array(1, 1, 1, 0, 0, 0), + Array(0, 0, 1, 1, 1, 0), + Array(0, 0, 1, 0, 1, 0), + Array(0, 0, 1, 1, 1, 0) + ) + + val train_Y: Array[Array[Int]] = Array( + Array(1, 0), + Array(1, 0), + Array(1, 0), + Array(0, 1), + Array(0, 1), + Array(0, 1) + ) + + // construct + val classifier = new LogisticRegression(train_N, n_in, n_out) + + // train + var epoch: Int = 0 + var i: Int = 0 + for(epoch <- 0 until n_epochs) { + for(i <- 0 until train_N) { + classifier.train(train_X(i), train_Y(i), learning_rate) + } + // learning_rate *= 0.95 + } + + // test data + val test_X: Array[Array[Int]] = Array( + Array(1, 0, 1, 0, 0, 0), + Array(0, 0, 1, 1, 1, 0) + ) + + val test_Y: Array[Array[Double]] = Array.ofDim[Double](test_N, n_out) + + // test + var j: Int = 0 + for(i <- 0 until test_N) { + classifier.predict(test_X(i), test_Y(i)) + for(j <- 0 until n_out) { + printf("%.5f ", test_Y(i)(j)) + } + println() + } + } + + def main(args: Array[String]) { + test_lr() + } + +} diff --git a/scala/RBM.scala b/scala/RBM.scala index 79ae124..ad760fb 100644 --- a/scala/RBM.scala +++ b/scala/RBM.scala @@ -86,7 +86,8 @@ class RBM(val N: Int, val n_visible: Int, val n_hidden: Int, var j: Int = 0 for(i <- 0 until n_hidden) { for(j <- 0 until n_visible) { - W(i)(j) += lr * (ph_sample(i) * input(j) - nh_means(i) * nv_samples(j)) / N + // W(i)(j) += lr * (ph_sample(i) * input(j) - nh_means(i) * nv_samples(j)) / N + W(i)(j) += lr * (ph_mean(i) * input(j) - nh_means(i) * nv_samples(j)) / N } hbias(i) += lr * (ph_sample(i) - nh_means(i)) / N } diff --git a/scala/SdA.scala b/scala/SdA.scala new file mode 100644 index 0000000..3f897e5 --- /dev/null +++ b/scala/SdA.scala @@ -0,0 +1,236 @@ +import scala.util.Random +import scala.math + +class SdA(val N: Int, val n_ins: Int, hidden_layer_sizes: Array[Int], val n_outs: Int, val n_layers:Int, var rng: Random=null) { + + def sigmoid(x: Double): Double = { + return 1.0 / (1.0 + math.pow(math.E, -x)) + } + + var input_size: Int = 0 + + // var hidden_layer_sizes: Array[Int] = new Array[Int](n_layers) + var sigmoid_layers: Array[HiddenLayer] = new Array[HiddenLayer](n_layers) + var dA_layers: Array[dA] = new Array[dA](n_layers) + + if(rng == null) rng = new Random(1234) + + + var i: Int = 0 + + // construct multi-layer + for(i <- 0 until n_layers) { + if(i == 0) { + input_size = n_ins + } else { + input_size = hidden_layer_sizes(i-1) + } + + // construct sigmoid_layer + sigmoid_layers(i) = new HiddenLayer(N, input_size, hidden_layer_sizes(i), null, null, rng) + + // construct dA_layer + dA_layers(i) = new dA(N, input_size, hidden_layer_sizes(i), sigmoid_layers(i).W, sigmoid_layers(i).b, null, rng) + } + + // layer for output using LogisticRegression + val log_layer = new LogisticRegression(N, hidden_layer_sizes(n_layers-1), n_outs) + + + def pretrain(train_X: Array[Array[Int]], lr: Double, corruption_level: Double, epochs: Int) { + var layer_input: Array[Int] = new Array[Int](0) + var prev_layer_input_size: Int = 0 + var prev_layer_input: Array[Int] = new Array[Int](0) + + var i: Int = 0 + var j: Int = 0 + var epoch: Int = 0 + var n: Int = 0 + var l: Int = 0 + + for(i <- 0 until n_layers) { // layer-wise + for(epoch <- 0 until epochs) { // training epochs + for(n <- 0 until N) 
{ // input x1...xN + // layer input + for(l <- 0 to i) { + if(l == 0) { + layer_input = new Array[Int](n_ins) + for(j <- 0 until n_ins) layer_input(j) = train_X(n)(j) + } else { + if(l == 1) prev_layer_input_size = n_ins + else prev_layer_input_size = hidden_layer_sizes(l-2) + + prev_layer_input = new Array[Int](prev_layer_input_size) + for(j <- 0 until prev_layer_input_size) prev_layer_input(j) = layer_input(j) + + layer_input = new Array[Int](hidden_layer_sizes(l-1)) + + sigmoid_layers(l-1).sample_h_given_v(prev_layer_input, layer_input) + } + } + + dA_layers(i).train(layer_input, lr, corruption_level) + } + } + } + + } + + + def finetune(train_X: Array[Array[Int]], train_Y: Array[Array[Int]], lr: Double, epochs: Int) { + var layer_input: Array[Int] = new Array[Int](0) + var prev_layer_input: Array[Int] = new Array[Int](0) + + var epoch: Int = 0 + var n: Int = 0 + + + for(epoch <- 0 until epochs) { + for(n <- 0 until N) { + + // layer input + for(i <- 0 until n_layers) { + if(i == 0) { + prev_layer_input = new Array[Int](n_ins) + for(j <- 0 until n_ins) prev_layer_input(j) = train_X(n)(j) + } else { + prev_layer_input = new Array[Int](hidden_layer_sizes(i-1)) + for(j <- 0 until hidden_layer_sizes(i-1)) prev_layer_input(j) = layer_input(j) + } + + layer_input = new Array[Int](hidden_layer_sizes(i)) + sigmoid_layers(i).sample_h_given_v(prev_layer_input, layer_input) + } + + log_layer.train(layer_input, train_Y(n), lr) + } + // lr *= 0.95 + } + } + + def predict(x: Array[Int], y: Array[Double]) { + var layer_input: Array[Double] = new Array[Double](0) + var prev_layer_input: Array[Double] = new Array[Double](n_ins) + + var j: Int = 0 + for(j <- 0 until n_ins) prev_layer_input(j) = x(j) + + var linear_output: Double = 0.0 + + // layer activation + var i: Int = 0 + var k: Int = 0 + + for(i <- 0 until n_layers) { + layer_input = new Array[Double](sigmoid_layers(i).n_out) + + for(k <- 0 until sigmoid_layers(i).n_out) { + linear_output = 0.0 + + for(j <- 0 until sigmoid_layers(i).n_in) { + linear_output += sigmoid_layers(i).W(k)(j) * prev_layer_input(j) + } + linear_output += sigmoid_layers(i).b(k) + layer_input(k) = sigmoid(linear_output) + } + + if(i < n_layers-1) { + prev_layer_input = new Array[Double](sigmoid_layers(i).n_out) + for(j <- 0 until sigmoid_layers(i).n_out) prev_layer_input(j) = layer_input(j) + } + } + + for(i <- 0 until log_layer.n_out) { + y(i) = 0 + for(j <- 0 until log_layer.n_in) { + y(i) += log_layer.W(i)(j) * layer_input(j) + } + y(i) += log_layer.b(i) + } + + log_layer.softmax(y) + } + +} + + +object SdA { + def test_sda() { + val rng: Random = new Random(123) + + val pretrain_lr: Double = 0.1 + val corruption_level: Double = 0.3 + val pretraining_epochs: Int = 1000 + val finetune_lr: Double = 0.1 + val finetune_epochs: Int = 500 + + val train_N: Int = 10 + val test_N: Int = 4 + val n_ins: Int = 28 + val n_outs: Int = 2 + val hidden_layer_sizes: Array[Int] = Array(15, 15) + val n_layers: Int = hidden_layer_sizes.length + + // training data + val train_X: Array[Array[Int]] = Array( + Array(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + Array(0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + Array(1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + Array(0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + Array(1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + Array(0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), + Array(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1), + Array(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1), + Array(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1), + Array(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1) + ) + + val train_Y: Array[Array[Int]] = Array( + Array(1, 0), + Array(1, 0), + Array(1, 0), + Array(1, 0), + Array(1, 0), + Array(0, 1), + Array(0, 1), + Array(0, 1), + Array(0, 1), + Array(0, 1) + ) + + // construct SdA + val sda:SdA = new SdA(train_N, n_ins, hidden_layer_sizes, n_outs, n_layers, rng) + + // pretrain + sda.pretrain(train_X, pretrain_lr, corruption_level, pretraining_epochs) + + // finetune + sda.finetune(train_X, train_Y, finetune_lr, finetune_epochs) + + // test data + val test_X: Array[Array[Int]] = Array( + Array(1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + Array(1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + Array(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1), + Array(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) + ) + + val test_Y: Array[Array[Double]] = Array.ofDim[Double](test_N, n_outs) + + // test + var i: Int = 0 + var j: Int = 0 + + for(i <- 0 until test_N) { + sda.predict(test_X(i), test_Y(i)) + for(j <- 0 until n_outs) { + print(test_Y(i)(j) + " ") + } + println() + } + } + + def main(args: Array[String]) { + test_sda() + } +}
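Two of the recurring fixes in this patch are worth spelling out. First, DBN_predict and SdA_predict (c/DBN.c, c/SdA.c, and their C++ counterparts) now reset linear_output inside the loop over output units; previously unit k inherited the accumulated sum of units 0..k-1. A minimal sketch of the corrected per-layer forward pass, written here in Python for brevity (sigmoid is the helper from python/utils.py):

```python
# Sketch of the corrected layer forward pass: the accumulator is cleared for
# every output unit k, so each unit sums only its own row of W.
from utils import sigmoid

def layer_forward(W, b, prev_input):          # W: n_out x n_in, b: n_out
    out = []
    for k in range(len(W)):
        linear_output = 0.0                   # reset per output unit (the fix)
        for j in range(len(prev_input)):
            linear_output += W[k][j] * prev_input[j]
        linear_output += b[k]
        out.append(sigmoid(linear_output))
    return out
```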
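Second, the contrastive-divergence weight update in the C, C++, Scala, and Python RBMs now uses the hidden probabilities ph_mean rather than the sampled states ph_sample for the positive statistics, which reduces the variance of the gradient estimate. A vectorized sketch of the corrected CD-1 step, mirroring the updated python/RBM.py (it assumes the sample_h_given_v / sample_v_given_h helpers return [mean, sample] pairs, as in that file):

```python
import numpy

def contrastive_divergence_1(rbm, v0, lr=0.1):
    ph_mean, ph_sample = rbm.sample_h_given_v(v0)             # positive phase
    nv_means, nv_samples = rbm.sample_v_given_h(ph_sample)    # reconstruct visibles
    nh_means, nh_samples = rbm.sample_h_given_v(nv_samples)   # negative phase

    # positive statistics use ph_mean (the fix); negative statistics unchanged
    rbm.W += lr * (numpy.dot(v0.T, ph_mean) - numpy.dot(nv_samples.T, nh_means))
    rbm.vbias += lr * numpy.mean(v0 - nv_samples, axis=0)
    rbm.hbias += lr * numpy.mean(ph_mean - nh_means, axis=0)
```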
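One convention worth noting about the new helpers in python/utils.py (and the matching static methods in the Java HiddenLayer): dsigmoid, dtanh, and dReLU take the activation's output, not its pre-activation input, which is how the new backward passes apply them. A small numerical check, offered as a sketch:

```python
# Sketch: verifies that dsigmoid(y) = y(1 - y) and dtanh(y) = 1 - y^2 match
# the true derivatives when evaluated at the activation's output y = f(z).
from utils import sigmoid, dsigmoid, tanh, dtanh

z = 0.3                                                  # arbitrary pre-activation
eps = 1e-6
for f, df in [(sigmoid, dsigmoid), (tanh, dtanh)]:
    numerical = (f(z + eps) - f(z - eps)) / (2 * eps)    # d f(z) / dz
    analytic = df(f(z))                                  # derivative from the output
    assert abs(numerical - analytic) < 1e-8
```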