diff --git a/.gitignore b/.gitignore
index 190cc57..98c89ef 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,3 +8,10 @@
 *.settings
 .project
 .metadata
+
+java/.idea/*
+java/java.iml
+java/out/*
+
+data/*
+!data/.gitkeep
\ No newline at end of file
diff --git a/LICENSE.txt b/LICENSE.txt
new file mode 100644
index 0000000..92d495f
--- /dev/null
+++ b/LICENSE.txt
@@ -0,0 +1,22 @@
+Copyright (c) 2017 Yusuke Sugomori
+
+MIT License
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/README.md b/README.md
index 735e5d0..37f8767 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-## Deep Learning (Python, C/C++, Java, Scala)
+## Deep Learning (Python, C, C++, Java, Scala, Go)
 
 ### Classes :
 
@@ -17,6 +17,12 @@
   - LogisticRegression: Logistic Regression
 
   - HiddenLayer: Hidden Layer of Neural Networks
+
+  - MLP: Multiple Layer Perceptron
+
+  - Dropout: Dropout MLP
+
+  - CNN: Convolutional Neural Networks (See [dev branch](https://github.com/yusugomori/DeepLearning/tree/dev).)
 
 
 
@@ -36,6 +42,12 @@
 
 - Yusuke Sugomori: Stochastic Gradient Descent for Denoising Autoencoders,
 http://yusugomori.com/docs/SGD_DA.pdf
 
 
+### Publication :
+  - More detailed Java implementations are introduced in my book, Java Deep Learning Essentials.
+
+    The book is available from [Packt Publishing](https://www.packtpub.com/big-data-and-business-intelligence/java-deep-learning-essentials) or [Amazon](http://www.amazon.com/Deep-Learning-Java-Yusuke-Sugomori/dp/1785282190/).
+
+##### Bug reports / contributions / donations are deeply welcome.
+Bitcoin wallet address: 34kZarc2uBU6BMCouUp2iudvZtbmZMPqrA
-### Bug reports are deeply welcome.
\ No newline at end of file diff --git a/c/LogisticRegression.c b/c/LogisticRegression.c index 0c0c04a..b55c707 100644 --- a/c/LogisticRegression.c +++ b/c/LogisticRegression.c @@ -94,7 +94,7 @@ void test_lr(void) { int i, j, epoch; double learning_rate = 0.1; - double n_epochs = 500; + int n_epochs = 500; int train_N = 6; int test_N = 2; diff --git a/cpp/DBN.cpp b/cpp/DBN.cpp index f043e20..155e081 100644 --- a/cpp/DBN.cpp +++ b/cpp/DBN.cpp @@ -1,33 +1,12 @@ #include #include +#include "utils.h" #include "HiddenLayer.h" #include "RBM.h" #include "LogisticRegression.h" #include "DBN.h" using namespace std; - - -double uniform(double min, double max) { - return rand() / (RAND_MAX + 1.0) * (max - min) + min; -} - -int binomial(int n, double p) { - if(p < 0 || p > 1) return 0; - - int c = 0; - double r; - - for(int i=0; i #include #include "HiddenLayer.h" +#include "utils.h" using namespace std; - -double uniform(double min, double max) { - return rand() / (RAND_MAX + 1.0) * (max - min) + min; -} - -int binomial(int n, double p) { - if(p < 0 || p > 1) return 0; - - int c = 0; - double r; - - for(int i=0; i #include +#include "utils.h" #include "RBM.h" using namespace std; - -double uniform(double min, double max) { - return rand() / (RAND_MAX + 1.0) * (max - min) + min; -} - -int binomial(int n, double p) { - if(p < 0 || p > 1) return 0; - - int c = 0; - double r; - - for(int i=0; i #include +#include "utils.h" + #include "HiddenLayer.h" #include "dA.h" #include "LogisticRegression.h" #include "SdA.h" using namespace std; - -double uniform(double min, double max) { - return rand() / (RAND_MAX + 1.0) * (max - min) + min; -} - -int binomial(int n, double p) { - if(p < 0 || p > 1) return 0; - - int c = 0; - double r; - - for(int i=0; i #include +#include "utils.h" + #include "dA.h" using namespace std; - - -double uniform(double min, double max) { - return rand() / (RAND_MAX + 1.0) * (max - min) + min; -} - -int binomial(int n, double p) { - if(p < 0 || p > 1) return 0; - - int c = 0; - double r; - - for(int i=0; i -#include -using namespace std; - -double uniform(double min, double max) { - return rand() / (RAND_MAX + 1.0) * (max - min) + min; -} - -int binomial(int n, double p) { - if(p < 0 || p > 1) return 0; - - int c = 0; - double r; - - for(int i=0; i +#include +using namespace std; + + +namespace utils { + + double uniform(double min, double max) { + return rand() / (RAND_MAX + 1.0) * (max - min) + min; + } + + int binomial(int n, double p) { + if(p < 0 || p > 1) return 0; + + int c = 0; + double r; + + for(int i=0; i 1 { return 0 } + + c := 0 + var r float64 + + for i := 0; i < n; i++ { + r = rand.Float64() + if r < p { c++ } + } + + return c +} + +func Sigmoid(x float64) float64 { + return 1.0 / (1.0 + math.Exp(-x)) +} diff --git a/java/.gitkeep b/java/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/java/DBN/src/DBN.java b/java/DBN/src/DBN.java deleted file mode 100644 index 5b72e68..0000000 --- a/java/DBN/src/DBN.java +++ /dev/null @@ -1,223 +0,0 @@ -import java.util.Random; - -public class DBN { - public int N; - public int n_ins; - public int[] hidden_layer_sizes; - public int n_outs; - public int n_layers; - public HiddenLayer[] sigmoid_layers; - public RBM[] rbm_layers; - public LogisticRegression log_layer; - public Random rng; - - public static double sigmoid(double x) { - return 1.0 / (1.0 + Math.pow(Math.E, -x)); - } - - - public DBN(int N, int n_ins, int[] hidden_layer_sizes, int n_outs, int n_layers, Random rng) { - int input_size; - - this.N = N; - 
this.n_ins = n_ins; - this.hidden_layer_sizes = hidden_layer_sizes; - this.n_outs = n_outs; - this.n_layers = n_layers; - - this.sigmoid_layers = new HiddenLayer[n_layers]; - this.rbm_layers = new RBM[n_layers]; - - if(rng == null) this.rng = new Random(1234); - else this.rng = rng; - - // construct multi-layer - for(int i=0; i 1) return 0; - - int c = 0; - double r; - - for(int i=0; i 1) return 0; - - int c = 0; - double r; - - for(int i=0; i 1) return 0; - - int c = 0; - double r; - - for(int i=0; i 1) return 0; - - int c = 0; - double r; - - for(int i=0; i 1) return 0; - - int c = 0; - double r; - - for(int i=0; i 1) return 0; - - int c = 0; - double r; - - for(int i=0; i dropout_masks; + List layer_inputs; + double[] layer_input; + double[] layer_output = new double[0]; + + for(int epoch=0; epoch(n_layers); + layer_inputs = new ArrayList<>(n_layers+1); // +1 for logistic layer + + // forward hiddenLayers + for(int i=0; i=0; i--) { + + if(i == n_layers-1) { + prev_W = logisticLayer.W; + } else { + prev_dy = dy.clone(); + prev_W = hiddenLayers[i+1].W; + } + + if(dropout) { + for(int j=0; j activation; + public DoubleFunction dactivation; + + public HiddenLayer(int N, int n_in, int n_out, double[][] W, double[] b, Random rng, String activation) { + this.N = N; + this.n_in = n_in; + this.n_out = n_out; + + if (rng == null) this.rng = new Random(1234); + else this.rng = rng; + + if (W == null) { + this.W = new double[n_out][n_in]; + double a = 1.0 / this.n_in; + + for(int i=0; i sigmoid(x); + this.dactivation = (double x) -> dsigmoid(x); + + } else if (activation == "tanh") { + this.activation = (double x) -> tanh(x); + this.dactivation = (double x) -> dtanh(x); + } else if (activation == "ReLU") { + this.activation = (double x) -> ReLU(x); + this.dactivation = (double x) -> dReLU(x); + } else { + throw new IllegalArgumentException("activation function not supported"); + } + + } + + public double output(double[] input, double[] w, double b) { + double linear_output = 0.0; + for(int j=0; j 1) return 0; + + int c = 0; + double r; + + for(int i=0; i 0) { + return x; + } else { + return 0.; + } + } + + public static double dReLU(double x) { + if(x > 0) { + return 1.; + } else { + return 0.; + } + } +} diff --git a/python/CDBN.py b/python/CDBN.py index 4ac987a..dbf6648 100755 --- a/python/CDBN.py +++ b/python/CDBN.py @@ -1,16 +1,5 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- -''' - DBN w/ continuous-valued inputs (Linear Energy) - - References : - - Y. Bengio, P. Lamblin, D. Popovici, H. 
Larochelle: Greedy Layer-Wise - Training of Deep Networks, Advances in Neural Information Processing - Systems 19, 2007 - -''' - import sys import numpy from HiddenLayer import HiddenLayer @@ -20,13 +9,11 @@ from DBN import DBN from utils import * - - class CDBN(DBN): def __init__(self, input=None, label=None,\ n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,\ - numpy_rng=None): + rng=None): self.x = input self.y = label @@ -35,8 +22,8 @@ def __init__(self, input=None, label=None,\ self.rbm_layers = [] self.n_layers = len(hidden_layer_sizes) # = len(self.rbm_layers) - if numpy_rng is None: - numpy_rng = numpy.random.RandomState(1234) + if rng is None: + rng = numpy.random.RandomState(1234) assert self.n_layers > 0 @@ -60,7 +47,7 @@ def __init__(self, input=None, label=None,\ sigmoid_layer = HiddenLayer(input=layer_input, n_in=input_size, n_out=hidden_layer_sizes[i], - numpy_rng=numpy_rng, + rng=rng, activation=sigmoid) self.sigmoid_layers.append(sigmoid_layer) @@ -113,7 +100,7 @@ def test_cdbn(pretrain_lr=0.1, pretraining_epochs=1000, k=1, \ rng = numpy.random.RandomState(123) # construct DBN - dbn = CDBN(input=x, label=y, n_ins=6, hidden_layer_sizes=[5, 5], n_outs=2, numpy_rng=rng) + dbn = CDBN(input=x, label=y, n_ins=6, hidden_layer_sizes=[5, 5], n_outs=2, rng=rng) # pre-training (TrainUnsupervisedDBN) dbn.pretrain(lr=pretrain_lr, k=1, epochs=pretraining_epochs) diff --git a/python/CRBM.py b/python/CRBM.py index 0521883..e870047 100755 --- a/python/CRBM.py +++ b/python/CRBM.py @@ -1,16 +1,5 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- -""" - RBM w/ continuous-valued inputs (Linear Energy) - - References : - - Y. Bengio, P. Lamblin, D. Popovici, H. Larochelle: Greedy Layer-Wise - Training of Deep Networks, Advances in Neural Information Processing - Systems 19, 2007 - -""" - import sys import numpy from RBM import RBM @@ -30,7 +19,7 @@ def sample_v_given_h(self, h0_sample): ep = numpy.exp(a_h) v1_mean = 1 / (1 - en) - 1 / a_h - U = numpy.array(self.numpy_rng.uniform( + U = numpy.array(self.rng.uniform( low=0, high=1, size=v1_mean.shape)) @@ -53,7 +42,7 @@ def test_crbm(learning_rate=0.1, k=1, training_epochs=1000): rng = numpy.random.RandomState(123) # construct CRBM - rbm = CRBM(input=data, n_visible=6, n_hidden=5, numpy_rng=rng) + rbm = CRBM(input=data, n_visible=6, n_hidden=5, rng=rng) # train for epoch in xrange(training_epochs): diff --git a/python/DBN.py b/python/DBN.py index f639823..b1b351b 100755 --- a/python/DBN.py +++ b/python/DBN.py @@ -1,20 +1,5 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- -''' - Deep Belief Nets (DBN) - - References : - - Y. Bengio, P. Lamblin, D. Popovici, H. 
Larochelle: Greedy Layer-Wise - Training of Deep Networks, Advances in Neural Information Processing - Systems 19, 2007 - - - - DeepLearningTutorials - https://github.com/lisa-lab/DeepLearningTutorials - -''' - import sys import numpy from HiddenLayer import HiddenLayer @@ -26,7 +11,7 @@ class DBN(object): def __init__(self, input=None, label=None,\ n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,\ - numpy_rng=None): + rng=None): self.x = input self.y = label @@ -35,8 +20,8 @@ def __init__(self, input=None, label=None,\ self.rbm_layers = [] self.n_layers = len(hidden_layer_sizes) # = len(self.rbm_layers) - if numpy_rng is None: - numpy_rng = numpy.random.RandomState(1234) + if rng is None: + rng = numpy.random.RandomState(1234) assert self.n_layers > 0 @@ -60,7 +45,7 @@ def __init__(self, input=None, label=None,\ sigmoid_layer = HiddenLayer(input=layer_input, n_in=input_size, n_out=hidden_layer_sizes[i], - numpy_rng=numpy_rng, + rng=rng, activation=sigmoid) self.sigmoid_layers.append(sigmoid_layer) @@ -100,21 +85,6 @@ def pretrain(self, lr=0.1, k=1, epochs=100): # print >> sys.stderr, \ # 'Pre-training layer %d, epoch %d, cost ' %(i, epoch), cost - # def pretrain(self, lr=0.1, k=1, epochs=100): - # # pre-train layer-wise - # for i in xrange(self.n_layers): - # rbm = self.rbm_layers[i] - - # for epoch in xrange(epochs): - # layer_input = self.x - # for j in xrange(i): - # layer_input = self.sigmoid_layers[j].sample_h_given_v(layer_input) - - # rbm.contrastive_divergence(lr=lr, k=k, input=layer_input) - # # cost = rbm.get_reconstruction_cross_entropy() - # # print >> sys.stderr, \ - # # 'Pre-training layer %d, epoch %d, cost ' %(i, epoch), cost - def finetune(self, lr=0.1, epochs=100): layer_input = self.sigmoid_layers[-1].sample_h_given_v() @@ -158,12 +128,11 @@ def test_dbn(pretrain_lr=0.1, pretraining_epochs=1000, k=1, \ [0, 1], [0, 1], [0, 1]]) - rng = numpy.random.RandomState(123) # construct DBN - dbn = DBN(input=x, label=y, n_ins=6, hidden_layer_sizes=[3, 3], n_outs=2, numpy_rng=rng) + dbn = DBN(input=x, label=y, n_ins=6, hidden_layer_sizes=[3, 3], n_outs=2, rng=rng) # pre-training (TrainUnsupervisedDBN) dbn.pretrain(lr=pretrain_lr, k=1, epochs=pretraining_epochs) diff --git a/python/Dropout.py b/python/Dropout.py new file mode 100755 index 0000000..ba99116 --- /dev/null +++ b/python/Dropout.py @@ -0,0 +1,142 @@ +# -*- coding: utf-8 -*- + +import sys +import numpy +from HiddenLayer import HiddenLayer +from LogisticRegression import LogisticRegression +from utils import * + + +class Dropout(object): + def __init__(self, input, label,\ + n_in, hidden_layer_sizes, n_out,\ + rng=None, activation=ReLU): + + self.x = input + self.y = label + + self.hidden_layers = [] + self.n_layers = len(hidden_layer_sizes) + + if rng is None: + rng = numpy.random.RandomState(1234) + + assert self.n_layers > 0 + + + # construct multi-layer + for i in xrange(self.n_layers): + + # layer_size + if i == 0: + input_size = n_in + else: + input_size = hidden_layer_sizes[i-1] + + # layer_input + if i == 0: + layer_input = self.x + + else: + layer_input = self.hidden_layers[-1].output() + + # construct hidden_layer + hidden_layer = HiddenLayer(input=layer_input, + n_in=input_size, + n_out=hidden_layer_sizes[i], + rng=rng, + activation=activation) + + self.hidden_layers.append(hidden_layer) + + + # layer for ouput using Logistic Regression (softmax) + self.log_layer = LogisticRegression(input=self.hidden_layers[-1].output(), + label=self.y, + n_in=hidden_layer_sizes[-1], + n_out=n_out) + + + def train(self, 
epochs=5000, dropout=True, p_dropout=0.5, rng=None): + + for epoch in xrange(epochs): + dropout_masks = [] # create different masks in each training epoch + + # forward hidden_layers + for i in xrange(self.n_layers): + if i == 0: + layer_input = self.x + + layer_input = self.hidden_layers[i].forward(input=layer_input) + + if dropout == True: + mask = self.hidden_layers[i].dropout(input=layer_input, p=p_dropout, rng=rng) + layer_input *= mask + + dropout_masks.append(mask) + + + # forward & backward log_layer + self.log_layer.train(input=layer_input) + + + # backward hidden_layers + for i in reversed(xrange(0, self.n_layers)): + if i == self.n_layers-1: + prev_layer = self.log_layer + else: + prev_layer = self.hidden_layers[i+1] + + if dropout == True: + self.hidden_layers[i].backward(prev_layer=prev_layer, dropout=True, mask=dropout_masks[i]) + else: + self.hidden_layers[i].backward(prev_layer=prev_layer) + + + + def predict(self, x, dropout=True, p_dropout=0.5): + layer_input = x + + for i in xrange(self.n_layers): + if dropout == True: + self.hidden_layers[i].W = (1 - p_dropout) * self.hidden_layers[i].W + + layer_input = self.hidden_layers[i].output(input=layer_input) + + return self.log_layer.predict(layer_input) + + + +def test_dropout(n_epochs=5000, dropout=True, p_dropout=0.5): + + x = numpy.array([[0, 0], + [0, 1], + [1, 0], + [1, 1]]) + + y = numpy.array([[0, 1], + [1, 0], + [1, 0], + [0, 1]]) + + rng = numpy.random.RandomState(123) + + + # construct Dropout MLP + classifier = Dropout(input=x, label=y, \ + n_in=2, hidden_layer_sizes=[10, 10], n_out=2, \ + rng=rng, activation=ReLU) + + + # train XOR + classifier.train(epochs=n_epochs, dropout=dropout, \ + p_dropout=p_dropout, rng=rng) + + + # test + print classifier.predict(x) + + + +if __name__ == "__main__": + test_dropout() diff --git a/python/HiddenLayer.py b/python/HiddenLayer.py index 72e51e1..a97bc61 100755 --- a/python/HiddenLayer.py +++ b/python/HiddenLayer.py @@ -1,15 +1,5 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- -''' - Hidden Layer - - References : - - DeepLearningTutorials - https://github.com/lisa-lab/DeepLearningTutorials - -''' - import sys import numpy from utils import * @@ -17,49 +7,89 @@ class HiddenLayer(object): def __init__(self, input, n_in, n_out,\ - W=None, b=None, numpy_rng=None, activation=numpy.tanh): + W=None, b=None, rng=None, activation=tanh): - if numpy_rng is None: - numpy_rng = numpy.random.RandomState(1234) + if rng is None: + rng = numpy.random.RandomState(1234) if W is None: a = 1. 
/ n_in - initial_W = numpy.array(numpy_rng.uniform( # initialize W uniformly + W = numpy.array(rng.uniform( # initialize W uniformly low=-a, high=a, size=(n_in, n_out))) - W = initial_W - if b is None: b = numpy.zeros(n_out) # initialize bias 0 + self.rng = rng + self.x = input - self.numpy_rng = numpy_rng - self.input = input self.W = W self.b = b + if activation == tanh: + self.dactivation = dtanh + + elif activation == sigmoid: + self.dactivation = dsigmoid + + elif activation == ReLU: + self.dactivation = dReLU + + else: + raise ValueError('activation function not supported.') + + self.activation = activation + - # self.params = [self.W, self.b] def output(self, input=None): if input is not None: - self.input = input + self.x = input - linear_output = numpy.dot(self.input, self.W) + self.b + linear_output = numpy.dot(self.x, self.W) + self.b + return self.activation(linear_output) + - return (linear_output if self.activation is None - else self.activation(linear_output)) + def forward(self, input=None): + return self.output(input=input) + def backward(self, prev_layer, lr=0.1, input=None, dropout=False, mask=None): + if input is not None: + self.x = input + + d_y = self.dactivation(prev_layer.x) * numpy.dot(prev_layer.d_y, prev_layer.W.T) + + if dropout == True: + d_y *= mask + + self.W += lr * numpy.dot(self.x.T, d_y) + self.b += lr * numpy.mean(d_y, axis=0) + self.d_y = d_y + + + def dropout(self, input, p, rng=None): + if rng is None: + rng = numpy.random.RandomState(123) + + mask = rng.binomial(size=input.shape, + n=1, + p=1-p) # p is the prob of dropping + + return mask + + def sample_h_given_v(self, input=None): if input is not None: - self.input = input + self.x = input v_mean = self.output() - h_sample = self.numpy_rng.binomial(size=v_mean.shape, + h_sample = self.rng.binomial(size=v_mean.shape, n=1, p=v_mean) return h_sample + + diff --git a/python/LogisticRegression.py b/python/LogisticRegression.py index a828a40..708a1b3 100755 --- a/python/LogisticRegression.py +++ b/python/LogisticRegression.py @@ -1,18 +1,5 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- -''' - Logistic Regression - - References : - - Jason Rennie: Logistic Regression, - http://qwone.com/~jason/writing/lr.pdf - - - DeepLearningTutorials - https://github.com/lisa-lab/DeepLearningTutorials - -''' - import sys import numpy from utils import * @@ -22,24 +9,46 @@ class LogisticRegression(object): def __init__(self, input, label, n_in, n_out): self.x = input self.y = label + self.W = numpy.zeros((n_in, n_out)) # initialize W 0 - self.b = numpy.zeros(n_out) # initialize bias 0 + self.b = numpy.zeros(n_out) # initialize bias 0 - # self.params = [self.W, self.b] - def train(self, lr=0.1, input=None, L2_reg=0.00): + def train(self, lr=0.1, input=None, L2_reg=0.00): if input is not None: self.x = input - # p_y_given_x = sigmoid(numpy.dot(self.x, self.W) + self.b) - p_y_given_x = softmax(numpy.dot(self.x, self.W) + self.b) + p_y_given_x = self.output(self.x) d_y = self.y - p_y_given_x - + self.W += lr * numpy.dot(self.x.T, d_y) - lr * L2_reg * self.W self.b += lr * numpy.mean(d_y, axis=0) + self.d_y = d_y - # cost = self.negative_log_likelihood() - # return cost + + # def train(self, lr=0.1, input=None, L2_reg=0.00): + # self.forward(input) + # self.backward(lr, L2_reg) + + # def forward(self, input=None): + # if input is not None: + # self.x = input + + # p_y_given_x = self.output(self.x) + # self.d_y = self.y - p_y_given_x + + # def backward(self, lr=0.1, L2_reg=0.00): + # self.W += lr * numpy.dot(self.x.T, 
self.d_y) - lr * L2_reg * self.W + # self.b += lr * numpy.mean(self.d_y, axis=0) + + + def output(self, x): + # return sigmoid(numpy.dot(x, self.W) + self.b) + return softmax(numpy.dot(x, self.W) + self.b) + + def predict(self, x): + return self.output(x) + def negative_log_likelihood(self): # sigmoid_activation = sigmoid(numpy.dot(self.x, self.W) + self.b) @@ -53,44 +62,41 @@ def negative_log_likelihood(self): return cross_entropy - def predict(self, x): - # return sigmoid(numpy.dot(x, self.W) + self.b) - return softmax(numpy.dot(x, self.W) + self.b) +def test_lr(learning_rate=0.1, n_epochs=500): + rng = numpy.random.RandomState(123) -def test_lr(learning_rate=0.01, n_epochs=200): # training data - x = numpy.array([[1,1,1,0,0,0], - [1,0,1,0,0,0], - [1,1,1,0,0,0], - [0,0,1,1,1,0], - [0,0,1,1,0,0], - [0,0,1,1,1,0]]) - y = numpy.array([[1, 0], - [1, 0], - [1, 0], - [0, 1], - [0, 1], - [0, 1]]) + d = 2 + N = 10 + x1 = rng.randn(N, d) + numpy.array([0, 0]) + x2 = rng.randn(N, d) + numpy.array([20, 10]) + y1 = [[1, 0] for i in xrange(N)] + y2 = [[0, 1] for i in xrange(N)] + + x = numpy.r_[x1.astype(int), x2.astype(int)] + y = numpy.r_[y1, y2] # construct LogisticRegression - classifier = LogisticRegression(input=x, label=y, n_in=6, n_out=2) + classifier = LogisticRegression(input=x, label=y, n_in=d, n_out=2) # train for epoch in xrange(n_epochs): classifier.train(lr=learning_rate) # cost = classifier.negative_log_likelihood() # print >> sys.stderr, 'Training epoch %d, cost is ' % epoch, cost - learning_rate *= 0.95 + learning_rate *= 0.995 # test - x = numpy.array([[1, 1, 0, 0, 0, 0], - [0, 0, 0, 1, 1, 0], - [1, 1, 1, 1, 1, 0]]) + result = classifier.predict(x) + for i in xrange(N): + print result[i] + print + for i in xrange(N): + print result[N+i] - print >> sys.stderr, classifier.predict(x) if __name__ == "__main__": diff --git a/python/MLP.py b/python/MLP.py new file mode 100755 index 0000000..e9ded0b --- /dev/null +++ b/python/MLP.py @@ -0,0 +1,81 @@ +# -*- coding: utf-8 -*- + +import sys +import numpy +from HiddenLayer import HiddenLayer +from LogisticRegression import LogisticRegression +from utils import * + + +class MLP(object): + def __init__(self, input, label, n_in, n_hidden, n_out, rng=None): + + self.x = input + self.y = label + + if rng is None: + rng = numpy.random.RandomState(1234) + + # construct hidden_layer + self.hidden_layer = HiddenLayer(input=self.x, + n_in=n_in, + n_out=n_hidden, + rng=rng, + activation=tanh) + + # construct log_layer + self.log_layer = LogisticRegression(input=self.hidden_layer.output, + label=self.y, + n_in=n_hidden, + n_out=n_out) + + def train(self): + # forward hidden_layer + layer_input = self.hidden_layer.forward() + + # forward & backward log_layer + # self.log_layer.forward(input=layer_input) + self.log_layer.train(input=layer_input) + + # backward hidden_layer + self.hidden_layer.backward(prev_layer=self.log_layer) + + # backward log_layer + # self.log_layer.backward() + + + def predict(self, x): + x = self.hidden_layer.output(input=x) + return self.log_layer.predict(x) + + +def test_mlp(n_epochs=5000): + + x = numpy.array([[0, 0], + [0, 1], + [1, 0], + [1, 1]]) + + y = numpy.array([[0, 1], + [1, 0], + [1, 0], + [0, 1]]) + + + rng = numpy.random.RandomState(123) + + + # construct MLP + classifier = MLP(input=x, label=y, n_in=2, n_hidden=3, n_out=2, rng=rng) + + # train + for epoch in xrange(n_epochs): + classifier.train() + + + # test + print classifier.predict(x) + + +if __name__ == "__main__": + test_mlp() diff --git a/python/RBM.py 
b/python/RBM.py index 781241d..7a127d8 100755 --- a/python/RBM.py +++ b/python/RBM.py @@ -1,38 +1,23 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- -""" - Restricted Boltzmann Machine (RBM) - - References : - - Y. Bengio, P. Lamblin, D. Popovici, H. Larochelle: Greedy Layer-Wise - Training of Deep Networks, Advances in Neural Information Processing - Systems 19, 2007 - - - - DeepLearningTutorials - https://github.com/lisa-lab/DeepLearningTutorials - -""" - import sys import numpy from utils import * class RBM(object): def __init__(self, input=None, n_visible=2, n_hidden=3, \ - W=None, hbias=None, vbias=None, numpy_rng=None): + W=None, hbias=None, vbias=None, rng=None): self.n_visible = n_visible # num of units in visible (input) layer self.n_hidden = n_hidden # num of units in hidden layer - if numpy_rng is None: - numpy_rng = numpy.random.RandomState(1234) + if rng is None: + rng = numpy.random.RandomState(1234) if W is None: a = 1. / n_visible - initial_W = numpy.array(numpy_rng.uniform( # initialize W uniformly + initial_W = numpy.array(rng.uniform( # initialize W uniformly low=-a, high=a, size=(n_visible, n_hidden))) @@ -46,14 +31,12 @@ def __init__(self, input=None, n_visible=2, n_hidden=3, \ vbias = numpy.zeros(n_visible) # initialize v bias 0 - self.numpy_rng = numpy_rng + self.rng = rng self.input = input self.W = W self.hbias = hbias self.vbias = vbias - # self.params = [self.W, self.hbias, self.vbias] - def contrastive_divergence(self, lr=0.1, k=1, input=None): if input is not None: @@ -75,10 +58,10 @@ def contrastive_divergence(self, lr=0.1, k=1, input=None): # chain_end = nv_samples - self.W += lr * (numpy.dot(self.input.T, ph_sample) + self.W += lr * (numpy.dot(self.input.T, ph_mean) - numpy.dot(nv_samples.T, nh_means)) self.vbias += lr * numpy.mean(self.input - nv_samples, axis=0) - self.hbias += lr * numpy.mean(ph_sample - nh_means, axis=0) + self.hbias += lr * numpy.mean(ph_mean - nh_means, axis=0) # cost = self.get_reconstruction_cross_entropy() # return cost @@ -86,7 +69,7 @@ def contrastive_divergence(self, lr=0.1, k=1, input=None): def sample_h_given_v(self, v0_sample): h1_mean = self.propup(v0_sample) - h1_sample = self.numpy_rng.binomial(size=h1_mean.shape, # discrete: binomial + h1_sample = self.rng.binomial(size=h1_mean.shape, # discrete: binomial n=1, p=h1_mean) @@ -95,7 +78,7 @@ def sample_h_given_v(self, v0_sample): def sample_v_given_h(self, h0_sample): v1_mean = self.propdown(h0_sample) - v1_sample = self.numpy_rng.binomial(size=v1_mean.shape, # discrete: binomial + v1_sample = self.rng.binomial(size=v1_mean.shape, # discrete: binomial n=1, p=v1_mean) @@ -153,7 +136,7 @@ def test_rbm(learning_rate=0.1, k=1, training_epochs=1000): rng = numpy.random.RandomState(123) # construct RBM - rbm = RBM(input=data, n_visible=6, n_hidden=2, numpy_rng=rng) + rbm = RBM(input=data, n_visible=6, n_hidden=2, rng=rng) # train for epoch in xrange(training_epochs): diff --git a/python/SdA.py b/python/SdA.py index 4a0f45a..5f8de37 100755 --- a/python/SdA.py +++ b/python/SdA.py @@ -1,20 +1,5 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- - -""" - Stacked Denoising Autoencoders (SdA) - - References : - - P. Vincent, H. Larochelle, Y. Bengio, P.A. 
Manzagol: Extracting and - Composing Robust Features with Denoising Autoencoders, ICML' 08, 1096-1103, - 2008 - - - DeepLearningTutorials - https://github.com/lisa-lab/DeepLearningTutorials - -""" - import sys import numpy from HiddenLayer import HiddenLayer @@ -26,7 +11,7 @@ class SdA(object): def __init__(self, input=None, label=None,\ n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,\ - numpy_rng=None): + rng=None): self.x = input self.y = label @@ -35,8 +20,8 @@ def __init__(self, input=None, label=None,\ self.dA_layers = [] self.n_layers = len(hidden_layer_sizes) # = len(self.rbm_layers) - if numpy_rng is None: - numpy_rng = numpy.random.RandomState(1234) + if rng is None: + rng = numpy.random.RandomState(1234) assert self.n_layers > 0 @@ -60,7 +45,7 @@ def __init__(self, input=None, label=None,\ sigmoid_layer = HiddenLayer(input=layer_input, n_in=input_size, n_out=hidden_layer_sizes[i], - numpy_rng=numpy_rng, + rng=rng, activation=sigmoid) self.sigmoid_layers.append(sigmoid_layer) @@ -118,8 +103,8 @@ def predict(self, x): sigmoid_layer = self.sigmoid_layers[i] layer_input = sigmoid_layer.output(input=layer_input) - out = self.log_layer.predict(layer_input) - return out + return self.log_layer.predict(layer_input) + @@ -152,7 +137,7 @@ def test_SdA(pretrain_lr=0.1, pretraining_epochs=1000, corruption_level=0.3, \ # construct SdA sda = SdA(input=x, label=y, \ - n_ins=20, hidden_layer_sizes=[15, 15], n_outs=2, numpy_rng=rng) + n_ins=20, hidden_layer_sizes=[15, 15], n_outs=2, rng=rng) # pre-training sda.pretrain(lr=pretrain_lr, corruption_level=corruption_level, epochs=pretraining_epochs) diff --git a/python/dA.py b/python/dA.py index 0b911eb..edbf6c7 100755 --- a/python/dA.py +++ b/python/dA.py @@ -1,23 +1,5 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- -""" - Denoising Autoencoders (dA) - - References : - - P. Vincent, H. Larochelle, Y. Bengio, P.A. Manzagol: Extracting and - Composing Robust Features with Denoising Autoencoders, ICML'08, 1096-1103, - 2008 - - - DeepLearningTutorials - https://github.com/lisa-lab/DeepLearningTutorials - - - Yusuke Sugomori: Stochastic Gradient Descent for Denoising Autoencoders, - http://yusugomori.com/docs/SGD_DA.pdf - -""" - - import sys import numpy from utils import * @@ -25,44 +7,39 @@ class dA(object): def __init__(self, input=None, n_visible=2, n_hidden=3, \ - W=None, hbias=None, vbias=None, numpy_rng=None): + W=None, hbias=None, vbias=None, rng=None): self.n_visible = n_visible # num of units in visible (input) layer self.n_hidden = n_hidden # num of units in hidden layer - if numpy_rng is None: - numpy_rng = numpy.random.RandomState(1234) + if rng is None: + rng = numpy.random.RandomState(1234) if W is None: a = 1. 
/ n_visible - initial_W = numpy.array(numpy_rng.uniform( # initialize W uniformly + W = numpy.array(rng.uniform( # initialize W uniformly low=-a, high=a, size=(n_visible, n_hidden))) - W = initial_W - if hbias is None: hbias = numpy.zeros(n_hidden) # initialize h bias 0 if vbias is None: vbias = numpy.zeros(n_visible) # initialize v bias 0 - self.numpy_rng = numpy_rng + self.rng = rng self.x = input self.W = W self.W_prime = self.W.T self.hbias = hbias self.vbias = vbias - # self.params = [self.W, self.hbias, self.vbias] - - def get_corrupted_input(self, input, corruption_level): assert corruption_level < 1 - return self.numpy_rng.binomial(size=input.shape, + return self.rng.binomial(size=input.shape, n=1, p=1-corruption_level) * input @@ -133,7 +110,7 @@ def test_dA(learning_rate=0.1, corruption_level=0.3, training_epochs=50): rng = numpy.random.RandomState(123) # construct dA - da = dA(input=data, n_visible=20, n_hidden=5, numpy_rng=rng) + da = dA(input=data, n_visible=20, n_hidden=5, rng=rng) # train for epoch in xrange(training_epochs): diff --git a/python/utils.py b/python/utils.py index 5c4a748..7aca40d 100755 --- a/python/utils.py +++ b/python/utils.py @@ -1,7 +1,5 @@ -''' ''' -import numpy - +import numpy numpy.seterr(all='ignore') @@ -9,6 +7,15 @@ def sigmoid(x): return 1. / (1 + numpy.exp(-x)) +def dsigmoid(x): + return x * (1. - x) + +def tanh(x): + return numpy.tanh(x) + +def dtanh(x): + return 1. - x * x + def softmax(x): e = numpy.exp(x - numpy.max(x)) # prevent overflow if e.ndim == 1: @@ -17,6 +24,13 @@ def softmax(x): return e / numpy.array([numpy.sum(e, axis=1)]).T # ndim = 2 +def ReLU(x): + return x * (x > 0) + +def dReLU(x): + return 1. * (x > 0) + + # # probability density for the Gaussian dist # def gaussian(x, mean=0.0, scale=1.0): # s = 2 * numpy.power(scale, 2) diff --git a/scala/LogisticRegression.scala b/scala/LogisticRegression.scala index 2386f5e..bf9b229 100644 --- a/scala/LogisticRegression.scala +++ b/scala/LogisticRegression.scala @@ -113,7 +113,7 @@ object LogisticRegression { Array(0, 0, 1, 1, 1, 0) ) - val test_Y: Array[Array[Double]] = Array.ofDim[Double](test_N, n_in) + val test_Y: Array[Array[Double]] = Array.ofDim[Double](test_N, n_out) // test var j: Int = 0
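
The new python/Dropout.py in this patch trains by drawing a fresh binomial mask for each hidden layer at every epoch and, at prediction time, rescales the learned weights by (1 - p_dropout) instead of masking. A minimal NumPy sketch of that forward pass, with toy values and names that are illustrative only and not part of the patch:

```python
import numpy

rng = numpy.random.RandomState(123)
p_dropout = 0.5

# toy input and one hidden layer's parameters (illustrative values only)
x = numpy.array([[0., 1.], [1., 0.]])
W = rng.uniform(low=-1., high=1., size=(2, 3))
b = numpy.zeros(3)

# training-time forward pass: draw a fresh 0/1 mask and zero out dropped units
h = numpy.tanh(numpy.dot(x, W) + b)
mask = rng.binomial(size=h.shape, n=1, p=1 - p_dropout)  # keep probability = 1 - p_dropout
h_train = h * mask

# prediction-time forward pass: no mask; weights scaled by (1 - p_dropout),
# mirroring Dropout.predict in the patch
h_test = numpy.tanh(numpy.dot(x, (1 - p_dropout) * W) + b)

print(h_train)
print(h_test)
```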