
Commit ff41d30 (parent: 6d6b412)
Author: Yusuke Sugomori
Commit message: SdA.java

File tree: 4 files changed (+526, -0 lines)


java/SdA/src/HiddenLayer.java

Lines changed: 74 additions & 0 deletions
@@ -0,0 +1,74 @@
import java.util.Random;

public class HiddenLayer {
    public int N;          // number of training samples
    public int n_in;       // input dimension
    public int n_out;      // number of hidden units
    public double[][] W;   // weight matrix, n_out x n_in
    public double[] b;     // bias vector
    public Random rng;

    // draw a sample from the uniform distribution on [min, max)
    public double uniform(double min, double max) {
        return rng.nextDouble() * (max - min) + min;
    }

    // number of successes in n Bernoulli(p) trials
    public int binomial(int n, double p) {
        if(p < 0 || p > 1) return 0;

        int c = 0;
        double r;

        for(int i=0; i<n; i++) {
            r = rng.nextDouble();
            if(r < p) c++;
        }

        return c;
    }

    public static double sigmoid(double x) {
        return 1.0 / (1.0 + Math.pow(Math.E, -x));
    }

    public HiddenLayer(int N, int n_in, int n_out, double[][] W, double[] b, Random rng) {
        this.N = N;
        this.n_in = n_in;
        this.n_out = n_out;

        if(rng == null) this.rng = new Random(1234);
        else this.rng = rng;

        if(W == null) {
            // random initialization: W[i][j] ~ U(-1/n_in, 1/n_in)
            this.W = new double[n_out][n_in];
            double a = 1.0 / this.n_in;

            for(int i=0; i<n_out; i++) {
                for(int j=0; j<n_in; j++) {
                    this.W[i][j] = uniform(-a, a);
                }
            }
        } else {
            this.W = W;
        }

        if(b == null) this.b = new double[n_out];
        else this.b = b;
    }

    // activation of a single hidden unit: sigmoid(w . input + b)
    public double output(int[] input, double[] w, double b) {
        double linear_output = 0.0;
        for(int j=0; j<n_in; j++) {
            linear_output += w[j] * input[j];
        }
        linear_output += b;
        return sigmoid(linear_output);
    }

    // sample a binary hidden vector given a visible input
    public void sample_h_given_v(int[] input, int[] sample) {
        for(int i=0; i<n_out; i++) {
            sample[i] = binomial(1, output(input, W[i], b[i]));
        }
    }
}
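
For orientation (not part of the commit): a minimal sketch of using HiddenLayer on its own. The class name HiddenLayerDemo and all input values here are invented for illustration; it assumes HiddenLayer.java above is on the classpath.

import java.util.Random;

public class HiddenLayerDemo {
    public static void main(String[] args) {
        Random rng = new Random(123);
        // 1 sample, 4 visible inputs, 3 hidden units; W and b are initialized internally
        HiddenLayer layer = new HiddenLayer(1, 4, 3, null, null, rng);

        int[] input = {1, 0, 1, 0};
        int[] sample = new int[3];
        layer.sample_h_given_v(input, sample);
        // sample[i] is 1 with probability sigmoid(W[i] . input + b[i])
        for(int s : sample) System.out.print(s + " ");
        System.out.println();
    }
}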

java/SdA/src/LogisticRegression.java

Lines changed: 73 additions & 0 deletions
@@ -0,0 +1,73 @@
public class LogisticRegression {
    public int N;          // number of training samples
    public int n_in;       // input dimension
    public int n_out;      // number of output classes
    public double[][] W;
    public double[] b;

    public LogisticRegression(int N, int n_in, int n_out) {
        this.N = N;
        this.n_in = n_in;
        this.n_out = n_out;

        W = new double[this.n_out][this.n_in];
        b = new double[this.n_out];
    }

    public void train(int[] x, int[] y, double lr) {
        double[] p_y_given_x = new double[n_out];
        double[] dy = new double[n_out];

        // forward pass: p(y|x) = softmax(W x + b)
        for(int i=0; i<n_out; i++) {
            p_y_given_x[i] = 0;
            for(int j=0; j<n_in; j++) {
                p_y_given_x[i] += W[i][j] * x[j];
            }
            p_y_given_x[i] += b[i];
        }
        softmax(p_y_given_x);

        // gradient step on the cross-entropy loss against the one-hot label y
        for(int i=0; i<n_out; i++) {
            dy[i] = y[i] - p_y_given_x[i];

            for(int j=0; j<n_in; j++) {
                W[i][j] += lr * dy[i] * x[j] / N;
            }

            b[i] += lr * dy[i] / N;
        }
    }

    public void softmax(double[] x) {
        double max = 0.0;
        double sum = 0.0;

        // subtract the max before exponentiating, for numerical stability
        for(int i=0; i<n_out; i++) {
            if(max < x[i]) {
                max = x[i];
            }
        }

        for(int i=0; i<n_out; i++) {
            x[i] = Math.exp(x[i] - max);
            sum += x[i];
        }

        for(int i=0; i<n_out; i++) {
            x[i] /= sum;
        }
    }

    public void predict(int[] x, double[] y) {
        for(int i=0; i<n_out; i++) {
            y[i] = 0;
            for(int j=0; j<n_in; j++) {
                y[i] += W[i][j] * x[j];
            }
            y[i] += b[i];
        }

        softmax(y);
    }
}
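
Again for orientation (not part of the commit): a minimal usage sketch of LogisticRegression on its own. The class name LogisticRegressionDemo and the toy data are invented; the class is linearly separable on the first feature, so a few hundred epochs should suffice.

public class LogisticRegressionDemo {
    public static void main(String[] args) {
        int N = 4, n_in = 2, n_out = 2;
        LogisticRegression clf = new LogisticRegression(N, n_in, n_out);

        int[][] X = {{1, 0}, {1, 1}, {0, 0}, {0, 1}};
        int[][] Y = {{1, 0}, {1, 0}, {0, 1}, {0, 1}};  // one-hot labels

        for(int epoch=0; epoch<500; epoch++) {
            for(int n=0; n<N; n++) clf.train(X[n], Y[n], 0.1);
        }

        double[] p = new double[n_out];
        clf.predict(new int[]{1, 0}, p);
        System.out.println(p[0] + " " + p[1]);  // p[0] should dominate
    }
}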

java/SdA/src/SdA.java

Lines changed: 229 additions & 0 deletions
@@ -0,0 +1,229 @@
import java.util.Random;

public class SdA {
    public int N;
    public int n_ins;
    public int[] hidden_layer_sizes;
    public int n_outs;
    public int n_layers;
    public HiddenLayer[] sigmoid_layers;
    public dA[] dA_layers;
    public LogisticRegression log_layer;
    public Random rng;

    public static double sigmoid(double x) {
        return 1.0 / (1.0 + Math.pow(Math.E, -x));
    }

    public SdA(int N, int n_ins, int[] hidden_layer_sizes, int n_outs, int n_layers, Random rng) {
        int input_size;

        this.N = N;
        this.n_ins = n_ins;
        this.hidden_layer_sizes = hidden_layer_sizes;
        this.n_outs = n_outs;
        this.n_layers = n_layers;

        this.sigmoid_layers = new HiddenLayer[n_layers];
        this.dA_layers = new dA[n_layers];

        if(rng == null) this.rng = new Random(1234);
        else this.rng = rng;

        // construct multi-layer
        for(int i=0; i<this.n_layers; i++) {
            if(i == 0) {
                input_size = this.n_ins;
            } else {
                input_size = this.hidden_layer_sizes[i-1];
            }

            // construct sigmoid_layer
            this.sigmoid_layers[i] = new HiddenLayer(this.N, input_size, this.hidden_layer_sizes[i], null, null, rng);

            // construct dA_layer, sharing W and b with the sigmoid layer
            this.dA_layers[i] = new dA(this.N, input_size, this.hidden_layer_sizes[i], this.sigmoid_layers[i].W, this.sigmoid_layers[i].b, null, rng);
        }

        // layer for output using LogisticRegression
        this.log_layer = new LogisticRegression(this.N, this.hidden_layer_sizes[this.n_layers-1], this.n_outs);
    }

    public void pretrain(int[][] train_X, double lr, double corruption_level, int epochs) {
        int[] layer_input = new int[0];
        int prev_layer_input_size;
        int[] prev_layer_input;

        for(int i=0; i<n_layers; i++) {  // layer-wise
            for(int epoch=0; epoch<epochs; epoch++) {  // training epochs
                for(int n=0; n<N; n++) {  // input x1...xN

                    // propagate the raw input up through the already-trained
                    // layers to get layer i's input
                    for(int l=0; l<=i; l++) {
                        if(l == 0) {
                            layer_input = new int[n_ins];
                            for(int j=0; j<n_ins; j++) layer_input[j] = train_X[n][j];
                        } else {
                            if(l == 1) prev_layer_input_size = n_ins;
                            else prev_layer_input_size = hidden_layer_sizes[l-2];

                            prev_layer_input = new int[prev_layer_input_size];
                            for(int j=0; j<prev_layer_input_size; j++) prev_layer_input[j] = layer_input[j];

                            layer_input = new int[hidden_layer_sizes[l-1]];

                            sigmoid_layers[l-1].sample_h_given_v(prev_layer_input, layer_input);
                        }
                    }

                    dA_layers[i].train(layer_input, lr, corruption_level);
                }
            }
        }
    }

    public void finetune(int[][] train_X, int[][] train_Y, double lr, int epochs) {
        int[] layer_input = new int[0];
        int[] prev_layer_input = new int[0];

        for(int epoch=0; epoch<epochs; epoch++) {
            for(int n=0; n<N; n++) {

                // forward-sample through the hidden layers to get the
                // input for the logistic regression layer
                for(int i=0; i<n_layers; i++) {
                    if(i == 0) {
                        prev_layer_input = new int[n_ins];
                        for(int j=0; j<n_ins; j++) prev_layer_input[j] = train_X[n][j];
                    } else {
                        prev_layer_input = new int[hidden_layer_sizes[i-1]];
                        for(int j=0; j<hidden_layer_sizes[i-1]; j++) prev_layer_input[j] = layer_input[j];
                    }

                    layer_input = new int[hidden_layer_sizes[i]];
                    sigmoid_layers[i].sample_h_given_v(prev_layer_input, layer_input);
                }

                log_layer.train(layer_input, train_Y[n], lr);
            }
            // lr *= 0.95;
        }
    }

    public void predict(int[] x, double[] y) {
        double[] layer_input = new double[0];
        double[] prev_layer_input = new double[n_ins];
        for(int j=0; j<n_ins; j++) prev_layer_input[j] = x[j];

        double linear_output;

        // layer activation (deterministic forward pass, no sampling)
        for(int i=0; i<n_layers; i++) {
            layer_input = new double[sigmoid_layers[i].n_out];

            for(int k=0; k<sigmoid_layers[i].n_out; k++) {
                linear_output = 0.0;  // reset the accumulator for each output unit

                for(int j=0; j<sigmoid_layers[i].n_in; j++) {
                    linear_output += sigmoid_layers[i].W[k][j] * prev_layer_input[j];
                }
                linear_output += sigmoid_layers[i].b[k];
                layer_input[k] = sigmoid(linear_output);
            }

            if(i < n_layers-1) {
                prev_layer_input = new double[sigmoid_layers[i].n_out];
                for(int j=0; j<sigmoid_layers[i].n_out; j++) prev_layer_input[j] = layer_input[j];
            }
        }

        for(int i=0; i<log_layer.n_out; i++) {
            y[i] = 0;
            for(int j=0; j<log_layer.n_in; j++) {
                y[i] += log_layer.W[i][j] * layer_input[j];
            }
            y[i] += log_layer.b[i];
        }

        log_layer.softmax(y);
    }

    private static void test_sda() {
        Random rng = new Random(123);

        double pretrain_lr = 0.1;
        double corruption_level = 0.3;
        int pretraining_epochs = 1000;
        double finetune_lr = 0.1;
        int finetune_epochs = 500;

        int train_N = 10;
        int test_N = 4;
        int n_ins = 28;
        int n_outs = 2;
        int[] hidden_layer_sizes = {15, 15};
        int n_layers = hidden_layer_sizes.length;

        // training data
        int[][] train_X = {
            {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
            {0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
            {1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
            {0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
            {1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
            {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
            {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1},
            {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1},
            {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1},
            {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1}
        };

        int[][] train_Y = {
            {1, 0},
            {1, 0},
            {1, 0},
            {1, 0},
            {1, 0},
            {0, 1},
            {0, 1},
            {0, 1},
            {0, 1},
            {0, 1}
        };

        // construct SdA
        SdA sda = new SdA(train_N, n_ins, hidden_layer_sizes, n_outs, n_layers, rng);

        // pretrain
        sda.pretrain(train_X, pretrain_lr, corruption_level, pretraining_epochs);

        // finetune
        sda.finetune(train_X, train_Y, finetune_lr, finetune_epochs);

        // test data
        int[][] test_X = {
            {1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
            {1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
            {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1},
            {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1}
        };

        double[][] test_Y = new double[test_N][n_outs];

        // test
        for(int i=0; i<test_N; i++) {
            sda.predict(test_X[i], test_Y[i]);
            for(int j=0; j<n_outs; j++) {
                System.out.print(test_Y[i][j] + " ");
            }
            System.out.println();
        }
    }

    public static void main(String[] args) {
        test_sda();
    }
}
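
The diff stats report 4 files changed, but only three files appear above. SdA.java also references a dA class (the denoising autoencoder trained during pretraining), presumably the fourth file. The sketch below is not the committed file: only the constructor and train signatures are inferred from the call sites in SdA.java (new dA(N, n_visible, n_hidden, W, hbias, null, rng) and train(x, lr, corruption_level)); the body is a standard single-sample denoising-autoencoder update (corrupt, encode, decode, cross-entropy gradient step), offered as a plausible reconstruction.

import java.util.Random;

// Sketch of the dA interface SdA.java requires; signatures inferred from
// call sites, body is a standard denoising autoencoder and may differ
// from the committed file.
public class dA {
    public int N;
    public int n_visible;
    public int n_hidden;
    public double[][] W;   // shared with the corresponding HiddenLayer
    public double[] hbias; // shared with the corresponding HiddenLayer's b
    public double[] vbias;
    public Random rng;

    public dA(int N, int n_visible, int n_hidden,
              double[][] W, double[] hbias, double[] vbias, Random rng) {
        this.N = N;
        this.n_visible = n_visible;
        this.n_hidden = n_hidden;
        this.rng = (rng == null) ? new Random(1234) : rng;

        if(W == null) {
            // random initialization: W[i][j] ~ U(-1/n_visible, 1/n_visible)
            this.W = new double[n_hidden][n_visible];
            double a = 1.0 / this.n_visible;
            for(int i=0; i<n_hidden; i++)
                for(int j=0; j<n_visible; j++)
                    this.W[i][j] = this.rng.nextDouble() * 2 * a - a;
        } else this.W = W;

        this.hbias = (hbias == null) ? new double[n_hidden] : hbias;
        this.vbias = (vbias == null) ? new double[n_visible] : vbias;
    }

    public static double sigmoid(double x) { return 1.0 / (1.0 + Math.exp(-x)); }

    public void train(int[] x, double lr, double corruption_level) {
        int[] tilde_x = new int[n_visible];
        double[] y = new double[n_hidden];
        double[] z = new double[n_visible];
        double p = 1 - corruption_level;

        // corrupt the input: keep each active unit with probability 1 - corruption_level
        for(int j=0; j<n_visible; j++)
            tilde_x[j] = (x[j] == 0) ? 0 : (rng.nextDouble() < p ? x[j] : 0);

        // encode: y = sigmoid(W tilde_x + hbias)
        for(int i=0; i<n_hidden; i++) {
            double s = hbias[i];
            for(int j=0; j<n_visible; j++) s += W[i][j] * tilde_x[j];
            y[i] = sigmoid(s);
        }

        // decode with tied weights: z = sigmoid(W^T y + vbias)
        for(int j=0; j<n_visible; j++) {
            double s = vbias[j];
            for(int i=0; i<n_hidden; i++) s += W[i][j] * y[i];
            z[j] = sigmoid(s);
        }

        // gradients of the cross-entropy reconstruction error against the clean x
        double[] L_vbias = new double[n_visible];
        double[] L_hbias = new double[n_hidden];

        for(int j=0; j<n_visible; j++) {
            L_vbias[j] = x[j] - z[j];
            vbias[j] += lr * L_vbias[j] / N;
        }

        for(int i=0; i<n_hidden; i++) {
            L_hbias[i] = 0;
            for(int j=0; j<n_visible; j++) L_hbias[i] += W[i][j] * L_vbias[j];
            L_hbias[i] *= y[i] * (1 - y[i]);
            hbias[i] += lr * L_hbias[i] / N;
        }

        for(int i=0; i<n_hidden; i++)
            for(int j=0; j<n_visible; j++)
                W[i][j] += lr * (L_hbias[i] * tilde_x[j] + L_vbias[j] * y[i]) / N;
    }
}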
