
Commit ff41d30 (parent: 6d6b412)
Author: Yusuke Sugomori
Commit message: SdA.java

File tree: 4 files changed (+526, -0 lines)


java/SdA/src/HiddenLayer.java

Lines changed: 74 additions & 0 deletions
@@ -0,0 +1,74 @@
import java.util.Random;

public class HiddenLayer {
    public int N;          // number of training samples
    public int n_in;       // input dimension
    public int n_out;      // number of hidden units
    public double[][] W;   // weight matrix, n_out x n_in
    public double[] b;     // bias vector
    public Random rng;

    // draw a sample from the uniform distribution on [min, max)
    public double uniform(double min, double max) {
        return rng.nextDouble() * (max - min) + min;
    }

    // number of successes in n Bernoulli(p) trials
    public int binomial(int n, double p) {
        if(p < 0 || p > 1) return 0;

        int c = 0;
        double r;

        for(int i=0; i<n; i++) {
            r = rng.nextDouble();
            if(r < p) c++;
        }

        return c;
    }

    public static double sigmoid(double x) {
        return 1.0 / (1.0 + Math.pow(Math.E, -x));
    }

    public HiddenLayer(int N, int n_in, int n_out, double[][] W, double[] b, Random rng) {
        this.N = N;
        this.n_in = n_in;
        this.n_out = n_out;

        if(rng == null) this.rng = new Random(1234);
        else this.rng = rng;

        if(W == null) {
            // random initialization: W[i][j] ~ U(-1/n_in, 1/n_in)
            this.W = new double[n_out][n_in];
            double a = 1.0 / this.n_in;

            for(int i=0; i<n_out; i++) {
                for(int j=0; j<n_in; j++) {
                    this.W[i][j] = uniform(-a, a);
                }
            }
        } else {
            this.W = W;
        }

        if(b == null) this.b = new double[n_out];
        else this.b = b;
    }

    // activation of a single hidden unit: sigmoid(w . input + b)
    public double output(int[] input, double[] w, double b) {
        double linear_output = 0.0;
        for(int j=0; j<n_in; j++) {
            linear_output += w[j] * input[j];
        }
        linear_output += b;
        return sigmoid(linear_output);
    }

    // sample a binary hidden vector given a visible input
    public void sample_h_given_v(int[] input, int[] sample) {
        for(int i=0; i<n_out; i++) {
            sample[i] = binomial(1, output(input, W[i], b[i]));
        }
    }
}
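
For orientation (not part of the commit): a minimal sketch of using HiddenLayer on its own. The class name HiddenLayerDemo and all input values here are invented for illustration; it assumes HiddenLayer.java above is on the classpath.

import java.util.Random;

public class HiddenLayerDemo {
    public static void main(String[] args) {
        Random rng = new Random(123);
        // 1 sample, 4 visible inputs, 3 hidden units; W and b are initialized internally
        HiddenLayer layer = new HiddenLayer(1, 4, 3, null, null, rng);

        int[] input = {1, 0, 1, 0};
        int[] sample = new int[3];
        layer.sample_h_given_v(input, sample);
        // sample[i] is 1 with probability sigmoid(W[i] . input + b[i])
        for(int s : sample) System.out.print(s + " ");
        System.out.println();
    }
}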

java/SdA/src/LogisticRegression.java

Lines changed: 73 additions & 0 deletions
@@ -0,0 +1,73 @@
public class LogisticRegression {
    public int N;          // number of training samples
    public int n_in;       // input dimension
    public int n_out;      // number of output classes
    public double[][] W;
    public double[] b;

    public LogisticRegression(int N, int n_in, int n_out) {
        this.N = N;
        this.n_in = n_in;
        this.n_out = n_out;

        W = new double[this.n_out][this.n_in];
        b = new double[this.n_out];
    }

    public void train(int[] x, int[] y, double lr) {
        double[] p_y_given_x = new double[n_out];
        double[] dy = new double[n_out];

        // forward pass: p(y|x) = softmax(W x + b)
        for(int i=0; i<n_out; i++) {
            p_y_given_x[i] = 0;
            for(int j=0; j<n_in; j++) {
                p_y_given_x[i] += W[i][j] * x[j];
            }
            p_y_given_x[i] += b[i];
        }
        softmax(p_y_given_x);

        // gradient step on the cross-entropy loss against the one-hot label y
        for(int i=0; i<n_out; i++) {
            dy[i] = y[i] - p_y_given_x[i];

            for(int j=0; j<n_in; j++) {
                W[i][j] += lr * dy[i] * x[j] / N;
            }

            b[i] += lr * dy[i] / N;
        }
    }

    public void softmax(double[] x) {
        double max = 0.0;
        double sum = 0.0;

        // subtract the max before exponentiating, for numerical stability
        for(int i=0; i<n_out; i++) {
            if(max < x[i]) {
                max = x[i];
            }
        }

        for(int i=0; i<n_out; i++) {
            x[i] = Math.exp(x[i] - max);
            sum += x[i];
        }

        for(int i=0; i<n_out; i++) {
            x[i] /= sum;
        }
    }

    public void predict(int[] x, double[] y) {
        for(int i=0; i<n_out; i++) {
            y[i] = 0;
            for(int j=0; j<n_in; j++) {
                y[i] += W[i][j] * x[j];
            }
            y[i] += b[i];
        }

        softmax(y);
    }
}
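
Again for orientation (not part of the commit): a minimal usage sketch of LogisticRegression on its own. The class name LogisticRegressionDemo and the toy data are invented; the class is linearly separable on the first feature, so a few hundred epochs should suffice.

public class LogisticRegressionDemo {
    public static void main(String[] args) {
        int N = 4, n_in = 2, n_out = 2;
        LogisticRegression clf = new LogisticRegression(N, n_in, n_out);

        int[][] X = {{1, 0}, {1, 1}, {0, 0}, {0, 1}};
        int[][] Y = {{1, 0}, {1, 0}, {0, 1}, {0, 1}};  // one-hot labels

        for(int epoch=0; epoch<500; epoch++) {
            for(int n=0; n<N; n++) clf.train(X[n], Y[n], 0.1);
        }

        double[] p = new double[n_out];
        clf.predict(new int[]{1, 0}, p);
        System.out.println(p[0] + " " + p[1]);  // p[0] should dominate
    }
}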

java/SdA/src/SdA.java

Lines changed: 229 additions & 0 deletions
@@ -0,0 +1,229 @@
import java.util.Random;

public class SdA {
    public int N;
    public int n_ins;
    public int[] hidden_layer_sizes;
    public int n_outs;
    public int n_layers;
    public HiddenLayer[] sigmoid_layers;
    public dA[] dA_layers;
    public LogisticRegression log_layer;
    public Random rng;

    public static double sigmoid(double x) {
        return 1.0 / (1.0 + Math.pow(Math.E, -x));
    }

    public SdA(int N, int n_ins, int[] hidden_layer_sizes, int n_outs, int n_layers, Random rng) {
        int input_size;

        this.N = N;
        this.n_ins = n_ins;
        this.hidden_layer_sizes = hidden_layer_sizes;
        this.n_outs = n_outs;
        this.n_layers = n_layers;

        this.sigmoid_layers = new HiddenLayer[n_layers];
        this.dA_layers = new dA[n_layers];

        if(rng == null) this.rng = new Random(1234);
        else this.rng = rng;

        // construct multi-layer
        for(int i=0; i<this.n_layers; i++) {
            if(i == 0) {
                input_size = this.n_ins;
            } else {
                input_size = this.hidden_layer_sizes[i-1];
            }

            // construct sigmoid_layer
            this.sigmoid_layers[i] = new HiddenLayer(this.N, input_size, this.hidden_layer_sizes[i], null, null, rng);

            // construct dA_layer, sharing W and b with the sigmoid layer
            this.dA_layers[i] = new dA(this.N, input_size, this.hidden_layer_sizes[i], this.sigmoid_layers[i].W, this.sigmoid_layers[i].b, null, rng);
        }

        // layer for output using LogisticRegression
        this.log_layer = new LogisticRegression(this.N, this.hidden_layer_sizes[this.n_layers-1], this.n_outs);
    }

    public void pretrain(int[][] train_X, double lr, double corruption_level, int epochs) {
        int[] layer_input = new int[0];
        int prev_layer_input_size;
        int[] prev_layer_input;

        for(int i=0; i<n_layers; i++) {  // layer-wise
            for(int epoch=0; epoch<epochs; epoch++) {  // training epochs
                for(int n=0; n<N; n++) {  // input x1...xN

                    // propagate the raw input up through the already-trained
                    // layers to get layer i's input
                    for(int l=0; l<=i; l++) {
                        if(l == 0) {
                            layer_input = new int[n_ins];
                            for(int j=0; j<n_ins; j++) layer_input[j] = train_X[n][j];
                        } else {
                            if(l == 1) prev_layer_input_size = n_ins;
                            else prev_layer_input_size = hidden_layer_sizes[l-2];

                            prev_layer_input = new int[prev_layer_input_size];
                            for(int j=0; j<prev_layer_input_size; j++) prev_layer_input[j] = layer_input[j];

                            layer_input = new int[hidden_layer_sizes[l-1]];

                            sigmoid_layers[l-1].sample_h_given_v(prev_layer_input, layer_input);
                        }
                    }

                    dA_layers[i].train(layer_input, lr, corruption_level);
                }
            }
        }
    }

    public void finetune(int[][] train_X, int[][] train_Y, double lr, int epochs) {
        int[] layer_input = new int[0];
        int[] prev_layer_input = new int[0];

        for(int epoch=0; epoch<epochs; epoch++) {
            for(int n=0; n<N; n++) {

                // forward-sample through the hidden layers to get the
                // input for the logistic regression layer
                for(int i=0; i<n_layers; i++) {
                    if(i == 0) {
                        prev_layer_input = new int[n_ins];
                        for(int j=0; j<n_ins; j++) prev_layer_input[j] = train_X[n][j];
                    } else {
                        prev_layer_input = new int[hidden_layer_sizes[i-1]];
                        for(int j=0; j<hidden_layer_sizes[i-1]; j++) prev_layer_input[j] = layer_input[j];
                    }

                    layer_input = new int[hidden_layer_sizes[i]];
                    sigmoid_layers[i].sample_h_given_v(prev_layer_input, layer_input);
                }

                log_layer.train(layer_input, train_Y[n], lr);
            }
            // lr *= 0.95;
        }
    }

    public void predict(int[] x, double[] y) {
        double[] layer_input = new double[0];
        double[] prev_layer_input = new double[n_ins];
        for(int j=0; j<n_ins; j++) prev_layer_input[j] = x[j];

        double linear_output;

        // layer activation (deterministic forward pass, no sampling)
        for(int i=0; i<n_layers; i++) {
            layer_input = new double[sigmoid_layers[i].n_out];

            for(int k=0; k<sigmoid_layers[i].n_out; k++) {
                linear_output = 0.0;  // reset the accumulator for each output unit

                for(int j=0; j<sigmoid_layers[i].n_in; j++) {
                    linear_output += sigmoid_layers[i].W[k][j] * prev_layer_input[j];
                }
                linear_output += sigmoid_layers[i].b[k];
                layer_input[k] = sigmoid(linear_output);
            }

            if(i < n_layers-1) {
                prev_layer_input = new double[sigmoid_layers[i].n_out];
                for(int j=0; j<sigmoid_layers[i].n_out; j++) prev_layer_input[j] = layer_input[j];
            }
        }

        for(int i=0; i<log_layer.n_out; i++) {
            y[i] = 0;
            for(int j=0; j<log_layer.n_in; j++) {
                y[i] += log_layer.W[i][j] * layer_input[j];
            }
            y[i] += log_layer.b[i];
        }

        log_layer.softmax(y);
    }

    private static void test_sda() {
        Random rng = new Random(123);

        double pretrain_lr = 0.1;
        double corruption_level = 0.3;
        int pretraining_epochs = 1000;
        double finetune_lr = 0.1;
        int finetune_epochs = 500;

        int train_N = 10;
        int test_N = 4;
        int n_ins = 28;
        int n_outs = 2;
        int[] hidden_layer_sizes = {15, 15};
        int n_layers = hidden_layer_sizes.length;

        // training data
        int[][] train_X = {
            {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
            {0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
            {1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
            {0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
            {1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
            {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
            {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1},
            {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1},
            {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1},
            {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1}
        };

        int[][] train_Y = {
            {1, 0},
            {1, 0},
            {1, 0},
            {1, 0},
            {1, 0},
            {0, 1},
            {0, 1},
            {0, 1},
            {0, 1},
            {0, 1}
        };

        // construct SdA
        SdA sda = new SdA(train_N, n_ins, hidden_layer_sizes, n_outs, n_layers, rng);

        // pretrain
        sda.pretrain(train_X, pretrain_lr, corruption_level, pretraining_epochs);

        // finetune
        sda.finetune(train_X, train_Y, finetune_lr, finetune_epochs);

        // test data
        int[][] test_X = {
            {1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
            {1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
            {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1},
            {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1}
        };

        double[][] test_Y = new double[test_N][n_outs];

        // test
        for(int i=0; i<test_N; i++) {
            sda.predict(test_X[i], test_Y[i]);
            for(int j=0; j<n_outs; j++) {
                System.out.print(test_Y[i][j] + " ");
            }
            System.out.println();
        }
    }

    public static void main(String[] args) {
        test_sda();
    }
}
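
The diff stats report 4 files changed, but only three files appear above. SdA.java also references a dA class (the denoising autoencoder trained during pretraining), presumably the fourth file. The sketch below is not the committed file: only the constructor and train signatures are inferred from the call sites in SdA.java (new dA(N, n_visible, n_hidden, W, hbias, null, rng) and train(x, lr, corruption_level)); the body is a standard single-sample denoising-autoencoder update (corrupt, encode, decode, cross-entropy gradient step), offered as a plausible reconstruction.

import java.util.Random;

// Sketch of the dA interface SdA.java requires; signatures inferred from
// call sites, body is a standard denoising autoencoder and may differ
// from the committed file.
public class dA {
    public int N;
    public int n_visible;
    public int n_hidden;
    public double[][] W;   // shared with the corresponding HiddenLayer
    public double[] hbias; // shared with the corresponding HiddenLayer's b
    public double[] vbias;
    public Random rng;

    public dA(int N, int n_visible, int n_hidden,
              double[][] W, double[] hbias, double[] vbias, Random rng) {
        this.N = N;
        this.n_visible = n_visible;
        this.n_hidden = n_hidden;
        this.rng = (rng == null) ? new Random(1234) : rng;

        if(W == null) {
            // random initialization: W[i][j] ~ U(-1/n_visible, 1/n_visible)
            this.W = new double[n_hidden][n_visible];
            double a = 1.0 / this.n_visible;
            for(int i=0; i<n_hidden; i++)
                for(int j=0; j<n_visible; j++)
                    this.W[i][j] = this.rng.nextDouble() * 2 * a - a;
        } else this.W = W;

        this.hbias = (hbias == null) ? new double[n_hidden] : hbias;
        this.vbias = (vbias == null) ? new double[n_visible] : vbias;
    }

    public static double sigmoid(double x) { return 1.0 / (1.0 + Math.exp(-x)); }

    public void train(int[] x, double lr, double corruption_level) {
        int[] tilde_x = new int[n_visible];
        double[] y = new double[n_hidden];
        double[] z = new double[n_visible];
        double p = 1 - corruption_level;

        // corrupt the input: keep each active unit with probability 1 - corruption_level
        for(int j=0; j<n_visible; j++)
            tilde_x[j] = (x[j] == 0) ? 0 : (rng.nextDouble() < p ? x[j] : 0);

        // encode: y = sigmoid(W tilde_x + hbias)
        for(int i=0; i<n_hidden; i++) {
            double s = hbias[i];
            for(int j=0; j<n_visible; j++) s += W[i][j] * tilde_x[j];
            y[i] = sigmoid(s);
        }

        // decode with tied weights: z = sigmoid(W^T y + vbias)
        for(int j=0; j<n_visible; j++) {
            double s = vbias[j];
            for(int i=0; i<n_hidden; i++) s += W[i][j] * y[i];
            z[j] = sigmoid(s);
        }

        // gradients of the cross-entropy reconstruction error against the clean x
        double[] L_vbias = new double[n_visible];
        double[] L_hbias = new double[n_hidden];

        for(int j=0; j<n_visible; j++) {
            L_vbias[j] = x[j] - z[j];
            vbias[j] += lr * L_vbias[j] / N;
        }

        for(int i=0; i<n_hidden; i++) {
            L_hbias[i] = 0;
            for(int j=0; j<n_visible; j++) L_hbias[i] += W[i][j] * L_vbias[j];
            L_hbias[i] *= y[i] * (1 - y[i]);
            hbias[i] += lr * L_hbias[i] / N;
        }

        for(int i=0; i<n_hidden; i++)
            for(int j=0; j<n_visible; j++)
                W[i][j] += lr * (L_hbias[i] * tilde_x[j] + L_vbias[j] * y[i]) / N;
    }
}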
