Skip to content

Commit 2dd943a

Browse files
committed
Fix off-by-one error in numbering of epochs in most tutorials
The first epoch consisted of only one minibatch, and no validation phase was used. For each of the following epochs, the validation phase happened just before the last minibatch of the epoch.
1 parent 0440226 commit 2dd943a

File tree

7 files changed

+18
-16
lines changed

7 files changed

+18
-16
lines changed

code/DBN.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -359,7 +359,7 @@ def test_DBN(finetune_lr=0.1, pretraining_epochs=100,
359359
for minibatch_index in xrange(n_train_batches):
360360

361361
minibatch_avg_cost = train_fn(minibatch_index)
362-
iter = epoch * n_train_batches + minibatch_index
362+
iter = (epoch - 1) * n_train_batches + minibatch_index
363363

364364
if (iter + 1) % validation_frequency == 0:
365365

code/SdA.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -395,9 +395,10 @@ def test_SdA(finetune_lr=0.1, pretraining_epochs=15,
395395
epoch = 0
396396

397397
while (epoch < training_epochs) and (not done_looping):
398+
epoch = epoch + 1
398399
for minibatch_index in xrange(n_train_batches):
399400
minibatch_avg_cost = train_fn(minibatch_index)
400-
iter = epoch * n_train_batches + minibatch_index
401+
iter = (epoch - 1) * n_train_batches + minibatch_index
401402

402403
if (iter + 1) % validation_frequency == 0:
403404
validation_losses = validate_model()
@@ -429,7 +430,6 @@ def test_SdA(finetune_lr=0.1, pretraining_epochs=15,
429430
if patience <= iter:
430431
done_looping = True
431432
break
432-
epoch = epoch + 1
433433

434434
end_time = time.clock()
435435
print(('Optimization complete with best validation score of %f %%,'

code/convolutional_mlp.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,7 @@ def evaluate_lenet5(learning_rate=0.1, n_epochs=200,
246246
epoch = epoch + 1
247247
for minibatch_index in xrange(n_train_batches):
248248

249-
iter = epoch * n_train_batches + minibatch_index
249+
iter = (epoch - 1) * n_train_batches + minibatch_index
250250

251251
if iter % 100 == 0:
252252
print 'training @ iter = ', iter
@@ -296,7 +296,7 @@ def evaluate_lenet5(learning_rate=0.1, n_epochs=200,
296296
' ran for %.2fm' % ((end_time - start_time) / 60.))
297297

298298
if __name__ == '__main__':
299-
evaluate_lenet5()
299+
evaluate_lenet5(n_epochs=1, batch_size=1)
300300

301301

302302
def experiment(state, channel):

code/logistic_sgd.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -323,7 +323,7 @@ def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000,
323323

324324
minibatch_avg_cost = train_model(minibatch_index)
325325
# iteration number
326-
iter = epoch * n_train_batches + minibatch_index
326+
iter = (epoch - 1) * n_train_batches + minibatch_index
327327

328328
if (iter + 1) % validation_frequency == 0:
329329
# compute zero-one loss on validation set

code/mlp.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -309,7 +309,7 @@ def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
309309

310310
minibatch_avg_cost = train_model(minibatch_index)
311311
# iteration number
312-
iter = epoch * n_train_batches + minibatch_index
312+
iter = (epoch - 1) * n_train_batches + minibatch_index
313313

314314
if (iter + 1) % validation_frequency == 0:
315315
# compute zero-one loss on validation set

doc/gettingstarted.txt

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -576,17 +576,19 @@ of a strategy based on a geometrically increasing amount of patience.
576576
done_looping = False
577577
epoch = 0
578578
while (epoch < n_epochs) and (not done_looping):
579+
# Report "1" for first epoch, "n_epochs" for last epoch
579580
epoch = epoch + 1
580581
for minibatch_index in xrange(n_train_batches):
581582

582583
d_loss_wrt_params = ... # compute gradient
583584
params -= learning_rate * d_loss_wrt_params # gradient descent
584585

585-
# iteration number
586-
iter = epoch * n_train_batches + minibatch_index
586+
# iteration number. We want it to start at 0.
587+
iter = (epoch - 1) * n_train_batches + minibatch_index
587588
# note that if we do `iter % validation_frequency` it will be
588-
# true for iter = 0 which we do not want
589-
if iter and iter % validation_frequency == 0:
589+
# true for iter = 0 which we do not want. We want it true for
590+
# iter = validation_frequency - 1.
591+
if (iter + 1) % validation_frequency == 0:
590592

591593
this_validation_loss = ... # compute zero-one loss on validation set
592594

doc/logreg.txt

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -396,12 +396,12 @@ The output one should expect is of the form :
396396
.. code-block:: bash
397397

398398
...
399+
epoch 72, minibatch 83/83, validation error 7.510417 %
400+
epoch 72, minibatch 83/83, test error of best model 7.510417 %
399401
epoch 73, minibatch 83/83, validation error 7.500000 %
400-
epoch 73, minibatch 83/83, test error of best model 7.489583 %
401-
epoch 74, minibatch 83/83, validation error 7.479167 %
402-
epoch 74, minibatch 83/83, test error of best model 7.489583 %
403-
Optimization complete with best validation score of 7.479167 %,with test performance 7.489583 %
404-
The code run for 75 epochs, with 1.936983 epochs/sec
402+
epoch 73, minibatch 83/83, test error of best model 7.489583 %
403+
Optimization complete with best validation score of 7.500000 %,with test performance 7.489583 %
404+
The code run for 74 epochs, with 1.936983 epochs/sec
405405

406406

407407
On an Intel(R) Core(TM)2 Duo CPU E8400 @ 3.00 Ghz the code runs with

0 commit comments

Comments
 (0)