diff --git a/README.md b/README.md
index e69c52e0..59ac3300 100644
--- a/README.md
+++ b/README.md
@@ -32,7 +32,6 @@ This repository provides tutorial code for deep learning researchers to learn [P
* [TensorBoard in PyTorch](https://github.com/yunjey/pytorch-tutorial/tree/master/tutorials/04-utils/tensorboard)
-
## Getting Started
@@ -46,12 +45,8 @@ $ python main.py
## Dependencies
* [Python 2.7 or 3.5+](https://www.continuum.io/downloads)
-* [PyTorch 0.4.0](http://pytorch.org/)
-
+* [PyTorch 0.4.0+](http://pytorch.org/)
-
-## Author
-Yunjey Choi/ [@yunjey](https://github.com/yunjey)
diff --git a/tutorials/01-basics/logistic_regression/main.py b/tutorials/01-basics/logistic_regression/main.py
index a5b53b6c..c7eb378b 100644
--- a/tutorials/01-basics/logistic_regression/main.py
+++ b/tutorials/01-basics/logistic_regression/main.py
@@ -5,7 +5,7 @@
# Hyper-parameters
-input_size = 784
+input_size = 28 * 28 # 784
num_classes = 10
num_epochs = 5
batch_size = 100
@@ -43,7 +43,7 @@
for epoch in range(num_epochs):
for i, (images, labels) in enumerate(train_loader):
# Reshape images to (batch_size, input_size)
- images = images.reshape(-1, 28*28)
+ images = images.reshape(-1, input_size)
# Forward pass
outputs = model(images)
@@ -64,7 +64,7 @@
correct = 0
total = 0
for images, labels in test_loader:
- images = images.reshape(-1, 28*28)
+ images = images.reshape(-1, input_size)
outputs = model(images)
_, predicted = torch.max(outputs.data, 1)
total += labels.size(0)
diff --git a/tutorials/01-basics/pytorch_basics/main.py b/tutorials/01-basics/pytorch_basics/main.py
index afaa51ba..744400c2 100644
--- a/tutorials/01-basics/pytorch_basics/main.py
+++ b/tutorials/01-basics/pytorch_basics/main.py
@@ -98,7 +98,7 @@
# ================================================================== #
-# 4. Input pipline #
+# 4. Input pipeline #
# ================================================================== #
# Download and construct CIFAR-10 dataset.
@@ -130,7 +130,7 @@
# ================================================================== #
-# 5. Input pipline for custom dataset #
+# 5. Input pipeline for custom dataset #
# ================================================================== #
# You should build your custom dataset as below.
@@ -186,4 +186,4 @@ def __len__(self):
# Save and load only the model parameters (recommended).
torch.save(resnet.state_dict(), 'params.ckpt')
-resnet.load_state_dict(torch.load('params.ckpt'))
\ No newline at end of file
+resnet.load_state_dict(torch.load('params.ckpt'))
diff --git a/tutorials/02-intermediate/deep_residual_network/main.py b/tutorials/02-intermediate/deep_residual_network/main.py
index f1bb1365..69dbe5fb 100644
--- a/tutorials/02-intermediate/deep_residual_network/main.py
+++ b/tutorials/02-intermediate/deep_residual_network/main.py
@@ -16,6 +16,7 @@
# Hyper-parameters
num_epochs = 80
+batch_size = 100
learning_rate = 0.001
# Image preprocessing modules
@@ -37,11 +38,11 @@
# Data loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
- batch_size=100,
+ batch_size=batch_size,
shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
- batch_size=100,
+ batch_size=batch_size,
shuffle=False)
# 3x3 convolution
diff --git a/tutorials/02-intermediate/language_model/main.py b/tutorials/02-intermediate/language_model/main.py
index 3c03db03..ef135bb7 100644
--- a/tutorials/02-intermediate/language_model/main.py
+++ b/tutorials/02-intermediate/language_model/main.py
@@ -76,7 +76,7 @@ def detach(states):
loss = criterion(outputs, targets.reshape(-1))
# Backward and optimize
- model.zero_grad()
+ optimizer.zero_grad()
loss.backward()
clip_grad_norm_(model.parameters(), 0.5)
optimizer.step()
diff --git a/tutorials/02-intermediate/recurrent_neural_network/main.py b/tutorials/02-intermediate/recurrent_neural_network/main.py
index 9b8685ca..c138c5ad 100644
--- a/tutorials/02-intermediate/recurrent_neural_network/main.py
+++ b/tutorials/02-intermediate/recurrent_neural_network/main.py
@@ -85,6 +85,7 @@ def forward(self, x):
.format(epoch+1, num_epochs, i+1, total_step, loss.item()))
# Test the model
+model.eval()
with torch.no_grad():
correct = 0
total = 0
diff --git a/tutorials/03-advanced/generative_adversarial_network/main.py b/tutorials/03-advanced/generative_adversarial_network/main.py
index 34f41274..c2062cf3 100644
--- a/tutorials/03-advanced/generative_adversarial_network/main.py
+++ b/tutorials/03-advanced/generative_adversarial_network/main.py
@@ -22,10 +22,14 @@
os.makedirs(sample_dir)
# Image processing
+# NOTE: MNIST images are greyscale (one channel), so Normalize below is
+# given a single mean/std value. For a 3-channel RGB dataset use instead:
+#     transforms.Normalize(mean=(0.5, 0.5, 0.5),
+#                          std=(0.5, 0.5, 0.5))
transform = transforms.Compose([
transforms.ToTensor(),
- transforms.Normalize(mean=(0.5, 0.5, 0.5), # 3 for RGB channels
- std=(0.5, 0.5, 0.5))])
+ transforms.Normalize(mean=[0.5], # 1 for greyscale channels
+ std=[0.5])])
# MNIST dataset
mnist = torchvision.datasets.MNIST(root='../../data/',
diff --git a/tutorials/03-advanced/image_captioning/README.md b/tutorials/03-advanced/image_captioning/README.md
index eec7b284..409b62b4 100644
--- a/tutorials/03-advanced/image_captioning/README.md
+++ b/tutorials/03-advanced/image_captioning/README.md
@@ -16,41 +16,41 @@ In the test phase, the encoder part is almost same as the training phase. The on
#### 1. Clone the repositories
```bash
-$ git clone https://github.com/pdollar/coco.git
-$ cd coco/PythonAPI/
-$ make
-$ python setup.py build
-$ python setup.py install
-$ cd ../../
-$ git clone https://github.com/yunjey/pytorch-tutorial.git
-$ cd pytorch-tutorial/tutorials/03-advanced/image_captioning/
+git clone https://github.com/pdollar/coco.git
+cd coco/PythonAPI/
+make
+python setup.py build
+python setup.py install
+cd ../../
+git clone https://github.com/yunjey/pytorch-tutorial.git
+cd pytorch-tutorial/tutorials/03-advanced/image_captioning/
```
#### 2. Download the dataset
```bash
-$ pip install -r requirements.txt
-$ chmod +x download.sh
-$ ./download.sh
+pip install -r requirements.txt
+chmod +x download.sh
+./download.sh
```
#### 3. Preprocessing
```bash
-$ python build_vocab.py
-$ python resize.py
+python build_vocab.py
+python resize.py
```
#### 4. Train the model
```bash
-$ python train.py
+python train.py
```
#### 5. Test the model
```bash
-$ python sample.py --image='png/example.png'
+python sample.py --image='png/example.png'
```
diff --git a/tutorials/03-advanced/image_captioning/sample.py b/tutorials/03-advanced/image_captioning/sample.py
index 23e07efd..74ff40fe 100644
--- a/tutorials/03-advanced/image_captioning/sample.py
+++ b/tutorials/03-advanced/image_captioning/sample.py
@@ -14,7 +14,7 @@
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
def load_image(image_path, transform=None):
- image = Image.open(image_path)
+ image = Image.open(image_path).convert('RGB')
image = image.resize([224, 224], Image.LANCZOS)
if transform is not None:
@@ -69,8 +69,8 @@ def main(args):
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--image', type=str, required=True, help='input image for generating caption')
- parser.add_argument('--encoder_path', type=str, default='models/encoder-2-1000.ckpt', help='path for trained encoder')
- parser.add_argument('--decoder_path', type=str, default='models/decoder-2-1000.ckpt', help='path for trained decoder')
+ parser.add_argument('--encoder_path', type=str, default='models/encoder-5-3000.pkl', help='path for trained encoder')
+ parser.add_argument('--decoder_path', type=str, default='models/decoder-5-3000.pkl', help='path for trained decoder')
parser.add_argument('--vocab_path', type=str, default='data/vocab.pkl', help='path for vocabulary wrapper')
# Model parameters (should be same as paramters in train.py)
@@ -78,4 +78,4 @@ def main(args):
parser.add_argument('--hidden_size', type=int , default=512, help='dimension of lstm hidden states')
parser.add_argument('--num_layers', type=int , default=1, help='number of layers in lstm')
args = parser.parse_args()
- main(args)
\ No newline at end of file
+ main(args)