diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..7eade253 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +*.pkl +*.zip +data/ +.ipynb_checkpoints + diff --git a/README.md b/README.md index f3e4b371..59ac3300 100644 --- a/README.md +++ b/README.md @@ -1,26 +1,35 @@ -
[old top-of-README logo image markup, stripped during extraction]
+[new top-of-README logo image markup, stripped during extraction; presumably it references logo/pytorch_logo_2018.svg, which this diff adds]
--------------------------------------------------------------------------------
-This repository provides tutorial code for deep learning researchers to learn [PyTorch](https://github.com/pytorch/pytorch). In the tutorial, most of the models were implemented with less than 30 lines of code. Before starting this tutorial, it is recommended to finish [Official Pytorch Tutorial](https://github.com/pytorch/tutorials/blob/master/Deep%20Learning%20with%20PyTorch.ipynb).
+This repository provides tutorial code for deep learning researchers to learn [PyTorch](https://github.com/pytorch/pytorch). Most of the models in this tutorial are implemented in fewer than 30 lines of code. Before starting, it is recommended to finish the [official PyTorch tutorial](http://pytorch.org/tutorials/beginner/deep_learning_60min_blitz.html).
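+
+As a quick illustration of that style, the sketch below fits a toy line y = 2x + 3 end to end. It is an editor's example written against the 0.4 API, not one of the repository's files:
+
+```python
+import torch
+import torch.nn as nn
+
+# Toy data drawn from y = 2x + 3 plus a little noise.
+x = torch.randn(100, 1)
+y = 2 * x + 3 + 0.1 * torch.randn(100, 1)
+
+model = nn.Linear(1, 1)                                  # one weight, one bias
+criterion = nn.MSELoss()
+optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
+
+for epoch in range(200):
+    loss = criterion(model(x), y)   # forward pass
+    optimizer.zero_grad()           # clear old gradients
+    loss.backward()                 # backpropagate
+    optimizer.step()                # one gradient descent step
+
+print(model.weight.item(), model.bias.item())  # approximately 2.0 and 3.0
+```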
## Table of Contents
-* [PyTorch Basics](https://github.com/yunjey/pytorch-tutorial/tree/master/tutorials/00%20-%20PyTorch%20Basics/main.py)
-* [Linear Regression](https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/01%20-%20Linear%20Regression/main.py#L24-L31)
-* [Logistic Regression](https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/02%20-%20Logistic%20Regression/main.py#L35-L42)
-* [Feedforward Neural Network](https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/03%20-%20Feedforward%20Neural%20Network/main.py#L36-L47)
-* [Convolutional Neural Network](https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/04%20-%20Convolutional%20Neural%20Network/main.py#L33-L53)
-* [Deep Residual Network](https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/05%20-%20Deep%20Residual%20Network/main.py#L67-L103)
-* [Recurrent Neural Network](https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/06%20-%20Recurrent%20Neural%20Network/main.py#L38-L56)
-* [Bidirectional Recurrent Neural Network](https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/07%20-%20Bidirectional%20Recurrent%20Neural%20Network/main.py#L38-L57)
-* [Language Model (RNNLM)](https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/08%20-%20Language%20Model/main.py#L28-L54)
-* [Image Captioning (CNN-RNN)](https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/09%20-%20Image%20Captioning/model.py#L29-L64)
-* [Generative Adversarial Network](https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/10%20-%20Generative%20Adversarial%20Network/main.py#L32-L50)
-* [Deep Q-Network and Q-learning (WIP)](https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/11%20-%20Deep%20Q%20Network/dqn13.py)
+#### 1. Basics
+* [PyTorch Basics](https://github.com/yunjey/pytorch-tutorial/tree/master/tutorials/01-basics/pytorch_basics/main.py)
+* [Linear Regression](https://github.com/yunjey/pytorch-tutorial/tree/master/tutorials/01-basics/linear_regression/main.py#L22-L23)
+* [Logistic Regression](https://github.com/yunjey/pytorch-tutorial/tree/master/tutorials/01-basics/logistic_regression/main.py#L33-L34)
+* [Feedforward Neural Network](https://github.com/yunjey/pytorch-tutorial/tree/master/tutorials/01-basics/feedforward_neural_network/main.py#L37-L49)
+
+#### 2. Intermediate
+* [Convolutional Neural Network](https://github.com/yunjey/pytorch-tutorial/tree/master/tutorials/02-intermediate/convolutional_neural_network/main.py#L35-L56)
+* [Deep Residual Network](https://github.com/yunjey/pytorch-tutorial/tree/master/tutorials/02-intermediate/deep_residual_network/main.py#L76-L113)
+* [Recurrent Neural Network](https://github.com/yunjey/pytorch-tutorial/tree/master/tutorials/02-intermediate/recurrent_neural_network/main.py#L39-L58)
+* [Bidirectional Recurrent Neural Network](https://github.com/yunjey/pytorch-tutorial/tree/master/tutorials/02-intermediate/bidirectional_recurrent_neural_network/main.py#L39-L58)
+* [Language Model (RNN-LM)](https://github.com/yunjey/pytorch-tutorial/tree/master/tutorials/02-intermediate/language_model/main.py#L30-L50)
+
+#### 3. Advanced
+* [Generative Adversarial Networks](https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/03-advanced/generative_adversarial_network/main.py#L41-L57)
+* [Variational Auto-Encoder](https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/03-advanced/variational_autoencoder/main.py#L38-L65)
+* [Neural Style Transfer](https://github.com/yunjey/pytorch-tutorial/tree/master/tutorials/03-advanced/neural_style_transfer)
+* [Image Captioning (CNN-RNN)](https://github.com/yunjey/pytorch-tutorial/tree/master/tutorials/03-advanced/image_captioning)
+
+#### 4. Utilities
+* [TensorBoard in PyTorch](https://github.com/yunjey/pytorch-tutorial/tree/master/tutorials/04-utils/tensorboard)
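+
+The rewritten tutorials share a common device-handling idiom: pick a device once, then move the model and every mini-batch onto it. A minimal sketch of the pattern follows; the tensor shapes are illustrative, not taken from any one tutorial:
+
+```python
+import torch
+import torch.nn as nn
+
+# Use the GPU when one is available, otherwise fall back to the CPU.
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+model = nn.Linear(784, 10).to(device)       # move the parameters to the device
+images = torch.randn(100, 784).to(device)   # move each batch the same way
+outputs = model(images)
+print(outputs.size())                       # torch.Size([100, 10])
+```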
@@ -28,25 +37,16 @@ This repository provides tutorial code for deep learning researchers to learn [P ## Getting Started ```bash $ git clone https://github.com/yunjey/pytorch-tutorial.git -$ cd pytorch-tutorial/tutorials/project_path -$ python main.py # cpu version -$ python main-gpu.py # gpu version +$ cd pytorch-tutorial/tutorials/PATH_TO_PROJECT +$ python main.py ```
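Before running an example, it may help to confirm that the environment matches the dependency list below. This check is an editor's suggestion, not part of the repository:

```python
import torch
import torchvision

print(torch.__version__)          # expect 0.4.0 or newer
print(torchvision.__version__)
print(torch.cuda.is_available())  # True means the GPU code paths can run
```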
## Dependencies -* [pytorch](https://github.com/pytorch/pytorch) -* [pytorch-vision](https://github.com/pytorch/vision) +* [Python 2.7 or 3.5+](https://www.continuum.io/downloads) +* [PyTorch 0.4.0+](http://pytorch.org/) -
- -## Future Work -* [Other GAN Models](https://github.com/zhangqianhui/AdversarialNetsPapers) -* [One-shot Learning](https://arxiv.org/abs/1606.04080) -* [Question Answering](https://rajpurkar.github.io/SQuAD-explorer/) -* [Visual Question Answering](http://www.visualqa.org/) -
diff --git a/logo/README.md b/logo/README.md deleted file mode 100644 index 5304bcf7..00000000 --- a/logo/README.md +++ /dev/null @@ -1 +0,0 @@ -create folder diff --git a/logo/pytorch_logo_2018.svg b/logo/pytorch_logo_2018.svg new file mode 100644 index 00000000..5e530003 --- /dev/null +++ b/logo/pytorch_logo_2018.svg @@ -0,0 +1,33 @@ + + + + + + + + + + + + + + + + + + + + diff --git a/tutorials/00 - PyTorch Basics/basics.ipynb b/tutorials/00 - PyTorch Basics/basics.ipynb deleted file mode 100644 index 4024f112..00000000 --- a/tutorials/00 - PyTorch Basics/basics.ipynb +++ /dev/null @@ -1,397 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 19, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "import torch \n", - "import torchvision\n", - "import torch.nn as nn\n", - "import torch.utils.data as data\n", - "import numpy as np\n", - "import torchvision.transforms as transforms\n", - "import torchvision.datasets as dsets\n", - "from torch.autograd import Variable" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Simple Example" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "-1.2532 -1.1120 0.9717\n", - "-2.3617 0.1516 1.1280\n", - "-2.1599 0.0828 -1.4305\n", - " 0.5265 0.5020 -2.1852\n", - "-0.9197 0.1772 -1.1378\n", - "[torch.FloatTensor of size 5x3]\n", - "\n" - ] - } - ], - "source": [ - "# random normal\n", - "x = torch.randn(5, 3)\n", - "print (x)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "# build a layer\n", - "linear = nn.Linear(3, 2)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Parameter containing:\n", - " 0.3884 -0.3335 -0.5146\n", - "-0.3692 0.1977 -0.4081\n", - "[torch.FloatTensor of size 2x3]\n", - "\n", - "Parameter containing:\n", - "-0.4826\n", - "-0.0038\n", - "[torch.FloatTensor of size 2]\n", - "\n" - ] - } - ], - "source": [ - "# Sess weight and bias\n", - "print (linear.weight)\n", - "print (linear.bias)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Variable containing:\n", - "-1.0986 -0.1575\n", - "-2.0311 0.4378\n", - "-0.6131 1.3938\n", - " 0.6790 0.7929\n", - "-0.3134 0.8351\n", - "[torch.FloatTensor of size 5x2]\n", - "\n" - ] - } - ], - "source": [ - "# forward propagate\n", - "y = linear(Variable(x))\n", - "print (y)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Convert numpy array to torch tensor" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "# convert numpy array to tensor\n", - "a = np.array([[1,2], [3,4]])\n", - "b = torch.from_numpy(a)\n", - "print (b)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Input pipeline" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### (1) Preprocessing" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Image Preprocessing \n", - "transform = transforms.Compose([\n", - " 
transforms.Scale(40),\n", - " transforms.RandomHorizontalFlip(),\n", - " transforms.RandomCrop(32),\n", - " transforms.ToTensor()])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### (2) Define Dataset" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Files already downloaded and verified\n", - "torch.Size([3, 32, 32])\n", - "6\n" - ] - } - ], - "source": [ - "# download and loading dataset f\n", - "train_dataset = dsets.CIFAR10(root='./data/',\n", - " train=True, \n", - " transform=transform,\n", - " download=True)\n", - "\n", - "image, label = train_dataset[0]\n", - "print (image.size())\n", - "print (label)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### (3) Data Loader" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# data loader provides queue and thread in a very simple way\n", - "train_loader = data.DataLoader(dataset=train_dataset,\n", - " batch_size=100, \n", - " shuffle=True,\n", - " num_workers=2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# iteration start then queue and thread start\n", - "data_iter = iter(train_loader)\n", - "\n", - "# mini-batch images and labels\n", - "images, labels = data_iter.next()\n", - "\n", - "for images, labels in train_loader:\n", - " # your training code will be written here\n", - " pass" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### (4) What about custom dataset not cifar10?" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "class CustomDataset(data.Dataset):\n", - " def __init__(self):\n", - " pass\n", - " def __getitem__(self, index):\n", - " # You should build this function to return one data for given index\n", - " pass\n", - " def __len__(self):\n", - " pass" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "ename": "TypeError", - "evalue": "'NoneType' object cannot be interpreted as an integer", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m100\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mshuffle\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m num_workers=2)\n\u001b[0m", - "\u001b[0;32m/home/yunjey/anaconda3/lib/python3.5/site-packages/torch/utils/data/dataloader.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, dataset, batch_size, shuffle, sampler, num_workers, collate_fn, pin_memory)\u001b[0m\n\u001b[1;32m 250\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msampler\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msampler\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 251\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mshuffle\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 
252\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msampler\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mRandomSampler\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdataset\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 253\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mshuffle\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 254\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msampler\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mSequentialSampler\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdataset\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/home/yunjey/anaconda3/lib/python3.5/site-packages/torch/utils/data/sampler.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, data_source)\u001b[0m\n\u001b[1;32m 45\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 46\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata_source\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 47\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnum_samples\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata_source\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 48\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 49\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__iter__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mTypeError\u001b[0m: 'NoneType' object cannot be interpreted as an integer" - ] - } - ], - "source": [ - "custom_dataset = CustomDataset()\n", - "data.DataLoader(dataset=custom_dataset,\n", - " batch_size=100, \n", - " shuffle=True,\n", - " num_workers=2)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Using Pretrained Model" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Downloading: \"https://s3.amazonaws.com/pytorch/models/resnet18-5c106cde.pth\" to /home/yunjey/.torch/models/resnet18-5c106cde.pth\n", - "100%|██████████| 46827520/46827520 [07:48<00:00, 99907.53it/s] \n" - ] - } - ], - "source": [ - "# Download and load pretrained model\n", - "resnet = torchvision.models.resnet18(pretrained=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# delete top layer for finetuning\n", - "sub_model = nn.Sequentialtial(*list(resnet.children()[:-1]))\n", - "\n", - "# for test\n", - "images = Variable(torch.randn(10, 3, 256, 256))\n", - "print (resnet(images).size())\n", - "print (sub_model(images).size())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Save and Load Model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Save and load the trained model\n", - "torch.save(sub_model, 'model.pkl')\n", - "\n", - "model = torch.load('model.pkl')" - ] - } - ], - "metadata": { - "anaconda-cloud": {}, - "kernelspec": { - "display_name": "Python [conda root]", - "language": "python", - "name": "conda-root-py" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 
- }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 1 -} diff --git a/tutorials/00 - PyTorch Basics/main.py b/tutorials/00 - PyTorch Basics/main.py deleted file mode 100644 index 1bd907b1..00000000 --- a/tutorials/00 - PyTorch Basics/main.py +++ /dev/null @@ -1,158 +0,0 @@ -import torch -import torchvision -import torch.nn as nn -import numpy as np -import torch.utils.data as data -import torchvision.transforms as transforms -import torchvision.datasets as dsets -from torch.autograd import Variable - - -#========================== Table of Contents ==========================# -# 1. Basic autograd example 1 (Line 21 to 36) -# 2. Basic autograd example 2 (Line 39 to 76) -# 3. Loading data from numpy (Line 79 to 82) -# 4. Implementing the input pipline (Line 86 to 113) -# 5. Input pipline for custom dataset (Line 115 to 135) -# 6. Using pretrained model (Line 138 to 152) -# 7. Save and load model (Line 155 to L157) - - -#======================= Basic autograd example 1 =======================# -# Create tensors. -x = Variable(torch.Tensor([1]), requires_grad=True) -w = Variable(torch.Tensor([2]), requires_grad=True) -b = Variable(torch.Tensor([3]), requires_grad=True) - -# Build a computational graph. -y = w * x + b # y = 2 * x + 3 - -# Compute gradients. -y.backward() - -# Print out the gradients. -print(x.grad) # x.grad = 2 -print(w.grad) # w.grad = 1 -print(b.grad) # b.grad = 1 - - -#======================== Basic autograd example 2 =======================# -# Create tensors. -x = Variable(torch.randn(5, 3)) -y = Variable(torch.randn(5, 2)) - -# Build a linear layer. -linear = nn.Linear(3, 2) -print ('w: ', linear.weight) -print ('b: ', linear.bias) - -# Build Loss and Optimizer. -criterion = nn.MSELoss() -optimizer = torch.optim.SGD(linear.parameters(), lr=0.01) - -# Forward propagation. -pred = linear(x) - -# Compute loss. -loss = criterion(pred, y) -print('loss: ', loss.data[0]) - -# Backpropagation. -loss.backward() - -# Print out the gradients. -print ('dL/dw: ', linear.weight.grad) -print ('dL/db: ', linear.bias.grad) - -# 1-step Optimization (gradient descent). -optimizer.step() - -# You can also do optimization at the low level as shown below. -# linear.weight.data.sub_(0.01 * linear.weight.grad.data) -# linear.bias.data.sub_(0.01 * linear.bias.grad.data) - -# Print out the loss after optimization. -pred = linear(x) -loss = criterion(pred, y) -print('loss after 1 step optimization: ', loss.data[0]) - - -#======================== Loading data from numpy ========================# -a = np.array([[1,2], [3,4]]) -b = torch.from_numpy(a) -print (b) - - - -#===================== Implementing the input pipline =====================# -# Download and construct dataset. -train_dataset = dsets.CIFAR10(root='../data/', - train=True, - transform=transforms.ToTensor(), - download=True) - -# Select one data pair (read data from disk). -image, label = train_dataset[0] -print (image.size()) -print (label) - -# Data Loader (this provides queue and thread in a very simple way). -train_loader = torch.utils.data.DataLoader(dataset=train_dataset, - batch_size=100, - shuffle=True, - num_workers=2) - -# When iteration starts, queue and thread start to load dataset from files. -data_iter = iter(train_loader) - -# Mini-batch images and labels. -images, labels = data_iter.next() - -# Actual usage of data loader is as below. 
-for images, labels in train_loader: - # Your training code will be written here - pass - -#===================== Input pipline for custom dataset =====================# -# You should build custom dataset as below. -class CustomDataset(data.Dataset): - def __init__(self): - # TODO - # 1. Initialize file path or list of file names. - pass - def __getitem__(self, index): - # TODO - # 1. Read one data from file (e.g. using numpy.fromfile, PIL.Image.open). - # 2. Return a data pair (e.g. image and label). - pass - def __len__(self): - # You should change 0 to the total size of your dataset. - return 0 - -# Then, you can just use prebuilt torch's data loader. -train_loader = torch.utils.data.DataLoader(dataset=train_dataset, - batch_size=100, - shuffle=True, - num_workers=2) - - -#========================== Using pretrained model ==========================# -# Download and load pretrained resnet. -resnet = torchvision.models.resnet18(pretrained=True) - -# If you want to finetune only top layer of the model. -for param in resnet.parameters(): - param.requires_grad = False - -# Replace top layer for finetuning. -resnet.fc = nn.Linear(resnet.fc.in_features, 100) # 100 is for example. - -# For test. -images = Variable(torch.randn(10, 3, 256, 256)) -outputs = resnet(images) -print (outputs.size()) # (10, 100) - - -#============================ Save and load model ============================# -torch.save(resnet, 'model.pkl') -model = torch.load('model.pkl') \ No newline at end of file diff --git a/tutorials/01-basics/feedforward_neural_network/main.py b/tutorials/01-basics/feedforward_neural_network/main.py new file mode 100644 index 00000000..0c766a7e --- /dev/null +++ b/tutorials/01-basics/feedforward_neural_network/main.py @@ -0,0 +1,94 @@ +import torch +import torch.nn as nn +import torchvision +import torchvision.transforms as transforms + + +# Device configuration +device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + +# Hyper-parameters +input_size = 784 +hidden_size = 500 +num_classes = 10 +num_epochs = 5 +batch_size = 100 +learning_rate = 0.001 + +# MNIST dataset +train_dataset = torchvision.datasets.MNIST(root='../../data', + train=True, + transform=transforms.ToTensor(), + download=True) + +test_dataset = torchvision.datasets.MNIST(root='../../data', + train=False, + transform=transforms.ToTensor()) + +# Data loader +train_loader = torch.utils.data.DataLoader(dataset=train_dataset, + batch_size=batch_size, + shuffle=True) + +test_loader = torch.utils.data.DataLoader(dataset=test_dataset, + batch_size=batch_size, + shuffle=False) + +# Fully connected neural network with one hidden layer +class NeuralNet(nn.Module): + def __init__(self, input_size, hidden_size, num_classes): + super(NeuralNet, self).__init__() + self.fc1 = nn.Linear(input_size, hidden_size) + self.relu = nn.ReLU() + self.fc2 = nn.Linear(hidden_size, num_classes) + + def forward(self, x): + out = self.fc1(x) + out = self.relu(out) + out = self.fc2(out) + return out + +model = NeuralNet(input_size, hidden_size, num_classes).to(device) + +# Loss and optimizer +criterion = nn.CrossEntropyLoss() +optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) + +# Train the model +total_step = len(train_loader) +for epoch in range(num_epochs): + for i, (images, labels) in enumerate(train_loader): + # Move tensors to the configured device + images = images.reshape(-1, 28*28).to(device) + labels = labels.to(device) + + # Forward pass + outputs = model(images) + loss = criterion(outputs, labels) + + # Backward 
and optimize + optimizer.zero_grad() + loss.backward() + optimizer.step() + + if (i+1) % 100 == 0: + print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' + .format(epoch+1, num_epochs, i+1, total_step, loss.item())) + +# Test the model +# In test phase, we don't need to compute gradients (for memory efficiency) +with torch.no_grad(): + correct = 0 + total = 0 + for images, labels in test_loader: + images = images.reshape(-1, 28*28).to(device) + labels = labels.to(device) + outputs = model(images) + _, predicted = torch.max(outputs.data, 1) + total += labels.size(0) + correct += (predicted == labels).sum().item() + + print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total)) + +# Save the model checkpoint +torch.save(model.state_dict(), 'model.ckpt') \ No newline at end of file diff --git a/tutorials/01 - Linear Regression/main.py b/tutorials/01-basics/linear_regression/main.py similarity index 53% rename from tutorials/01 - Linear Regression/main.py rename to tutorials/01-basics/linear_regression/main.py index 274305f2..b3715d99 100644 --- a/tutorials/01 - Linear Regression/main.py +++ b/tutorials/01-basics/linear_regression/main.py @@ -2,16 +2,15 @@ import torch.nn as nn import numpy as np import matplotlib.pyplot as plt -from torch.autograd import Variable -# Hyper Parameters +# Hyper-parameters input_size = 1 output_size = 1 num_epochs = 60 learning_rate = 0.001 -# Toy Dataset +# Toy dataset x_train = np.array([[3.3], [4.4], [5.5], [6.71], [6.93], [4.168], [9.779], [6.182], [7.59], [2.167], [7.042], [10.791], [5.313], [7.997], [3.1]], dtype=np.float32) @@ -20,45 +19,37 @@ [3.366], [2.596], [2.53], [1.221], [2.827], [3.465], [1.65], [2.904], [1.3]], dtype=np.float32) -# Linear Regression Model -class LinearRegression(nn.Module): - def __init__(self, input_size, output_size): - super(LinearRegression, self).__init__() - self.linear = nn.Linear(input_size, output_size) - - def forward(self, x): - out = self.linear(x) - return out - -model = LinearRegression(input_size, output_size) +# Linear regression model +model = nn.Linear(input_size, output_size) -# Loss and Optimizer +# Loss and optimizer criterion = nn.MSELoss() optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate) -# Train the Model +# Train the model for epoch in range(num_epochs): - # Convert numpy array to torch Variable - inputs = Variable(torch.from_numpy(x_train)) - targets = Variable(torch.from_numpy(y_train)) + # Convert numpy arrays to torch tensors + inputs = torch.from_numpy(x_train) + targets = torch.from_numpy(y_train) - # Forward + Backward + Optimize - optimizer.zero_grad() + # Forward pass outputs = model(inputs) loss = criterion(outputs, targets) + + # Backward and optimize + optimizer.zero_grad() loss.backward() optimizer.step() if (epoch+1) % 5 == 0: - print ('Epoch [%d/%d], Loss: %.4f' - %(epoch+1, num_epochs, loss.data[0])) - + print ('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item())) + # Plot the graph -predicted = model(Variable(torch.from_numpy(x_train))).data.numpy() +predicted = model(torch.from_numpy(x_train)).detach().numpy() plt.plot(x_train, y_train, 'ro', label='Original data') plt.plot(x_train, predicted, label='Fitted line') plt.legend() plt.show() -# Save the Model -torch.save(model, 'model.pkl') \ No newline at end of file +# Save the model checkpoint +torch.save(model.state_dict(), 'model.ckpt') \ No newline at end of file diff --git a/tutorials/01-basics/logistic_regression/main.py b/tutorials/01-basics/logistic_regression/main.py 
new file mode 100644 index 00000000..c7eb378b --- /dev/null +++ b/tutorials/01-basics/logistic_regression/main.py @@ -0,0 +1,76 @@ +import torch +import torch.nn as nn +import torchvision +import torchvision.transforms as transforms + + +# Hyper-parameters +input_size = 28 * 28 # 784 +num_classes = 10 +num_epochs = 5 +batch_size = 100 +learning_rate = 0.001 + +# MNIST dataset (images and labels) +train_dataset = torchvision.datasets.MNIST(root='../../data', + train=True, + transform=transforms.ToTensor(), + download=True) + +test_dataset = torchvision.datasets.MNIST(root='../../data', + train=False, + transform=transforms.ToTensor()) + +# Data loader (input pipeline) +train_loader = torch.utils.data.DataLoader(dataset=train_dataset, + batch_size=batch_size, + shuffle=True) + +test_loader = torch.utils.data.DataLoader(dataset=test_dataset, + batch_size=batch_size, + shuffle=False) + +# Logistic regression model +model = nn.Linear(input_size, num_classes) + +# Loss and optimizer +# nn.CrossEntropyLoss() computes softmax internally +criterion = nn.CrossEntropyLoss() +optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate) + +# Train the model +total_step = len(train_loader) +for epoch in range(num_epochs): + for i, (images, labels) in enumerate(train_loader): + # Reshape images to (batch_size, input_size) + images = images.reshape(-1, input_size) + + # Forward pass + outputs = model(images) + loss = criterion(outputs, labels) + + # Backward and optimize + optimizer.zero_grad() + loss.backward() + optimizer.step() + + if (i+1) % 100 == 0: + print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' + .format(epoch+1, num_epochs, i+1, total_step, loss.item())) + +# Test the model +# In test phase, we don't need to compute gradients (for memory efficiency) +with torch.no_grad(): + correct = 0 + total = 0 + for images, labels in test_loader: + images = images.reshape(-1, input_size) + outputs = model(images) + _, predicted = torch.max(outputs.data, 1) + total += labels.size(0) + correct += (predicted == labels).sum() + + print('Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total)) + +# Save the model checkpoint +torch.save(model.state_dict(), 'model.ckpt') diff --git a/tutorials/01-basics/pytorch_basics/main.py b/tutorials/01-basics/pytorch_basics/main.py new file mode 100644 index 00000000..744400c2 --- /dev/null +++ b/tutorials/01-basics/pytorch_basics/main.py @@ -0,0 +1,189 @@ +import torch +import torchvision +import torch.nn as nn +import numpy as np +import torchvision.transforms as transforms + + +# ================================================================== # +# Table of Contents # +# ================================================================== # + +# 1. Basic autograd example 1 (Line 25 to 39) +# 2. Basic autograd example 2 (Line 46 to 83) +# 3. Loading data from numpy (Line 90 to 97) +# 4. Input pipline (Line 104 to 129) +# 5. Input pipline for custom dataset (Line 136 to 156) +# 6. Pretrained model (Line 163 to 176) +# 7. Save and load model (Line 183 to 189) + + +# ================================================================== # +# 1. Basic autograd example 1 # +# ================================================================== # + +# Create tensors. +x = torch.tensor(1., requires_grad=True) +w = torch.tensor(2., requires_grad=True) +b = torch.tensor(3., requires_grad=True) + +# Build a computational graph. +y = w * x + b # y = 2 * x + 3 + +# Compute gradients. +y.backward() + +# Print out the gradients. 
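+# Since y = w * x + b with x = 1, w = 2, and b = 3, the chain rule gives
+# dy/dx = w = 2, dy/dw = x = 1, and dy/db = 1, matching the values below.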
+print(x.grad)    # x.grad = 2
+print(w.grad)    # w.grad = 1
+print(b.grad)    # b.grad = 1
+
+
+# ================================================================== #
+#                     2. Basic autograd example 2                    #
+# ================================================================== #
+
+# Create tensors of shape (10, 3) and (10, 2).
+x = torch.randn(10, 3)
+y = torch.randn(10, 2)
+
+# Build a fully connected layer.
+linear = nn.Linear(3, 2)
+print ('w: ', linear.weight)
+print ('b: ', linear.bias)
+
+# Build loss function and optimizer.
+criterion = nn.MSELoss()
+optimizer = torch.optim.SGD(linear.parameters(), lr=0.01)
+
+# Forward pass.
+pred = linear(x)
+
+# Compute loss.
+loss = criterion(pred, y)
+print('loss: ', loss.item())
+
+# Backward pass.
+loss.backward()
+
+# Print out the gradients.
+print ('dL/dw: ', linear.weight.grad)
+print ('dL/db: ', linear.bias.grad)
+
+# 1-step gradient descent.
+optimizer.step()
+
+# You can also perform gradient descent at the low level.
+# linear.weight.data.sub_(0.01 * linear.weight.grad.data)
+# linear.bias.data.sub_(0.01 * linear.bias.grad.data)
+
+# Print out the loss after 1-step gradient descent.
+pred = linear(x)
+loss = criterion(pred, y)
+print('loss after 1 step optimization: ', loss.item())
+
+
+# ================================================================== #
+#                     3. Loading data from numpy                     #
+# ================================================================== #
+
+# Create a numpy array.
+x = np.array([[1, 2], [3, 4]])
+
+# Convert the numpy array to a torch tensor.
+y = torch.from_numpy(x)
+
+# Convert the torch tensor to a numpy array.
+z = y.numpy()
+
+
+# ================================================================== #
+#                         4. Input pipeline                          #
+# ================================================================== #
+
+# Download and construct CIFAR-10 dataset.
+train_dataset = torchvision.datasets.CIFAR10(root='../../data/',
+                                             train=True,
+                                             transform=transforms.ToTensor(),
+                                             download=True)
+
+# Fetch one data pair (read data from disk).
+image, label = train_dataset[0]
+print (image.size())
+print (label)
+
+# Data loader (this provides queues and threads in a very simple way).
+train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
+                                           batch_size=64,
+                                           shuffle=True)
+
+# When iteration starts, queue and thread start to load data from files.
+data_iter = iter(train_loader)
+
+# Fetch one mini-batch of images and labels.
+# (next() works on both Python 2 and 3, unlike data_iter.next().)
+images, labels = next(data_iter)
+
+# Actual usage of the data loader is as below.
+for images, labels in train_loader:
+    # Training code should be written here.
+    pass
+
+
+# ================================================================== #
+#                5. Input pipeline for custom dataset                #
+# ================================================================== #
+
+# You should build your custom dataset as below.
+class CustomDataset(torch.utils.data.Dataset):
+    def __init__(self):
+        # TODO
+        # 1. Initialize file paths or a list of file names.
+        pass
+    def __getitem__(self, index):
+        # TODO
+        # 1. Read one data sample from file (e.g. using numpy.fromfile, PIL.Image.open).
+        # 2. Preprocess the data (e.g. torchvision.Transform).
+        # 3. Return a data pair (e.g. image and label).
+        pass
+    def __len__(self):
+        # You should change 0 to the total size of your dataset.
+        return 0
+
+# You can then use the prebuilt data loader.
+custom_dataset = CustomDataset()
+train_loader = torch.utils.data.DataLoader(dataset=custom_dataset,
+                                           batch_size=64,
+                                           shuffle=True)
+
+
+# ================================================================== #
+#                        6. 
Pretrained model # +# ================================================================== # + +# Download and load the pretrained ResNet-18. +resnet = torchvision.models.resnet18(pretrained=True) + +# If you want to finetune only the top layer of the model, set as below. +for param in resnet.parameters(): + param.requires_grad = False + +# Replace the top layer for finetuning. +resnet.fc = nn.Linear(resnet.fc.in_features, 100) # 100 is an example. + +# Forward pass. +images = torch.randn(64, 3, 224, 224) +outputs = resnet(images) +print (outputs.size()) # (64, 100) + + +# ================================================================== # +# 7. Save and load the model # +# ================================================================== # + +# Save and load the entire model. +torch.save(resnet, 'model.ckpt') +model = torch.load('model.ckpt') + +# Save and load only the model parameters (recommended). +torch.save(resnet.state_dict(), 'params.ckpt') +resnet.load_state_dict(torch.load('params.ckpt')) diff --git a/tutorials/02 - Logistic Regression/main.py b/tutorials/02 - Logistic Regression/main.py deleted file mode 100644 index c648433f..00000000 --- a/tutorials/02 - Logistic Regression/main.py +++ /dev/null @@ -1,82 +0,0 @@ -import torch -import torch.nn as nn -import torchvision.datasets as dsets -import torchvision.transforms as transforms -from torch.autograd import Variable - - -# Hyper Parameters -input_size = 784 -num_classes = 10 -num_epochs = 5 -batch_size = 100 -learning_rate = 0.001 - -# MNIST Dataset (Images and Labels) -train_dataset = dsets.MNIST(root='../data', - train=True, - transform=transforms.ToTensor(), - download=True) - -test_dataset = dsets.MNIST(root='../data', - train=False, - transform=transforms.ToTensor()) - -# Dataset Loader (Input Pipline) -train_loader = torch.utils.data.DataLoader(dataset=train_dataset, - batch_size=batch_size, - shuffle=True) - -test_loader = torch.utils.data.DataLoader(dataset=test_dataset, - batch_size=batch_size, - shuffle=False) - -# Model -class LogisticRegression(nn.Module): - def __init__(self, input_size, num_classes): - super(LogisticRegression, self).__init__() - self.linear = nn.Linear(input_size, num_classes) - - def forward(self, x): - out = self.linear(x) - return out - -model = LogisticRegression(input_size, num_classes) - -# Loss and Optimizer -# Softmax is internally computed. -# Set parameters to be updated. 
-criterion = nn.CrossEntropyLoss() -optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate) - -# Training the Model -for epoch in range(num_epochs): - for i, (images, labels) in enumerate(train_loader): - images = Variable(images.view(-1, 28*28)) - labels = Variable(labels) - - # Forward + Backward + Optimize - optimizer.zero_grad() - outputs = model(images) - loss = criterion(outputs, labels) - loss.backward() - optimizer.step() - - if (i+1) % 100 == 0: - print ('Epoch: [%d/%d], Step: [%d/%d], Loss: %.4f' - % (epoch+1, num_epochs, i+1, len(train_dataset)//batch_size, loss.data[0])) - -# Test the Model -correct = 0 -total = 0 -for images, labels in test_loader: - images = Variable(images.view(-1, 28*28)) - outputs = model(images) - _, predicted = torch.max(outputs.data, 1) - total += labels.size(0) - correct += (predicted == labels).sum() - -print('Accuracy of the model on the 10000 test images: %d %%' % (100 * correct / total)) - -# Save the Model -torch.save(model, 'model.pkl') \ No newline at end of file diff --git a/tutorials/02-intermediate/bidirectional_recurrent_neural_network/main.py b/tutorials/02-intermediate/bidirectional_recurrent_neural_network/main.py new file mode 100644 index 00000000..a0ecd773 --- /dev/null +++ b/tutorials/02-intermediate/bidirectional_recurrent_neural_network/main.py @@ -0,0 +1,102 @@ +import torch +import torch.nn as nn +import torchvision +import torchvision.transforms as transforms + + +# Device configuration +device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + +# Hyper-parameters +sequence_length = 28 +input_size = 28 +hidden_size = 128 +num_layers = 2 +num_classes = 10 +batch_size = 100 +num_epochs = 2 +learning_rate = 0.003 + +# MNIST dataset +train_dataset = torchvision.datasets.MNIST(root='../../data/', + train=True, + transform=transforms.ToTensor(), + download=True) + +test_dataset = torchvision.datasets.MNIST(root='../../data/', + train=False, + transform=transforms.ToTensor()) + +# Data loader +train_loader = torch.utils.data.DataLoader(dataset=train_dataset, + batch_size=batch_size, + shuffle=True) + +test_loader = torch.utils.data.DataLoader(dataset=test_dataset, + batch_size=batch_size, + shuffle=False) + +# Bidirectional recurrent neural network (many-to-one) +class BiRNN(nn.Module): + def __init__(self, input_size, hidden_size, num_layers, num_classes): + super(BiRNN, self).__init__() + self.hidden_size = hidden_size + self.num_layers = num_layers + self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, bidirectional=True) + self.fc = nn.Linear(hidden_size*2, num_classes) # 2 for bidirection + + def forward(self, x): + # Set initial states + h0 = torch.zeros(self.num_layers*2, x.size(0), self.hidden_size).to(device) # 2 for bidirection + c0 = torch.zeros(self.num_layers*2, x.size(0), self.hidden_size).to(device) + + # Forward propagate LSTM + out, _ = self.lstm(x, (h0, c0)) # out: tensor of shape (batch_size, seq_length, hidden_size*2) + + # Decode the hidden state of the last time step + out = self.fc(out[:, -1, :]) + return out + +model = BiRNN(input_size, hidden_size, num_layers, num_classes).to(device) + + +# Loss and optimizer +criterion = nn.CrossEntropyLoss() +optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) + +# Train the model +total_step = len(train_loader) +for epoch in range(num_epochs): + for i, (images, labels) in enumerate(train_loader): + images = images.reshape(-1, sequence_length, input_size).to(device) + labels = labels.to(device) + + # Forward pass + 
outputs = model(images) + loss = criterion(outputs, labels) + + # Backward and optimize + optimizer.zero_grad() + loss.backward() + optimizer.step() + + if (i+1) % 100 == 0: + print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' + .format(epoch+1, num_epochs, i+1, total_step, loss.item())) + +# Test the model +with torch.no_grad(): + correct = 0 + total = 0 + for images, labels in test_loader: + images = images.reshape(-1, sequence_length, input_size).to(device) + labels = labels.to(device) + outputs = model(images) + _, predicted = torch.max(outputs.data, 1) + total += labels.size(0) + correct += (predicted == labels).sum().item() + + print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total)) + +# Save the model checkpoint +torch.save(model.state_dict(), 'model.ckpt') \ No newline at end of file diff --git a/tutorials/02-intermediate/convolutional_neural_network/main.py b/tutorials/02-intermediate/convolutional_neural_network/main.py new file mode 100644 index 00000000..ec904f1f --- /dev/null +++ b/tutorials/02-intermediate/convolutional_neural_network/main.py @@ -0,0 +1,100 @@ +import torch +import torch.nn as nn +import torchvision +import torchvision.transforms as transforms + + +# Device configuration +device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') + +# Hyper parameters +num_epochs = 5 +num_classes = 10 +batch_size = 100 +learning_rate = 0.001 + +# MNIST dataset +train_dataset = torchvision.datasets.MNIST(root='../../data/', + train=True, + transform=transforms.ToTensor(), + download=True) + +test_dataset = torchvision.datasets.MNIST(root='../../data/', + train=False, + transform=transforms.ToTensor()) + +# Data loader +train_loader = torch.utils.data.DataLoader(dataset=train_dataset, + batch_size=batch_size, + shuffle=True) + +test_loader = torch.utils.data.DataLoader(dataset=test_dataset, + batch_size=batch_size, + shuffle=False) + +# Convolutional neural network (two convolutional layers) +class ConvNet(nn.Module): + def __init__(self, num_classes=10): + super(ConvNet, self).__init__() + self.layer1 = nn.Sequential( + nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2), + nn.BatchNorm2d(16), + nn.ReLU(), + nn.MaxPool2d(kernel_size=2, stride=2)) + self.layer2 = nn.Sequential( + nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2), + nn.BatchNorm2d(32), + nn.ReLU(), + nn.MaxPool2d(kernel_size=2, stride=2)) + self.fc = nn.Linear(7*7*32, num_classes) + + def forward(self, x): + out = self.layer1(x) + out = self.layer2(out) + out = out.reshape(out.size(0), -1) + out = self.fc(out) + return out + +model = ConvNet(num_classes).to(device) + +# Loss and optimizer +criterion = nn.CrossEntropyLoss() +optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) + +# Train the model +total_step = len(train_loader) +for epoch in range(num_epochs): + for i, (images, labels) in enumerate(train_loader): + images = images.to(device) + labels = labels.to(device) + + # Forward pass + outputs = model(images) + loss = criterion(outputs, labels) + + # Backward and optimize + optimizer.zero_grad() + loss.backward() + optimizer.step() + + if (i+1) % 100 == 0: + print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' + .format(epoch+1, num_epochs, i+1, total_step, loss.item())) + +# Test the model +model.eval() # eval mode (batchnorm uses moving mean/variance instead of mini-batch mean/variance) +with torch.no_grad(): + correct = 0 + total = 0 + for images, labels in test_loader: + images = images.to(device) + labels = labels.to(device) + 
outputs = model(images) + _, predicted = torch.max(outputs.data, 1) + total += labels.size(0) + correct += (predicted == labels).sum().item() + + print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total)) + +# Save the model checkpoint +torch.save(model.state_dict(), 'model.ckpt') \ No newline at end of file diff --git a/tutorials/05 - Deep Residual Network/main-gpu.py b/tutorials/02-intermediate/deep_residual_network/main.py similarity index 53% rename from tutorials/05 - Deep Residual Network/main-gpu.py rename to tutorials/02-intermediate/deep_residual_network/main.py index 2da4b0fa..69dbe5fb 100644 --- a/tutorials/05 - Deep Residual Network/main-gpu.py +++ b/tutorials/02-intermediate/deep_residual_network/main.py @@ -1,45 +1,56 @@ -# Implementation of https://arxiv.org/pdf/1512.03385.pdf/ -# See section 4.2 for model architecture on CIFAR-10. -# Some part of the code was referenced below. -# https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py -import torch +# ---------------------------------------------------------------------------- # +# An implementation of https://arxiv.org/pdf/1512.03385.pdf # +# See section 4.2 for the model architecture on CIFAR-10 # +# Some part of the code was referenced from below # +# https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py # +# ---------------------------------------------------------------------------- # + +import torch import torch.nn as nn -import torchvision.datasets as dsets +import torchvision import torchvision.transforms as transforms -from torch.autograd import Variable -# Image Preprocessing + +# Device configuration +device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + +# Hyper-parameters +num_epochs = 80 +batch_size = 100 +learning_rate = 0.001 + +# Image preprocessing modules transform = transforms.Compose([ - transforms.Scale(40), + transforms.Pad(4), transforms.RandomHorizontalFlip(), transforms.RandomCrop(32), transforms.ToTensor()]) -# CIFAR-10 Dataset -train_dataset = dsets.CIFAR10(root='../data/', - train=True, - transform=transform, - download=True) +# CIFAR-10 dataset +train_dataset = torchvision.datasets.CIFAR10(root='../../data/', + train=True, + transform=transform, + download=True) -test_dataset = dsets.CIFAR10(root='../data/', - train=False, - transform=transforms.ToTensor()) +test_dataset = torchvision.datasets.CIFAR10(root='../../data/', + train=False, + transform=transforms.ToTensor()) -# Data Loader (Input Pipeline) +# Data loader train_loader = torch.utils.data.DataLoader(dataset=train_dataset, - batch_size=100, + batch_size=batch_size, shuffle=True) test_loader = torch.utils.data.DataLoader(dataset=test_dataset, - batch_size=100, + batch_size=batch_size, shuffle=False) -# 3x3 Convolution +# 3x3 convolution def conv3x3(in_channels, out_channels, stride=1): return nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False) -# Residual Block +# Residual block class ResidualBlock(nn.Module): def __init__(self, in_channels, out_channels, stride=1, downsample=None): super(ResidualBlock, self).__init__() @@ -63,7 +74,7 @@ def forward(self, x): out = self.relu(out) return out -# ResNet Module +# ResNet class ResNet(nn.Module): def __init__(self, block, layers, num_classes=10): super(ResNet, self).__init__() @@ -72,8 +83,8 @@ def __init__(self, block, layers, num_classes=10): self.bn = nn.BatchNorm2d(16) self.relu = nn.ReLU(inplace=True) self.layer1 = self.make_layer(block, 16, layers[0]) - 
self.layer2 = self.make_layer(block, 32, layers[0], 2) - self.layer3 = self.make_layer(block, 64, layers[1], 2) + self.layer2 = self.make_layer(block, 32, layers[1], 2) + self.layer3 = self.make_layer(block, 64, layers[2], 2) self.avg_pool = nn.AvgPool2d(8) self.fc = nn.Linear(64, num_classes) @@ -102,46 +113,58 @@ def forward(self, x): out = self.fc(out) return out -resnet = ResNet(ResidualBlock, [3, 3, 3]) -resnet.cuda() +model = ResNet(ResidualBlock, [2, 2, 2]).to(device) -# Loss and Optimizer + +# Loss and optimizer criterion = nn.CrossEntropyLoss() -lr = 0.001 -optimizer = torch.optim.Adam(resnet.parameters(), lr=lr) - -# Training -for epoch in range(80): +optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) + +# For updating learning rate +def update_lr(optimizer, lr): + for param_group in optimizer.param_groups: + param_group['lr'] = lr + +# Train the model +total_step = len(train_loader) +curr_lr = learning_rate +for epoch in range(num_epochs): for i, (images, labels) in enumerate(train_loader): - images = Variable(images.cuda()) - labels = Variable(labels.cuda()) + images = images.to(device) + labels = labels.to(device) - # Forward + Backward + Optimize - optimizer.zero_grad() - outputs = resnet(images) + # Forward pass + outputs = model(images) loss = criterion(outputs, labels) + + # Backward and optimize + optimizer.zero_grad() loss.backward() optimizer.step() if (i+1) % 100 == 0: - print ("Epoch [%d/%d], Iter [%d/%d] Loss: %.4f" %(epoch+1, 80, i+1, 500, loss.data[0])) + print ("Epoch [{}/{}], Step [{}/{}] Loss: {:.4f}" + .format(epoch+1, num_epochs, i+1, total_step, loss.item())) - # Decaying Learning Rate + # Decay learning rate if (epoch+1) % 20 == 0: - lr /= 3 - optimizer = torch.optim.Adam(resnet.parameters(), lr=lr) - -# Test -correct = 0 -total = 0 -for images, labels in test_loader: - images = Variable(images.cuda()) - outputs = resnet(images) - _, predicted = torch.max(outputs.data, 1) - total += labels.size(0) - correct += (predicted.cpu() == labels).sum() - -print('Accuracy of the model on the test images: %d %%' % (100 * correct / total)) - -# Save the Model -torch.save(resnet, 'resnet.pkl') \ No newline at end of file + curr_lr /= 3 + update_lr(optimizer, curr_lr) + +# Test the model +model.eval() +with torch.no_grad(): + correct = 0 + total = 0 + for images, labels in test_loader: + images = images.to(device) + labels = labels.to(device) + outputs = model(images) + _, predicted = torch.max(outputs.data, 1) + total += labels.size(0) + correct += (predicted == labels).sum().item() + + print('Accuracy of the model on the test images: {} %'.format(100 * correct / total)) + +# Save the model checkpoint +torch.save(model.state_dict(), 'resnet.ckpt') diff --git a/tutorials/08 - Language Model/data/train.txt b/tutorials/02-intermediate/language_model/data/train.txt similarity index 100% rename from tutorials/08 - Language Model/data/train.txt rename to tutorials/02-intermediate/language_model/data/train.txt diff --git a/tutorials/08 - Language Model/data_utils.py b/tutorials/02-intermediate/language_model/data_utils.py similarity index 89% rename from tutorials/08 - Language Model/data_utils.py rename to tutorials/02-intermediate/language_model/data_utils.py index e0238b81..91bc6053 100644 --- a/tutorials/08 - Language Model/data_utils.py +++ b/tutorials/02-intermediate/language_model/data_utils.py @@ -1,6 +1,7 @@ import torch import os + class Dictionary(object): def __init__(self): self.word2idx = {} @@ -15,12 +16,11 @@ def add_word(self, word): def 
__len__(self):
         return len(self.word2idx)
-
+
+
 class Corpus(object):
-    def __init__(self, path='./data'):
+    def __init__(self):
         self.dictionary = Dictionary()
-        self.train = os.path.join(path, 'train.txt')
-        self.test = os.path.join(path, 'test.txt')
 
     def get_data(self, path, batch_size=20):
         # Add words to the dictionary
diff --git a/tutorials/02-intermediate/language_model/main.py b/tutorials/02-intermediate/language_model/main.py
new file mode 100644
index 00000000..ef135bb7
--- /dev/null
+++ b/tutorials/02-intermediate/language_model/main.py
@@ -0,0 +1,120 @@
+# Some part of the code was referenced from below.
+# https://github.com/pytorch/examples/tree/master/word_language_model
+import torch
+import torch.nn as nn
+import numpy as np
+from torch.nn.utils import clip_grad_norm_
+from data_utils import Dictionary, Corpus
+
+
+# Device configuration
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+# Hyper-parameters
+embed_size = 128
+hidden_size = 1024
+num_layers = 1
+num_epochs = 5
+num_samples = 1000     # number of words to be sampled
+batch_size = 20
+seq_length = 30
+learning_rate = 0.002
+
+# Load "Penn Treebank" dataset
+corpus = Corpus()
+ids = corpus.get_data('data/train.txt', batch_size)
+vocab_size = len(corpus.dictionary)
+num_batches = ids.size(1) // seq_length
+
+
+# RNN based language model
+class RNNLM(nn.Module):
+    def __init__(self, vocab_size, embed_size, hidden_size, num_layers):
+        super(RNNLM, self).__init__()
+        self.embed = nn.Embedding(vocab_size, embed_size)
+        self.lstm = nn.LSTM(embed_size, hidden_size, num_layers, batch_first=True)
+        self.linear = nn.Linear(hidden_size, vocab_size)
+
+    def forward(self, x, h):
+        # Embed word ids to vectors
+        x = self.embed(x)
+
+        # Forward propagate LSTM
+        out, (h, c) = self.lstm(x, h)
+
+        # Reshape output to (batch_size*sequence_length, hidden_size)
+        out = out.reshape(out.size(0)*out.size(1), out.size(2))
+
+        # Decode hidden states of all time steps
+        out = self.linear(out)
+        return out, (h, c)
+
+model = RNNLM(vocab_size, embed_size, hidden_size, num_layers).to(device)
+
+# Loss and optimizer
+criterion = nn.CrossEntropyLoss()
+optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
+
+# Truncated backpropagation: detach hidden states so that gradients
+# do not flow across mini-batch boundaries
+def detach(states):
+    return [state.detach() for state in states]
+
+# Train the model
+for epoch in range(num_epochs):
+    # Set initial hidden and cell states
+    states = (torch.zeros(num_layers, batch_size, hidden_size).to(device),
+              torch.zeros(num_layers, batch_size, hidden_size).to(device))
+
+    for i in range(0, ids.size(1) - seq_length, seq_length):
+        # Get mini-batch inputs and targets
+        inputs = ids[:, i:i+seq_length].to(device)
+        targets = ids[:, (i+1):(i+1)+seq_length].to(device)
+
+        # Forward pass
+        states = detach(states)
+        outputs, states = model(inputs, states)
+        loss = criterion(outputs, targets.reshape(-1))
+
+        # Backward and optimize
+        optimizer.zero_grad()
+        loss.backward()
+        clip_grad_norm_(model.parameters(), 0.5)
+        optimizer.step()
+
+        step = (i+1) // seq_length
+        if step % 100 == 0:
+            print ('Epoch [{}/{}], Step[{}/{}], Loss: {:.4f}, Perplexity: {:5.2f}'
+                   .format(epoch+1, num_epochs, step, num_batches, loss.item(), np.exp(loss.item())))
+
+# Test the model
+with torch.no_grad():
+    with open('sample.txt', 'w') as f:
+        # Set initial hidden and cell states
+        state = (torch.zeros(num_layers, 1, hidden_size).to(device),
+                 torch.zeros(num_layers, 1, hidden_size).to(device))
+
+        # Select one word id randomly
+        prob = torch.ones(vocab_size)
+        input = torch.multinomial(prob, num_samples=1).unsqueeze(1).to(device)
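+        # Note: `prob` holds unnormalized weights, so a vector of ones makes
+        # torch.multinomial pick the seed word uniformly at random; unsqueeze(1)
+        # shapes it as (batch_size=1, seq_length=1) for the model.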
+
+        for i in range(num_samples):
+            # Forward propagate RNN
+            output, state = model(input, state)
+
+            # Sample a word id
+            prob = output.exp()
+            word_id = torch.multinomial(prob, num_samples=1).item()
+
+            # Fill input with sampled word id for the next time step
+            input.fill_(word_id)
+
+            # File write
+            word = corpus.dictionary.idx2word[word_id]
+            word = '\n' if word == '<eos>' else word + ' '
+            f.write(word)
+
+            if (i+1) % 100 == 0:
+                print('Sampled [{}/{}] words and saved to {}'.format(i+1, num_samples, 'sample.txt'))
+
+# Save the model checkpoints
+torch.save(model.state_dict(), 'model.ckpt')
\ No newline at end of file
diff --git a/tutorials/02-intermediate/recurrent_neural_network/main.py b/tutorials/02-intermediate/recurrent_neural_network/main.py
new file mode 100644
index 00000000..c138c5ad
--- /dev/null
+++ b/tutorials/02-intermediate/recurrent_neural_network/main.py
@@ -0,0 +1,103 @@
+import torch
+import torch.nn as nn
+import torchvision
+import torchvision.transforms as transforms
+
+
+# Device configuration
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+# Hyper-parameters
+sequence_length = 28
+input_size = 28
+hidden_size = 128
+num_layers = 2
+num_classes = 10
+batch_size = 100
+num_epochs = 2
+learning_rate = 0.01
+
+# MNIST dataset
+train_dataset = torchvision.datasets.MNIST(root='../../data/',
+                                           train=True,
+                                           transform=transforms.ToTensor(),
+                                           download=True)
+
+test_dataset = torchvision.datasets.MNIST(root='../../data/',
+                                          train=False,
+                                          transform=transforms.ToTensor())
+
+# Data loader
+train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
+                                           batch_size=batch_size,
+                                           shuffle=True)
+
+test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
+                                          batch_size=batch_size,
+                                          shuffle=False)
+
+# Recurrent neural network (many-to-one)
+class RNN(nn.Module):
+    def __init__(self, input_size, hidden_size, num_layers, num_classes):
+        super(RNN, self).__init__()
+        self.hidden_size = hidden_size
+        self.num_layers = num_layers
+        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
+        self.fc = nn.Linear(hidden_size, num_classes)
+
+    def forward(self, x):
+        # Set initial hidden and cell states
+        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
+        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
+
+        # Forward propagate LSTM
+        out, _ = self.lstm(x, (h0, c0))  # out: tensor of shape (batch_size, seq_length, hidden_size)
+
+        # Decode the hidden state of the last time step
+        out = self.fc(out[:, -1, :])
+        return out
+
+model = RNN(input_size, hidden_size, num_layers, num_classes).to(device)
+
+
+# Loss and optimizer
+criterion = nn.CrossEntropyLoss()
+optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
+
+# Train the model
+total_step = len(train_loader)
+for epoch in range(num_epochs):
+    for i, (images, labels) in enumerate(train_loader):
+        images = images.reshape(-1, sequence_length, input_size).to(device)
+        labels = labels.to(device)
+
+        # Forward pass
+        outputs = model(images)
+        loss = criterion(outputs, labels)
+
+        # Backward and optimize
+        optimizer.zero_grad()
+        loss.backward()
+        optimizer.step()
+
+        if (i+1) % 100 == 0:
+            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
+                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))
+
+# Test the model
+model.eval()
+with torch.no_grad():
+    correct = 0
+    total = 0
+    for images, labels in test_loader:
+        images = images.reshape(-1, sequence_length, 
input_size).to(device) + labels = labels.to(device) + outputs = model(images) + _, predicted = torch.max(outputs.data, 1) + total += labels.size(0) + correct += (predicted == labels).sum().item() + + print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total)) + +# Save the model checkpoint +torch.save(model.state_dict(), 'model.ckpt') \ No newline at end of file diff --git a/tutorials/03 - Feedforward Neural Network/main-gpu.py b/tutorials/03 - Feedforward Neural Network/main-gpu.py deleted file mode 100644 index b0480a85..00000000 --- a/tutorials/03 - Feedforward Neural Network/main-gpu.py +++ /dev/null @@ -1,84 +0,0 @@ -import torch -import torch.nn as nn -import torchvision.datasets as dsets -import torchvision.transforms as transforms -from torch.autograd import Variable - - -# Hyper Parameters -input_size = 784 -hidden_size = 500 -num_classes = 10 -num_epochs = 5 -batch_size = 100 -learning_rate = 0.001 - -# MNIST Dataset -train_dataset = dsets.MNIST(root='../data', - train=True, - transform=transforms.ToTensor(), - download=True) - -test_dataset = dsets.MNIST(root='../data', - train=False, - transform=transforms.ToTensor()) - -# Data Loader (Input Pipeline) -train_loader = torch.utils.data.DataLoader(dataset=train_dataset, - batch_size=batch_size, - shuffle=True) - -test_loader = torch.utils.data.DataLoader(dataset=test_dataset, - batch_size=batch_size, - shuffle=False) - -# Neural Network Model (1 hidden layer) -class Net(nn.Module): - def __init__(self, input_size, hidden_size, num_classes): - super(Net, self).__init__() - self.fc1 = nn.Linear(input_size, hidden_size) - self.relu = nn.ReLU() - self.fc2 = nn.Linear(hidden_size, num_classes) - - def forward(self, x): - out = self.fc1(x) - out = self.relu(out) - out = self.fc2(out) - return out - -net = Net(input_size, hidden_size, num_classes) -net.cuda() - -# Loss and Optimizer -criterion = nn.CrossEntropyLoss() -optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate) - -# Train the Model -for epoch in range(num_epochs): - for i, (images, labels) in enumerate(train_loader): - # Convert torch tensor to Variable - images = Variable(images.view(-1, 28*28)).cuda() - labels = Variable(labels).cuda() - - # Forward + Backward + Optimize - optimizer.zero_grad() # zero the gradient buffer - outputs = net(images) - loss = criterion(outputs, labels) - loss.backward() - optimizer.step() - - if (i+1) % 100 == 0: - print ('Epoch [%d/%d], Step [%d/%d], Loss: %.4f' - %(epoch+1, num_epochs, i+1, len(train_dataset)//batch_size, loss.data[0])) - -# Test the Model -correct = 0 -total = 0 -for images, labels in test_loader: - images = Variable(images.view(-1, 28*28)).cuda() - outputs = net(images) - _, predicted = torch.max(outputs.data, 1) - total += labels.size(0) - correct += (predicted.cpu() == labels).sum() - -print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total)) \ No newline at end of file diff --git a/tutorials/03 - Feedforward Neural Network/main.py b/tutorials/03 - Feedforward Neural Network/main.py deleted file mode 100644 index c0f28cda..00000000 --- a/tutorials/03 - Feedforward Neural Network/main.py +++ /dev/null @@ -1,84 +0,0 @@ -import torch -import torch.nn as nn -import torchvision.datasets as dsets -import torchvision.transforms as transforms -from torch.autograd import Variable - - -# Hyper Parameters -input_size = 784 -hidden_size = 500 -num_classes = 10 -num_epochs = 5 -batch_size = 100 -learning_rate = 0.001 - -# MNIST Dataset -train_dataset = 
dsets.MNIST(root='../data', - train=True, - transform=transforms.ToTensor(), - download=True) - -test_dataset = dsets.MNIST(root='../data', - train=False, - transform=transforms.ToTensor()) - -# Data Loader (Input Pipeline) -train_loader = torch.utils.data.DataLoader(dataset=train_dataset, - batch_size=batch_size, - shuffle=True) - -test_loader = torch.utils.data.DataLoader(dataset=test_dataset, - batch_size=batch_size, - shuffle=False) - -# Neural Network Model (1 hidden layer) -class Net(nn.Module): - def __init__(self, input_size, hidden_size, num_classes): - super(Net, self).__init__() - self.fc1 = nn.Linear(input_size, hidden_size) - self.relu = nn.ReLU() - self.fc2 = nn.Linear(hidden_size, num_classes) - - def forward(self, x): - out = self.fc1(x) - out = self.relu(out) - out = self.fc2(out) - return out - -net = Net(input_size, hidden_size, num_classes) - - -# Loss and Optimizer -criterion = nn.CrossEntropyLoss() -optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate) - -# Train the Model -for epoch in range(num_epochs): - for i, (images, labels) in enumerate(train_loader): - # Convert torch tensor to Variable - images = Variable(images.view(-1, 28*28)) - labels = Variable(labels) - - # Forward + Backward + Optimize - optimizer.zero_grad() # zero the gradient buffer - outputs = net(images) - loss = criterion(outputs, labels) - loss.backward() - optimizer.step() - - if (i+1) % 100 == 0: - print ('Epoch [%d/%d], Step [%d/%d], Loss: %.4f' - %(epoch+1, num_epochs, i+1, len(train_dataset)//batch_size, loss.data[0])) - -# Test the Model -correct = 0 -total = 0 -for images, labels in test_loader: - images = Variable(images.view(-1, 28*28)) - outputs = net(images) - _, predicted = torch.max(outputs.data, 1) - total += labels.size(0) - correct += (predicted == labels).sum() - -print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total)) \ No newline at end of file diff --git a/tutorials/03-advanced/generative_adversarial_network/main.py b/tutorials/03-advanced/generative_adversarial_network/main.py new file mode 100644 index 00000000..c2062cf3 --- /dev/null +++ b/tutorials/03-advanced/generative_adversarial_network/main.py @@ -0,0 +1,148 @@ +import os +import torch +import torchvision +import torch.nn as nn +from torchvision import transforms +from torchvision.utils import save_image + + +# Device configuration +device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + +# Hyper-parameters +latent_size = 64 +hidden_size = 256 +image_size = 784 +num_epochs = 200 +batch_size = 100 +sample_dir = 'samples' + +# Create a directory if not exists +if not os.path.exists(sample_dir): + os.makedirs(sample_dir) + +# Image processing +# transform = transforms.Compose([ +# transforms.ToTensor(), +# transforms.Normalize(mean=(0.5, 0.5, 0.5), # 3 for RGB channels +# std=(0.5, 0.5, 0.5))]) +transform = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize(mean=[0.5], # 1 for greyscale channels + std=[0.5])]) + +# MNIST dataset +mnist = torchvision.datasets.MNIST(root='../../data/', + train=True, + transform=transform, + download=True) + +# Data loader +data_loader = torch.utils.data.DataLoader(dataset=mnist, + batch_size=batch_size, + shuffle=True) + +# Discriminator +D = nn.Sequential( + nn.Linear(image_size, hidden_size), + nn.LeakyReLU(0.2), + nn.Linear(hidden_size, hidden_size), + nn.LeakyReLU(0.2), + nn.Linear(hidden_size, 1), + nn.Sigmoid()) + +# Generator +G = nn.Sequential( + nn.Linear(latent_size, hidden_size), + nn.ReLU(), + 
nn.Linear(hidden_size, hidden_size), + nn.ReLU(), + nn.Linear(hidden_size, image_size), + nn.Tanh()) + +# Device setting +D = D.to(device) +G = G.to(device) + +# Binary cross entropy loss and optimizer +criterion = nn.BCELoss() +d_optimizer = torch.optim.Adam(D.parameters(), lr=0.0002) +g_optimizer = torch.optim.Adam(G.parameters(), lr=0.0002) + +def denorm(x): + out = (x + 1) / 2 + return out.clamp(0, 1) + +def reset_grad(): + d_optimizer.zero_grad() + g_optimizer.zero_grad() + +# Start training +total_step = len(data_loader) +for epoch in range(num_epochs): + for i, (images, _) in enumerate(data_loader): + images = images.reshape(batch_size, -1).to(device) + + # Create the labels which are later used as input for the BCE loss + real_labels = torch.ones(batch_size, 1).to(device) + fake_labels = torch.zeros(batch_size, 1).to(device) + + # ================================================================== # + # Train the discriminator # + # ================================================================== # + + # Compute BCE_Loss using real images where BCE_Loss(x, y): - y * log(D(x)) - (1-y) * log(1 - D(x)) + # Second term of the loss is always zero since real_labels == 1 + outputs = D(images) + d_loss_real = criterion(outputs, real_labels) + real_score = outputs + + # Compute BCELoss using fake images + # First term of the loss is always zero since fake_labels == 0 + z = torch.randn(batch_size, latent_size).to(device) + fake_images = G(z) + outputs = D(fake_images) + d_loss_fake = criterion(outputs, fake_labels) + fake_score = outputs + + # Backprop and optimize + d_loss = d_loss_real + d_loss_fake + reset_grad() + d_loss.backward() + d_optimizer.step() + + # ================================================================== # + # Train the generator # + # ================================================================== # + + # Compute loss with fake images + z = torch.randn(batch_size, latent_size).to(device) + fake_images = G(z) + outputs = D(fake_images) + + # We train G to maximize log(D(G(z))) instead of minimizing log(1-D(G(z))) + # For the reasoning, see the last paragraph of section 3. https://arxiv.org/pdf/1406.2661.pdf + g_loss = criterion(outputs, real_labels) + + # Backprop and optimize + reset_grad() + g_loss.backward() + g_optimizer.step() + + if (i+1) % 200 == 0: + print('Epoch [{}/{}], Step [{}/{}], d_loss: {:.4f}, g_loss: {:.4f}, D(x): {:.2f}, D(G(z)): {:.2f}' .format(epoch+1, num_epochs, i+1, total_step, d_loss.item(), g_loss.item(), real_score.mean().item(), fake_score.mean().item())) + + # Save real images + if (epoch+1) == 1: + images = images.reshape(images.size(0), 1, 28, 28) + save_image(denorm(images), os.path.join(sample_dir, 'real_images.png')) + + # Save sampled images + fake_images = fake_images.reshape(fake_images.size(0), 1, 28, 28) + save_image(denorm(fake_images), os.path.join(sample_dir, 'fake_images-{}.png'.format(epoch+1))) + +# Save the model checkpoints +torch.save(G.state_dict(), 'G.ckpt') +torch.save(D.state_dict(), 'D.ckpt') \ No newline at end of file diff --git a/tutorials/03-advanced/image_captioning/README.md b/tutorials/03-advanced/image_captioning/README.md new file mode 100644 index 00000000..409b62b4 --- /dev/null +++ b/tutorials/03-advanced/image_captioning/README.md @@ -0,0 +1,59 @@ +# Image Captioning +The goal of image captioning is to convert a given input image into a natural language description. The encoder-decoder framework is widely used for this task. The image encoder is a convolutional neural network (CNN). 
In this tutorial, we use the [resnet-152](https://arxiv.org/abs/1512.03385) model pretrained on the [ILSVRC-2012-CLS](http://www.image-net.org/challenges/LSVRC/2012/) image classification dataset. The decoder is a long short-term memory (LSTM) network. + +![alt text](png/model.png) + +#### Training phase +For the encoder part, the pretrained CNN extracts the feature vector from a given input image. The feature vector is linearly transformed to have the same dimension as the input dimension of the LSTM network. For the decoder part, source and target texts are predefined. For example, if the image description is **"Giraffes standing next to each other"**, the source sequence is a list containing **['<start>', 'Giraffes', 'standing', 'next', 'to', 'each', 'other']** and the target sequence is a list containing **['Giraffes', 'standing', 'next', 'to', 'each', 'other', '<end>']**. Using these source and target sequences and the feature vector, the LSTM decoder is trained as a language model conditioned on the feature vector. + +#### Test phase +In the test phase, the encoder part is almost the same as in the training phase. The only difference is that the batch normalization layer uses its moving mean and variance instead of mini-batch statistics. This can be easily implemented using [encoder.eval()](https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/03-advanced/image_captioning/sample.py#L37). For the decoder part, there is a significant difference between the training phase and the test phase. In the test phase, the LSTM decoder can't see the image description. To deal with this, the LSTM decoder feeds the previously generated word back in as its next input. This can be implemented using a [for-loop](https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/03-advanced/image_captioning/model.py#L48). + + + +## Usage + + +#### 1. Clone the repositories +```bash +git clone https://github.com/pdollar/coco.git +cd coco/PythonAPI/ +make +python setup.py build +python setup.py install +cd ../../ +git clone https://github.com/yunjey/pytorch-tutorial.git +cd pytorch-tutorial/tutorials/03-advanced/image_captioning/ +``` + +#### 2. Download the dataset + +```bash +pip install -r requirements.txt +chmod +x download.sh +./download.sh +``` + +#### 3. Preprocessing + +```bash +python build_vocab.py +python resize.py +``` + +#### 4. Train the model + +```bash +python train.py +``` + +#### 5. Test the model + +```bash +python sample.py --image='png/example.png' +``` + +
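For clarity, here is a minimal sketch (not code from this repository) of how a raw caption could be turned into the source and target sequences described in the training-phase section above; the `vocab` argument is assumed to behave like the `Vocabulary` callable defined in build_vocab.py:

```python
import nltk

# Hypothetical helper for illustration only: build teacher-forcing sequences
# from a caption string using a Vocabulary-style callable (see build_vocab.py).
def make_sequences(caption, vocab):
    tokens = nltk.tokenize.word_tokenize(caption.lower())
    ids = [vocab('<start>')] + [vocab(t) for t in tokens] + [vocab('<end>')]
    source = ids[:-1]  # '<start>' followed by the caption tokens
    target = ids[1:]   # the caption tokens followed by '<end>'
    return source, target
```

In the repository's own pipeline, data_loader.py stores the full token list (with both `<start>` and `<end>`) per caption, and the one-step shift comes from prepending the image feature vector to the embeddings in model.py.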
+ +## Pretrained model +If you do not want to train the model from scratch, you can use a pretrained model. You can download the pretrained model [here](https://www.dropbox.com/s/ne0ixz5d58ccbbz/pretrained_model.zip?dl=0) and the vocabulary file [here](https://www.dropbox.com/s/26adb7y9m98uisa/vocap.zip?dl=0). You should extract pretrained_model.zip to `./models/` and vocab.pkl to `./data/` using the `unzip` command. diff --git a/tutorials/09 - Image Captioning/vocab.py b/tutorials/03-advanced/image_captioning/build_vocab.py similarity index 50% rename from tutorials/09 - Image Captioning/vocab.py rename to tutorials/03-advanced/image_captioning/build_vocab.py index 78c16f28..946b4afb 100644 --- a/tutorials/09 - Image Captioning/vocab.py +++ b/tutorials/03-advanced/image_captioning/build_vocab.py @@ -1,6 +1,6 @@ -# Create a vocabulary wrapper import nltk import pickle +import argparse from collections import Counter from pycocotools.coco import COCO @@ -11,18 +11,18 @@ def __init__(self): self.word2idx = {} self.idx2word = {} self.idx = 0 - + def add_word(self, word): if not word in self.word2idx: self.word2idx[word] = self.idx self.idx2word[self.idx] = word self.idx += 1 - + def __call__(self, word): if not word in self.word2idx: return self.word2idx['<unk>'] return self.word2idx[word] - + def __len__(self): return len(self.word2idx) @@ -35,31 +35,42 @@ def build_vocab(json, threshold): caption = str(coco.anns[id]['caption']) tokens = nltk.tokenize.word_tokenize(caption.lower()) counter.update(tokens) - - if i % 1000 == 0: - print("[%d/%d] tokenized the captions." %(i, len(ids))) - - # Discard if the occurrence of the word is less than min_word_cnt. + + if (i+1) % 1000 == 0: + print("[{}/{}] Tokenized the captions.".format(i+1, len(ids))) + + # If the word frequency is less than 'threshold', then the word is discarded. words = [word for word, cnt in counter.items() if cnt >= threshold] # Create a vocab wrapper and add some special tokens. vocab = Vocabulary() - vocab.add_word('<pad>') - vocab.add_word('<start>') - vocab.add_word('<end>') - vocab.add_word('<unk>') - - # Add words to the vocabulary. + vocab.add_word('<pad>') + vocab.add_word('<start>') + vocab.add_word('<end>') + vocab.add_word('<unk>') + + # Add the words to the vocabulary. 
for i, word in enumerate(words): vocab.add_word(word) return vocab -def main(): - vocab = build_vocab(json='./data/annotations/captions_train2014.json', - threshold=4) - with open('./data/vocab.pkl', 'wb') as f: - pickle.dump(vocab, f, pickle.HIGHEST_PROTOCOL) - print("Saved vocabulary file to ", './data/vocab.pkl') - +def main(args): + vocab = build_vocab(json=args.caption_path, threshold=args.threshold) + vocab_path = args.vocab_path + with open(vocab_path, 'wb') as f: + pickle.dump(vocab, f) + print("Total vocabulary size: {}".format(len(vocab))) + print("Saved the vocabulary wrapper to '{}'".format(vocab_path)) + + if __name__ == '__main__': - main() \ No newline at end of file + parser = argparse.ArgumentParser() + parser.add_argument('--caption_path', type=str, + default='data/annotations/captions_train2014.json', + help='path for train annotation file') + parser.add_argument('--vocab_path', type=str, default='./data/vocab.pkl', + help='path for saving vocabulary wrapper') + parser.add_argument('--threshold', type=int, default=4, + help='minimum word count threshold') + args = parser.parse_args() + main(args) \ No newline at end of file diff --git a/tutorials/09 - Image Captioning/data.py b/tutorials/03-advanced/image_captioning/data_loader.py similarity index 69% rename from tutorials/09 - Image Captioning/data.py rename to tutorials/03-advanced/image_captioning/data_loader.py index a244fa28..0f0ef301 100644 --- a/tutorials/09 - Image Captioning/data.py +++ b/tutorials/03-advanced/image_captioning/data_loader.py @@ -6,19 +6,20 @@ import numpy as np import nltk from PIL import Image -from vocab import Vocabulary +from build_vocab import Vocabulary from pycocotools.coco import COCO class CocoDataset(data.Dataset): """COCO Custom Dataset compatible with torch.utils.data.DataLoader.""" def __init__(self, root, json, vocab, transform=None): - """ + """Set the path for images, captions and vocabulary wrapper. + Args: root: image directory. json: coco annotation file path. vocab: vocabulary wrapper. - transform: transformer for image. + transform: image transformer. """ self.root = root self.coco = COCO(json) @@ -27,7 +28,7 @@ def __init__(self, root, json, vocab, transform=None): self.transform = transform def __getitem__(self, index): - """This function should return one data pair(image and caption).""" + """Returns one data pair (image and caption).""" coco = self.coco vocab = self.vocab ann_id = self.ids[index] @@ -38,7 +39,7 @@ def __getitem__(self, index): image = Image.open(os.path.join(self.root, path)).convert('RGB') if self.transform is not None: image = self.transform(image) - + # Convert caption (string) to word ids. tokens = nltk.tokenize.word_tokenize(str(caption).lower()) caption = [] @@ -51,27 +52,31 @@ def __getitem__(self, index): def __len__(self): return len(self.ids) - + def collate_fn(data): - """Build mini-batch tensors from a list of (image, caption) tuples. + """Creates mini-batch tensors from the list of tuples (image, caption). + + We should build a custom collate_fn rather than using the default collate_fn, + because merging captions (including padding) is not supported by default. + Args: - data: list of (image, caption) tuple. + data: list of tuple (image, caption). - image: torch tensor of shape (3, 256, 256). - caption: torch tensor of shape (?); variable length. - + Returns: images: torch tensor of shape (batch_size, 3, 256, 256). targets: torch tensor of shape (batch_size, padded_length). lengths: list; valid length for each padded caption. 
""" - # Sort a data list by caption length + # Sort a data list by caption length (descending order). data.sort(key=lambda x: len(x[1]), reverse=True) images, captions = zip(*data) - - # Merge images (convert tuple of 3D tensor to 4D tensor) + + # Merge images (from tuple of 3D tensor to 4D tensor). images = torch.stack(images, 0) - - # Merget captions (convert tuple of 1D tensor to 2D tensor) + + # Merge captions (from tuple of 1D tensor to 2D tensor). lengths = [len(cap) for cap in captions] targets = torch.zeros(len(captions), max(lengths)).long() for i, cap in enumerate(captions): @@ -79,19 +84,22 @@ def collate_fn(data): targets[i, :end] = cap[:end] return images, targets, lengths - -def get_loader(root, json, vocab, transform, batch_size=100, shuffle=True, num_workers=2): +def get_loader(root, json, vocab, transform, batch_size, shuffle, num_workers): """Returns torch.utils.data.DataLoader for custom coco dataset.""" - # COCO custom dataset + # COCO caption dataset coco = CocoDataset(root=root, json=json, - vocab = vocab, + vocab=vocab, transform=transform) - # Data loader + # Data loader for COCO dataset + # This will return (images, captions, lengths) for each iteration. + # images: a tensor of shape (batch_size, 3, 224, 224). + # captions: a tensor of shape (batch_size, padded_length). + # lengths: a list indicating valid length for each caption. length is (batch_size). data_loader = torch.utils.data.DataLoader(dataset=coco, batch_size=batch_size, - shuffle=True, + shuffle=shuffle, num_workers=num_workers, collate_fn=collate_fn) return data_loader \ No newline at end of file diff --git a/tutorials/03-advanced/image_captioning/download.sh b/tutorials/03-advanced/image_captioning/download.sh new file mode 100755 index 00000000..dace6aad --- /dev/null +++ b/tutorials/03-advanced/image_captioning/download.sh @@ -0,0 +1,11 @@ +mkdir data +wget http://msvocds.blob.core.windows.net/annotations-1-0-3/captions_train-val2014.zip -P ./data/ +wget http://images.cocodataset.org/zips/train2014.zip -P ./data/ +wget http://images.cocodataset.org/zips/val2014.zip -P ./data/ + +unzip ./data/captions_train-val2014.zip -d ./data/ +rm ./data/captions_train-val2014.zip +unzip ./data/train2014.zip -d ./data/ +rm ./data/train2014.zip +unzip ./data/val2014.zip -d ./data/ +rm ./data/val2014.zip diff --git a/tutorials/03-advanced/image_captioning/model.py b/tutorials/03-advanced/image_captioning/model.py new file mode 100644 index 00000000..b1aef0cd --- /dev/null +++ b/tutorials/03-advanced/image_captioning/model.py @@ -0,0 +1,56 @@ +import torch +import torch.nn as nn +import torchvision.models as models +from torch.nn.utils.rnn import pack_padded_sequence + + +class EncoderCNN(nn.Module): + def __init__(self, embed_size): + """Load the pretrained ResNet-152 and replace top fc layer.""" + super(EncoderCNN, self).__init__() + resnet = models.resnet152(pretrained=True) + modules = list(resnet.children())[:-1] # delete the last fc layer. 
+ self.resnet = nn.Sequential(*modules) + self.linear = nn.Linear(resnet.fc.in_features, embed_size) + self.bn = nn.BatchNorm1d(embed_size, momentum=0.01) + + def forward(self, images): + """Extract feature vectors from input images.""" + with torch.no_grad(): + features = self.resnet(images) + features = features.reshape(features.size(0), -1) + features = self.bn(self.linear(features)) + return features + + +class DecoderRNN(nn.Module): + def __init__(self, embed_size, hidden_size, vocab_size, num_layers, max_seq_length=20): + """Set the hyper-parameters and build the layers.""" + super(DecoderRNN, self).__init__() + self.embed = nn.Embedding(vocab_size, embed_size) + self.lstm = nn.LSTM(embed_size, hidden_size, num_layers, batch_first=True) + self.linear = nn.Linear(hidden_size, vocab_size) + self.max_seq_length = max_seq_length + + def forward(self, features, captions, lengths): + """Decode image feature vectors and generate captions.""" + embeddings = self.embed(captions) + embeddings = torch.cat((features.unsqueeze(1), embeddings), 1) + packed = pack_padded_sequence(embeddings, lengths, batch_first=True) + hiddens, _ = self.lstm(packed) + outputs = self.linear(hiddens[0]) + return outputs + + def sample(self, features, states=None): + """Generate captions for given image features using greedy search.""" + sampled_ids = [] + inputs = features.unsqueeze(1) + for i in range(self.max_seq_length): + hiddens, states = self.lstm(inputs, states) # hiddens: (batch_size, 1, hidden_size) + outputs = self.linear(hiddens.squeeze(1)) # outputs: (batch_size, vocab_size) + _, predicted = outputs.max(1) # predicted: (batch_size) + sampled_ids.append(predicted) + inputs = self.embed(predicted) # inputs: (batch_size, embed_size) + inputs = inputs.unsqueeze(1) # inputs: (batch_size, 1, embed_size) + sampled_ids = torch.stack(sampled_ids, 1) # sampled_ids: (batch_size, max_seq_length) + return sampled_ids \ No newline at end of file diff --git a/tutorials/03-advanced/image_captioning/png/example.png b/tutorials/03-advanced/image_captioning/png/example.png new file mode 100644 index 00000000..810228d8 Binary files /dev/null and b/tutorials/03-advanced/image_captioning/png/example.png differ diff --git a/tutorials/03-advanced/image_captioning/png/image_captioning.png b/tutorials/03-advanced/image_captioning/png/image_captioning.png new file mode 100644 index 00000000..2aceadd3 Binary files /dev/null and b/tutorials/03-advanced/image_captioning/png/image_captioning.png differ diff --git a/tutorials/03-advanced/image_captioning/png/model.png b/tutorials/03-advanced/image_captioning/png/model.png new file mode 100644 index 00000000..4fc7c7ab Binary files /dev/null and b/tutorials/03-advanced/image_captioning/png/model.png differ diff --git a/tutorials/03-advanced/image_captioning/requirements.txt b/tutorials/03-advanced/image_captioning/requirements.txt new file mode 100644 index 00000000..778d2a83 --- /dev/null +++ b/tutorials/03-advanced/image_captioning/requirements.txt @@ -0,0 +1,5 @@ +matplotlib +nltk +numpy +Pillow +argparse \ No newline at end of file diff --git a/tutorials/03-advanced/image_captioning/resize.py b/tutorials/03-advanced/image_captioning/resize.py new file mode 100644 index 00000000..5620b0d4 --- /dev/null +++ b/tutorials/03-advanced/image_captioning/resize.py @@ -0,0 +1,42 @@ +import argparse +import os +from PIL import Image + + +def resize_image(image, size): + """Resize an image to the given size.""" + return image.resize(size, Image.ANTIALIAS) + +def resize_images(image_dir, 
output_dir, size): + """Resize the images in 'image_dir' and save into 'output_dir'.""" + if not os.path.exists(output_dir): + os.makedirs(output_dir) + + images = os.listdir(image_dir) + num_images = len(images) + for i, image in enumerate(images): + with open(os.path.join(image_dir, image), 'r+b') as f: + with Image.open(f) as img: + img = resize_image(img, size) + img.save(os.path.join(output_dir, image), img.format) + if (i+1) % 100 == 0: + print ("[{}/{}] Resized the images and saved into '{}'." .format(i+1, num_images, output_dir)) + +def main(args): + image_dir = args.image_dir + output_dir = args.output_dir + image_size = [args.image_size, args.image_size] + resize_images(image_dir, output_dir, image_size) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--image_dir', type=str, default='./data/train2014/', + help='directory for train images') + parser.add_argument('--output_dir', type=str, default='./data/resized2014/', + help='directory for saving resized images') + parser.add_argument('--image_size', type=int, default=256, + help='size for image after processing') + args = parser.parse_args() + main(args) \ No newline at end of file diff --git a/tutorials/03-advanced/image_captioning/sample.py b/tutorials/03-advanced/image_captioning/sample.py new file mode 100644 index 00000000..74ff40fe --- /dev/null +++ b/tutorials/03-advanced/image_captioning/sample.py @@ -0,0 +1,81 @@ +import torch +import matplotlib.pyplot as plt +import numpy as np +import argparse +import pickle +import os +from torchvision import transforms +from build_vocab import Vocabulary +from model import EncoderCNN, DecoderRNN +from PIL import Image + + +# Device configuration +device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + +def load_image(image_path, transform=None): + image = Image.open(image_path).convert('RGB') + image = image.resize([224, 224], Image.LANCZOS) + + if transform is not None: + image = transform(image).unsqueeze(0) + + return image + +def main(args): + # Image preprocessing + transform = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.485, 0.456, 0.406), + (0.229, 0.224, 0.225))]) + + # Load vocabulary wrapper + with open(args.vocab_path, 'rb') as f: + vocab = pickle.load(f) + + # Build models + encoder = EncoderCNN(args.embed_size).eval() # eval mode (batchnorm uses moving mean/variance) + decoder = DecoderRNN(args.embed_size, args.hidden_size, len(vocab), args.num_layers) + encoder = encoder.to(device) + decoder = decoder.to(device) + + # Load the trained model parameters + encoder.load_state_dict(torch.load(args.encoder_path)) + decoder.load_state_dict(torch.load(args.decoder_path)) + + # Prepare an image + image = load_image(args.image, transform) + image_tensor = image.to(device) + + # Generate a caption from the image + feature = encoder(image_tensor) + sampled_ids = decoder.sample(feature) + sampled_ids = sampled_ids[0].cpu().numpy() # (1, max_seq_length) -> (max_seq_length) + + # Convert word_ids to words + sampled_caption = [] + for word_id in sampled_ids: + word = vocab.idx2word[word_id] + sampled_caption.append(word) + if word == '<end>': + break + sentence = ' '.join(sampled_caption) + + # Print out the image and the generated caption + print (sentence) + image = Image.open(args.image) + plt.imshow(np.asarray(image)) + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--image', type=str, required=True, help='input image for generating caption') + 
parser.add_argument('--encoder_path', type=str, default='models/encoder-5-3000.pkl', help='path for trained encoder') + parser.add_argument('--decoder_path', type=str, default='models/decoder-5-3000.pkl', help='path for trained decoder') + parser.add_argument('--vocab_path', type=str, default='data/vocab.pkl', help='path for vocabulary wrapper') + + # Model parameters (should be the same as the parameters in train.py) + parser.add_argument('--embed_size', type=int , default=256, help='dimension of word embedding vectors') + parser.add_argument('--hidden_size', type=int , default=512, help='dimension of lstm hidden states') + parser.add_argument('--num_layers', type=int , default=1, help='number of layers in lstm') + args = parser.parse_args() + main(args) diff --git a/tutorials/03-advanced/image_captioning/train.py b/tutorials/03-advanced/image_captioning/train.py new file mode 100644 index 00000000..73007637 --- /dev/null +++ b/tutorials/03-advanced/image_captioning/train.py @@ -0,0 +1,101 @@ +import argparse +import torch +import torch.nn as nn +import numpy as np +import os +import pickle +from data_loader import get_loader +from build_vocab import Vocabulary +from model import EncoderCNN, DecoderRNN +from torch.nn.utils.rnn import pack_padded_sequence +from torchvision import transforms + + +# Device configuration +device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + +def main(args): + # Create model directory + if not os.path.exists(args.model_path): + os.makedirs(args.model_path) + + # Image preprocessing, normalization for the pretrained resnet + transform = transforms.Compose([ + transforms.RandomCrop(args.crop_size), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + transforms.Normalize((0.485, 0.456, 0.406), + (0.229, 0.224, 0.225))]) + + # Load vocabulary wrapper + with open(args.vocab_path, 'rb') as f: + vocab = pickle.load(f) + + # Build data loader + data_loader = get_loader(args.image_dir, args.caption_path, vocab, + transform, args.batch_size, + shuffle=True, num_workers=args.num_workers) + + # Build the models + encoder = EncoderCNN(args.embed_size).to(device) + decoder = DecoderRNN(args.embed_size, args.hidden_size, len(vocab), args.num_layers).to(device) + + # Loss and optimizer + criterion = nn.CrossEntropyLoss() + params = list(decoder.parameters()) + list(encoder.linear.parameters()) + list(encoder.bn.parameters()) + optimizer = torch.optim.Adam(params, lr=args.learning_rate) + + # Train the models + total_step = len(data_loader) + for epoch in range(args.num_epochs): + for i, (images, captions, lengths) in enumerate(data_loader): + + # Set mini-batch dataset + images = images.to(device) + captions = captions.to(device) + targets = pack_padded_sequence(captions, lengths, batch_first=True)[0] + + # Forward, backward and optimize + features = encoder(images) + outputs = decoder(features, captions, lengths) + loss = criterion(outputs, targets) + decoder.zero_grad() + encoder.zero_grad() + loss.backward() + optimizer.step() + + # Print log info + if i % args.log_step == 0: + print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Perplexity: {:5.4f}' .format(epoch, args.num_epochs, i, total_step, loss.item(), np.exp(loss.item()))) + + # Save the model checkpoints + if (i+1) % args.save_step == 0: + torch.save(decoder.state_dict(), os.path.join( args.model_path, 'decoder-{}-{}.ckpt'.format(epoch+1, i+1))) + torch.save(encoder.state_dict(), os.path.join( args.model_path, 'encoder-{}-{}.ckpt'.format(epoch+1, i+1))) + + +if __name__ == '__main__': + 
parser = argparse.ArgumentParser() + parser.add_argument('--model_path', type=str, default='models/' , help='path for saving trained models') + parser.add_argument('--crop_size', type=int, default=224 , help='size for randomly cropping images') + parser.add_argument('--vocab_path', type=str, default='data/vocab.pkl', help='path for vocabulary wrapper') + parser.add_argument('--image_dir', type=str, default='data/resized2014', help='directory for resized images') + parser.add_argument('--caption_path', type=str, default='data/annotations/captions_train2014.json', help='path for train annotation json file') + parser.add_argument('--log_step', type=int , default=10, help='step size for printing log info') + parser.add_argument('--save_step', type=int , default=1000, help='step size for saving trained models') + + # Model parameters + parser.add_argument('--embed_size', type=int , default=256, help='dimension of word embedding vectors') + parser.add_argument('--hidden_size', type=int , default=512, help='dimension of lstm hidden states') + parser.add_argument('--num_layers', type=int , default=1, help='number of layers in lstm') + + parser.add_argument('--num_epochs', type=int, default=5) + parser.add_argument('--batch_size', type=int, default=128) + parser.add_argument('--num_workers', type=int, default=2) + parser.add_argument('--learning_rate', type=float, default=0.001) + args = parser.parse_args() + print(args) + main(args) \ No newline at end of file diff --git a/tutorials/03-advanced/neural_style_transfer/README.md b/tutorials/03-advanced/neural_style_transfer/README.md new file mode 100644 index 00000000..579a6d22 --- /dev/null +++ b/tutorials/03-advanced/neural_style_transfer/README.md @@ -0,0 +1,33 @@ +# Neural Style Transfer + +[Neural style transfer](https://arxiv.org/abs/1508.06576) is an algorithm that combines the content of one image with the style of another image using a CNN. Given a content image and a style image, the goal is to generate a target image that minimizes the content difference with the content image and the style difference with the style image. + +
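One detail worth making explicit (a toy sketch, not code from this repository): the only "parameters" being optimized are the pixels of the target image itself, which main.py below initializes with the content image:

```python
import torch

# Toy illustration with dummy losses; main.py computes the real content and
# style losses from VGG feature maps.
content = torch.rand(1, 3, 64, 64)             # stand-in for a content image
target = content.clone().requires_grad_(True)  # the target image is the "parameter"
optimizer = torch.optim.Adam([target], lr=0.003)

for step in range(10):
    content_loss = torch.mean((target - content) ** 2)
    style_loss = torch.mean(target ** 2)       # placeholder for the Gram-matrix loss
    loss = content_loss + 100 * style_loss     # 100 matches the default --style_weight
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
```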

+ + +#### Content loss + +To minimize the content difference, we forward propagate the content image and the target image through a pretrained [VGGNet](https://arxiv.org/abs/1409.1556) and extract feature maps from multiple convolutional layers. Then, the target image is updated to minimize the [mean-squared error](https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/03-advanced/neural_style_transfer/main.py#L81-L82) between the feature maps of the content image and those of the target image. + +#### Style loss + +As in computing the content loss, we forward propagate the style image and the target image through the VGGNet and extract convolutional feature maps. To generate a texture that matches the style of the style image, we update the target image by minimizing the mean-squared error between the Gram matrix of the style image and the Gram matrix of the target image (feature correlation minimization). See [here](https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/03-advanced/neural_style_transfer/main.py#L84-L94) for how to compute the style loss. + + + +
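The Gram matrix computation is short enough to sketch in isolation (dummy feature maps of shape (1, c, h, w); main.py performs the same steps inside its training loop):

```python
import torch

def gram_matrix(fmap):
    # (1, c, h, w) -> (c, h*w); the product f @ f.T gives a (c, c) matrix of
    # channel-to-channel feature correlations.
    _, c, h, w = fmap.size()
    f = fmap.view(c, h * w)
    return torch.mm(f, f.t())

target_map = torch.rand(1, 8, 16, 16)  # dummy target feature map
style_map = torch.rand(1, 8, 16, 16)   # dummy style feature map
_, c, h, w = target_map.size()
style_loss = torch.mean((gram_matrix(target_map) - gram_matrix(style_map)) ** 2) / (c * h * w)
```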
+ +## Usage + +```bash +$ pip install -r requirements.txt +$ python main.py --content='png/content.png' --style='png/style.png' +``` + +
+ +## Results +The following is the result of applying various styles of artwork to Anne Hathaway's photograph. + +![alt text](png/neural_style.png) diff --git a/tutorials/03-advanced/neural_style_transfer/main.py b/tutorials/03-advanced/neural_style_transfer/main.py new file mode 100644 index 00000000..99153ee7 --- /dev/null +++ b/tutorials/03-advanced/neural_style_transfer/main.py @@ -0,0 +1,126 @@ +from __future__ import division +from torchvision import models +from torchvision import transforms +from PIL import Image +import argparse +import torch +import torchvision +import torch.nn as nn +import numpy as np + + +# Device configuration +device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + +def load_image(image_path, transform=None, max_size=None, shape=None): + """Load an image and convert it to a torch tensor.""" + image = Image.open(image_path) + + if max_size: + scale = max_size / max(image.size) + size = np.array(image.size) * scale + image = image.resize(size.astype(int), Image.ANTIALIAS) + + if shape: + image = image.resize(shape, Image.LANCZOS) + + if transform: + image = transform(image).unsqueeze(0) + + return image.to(device) + + +class VGGNet(nn.Module): + def __init__(self): + """Select conv1_1 ~ conv5_1 activation maps.""" + super(VGGNet, self).__init__() + self.select = ['0', '5', '10', '19', '28'] + self.vgg = models.vgg19(pretrained=True).features + + def forward(self, x): + """Extract multiple convolutional feature maps.""" + features = [] + for name, layer in self.vgg._modules.items(): + x = layer(x) + if name in self.select: + features.append(x) + return features + + +def main(config): + + # Image preprocessing + # VGGNet was trained on ImageNet where images are normalized by mean=[0.485, 0.456, 0.406] and std=[0.229, 0.224, 0.225]. + # We use the same normalization statistics here. 
+ transform = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize(mean=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225))]) + + # Load content and style images + # Make the style image the same size as the content image + content = load_image(config.content, transform, max_size=config.max_size) + style = load_image(config.style, transform, shape=[content.size(2), content.size(3)]) + + # Initialize a target image with the content image + target = content.clone().requires_grad_(True) + + optimizer = torch.optim.Adam([target], lr=config.lr, betas=[0.5, 0.999]) + vgg = VGGNet().to(device).eval() + + for step in range(config.total_step): + + # Extract multiple (5) conv feature vectors + target_features = vgg(target) + content_features = vgg(content) + style_features = vgg(style) + + style_loss = 0 + content_loss = 0 + for f1, f2, f3 in zip(target_features, content_features, style_features): + # Compute content loss with target and content images + content_loss += torch.mean((f1 - f2)**2) + + # Reshape convolutional feature maps + _, c, h, w = f1.size() + f1 = f1.view(c, h * w) + f3 = f3.view(c, h * w) + + # Compute gram matrix + f1 = torch.mm(f1, f1.t()) + f3 = torch.mm(f3, f3.t()) + + # Compute style loss with target and style images + style_loss += torch.mean((f1 - f3)**2) / (c * h * w) + + # Compute total loss, backprop and optimize + loss = content_loss + config.style_weight * style_loss + optimizer.zero_grad() + loss.backward() + optimizer.step() + + if (step+1) % config.log_step == 0: + print ('Step [{}/{}], Content Loss: {:.4f}, Style Loss: {:.4f}' .format(step+1, config.total_step, content_loss.item(), style_loss.item())) + + if (step+1) % config.sample_step == 0: + # Save the generated image + denorm = transforms.Normalize((-2.12, -2.04, -1.80), (4.37, 4.46, 4.44)) + img = target.clone().squeeze() + img = denorm(img).clamp_(0, 1) + torchvision.utils.save_image(img, 'output-{}.png'.format(step+1)) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('--content', type=str, default='png/content.png') + parser.add_argument('--style', type=str, default='png/style.png') + parser.add_argument('--max_size', type=int, default=400) + parser.add_argument('--total_step', type=int, default=2000) + parser.add_argument('--log_step', type=int, default=10) + parser.add_argument('--sample_step', type=int, default=500) + parser.add_argument('--style_weight', type=float, default=100) + parser.add_argument('--lr', type=float, default=0.003) + config = parser.parse_args() + print(config) + main(config) \ No newline at end of file diff --git a/tutorials/03-advanced/neural_style_transfer/png/content.png b/tutorials/03-advanced/neural_style_transfer/png/content.png new file mode 100644 index 00000000..96889a0a Binary files /dev/null and b/tutorials/03-advanced/neural_style_transfer/png/content.png differ diff --git a/tutorials/03-advanced/neural_style_transfer/png/neural_style.png b/tutorials/03-advanced/neural_style_transfer/png/neural_style.png new file mode 100644 index 00000000..0f5eacd3 Binary files /dev/null and b/tutorials/03-advanced/neural_style_transfer/png/neural_style.png differ diff --git a/tutorials/03-advanced/neural_style_transfer/png/neural_style2.png b/tutorials/03-advanced/neural_style_transfer/png/neural_style2.png new file mode 100644 index 00000000..92bfe817 Binary files /dev/null and b/tutorials/03-advanced/neural_style_transfer/png/neural_style2.png differ diff --git a/tutorials/03-advanced/neural_style_transfer/png/style.png 
b/tutorials/03-advanced/neural_style_transfer/png/style.png new file mode 100644 index 00000000..e7d9b4cb Binary files /dev/null and b/tutorials/03-advanced/neural_style_transfer/png/style.png differ diff --git a/tutorials/03-advanced/neural_style_transfer/png/style2.png b/tutorials/03-advanced/neural_style_transfer/png/style2.png new file mode 100644 index 00000000..eb7df210 Binary files /dev/null and b/tutorials/03-advanced/neural_style_transfer/png/style2.png differ diff --git a/tutorials/03-advanced/neural_style_transfer/png/style3.png b/tutorials/03-advanced/neural_style_transfer/png/style3.png new file mode 100644 index 00000000..0260be25 Binary files /dev/null and b/tutorials/03-advanced/neural_style_transfer/png/style3.png differ diff --git a/tutorials/03-advanced/neural_style_transfer/png/style4.png b/tutorials/03-advanced/neural_style_transfer/png/style4.png new file mode 100644 index 00000000..c62fdb35 Binary files /dev/null and b/tutorials/03-advanced/neural_style_transfer/png/style4.png differ diff --git a/tutorials/03-advanced/neural_style_transfer/requirements.txt b/tutorials/03-advanced/neural_style_transfer/requirements.txt new file mode 100644 index 00000000..131621d9 --- /dev/null +++ b/tutorials/03-advanced/neural_style_transfer/requirements.txt @@ -0,0 +1,4 @@ +argparse +torch +torchvision +Pillow diff --git a/tutorials/03-advanced/variational_autoencoder/main.py b/tutorials/03-advanced/variational_autoencoder/main.py new file mode 100644 index 00000000..fe476d83 --- /dev/null +++ b/tutorials/03-advanced/variational_autoencoder/main.py @@ -0,0 +1,101 @@ +import os +import torch +import torch.nn as nn +import torch.nn.functional as F +import torchvision +from torchvision import transforms +from torchvision.utils import save_image + + +# Device configuration +device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + +# Create a directory if not exists +sample_dir = 'samples' +if not os.path.exists(sample_dir): + os.makedirs(sample_dir) + +# Hyper-parameters +image_size = 784 +h_dim = 400 +z_dim = 20 +num_epochs = 15 +batch_size = 128 +learning_rate = 1e-3 + +# MNIST dataset +dataset = torchvision.datasets.MNIST(root='../../data', + train=True, + transform=transforms.ToTensor(), + download=True) + +# Data loader +data_loader = torch.utils.data.DataLoader(dataset=dataset, + batch_size=batch_size, + shuffle=True) + + +# VAE model +class VAE(nn.Module): + def __init__(self, image_size=784, h_dim=400, z_dim=20): + super(VAE, self).__init__() + self.fc1 = nn.Linear(image_size, h_dim) + self.fc2 = nn.Linear(h_dim, z_dim) + self.fc3 = nn.Linear(h_dim, z_dim) + self.fc4 = nn.Linear(z_dim, h_dim) + self.fc5 = nn.Linear(h_dim, image_size) + + def encode(self, x): + h = F.relu(self.fc1(x)) + return self.fc2(h), self.fc3(h) + + def reparameterize(self, mu, log_var): + std = torch.exp(log_var/2) + eps = torch.randn_like(std) + return mu + eps * std + + def decode(self, z): + h = F.relu(self.fc4(z)) + return F.sigmoid(self.fc5(h)) + + def forward(self, x): + mu, log_var = self.encode(x) + z = self.reparameterize(mu, log_var) + x_reconst = self.decode(z) + return x_reconst, mu, log_var + +model = VAE().to(device) +optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) + +# Start training +for epoch in range(num_epochs): + for i, (x, _) in enumerate(data_loader): + # Forward pass + x = x.to(device).view(-1, image_size) + x_reconst, mu, log_var = model(x) + + # Compute reconstruction loss and kl divergence + # For KL divergence, see Appendix B in VAE paper or 
http://yunjey47.tistory.com/43 + reconst_loss = F.binary_cross_entropy(x_reconst, x, size_average=False) + kl_div = - 0.5 * torch.sum(1 + log_var - mu.pow(2) - log_var.exp()) + + # Backprop and optimize + loss = reconst_loss + kl_div + optimizer.zero_grad() + loss.backward() + optimizer.step() + + if (i+1) % 10 == 0: + print ("Epoch[{}/{}], Step [{}/{}], Reconst Loss: {:.4f}, KL Div: {:.4f}" + .format(epoch+1, num_epochs, i+1, len(data_loader), reconst_loss.item(), kl_div.item())) + + with torch.no_grad(): + # Save the sampled images + z = torch.randn(batch_size, z_dim).to(device) + out = model.decode(z).view(-1, 1, 28, 28) + save_image(out, os.path.join(sample_dir, 'sampled-{}.png'.format(epoch+1))) + + # Save the reconstructed images + out, _, _ = model(x) + x_concat = torch.cat([x.view(-1, 1, 28, 28), out.view(-1, 1, 28, 28)], dim=3) + save_image(x_concat, os.path.join(sample_dir, 'reconst-{}.png'.format(epoch+1))) \ No newline at end of file diff --git a/tutorials/04 - Convolutional Neural Network/main-gpu.py b/tutorials/04 - Convolutional Neural Network/main-gpu.py deleted file mode 100644 index 5040bd6c..00000000 --- a/tutorials/04 - Convolutional Neural Network/main-gpu.py +++ /dev/null @@ -1,93 +0,0 @@ -import torch -import torch.nn as nn -import torchvision.datasets as dsets -import torchvision.transforms as transforms -from torch.autograd import Variable - - -# Hyper Parameters -num_epochs = 5 -batch_size = 100 -learning_rate = 0.001 - -# MNIST Dataset -train_dataset = dsets.MNIST(root='../data/', - train=True, - transform=transforms.ToTensor(), - download=True) - -test_dataset = dsets.MNIST(root='../data/', - train=False, - transform=transforms.ToTensor()) - -# Data Loader (Input Pipeline) -train_loader = torch.utils.data.DataLoader(dataset=train_dataset, - batch_size=batch_size, - shuffle=True) - -test_loader = torch.utils.data.DataLoader(dataset=test_dataset, - batch_size=batch_size, - shuffle=False) - -# CNN Model (2 conv layer) -class CNN(nn.Module): - def __init__(self): - super(CNN, self).__init__() - self.layer1 = nn.Sequential( - nn.Conv2d(1, 16, kernel_size=5, padding=2), - nn.BatchNorm2d(16), - nn.ReLU(), - nn.MaxPool2d(2)) - self.layer2 = nn.Sequential( - nn.Conv2d(16, 32, kernel_size=5, padding=2), - nn.BatchNorm2d(32), - nn.ReLU(), - nn.MaxPool2d(2)) - self.fc = nn.Linear(7*7*32, 10) - - def forward(self, x): - out = self.layer1(x) - out = self.layer2(out) - out = out.view(out.size(0), -1) - out = self.fc(out) - return out - -cnn = CNN() -cnn.cuda() - -# Loss and Optimizer -criterion = nn.CrossEntropyLoss() -optimizer = torch.optim.Adam(cnn.parameters(), lr=learning_rate) - -# Train the Model -for epoch in range(num_epochs): - for i, (images, labels) in enumerate(train_loader): - images = Variable(images).cuda() - labels = Variable(labels).cuda() - - # Forward + Backward + Optimize - optimizer.zero_grad() - outputs = cnn(images) - loss = criterion(outputs, labels) - loss.backward() - optimizer.step() - - if (i+1) % 100 == 0: - print ('Epoch [%d/%d], Iter [%d/%d] Loss: %.4f' - %(epoch+1, num_epochs, i+1, len(train_dataset)//batch_size, loss.data[0])) - -# Test the Model -cnn.eval() # Change model to 'eval' mode (BN uses moving mean/var). 
-correct = 0 -total = 0 -for images, labels in test_loader: - images = Variable(images).cuda() - outputs = cnn(images) - _, predicted = torch.max(outputs.data, 1) - total += labels.size(0) - correct += (predicted.cpu() == labels).sum() - -print('Test Accuracy of the model on the 10000 test images: %d %%' % (100 * correct / total)) - -# Save the Trained Model -torch.save(cnn, 'cnn.pkl') \ No newline at end of file diff --git a/tutorials/04 - Convolutional Neural Network/main.py b/tutorials/04 - Convolutional Neural Network/main.py deleted file mode 100644 index 5013b278..00000000 --- a/tutorials/04 - Convolutional Neural Network/main.py +++ /dev/null @@ -1,93 +0,0 @@ -import torch -import torch.nn as nn -import torchvision.datasets as dsets -import torchvision.transforms as transforms -from torch.autograd import Variable - - -# Hyper Parameters -num_epochs = 5 -batch_size = 100 -learning_rate = 0.001 - -# MNIST Dataset -train_dataset = dsets.MNIST(root='../data/', - train=True, - transform=transforms.ToTensor(), - download=True) - -test_dataset = dsets.MNIST(root='../data/', - train=False, - transform=transforms.ToTensor()) - -# Data Loader (Input Pipeline) -train_loader = torch.utils.data.DataLoader(dataset=train_dataset, - batch_size=batch_size, - shuffle=True) - -test_loader = torch.utils.data.DataLoader(dataset=test_dataset, - batch_size=batch_size, - shuffle=False) - -# CNN Model (2 conv layer) -class CNN(nn.Module): - def __init__(self): - super(CNN, self).__init__() - self.layer1 = nn.Sequential( - nn.Conv2d(1, 16, kernel_size=5, padding=2), - nn.BatchNorm2d(16), - nn.ReLU(), - nn.MaxPool2d(2)) - self.layer2 = nn.Sequential( - nn.Conv2d(16, 32, kernel_size=5, padding=2), - nn.BatchNorm2d(32), - nn.ReLU(), - nn.MaxPool2d(2)) - self.fc = nn.Linear(7*7*32, 10) - - def forward(self, x): - out = self.layer1(x) - out = self.layer2(out) - out = out.view(out.size(0), -1) - out = self.fc(out) - return out - -cnn = CNN() - - -# Loss and Optimizer -criterion = nn.CrossEntropyLoss() -optimizer = torch.optim.Adam(cnn.parameters(), lr=learning_rate) - -# Train the Model -for epoch in range(num_epochs): - for i, (images, labels) in enumerate(train_loader): - images = Variable(images) - labels = Variable(labels) - - # Forward + Backward + Optimize - optimizer.zero_grad() - outputs = cnn(images) - loss = criterion(outputs, labels) - loss.backward() - optimizer.step() - - if (i+1) % 100 == 0: - print ('Epoch [%d/%d], Iter [%d/%d] Loss: %.4f' - %(epoch+1, num_epochs, i+1, len(train_dataset)//batch_size, loss.data[0])) - -# Test the Model -cnn.eval() # Change model to 'eval' mode (BN uses moving mean/var). -correct = 0 -total = 0 -for images, labels in test_loader: - images = Variable(images) - outputs = cnn(images) - _, predicted = torch.max(outputs.data, 1) - total += labels.size(0) - correct += (predicted == labels).sum() - -print('Test Accuracy of the model on the 10000 test images: %d %%' % (100 * correct / total)) - -# Save the Trained Model -torch.save(cnn, 'cnn.pkl') \ No newline at end of file diff --git a/tutorials/04-utils/tensorboard/README.md b/tutorials/04-utils/tensorboard/README.md new file mode 100644 index 00000000..90781485 --- /dev/null +++ b/tutorials/04-utils/tensorboard/README.md @@ -0,0 +1,25 @@ +# TensorBoard in PyTorch + +In this tutorial, we implement a MNIST classifier using a simple neural network and visualize the training process using [TensorBoard](https://www.tensorflow.org/get_started/summaries_and_tensorboard). 
In the training phase, we plot the loss and accuracy curves through `scalar_summary` and visualize the training images through `image_summary`. In addition, we visualize the weight and gradient values of the network parameters using `histo_summary`. The PyTorch code that invokes these summary functions can be found [here](https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/04-utils/tensorboard/main.py#L81-L97). + +![alt text](gif/tensorboard.gif) + +
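Condensed, the three summary calls look like this (dummy values for illustration; the method names and signatures are those defined in logger.py below, and main.py wires them to the real loss, parameters, and images):

```python
import numpy as np
from logger import Logger  # the Logger class defined in logger.py

logger = Logger('./logs')
step = 1

# Scalar curves such as loss and accuracy.
logger.scalar_summary('loss', 0.25, step)

# Histograms of weight or gradient values.
logger.histo_summary('fc1/weights', np.random.randn(500, 784), step)

# A batch of 28x28 training images.
logger.image_summary('images', np.random.rand(10, 28, 28), step)
```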
+ +## Usage + +#### 1. Install the dependencies +```bash +$ pip install -r requirements.txt +``` + +#### 2. Train the model +```bash +$ python main.py +``` + +#### 3. Open the TensorBoard +To run the TensorBoard, open a new terminal and run the command below. Then, open http://localhost:6006/ on your web browser. +```bash +$ tensorboard --logdir='./logs' --port=6006 +``` diff --git a/tutorials/04-utils/tensorboard/gif/tensorboard.gif b/tutorials/04-utils/tensorboard/gif/tensorboard.gif new file mode 100644 index 00000000..d6ac6099 Binary files /dev/null and b/tutorials/04-utils/tensorboard/gif/tensorboard.gif differ diff --git a/tutorials/04-utils/tensorboard/logger.py b/tutorials/04-utils/tensorboard/logger.py new file mode 100644 index 00000000..d872817e --- /dev/null +++ b/tutorials/04-utils/tensorboard/logger.py @@ -0,0 +1,71 @@ +# Code referenced from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514 +import tensorflow as tf +import numpy as np +import scipy.misc +try: + from StringIO import StringIO # Python 2.7 +except ImportError: + from io import BytesIO # Python 3.x + + +class Logger(object): + + def __init__(self, log_dir): + """Create a summary writer logging to log_dir.""" + self.writer = tf.summary.FileWriter(log_dir) + + def scalar_summary(self, tag, value, step): + """Log a scalar variable.""" + summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)]) + self.writer.add_summary(summary, step) + + def image_summary(self, tag, images, step): + """Log a list of images.""" + + img_summaries = [] + for i, img in enumerate(images): + # Write the image to a string + try: + s = StringIO() + except: + s = BytesIO() + scipy.misc.toimage(img).save(s, format="png") + + # Create an Image object + img_sum = tf.Summary.Image(encoded_image_string=s.getvalue(), + height=img.shape[0], + width=img.shape[1]) + # Create a Summary value + img_summaries.append(tf.Summary.Value(tag='%s/%d' % (tag, i), image=img_sum)) + + # Create and write Summary + summary = tf.Summary(value=img_summaries) + self.writer.add_summary(summary, step) + + def histo_summary(self, tag, values, step, bins=1000): + """Log a histogram of the tensor of values.""" + + # Create a histogram using numpy + counts, bin_edges = np.histogram(values, bins=bins) + + # Fill the fields of the histogram proto + hist = tf.HistogramProto() + hist.min = float(np.min(values)) + hist.max = float(np.max(values)) + hist.num = int(np.prod(values.shape)) + hist.sum = float(np.sum(values)) + hist.sum_squares = float(np.sum(values**2)) + + # Drop the start of the first bin + bin_edges = bin_edges[1:] + + # Add bin edges and counts + for edge in bin_edges: + hist.bucket_limit.append(edge) + for c in counts: + hist.bucket.append(c) + + # Create and write Summary + summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)]) + self.writer.add_summary(summary, step) + self.writer.flush() \ No newline at end of file diff --git a/tutorials/04-utils/tensorboard/main.py b/tutorials/04-utils/tensorboard/main.py new file mode 100644 index 00000000..b72f6292 --- /dev/null +++ b/tutorials/04-utils/tensorboard/main.py @@ -0,0 +1,97 @@ +import torch +import torch.nn as nn +import torchvision +from torchvision import transforms +from logger import Logger + + +# Device configuration +device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + +# MNIST dataset +dataset = torchvision.datasets.MNIST(root='../../data', + train=True, + transform=transforms.ToTensor(), + download=True) + +# Data loader +data_loader = 
torch.utils.data.DataLoader(dataset=dataset, + batch_size=100, + shuffle=True) + + +# Fully connected neural network with one hidden layer +class NeuralNet(nn.Module): + def __init__(self, input_size=784, hidden_size=500, num_classes=10): + super(NeuralNet, self).__init__() + self.fc1 = nn.Linear(input_size, hidden_size) + self.relu = nn.ReLU() + self.fc2 = nn.Linear(hidden_size, num_classes) + + def forward(self, x): + out = self.fc1(x) + out = self.relu(out) + out = self.fc2(out) + return out + +model = NeuralNet().to(device) + +logger = Logger('./logs') + +# Loss and optimizer +criterion = nn.CrossEntropyLoss() +optimizer = torch.optim.Adam(model.parameters(), lr=0.00001) + +data_iter = iter(data_loader) +iter_per_epoch = len(data_loader) +total_step = 50000 + +# Start training +for step in range(total_step): + + # Reset the data_iter + if (step+1) % iter_per_epoch == 0: + data_iter = iter(data_loader) + + # Fetch images and labels + images, labels = next(data_iter) + images, labels = images.view(images.size(0), -1).to(device), labels.to(device) + + # Forward pass + outputs = model(images) + loss = criterion(outputs, labels) + + # Backward and optimize + optimizer.zero_grad() + loss.backward() + optimizer.step() + + # Compute accuracy + _, argmax = torch.max(outputs, 1) + accuracy = (labels == argmax.squeeze()).float().mean() + + if (step+1) % 100 == 0: + print ('Step [{}/{}], Loss: {:.4f}, Acc: {:.2f}' + .format(step+1, total_step, loss.item(), accuracy.item())) + + # ================================================================== # + # Tensorboard Logging # + # ================================================================== # + + # 1. Log scalar values (scalar summary) + info = { 'loss': loss.item(), 'accuracy': accuracy.item() } + + for tag, value in info.items(): + logger.scalar_summary(tag, value, step+1) + + # 2. Log values and gradients of the parameters (histogram summary) + for tag, value in model.named_parameters(): + tag = tag.replace('.', '/') + logger.histo_summary(tag, value.data.cpu().numpy(), step+1) + logger.histo_summary(tag+'/grad', value.grad.data.cpu().numpy(), step+1) + + # 3. Log training images (image summary) + info = { 'images': images.view(-1, 28, 28)[:10].cpu().numpy() } + + for tag, images in info.items(): + logger.image_summary(tag, images, step+1) \ No newline at end of file diff --git a/tutorials/04-utils/tensorboard/requirements.txt b/tutorials/04-utils/tensorboard/requirements.txt new file mode 100644 index 00000000..e74a2c9a --- /dev/null +++ b/tutorials/04-utils/tensorboard/requirements.txt @@ -0,0 +1,5 @@ +tensorflow +torch +torchvision +scipy +numpy diff --git a/tutorials/05 - Deep Residual Network/main.py b/tutorials/05 - Deep Residual Network/main.py deleted file mode 100644 index 685d9645..00000000 --- a/tutorials/05 - Deep Residual Network/main.py +++ /dev/null @@ -1,147 +0,0 @@ -# Implementation of https://arxiv.org/pdf/1512.03385.pdf. -# See section 4.2 for model architecture on CIFAR-10. -# Some part of the code was referenced below. 
-# https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py -import torch -import torch.nn as nn -import torchvision.datasets as dsets -import torchvision.transforms as transforms -from torch.autograd import Variable - -# Image Preprocessing -transform = transforms.Compose([ - transforms.Scale(40), - transforms.RandomHorizontalFlip(), - transforms.RandomCrop(32), - transforms.ToTensor()]) - -# CIFAR-10 Dataset -train_dataset = dsets.CIFAR10(root='../data/', - train=True, - transform=transform, - download=True) - -test_dataset = dsets.CIFAR10(root='../data/', - train=False, - transform=transforms.ToTensor()) - -# Data Loader (Input Pipeline) -train_loader = torch.utils.data.DataLoader(dataset=train_dataset, - batch_size=100, - shuffle=True) - -test_loader = torch.utils.data.DataLoader(dataset=test_dataset, - batch_size=100, - shuffle=False) - -# 3x3 Convolution -def conv3x3(in_channels, out_channels, stride=1): - return nn.Conv2d(in_channels, out_channels, kernel_size=3, - stride=stride, padding=1, bias=False) - -# Residual Block -class ResidualBlock(nn.Module): - def __init__(self, in_channels, out_channels, stride=1, downsample=None): - super(ResidualBlock, self).__init__() - self.conv1 = conv3x3(in_channels, out_channels, stride) - self.bn1 = nn.BatchNorm2d(out_channels) - self.relu = nn.ReLU(inplace=True) - self.conv2 = conv3x3(out_channels, out_channels) - self.bn2 = nn.BatchNorm2d(out_channels) - self.downsample = downsample - - def forward(self, x): - residual = x - out = self.conv1(x) - out = self.bn1(out) - out = self.relu(out) - out = self.conv2(out) - out = self.bn2(out) - if self.downsample: - residual = self.downsample(x) - out += residual - out = self.relu(out) - return out - -# ResNet Module -class ResNet(nn.Module): - def __init__(self, block, layers, num_classes=10): - super(ResNet, self).__init__() - self.in_channels = 16 - self.conv = conv3x3(3, 16) - self.bn = nn.BatchNorm2d(16) - self.relu = nn.ReLU(inplace=True) - self.layer1 = self.make_layer(block, 16, layers[0]) - self.layer2 = self.make_layer(block, 32, layers[0], 2) - self.layer3 = self.make_layer(block, 64, layers[1], 2) - self.avg_pool = nn.AvgPool2d(8) - self.fc = nn.Linear(64, num_classes) - - def make_layer(self, block, out_channels, blocks, stride=1): - downsample = None - if (stride != 1) or (self.in_channels != out_channels): - downsample = nn.Sequential( - conv3x3(self.in_channels, out_channels, stride=stride), - nn.BatchNorm2d(out_channels)) - layers = [] - layers.append(block(self.in_channels, out_channels, stride, downsample)) - self.in_channels = out_channels - for i in range(1, blocks): - layers.append(block(out_channels, out_channels)) - return nn.Sequential(*layers) - - def forward(self, x): - out = self.conv(x) - out = self.bn(out) - out = self.relu(out) - out = self.layer1(out) - out = self.layer2(out) - out = self.layer3(out) - out = self.avg_pool(out) - out = out.view(out.size(0), -1) - out = self.fc(out) - return out - -resnet = ResNet(ResidualBlock, [2, 2, 2, 2]) -resnet - -# Loss and Optimizer -criterion = nn.CrossEntropyLoss() -lr = 0.001 -optimizer = torch.optim.Adam(resnet.parameters(), lr=lr) - -# Training -for epoch in range(80): - for i, (images, labels) in enumerate(train_loader): - images = Variable(images) - labels = Variable(labels) - - # Forward + Backward + Optimize - optimizer.zero_grad() - outputs = resnet(images) - loss = criterion(outputs, labels) - loss.backward() - optimizer.step() - - if (i+1) % 100 == 0: - print ("Epoch [%d/%d], Iter [%d/%d] Loss: %.4f" 
%(epoch+1, 80, i+1, 500, loss.data[0])) - - # Decaying Learning Rate - if (epoch+1) % 30 == 0: - lr /= 3 - optimizer = torch.optim.Adam(resnet.parameters(), lr=lr) - -# Test -correct = 0 -total = 0 -for images, labels in test_loader: - images = Variable(images) - outputs = resnet(images) - _, predicted = torch.max(outputs.data, 1) - total += labels.size(0) - correct += (predicted == labels).sum() - -print('Accuracy of the model on the test images: %d %%' % (100 * correct / total)) - -# Save the Model -torch.save(resnet, 'resnet.pkl') \ No newline at end of file diff --git a/tutorials/06 - Recurrent Neural Network/main-gpu.py b/tutorials/06 - Recurrent Neural Network/main-gpu.py deleted file mode 100644 index 7f9f1a43..00000000 --- a/tutorials/06 - Recurrent Neural Network/main-gpu.py +++ /dev/null @@ -1,95 +0,0 @@ -import torch -import torch.nn as nn -import torchvision.datasets as dsets -import torchvision.transforms as transforms -from torch.autograd import Variable - - -# Hyper Parameters -sequence_length = 28 -input_size = 28 -hidden_size = 128 -num_layers = 2 -num_classes = 10 -batch_size = 100 -num_epochs = 2 -learning_rate = 0.01 - -# MNIST Dataset -train_dataset = dsets.MNIST(root='../data/', - train=True, - transform=transforms.ToTensor(), - download=True) - -test_dataset = dsets.MNIST(root='../data/', - train=False, - transform=transforms.ToTensor()) - -# Data Loader (Input Pipeline) -train_loader = torch.utils.data.DataLoader(dataset=train_dataset, - batch_size=batch_size, - shuffle=True) - -test_loader = torch.utils.data.DataLoader(dataset=test_dataset, - batch_size=batch_size, - shuffle=False) - -# RNN Model (Many-to-One) -class RNN(nn.Module): - def __init__(self, input_size, hidden_size, num_layers, num_classes): - super(RNN, self).__init__() - self.hidden_size = hidden_size - self.num_layers = num_layers - self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True) - self.fc = nn.Linear(hidden_size, num_classes) - - def forward(self, x): - # Set initial states - h0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size).cuda()) - c0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size).cuda()) - - # Forward propagate RNN - out, _ = self.lstm(x, (h0, c0)) - - # Decode hidden state of last time step - out = self.fc(out[:, -1, :]) - return out - -rnn = RNN(input_size, hidden_size, num_layers, num_classes) -rnn.cuda() - -# Loss and Optimizer -criterion = nn.CrossEntropyLoss() -optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate) - -# Train the Model -for epoch in range(num_epochs): - for i, (images, labels) in enumerate(train_loader): - images = Variable(images.view(-1, sequence_length, input_size)).cuda() - labels = Variable(labels).cuda() - - # Forward + Backward + Optimize - optimizer.zero_grad() - outputs = rnn(images) - loss = criterion(outputs, labels) - loss.backward() - optimizer.step() - - if (i+1) % 100 == 0: - print ('Epoch [%d/%d], Step [%d/%d], Loss: %.4f' - %(epoch+1, num_epochs, i+1, len(train_dataset)//batch_size, loss.data[0])) - -# Test the Model -correct = 0 -total = 0 -for images, labels in test_loader: - images = Variable(images.view(-1, sequence_length, input_size)).cuda() - outputs = rnn(images) - _, predicted = torch.max(outputs.data, 1) - total += labels.size(0) - correct += (predicted.cpu() == labels).sum() - -print('Test Accuracy of the model on the 10000 test images: %d %%' % (100 * correct / total)) - -# Save the Model -torch.save(rnn, 'rnn.pkl') diff --git a/tutorials/06 - Recurrent Neural 
Network/main.py b/tutorials/06 - Recurrent Neural Network/main.py deleted file mode 100644 index 49c723ff..00000000 --- a/tutorials/06 - Recurrent Neural Network/main.py +++ /dev/null @@ -1,95 +0,0 @@ -import torch -import torch.nn as nn -import torchvision.datasets as dsets -import torchvision.transforms as transforms -from torch.autograd import Variable - - -# Hyper Parameters -sequence_length = 28 -input_size = 28 -hidden_size = 128 -num_layers = 2 -num_classes = 10 -batch_size = 100 -num_epochs = 2 -learning_rate = 0.01 - -# MNIST Dataset -train_dataset = dsets.MNIST(root='../data/', - train=True, - transform=transforms.ToTensor(), - download=True) - -test_dataset = dsets.MNIST(root='../data/', - train=False, - transform=transforms.ToTensor()) - -# Data Loader (Input Pipeline) -train_loader = torch.utils.data.DataLoader(dataset=train_dataset, - batch_size=batch_size, - shuffle=True) - -test_loader = torch.utils.data.DataLoader(dataset=test_dataset, - batch_size=batch_size, - shuffle=False) - -# RNN Model (Many-to-One) -class RNN(nn.Module): - def __init__(self, input_size, hidden_size, num_layers, num_classes): - super(RNN, self).__init__() - self.hidden_size = hidden_size - self.num_layers = num_layers - self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True) - self.fc = nn.Linear(hidden_size, num_classes) - - def forward(self, x): - # Set initial states - h0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size)) - c0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size)) - - # Forward propagate RNN - out, _ = self.lstm(x, (h0, c0)) - - # Decode hidden state of last time step - out = self.fc(out[:, -1, :]) - return out - -rnn = RNN(input_size, hidden_size, num_layers, num_classes) - - -# Loss and Optimizer -criterion = nn.CrossEntropyLoss() -optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate) - -# Train the Model -for epoch in range(num_epochs): - for i, (images, labels) in enumerate(train_loader): - images = Variable(images.view(-1, sequence_length, input_size)) - labels = Variable(labels) - - # Forward + Backward + Optimize - optimizer.zero_grad() - outputs = rnn(images) - loss = criterion(outputs, labels) - loss.backward() - optimizer.step() - - if (i+1) % 100 == 0: - print ('Epoch [%d/%d], Step [%d/%d], Loss: %.4f' - %(epoch+1, num_epochs, i+1, len(train_dataset)//batch_size, loss.data[0])) - -# Test the Model -correct = 0 -total = 0 -for images, labels in test_loader: - images = Variable(images.view(-1, sequence_length, input_size)) - outputs = rnn(images) - _, predicted = torch.max(outputs.data, 1) - total += labels.size(0) - correct += (predicted == labels).sum() - -print('Test Accuracy of the model on the 10000 test images: %d %%' % (100 * correct / total)) - -# Save the Model -torch.save(rnn, 'rnn.pkl') diff --git a/tutorials/07 - Bidirectional Recurrent Neural Network/main-gpu.py b/tutorials/07 - Bidirectional Recurrent Neural Network/main-gpu.py deleted file mode 100644 index 00b3c89e..00000000 --- a/tutorials/07 - Bidirectional Recurrent Neural Network/main-gpu.py +++ /dev/null @@ -1,96 +0,0 @@ -import torch -import torch.nn as nn -import torchvision.datasets as dsets -import torchvision.transforms as transforms -from torch.autograd import Variable - - -# Hyper Parameters -sequence_length = 28 -input_size = 28 -hidden_size = 128 -num_layers = 2 -num_classes = 10 -batch_size = 100 -num_epochs = 2 -learning_rate = 0.003 - -# MNIST Dataset -train_dataset = dsets.MNIST(root='../data/', - train=True, - 
transform=transforms.ToTensor(), - download=True) - -test_dataset = dsets.MNIST(root='../data/', - train=False, - transform=transforms.ToTensor()) - -# Data Loader (Input Pipeline) -train_loader = torch.utils.data.DataLoader(dataset=train_dataset, - batch_size=batch_size, - shuffle=True) - -test_loader = torch.utils.data.DataLoader(dataset=test_dataset, - batch_size=batch_size, - shuffle=False) - -# BiRNN Model (Many-to-One) -class BiRNN(nn.Module): - def __init__(self, input_size, hidden_size, num_layers, num_classes): - super(BiRNN, self).__init__() - self.hidden_size = hidden_size - self.num_layers = num_layers - self.lstm = nn.LSTM(input_size, hidden_size, num_layers, - batch_first=True, bidirectional=True) - self.fc = nn.Linear(hidden_size*2, num_classes) # 2 for bidirection - - def forward(self, x): - # Set initial states - h0 = Variable(torch.zeros(self.num_layers*2, x.size(0), self.hidden_size)).cuda() # 2 for bidirection - c0 = Variable(torch.zeros(self.num_layers*2, x.size(0), self.hidden_size)).cuda() - - # Forward propagate RNN - out, _ = self.lstm(x, (h0, c0)) - - # Decode hidden state of last time step - out = self.fc(out[:, -1, :]) - return out - -rnn = BiRNN(input_size, hidden_size, num_layers, num_classes) -rnn.cuda() - -# Loss and Optimizer -criterion = nn.CrossEntropyLoss() -optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate) - -# Train the Model -for epoch in range(num_epochs): - for i, (images, labels) in enumerate(train_loader): - images = Variable(images.view(-1, sequence_length, input_size)).cuda() - labels = Variable(labels).cuda() - - # Forward + Backward + Optimize - optimizer.zero_grad() - outputs = rnn(images) - loss = criterion(outputs, labels) - loss.backward() - optimizer.step() - - if (i+1) % 100 == 0: - print ('Epoch [%d/%d], Step [%d/%d], Loss: %.4f' - %(epoch+1, num_epochs, i+1, len(train_dataset)//batch_size, loss.data[0])) - -# Test the Model -correct = 0 -total = 0 -for images, labels in test_loader: - images = Variable(images.view(-1, sequence_length, input_size)).cuda() - outputs = rnn(images) - _, predicted = torch.max(outputs.data, 1) - total += labels.size(0) - correct += (predicted.cpu() == labels).sum() - -print('Test Accuracy of the model on the 10000 test images: %d %%' % (100 * correct / total)) - -# Save the Model -torch.save(rnn, 'rnn.pkl') diff --git a/tutorials/07 - Bidirectional Recurrent Neural Network/main.py b/tutorials/07 - Bidirectional Recurrent Neural Network/main.py deleted file mode 100644 index b256a04e..00000000 --- a/tutorials/07 - Bidirectional Recurrent Neural Network/main.py +++ /dev/null @@ -1,96 +0,0 @@ -import torch -import torch.nn as nn -import torchvision.datasets as dsets -import torchvision.transforms as transforms -from torch.autograd import Variable - - -# Hyper Parameters -sequence_length = 28 -input_size = 28 -hidden_size = 128 -num_layers = 2 -num_classes = 10 -batch_size = 100 -num_epochs = 2 -learning_rate = 0.003 - -# MNIST Dataset -train_dataset = dsets.MNIST(root='../data/', - train=True, - transform=transforms.ToTensor(), - download=True) - -test_dataset = dsets.MNIST(root='../data/', - train=False, - transform=transforms.ToTensor()) - -# Data Loader (Input Pipeline) -train_loader = torch.utils.data.DataLoader(dataset=train_dataset, - batch_size=batch_size, - shuffle=True) - -test_loader = torch.utils.data.DataLoader(dataset=test_dataset, - batch_size=batch_size, - shuffle=False) - -# BiRNN Model (Many-to-One) -class BiRNN(nn.Module): - def __init__(self, input_size, hidden_size, 
num_layers, num_classes): - super(BiRNN, self).__init__() - self.hidden_size = hidden_size - self.num_layers = num_layers - self.lstm = nn.LSTM(input_size, hidden_size, num_layers, - batch_first=True, bidirectional=True) - self.fc = nn.Linear(hidden_size*2, num_classes) # 2 for bidirection - - def forward(self, x): - # Set initial states - h0 = Variable(torch.zeros(self.num_layers*2, x.size(0), self.hidden_size)) # 2 for bidirection - c0 = Variable(torch.zeros(self.num_layers*2, x.size(0), self.hidden_size)) - - # Forward propagate RNN - out, _ = self.lstm(x, (h0, c0)) - - # Decode hidden state of last time step - out = self.fc(out[:, -1, :]) - return out - -rnn = BiRNN(input_size, hidden_size, num_layers, num_classes) - - -# Loss and Optimizer -criterion = nn.CrossEntropyLoss() -optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate) - -# Train the Model -for epoch in range(num_epochs): - for i, (images, labels) in enumerate(train_loader): - images = Variable(images.view(-1, sequence_length, input_size)) - labels = Variable(labels) - - # Forward + Backward + Optimize - optimizer.zero_grad() - outputs = rnn(images) - loss = criterion(outputs, labels) - loss.backward() - optimizer.step() - - if (i+1) % 100 == 0: - print ('Epoch [%d/%d], Step [%d/%d], Loss: %.4f' - %(epoch+1, num_epochs, i+1, len(train_dataset)//batch_size, loss.data[0])) - -# Test the Model -correct = 0 -total = 0 -for images, labels in test_loader: - images = Variable(images.view(-1, sequence_length, input_size)) - outputs = rnn(images) - _, predicted = torch.max(outputs.data, 1) - total += labels.size(0) - correct += (predicted == labels).sum() - -print('Test Accuracy of the model on the 10000 test images: %d %%' % (100 * correct / total)) - -# Save the Model -torch.save(rnn, 'rnn.pkl') diff --git a/tutorials/08 - Language Model/main-gpu.py b/tutorials/08 - Language Model/main-gpu.py deleted file mode 100644 index a1fec846..00000000 --- a/tutorials/08 - Language Model/main-gpu.py +++ /dev/null @@ -1,123 +0,0 @@ -# Some part of the code was referenced from below. 
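One subtlety in the two bidirectional models deleted above: with `batch_first=True`, `out[:, -1, :]` concatenates the forward direction's output at the last time step with the backward direction's output at that same position, and the backward pass has only read `x[T-1]` at that point. The classifier still trains, but a variant that gives both directions a full view of the sequence is sketched below (illustrative only, written against the current PyTorch API rather than the `Variable`-era one used in these files):

```python
import torch
import torch.nn as nn

H = 128  # hidden_size; out[..., :H] is the forward direction, out[..., H:] the backward one
lstm = nn.LSTM(input_size=28, hidden_size=H, num_layers=2,
               batch_first=True, bidirectional=True)

x = torch.randn(4, 28, 28)   # (batch, seq_len, input_size)
out, _ = lstm(x)             # (batch, seq_len, 2*H)

last = out[:, -1, :]         # what the tutorial feeds to the fc layer

# Forward state after the whole sequence + backward state after the whole sequence:
alt = torch.cat((out[:, -1, :H], out[:, 0, H:]), 1)
```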
-# https://github.com/pytorch/examples/tree/master/word_language_model -import torch -import torch.nn as nn -import numpy as np -from torch.autograd import Variable -from data_utils import Dictionary, Corpus - -# Hyper Parameters -embed_size = 128 -hidden_size = 1024 -num_layers = 1 -num_epochs = 5 -num_samples = 1000 # number of words to be sampled -batch_size = 20 -seq_length = 30 -learning_rate = 0.002 - -# Load Penn Treebank Dataset -train_path = './data/train.txt' -sample_path = './sample.txt' -corpus = Corpus() -ids = corpus.get_data(train_path, batch_size) -vocab_size = len(corpus.dictionary) -num_batches = ids.size(1) // seq_length - -# RNN Based Language Model -class RNNLM(nn.Module): - def __init__(self, vocab_size, embed_size, hidden_size, num_layers): - super(RNNLM, self).__init__() - self.embed = nn.Embedding(vocab_size, embed_size) - self.lstm = nn.LSTM(embed_size, hidden_size, num_layers, batch_first=True) - self.linear = nn.Linear(hidden_size, vocab_size) - - self.init_weights() - - def init_weights(self): - self.embed.weight.data.uniform_(-0.1, 0.1) - self.linear.bias.data.fill_(0) - self.linear.weight.data.uniform_(-0.1, 0.1) - - def forward(self, x, h): - # Embed word ids to vectors - x = self.embed(x) - - # Forward propagate RNN - out, h = self.lstm(x, h) - - # Reshape output to (batch_size*sequence_length, hidden_size) - out = out.contiguous().view(out.size(0)*out.size(1), out.size(2)) - - # Decode hidden states of all time steps - out = self.linear(out) - return out, h - -model = RNNLM(vocab_size, embed_size, hidden_size, num_layers) -model.cuda() - -# Loss and Optimizer -criterion = nn.CrossEntropyLoss() -optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) - -# Truncated Backpropagation -def detach(states): - return [Variable(state.data) for state in states] - -# Training -for epoch in range(num_epochs): - # Initial hidden and memory states - states = (Variable(torch.zeros(num_layers, batch_size, hidden_size)).cuda(), - Variable(torch.zeros(num_layers, batch_size, hidden_size)).cuda()) - - for i in range(0, ids.size(1) - seq_length, seq_length): - # Get batch inputs and targets - inputs = Variable(ids[:, i:i+seq_length]).cuda() - targets = Variable(ids[:, (i+1):(i+1)+seq_length].contiguous()).cuda() - - # Forward + Backward + Optimize - model.zero_grad() - states = detach(states) - outputs, states = model(inputs, states) - loss = criterion(outputs, targets.view(-1)) - loss.backward() - torch.nn.utils.clip_grad_norm(model.parameters(), 0.5) - optimizer.step() - - step = (i+1) // seq_length - if step % 100 == 0: - print ('Epoch [%d/%d], Step[%d/%d], Loss: %.3f, Perplexity: %5.2f' % - (epoch+1, num_epochs, step, num_batches, loss.data[0], np.exp(loss.data[0]))) - -# Sampling -with open(sample_path, 'w') as f: - # Set initial hidden and memory states - state = (Variable(torch.zeros(num_layers, 1, hidden_size)).cuda(), - Variable(torch.zeros(num_layers, 1, hidden_size)).cuda()) - - # Select one word id randomly - prob = torch.ones(vocab_size) - input = Variable(torch.multinomial(prob, num_samples=1).unsqueeze(1), - volatile=True).cuda() - - for i in range(num_samples): - # Forward propagate RNN - output, state = model(input, state) - - # Sample a word id - prob = output.squeeze().data.exp().cpu() - word_id = torch.multinomial(prob, 1)[0] - - # Feed sampled word id to next time step - input.data.fill_(word_id) - - # File write - word = corpus.dictionary.idx2word[word_id] - word = '\n' if word == '<eos>' else word + ' ' - f.write(word) - - if (i+1) % 100 == 0: - 
print('Sampled [%d/%d] words and saved to %s'%(i+1, num_samples, sample_path)) - -# Save the Trained Model -torch.save(model, 'model.pkl') \ No newline at end of file diff --git a/tutorials/08 - Language Model/main.py b/tutorials/08 - Language Model/main.py deleted file mode 100644 index df98e16b..00000000 --- a/tutorials/08 - Language Model/main.py +++ /dev/null @@ -1,123 +0,0 @@ -# Some part of the code was referenced from below. -# https://github.com/pytorch/examples/tree/master/word_language_model -import torch -import torch.nn as nn -import numpy as np -from torch.autograd import Variable -from data_utils import Dictionary, Corpus - -# Hyper Parameters -embed_size = 128 -hidden_size = 1024 -num_layers = 1 -num_epochs = 5 -num_samples = 1000 # number of words to be sampled -batch_size = 20 -seq_length = 30 -learning_rate = 0.002 - -# Load Penn Treebank Dataset -train_path = './data/train.txt' -sample_path = './sample.txt' -corpus = Corpus() -ids = corpus.get_data(train_path, batch_size) -vocab_size = len(corpus.dictionary) -num_batches = ids.size(1) // seq_length - -# RNN Based Language Model -class RNNLM(nn.Module): - def __init__(self, vocab_size, embed_size, hidden_size, num_layers): - super(RNNLM, self).__init__() - self.embed = nn.Embedding(vocab_size, embed_size) - self.lstm = nn.LSTM(embed_size, hidden_size, num_layers, batch_first=True) - self.linear = nn.Linear(hidden_size, vocab_size) - - self.init_weights() - - def init_weights(self): - self.embed.weight.data.uniform_(-0.1, 0.1) - self.linear.bias.data.fill_(0) - self.linear.weight.data.uniform_(-0.1, 0.1) - - def forward(self, x, h): - # Embed word ids to vectors - x = self.embed(x) - - # Forward propagate RNN - out, h = self.lstm(x, h) - - # Reshape output to (batch_size*sequence_length, hidden_size) - out = out.contiguous().view(out.size(0)*out.size(1), out.size(2)) - - # Decode hidden states of all time steps - out = self.linear(out) - return out, h - -model = RNNLM(vocab_size, embed_size, hidden_size, num_layers) - - -# Loss and Optimizer -criterion = nn.CrossEntropyLoss() -optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) - -# Truncated Backpropagation -def detach(states): - return [Variable(state.data) for state in states] - -# Training -for epoch in range(num_epochs): - # Initial hidden and memory states - states = (Variable(torch.zeros(num_layers, batch_size, hidden_size)), - Variable(torch.zeros(num_layers, batch_size, hidden_size))) - - for i in range(0, ids.size(1) - seq_length, seq_length): - # Get batch inputs and targets - inputs = Variable(ids[:, i:i+seq_length]) - targets = Variable(ids[:, (i+1):(i+1)+seq_length].contiguous()) - - # Forward + Backward + Optimize - model.zero_grad() - states = detach(states) - outputs, states = model(inputs, states) - loss = criterion(outputs, targets.view(-1)) - loss.backward() - torch.nn.utils.clip_grad_norm(model.parameters(), 0.5) - optimizer.step() - - step = (i+1) // seq_length - if step % 100 == 0: - print ('Epoch [%d/%d], Step[%d/%d], Loss: %.3f, Perplexity: %5.2f' % - (epoch+1, num_epochs, step, num_batches, loss.data[0], np.exp(loss.data[0]))) - -# Sampling -with open(sample_path, 'w') as f: - # Set initial hidden and memory states - state = (Variable(torch.zeros(num_layers, 1, hidden_size)), - Variable(torch.zeros(num_layers, 1, hidden_size))) - - # Select one word id randomly - prob = torch.ones(vocab_size) - input = Variable(torch.multinomial(prob, num_samples=1).unsqueeze(1), - volatile=True) - - for i in range(num_samples): - # Forward propagate 
RNN - output, state = model(input, state) - - # Sample a word id - prob = output.squeeze().data.exp() - word_id = torch.multinomial(prob, 1)[0] - - # Feed sampled word id to next time step - input.data.fill_(word_id) - - # File write - word = corpus.dictionary.idx2word[word_id] - word = '\n' if word == '<eos>' else word + ' ' - f.write(word) - - if (i+1) % 100 == 0: - print('Sampled [%d/%d] words and saved to %s'%(i+1, num_samples, sample_path)) - -# Save the Trained Model -torch.save(model, 'model.pkl') \ No newline at end of file diff --git a/tutorials/09 - Image Captioning/model.py b/tutorials/09 - Image Captioning/model.py deleted file mode 100644 index fcef106d..00000000 --- a/tutorials/09 - Image Captioning/model.py +++ /dev/null @@ -1,64 +0,0 @@ -import torch -import torch.nn as nn -import torchvision.models as models -from torch.nn.utils.rnn import pack_padded_sequence -from torch.autograd import Variable - - -class EncoderCNN(nn.Module): - def __init__(self, embed_size): - """Load pretrained ResNet-152 and replace top fc layer.""" - super(EncoderCNN, self).__init__() - self.resnet = models.resnet152(pretrained=True) - # For efficient memory usage. - for param in self.resnet.parameters(): - param.requires_grad = False - self.resnet.fc = nn.Linear(self.resnet.fc.in_features, embed_size) - self.init_weights() - - def init_weights(self): - self.resnet.fc.weight.data.uniform_(-0.1, 0.1) - self.resnet.fc.bias.data.fill_(0) - - def forward(self, images): - """Extract image feature vectors.""" - features = self.resnet(images) - return features - - -class DecoderRNN(nn.Module): - def __init__(self, embed_size, hidden_size, vocab_size, num_layers): - """Set hyper-parameters and build layers.""" - super(DecoderRNN, self).__init__() - self.embed_size = embed_size - self.hidden_size = hidden_size - self.vocab_size = vocab_size - self.embed = nn.Embedding(vocab_size, embed_size) - self.lstm = nn.LSTM(embed_size, hidden_size, num_layers) - self.linear = nn.Linear(hidden_size, vocab_size) - - def init_weights(self): - self.embed.weight.data.uniform_(-0.1, 0.1) - self.linear.weight.data.uniform_(-0.1, 0.1) - self.linear.bias.data.fill_(0) - - def forward(self, features, captions, lengths): - """Decode image feature vectors and generate caption.""" - embeddings = self.embed(captions) - embeddings = torch.cat((features.unsqueeze(1), embeddings), 1) - packed = pack_padded_sequence(embeddings, lengths, batch_first=True) - hiddens, _ = self.lstm(packed) - outputs = self.linear(hiddens[0]) - return outputs - - def sample(self, feature, state): - """Sample a caption for a given image feature.""" - sampled_ids = [] - input = feature.unsqueeze(1) - for i in range(20): - hidden, state = self.lstm(input, state) # (1, 1, hidden_size) - output = self.linear(hidden.view(-1, self.hidden_size)) # (1, vocab_size) - predicted = output.max(1)[1] - sampled_ids.append(predicted) - input = self.embed(predicted) - return sampled_ids \ No newline at end of file diff --git a/tutorials/09 - Image Captioning/resize.py b/tutorials/09 - Image Captioning/resize.py deleted file mode 100644 index da1cd99b..00000000 --- a/tutorials/09 - Image Captioning/resize.py +++ /dev/null @@ -1,35 +0,0 @@ -from PIL import Image -import os - - -def resize_image(image, size): - """Resizes an image to the given size.""" - return image.resize(size, Image.ANTIALIAS) - -def resize_images(image_dir, output_dir, size): - """Resizes the images in the image_dir and saves them into the output_dir.""" - if not os.path.exists(output_dir): - os.makedirs(output_dir) - 
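The `pack_padded_sequence` call in `DecoderRNN.forward` above is the one step that tends to confuse readers, so here is a tiny illustration of what it does (made-up shapes, current PyTorch API): captions sorted longest-first are flattened so the LSTM only ever sees real tokens, never padding.

```python
import torch
from torch.nn.utils.rnn import pack_padded_sequence

embeddings = torch.randn(3, 5, 8)   # (batch, max_seq_len, embed_size)
lengths = [5, 3, 2]                 # true lengths, sorted in decreasing order

packed = pack_padded_sequence(embeddings, lengths, batch_first=True)

print(packed.data.shape)    # torch.Size([10, 8]) -> 5 + 3 + 2 valid time steps
print(packed.batch_sizes)   # tensor([3, 3, 2, 1, 1]) -> active sequences per time step
```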
- images = os.listdir(image_dir) - num_images = len(images) - for i, image in enumerate(images): - with open(os.path.join(image_dir, image), 'r+b') as f: - with Image.open(f) as img: - img = resize_image(img, size) - img.save( - os.path.join(output_dir, image), img.format) - if i % 100 == 0: - print ('[%d/%d] Resized the images and saved into %s.' - %(i, num_images, output_dir)) - -def main(): - splits = ['train', 'val'] - for split in splits: - image_dir = './data/%s2014/' %split - output_dir = './data/%s2014resized' %split - resize_images(image_dir, output_dir, (256, 256)) - - -if __name__ == '__main__': - main() \ No newline at end of file diff --git a/tutorials/09 - Image Captioning/sample.py b/tutorials/09 - Image Captioning/sample.py deleted file mode 100644 index a444d085..00000000 --- a/tutorials/09 - Image Captioning/sample.py +++ /dev/null @@ -1,58 +0,0 @@ -import os -import numpy as np -import torch -import torchvision.transforms as T -import pickle -import matplotlib.pyplot as plt -from PIL import Image -from model import EncoderCNN, DecoderRNN -from vocab import Vocabulary -from torch.autograd import Variable - -# Image processing -transform = T.Compose([ - T.CenterCrop(224), - T.ToTensor(), - T.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))]) - -# Hyper Parameters -embed_size = 128 -hidden_size = 512 -num_layers = 1 - -# Load vocabulary -with open('./data/vocab.pkl', 'rb') as f: - vocab = pickle.load(f) - -# Load an image array -images = os.listdir('./data/train2014resized/') -image_path = './data/train2014resized/' + images[12] -img = Image.open(image_path) -image = transform(img).unsqueeze(0) - -# Load the trained models -encoder = torch.load('./encoder.pkl') -decoder = torch.load('./decoder.pkl') - -# Encode the image -feature = encoder(Variable(image).cuda()) - -# Set initial states -state = (Variable(torch.zeros(num_layers, 1, hidden_size).cuda()), - Variable(torch.zeros(num_layers, 1, hidden_size)).cuda()) - -# Decode the feature to caption -ids = decoder.sample(feature, state) - -words = [] -for id in ids: - word = vocab.idx2word[id.data[0, 0]] - words.append(word) - if word == '<end>': - break -caption = ' '.join(words) - -# Display the image and generated caption -plt.imshow(img) -plt.show() -print (caption) \ No newline at end of file diff --git a/tutorials/09 - Image Captioning/train.py b/tutorials/09 - Image Captioning/train.py deleted file mode 100644 index d97976dd..00000000 --- a/tutorials/09 - Image Captioning/train.py +++ /dev/null @@ -1,72 +0,0 @@ -from data import get_loader -from vocab import Vocabulary -from model import EncoderCNN, DecoderRNN -from torch.autograd import Variable -from torch.nn.utils.rnn import pack_padded_sequence -import torch -import torch.nn as nn -import numpy as np -import torchvision.transforms as T -import pickle - -# Hyper Parameters -num_epochs = 1 -batch_size = 32 -embed_size = 256 -hidden_size = 512 -crop_size = 224 -num_layers = 1 -learning_rate = 0.001 -train_image_path = './data/train2014resized/' -train_json_path = './data/annotations/captions_train2014.json' - -# Image Preprocessing -transform = T.Compose([ - T.RandomCrop(crop_size), - T.RandomHorizontalFlip(), - T.ToTensor(), - T.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))]) - -# Load Vocabulary Wrapper -with open('./data/vocab.pkl', 'rb') as f: - vocab = pickle.load(f) - -# Build Dataset Loader -train_loader = get_loader(train_image_path, train_json_path, vocab, transform, - batch_size=batch_size, shuffle=True, num_workers=2) -total_step = len(train_loader) - 
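This train.py relies on a `get_loader` from a `data` module that is not part of this diff, so the exact batching code is not visible here. Judging from the `(images, captions, lengths)` tuples it yields and the sorted-lengths requirement of `pack_padded_sequence`, its collate function plausibly looks like the following sketch; the name and details are hypothetical:

```python
import torch

def collate_fn(data):
    """Hypothetical collate: build (images, targets, lengths) batches from
    (image_tensor, caption_tensor) pairs, sorted by caption length."""
    data.sort(key=lambda pair: len(pair[1]), reverse=True)
    images, captions = zip(*data)

    images = torch.stack(images, 0)
    lengths = [len(cap) for cap in captions]

    # Zero-pad every caption up to the longest one in the batch.
    targets = torch.zeros(len(captions), max(lengths)).long()
    for i, cap in enumerate(captions):
        targets[i, :lengths[i]] = cap[:lengths[i]]
    return images, targets, lengths
```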
-# Build Models -encoder = EncoderCNN(embed_size) -decoder = DecoderRNN(embed_size, hidden_size, len(vocab), num_layers) -encoder.cuda() -decoder.cuda() - -# Loss and Optimizer -criterion = nn.CrossEntropyLoss() -params = list(decoder.parameters()) + list(encoder.resnet.fc.parameters()) -optimizer = torch.optim.Adam(params, lr=learning_rate) - -# Train the Decoder -for epoch in range(num_epochs): - for i, (images, captions, lengths) in enumerate(train_loader): - # Set mini-batch dataset - images = Variable(images).cuda() - captions = Variable(captions).cuda() - targets = pack_padded_sequence(captions, lengths, batch_first=True)[0] - - # Forward, Backward and Optimize - decoder.zero_grad() - features = encoder(images) - outputs = decoder(features, captions, lengths) - loss = criterion(outputs, targets) - loss.backward() - optimizer.step() - - if i % 100 == 0: - print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f, Perplexity: %5.4f' - %(epoch, num_epochs, i, total_step, loss.data[0], np.exp(loss.data[0]))) - -# Save the Model -torch.save(decoder, 'decoder.pkl') -torch.save(encoder, 'encoder.pkl') \ No newline at end of file diff --git a/tutorials/10 - Generative Adversarial Network/main-gpu.py b/tutorials/10 - Generative Adversarial Network/main-gpu.py deleted file mode 100644 index e14f768d..00000000 --- a/tutorials/10 - Generative Adversarial Network/main-gpu.py +++ /dev/null @@ -1,134 +0,0 @@ -import torch -import torchvision -import torch.nn as nn -import torchvision.datasets as dsets -import torchvision.transforms as transforms -from torch.autograd import Variable - -# Image Preprocessing -transform = transforms.Compose([ - transforms.Scale(36), - transforms.RandomCrop(32), - transforms.ToTensor(), - transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))]) - -# CIFAR-10 Dataset -train_dataset = dsets.CIFAR10(root='../data/', - train=True, - transform=transform, - download=True) - -# Data Loader (Input Pipeline) -train_loader = torch.utils.data.DataLoader(dataset=train_dataset, - batch_size=100, - shuffle=True) - -# 5x5 Convolution -def conv5x5(in_channels, out_channels, stride): - return nn.Conv2d(in_channels, out_channels, kernel_size=4, - stride=stride, padding=1, bias=False) - -# Discriminator Model -class Discriminator(nn.Module): - def __init__(self): - super(Discriminator, self).__init__() - self.model = nn.Sequential( - conv5x5(3, 16, 2), - nn.LeakyReLU(0.2, inplace=True), - conv5x5(16, 32, 2), - nn.BatchNorm2d(32), - nn.LeakyReLU(0.2, inplace=True), - conv5x5(32, 64, 2), - nn.BatchNorm2d(64), - nn.LeakyReLU(0.2, inplace=True), - nn.Conv2d(64, 1, kernel_size=4), - nn.Sigmoid()) - - def forward(self, x): - out = self.model(x) - out = out.view(out.size(0), -1) - return out - -# 4x4 Transpose convolution -def conv_transpose4x4(in_channels, out_channels, stride=1, padding=1, bias=False): - return nn.ConvTranspose2d(in_channels, out_channels, kernel_size=4, - stride=stride, padding=padding, bias=bias) - -# Generator Model -class Generator(nn.Module): - def __init__(self): - super(Generator, self).__init__() - self.model = nn.Sequential( - conv_transpose4x4(128, 64, padding=0), - nn.BatchNorm2d(64), - nn.ReLU(inplace=True), - conv_transpose4x4(64, 32, 2), - nn.BatchNorm2d(32), - nn.ReLU(inplace=True), - conv_transpose4x4(32, 16, 2), - nn.BatchNorm2d(16), - nn.ReLU(inplace=True), - conv_transpose4x4(16, 3, 2, bias=True), - nn.Tanh()) - - def forward(self, x): - x = x.view(x.size(0), 128, 1, 1) - out = self.model(x) - return out - -discriminator = Discriminator() -generator = 
Generator() -discriminator.cuda() -generator.cuda() - -# Loss and Optimizer -criterion = nn.BCELoss() -lr = 0.002 -d_optimizer = torch.optim.Adam(discriminator.parameters(), lr=lr) -g_optimizer = torch.optim.Adam(generator.parameters(), lr=lr) - -# Training -for epoch in range(50): - for i, (images, _) in enumerate(train_loader): - images = Variable(images.cuda()) - real_labels = Variable(torch.ones(images.size(0)).cuda()) - fake_labels = Variable(torch.zeros(images.size(0)).cuda()) - - # Train the discriminator - discriminator.zero_grad() - outputs = discriminator(images) - real_loss = criterion(outputs, real_labels) - real_score = outputs - - noise = Variable(torch.randn(images.size(0), 128).cuda()) - fake_images = generator(noise) - outputs = discriminator(fake_images) - fake_loss = criterion(outputs, fake_labels) - fake_score = outputs - - d_loss = real_loss + fake_loss - d_loss.backward() - d_optimizer.step() - - # Train the generator - generator.zero_grad() - noise = Variable(torch.randn(images.size(0), 128).cuda()) - fake_images = generator(noise) - outputs = discriminator(fake_images) - g_loss = criterion(outputs, real_labels) - g_loss.backward() - g_optimizer.step() - - if (i+1) % 100 == 0: - print('Epoch [%d/%d], Step[%d/%d], d_loss: %.4f, g_loss: %.4f, ' - 'D(x): %.2f, D(G(z)): %.2f' - %(epoch, 50, i+1, 500, d_loss.data[0], g_loss.data[0], - real_score.cpu().data.mean(), fake_score.cpu().data.mean())) - - # Save the sampled images - torchvision.utils.save_image(fake_images.data, - './data/fake_samples_%d_%d.png' %(epoch+1, i+1)) - -# Save the Models -torch.save(generator, './generator.pkl') -torch.save(discriminator, './discriminator.pkl') \ No newline at end of file diff --git a/tutorials/10 - Generative Adversarial Network/main.py b/tutorials/10 - Generative Adversarial Network/main.py deleted file mode 100644 index 9f2ae1ae..00000000 --- a/tutorials/10 - Generative Adversarial Network/main.py +++ /dev/null @@ -1,134 +0,0 @@ -import torch -import torchvision -import torch.nn as nn -import torchvision.datasets as dsets -import torchvision.transforms as transforms -from torch.autograd import Variable - -# Image Preprocessing -transform = transforms.Compose([ - transforms.Scale(36), - transforms.RandomCrop(32), - transforms.ToTensor(), - transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))]) - -# CIFAR-10 Dataset -train_dataset = dsets.CIFAR10(root='../data/', - train=True, - transform=transform, - download=True) - -# Data Loader (Input Pipeline) -train_loader = torch.utils.data.DataLoader(dataset=train_dataset, - batch_size=100, - shuffle=True) - -# 5x5 Convolution -def conv5x5(in_channels, out_channels, stride): - return nn.Conv2d(in_channels, out_channels, kernel_size=4, - stride=stride, padding=1, bias=False) - -# Discriminator Model -class Discriminator(nn.Module): - def __init__(self): - super(Discriminator, self).__init__() - self.model = nn.Sequential( - conv5x5(3, 16, 2), - nn.LeakyReLU(0.2, inplace=True), - conv5x5(16, 32, 2), - nn.BatchNorm2d(32), - nn.LeakyReLU(0.2, inplace=True), - conv5x5(32, 64, 2), - nn.BatchNorm2d(64), - nn.LeakyReLU(0.2, inplace=True), - nn.Conv2d(64, 1, kernel_size=4), - nn.Sigmoid()) - - def forward(self, x): - out = self.model(x) - out = out.view(out.size(0), -1) - return out - -# 4x4 Transpose convolution -def conv_transpose4x4(in_channels, out_channels, stride=1, padding=1, bias=False): - return nn.ConvTranspose2d(in_channels, out_channels, kernel_size=4, - stride=stride, padding=padding, bias=bias) - -# Generator Model -class 
Generator(nn.Module): - def __init__(self): - super(Generator, self).__init__() - self.model = nn.Sequential( - conv_transpose4x4(128, 64, padding=0), - nn.BatchNorm2d(64), - nn.ReLU(inplace=True), - conv_transpose4x4(64, 32, 2), - nn.BatchNorm2d(32), - nn.ReLU(inplace=True), - conv_transpose4x4(32, 16, 2), - nn.BatchNorm2d(16), - nn.ReLU(inplace=True), - conv_transpose4x4(16, 3, 2, bias=True), - nn.Tanh()) - - def forward(self, x): - x = x.view(x.size(0), 128, 1, 1) - out = self.model(x) - return out - -discriminator = Discriminator() -generator = Generator() - - - -# Loss and Optimizer -criterion = nn.BCELoss() -lr = 0.0002 -d_optimizer = torch.optim.Adam(discriminator.parameters(), lr=lr) -g_optimizer = torch.optim.Adam(generator.parameters(), lr=lr) - -# Training -for epoch in range(50): - for i, (images, _) in enumerate(train_loader): - images = Variable(images) - real_labels = Variable(torch.ones(images.size(0))) - fake_labels = Variable(torch.zeros(images.size(0))) - - # Train the discriminator - discriminator.zero_grad() - outputs = discriminator(images) - real_loss = criterion(outputs, real_labels) - real_score = outputs - - noise = Variable(torch.randn(images.size(0), 128)) - fake_images = generator(noise) - outputs = discriminator(fake_images) - fake_loss = criterion(outputs, fake_labels) - fake_score = outputs - - d_loss = real_loss + fake_loss - d_loss.backward() - d_optimizer.step() - - # Train the generator - generator.zero_grad() - noise = Variable(torch.randn(images.size(0), 128)) - fake_images = generator(noise) - outputs = discriminator(fake_images) - g_loss = criterion(outputs, real_labels) - g_loss.backward() - g_optimizer.step() - - if (i+1) % 100 == 0: - print('Epoch [%d/%d], Step[%d/%d], d_loss: %.4f, g_loss: %.4f, ' - 'D(x): %.2f, D(G(z)): %.2f' - %(epoch, 50, i+1, 500, d_loss.data[0], g_loss.data[0], - real_score.data.mean(), fake_score.data.mean())) - - # Save the sampled images - torchvision.utils.save_image(fake_images.data, - './data/fake_samples_%d_%d.png' %(epoch+1, i+1)) - -# Save the Models -torch.save(generator, './generator.pkl') -torch.save(discriminator, './discriminator.pkl') \ No newline at end of file diff --git a/tutorials/11 - Deep Q Network/ReplayMemory.ipynb b/tutorials/11 - Deep Q Network/ReplayMemory.ipynb deleted file mode 100644 index 262b483a..00000000 --- a/tutorials/11 - Deep Q Network/ReplayMemory.ipynb +++ /dev/null @@ -1,359 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# PyTorch DQN Implementation\n", - "\n", - "
" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "%matplotlib inline\n", - "\n", - "import torch\n", - "import torch.nn as nn\n", - "import gym\n", - "import random\n", - "import numpy as np\n", - "import torchvision.transforms as transforms\n", - "import matplotlib.pyplot as plt\n", - "from torch.autograd import Variable\n", - "from collections import deque, namedtuple" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[2017-03-09 21:31:48,174] Making new env: CartPole-v0\n" - ] - } - ], - "source": [ - "env = gym.envs.make(\"CartPole-v0\")" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "class Net(nn.Module):\n", - " def __init__(self):\n", - " super(Net, self).__init__()\n", - " self.fc1 = nn.Linear(4, 128)\n", - " self.tanh = nn.Tanh()\n", - " self.fc2 = nn.Linear(128, 2)\n", - " self.init_weights()\n", - " \n", - " def init_weights(self):\n", - " self.fc1.weight.data.uniform_(-0.1, 0.1)\n", - " self.fc2.weight.data.uniform_(-0.1, 0.1)\n", - " \n", - " def forward(self, x):\n", - " out = self.fc1(x)\n", - " out = self.tanh(out)\n", - " out = self.fc2(out)\n", - " return out" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "def make_epsilon_greedy_policy(network, epsilon, nA):\n", - " def policy(state):\n", - " sample = random.random()\n", - " if sample < (1-epsilon) + (epsilon/nA):\n", - " q_values = network(state.view(1, -1))\n", - " action = q_values.data.max(1)[1][0, 0]\n", - " else:\n", - " action = random.randrange(nA)\n", - " return action\n", - " return policy" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "class ReplayMemory(object):\n", - " \n", - " def __init__(self, capacity):\n", - " self.memory = deque()\n", - " self.capacity = capacity\n", - " \n", - " def push(self, transition):\n", - " if len(self.memory) > self.capacity:\n", - " self.memory.popleft()\n", - " self.memory.append(transition)\n", - " \n", - " def sample(self, batch_size):\n", - " return random.sample(self.memory, batch_size)\n", - " \n", - " def __len__(self):\n", - " return len(self.memory)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "def to_tensor(ndarray, volatile=False):\n", - " return Variable(torch.from_numpy(ndarray), volatile=volatile).float()" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "def deep_q_learning(num_episodes=10, batch_size=100, \n", - " discount_factor=0.95, epsilon=0.1, epsilon_decay=0.95):\n", - "\n", - " # Q-Network and memory \n", - " net = Net()\n", - " memory = ReplayMemory(10000)\n", - " \n", - " # Loss and Optimizer\n", - " criterion = nn.MSELoss()\n", - " optimizer = torch.optim.Adam(net.parameters(), lr=0.001)\n", - " \n", - " for i_episode in range(num_episodes):\n", - " \n", - " # Set policy (TODO: decaying epsilon)\n", - " #if (i_episode+1) % 100 == 0:\n", - " # epsilon *= 0.9\n", - " \n", - " policy = make_epsilon_greedy_policy(\n", - " net, epsilon, env.action_space.n)\n", - " \n", - " # Start an episode\n", - " state 
= env.reset()\n", - " \n", - " for t in range(10000):\n", - " \n", - " # Sample action from epsilon greed policy\n", - " action = policy(to_tensor(state)) \n", - " next_state, reward, done, _ = env.step(action)\n", - " \n", - " \n", - " # Restore transition in memory\n", - " memory.push([state, action, reward, next_state])\n", - " \n", - " \n", - " if len(memory) >= batch_size:\n", - " # Sample mini-batch transitions from memory\n", - " batch = memory.sample(batch_size)\n", - " state_batch = np.vstack([trans[0] for trans in batch])\n", - " action_batch =np.vstack([trans[1] for trans in batch]) \n", - " reward_batch = np.vstack([trans[2] for trans in batch])\n", - " next_state_batch = np.vstack([trans[3] for trans in batch])\n", - " \n", - " # Forward + Backward + Opimize\n", - " net.zero_grad()\n", - " q_values = net(to_tensor(state_batch))\n", - " next_q_values = net(to_tensor(next_state_batch, volatile=True))\n", - " next_q_values.volatile = False\n", - " \n", - " td_target = to_tensor(reward_batch) + discount_factor * (next_q_values).max(1)[0]\n", - " loss = criterion(q_values.gather(1, \n", - " to_tensor(action_batch).long().view(-1, 1)), td_target)\n", - " loss.backward()\n", - " optimizer.step()\n", - " \n", - " if done:\n", - " break\n", - " \n", - " state = next_state\n", - " \n", - " if len(memory) >= batch_size and (i_episode+1) % 10 == 0:\n", - " print ('episode: %d, time: %d, loss: %.4f' %(i_episode, t, loss.data[0]))\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "episode: 9, time: 9, loss: 0.9945\n", - "episode: 19, time: 9, loss: 1.8221\n", - "episode: 29, time: 9, loss: 4.3124\n", - "episode: 39, time: 8, loss: 6.9764\n", - "episode: 49, time: 9, loss: 6.8300\n", - "episode: 59, time: 8, loss: 5.5186\n", - "episode: 69, time: 9, loss: 4.1160\n", - "episode: 79, time: 9, loss: 2.4802\n", - "episode: 89, time: 13, loss: 0.7890\n", - "episode: 99, time: 10, loss: 0.2805\n", - "episode: 109, time: 12, loss: 0.1323\n", - "episode: 119, time: 13, loss: 0.0519\n", - "episode: 129, time: 18, loss: 0.0176\n", - "episode: 139, time: 22, loss: 0.0067\n", - "episode: 149, time: 17, loss: 0.0114\n", - "episode: 159, time: 26, loss: 0.0017\n", - "episode: 169, time: 23, loss: 0.0018\n", - "episode: 179, time: 21, loss: 0.0023\n", - "episode: 189, time: 11, loss: 0.0024\n", - "episode: 199, time: 7, loss: 0.0040\n", - "episode: 209, time: 8, loss: 0.0030\n", - "episode: 219, time: 7, loss: 0.0070\n", - "episode: 229, time: 9, loss: 0.0031\n", - "episode: 239, time: 9, loss: 0.0029\n", - "episode: 249, time: 8, loss: 0.0046\n", - "episode: 259, time: 8, loss: 0.0009\n", - "episode: 269, time: 10, loss: 0.0020\n", - "episode: 279, time: 9, loss: 0.0025\n", - "episode: 289, time: 8, loss: 0.0015\n", - "episode: 299, time: 10, loss: 0.0009\n", - "episode: 309, time: 8, loss: 0.0012\n", - "episode: 319, time: 8, loss: 0.0034\n", - "episode: 329, time: 8, loss: 0.0008\n", - "episode: 339, time: 9, loss: 0.0021\n", - "episode: 349, time: 8, loss: 0.0018\n", - "episode: 359, time: 9, loss: 0.0017\n", - "episode: 369, time: 9, loss: 0.0006\n", - "episode: 379, time: 9, loss: 0.0023\n", - "episode: 389, time: 10, loss: 0.0017\n", - "episode: 399, time: 8, loss: 0.0018\n", - "episode: 409, time: 8, loss: 0.0023\n", - "episode: 419, time: 9, loss: 0.0020\n", - "episode: 429, time: 9, loss: 0.0006\n", - "episode: 439, time: 10, loss: 0.0006\n", - "episode: 
449, time: 10, loss: 0.0025\n", - "episode: 459, time: 9, loss: 0.0013\n", - "episode: 469, time: 8, loss: 0.0011\n", - "episode: 479, time: 8, loss: 0.0005\n", - "episode: 489, time: 8, loss: 0.0004\n", - "episode: 499, time: 7, loss: 0.0017\n", - "episode: 509, time: 7, loss: 0.0004\n", - "episode: 519, time: 10, loss: 0.0008\n", - "episode: 529, time: 11, loss: 0.0006\n", - "episode: 539, time: 9, loss: 0.0010\n", - "episode: 549, time: 8, loss: 0.0006\n", - "episode: 559, time: 8, loss: 0.0012\n", - "episode: 569, time: 9, loss: 0.0011\n", - "episode: 579, time: 8, loss: 0.0010\n", - "episode: 589, time: 8, loss: 0.0008\n", - "episode: 599, time: 10, loss: 0.0010\n", - "episode: 609, time: 8, loss: 0.0005\n", - "episode: 619, time: 9, loss: 0.0004\n", - "episode: 629, time: 8, loss: 0.0007\n", - "episode: 639, time: 10, loss: 0.0014\n", - "episode: 649, time: 10, loss: 0.0004\n", - "episode: 659, time: 9, loss: 0.0008\n", - "episode: 669, time: 8, loss: 0.0005\n", - "episode: 679, time: 8, loss: 0.0003\n", - "episode: 689, time: 9, loss: 0.0009\n", - "episode: 699, time: 8, loss: 0.0004\n", - "episode: 709, time: 8, loss: 0.0013\n", - "episode: 719, time: 8, loss: 0.0006\n", - "episode: 729, time: 7, loss: 0.0021\n", - "episode: 739, time: 9, loss: 0.0023\n", - "episode: 749, time: 9, loss: 0.0039\n", - "episode: 759, time: 8, loss: 0.0030\n", - "episode: 769, time: 9, loss: 0.0016\n", - "episode: 779, time: 7, loss: 0.0041\n", - "episode: 789, time: 8, loss: 0.0050\n", - "episode: 799, time: 8, loss: 0.0041\n", - "episode: 809, time: 11, loss: 0.0053\n", - "episode: 819, time: 7, loss: 0.0018\n", - "episode: 829, time: 9, loss: 0.0019\n", - "episode: 839, time: 11, loss: 0.0017\n", - "episode: 849, time: 8, loss: 0.0029\n", - "episode: 859, time: 9, loss: 0.0012\n", - "episode: 869, time: 9, loss: 0.0036\n", - "episode: 879, time: 7, loss: 0.0017\n", - "episode: 889, time: 9, loss: 0.0016\n", - "episode: 899, time: 10, loss: 0.0023\n", - "episode: 909, time: 8, loss: 0.0032\n", - "episode: 919, time: 8, loss: 0.0015\n", - "episode: 929, time: 9, loss: 0.0021\n", - "episode: 939, time: 9, loss: 0.0015\n", - "episode: 949, time: 9, loss: 0.0016\n", - "episode: 959, time: 9, loss: 0.0013\n", - "episode: 969, time: 12, loss: 0.0029\n", - "episode: 979, time: 7, loss: 0.0016\n", - "episode: 989, time: 7, loss: 0.0012\n", - "episode: 999, time: 9, loss: 0.0013\n" - ] - } - ], - "source": [ - "deep_q_learning(1000)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "anaconda-cloud": {}, - "kernelspec": { - "display_name": "Python 2", - "language": "python", - "name": "python2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.13" - } - }, - "nbformat": 4, - "nbformat_minor": 1 -} diff --git a/tutorials/11 - Deep Q Network/Untitled.ipynb b/tutorials/11 - Deep Q Network/Untitled.ipynb deleted file mode 100644 index 10b10f69..00000000 --- a/tutorials/11 - Deep Q Network/Untitled.ipynb +++ /dev/null @@ -1,154 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "%matplotlib inline\n", - "\n", - "import gym\n", - "import numpy as np\n", - "from matplotlib import pyplot as plt" - ] - }, - { - 
"cell_type": "code", - "execution_count": 6, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[2017-03-08 21:13:15,268] Making new env: Breakout-v0\n" - ] - }, - { - "ename": "DependencyNotInstalled", - "evalue": "No module named 'atari_py'. (HINT: you can install Atari dependencies by running 'pip install gym[atari]'.)", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m/home/yunjey/anaconda3/lib/python3.5/site-packages/gym/envs/atari/atari_env.py\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0matari_py\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 10\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mImportError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mImportError\u001b[0m: No module named 'atari_py'", - "\nDuring handling of the above exception, another exception occurred:\n", - "\u001b[0;31mDependencyNotInstalled\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0menv\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgym\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0menvs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmake\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Breakout-v0\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Action space size: {}\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0menv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0maction_space\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0menv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_action_meanings\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/home/yunjey/anaconda3/lib/python3.5/site-packages/gym/envs/registration.py\u001b[0m in \u001b[0;36mmake\u001b[0;34m(id)\u001b[0m\n\u001b[1;32m 159\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 160\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mmake\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mid\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 161\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mregistry\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmake\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mid\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 162\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 163\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mspec\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mid\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/home/yunjey/anaconda3/lib/python3.5/site-packages/gym/envs/registration.py\u001b[0m in \u001b[0;36mmake\u001b[0;34m(self, id)\u001b[0m\n\u001b[1;32m 
-      "(traceback frames condensed, ANSI colour codes stripped: gym/envs/registration.py make -> registration.py load -> pkg_resources EntryPoint.load -> pkg_resources resolve -> gym/envs/atari/__init__.py -> gym/envs/atari/atari_env.py)",
-      "DependencyNotInstalled: No module named 'atari_py'. (HINT: you can install Atari dependencies by running 'pip install gym[atari]'.)"
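The condensed traceback above shows the Breakout cell (its source follows just below) failing because the optional Atari backend was never installed; gym ships it separately behind the `gym[atari]` extra. A minimal sketch, not part of the deleted notebook, of creating the env with an early and explicit failure, assuming gym's error hierarchy as it appears in the traceback (`gym.error.Error` is the base of `DependencyNotInstalled`):

import gym

try:
    env = gym.envs.make("Breakout-v0")
except gym.error.Error as exc:
    # DependencyNotInstalled subclasses gym.error.Error, so this catches the
    # missing atari_py case and surfaces the install hint immediately.
    raise SystemExit(
        "Atari backend missing; try: pip install gym[atari] ({})".format(exc))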
-     ]
-    }
-   ],
-   "source": [
-    "env = gym.envs.make(\"Breakout-v0\")\n",
-    "\n",
-    "print(\"Action space size: {}\".format(env.action_space.n))\n",
-    "print(env.get_action_meanings())\n",
-    "\n",
-    "observation = env.reset()\n",
-    "print(\"Observation space shape: {}\".format(observation.shape))\n",
-    "\n",
-    "plt.figure()\n",
-    "plt.imshow(env.render(mode='rgb_array'))\n",
-    "\n",
-    "[env.step(2) for x in range(1)]\n",
-    "plt.figure()\n",
-    "plt.imshow(env.render(mode='rgb_array'))\n",
-    "\n",
-    "env.render(close=True)\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "[2017-03-08 21:12:44,474] Making new env: CartPole-v0\n"
-     ]
-    },
-    {
-     "ename": "NameError",
-     "evalue": "name 'base' is not defined",
-     "output_type": "error",
-     "traceback": [
-      "(traceback frames condensed, ANSI colour codes stripped; raised from env.render() in the cell source below: gym/core.py render -> gym/core.py _render -> gym/envs/classic_control/cartpole.py _render -> gym/envs/classic_control/rendering.py -> pyglet/gl/__init__.py)",
-      "NameError: name 'base' is not defined"
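This second condensed traceback is pyglet failing at import time on a machine without a usable OpenGL display: the `Config` import dies, so the module-level `del base` then raises the confusing `NameError`. The hint embedded in the original traceback suggests a virtual framebuffer (`xvfb-run -s "-screen 0 1400x900x24" python <script>`); a simpler workaround, sketched below with the same classic gym API the notebook uses, is to skip on-screen rendering entirely, since stepping the physics needs no display:

import gym

env = gym.make('CartPole-v0')
observation = env.reset()
for t in range(100):
    # No env.render() call: CartPole's dynamics run fine on a headless box.
    action = env.action_space.sample()
    observation, reward, done, info = env.step(action)
    if done:
        break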
-     ]
-    }
-   ],
-   "source": [
-    "import gym\n",
-    "env = gym.make('CartPole-v0')\n",
-    "for i_episode in range(20):\n",
-    "    observation = env.reset()\n",
-    "    for t in range(100):\n",
-    "        env.render()\n",
-    "        print(observation)\n",
-    "        action = env.action_space.sample()\n",
-    "        observation, reward, done, info = env.step(action)\n",
-    "        if done:\n",
-    "            print(\"Episode finished after {} timesteps\".format(t+1))\n",
-    "            break"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "anaconda-cloud": {},
-  "kernelspec": {
-   "display_name": "Python [conda root]",
-   "language": "python",
-   "name": "conda-root-py"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.5.2"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 1
-}
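The notebook's last executed cell runs a purely random policy on CartPole-v0, which is a useful baseline for the DQN that the deleted script below trains: random play survives only a few dozen steps per episode, so a learned policy should clear that bar easily. A short sketch, using the same classic gym step API, that turns the random rollout into a measurable baseline:

import gym

env = gym.make('CartPole-v0')
lengths = []
for _ in range(100):
    env.reset()
    done, t = False, 0
    while not done:
        # Uniform random actions, no learning; episode length is the score.
        _, _, done, _ = env.step(env.action_space.sample())
        t += 1
    lengths.append(t)
# Random play on CartPole-v0 usually averages on the order of 20 steps.
print('mean random episode length: {:.1f}'.format(sum(lengths) / len(lengths)))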
diff --git a/tutorials/11 - Deep Q Network/dqn13.py b/tutorials/11 - Deep Q Network/dqn13.py
deleted file mode 100644
index 442b6099..00000000
--- a/tutorials/11 - Deep Q Network/dqn13.py
+++ /dev/null
@@ -1,124 +0,0 @@
-%matplotlib inline
-
-import torch
-import torch.nn as nn
-import gym
-import random
-import numpy as np
-import torchvision.transforms as transforms
-import matplotlib.pyplot as plt
-from torch.autograd import Variable
-from collections import deque, namedtuple
-
-env = gym.envs.make("CartPole-v0")
-
-class Net(nn.Module):
-    def __init__(self):
-        super(Net, self).__init__()
-        self.fc1 = nn.Linear(4, 128)
-        self.tanh = nn.Tanh()
-        self.fc2 = nn.Linear(128, 2)
-        self.init_weights()
-
-    def init_weights(self):
-        self.fc1.weight.data.uniform_(-0.1, 0.1)
-        self.fc2.weight.data.uniform_(-0.1, 0.1)
-
-    def forward(self, x):
-        out = self.fc1(x)
-        out = self.tanh(out)
-        out = self.fc2(out)
-        return out
-
-def make_epsilon_greedy_policy(network, epsilon, nA):
-    def policy(state):
-        sample = random.random()
-        if sample < (1-epsilon) + (epsilon/nA):
-            q_values = network(state.view(1, -1))
-            action = q_values.data.max(1)[1][0, 0]
-        else:
-            action = random.randrange(nA)
-        return action
-    return policy
-
-class ReplayMemory(object):
-
-    def __init__(self, capacity):
-        self.memory = deque()
-        self.capacity = capacity
-
-    def push(self, transition):
-        if len(self.memory) > self.capacity:
-            self.memory.popleft()
-        self.memory.append(transition)
-
-    def sample(self, batch_size):
-        return random.sample(self.memory, batch_size)
-
-    def __len__(self):
-        return len(self.memory)
-
-def to_tensor(ndarray, volatile=False):
-    return Variable(torch.from_numpy(ndarray), volatile=volatile).float()
-
-def deep_q_learning(num_episodes=10, batch_size=100,
-                    discount_factor=0.95, epsilon=0.1, epsilon_decay=0.95):
-
-    # Q-network and replay memory
-    net = Net()
-    memory = ReplayMemory(10000)
-
-    # Loss and optimizer
-    criterion = nn.MSELoss()
-    optimizer = torch.optim.Adam(net.parameters(), lr=0.001)
-
-    for i_episode in range(num_episodes):
-
-        # Set policy (TODO: decaying epsilon)
-        #if (i_episode+1) % 100 == 0:
-        #    epsilon *= 0.9
-
-        policy = make_epsilon_greedy_policy(
-            net, epsilon, env.action_space.n)
-
-        # Start an episode
-        state = env.reset()
-
-        for t in range(10000):
-
-            # Sample an action from the epsilon-greedy policy
-            action = policy(to_tensor(state))
-            next_state, reward, done, _ = env.step(action)
-
-            # Store the transition in replay memory
-            memory.push([state, action, reward, next_state])
-
-            if len(memory) >= batch_size:
-                # Sample a mini-batch of transitions from memory
-                batch = memory.sample(batch_size)
-                state_batch = np.vstack([trans[0] for trans in batch])
-                action_batch = np.vstack([trans[1] for trans in batch])
-                reward_batch = np.vstack([trans[2] for trans in batch])
-                next_state_batch = np.vstack([trans[3] for trans in batch])
-
-                # Forward + backward + optimize
-                net.zero_grad()
-                q_values = net(to_tensor(state_batch))
-                next_q_values = net(to_tensor(next_state_batch, volatile=True))
-                next_q_values.volatile = False
-
-                td_target = to_tensor(reward_batch) + discount_factor * (next_q_values).max(1)[0]
-                loss = criterion(q_values.gather(1,
-                    to_tensor(action_batch).long().view(-1, 1)), td_target)
-                loss.backward()
-                optimizer.step()
-
-            if done:
-                break
-
-            state = next_state
-
-        if len(memory) >= batch_size and (i_episode+1) % 10 == 0:
-            print('episode: %d, time: %d, loss: %.4f' % (i_episode, t, loss.data[0]))
\ No newline at end of file
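One substantive weakness in the deleted trainer: its TD target, reward + gamma * max_a Q(s', a), bootstraps from the next state even when `done` is true, so terminal transitions are regressed toward a value they should not have; the replay memory also never stores `done`, and gradient flow into the bootstrap term is blocked only through the long-deprecated `volatile` flag. A hedged sketch of the same target computed with terminal masking in modern PyTorch (tensor names here are illustrative, not from the file):

import torch

def td_targets(rewards, next_q_values, dones, gamma=0.95):
    # rewards, dones: float tensors of shape (batch,);
    # next_q_values: shape (batch, n_actions).
    # detach() replaces the old volatile trick by stopping gradients through
    # the bootstrap term; (1 - dones) zeroes that term on terminal
    # transitions so the target collapses to the immediate reward there.
    best_next = next_q_values.detach().max(dim=1).values
    return rewards + gamma * (1.0 - dones) * best_next

# Example: the second transition is terminal, so its target is just the reward.
rewards = torch.tensor([1.0, 1.0])
dones = torch.tensor([0.0, 1.0])
next_q = torch.tensor([[0.5, 2.0], [0.5, 2.0]])
print(td_targets(rewards, next_q, dones))  # tensor([2.9000, 1.0000])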