From bb8aec1fa4e22f94a5c6e0c4b1cd97bd7c2b5ceb Mon Sep 17 00:00:00 2001 From: wanglijun Date: Sat, 5 May 2018 22:25:28 +0800 Subject: [PATCH 1/4] init --- .../01-basics/linear_regression/main.ipynb | 283 ++++++++++++ .../01-basics/logistic_regression/main.ipynb | 246 ++++++++++ tutorials/01-basics/pytorch_basics/main.ipynb | 432 ++++++++++++++++++ 3 files changed, 961 insertions(+) create mode 100644 tutorials/01-basics/linear_regression/main.ipynb create mode 100644 tutorials/01-basics/logistic_regression/main.ipynb create mode 100644 tutorials/01-basics/pytorch_basics/main.ipynb diff --git a/tutorials/01-basics/linear_regression/main.ipynb b/tutorials/01-basics/linear_regression/main.ipynb new file mode 100644 index 00000000..78fbd99a --- /dev/null +++ b/tutorials/01-basics/linear_regression/main.ipynb @@ -0,0 +1,283 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import torch.nn as nn\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "from torch.autograd import Variable" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Hyper Parameters" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "input_size = 1\n", + "output_size = 1\n", + "num_epochs = 60\n", + "learning_rate = 0.001" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Toy Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "x_train = np.array(\n", + " [[3.3], [4.4], [5.5], [6.71], [6.93], [4.168], [9.779], [6.182], [7.59],\n", + " [2.167], [7.042], [10.791], [5.313], [7.997], [3.1]],\n", + " dtype=np.float32)\n", + "\n", + "y_train = np.array(\n", + " [[1.7], [2.76], [2.09], [3.19], [1.694], [1.573], [3.366], [2.596], [2.53],\n", + " [1.221], [2.827], [3.465], [1.65], [2.904], [1.3]],\n", + " dtype=np.float32)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Linear Regression Model" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "class LinearRegression(nn.Module):\n", + " def __init__(self, input_size, output_size):\n", + " super(LinearRegression, self).__init__()\n", + " self.linear = nn.Linear(input_size, output_size)\n", + "\n", + " def forward(self, x):\n", + " out = self.linear(x)\n", + " return out" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LinearRegression(\n", + " (linear): Linear(in_features=1, out_features=1, bias=True)\n", + ")" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model = LinearRegression(input_size, output_size)\n", + "model.train()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Loss and Optimizer" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "criterion = nn.MSELoss()\n", + "optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Train the Model" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch [5/60], Loss: 0.1703\n", + "Epoch [10/60], Loss: 0.1703\n", + "Epoch [15/60], Loss: 0.1703\n", 
+ "Epoch [20/60], Loss: 0.1703\n", + "Epoch [25/60], Loss: 0.1703\n", + "Epoch [30/60], Loss: 0.1703\n", + "Epoch [35/60], Loss: 0.1703\n", + "Epoch [40/60], Loss: 0.1703\n", + "Epoch [45/60], Loss: 0.1703\n", + "Epoch [50/60], Loss: 0.1703\n", + "Epoch [55/60], Loss: 0.1703\n", + "Epoch [60/60], Loss: 0.1703\n" + ] + } + ], + "source": [ + "for epoch in range(num_epochs):\n", + " # Convert numpy array to torch Variable\n", + " inputs = torch.from_numpy(x_train)\n", + " targets = torch.from_numpy(y_train)\n", + "\n", + " # Forward + Backward + Optimize\n", + " optimizer.zero_grad()\n", + " outputs = model(inputs)\n", + " loss = criterion(outputs, targets)\n", + " loss.backward()\n", + " optimizer.step()\n", + "\n", + " if (epoch + 1) % 5 == 0:\n", + " print('Epoch [%d/%d], Loss: %.4f' % (epoch + 1, num_epochs,\n", + " loss.item()))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Plot the graph" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD8CAYAAACMwORRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzt3Xl0VFW+9vHvToyEMIhMDYKhEKJMQpAgo7YSZnBolCt2WsWlch26xb6oDURxIhqv8+t4o9iondZWELUVUWxmtNGAIKMMEjDiANgMMUwh+/2jQpkqKqSSVOWcqjyftViVs3NS52cJT+3s2nsfY61FRERiS5zTBYiISPgp3EVEYpDCXUQkBincRURikMJdRCQGKdxFRGKQwl1EJAYp3EVEYpDCXUQkBp3k1IWbNm1qPR6PU5cXEYlKy5cv32WtbVbReY6Fu8fjIS8vz6nLi4hEJWPMtlDO07CMiEgMUriLiMQghbuISAxybMw9mCNHjlBQUMDBgwedLkWAxMREWrduTUJCgtOliEgluSrcCwoKaNCgAR6PB2OM0+XUatZadu/eTUFBAW3btnW6HBGpJFcNyxw8eJAmTZoo2F3AGEOTJk30W5RIlHJVuAMKdhfR/wuR6OW6cBcRiVUHjxzlibkb+X7vgYhfS+EeoKCggEsuuYSUlBTatWvH+PHjOXz4cNBzd+zYweWXX17hcw4fPpw9e/ZUqZ57772XRx99tMLz6tevf8Lv79mzh+eee65KNYhI9b3++XY63D2Hp/61iUUbd0b8etEd7rm54PFAXJz3MTe3Wk9nrWXUqFFceumlbNq0iY0bN1JYWEhmZuZx5xYXF3PaaacxY8aMCp939uzZNGrUqFq1VZfCXcQZ//nlMJ6JHzDp7dUAjOreiit6Jkf8utEb7rm5MG4cbNsG1nofx42rVsDPmzePxMRErr32WgDi4+N54oknePnllykqKmL69OmMHj2aiy66iMGDB5Ofn0+XLl0AKCoq4r/+67/o2rUrV1xxBb169fJtr+DxeNi1axf5+fl07NiRG264gc6dOzN48GAOHPD+evbiiy/Ss2dPunXrxmWXXUZRUdEJa926dSt9+vShZ8+e3H333b72wsJC0tPTOeecczj77LN59913AZg4cSJbtmwhNTWVO+64o9zzRCR8Hvv4a7o/MNd3vPjOC3n8itQauXb0hntmJgQGYFGRt72K1q5dS48ePfzaGjZsSHJyMps3bwbgs88+45VXXmHevHl+5z333HOceuqpfPXVV9x9990sX7486DU2bdrELbfcwtq1a2nUqBEzZ84EYNSoUXzxxResWrWKjh07Mm3atBPWOn78eG666Sa++OILWrRo4WtPTExk1qxZrFixgvnz5zNhwgSstWRnZ9OuXTtWrlzJI488Uu55IlJ93/5chGfiBzw9z5sbtw5oT372CE5vnFRjNbhqnnulbN9eufYQWGuDzhAp2z5o0CAaN2583DlLlixh/PjxAHTp0oWuXbsGvUbbtm1JTfW+c/fo0YP8/HwA1qxZw1133cWePXsoLCxkyJAhJ6x16dKlvjeGq666ir/85S++WidPnsyiRYuIi4vju+++48cffwz63xTsvLJvFCJSeX/+x0pmffmd73jllEE0Sjq5xuuI3nBPTvYOxQRrr6LOnTv7AvOYffv28e2339KuXTuWL19OvXr1gv5sqL3eOnXq+L6Oj4/3DcuMHTuWd955h27dujF9+nQWLFhQ4XMFeyPKzc1l586dLF++nISEBDweT9C56qGeJyKhWfPdXkY+vcR3nD3qbMacG/mx9fJUOCxjjEk0xnxujFlljFlrjLkvyDljjTE7jTErS/9cH5lyy8jKgqSAX3GSkrztVZSenk5RURGvvvoqAEePHmXChAmMHTuWpMBrBejfvz9vvvkmAOvWrWP16tWVuvb+/ftp2bIlR44cITeEzw369evHG2+8AeB3/t69e2nevDkJCQnMnz+fbaVvgA0aNGD//v0VnicilVNSYhn13FJfsDdMPIkNDwx1NNghtDH3Q8AAa203IBUYaozpHeS8f1hrU0v/vBTWKoPJyIC
cHGjTBozxPubkeNuryBjDrFmzeOutt0hJSeHMM88kMTGRBx98sMKfvfnmm9m5cyddu3bl4YcfpmvXrpxyyikhX/uBBx6gV69eDBo0iA4dOlR4/lNPPcWzzz5Lz5492bt3r689IyODvLw80tLSyM3N9T1XkyZN6NevH126dOGOO+4o9zwRCd3CjTs5Y/JsVmz3TnWedk0aX907hMSEeIcrA1OZD9GMMUnAEuAma+2yMu1jgTRr7R9Dfa60tDQbeLOO9evX07Fjx5DrcZOjR49y5MgREhMT2bJlC+np6WzcuJGTT675sbZwiub/JyKRcqj4KP2y57Gr0LsG5uxWp/DOLf2Ij4v8qm5jzHJrbVpF54U0W8YYE2+MWQn8BMwtG+xlXGaM+coYM8MYc3o5zzPOGJNnjMnbuTPyk/hrUlFREf3796dbt2787ne/4/nnn4/6YBeR481YXsBZd83xBfs7t/Tjn3/qH1qwh3ltzomE9IGqtfYokGqMaQTMMsZ0sdauKXPKP4HXrbWHjDE3Aq8AA4I8Tw6QA96ee7Wrd5EGDRrotoEiMWzvgSN0u+9j3/GIri155sruoe/BdGxtzrEp3MfW5kC1hpPLU6l57tbaPcACYGhA+25r7aHSwxeBHoiIxIhn5m3yC/YFt1/As78/p3Kb60Vgbc6JVNhzN8Y0A45Ya/cYY+oCA4GHA85paa39vvTwYmB92CsVEalhO/YcoG/2rwsW//u3ZzBpWBU/g4rA2pwTCWVYpiXwijEmHm9P/01r7fvGmPuBPGvte8CtxpiLgWLgZ2BsRKoVEakhE2d+xRtffOs7zrtrIE3r1znBT1QgAmtzTqTCcLfWfgV0D9I+pczXk4BJ4S1NRKTmbfhhH0OfXOw7vv+Szlzdx1P9J87K8h9zh2qvzTmR6N1bJkLi4+NJTU31/cnPzycvL49bb70VgAULFvDpp5/6zn/nnXdYt25dpa9T3ha9x9pD3U5YRMLDWsvvX/y3L9jrnBTHuvuHhCfYISJrc04kercfiJC6deuycuVKvzaPx0Namnda6YIFC6hfvz59+/YFvOE+cuRIOnXqFNY6Qt1OWESq79Mtu/j9i7/O8H7hDz0Y2iUC+yxlZEQszAOp5x6CBQsWMHLkSPLz83nhhRd44oknSE1NZeHChbz33nvccccdpKamsmXLFrZs2cLQoUPp0aMH5513Hhs2bADK36K3PGW3E54+fTqjRo1i6NChpKSkcOedd/rO+/jjj+nTpw/nnHMOo0ePprCwMDIvgkg0qmBe+eHiEvplz/MFe0rz+mzOGhaZYK9hru253/fPtazbsS+sz9nptIbcc1HnE55z4MAB366Nbdu2ZdasWb7veTwebrzxRurXr8/tt98OwMUXX8zIkSN9Qyjp6em88MILpKSksGzZMm6++WbmzZvn26L36quv5tlnn6107StXruTLL7+kTp06nHXWWfzpT3+ibt26TJ06lU8++YR69erx8MMP8/jjjzNlypSKn1Ak1lUwr/zdld8x/o1ff0ufcWMf0jzH7/garVwb7k4JNiwTqsLCQj799FNGjx7tazt0yDv9v7wtekOVnp7u26umU6dObNu2jT179rBu3Tr69esHwOHDh+nTp0+VaheJOeXMK99/zwOcvfrXO6MN7NicF69Oi7kbwrs23CvqYbtRSUkJjRo1KvfNoTp/eQK3Ci4uLsZay6BBg3j99der/LwiMSvI/PGcc3/Hgxde5zv+5H9+S/vmJ77/cLTSmHslBW6dW/a4YcOGtG3blrfeegvwfvq+atUqoPwtequjd+/eLF261HeXqKKiIjZu3BiW5xaJemXmj/9U71Q8f3nfF+xj+3rIzx4Rs8EOCvdKu+iii5g1axapqaksXryYMWPG8Mgjj9C9e3e2bNlCbm4u06ZNo1u3bnTu3Nl3b9LytuitjmbNmjF9+nSuvPJKunbtSu/evX0f4IrUeqX3fGh753uc+8fXfM2fd9jLvRdH38hAZVVqy99wirUtf2OV/p9ItJqz5ntu/NsK3/FdX87k+qvTa2wqYqSEuuWva8fcRUSqwlpL20mz/do+n5xO84YjHKrIGQp3EYkZzy3YzP/O+dp3PLRzC164qnZuUuu6cLfWxtyUpGjl1JCdSGUdPHKUDnfP8Wtbf/9Q6p7s/O3unOKqcE9MTGT37t00adJEAe8way27d+8mMTHR6VJETmj8G1/y7sodvuM/DzyT8QNTHKzIHVwV7q1bt6agoIBYuwVftEpMTKR169ZOlyES1M79h+iZ9Ylf2zcPDieuBu5jGg1cFe4JCQm0bdvW6TJExOUufHQBW3f94jt++sruXNTtNAcrch9XhbuIyIl8/cN+hjy5yK8tP7t2zYIJlcJdRKKCZ+IHfsezbu5L9+RTHarG/bRCVaQyKthCVsJv/oaf/IK9QZ2TyM8eoWCvgHruIqGqYAtZCa9gi5GWThxAq0Z1HaoouqjnLhKqcraQJTPTmXpi2F+XbvUL9vNSmpKfPULBXgnquYuEKsgWsidsl0o7XFzCmXd96Ne25r4h1K+jqKosvWIioUpO9g7FBGuXapv09mpe//zXN8r//u0ZTBqmTeuqSuEuEqqsLP8xd4CkJG+7VNl/fjlM9wfm+rVteXA48VqMVC0Kd5FQHfvQNDPTOxSTnOwNdn2YWmUjn17Mmu9+vVfyo6O7cXkPrYoOB4W7SGVkZCjMw+CbnYUMeGyhX5sWI4WXwl1EalTgYqR/jOtNrzOaOFRN7FK4i0iN+HTzLn7/0jK/NvXWI0fhLiIRF9hbX3D7BXia1nOomtpB4S4iEfP659uZ9PZq33GPNqcy86a+DlZUeyjcRSTsio+W0D7TfzHSqimDOSUpwaGKah+Fu4iE1f3/XMfLS7f6jq/p04b7LuniYEW1k8JdRMJi38EjdL33Y7+2TVnDSIjXFlZOqDDcjTGJwCKgTun5M6y19wScUwd4FegB7AausNbmh71aEXGlK/7vM5Zt/dl3PPXSLvyhdxsHK5JQeu6HgAHW2kJjTAKwxBjzobX232XOuQ74j7W2vTFmDPAwcEUE6hURF/n25yLO+9/5fm1bHxquG9y7QIXhbq21QGHpYULpHxtw2iXAvaVfzwCeMcaY0p8VkRh05l0fcri4xHf82nXncl5KMwcrkrJCGnM3xsQDy4H2wLPW2mUBp7QCvgWw1hYbY/YCTYBdYaxVRFxg+bafuez5z/zatBjJfUIKd2vtUSDVGNMImGWM6WKtXVPmlGC/gx3XazfGjAPGASRrm1SRqBO4GOmT/zmf9s0bOFSNnEilPsa21u4BFgBDA75VAJwOYIw5CTgF+DngHKy1OdbaNGttWrNm+vVNJFrM+rLAL9g7tGhAfvYIBbuLhTJbphlwxFq7xxhTFxiI9wPTst4DrgE+Ay4H5mm8XST6HS2xtJvsfx/TFXcPonG9kx2qSEIVyrBMS+CV0nH3OOBNa+37xpj7gTxr7XvANOA1Y8xmvD32MRGrWERqxGMff83T8zb7ji/v0ZpHR3dzsCKpjFBmy3wFdA/SPqXM1weB0eEtTU
Sc8MuhYjrf85Ff29dTh1LnpHiHKpKq0NIxkViXmwseD8TFeR9zc8s99brpX/gFe+bwjuRnj1CwRyFtPyASy3Jz/e/7um2b9xj87ij1/d4D9Hlont+PajFSdDNOfe6ZlpZm8/LyHLm2SK3h8XgDPVCbNpCfD0CPB+ay+5fDvm+9dHUaAzv9pmbqk0ozxiy31qZVdJ567iKxbPv2ctu/KtjDxc8s9WvWYqTYoXAXiWXJyUF77p47/wllgn32refR6bSGNVmZRJg+UBWJZVlZkJTkO/zwzL54/vK+77hVo7rkZ49QsMcg9dxFIiU3FzIzvUMjycneoC3zIWaNKL2ezcyk7Zhn/b71+eR0mjdMrNl6pMYo3EUiIcRZKjVh7OEUFpQJ9hFnt+TZjHNqtAapeZotIxIJIcxSibTCQ8V0CViMtP7+odQ9WXPWo5lmy4g46QSzVGpCSuZsjhz9tePWv31T/nZ9rxq5triDwl0kEsqZpUKEt7retvsXfvvIAr+2bx4cTlycFiPVNgp3kUjIyvIfcwfvrJWsrIhdMnCv9fHpKfx50JkRu564m6ZC1haV2F9EwiAjA3JyvGPsxngfc3Ii8mHq0s27jgv2/OwRCvZaTj332sBFMzdqlYyMiL++gaH+f1f1YEjnFhG9pkQH9dxrg8xM/+EB8B5nZjpTj1TbtCVbg/bWFexyjHrutYHDMzckfKy1tJ3kf2ekj247n7Na6HZ34k/hXhs4NHNDwuv8/53P9p/9fwPTRl9SHoV7beDAzA0Jn2B3Rlo5ZRCNknQfUymfwr02OPahntP7nEilBY6rg3rrEhqFe21RAzM3JHy+2VnIgMcW+rVtyhpGQrzmQEhoFO4iLhPYW+/Xvgm51/d2qBqJVgp3EZeYu+5HbnjVfzM9DcFIVSncRVwgsLd+x5CzuOXC9g5VI7FA4S7ioEc+2sCz87f4tam3LuGgcBdxSGBv/bXrzuW8lGYOVSOxRh+9S+xz2aZpFz+zJOjWAQp2CSf13CW2uWjTtINHjtLh7jl+bYvvvJDTGyeV8xMiVafb7Elsc8Ht7kCLkSR8dJs9EXB807Tv9hygX/Y8v7YNDwwlMUH3MZXIUrhLbHNw07TA3nrn0xrywa3nRfy6IqBwl1jnwKZpSzfvIuOlZX5tWx8ajjG6j6nUHM2WkdhWg7e7A29vvWyw33RBO/KzR4Qn2F0260fcrcKeuzHmdOBVoAVQAuRYa58KOOcC4F1ga2nT29ba+8NbqkgV1cCmaVPeXcOrn/kP/4T1A1MXzfqR6BDKsEwxMMFau8IY0wBYboyZa61dF3DeYmvtyPCXKOJugWPr2aPOZsy5YR7TP9GtEhXuEkSF4W6t/R74vvTr/caY9UArIDDcRWqVnlmfsHP/Ib+2iE1v1K0SpZIq9YGqMcYDdAeWBfl2H2PMKmAHcLu1dm21qxNxocPFJZx514d+bR/c2p/Op50SuYvqVolSSSGHuzGmPjATuM1auy/g2yuANtbaQmPMcOAdICXIc4wDxgEk6y+lRCHHFiPpVolSSSHNljHGJOAN9lxr7duB37fW7rPWFpZ+PRtIMMY0DXJejrU2zVqb1qyZ9tGQ6LFjz4Hjgn3VPYNrbpVpDc/6kegXymwZA0wD1ltrHy/nnBbAj9Zaa4w5F++bxu6wViriENdsHaBbJUolhDIs0w+4ClhtjFlZ2jYZSAaw1r4AXA7cZIwpBg4AY6xTm9aIhMnCjTu55uXP/dq0GEmiRSizZZYAJ/zbbK19BngmXEWJOC2wt963XRP+foPuYyrRQ9sPiJTx2Mdf8/S8zX5t2r1RopHCXaRUYG89c3hHbjj/DIeqEakehbvUekOfXMSGH/b7tam3LtFO4S611tESS7vJs/3aZtzYhzRPY4cqEgkfhbvUSq6Z3igSIQp3qVV2FR4ibeonfm15dw2kaf06DlUkEhkKd6k11FuX2kThLjFv3Y59DP9/i/3atjw4nPg4LUaS2KVwl5gW2Fvv0KIBc24736FqRGqOwl1i0qwvC/jzP1b5tWkIRmoThbvEnMDe+tRLu/CH3m0cqkbEGQp3iRmZs1aTu8z/zkTqrUttpXCXqGetpe0k/8VIM2/qS482pzpUkYjzFO4S1fplz+O7PQf82tRbF1G4S5QqPFRMl3s+8mv7InMgzRpoMZIIKNwlCmkxkkjFFO4SNbbsLCT9sYV+bZuyhpEQH9KtgEVqFYW7RIXA3vp5KU157bpeDlUj4n4Kd3G1j9b+wH+/ttyvTUMwIhVTuItrBfbWJw7rwI2/bedQNSLRReEurvPwnA08v2CLX5t66yKVo3AX1wi2GCn3+l70a9/UoYpEopfCXVxh5NOLWfPdPr829dZFqk7hLo46eOQoHe6e49e2dOIAWjWq61BFIrFB4S6O0WIkkchRuEuNK/hPEf0fnu/XtuGBoSQmxDtUkUjs0dI+qVGeiR/4BXu31qeQf/YeElPaQVwceDyQm+tcgSIxQuEuNWJ1wd7jhmHys0fw7qnbYNw42LYNrPU+jhungBepJmOtdeTCaWlpNi8vz5FrS80KDPVbLmzHHUM6lH7T4w30QG3aQH5+xGsTiTbGmOXW2rSKztOYu0TMO19+x23/WOnXdtwHptv975xUYbuIhEThLhER2FufcWMf0jyNjz8xOTl4zz05OUKVidQOGnOXsJr6/rqgY+tBgx0gKwuSkvzbkpK87SJSZeq5S1gUHy2hfeaHfm2fTRpAy1MqWIyUkeF9zMz0DsUkJ3uD/Vi7iFRJheFujDkdeBVoAZQAOdbapwLOMcBTwHCgCBhrrV0R/nLFjS55ZgmrCvb6jpvWr0PeXQNDf4KMDIW5SJiF0nMvBiZYa1cYYxoAy40xc62168qcMwxIKf3TC3i+9FFi2J6iw6TeP9evTYuRRNyhwnC31n4PfF/69X5jzHqgFVA23C8BXrXeeZX/NsY0Msa0LP1ZiUGB4+oXdTuNp6/s7lA1IhKoUmPuxhgP0B1YFvCtVsC3ZY4LStv8wt0YMw4YB5Cs2RBR6esf9jPkyUV+bVsfGo53ZE5E3CLkcDfG1AdmArdZa/cFfjvIjxy3OspamwPkgHcRUyXqFBcI7K3fPbIT1/Vv61A1InIiIYW7MSYBb7DnWmvfDnJKAXB6mePWwI7qlyduMGfND9z4N93HVCSahDJbxgDTgPXW2sfLOe094I/GmDfwfpC6V+PtsSGwt/6363rRP0V3RhJxu1B67v2Aq4DVxphja8knA8kA1toXgNl4p0FuxjsV8trwlyo16Ym5G3nqX5v82tRbF4keocyWWULwMfWy51jglnAVJc4pKbGcMdn/PqYL77iANk3qOVSRiFSFVqiKz1XTlrF40y7fcUK8YVPWcAcrEpGqUrgL+w8e4ex7P/ZrW3PfEOrX0V8PkWilf721XOAHphee1Yy/XnuuQ9WISLgo3Guprbt+4cJHF/i1ffPgcOLitBhJJBYo3GuhwN76hEFn8qf0FIeqEZFIULjXIgs37uSalz/3a9P0RpHYpHCvJQJ76y9dncbATr9xqBoRiTSFe4zLWbSFB
2dv8GtTb10k9incY5S1lraT/Bcjzf3z+aT8poFDFYlITVK4x6Cbc5cze/UPfm3qrYvULgr3GHLg8FE6Tpnj17ZqymBOSUpwqCIRcYrCPUZMeHMVM1cU+I57tDmVmTf1dbAiEXGSwj3K7S48RI+pn/i1bXlwOPFajCRSqynco9iQJxbx9Y/7fcdPXpHKpd1bOViRiLiFwj0Kbf5pPwMf97+PqT4wFZGyFO5RJnAx0syb+tCjTWOHqhERt4pzuoCYkpsLHg/ExXkfc3PD9tSLNu70C/Y6J8WRnz1CwS4iQSncwyU3F8aNg23bwFrv47hx1Q54ay2eiR9wdZk9YRbfeSFfTx1W3YolEiL4Bi9SGQr3cMnMhKIi/7aiIm97Fb32721+q0x7n9GY/OwRnN44qcrPKREUoTd4kaow3tuf1ry0tDSbl5fnyLUjIi7O+w86kDFQUlKppzpytISUzA/92r66dzANE7UYydU8Hm+gB2rTBvLza7oaiVHGmOXW2rSKzlPPPVySkyvXXo4p767xC/br+rclP3tE9Ad7bRiu2L69cu0iEaTZMuGSleX9Fbzs0ExSkrc9BHuLjtDtfv/7mG7OGsZJ8THw/ntsuOLYa3NsuAIgI8O5usItOTl4z72Sb/Ai4RADyeESGRmQk+P9FdwY72NOTkjhNeq5pX7Bnj3qbPKzR8RGsENEPo9wpaws7xt6WZV4gxcJJ425Oyh/1y9cEHAf05hcjBTGzyNcLzfX+6a1fbu3x56VFVu/nYjjQh1z17CMQwIXI/39+l70bd/UoWoirDYNV2RkKMzFFWLk9/7oseyb3ccFe372iNgNdtBwhYgD1HOvQYGhPm/CbzmjWX2HqqlBx3qyGq4QqTHqudeAt/K+9Qv2rq1PIT97RHiD3e1TDTMyvHO9S0q8jwp2kYhSzz2CjpZY2k32v4/pyimDaJR0cngvVFumGopIyNRzj5CHPlzvF+xXnptMfvaI8Ac71J6phiISMvXcw6zwUDFd7vnIr23j1GGcfFIE30e1MlJEAijcwyjrg3W8uHir7/jeizoxtl/byF+4Nk01FJGQVBjuxpiXgZHAT9baLkG+fwHwLnAs1d621t4fziLdbuf+Q/TM8r+P6daHhmNMDd3HtJpbH4hI7Aml5z4deAZ49QTnLLbWjgxLRVHm2r9+zvyvd/qOZ93cl+7Jp9ZsEZpqKCIBKgx3a+0iY4wn8qVEl00/7mfQE7/ex7Rjy4Z8OP485wrSykgRKSNcY+59jDGrgB3A7dbatWF6Xle64JH55O/+dQhk8Z0X6gYaIuIq4Qj3FUAba22hMWY48A6QEuxEY8w4YBxAchR+2Pf1D/sZ8uSvvfXfdW/FE1ekOliRiEhw1Q53a+2+Ml/PNsY8Z4xpaq3dFeTcHCAHvLtCVvfaNcVay1XTPmfJZu9/UkK8YcXdg2gQ7TfQEJGYVe1wN8a0AH601lpjzLl4F0btrnZlLvHvb3YzJuffvuPnM85h2NktHaxIRKRioUyFfB24AGhqjCkA7gESAKy1LwCXAzcZY4qBA8AY69Qm8WF05GgJ6Y8tZPvP3rH1ds3q8dFt58fODTREJKaFMlvmygq+/wzeqZIx4/2vdvDHv3/pO37rxj709DR2sCIRkcrRCtUyArcOGNChOdOuSau5xUgiImGicC/10uJvmPrBet/xJ/9zPu2bN3CwIhGRqqv14f7T/oOcm/Uv3/E1fdpw3yXH7bIgIhJVanW43/fPtfx1ab7v+PPJ6TRvmOhcQSIiYVIrw33LzkLSH1voO548vAPjzm/nYEUiIuFVq8LdWsv1r+Txrw0/+dpW3ztYi5FEJObUmnBfvu1nLnv+M9/x/7uyOxd3O83BikREIifmw734aAnDnlrMpp8KAWh9al3mTbggsndGEhFxWEwivJTiAAAE20lEQVQn3Jw1P9A+80NfsP/9hl4s+cuA6gV7bi54PBAX533MzQ1LrSIi4RSTPfeiw8Wk3jeXw0dLAOjbrgm51/eq/mKk3Fz/Ox5t2+Y9Bu2lLiKuYpzaBiYtLc3m5eWF/Xlf/SyfKe/+up38nNvOo0OLhuF5co8n+L1K27SB/PzwXENE5ASMMcuttWkVnRczPffdhYfoMfXX+5iO6Xk62Zd1De9Ftm+vXLuIiENiItwf+nA9/7fwG9/xpxMHcFqjuuG/UHJy8J57FN54RERiW1SHe/6uX7jg0QW+49sHn8kfBwS9CVR4ZGX5j7kDJCV520VEXCQqw91ayy1/X8Hs1T/42lbdM5hT6kZ4MdKxD00zM71DMcnJ3mDXh6ki4jLRFe65uRycci8dRj/pa3p0dDcu79G65mrIyFCYi4jrRc8899JpiN/uPQRA01/+w4bnMrh8w8IKflBEpPaJnqmQmoYoIhLyVMjo6blrGqKISMiiJ9zLm26oaYgiIseJnnDPyvJOOyxL0xBFRIKKnnDPyICcHO8YuzHex5wczVwREQkiuqZCahqiiEhIoqfnLiIiIVO4i4jEIIW7iEgMUriLiMQghbuISAxybPsBY8xOIMh+AsdpCuyKcDnRSK9L+fTaBKfXpXzR9Nq0sdY2q+gkx8I9VMaYvFD2Uaht9LqUT69NcHpdyheLr42GZUREYpDCXUQkBkVDuOc4XYBL6XUpn16b4PS6lC/mXhvXj7mLiEjlRUPPXUREKsmV4W6MOd0YM98Ys94Ys9YYM97pmtzEGBNvjPnSGPO+07W4iTGmkTFmhjFmQ+nfnT5O1+QWxpg/l/5bWmOMed0Yk+h0TU4xxrxsjPnJGLOmTFtjY8xcY8ym0sdTnawxHFwZ7kAxMMFa2xHoDdxijOnkcE1uMh5Y73QRLvQUMMda2wHohl4jAIwxrYBbgTRrbRcgHhjjbFWOmg4MDWibCPzLWpsC/Kv0OKq5Mtyttd9ba1eUfr0f7z/SVs5W5Q7GmNbACOAlp2txE2NMQ+B8YBqAtfawtXaPs1W5yklAXWPMSUASsMPhehxjrV0E/BzQfAnwSunXrwCX1mhREeDKcC/LGOMBugPLnK3ENZ4E7gRKnC7EZc4AdgJ/LR2yeskYU8/potzAWvsd8CiwHfge2Gut/djZqlznN9ba78HbuQSaO1xPtbk63I0x9YGZwG3W2n1O1+M0Y8xI4Cdr7XKna3Ghk4BzgOettd2BX4iBX63DoXT8+BKgLXAaUM8Y8wdnq5JIc224G2MS8AZ7rrX2bafrcYl+wMXGmHzgDWCAMeZvzpbkGgVAgbX22G94M/CGvcBAYKu1dqe19gjwNtDX4Zrc5kdjTEuA0sefHK6n2lwZ7sYYg3fsdL219nGn63ELa+0ka21ra60H7wdi86y16oEB1tofgG+NMWeVNqUD6xwsyU22A72NMUml/7bS0YfNgd4Drin9+hrgXQdrCQu33kO1H3AVsNoYs7K0bbK1draDNYn7/QnINcacDHwDXOtwPa5grV1mjJkBrMA7E+1LYnBFZqiMMa8DFwBNjTEFwD1ANvCmMeY6vG+Go52rMDy0QlVEJAa5clhGRESqR+EuIhKDFO4iIjFI4S4iEoMU7iIiMUjh
LiISgxTuIiIxSOEuIhKD/j+DGH7RYc4MjgAAAABJRU5ErkJggg==\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "with torch.no_grad():\n", + " predicted = model(torch.from_numpy(x_train)).numpy()\n", + " plt.plot(x_train, y_train, 'ro', label='Original data')\n", + " plt.plot(x_train, predicted, label='Fitted line')\n", + " plt.legend()\n", + " plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Save the Model" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "torch.save(model.state_dict(), 'model.pkl')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.4" + }, + "toc": { + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tutorials/01-basics/logistic_regression/main.ipynb b/tutorials/01-basics/logistic_regression/main.ipynb new file mode 100644 index 00000000..294f7497 --- /dev/null +++ b/tutorials/01-basics/logistic_regression/main.ipynb @@ -0,0 +1,246 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import torch.nn as nn\n", + "import torchvision.datasets as dsets\n", + "import torchvision.transforms as transforms\n", + "from torch.autograd import Variable" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Hyper Parameters " + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "input_size = 784\n", + "num_classes = 10\n", + "num_epochs = 5\n", + "batch_size = 100\n", + "learning_rate = 0.001" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "MNIST Dataset (Images and Labels)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz\n", + "Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz\n", + "Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz\n" + ] + } + ], + "source": [ + "train_dataset = dsets.MNIST(\n", + " root='./data', train=True, transform=transforms.ToTensor(), download=True)\n", + "\n", + "test_dataset = dsets.MNIST(\n", + " root='./data', train=False, transform=transforms.ToTensor())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Dataset Loader (Input Pipline)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "train_loader = torch.utils.data.DataLoader(\n", + " dataset=train_dataset, batch_size=batch_size, shuffle=True)\n", + "\n", + "test_loader = torch.utils.data.DataLoader(\n", + " dataset=test_dataset, batch_size=batch_size, shuffle=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Model" + ] + }, + { 
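Note on the linear-regression notebook above: the printed loss sits at 0.1703 from epoch 5 onward, which suggests the parameters were already at (or very near) the least-squares optimum when that training cell ran; at lr=0.001, 60 more epochs barely move it. A quick way to confirm the plateau is genuine convergence is to compare against the closed-form least-squares fit. This is a minimal numpy-only sketch reusing the notebook's x_train/y_train:

```python
import numpy as np

# Same toy data as the notebook above.
x_train = np.array([[3.3], [4.4], [5.5], [6.71], [6.93], [4.168], [9.779],
                    [6.182], [7.59], [2.167], [7.042], [10.791], [5.313],
                    [7.997], [3.1]], dtype=np.float32)
y_train = np.array([[1.7], [2.76], [2.09], [3.19], [1.694], [1.573], [3.366],
                    [2.596], [2.53], [1.221], [2.827], [3.465], [1.65],
                    [2.904], [1.3]], dtype=np.float32)

# Closed-form least squares: fit [x, 1] @ [w, b]^T = y directly.
A = np.hstack([x_train, np.ones_like(x_train)])
sol, residuals, *_ = np.linalg.lstsq(A, y_train, rcond=None)
w, b = sol.ravel()
mse = residuals[0] / len(x_train)  # the same quantity nn.MSELoss() minimizes
print('w = %.4f, b = %.4f, optimal MSE = %.4f' % (w, b, mse))
```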
+ "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class LogisticRegression(nn.Module):\n", + " def __init__(self, input_size, num_classes):\n", + " super(LogisticRegression, self).__init__()\n", + " self.linear = nn.Linear(input_size, num_classes)\n", + "\n", + " def forward(self, x):\n", + " x = x.view(-1, 28 * 28)\n", + " out = self.linear(x)\n", + " return out" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model = LogisticRegression(input_size, num_classes)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "Loss and Optimizer\n", + "\n", + "Softmax is internally computed.\n", + "\n", + "Set parameters to be updated." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "criterion = nn.CrossEntropyLoss()\n", + "optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Training the Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for epoch in range(num_epochs):\n", + " for i, (images, labels) in enumerate(train_loader):\n", + " images = images.view(-1, 28 * 28)\n", + " # Forward + Backward + Optimize\n", + " optimizer.zero_grad()\n", + " outputs = model(images)\n", + " loss = criterion(outputs, labels)\n", + " loss.backward()\n", + " optimizer.step()\n", + "\n", + " if (i + 1) % 100 == 0:\n", + " print('Epoch: [%d/%d], Step: [%d/%d], Loss: %.4f' %\n", + " (epoch + 1, num_epochs, i + 1,\n", + " len(train_dataset) // batch_size, loss.data[0]))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Test the Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "correct = 0\n", + "total = 0\n", + "for images, labels in test_loader:\n", + " images = images.view(-1, 28 * 28)\n", + " outputs = model(images)\n", + " _, predicted = torch.max(outputs, 1)\n", + " total += labels.size(0)\n", + " correct += (predicted == labels).sum()\n", + "\n", + "print('Accuracy of the model on the 10000 test images: %d %%' %\n", + " (100 * correct / total))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Save the Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "torch.save(model.state_dict(), 'model.pkl')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.4" + }, + "toc": { + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tutorials/01-basics/pytorch_basics/main.ipynb b/tutorials/01-basics/pytorch_basics/main.ipynb new file mode 100644 index 00000000..536b4ccf --- /dev/null +++ b/tutorials/01-basics/pytorch_basics/main.ipynb @@ -0,0 +1,432 @@ +{ + "cells": [ + { + "cell_type": 
"code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import torchvision\n", + "import torch.nn as nn\n", + "import numpy as np\n", + "import torch.utils.data as data\n", + "import torchvision.transforms as transforms\n", + "import torchvision.datasets as dsets\n", + "from torch.autograd import Variable" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Basic autograd example 1" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tensor([ 2.])\n", + "tensor([ 1.])\n", + "tensor([ 1.])\n" + ] + } + ], + "source": [ + "# Create tensors.\n", + "x = torch.tensor([1], dtype=torch.float32, requires_grad=True)\n", + "w = torch.tensor([2], dtype=torch.float32, requires_grad=True)\n", + "b = Variable(torch.Tensor([3]), requires_grad=True)\n", + "\n", + "# Build a computational graph.\n", + "y = w * x + b # y = 2 * x + 3\n", + "\n", + "# Compute gradients.\n", + "y.backward()\n", + "\n", + "# Print out the gradients.\n", + "print(x.grad) # x.grad = 2\n", + "print(w.grad) # w.grad = 1\n", + "print(b.grad) # b.grad = 1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Basic autograd example 2" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "w: Parameter containing:\n", + "tensor([[ 0.2911, -0.2927, -0.2906],\n", + " [-0.5541, 0.3784, 0.3746]])\n", + "b: Parameter containing:\n", + "tensor([ 0.2662, 0.3458])\n" + ] + } + ], + "source": [ + "# Create tensors.\n", + "x = Variable(torch.randn(5, 3))\n", + "y = Variable(torch.randn(5, 2))\n", + "\n", + "# Build a linear layer.\n", + "linear = nn.Linear(3, 2)\n", + "print('w: ', linear.weight)\n", + "print('b: ', linear.bias)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "torch.Size([2, 3])\n", + "torch.Size([2])\n" + ] + } + ], + "source": [ + "for p in linear.parameters():\n", + " print(p.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "torch.Size([5, 2]) torch.Size([5, 2])\n" + ] + } + ], + "source": [ + "# Build Loss and Optimizer.\n", + "criterion = nn.MSELoss()\n", + "optimizer = torch.optim.SGD(linear.parameters(), lr=0.01)\n", + "\n", + "# Forward propagation.\n", + "pred = linear(x)\n", + "print(pred.shape, y.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "loss: 1.252275824546814\n" + ] + } + ], + "source": [ + "# Compute loss.\n", + "loss = criterion(pred, y)\n", + "print('loss: ', loss.item())" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "dL/dw: tensor([[ 0.1941, -0.5061, -0.3969],\n", + " [ 0.4256, 1.5922, 0.9655]])\n", + "dL/db: tensor([ 0.4807, 0.1702])\n" + ] + } + ], + "source": [ + "# Backpropagation.\n", + "loss.backward()\n", + "\n", + "# Print out the gradients.\n", + "print('dL/dw: ', linear.weight.grad)\n", + "print('dL/db: ', linear.bias.grad)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + 
"# 1-step Optimization (gradient descent).\n", + "optimizer.step()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "loss after 1 step optimization: 1.252275824546814\n" + ] + } + ], + "source": [ + "# You can also do optimization at the low level as shown below.\n", + "# linear.weight.data.sub_(0.01 * linear.weight.grad.data)\n", + "# linear.bias.data.sub_(0.01 * linear.bias.grad.data)\n", + "\n", + "# Print out the loss after optimization.\n", + "pred = linear(x)\n", + "loss = criterion(pred, y)\n", + "print('loss after 1 step optimization: ', loss.item())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Loading data from numpy" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(tensor([[ 1, 2],\n", + " [ 3, 4]]), tensor([[ 1, 2],\n", + " [ 3, 4]]), array([[1, 2],\n", + " [3, 4]]))" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a = np.array([[1, 2], [3, 4]])\n", + "b = torch.from_numpy(a) # convert numpy array to torch tensor\n", + "b1 = torch.tensor(a)\n", + "c = b.numpy() # convert torch tensor to numpy array\n", + "b, b1, c" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Implementing the input pipline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ../data/cifar-10-python.tar.gz\n" + ] + } + ], + "source": [ + "# Download and construct dataset.\n", + "train_dataset = dsets.CIFAR10(\n", + " root='../data/',\n", + " train=True,\n", + " transform=transforms.ToTensor(),\n", + " download=True)\n", + "\n", + "# Select one data pair (read data from disk).\n", + "image, label = train_dataset[0]\n", + "print(image.size())\n", + "print(label)\n", + "\n", + "# Data Loader (this provides queue and thread in a very simple way).\n", + "train_loader = torch.utils.data.DataLoader(\n", + " dataset=train_dataset, batch_size=100, shuffle=True, num_workers=2)\n", + "\n", + "# When iteration starts, queue and thread start to load dataset from files.\n", + "data_iter = iter(train_loader)\n", + "\n", + "# Mini-batch images and labels.\n", + "images, labels = data_iter.next()\n", + "\n", + "# Actual usage of data loader is as below.\n", + "for images, labels in train_loader:\n", + " # Your training code will be written here\n", + " pass" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Input pipline for custom dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# You should build custom dataset as below.\n", + "class CustomDataset(data.Dataset):\n", + " def __init__(self):\n", + " # TODO\n", + " # 1. Initialize file path or list of file names.\n", + " pass\n", + "\n", + " def __getitem__(self, index):\n", + " # TODO\n", + " # 1. Read one data from file (e.g. using numpy.fromfile, PIL.Image.open).\n", + " # 2. Preprocess the data (e.g. torchvision.Transform).\n", + " # 3. Return a data pair (e.g. 
image and label).\n", + " pass\n", + "\n", + " def __len__(self):\n", + " # You should change 0 to the total size of your dataset.\n", + " return 0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Then, you can just use prebuilt torch's data loader.\n", + "custom_dataset = CustomDataset()\n", + "train_loader = torch.utils.data.DataLoader(\n", + " dataset=custom_dataset, batch_size=100, shuffle=True, num_workers=2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Using pretrained model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Download and load pretrained resnet.\n", + "resnet = torchvision.models.resnet18(pretrained=True)\n", + "\n", + "# If you want to finetune only top layer of the model.\n", + "for param in resnet.parameters():\n", + " param.requires_grad = False\n", + "\n", + "# Replace top layer for finetuning.\n", + "resnet.fc = nn.Linear(resnet.fc.in_features, 100) # 100 is for example.\n", + "\n", + "# For test.\n", + "images = Variable(torch.randn(10, 3, 224, 224))\n", + "outputs = resnet(images)\n", + "print(outputs.size()) # (10, 100)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Save and load the model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Save and load the entire model.\n", + "torch.save(resnet, 'model.pkl')\n", + "model = torch.load('model.pkl')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Save and load only the model parameters(recommended).\n", + "torch.save(resnet.state_dict(), 'params.pkl')\n", + "resnet.load_state_dict(torch.load('params.pkl'))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.4" + }, + "toc": { + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From f7ec460bad4654480104ae911f5c71e0aeeec8cb Mon Sep 17 00:00:00 2001 From: wanglijun Date: Sun, 6 May 2018 00:03:48 +0800 Subject: [PATCH 2/4] update --- .../feedforward_neural_network/main-gpu.ipynb | 133 +++++++++ .../feedforward_neural_network/main.ipynb | 281 ++++++++++++++++++ .../recurrent_neural_network/main-gpu.ipynb | 263 ++++++++++++++++ 3 files changed, 677 insertions(+) create mode 100644 tutorials/01-basics/feedforward_neural_network/main-gpu.ipynb create mode 100644 tutorials/01-basics/feedforward_neural_network/main.ipynb create mode 100644 tutorials/02-intermediate/recurrent_neural_network/main-gpu.ipynb diff --git a/tutorials/01-basics/feedforward_neural_network/main-gpu.ipynb b/tutorials/01-basics/feedforward_neural_network/main-gpu.ipynb new file mode 100644 index 00000000..76355fd8 --- /dev/null +++ b/tutorials/01-basics/feedforward_neural_network/main-gpu.ipynb @@ -0,0 +1,133 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": 
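The `CustomDataset` skeleton above leaves `__getitem__` and `__len__` as TODOs. For concreteness, here is a hedged toy implementation over in-memory tensors, a stand-in for real file I/O; the class name and sizes are invented for illustration. Note also that the CIFAR-10 cell calls `data_iter.next()`, which is the Python 2 spelling: `next(data_iter)` works on both Python 2 and 3.

```python
import torch
import torch.utils.data as data

class RandomTensorDataset(data.Dataset):
    """Toy stand-in for a file-backed dataset: 100 fake 3x32x32 'images'."""
    def __init__(self, size=100):
        self.images = torch.randn(size, 3, 32, 32)
        self.labels = torch.randint(0, 10, (size,), dtype=torch.long)

    def __getitem__(self, index):
        # A real dataset would read and preprocess one file here.
        return self.images[index], self.labels[index]

    def __len__(self):
        return self.images.size(0)

loader = data.DataLoader(RandomTensorDataset(), batch_size=10, shuffle=True)
images, labels = next(iter(loader))  # next(...), not .next()
print(images.shape, labels.shape)    # torch.Size([10, 3, 32, 32]) torch.Size([10])
```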
{}, + "outputs": [], + "source": [ + "import torch\n", + "import torch.nn as nn\n", + "import torchvision.datasets as dsets\n", + "import torchvision.transforms as transforms\n", + "from torch.autograd import Variable\n", + "\n", + "# Hyper Parameters\n", + "input_size = 784\n", + "hidden_size = 500\n", + "num_classes = 10\n", + "num_epochs = 5\n", + "batch_size = 100\n", + "learning_rate = 0.001\n", + "\n", + "# MNIST Dataset\n", + "train_dataset = dsets.MNIST(\n", + " root='../../data',\n", + " train=True,\n", + " transform=transforms.ToTensor(),\n", + " download=True)\n", + "\n", + "test_dataset = dsets.MNIST(\n", + " root='../../data', train=False, transform=transforms.ToTensor())\n", + "\n", + "# Data Loader (Input Pipeline)\n", + "train_loader = torch.utils.data.DataLoader(\n", + " dataset=train_dataset, batch_size=batch_size, shuffle=True)\n", + "\n", + "test_loader = torch.utils.data.DataLoader(\n", + " dataset=test_dataset, batch_size=batch_size, shuffle=False)\n", + "\n", + "\n", + "# Neural Network Model (1 hidden layer)\n", + "class Net(nn.Module):\n", + " def __init__(self, input_size, hidden_size, num_classes):\n", + " super(Net, self).__init__()\n", + " self.fc1 = nn.Linear(input_size, hidden_size)\n", + " self.relu = nn.ReLU()\n", + " self.fc2 = nn.Linear(hidden_size, num_classes)\n", + "\n", + " def forward(self, x):\n", + " out = self.fc1(x)\n", + " out = self.relu(out)\n", + " out = self.fc2(out)\n", + " return out\n", + "\n", + "\n", + "net = Net(input_size, hidden_size, num_classes)\n", + "net.cuda()\n", + "\n", + "# Loss and Optimizer\n", + "criterion = nn.CrossEntropyLoss()\n", + "optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)\n", + "\n", + "# Train the Model\n", + "for epoch in range(num_epochs):\n", + " for i, (images, labels) in enumerate(train_loader):\n", + " # Convert torch tensor to Variable\n", + " images = Variable(images.view(-1, 28 * 28).cuda())\n", + " labels = Variable(labels.cuda())\n", + "\n", + " # Forward + Backward + Optimize\n", + " optimizer.zero_grad() # zero the gradient buffer\n", + " outputs = net(images)\n", + " loss = criterion(outputs, labels)\n", + " loss.backward()\n", + " optimizer.step()\n", + "\n", + " if (i + 1) % 100 == 0:\n", + " print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f' %\n", + " (epoch + 1, num_epochs, i + 1,\n", + " len(train_dataset) // batch_size, loss.data[0]))\n", + "\n", + "# Test the Model\n", + "correct = 0\n", + "total = 0\n", + "for images, labels in test_loader:\n", + " images = Variable(images.view(-1, 28 * 28)).cuda()\n", + " outputs = net(images)\n", + " _, predicted = torch.max(outputs.data, 1)\n", + " total += labels.size(0)\n", + " correct += (predicted.cpu() == labels).sum()\n", + "\n", + "print('Accuracy of the network on the 10000 test images: %d %%' %\n", + " (100 * correct / total))\n", + "\n", + "# Save the Model\n", + "torch.save(net.state_dict(), 'model.pkl')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.4" + }, + "toc": { + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + 
"toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tutorials/01-basics/feedforward_neural_network/main.ipynb b/tutorials/01-basics/feedforward_neural_network/main.ipynb new file mode 100644 index 00000000..84f1a484 --- /dev/null +++ b/tutorials/01-basics/feedforward_neural_network/main.ipynb @@ -0,0 +1,281 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import torch.nn as nn\n", + "import torchvision.datasets as dsets\n", + "import torchvision.transforms as transforms" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Hyper Parameters" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "input_size = 784\n", + "hidden_size = 500\n", + "num_classes = 10\n", + "num_epochs = 5\n", + "batch_size = 100\n", + "learning_rate = 0.001" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "MNIST Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "train_dataset = dsets.MNIST(\n", + " root='../../data', train=True, transform=transforms.ToTensor(), download=True)\n", + "\n", + "test_dataset = dsets.MNIST(\n", + " root='../../data', train=False, transform=transforms.ToTensor())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Data Loader (Input Pipeline)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "train_loader = torch.utils.data.DataLoader(\n", + " dataset=train_dataset, batch_size=batch_size, shuffle=True)\n", + "\n", + "test_loader = torch.utils.data.DataLoader(\n", + " dataset=test_dataset, batch_size=batch_size, shuffle=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Neural Network Model (1 hidden layer)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "class Net(nn.Module):\n", + " def __init__(self, input_size, hidden_size, num_classes):\n", + " super(Net, self).__init__()\n", + " self.fc1 = nn.Linear(input_size, hidden_size)\n", + " self.relu = nn.ReLU()\n", + " self.fc2 = nn.Linear(hidden_size, num_classes)\n", + "\n", + " def forward(self, x):\n", + " out = self.fc1(x)\n", + " out = self.relu(out)\n", + " out = self.fc2(out)\n", + " return out" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "net = Net(input_size, hidden_size, num_classes)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Loss and Optimizer" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "criterion = nn.CrossEntropyLoss()\n", + "optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Train the Model" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch [1/5], Step [100/600], Loss: 0.4392\n", + "Epoch [1/5], Step [200/600], Loss: 0.2366\n", + "Epoch [1/5], Step [300/600], Loss: 0.2125\n", + "Epoch [1/5], Step [400/600], Loss: 0.2087\n", + "Epoch [1/5], Step [500/600], Loss: 0.2037\n", + "Epoch [1/5], Step [600/600], Loss: 0.0945\n", + "Epoch [2/5], Step [100/600], Loss: 0.1767\n", + "Epoch 
[2/5], Step [200/600], Loss: 0.1293\n", + "Epoch [2/5], Step [300/600], Loss: 0.1616\n", + "Epoch [2/5], Step [400/600], Loss: 0.0632\n", + "Epoch [2/5], Step [500/600], Loss: 0.0570\n", + "Epoch [2/5], Step [600/600], Loss: 0.2703\n", + "Epoch [3/5], Step [100/600], Loss: 0.1002\n", + "Epoch [3/5], Step [200/600], Loss: 0.0762\n", + "Epoch [3/5], Step [300/600], Loss: 0.0502\n", + "Epoch [3/5], Step [400/600], Loss: 0.0854\n", + "Epoch [3/5], Step [500/600], Loss: 0.0161\n", + "Epoch [3/5], Step [600/600], Loss: 0.0661\n", + "Epoch [4/5], Step [100/600], Loss: 0.0297\n", + "Epoch [4/5], Step [200/600], Loss: 0.0376\n", + "Epoch [4/5], Step [300/600], Loss: 0.1031\n", + "Epoch [4/5], Step [400/600], Loss: 0.0485\n", + "Epoch [4/5], Step [500/600], Loss: 0.0706\n", + "Epoch [4/5], Step [600/600], Loss: 0.0458\n", + "Epoch [5/5], Step [100/600], Loss: 0.0289\n", + "Epoch [5/5], Step [200/600], Loss: 0.0245\n", + "Epoch [5/5], Step [300/600], Loss: 0.0354\n", + "Epoch [5/5], Step [400/600], Loss: 0.0149\n", + "Epoch [5/5], Step [500/600], Loss: 0.0194\n", + "Epoch [5/5], Step [600/600], Loss: 0.0370\n" + ] + } + ], + "source": [ + "for epoch in range(num_epochs):\n", + " for i, (images, labels) in enumerate(train_loader):\n", + " # Convert torch tensor to Variable\n", + " images = images.view(-1, 28 * 28)\n", + "\n", + " # Forward + Backward + Optimize\n", + " optimizer.zero_grad() # zero the gradient buffer\n", + " outputs = net(images)\n", + " loss = criterion(outputs, labels)\n", + " loss.backward()\n", + " optimizer.step()\n", + "\n", + " if (i + 1) % 100 == 0:\n", + " print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f' %\n", + " (epoch + 1, num_epochs, i + 1,\n", + " len(train_dataset) // batch_size, loss.item()))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Test the Model" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy of the network on the 10000 test images: 98 %\n" + ] + } + ], + "source": [ + "correct = 0\n", + "total = 0\n", + "for images, labels in test_loader:\n", + " images = images.view(-1, 28 * 28)\n", + " outputs = net(images)\n", + " _, predicted = torch.max(outputs.data, 1)\n", + " total += labels.size(0)\n", + " correct += (predicted == labels).sum()\n", + "\n", + "print('Accuracy of the network on the 10000 test images: %d %%' %\n", + " (100 * correct / total))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Save the Model" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "torch.save(net.state_dict(), 'model.pkl')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.4" + }, + "toc": { + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tutorials/02-intermediate/recurrent_neural_network/main-gpu.ipynb 
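One detail in the feedforward test loop above: the network stays in training mode during evaluation and `correct` accumulates as a tensor. For this plain Linear/ReLU net that is harmless, but once dropout or batch norm enter the model, `net.eval()` matters, and `.item()` keeps `correct` a Python int so the final division is ordinary float math. A hedged sketch of the evaluation with those two changes, with `net` and `test_loader` as defined above:

```python
import torch

net.eval()  # disable dropout/batch-norm training behaviour, if any
correct, total = 0, 0
with torch.no_grad():
    for images, labels in test_loader:
        outputs = net(images.view(-1, 28 * 28))
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print('Accuracy of the network on the 10000 test images: %.2f %%' %
      (100.0 * correct / total))
net.train()  # restore training mode if more epochs follow
```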
b/tutorials/02-intermediate/recurrent_neural_network/main-gpu.ipynb new file mode 100644 index 00000000..06b9a406 --- /dev/null +++ b/tutorials/02-intermediate/recurrent_neural_network/main-gpu.ipynb @@ -0,0 +1,263 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import torch \n", + "import torch.nn as nn\n", + "import torchvision.datasets as dsets\n", + "import torchvision.transforms as transforms\n", + "from torch.autograd import Variable" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Hyper Parameters\n", + "sequence_length = 28\n", + "input_size = 28\n", + "hidden_size = 128\n", + "num_layers = 2\n", + "num_classes = 10\n", + "batch_size = 100\n", + "num_epochs = 2\n", + "learning_rate = 0.01" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# MNIST Dataset\n", + "train_dataset = dsets.MNIST(\n", + " root='../../data/', train=True, transform=transforms.ToTensor(), download=True)\n", + "\n", + "test_dataset = dsets.MNIST(\n", + " root='../../data/', train=False, transform=transforms.ToTensor())" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Data Loader (Input Pipeline)\n", + "train_loader = torch.utils.data.DataLoader(\n", + " dataset=train_dataset, batch_size=batch_size, shuffle=True)\n", + "\n", + "test_loader = torch.utils.data.DataLoader(\n", + " dataset=test_dataset, batch_size=batch_size, shuffle=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# RNN Model (Many-to-One)\n", + "class RNN(nn.Module):\n", + " def __init__(self, input_size, hidden_size, num_layers, num_classes):\n", + " super(RNN, self).__init__()\n", + " self.hidden_size = hidden_size\n", + " self.num_layers = num_layers\n", + " self.lstm = nn.LSTM(\n", + " input_size, hidden_size, num_layers, batch_first=True)\n", + " self.fc = nn.Linear(hidden_size, num_classes)\n", + "\n", + " def forward(self, x):\n", + " # Set initial states\n", + " h0 = Variable(\n", + " torch.zeros(self.num_layers, x.size(0), self.hidden_size).cuda())\n", + " c0 = Variable(\n", + " torch.zeros(self.num_layers, x.size(0), self.hidden_size).cuda())\n", + "\n", + " # Forward propagate RNN\n", + " out, _ = self.lstm(x, (h0, c0))\n", + "\n", + " # Decode hidden state of last time step\n", + " out = self.fc(out[:, -1, :])\n", + " return out" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "RNN(\n", + " (lstm): LSTM(28, 128, num_layers=2, batch_first=True)\n", + " (fc): Linear(in_features=128, out_features=10, bias=True)\n", + ")" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rnn = RNN(input_size, hidden_size, num_layers, num_classes)\n", + "rnn.cuda()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Loss and Optimizer\n", + "criterion = nn.CrossEntropyLoss()\n", + "optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", 
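The many-to-one model in this notebook treats each 28x28 MNIST image as a length-28 sequence of 28-dimensional rows and classifies from the LSTM's final time step. A self-contained CPU sketch (no `Variable`, no `.cuda()`) that checks the tensor shapes involved, using the notebook's hyperparameters:

```python
import torch
import torch.nn as nn

batch_size, sequence_length, input_size = 100, 28, 28
hidden_size, num_layers = 128, 2

lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
x = torch.randn(batch_size, sequence_length, input_size)  # batch of row-sequences
h0 = torch.zeros(num_layers, batch_size, hidden_size)     # (layers, batch, hidden)
c0 = torch.zeros(num_layers, batch_size, hidden_size)

out, (hn, cn) = lstm(x, (h0, c0))
print(out.shape)            # torch.Size([100, 28, 128]) -- every time step
print(out[:, -1, :].shape)  # torch.Size([100, 128]) -- what the fc layer sees
```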
+ "text": [ + "Epoch [1/2], Step [100/600], Loss: 0.5494\n", + "Epoch [1/2], Step [200/600], Loss: 0.2035\n", + "Epoch [1/2], Step [300/600], Loss: 0.1505\n", + "Epoch [1/2], Step [400/600], Loss: 0.3548\n", + "Epoch [1/2], Step [500/600], Loss: 0.1892\n", + "Epoch [1/2], Step [600/600], Loss: 0.1495\n", + "Epoch [2/2], Step [100/600], Loss: 0.0616\n", + "Epoch [2/2], Step [200/600], Loss: 0.1158\n", + "Epoch [2/2], Step [300/600], Loss: 0.1124\n", + "Epoch [2/2], Step [400/600], Loss: 0.1369\n", + "Epoch [2/2], Step [500/600], Loss: 0.1065\n", + "Epoch [2/2], Step [600/600], Loss: 0.1572\n" + ] + } + ], + "source": [ + "# Train the Model\n", + "for epoch in range(num_epochs):\n", + " for i, (images, labels) in enumerate(train_loader):\n", + " images = Variable(images.view(-1, sequence_length, input_size)).cuda()\n", + " labels = Variable(labels).cuda()\n", + "\n", + " # Forward + Backward + Optimize\n", + " optimizer.zero_grad()\n", + " outputs = rnn(images)\n", + " loss = criterion(outputs, labels)\n", + " loss.backward()\n", + " optimizer.step()\n", + "\n", + " if (i + 1) % 100 == 0:\n", + " print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f' %\n", + " (epoch + 1, num_epochs, i + 1,\n", + " len(train_dataset) // batch_size, loss.item()))" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test Accuracy of the model on the 10000 test images: 97 %\n" + ] + } + ], + "source": [ + "# Test the Model\n", + "correct = 0\n", + "total = 0\n", + "for images, labels in test_loader:\n", + " images = Variable(images.view(-1, sequence_length, input_size)).cuda()\n", + " outputs = rnn(images)\n", + " _, predicted = torch.max(outputs.data, 1)\n", + " total += labels.size(0)\n", + " correct += (predicted.cpu() == labels).sum()\n", + "\n", + "print('Test Accuracy of the model on the 10000 test images: %d %%' %\n", + " (100 * correct / total))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Save the Model" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "torch.save(rnn.state_dict(), 'rnn.pkl')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.3" + }, + "toc": { + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "toc_cell": false, + "toc_position": {}, + "toc_section_display": "block", + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 34f0f8fa99c961cf1f0742ccb9726633efe6e3e1 Mon Sep 17 00:00:00 2001 From: wanglijun Date: Sun, 6 May 2018 10:43:13 +0800 Subject: [PATCH 3/4] update --- .../main-gpu.ipynb | 299 ++++++++++++++ .../deep_residual_network/main-gpu.ipynb | 209 ++++++++++ .../language_model/main-gpu.ipynb | 379 ++++++++++++++++++ .../recurrent_neural_network/main-gpu.ipynb | 79 ++-- 4 files changed, 936 insertions(+), 30 deletions(-) create mode 100644 tutorials/02-intermediate/bidirectional_recurrent_neural_network/main-gpu.ipynb create mode 100644 tutorials/02-intermediate/deep_residual_network/main-gpu.ipynb create mode 100644 
tutorials/02-intermediate/language_model/main-gpu.ipynb diff --git a/tutorials/02-intermediate/bidirectional_recurrent_neural_network/main-gpu.ipynb b/tutorials/02-intermediate/bidirectional_recurrent_neural_network/main-gpu.ipynb new file mode 100644 index 00000000..8d5853ed --- /dev/null +++ b/tutorials/02-intermediate/bidirectional_recurrent_neural_network/main-gpu.ipynb @@ -0,0 +1,299 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import torch\n", + "import torch.nn as nn\n", + "import torchvision.datasets as dsets\n", + "import torchvision.transforms as transforms\n", + "from torch.autograd import Variable" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Hyper Parameters" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "sequence_length = 28\n", + "input_size = 28\n", + "hidden_size = 128\n", + "num_layers = 2\n", + "num_classes = 10\n", + "batch_size = 100\n", + "num_epochs = 2\n", + "learning_rate = 0.003" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "MNIST Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "train_dataset = dsets.MNIST(\n", + " root='../../data/', train=True, transform=transforms.ToTensor(), download=True)\n", + "\n", + "test_dataset = dsets.MNIST(\n", + " root='../../data/', train=False, transform=transforms.ToTensor())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Data Loader (Input Pipeline)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "train_loader = torch.utils.data.DataLoader(\n", + " dataset=train_dataset, batch_size=batch_size, shuffle=True)\n", + "\n", + "test_loader = torch.utils.data.DataLoader(\n", + " dataset=test_dataset, batch_size=batch_size, shuffle=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "BiRNN Model (Many-to-One)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "class BiRNN(nn.Module):\n", + " def __init__(self, input_size, hidden_size, num_layers, num_classes):\n", + " super(BiRNN, self).__init__()\n", + " self.hidden_size = hidden_size\n", + " self.num_layers = num_layers\n", + " self.lstm = nn.LSTM(\n", + " input_size,\n", + " hidden_size,\n", + " num_layers,\n", + " batch_first=True,\n", + " bidirectional=True)\n", + " self.fc = nn.Linear(hidden_size * 2, num_classes) # 2 for bidirection\n", + "\n", + " def forward(self, x):\n", + " # Set initial states\n", + " h0 = Variable(\n", + " torch.zeros(self.num_layers * 2, x.size(0),\n", + " self.hidden_size)).cuda() # 2 for bidirection\n", + " c0 = Variable(\n", + " torch.zeros(self.num_layers * 2, x.size(0),\n", + " self.hidden_size)).cuda()\n", + "\n", + " # Forward propagate RNN\n", + " out, _ = self.lstm(x, (h0, c0))\n", + "\n", + " # Decode hidden state of last time step\n", + " out = self.fc(out[:, -1, :])\n", + " return out" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "BiRNN(\n", + " (lstm): LSTM(28, 128, num_layers=2, batch_first=True, bidirectional=True)\n", + " (fc): Linear(in_features=256, out_features=10, bias=True)\n", + ")" + ] + }, + "execution_count": 8, 
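In the bidirectional model above, the initial states are sized `num_layers * 2` and the classifier takes `hidden_size * 2` inputs because the forward and backward passes each contribute a hidden state; at the last time step, `out[:, -1, :]` concatenates the forward direction's final step with the backward direction's first, and that is what the notebook's `fc` layer consumes. A self-contained check of the doubling:

```python
import torch
import torch.nn as nn

lstm = nn.LSTM(28, 128, num_layers=2, batch_first=True, bidirectional=True)
x = torch.randn(100, 28, 28)
h0 = torch.zeros(2 * 2, 100, 128)  # num_layers * num_directions
c0 = torch.zeros(2 * 2, 100, 128)

out, _ = lstm(x, (h0, c0))
print(out.shape)  # torch.Size([100, 28, 256]) -- 2 * hidden_size features
```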
+ "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rnn = BiRNN(input_size, hidden_size, num_layers, num_classes)\n", + "rnn.cuda()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Loss and Optimizer" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "criterion = nn.CrossEntropyLoss()\n", + "optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch [1/2], Step [100/600], Loss: 0.7779\n", + "Epoch [1/2], Step [200/600], Loss: 0.2361\n", + "Epoch [1/2], Step [300/600], Loss: 0.1961\n", + "Epoch [1/2], Step [400/600], Loss: 0.2657\n", + "Epoch [1/2], Step [500/600], Loss: 0.1520\n", + "Epoch [1/2], Step [600/600], Loss: 0.0947\n", + "Epoch [2/2], Step [100/600], Loss: 0.1011\n", + "Epoch [2/2], Step [200/600], Loss: 0.0931\n", + "Epoch [2/2], Step [300/600], Loss: 0.0934\n", + "Epoch [2/2], Step [400/600], Loss: 0.1860\n", + "Epoch [2/2], Step [500/600], Loss: 0.0562\n", + "Epoch [2/2], Step [600/600], Loss: 0.0969\n" + ] + } + ], + "source": [ + "# Train the Model\n", + "for epoch in range(num_epochs):\n", + " for i, (images, labels) in enumerate(train_loader):\n", + " images = Variable(images.view(-1, sequence_length, input_size)).cuda()\n", + " labels = Variable(labels).cuda()\n", + "\n", + " # Forward + Backward + Optimize\n", + " optimizer.zero_grad()\n", + " outputs = rnn(images)\n", + " loss = criterion(outputs, labels)\n", + " loss.backward()\n", + " optimizer.step()\n", + "\n", + " if (i + 1) % 100 == 0:\n", + " print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f' %\n", + " (epoch + 1, num_epochs, i + 1,\n", + " len(train_dataset) // batch_size, loss.item()))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Test the Model" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test Accuracy of the model on the 10000 test images: 97 %\n" + ] + } + ], + "source": [ + "correct = 0\n", + "total = 0\n", + "for images, labels in test_loader:\n", + " images = Variable(images.view(-1, sequence_length, input_size)).cuda()\n", + " outputs = rnn(images)\n", + " _, predicted = torch.max(outputs.data, 1)\n", + " total += labels.size(0)\n", + " correct += (predicted.cpu() == labels).sum()\n", + "\n", + "print('Test Accuracy of the model on the 10000 test images: %d %%' %\n", + " (100 * correct / total))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Save the Model" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "torch.save(rnn.state_dict(), 'rnn.pkl')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.3" + }, + "toc": { + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "toc_cell": false, + "toc_position": {}, + "toc_section_display": "block", + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 
2 +} diff --git a/tutorials/02-intermediate/deep_residual_network/main-gpu.ipynb b/tutorials/02-intermediate/deep_residual_network/main-gpu.ipynb new file mode 100644 index 00000000..235fcef3 --- /dev/null +++ b/tutorials/02-intermediate/deep_residual_network/main-gpu.ipynb @@ -0,0 +1,209 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Implementation of https://arxiv.org/pdf/1512.03385.pdf\n", + "# See section 4.2 for model architecture on CIFAR-10.\n", + "# Some part of the code was referenced below.\n", + "# https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py\n", + "import torch\n", + "import torch.nn as nn\n", + "import torchvision.datasets as dsets\n", + "import torchvision.transforms as transforms\n", + "from torch.autograd import Variable" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Image Preprocessing\n", + "transform = transforms.Compose([\n", + " transforms.Scale(40),\n", + " transforms.RandomHorizontalFlip(),\n", + " transforms.RandomCrop(32),\n", + " transforms.ToTensor()\n", + "])\n", + "\n", + "# CIFAR-10 Dataset\n", + "train_dataset = dsets.CIFAR10(\n", + " root='./data/', train=True, transform=transform, download=True)\n", + "\n", + "test_dataset = dsets.CIFAR10(\n", + " root='./data/', train=False, transform=transforms.ToTensor())\n", + "\n", + "# Data Loader (Input Pipeline)\n", + "train_loader = torch.utils.data.DataLoader(\n", + " dataset=train_dataset, batch_size=100, shuffle=True)\n", + "\n", + "test_loader = torch.utils.data.DataLoader(\n", + " dataset=test_dataset, batch_size=100, shuffle=False)\n", + "\n", + "\n", + "# 3x3 Convolution\n", + "def conv3x3(in_channels, out_channels, stride=1):\n", + " return nn.Conv2d(\n", + " in_channels,\n", + " out_channels,\n", + " kernel_size=3,\n", + " stride=stride,\n", + " padding=1,\n", + " bias=False)\n", + "\n", + "\n", + "# Residual Block\n", + "class ResidualBlock(nn.Module):\n", + " def __init__(self, in_channels, out_channels, stride=1, downsample=None):\n", + " super(ResidualBlock, self).__init__()\n", + " self.conv1 = conv3x3(in_channels, out_channels, stride)\n", + " self.bn1 = nn.BatchNorm2d(out_channels)\n", + " self.relu = nn.ReLU(inplace=True)\n", + " self.conv2 = conv3x3(out_channels, out_channels)\n", + " self.bn2 = nn.BatchNorm2d(out_channels)\n", + " self.downsample = downsample\n", + "\n", + " def forward(self, x):\n", + " residual = x\n", + " out = self.conv1(x)\n", + " out = self.bn1(out)\n", + " out = self.relu(out)\n", + " out = self.conv2(out)\n", + " out = self.bn2(out)\n", + " if self.downsample:\n", + " residual = self.downsample(x)\n", + " out += residual\n", + " out = self.relu(out)\n", + " return out\n", + "\n", + "\n", + "# ResNet Module\n", + "class ResNet(nn.Module):\n", + " def __init__(self, block, layers, num_classes=10):\n", + " super(ResNet, self).__init__()\n", + " self.in_channels = 16\n", + " self.conv = conv3x3(3, 16)\n", + " self.bn = nn.BatchNorm2d(16)\n", + " self.relu = nn.ReLU(inplace=True)\n", + " self.layer1 = self.make_layer(block, 16, layers[0])\n", + " self.layer2 = self.make_layer(block, 32, layers[0], 2)\n", + " self.layer3 = self.make_layer(block, 64, layers[1], 2)\n", + " self.avg_pool = nn.AvgPool2d(8)\n", + " self.fc = nn.Linear(64, num_classes)\n", + "\n", + " def make_layer(self, block, out_channels, blocks, stride=1):\n", + " downsample = None\n", + " if 
(stride != 1) or (self.in_channels != out_channels):\n", + " downsample = nn.Sequential(\n", + " conv3x3(self.in_channels, out_channels, stride=stride),\n", + " nn.BatchNorm2d(out_channels))\n", + " layers = []\n", + " layers.append(\n", + " block(self.in_channels, out_channels, stride, downsample))\n", + " self.in_channels = out_channels\n", + " for i in range(1, blocks):\n", + " layers.append(block(out_channels, out_channels))\n", + " return nn.Sequential(*layers)\n", + "\n", + " def forward(self, x):\n", + " out = self.conv(x)\n", + " out = self.bn(out)\n", + " out = self.relu(out)\n", + " out = self.layer1(out)\n", + " out = self.layer2(out)\n", + " out = self.layer3(out)\n", + " out = self.avg_pool(out)\n", + " out = out.view(out.size(0), -1)\n", + " out = self.fc(out)\n", + " return out\n", + "\n", + "\n", + "resnet = ResNet(ResidualBlock, [3, 3, 3])\n", + "resnet.cuda()\n", + "\n", + "# Loss and Optimizer\n", + "criterion = nn.CrossEntropyLoss()\n", + "lr = 0.001\n", + "optimizer = torch.optim.Adam(resnet.parameters(), lr=lr)\n", + "\n", + "# Training\n", + "for epoch in range(80):\n", + " for i, (images, labels) in enumerate(train_loader):\n", + " images = Variable(images.cuda())\n", + " labels = Variable(labels.cuda())\n", + "\n", + " # Forward + Backward + Optimize\n", + " optimizer.zero_grad()\n", + " outputs = resnet(images)\n", + " loss = criterion(outputs, labels)\n", + " loss.backward()\n", + " optimizer.step()\n", + "\n", + " if (i + 1) % 100 == 0:\n", + " print(\"Epoch [%d/%d], Iter [%d/%d] Loss: %.4f\" %\n", + " (epoch + 1, 80, i + 1, 500, loss.data[0]))\n", + "\n", + " # Decaying Learning Rate\n", + " if (epoch + 1) % 20 == 0:\n", + " lr /= 3\n", + " optimizer = torch.optim.Adam(resnet.parameters(), lr=lr)\n", + "\n", + "# Test\n", + "correct = 0\n", + "total = 0\n", + "for images, labels in test_loader:\n", + " images = Variable(images.cuda())\n", + " outputs = resnet(images)\n", + " _, predicted = torch.max(outputs.data, 1)\n", + " total += labels.size(0)\n", + " correct += (predicted.cpu() == labels).sum()\n", + "\n", + "print('Accuracy of the model on the test images: %d %%' %\n", + " (100 * correct / total))\n", + "\n", + "# Save the Model\n", + "torch.save(resnet.state_dict(), 'resnet.pkl')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.3" + }, + "toc": { + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "toc_cell": false, + "toc_position": {}, + "toc_section_display": "block", + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tutorials/02-intermediate/language_model/main-gpu.ipynb b/tutorials/02-intermediate/language_model/main-gpu.ipynb new file mode 100644 index 00000000..9f1a533f --- /dev/null +++ b/tutorials/02-intermediate/language_model/main-gpu.ipynb @@ -0,0 +1,379 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Some part of the code was referenced from below.\n", + "# https://github.com/pytorch/examples/tree/master/word_language_model \n", + "import torch \n", + "import torch.nn as nn\n", + "import numpy as np\n", + "from torch.autograd import 
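The ResNet training cell above decays the learning rate by constructing a fresh Adam optimizer every 20 epochs, which silently discards Adam's running moment estimates. A sketch of decaying in place instead, assuming only the `optimizer` built above:

    def decay_lr(optimizer, factor=3.0):
        # Lower the learning rate of every parameter group in place,
        # preserving the optimizer's first/second moment buffers.
        for param_group in optimizer.param_groups:
            param_group['lr'] /= factor

    # The built-in scheduler expresses the same schedule declaratively
    # (assumed settings matching the loop above: divide by 3 every 20 epochs):
    # scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=1/3)
    # ...then call scheduler.step() once per epoch after optimizer.step().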
Variable\n", + "from data_utils import Dictionary, Corpus" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Hyper Parameters\n", + "embed_size = 128\n", + "hidden_size = 1024\n", + "num_layers = 1\n", + "num_epochs = 5\n", + "num_samples = 1000 # number of words to be sampled\n", + "batch_size = 20\n", + "seq_length = 30\n", + "learning_rate = 0.002" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Load Penn Treebank Dataset\n", + "train_path = './data/train.txt'\n", + "sample_path = './sample.txt'\n", + "corpus = Corpus()\n", + "ids = corpus.get_data(train_path, batch_size)\n", + "vocab_size = len(corpus.dictionary)\n", + "num_batches = ids.size(1) // seq_length" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# RNN Based Language Model\n", + "class RNNLM(nn.Module):\n", + " def __init__(self, vocab_size, embed_size, hidden_size, num_layers):\n", + " super(RNNLM, self).__init__()\n", + " self.embed = nn.Embedding(vocab_size, embed_size)\n", + " self.lstm = nn.LSTM(embed_size, hidden_size, num_layers, batch_first=True)\n", + " self.linear = nn.Linear(hidden_size, vocab_size)\n", + " self.init_weights()\n", + "\n", + " def init_weights(self):\n", + " self.embed.weight.data.uniform_(-0.1, 0.1)\n", + " self.linear.bias.data.fill_(0)\n", + " self.linear.weight.data.uniform_(-0.1, 0.1)\n", + "\n", + " def forward(self, x, h):\n", + " # Embed word ids to vectors\n", + " x = self.embed(x) \n", + "\n", + " # Forward propagate RNN \n", + " out, h = self.lstm(x, h)\n", + "\n", + " # Reshape output to (batch_size*sequence_length, hidden_size)\n", + " out = out.contiguous().view(out.size(0)*out.size(1), out.size(2))\n", + "\n", + " # Decode hidden states of all time step\n", + " out = self.linear(out) \n", + " return out, h" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "RNNLM(\n", + " (embed): Embedding(10000, 128)\n", + " (lstm): LSTM(128, 1024, batch_first=True)\n", + " (linear): Linear(in_features=1024, out_features=10000, bias=True)\n", + ")" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model = RNNLM(vocab_size, embed_size, hidden_size, num_layers)\n", + "model.cuda()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Loss and Optimizer\n", + "criterion = nn.CrossEntropyLoss()\n", + "optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Truncated Backpropagation \n", + "def detach(states):\n", + " return [state.detach() for state in states] " + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch [1/5], Step[0/1549], Loss: 3.366, Perplexity: 28.96\n", + "Epoch [1/5], Step[100/1549], Loss: 2.934, Perplexity: 18.80\n", + "Epoch [1/5], Step[200/1549], Loss: 3.104, Perplexity: 22.29\n", + "Epoch [1/5], Step[300/1549], Loss: 3.105, Perplexity: 22.31\n", + "Epoch [1/5], Step[400/1549], Loss: 3.024, Perplexity: 20.58\n", + "Epoch [1/5], 
Step[500/1549], Loss: 2.551, Perplexity: 12.82\n", + "Epoch [1/5], Step[600/1549], Loss: 3.102, Perplexity: 22.23\n", + "Epoch [1/5], Step[700/1549], Loss: 3.039, Perplexity: 20.88\n", + "Epoch [1/5], Step[800/1549], Loss: 3.133, Perplexity: 22.93\n", + "Epoch [1/5], Step[900/1549], Loss: 2.749, Perplexity: 15.63\n", + "Epoch [1/5], Step[1000/1549], Loss: 2.903, Perplexity: 18.23\n", + "Epoch [1/5], Step[1100/1549], Loss: 3.026, Perplexity: 20.62\n", + "Epoch [1/5], Step[1200/1549], Loss: 3.107, Perplexity: 22.36\n", + "Epoch [1/5], Step[1300/1549], Loss: 2.793, Perplexity: 16.33\n", + "Epoch [1/5], Step[1400/1549], Loss: 2.511, Perplexity: 12.32\n", + "Epoch [1/5], Step[1500/1549], Loss: 2.926, Perplexity: 18.65\n", + "Epoch [2/5], Step[0/1549], Loss: 3.054, Perplexity: 21.21\n", + "Epoch [2/5], Step[100/1549], Loss: 2.815, Perplexity: 16.69\n", + "Epoch [2/5], Step[200/1549], Loss: 2.857, Perplexity: 17.41\n", + "Epoch [2/5], Step[300/1549], Loss: 2.935, Perplexity: 18.82\n", + "Epoch [2/5], Step[400/1549], Loss: 2.793, Perplexity: 16.33\n", + "Epoch [2/5], Step[500/1549], Loss: 2.458, Perplexity: 11.68\n", + "Epoch [2/5], Step[600/1549], Loss: 3.023, Perplexity: 20.54\n", + "Epoch [2/5], Step[700/1549], Loss: 2.917, Perplexity: 18.48\n", + "Epoch [2/5], Step[800/1549], Loss: 2.967, Perplexity: 19.44\n", + "Epoch [2/5], Step[900/1549], Loss: 2.681, Perplexity: 14.60\n", + "Epoch [2/5], Step[1000/1549], Loss: 2.771, Perplexity: 15.98\n", + "Epoch [2/5], Step[1100/1549], Loss: 2.890, Perplexity: 18.00\n", + "Epoch [2/5], Step[1200/1549], Loss: 2.953, Perplexity: 19.17\n", + "Epoch [2/5], Step[1300/1549], Loss: 2.671, Perplexity: 14.45\n", + "Epoch [2/5], Step[1400/1549], Loss: 2.350, Perplexity: 10.49\n", + "Epoch [2/5], Step[1500/1549], Loss: 2.826, Perplexity: 16.87\n", + "Epoch [3/5], Step[0/1549], Loss: 2.970, Perplexity: 19.49\n", + "Epoch [3/5], Step[100/1549], Loss: 2.744, Perplexity: 15.54\n", + "Epoch [3/5], Step[200/1549], Loss: 2.689, Perplexity: 14.72\n", + "Epoch [3/5], Step[300/1549], Loss: 2.805, Perplexity: 16.52\n", + "Epoch [3/5], Step[400/1549], Loss: 2.699, Perplexity: 14.86\n", + "Epoch [3/5], Step[500/1549], Loss: 2.399, Perplexity: 11.01\n", + "Epoch [3/5], Step[600/1549], Loss: 2.939, Perplexity: 18.90\n", + "Epoch [3/5], Step[700/1549], Loss: 2.776, Perplexity: 16.05\n", + "Epoch [3/5], Step[800/1549], Loss: 2.778, Perplexity: 16.08\n", + "Epoch [3/5], Step[900/1549], Loss: 2.510, Perplexity: 12.31\n", + "Epoch [3/5], Step[1000/1549], Loss: 2.580, Perplexity: 13.19\n", + "Epoch [3/5], Step[1100/1549], Loss: 2.746, Perplexity: 15.58\n", + "Epoch [3/5], Step[1200/1549], Loss: 2.761, Perplexity: 15.82\n", + "Epoch [3/5], Step[1300/1549], Loss: 2.538, Perplexity: 12.66\n", + "Epoch [3/5], Step[1400/1549], Loss: 2.248, Perplexity: 9.47\n", + "Epoch [3/5], Step[1500/1549], Loss: 2.809, Perplexity: 16.59\n", + "Epoch [4/5], Step[0/1549], Loss: 2.740, Perplexity: 15.49\n", + "Epoch [4/5], Step[100/1549], Loss: 2.592, Perplexity: 13.36\n", + "Epoch [4/5], Step[200/1549], Loss: 2.838, Perplexity: 17.09\n", + "Epoch [4/5], Step[300/1549], Loss: 2.675, Perplexity: 14.51\n", + "Epoch [4/5], Step[400/1549], Loss: 2.687, Perplexity: 14.68\n", + "Epoch [4/5], Step[500/1549], Loss: 2.259, Perplexity: 9.57\n", + "Epoch [4/5], Step[600/1549], Loss: 2.844, Perplexity: 17.19\n", + "Epoch [4/5], Step[700/1549], Loss: 2.653, Perplexity: 14.20\n", + "Epoch [4/5], Step[800/1549], Loss: 2.777, Perplexity: 16.07\n", + "Epoch [4/5], Step[900/1549], Loss: 2.404, Perplexity: 11.07\n", + "Epoch 
[4/5], Step[1000/1549], Loss: 2.616, Perplexity: 13.68\n", + "Epoch [4/5], Step[1100/1549], Loss: 2.661, Perplexity: 14.31\n", + "Epoch [4/5], Step[1200/1549], Loss: 2.794, Perplexity: 16.34\n", + "Epoch [4/5], Step[1300/1549], Loss: 2.390, Perplexity: 10.92\n", + "Epoch [4/5], Step[1400/1549], Loss: 2.164, Perplexity: 8.70\n", + "Epoch [4/5], Step[1500/1549], Loss: 2.681, Perplexity: 14.60\n", + "Epoch [5/5], Step[0/1549], Loss: 2.711, Perplexity: 15.04\n", + "Epoch [5/5], Step[100/1549], Loss: 2.411, Perplexity: 11.15\n", + "Epoch [5/5], Step[200/1549], Loss: 2.669, Perplexity: 14.43\n", + "Epoch [5/5], Step[300/1549], Loss: 2.623, Perplexity: 13.78\n", + "Epoch [5/5], Step[400/1549], Loss: 2.612, Perplexity: 13.63\n", + "Epoch [5/5], Step[500/1549], Loss: 2.265, Perplexity: 9.63\n", + "Epoch [5/5], Step[600/1549], Loss: 2.813, Perplexity: 16.66\n", + "Epoch [5/5], Step[700/1549], Loss: 2.613, Perplexity: 13.64\n", + "Epoch [5/5], Step[800/1549], Loss: 2.695, Perplexity: 14.80\n", + "Epoch [5/5], Step[900/1549], Loss: 2.280, Perplexity: 9.78\n", + "Epoch [5/5], Step[1000/1549], Loss: 2.487, Perplexity: 12.03\n", + "Epoch [5/5], Step[1100/1549], Loss: 2.676, Perplexity: 14.52\n", + "Epoch [5/5], Step[1200/1549], Loss: 2.749, Perplexity: 15.63\n", + "Epoch [5/5], Step[1300/1549], Loss: 2.291, Perplexity: 9.88\n", + "Epoch [5/5], Step[1400/1549], Loss: 2.137, Perplexity: 8.47\n", + "Epoch [5/5], Step[1500/1549], Loss: 2.635, Perplexity: 13.94\n" + ] + } + ], + "source": [ + "# Training\n", + "for epoch in range(num_epochs):\n", + " # Initial hidden and memory states\n", + " device = torch.device('cuda')\n", + " states = (torch.zeros(num_layers, batch_size, hidden_size, device=device),\n", + " torch.zeros(num_layers, batch_size, hidden_size, device=device))\n", + "\n", + " for i in range(0, ids.size(1) - seq_length, seq_length):\n", + " # Get batch inputs and targets\n", + " inputs = ids[:, i:i+seq_length].cuda()\n", + " targets = ids[:, (i+1):(i+1)+seq_length].contiguous().cuda()\n", + "\n", + " # Forward + Backward + Optimize\n", + " model.zero_grad()\n", + " states = detach(states)\n", + " outputs, states = model(inputs, states) \n", + " loss = criterion(outputs, targets.view(-1))\n", + " loss.backward()\n", + " torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)\n", + " optimizer.step()\n", + "\n", + " step = (i+1) // seq_length\n", + " if step % 100 == 0:\n", + " print ('Epoch [%d/%d], Step[%d/%d], Loss: %.3f, Perplexity: %5.2f' %\n", + " (epoch+1, num_epochs, step, num_batches, loss.item(), np.exp(loss.item())))" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/wanglijun/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:10: UserWarning: volatile was removed and now has no effect. 
Use `with torch.no_grad():` instead.\n", + "  # Remove the CWD from sys.path while we load stuff.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sampled [100/1000] words and saved to ./sample.txt\n", + "Sampled [200/1000] words and saved to ./sample.txt\n", + "Sampled [300/1000] words and saved to ./sample.txt\n", + "Sampled [400/1000] words and saved to ./sample.txt\n", + "Sampled [500/1000] words and saved to ./sample.txt\n", + "Sampled [600/1000] words and saved to ./sample.txt\n", + "Sampled [700/1000] words and saved to ./sample.txt\n", + "Sampled [800/1000] words and saved to ./sample.txt\n", + "Sampled [900/1000] words and saved to ./sample.txt\n", + "Sampled [1000/1000] words and saved to ./sample.txt\n" + ] + } + ], + "source": [ + "# Sampling\n", + "with open(sample_path, 'w') as f:\n", + "    # Set initial hidden and memory states\n", + "    state = (Variable(torch.zeros(num_layers, 1, hidden_size)).cuda(),\n", + "             Variable(torch.zeros(num_layers, 1, hidden_size)).cuda())\n", + "\n", + "    # Select one word id randomly\n", + "    prob = torch.ones(vocab_size)\n", + "    input = Variable(\n", + "        torch.multinomial(prob, num_samples=1).unsqueeze(1),\n", + "        volatile=True).cuda()\n", + "\n", + "    for i in range(num_samples):\n", + "        # Forward propagate rnn\n", + "        output, state = model(input, state)\n", + "\n", + "        # Sample a word id\n", + "        prob = output.squeeze().data.exp().cpu()\n", + "        word_id = torch.multinomial(prob, 1)[0]\n", + "\n", + "        # Feed sampled word id to next time step\n", + "        input.data.fill_(word_id)\n", + "\n", + "        # File write\n", + "        word = corpus.dictionary.idx2word[word_id.item()]\n", + "        word = '\\n' if word == '<eos>' else word + ' '\n", + "        f.write(word)\n", + "\n", + "        if (i + 1) % 100 == 0:\n", + "            print('Sampled [%d/%d] words and saved to %s' % (i + 1, num_samples,\n", + "                                                            sample_path))" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "# Save the Trained Model\n", + "torch.save(model.state_dict(), 'model.pkl')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.3" + }, + "toc": { + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "toc_cell": false, + "toc_position": {}, + "toc_section_display": "block", + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tutorials/02-intermediate/recurrent_neural_network/main-gpu.ipynb b/tutorials/02-intermediate/recurrent_neural_network/main-gpu.ipynb index 06b9a406..50a5e3c1 100644 --- a/tutorials/02-intermediate/recurrent_neural_network/main-gpu.ipynb +++ b/tutorials/02-intermediate/recurrent_neural_network/main-gpu.ipynb @@ -15,6 +15,13 @@ "from torch.autograd import Variable" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Hyper Parameters" + ] + }, { "cell_type": "code", "execution_count": 2, @@ -23,7 +30,6 @@ }, "outputs": [], "source": [ - "# Hyper Parameters\n", "sequence_length = 28\n", "input_size = 28\n", "hidden_size = 128\n", @@ -34,6 +40,13 @@ "learning_rate = 0.01" ] }, + { + "cell_type": 
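The stderr warning above flags the removed `volatile` flag in the sampling cell. A sketch of the same sampling loop under `torch.no_grad()`, assuming `model`, `corpus`, `vocab_size`, `num_layers`, `hidden_size`, `num_samples`, and `sample_path` as defined in this notebook, and that '<eos>' is the end-of-sentence token produced by the accompanying data_utils:

    import torch

    with open(sample_path, 'w') as f, torch.no_grad():
        # Initial hidden and cell states; plain tensors replace Variable.
        state = (torch.zeros(num_layers, 1, hidden_size).cuda(),
                 torch.zeros(num_layers, 1, hidden_size).cuda())

        # Start from one uniformly sampled word id.
        prob = torch.ones(vocab_size)
        inp = torch.multinomial(prob, num_samples=1).unsqueeze(1).cuda()

        for i in range(num_samples):
            output, state = model(inp, state)

            # Sample the next word id from the exponentiated logits.
            prob = output.squeeze().exp().cpu()
            word_id = torch.multinomial(prob, 1)[0]
            inp.fill_(word_id.item())

            word = corpus.dictionary.idx2word[word_id.item()]
            f.write('\n' if word == '<eos>' else word + ' ')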
"markdown", + "metadata": {}, + "source": [ + "MNIST Dataset" + ] + }, { "cell_type": "code", "execution_count": 3, @@ -42,7 +55,6 @@ }, "outputs": [], "source": [ - "# MNIST Dataset\n", "train_dataset = dsets.MNIST(\n", " root='../../data/', train=True, transform=transforms.ToTensor(), download=True)\n", "\n", @@ -50,6 +62,13 @@ " root='../../data/', train=False, transform=transforms.ToTensor())" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Data Loader (Input Pipeline)" + ] + }, { "cell_type": "code", "execution_count": 4, @@ -58,7 +77,6 @@ }, "outputs": [], "source": [ - "# Data Loader (Input Pipeline)\n", "train_loader = torch.utils.data.DataLoader(\n", " dataset=train_dataset, batch_size=batch_size, shuffle=True)\n", "\n", @@ -66,6 +84,13 @@ " dataset=test_dataset, batch_size=batch_size, shuffle=False)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "RNN Model (Many-to-One)" + ] + }, { "cell_type": "code", "execution_count": 5, @@ -74,7 +99,6 @@ }, "outputs": [], "source": [ - "# RNN Model (Many-to-One)\n", "class RNN(nn.Module):\n", " def __init__(self, input_size, hidden_size, num_layers, num_classes):\n", " super(RNN, self).__init__()\n", @@ -85,11 +109,10 @@ " self.fc = nn.Linear(hidden_size, num_classes)\n", "\n", " def forward(self, x):\n", + " device = torch.device('cuda')\n", " # Set initial states\n", - " h0 = Variable(\n", - " torch.zeros(self.num_layers, x.size(0), self.hidden_size).cuda())\n", - " c0 = Variable(\n", - " torch.zeros(self.num_layers, x.size(0), self.hidden_size).cuda())\n", + " h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size, device=device)\n", + " c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size, device=device)\n", "\n", " # Forward propagate RNN\n", " out, _ = self.lstm(x, (h0, c0))\n", @@ -145,18 +168,18 @@ "name": "stdout", "output_type": "stream", "text": [ - "Epoch [1/2], Step [100/600], Loss: 0.5494\n", - "Epoch [1/2], Step [200/600], Loss: 0.2035\n", - "Epoch [1/2], Step [300/600], Loss: 0.1505\n", - "Epoch [1/2], Step [400/600], Loss: 0.3548\n", - "Epoch [1/2], Step [500/600], Loss: 0.1892\n", - "Epoch [1/2], Step [600/600], Loss: 0.1495\n", - "Epoch [2/2], Step [100/600], Loss: 0.0616\n", - "Epoch [2/2], Step [200/600], Loss: 0.1158\n", - "Epoch [2/2], Step [300/600], Loss: 0.1124\n", - "Epoch [2/2], Step [400/600], Loss: 0.1369\n", - "Epoch [2/2], Step [500/600], Loss: 0.1065\n", - "Epoch [2/2], Step [600/600], Loss: 0.1572\n" + "Epoch [1/2], Step [100/600], Loss: 0.5457\n", + "Epoch [1/2], Step [200/600], Loss: 0.1737\n", + "Epoch [1/2], Step [300/600], Loss: 0.1552\n", + "Epoch [1/2], Step [400/600], Loss: 0.1071\n", + "Epoch [1/2], Step [500/600], Loss: 0.1606\n", + "Epoch [1/2], Step [600/600], Loss: 0.1674\n", + "Epoch [2/2], Step [100/600], Loss: 0.0522\n", + "Epoch [2/2], Step [200/600], Loss: 0.0928\n", + "Epoch [2/2], Step [300/600], Loss: 0.1489\n", + "Epoch [2/2], Step [400/600], Loss: 0.2393\n", + "Epoch [2/2], Step [500/600], Loss: 0.0827\n", + "Epoch [2/2], Step [600/600], Loss: 0.1014\n" ] } ], @@ -164,8 +187,8 @@ "# Train the Model\n", "for epoch in range(num_epochs):\n", " for i, (images, labels) in enumerate(train_loader):\n", - " images = Variable(images.view(-1, sequence_length, input_size)).cuda()\n", - " labels = Variable(labels).cuda()\n", + " images = images.view(-1, sequence_length, input_size).cuda()\n", + " labels = labels.cuda()\n", "\n", " # Forward + Backward + Optimize\n", " optimizer.zero_grad()\n", @@ -198,7 +221,7 @@ "correct = 0\n", "total 
= 0\n", "for images, labels in test_loader:\n", - " images = Variable(images.view(-1, sequence_length, input_size)).cuda()\n", + " images = images.view(-1, sequence_length, input_size).cuda()\n", " outputs = rnn(images)\n", " _, predicted = torch.max(outputs.data, 1)\n", " total += labels.size(0)\n", @@ -209,14 +232,10 @@ ] }, { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "collapsed": true - }, - "outputs": [], + "cell_type": "markdown", + "metadata": {}, "source": [ - "# Save the Model" + "Save the Model" ] }, { From c5dcd0506c33625813f51339cfac581197676e7e Mon Sep 17 00:00:00 2001 From: wanglijun Date: Sun, 6 May 2018 11:40:56 +0800 Subject: [PATCH 4/4] update --- .../01-basics/logistic_regression/main.ipynb | 88 +++-- tutorials/01-basics/pytorch_basics/main.ipynb | 110 ++++-- .../deep_residual_network/main-gpu.ipynb | 272 ++++++++++++-- .../generative_adversarial_network/main.ipynb | 337 ++++++++++++++++++ 4 files changed, 719 insertions(+), 88 deletions(-) create mode 100644 tutorials/02-intermediate/generative_adversarial_network/main.ipynb diff --git a/tutorials/01-basics/logistic_regression/main.ipynb b/tutorials/01-basics/logistic_regression/main.ipynb index 294f7497..6c6d9ec4 100644 --- a/tutorials/01-basics/logistic_regression/main.ipynb +++ b/tutorials/01-basics/logistic_regression/main.ipynb @@ -2,15 +2,14 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "import torch\n", "import torch.nn as nn\n", "import torchvision.datasets as dsets\n", - "import torchvision.transforms as transforms\n", - "from torch.autograd import Variable" + "import torchvision.transforms as transforms" ] }, { @@ -22,7 +21,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -42,19 +41,9 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz\n", - "Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz\n", - "Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz\n" - ] - } - ], + "outputs": [], "source": [ "train_dataset = dsets.MNIST(\n", " root='./data', train=True, transform=transforms.ToTensor(), download=True)\n", @@ -72,7 +61,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -92,7 +81,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -109,7 +98,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -130,7 +119,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ @@ -147,13 +136,49 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch: [1/5], Step: [100/600], Loss: 1.0199\n", + "Epoch: [1/5], Step: [200/600], Loss: 0.9715\n", + "Epoch: [1/5], Step: [300/600], Loss: 0.9932\n", + "Epoch: [1/5], Step: [400/600], Loss: 0.9211\n", + "Epoch: [1/5], Step: [500/600], Loss: 0.9605\n", + "Epoch: [1/5], Step: [600/600], Loss: 0.8711\n", + "Epoch: [2/5], Step: [100/600], 
Loss: 0.9282\n", + "Epoch: [2/5], Step: [200/600], Loss: 0.8996\n", + "Epoch: [2/5], Step: [300/600], Loss: 0.8775\n", + "Epoch: [2/5], Step: [400/600], Loss: 0.8941\n", + "Epoch: [2/5], Step: [500/600], Loss: 0.8430\n", + "Epoch: [2/5], Step: [600/600], Loss: 0.8450\n", + "Epoch: [3/5], Step: [100/600], Loss: 0.8696\n", + "Epoch: [3/5], Step: [200/600], Loss: 0.8127\n", + "Epoch: [3/5], Step: [300/600], Loss: 0.7602\n", + "Epoch: [3/5], Step: [400/600], Loss: 0.7931\n", + "Epoch: [3/5], Step: [500/600], Loss: 0.8892\n", + "Epoch: [3/5], Step: [600/600], Loss: 0.7710\n", + "Epoch: [4/5], Step: [100/600], Loss: 0.8848\n", + "Epoch: [4/5], Step: [200/600], Loss: 0.8064\n", + "Epoch: [4/5], Step: [300/600], Loss: 0.6603\n", + "Epoch: [4/5], Step: [400/600], Loss: 0.7730\n", + "Epoch: [4/5], Step: [500/600], Loss: 0.8910\n", + "Epoch: [4/5], Step: [600/600], Loss: 0.6882\n", + "Epoch: [5/5], Step: [100/600], Loss: 0.7511\n", + "Epoch: [5/5], Step: [200/600], Loss: 0.8586\n", + "Epoch: [5/5], Step: [300/600], Loss: 0.7412\n", + "Epoch: [5/5], Step: [400/600], Loss: 0.7737\n", + "Epoch: [5/5], Step: [500/600], Loss: 0.7441\n", + "Epoch: [5/5], Step: [600/600], Loss: 0.7945\n" + ] + } + ], "source": [ "for epoch in range(num_epochs):\n", " for i, (images, labels) in enumerate(train_loader):\n", - " images = images.view(-1, 28 * 28)\n", " # Forward + Backward + Optimize\n", " optimizer.zero_grad()\n", " outputs = model(images)\n", @@ -164,7 +189,7 @@ " if (i + 1) % 100 == 0:\n", " print('Epoch: [%d/%d], Step: [%d/%d], Loss: %.4f' %\n", " (epoch + 1, num_epochs, i + 1,\n", - " len(train_dataset) // batch_size, loss.data[0]))" + " len(train_dataset) // batch_size, loss.item()))" ] }, { @@ -176,14 +201,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy of the model on the 10000 test images: 85 %\n" + ] + } + ], "source": [ "correct = 0\n", "total = 0\n", "for images, labels in test_loader:\n", - " images = images.view(-1, 28 * 28)\n", " outputs = model(images)\n", " _, predicted = torch.max(outputs, 1)\n", " total += labels.size(0)\n", @@ -202,7 +234,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ diff --git a/tutorials/01-basics/pytorch_basics/main.ipynb b/tutorials/01-basics/pytorch_basics/main.ipynb index 536b4ccf..c5098722 100644 --- a/tutorials/01-basics/pytorch_basics/main.ipynb +++ b/tutorials/01-basics/pytorch_basics/main.ipynb @@ -25,7 +25,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -65,7 +65,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -73,10 +73,10 @@ "output_type": "stream", "text": [ "w: Parameter containing:\n", - "tensor([[ 0.2911, -0.2927, -0.2906],\n", - " [-0.5541, 0.3784, 0.3746]])\n", + "tensor([[ 0.5555, -0.2952, 0.4467],\n", + " [ 0.4385, 0.0137, 0.4614]])\n", "b: Parameter containing:\n", - "tensor([ 0.2662, 0.3458])\n" + "tensor([-0.1501, 0.1891])\n" ] } ], @@ -93,7 +93,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -112,7 +112,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -135,14 +135,14 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 6, "metadata": 
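The logistic-regression hunks above drop the `images.view(-1, 28 * 28)` reshape from both the training and test loops, which only works if the flattening moved inside the model. The model cell itself is outside this hunk, so the following is a sketch of that pattern under that assumption (the class name is illustrative):

    import torch.nn as nn

    class LogisticRegression(nn.Module):
        def __init__(self, input_size=28 * 28, num_classes=10):
            super(LogisticRegression, self).__init__()
            self.linear = nn.Linear(input_size, num_classes)

        def forward(self, x):
            # Flatten [batch, 1, 28, 28] MNIST batches to [batch, 784]
            # here, so the loops can pass raw image tensors.
            return self.linear(x.view(x.size(0), -1))

If the model instead kept a bare `nn.Linear(784, 10)` with no reshape, the unflattened batches would raise a size-mismatch error.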
{}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "loss: 1.252275824546814\n" + "loss: 1.4186806678771973\n" ] } ], @@ -154,16 +154,16 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "dL/dw: tensor([[ 0.1941, -0.5061, -0.3969],\n", - " [ 0.4256, 1.5922, 0.9655]])\n", - "dL/db: tensor([ 0.4807, 0.1702])\n" + "dL/dw: tensor([[ 0.8328, 0.0963, -0.9456],\n", + " [-0.2433, -0.3411, 0.2777]])\n", + "dL/db: tensor([-0.4444, 0.1440])\n" ] } ], @@ -178,7 +178,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -188,14 +188,14 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "loss after 1 step optimization: 1.252275824546814\n" + "loss after 1 step optimization: 1.3981201648712158\n" ] } ], @@ -219,7 +219,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -231,7 +231,7 @@ " [3, 4]]))" ] }, - "execution_count": 24, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -251,6 +251,13 @@ "## Implementing the input pipline" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Download and construct dataset." + ] + }, { "cell_type": "code", "execution_count": null, @@ -260,24 +267,49 @@ "name": "stdout", "output_type": "stream", "text": [ - "Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ../data/cifar-10-python.tar.gz\n" + "Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ../../data/cifar-10-python.tar.gz\n" ] } ], "source": [ - "# Download and construct dataset.\n", "train_dataset = dsets.CIFAR10(\n", - " root='../data/',\n", + " root='../../data/',\n", " train=True,\n", " transform=transforms.ToTensor(),\n", - " download=True)\n", - "\n", - "# Select one data pair (read data from disk).\n", + " download=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Select one data pair (read data from disk)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ "image, label = train_dataset[0]\n", "print(image.size())\n", - "print(label)\n", - "\n", - "# Data Loader (this provides queue and thread in a very simple way).\n", + "print(label)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Data Loader (this provides queue and thread in a very simple way)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ "train_loader = torch.utils.data.DataLoader(\n", " dataset=train_dataset, batch_size=100, shuffle=True, num_workers=2)\n", "\n", @@ -325,13 +357,19 @@ " return 0" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Then, you can just use prebuilt torch's data loader." + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "# Then, you can just use prebuilt torch's data loader.\n", "custom_dataset = CustomDataset()\n", "train_loader = torch.utils.data.DataLoader(\n", " dataset=custom_dataset, batch_size=100, shuffle=True, num_workers=2)" @@ -373,24 +411,36 @@ "## Save and load the model" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Save and load the entire model." 
+ ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "# Save and load the entire model.\n", "torch.save(resnet, 'model.pkl')\n", "model = torch.load('model.pkl')" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Save and load only the model parameters(recommended)." + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "# Save and load only the model parameters(recommended).\n", "torch.save(resnet.state_dict(), 'params.pkl')\n", "resnet.load_state_dict(torch.load('params.pkl'))" ] diff --git a/tutorials/02-intermediate/deep_residual_network/main-gpu.ipynb b/tutorials/02-intermediate/deep_residual_network/main-gpu.ipynb index 235fcef3..07c62895 100644 --- a/tutorials/02-intermediate/deep_residual_network/main-gpu.ipynb +++ b/tutorials/02-intermediate/deep_residual_network/main-gpu.ipynb @@ -21,11 +21,18 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/wanglijun/anaconda3/lib/python3.6/site-packages/torchvision/transforms/transforms.py:188: UserWarning: The use of the transforms.Scale transform is deprecated, please use transforms.Resize instead.\n", + " \"please use transforms.Resize instead.\")\n" + ] + } + ], "source": [ "# Image Preprocessing\n", "transform = transforms.Compose([\n", @@ -33,23 +40,46 @@ " transforms.RandomHorizontalFlip(),\n", " transforms.RandomCrop(32),\n", " transforms.ToTensor()\n", - "])\n", - "\n", + "])" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using downloaded and verified file: ../../data/cifar-10-python.tar.gz\n" + ] + } + ], + "source": [ "# CIFAR-10 Dataset\n", "train_dataset = dsets.CIFAR10(\n", - " root='./data/', train=True, transform=transform, download=True)\n", + " root='../../data/', train=True, transform=transform, download=True)\n", "\n", "test_dataset = dsets.CIFAR10(\n", - " root='./data/', train=False, transform=transforms.ToTensor())\n", + " root='../../data/', train=False, transform=transforms.ToTensor())\n", "\n", "# Data Loader (Input Pipeline)\n", "train_loader = torch.utils.data.DataLoader(\n", " dataset=train_dataset, batch_size=100, shuffle=True)\n", "\n", "test_loader = torch.utils.data.DataLoader(\n", - " dataset=test_dataset, batch_size=100, shuffle=False)\n", - "\n", - "\n", + " dataset=test_dataset, batch_size=100, shuffle=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ "# 3x3 Convolution\n", "def conv3x3(in_channels, out_channels, stride=1):\n", " return nn.Conv2d(\n", @@ -58,9 +88,17 @@ " kernel_size=3,\n", " stride=stride,\n", " padding=1,\n", - " bias=False)\n", - "\n", - "\n", + " bias=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ "# Residual Block\n", "class ResidualBlock(nn.Module):\n", " def __init__(self, in_channels, out_channels, stride=1, downsample=None):\n", @@ -83,9 +121,17 @@ " residual = self.downsample(x)\n", " out += residual\n", " out = self.relu(out)\n", - " return out\n", - "\n", - "\n", + " return out" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": true + }, + 
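Saving only `state_dict()` as recommended above keeps the checkpoint independent of the pickled class layout, but loading then requires rebuilding the architecture first. A round-trip sketch, using torchvision's `resnet18` purely as an example model:

    import torch
    import torchvision.models as models

    model = models.resnet18()
    torch.save(model.state_dict(), 'params.pkl')        # weights only

    restored = models.resnet18()                        # rebuild the graph
    restored.load_state_dict(torch.load('params.pkl'))  # then load weights

By contrast, `torch.save(resnet, 'model.pkl')` in the other cell pickles the whole module, which can break if the class definition later moves or changes.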
"outputs": [], + "source": [ "# ResNet Module\n", "class ResNet(nn.Module):\n", " def __init__(self, block, layers, num_classes=10):\n", @@ -124,17 +170,165 @@ " out = self.avg_pool(out)\n", " out = out.view(out.size(0), -1)\n", " out = self.fc(out)\n", - " return out\n", - "\n", - "\n", + " return out" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "ResNet(\n", + " (conv): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (relu): ReLU(inplace)\n", + " (layer1): Sequential(\n", + " (0): ResidualBlock(\n", + " (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", + " (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (relu): ReLU(inplace)\n", + " (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", + " (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " )\n", + " (1): ResidualBlock(\n", + " (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", + " (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (relu): ReLU(inplace)\n", + " (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", + " (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " )\n", + " (2): ResidualBlock(\n", + " (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", + " (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (relu): ReLU(inplace)\n", + " (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", + " (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " )\n", + " )\n", + " (layer2): Sequential(\n", + " (0): ResidualBlock(\n", + " (conv1): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n", + " (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (relu): ReLU(inplace)\n", + " (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", + " (bn2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (downsample): Sequential(\n", + " (0): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n", + " (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " )\n", + " )\n", + " (1): ResidualBlock(\n", + " (conv1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", + " (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (relu): ReLU(inplace)\n", + " (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", + " (bn2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " )\n", + " (2): ResidualBlock(\n", + " (conv1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", + " (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (relu): ReLU(inplace)\n", + " (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", + " 
(bn2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " )\n", + " )\n", + " (layer3): Sequential(\n", + " (0): ResidualBlock(\n", + " (conv1): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n", + " (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (relu): ReLU(inplace)\n", + " (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", + " (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (downsample): Sequential(\n", + " (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n", + " (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " )\n", + " )\n", + " (1): ResidualBlock(\n", + " (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", + " (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (relu): ReLU(inplace)\n", + " (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", + " (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " )\n", + " (2): ResidualBlock(\n", + " (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", + " (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (relu): ReLU(inplace)\n", + " (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", + " (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " )\n", + " )\n", + " (avg_pool): AvgPool2d(kernel_size=8, stride=8, padding=0)\n", + " (fc): Linear(in_features=64, out_features=10, bias=True)\n", + ")" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ "resnet = ResNet(ResidualBlock, [3, 3, 3])\n", - "resnet.cuda()\n", - "\n", + "resnet.cuda()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ "# Loss and Optimizer\n", "criterion = nn.CrossEntropyLoss()\n", "lr = 0.001\n", - "optimizer = torch.optim.Adam(resnet.parameters(), lr=lr)\n", - "\n", + "optimizer = torch.optim.Adam(resnet.parameters(), lr=lr)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/wanglijun/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:16: UserWarning: invalid index of a 0-dim tensor. This will be an error in PyTorch 0.5. 
Use tensor.item() to convert a 0-dim tensor to a Python number\n", + " app.launch_new_instance()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch [1/80], Iter [100/500] Loss: 1.7425\n", + "Epoch [1/80], Iter [200/500] Loss: 1.4576\n", + "Epoch [1/80], Iter [300/500] Loss: 1.2788\n", + "Epoch [1/80], Iter [400/500] Loss: 1.0928\n", + "Epoch [1/80], Iter [500/500] Loss: 1.1920\n", + "Epoch [2/80], Iter [100/500] Loss: 1.0616\n", + "Epoch [2/80], Iter [200/500] Loss: 1.0191\n", + "Epoch [2/80], Iter [300/500] Loss: 0.9947\n", + "Epoch [2/80], Iter [400/500] Loss: 0.9901\n", + "Epoch [2/80], Iter [500/500] Loss: 0.8910\n", + "Epoch [3/80], Iter [100/500] Loss: 0.9540\n", + "Epoch [3/80], Iter [200/500] Loss: 0.8914\n", + "Epoch [3/80], Iter [300/500] Loss: 0.7513\n", + "Epoch [3/80], Iter [400/500] Loss: 0.8767\n", + "Epoch [3/80], Iter [500/500] Loss: 0.7607\n", + "Epoch [4/80], Iter [100/500] Loss: 0.7119\n", + "Epoch [4/80], Iter [200/500] Loss: 0.7928\n" + ] + } + ], + "source": [ "# Training\n", "for epoch in range(80):\n", " for i, (images, labels) in enumerate(train_loader):\n", @@ -150,13 +344,22 @@ "\n", " if (i + 1) % 100 == 0:\n", " print(\"Epoch [%d/%d], Iter [%d/%d] Loss: %.4f\" %\n", - " (epoch + 1, 80, i + 1, 500, loss.data[0]))\n", + " (epoch + 1, 80, i + 1, 500, loss.item()))\n", "\n", " # Decaying Learning Rate\n", " if (epoch + 1) % 20 == 0:\n", " lr /= 3\n", - " optimizer = torch.optim.Adam(resnet.parameters(), lr=lr)\n", - "\n", + " optimizer = torch.optim.Adam(resnet.parameters(), lr=lr)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ "# Test\n", "correct = 0\n", "total = 0\n", @@ -168,8 +371,17 @@ " correct += (predicted.cpu() == labels).sum()\n", "\n", "print('Accuracy of the model on the test images: %d %%' %\n", - " (100 * correct / total))\n", - "\n", + " (100 * correct / total))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ "# Save the Model\n", "torch.save(resnet.state_dict(), 'resnet.pkl')" ] diff --git a/tutorials/02-intermediate/generative_adversarial_network/main.ipynb b/tutorials/02-intermediate/generative_adversarial_network/main.ipynb new file mode 100644 index 00000000..ce721875 --- /dev/null +++ b/tutorials/02-intermediate/generative_adversarial_network/main.ipynb @@ -0,0 +1,337 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import torch\n", + "import torchvision\n", + "import torch.nn as nn\n", + "import torch.nn.functional as F\n", + "from torchvision import datasets \n", + "from torchvision import transforms\n", + "from torchvision.utils import save_image\n", + "from torch.autograd import Variable" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def to_var(x):\n", + " if torch.cuda.is_available():\n", + " x = x.cuda()\n", + " return Variable(x)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def denorm(x):\n", + " out = (x + 1) / 2\n", + " return out.clamp(0, 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Image processing\n", + "transform = transforms.Compose([\n", + " 
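The GAN notebook's `denorm()` above undoes the `Normalize(mean=0.5, std=0.5)` step of the transform being built here: normalization maps [0, 1] pixels to [-1, 1] via (x - 0.5) / 0.5, and (x + 1) / 2 maps them back for saving. A self-contained check with a random stand-in image:

    import torch

    x = torch.rand(1, 28, 28)          # fake pixel data in [0, 1]
    normalized = (x - 0.5) / 0.5       # what transforms.Normalize computes
    recovered = ((normalized + 1) / 2).clamp(0, 1)
    assert torch.allclose(recovered, x, atol=1e-6)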
transforms.ToTensor(),\n", + " transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))\n", + "])" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# MNIST dataset\n", + "mnist = datasets.MNIST(\n", + " root='../../data/', train=True, transform=transform, download=True)\n", + "# Data loader\n", + "data_loader = torch.utils.data.DataLoader(\n", + " dataset=mnist, batch_size=100, shuffle=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Discriminator\n", + "D = nn.Sequential(\n", + " nn.Linear(784, 256), nn.LeakyReLU(0.2), nn.Linear(256, 256),\n", + " nn.LeakyReLU(0.2), nn.Linear(256, 1), nn.Sigmoid())\n", + "\n", + "# Generator\n", + "G = nn.Sequential(\n", + " nn.Linear(64, 256), nn.LeakyReLU(0.2), nn.Linear(256, 256),\n", + " nn.LeakyReLU(0.2), nn.Linear(256, 784), nn.Tanh())" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "if torch.cuda.is_available():\n", + " D.cuda()\n", + " G.cuda()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Binary cross entropy loss and optimizer\n", + "criterion = nn.BCELoss()\n", + "d_optimizer = torch.optim.Adam(D.parameters(), lr=0.0003)\n", + "g_optimizer = torch.optim.Adam(G.parameters(), lr=0.0003)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/wanglijun/anaconda3/lib/python3.6/site-packages/torch/nn/functional.py:1474: UserWarning: Using a target size (torch.Size([100])) that is different to the input size (torch.Size([100, 1])) is deprecated. 
Please ensure they have the same size.\n", + " \"Please ensure they have the same size.\".format(target.size(), input.size()))\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch [0/200], Step[300/600], d_loss: 0.5309, g_loss: 2.6516, D(x): 0.79, D(G(z)): 0.19\n", + "Epoch [0/200], Step[600/600], d_loss: 2.2574, g_loss: 0.8113, D(x): 0.43, D(G(z)): 0.45\n", + "Epoch [1/200], Step[300/600], d_loss: 1.6688, g_loss: 1.4659, D(x): 0.65, D(G(z)): 0.46\n", + "Epoch [1/200], Step[600/600], d_loss: 0.1316, g_loss: 4.3763, D(x): 0.96, D(G(z)): 0.07\n", + "Epoch [2/200], Step[300/600], d_loss: 0.6292, g_loss: 2.2588, D(x): 0.75, D(G(z)): 0.17\n", + "Epoch [2/200], Step[600/600], d_loss: 0.7988, g_loss: 1.7797, D(x): 0.76, D(G(z)): 0.32\n", + "Epoch [3/200], Step[300/600], d_loss: 0.7584, g_loss: 2.7641, D(x): 0.80, D(G(z)): 0.29\n", + "Epoch [3/200], Step[600/600], d_loss: 1.4283, g_loss: 1.3157, D(x): 0.64, D(G(z)): 0.44\n", + "Epoch [4/200], Step[300/600], d_loss: 0.3495, g_loss: 3.2987, D(x): 0.90, D(G(z)): 0.12\n", + "Epoch [4/200], Step[600/600], d_loss: 1.2803, g_loss: 1.1740, D(x): 0.59, D(G(z)): 0.40\n", + "Epoch [5/200], Step[300/600], d_loss: 1.1722, g_loss: 1.9675, D(x): 0.57, D(G(z)): 0.22\n", + "Epoch [5/200], Step[600/600], d_loss: 1.7974, g_loss: 0.9628, D(x): 0.51, D(G(z)): 0.57\n", + "Epoch [6/200], Step[300/600], d_loss: 1.3530, g_loss: 1.2004, D(x): 0.59, D(G(z)): 0.50\n", + "Epoch [6/200], Step[600/600], d_loss: 0.8224, g_loss: 1.9385, D(x): 0.70, D(G(z)): 0.27\n", + "Epoch [7/200], Step[300/600], d_loss: 0.4865, g_loss: 2.4545, D(x): 0.88, D(G(z)): 0.26\n", + "Epoch [7/200], Step[600/600], d_loss: 1.8352, g_loss: 1.4978, D(x): 0.63, D(G(z)): 0.58\n", + "Epoch [8/200], Step[300/600], d_loss: 0.9291, g_loss: 1.6059, D(x): 0.71, D(G(z)): 0.29\n", + "Epoch [8/200], Step[600/600], d_loss: 1.7051, g_loss: 1.4000, D(x): 0.48, D(G(z)): 0.28\n", + "Epoch [9/200], Step[300/600], d_loss: 0.5494, g_loss: 2.4653, D(x): 0.80, D(G(z)): 0.17\n", + "Epoch [9/200], Step[600/600], d_loss: 1.0661, g_loss: 1.9460, D(x): 0.67, D(G(z)): 0.26\n", + "Epoch [10/200], Step[300/600], d_loss: 0.7726, g_loss: 2.4095, D(x): 0.84, D(G(z)): 0.36\n", + "Epoch [10/200], Step[600/600], d_loss: 0.4196, g_loss: 3.6294, D(x): 0.84, D(G(z)): 0.10\n", + "Epoch [11/200], Step[300/600], d_loss: 0.4784, g_loss: 2.5315, D(x): 0.82, D(G(z)): 0.15\n", + "Epoch [11/200], Step[600/600], d_loss: 0.4335, g_loss: 2.4839, D(x): 0.79, D(G(z)): 0.13\n", + "Epoch [12/200], Step[300/600], d_loss: 0.7401, g_loss: 2.0420, D(x): 0.72, D(G(z)): 0.15\n", + "Epoch [12/200], Step[600/600], d_loss: 0.5065, g_loss: 2.0637, D(x): 0.78, D(G(z)): 0.13\n", + "Epoch [13/200], Step[300/600], d_loss: 0.6011, g_loss: 2.5222, D(x): 0.78, D(G(z)): 0.20\n", + "Epoch [13/200], Step[600/600], d_loss: 1.7426, g_loss: 1.1689, D(x): 0.49, D(G(z)): 0.36\n", + "Epoch [14/200], Step[300/600], d_loss: 0.8783, g_loss: 1.7997, D(x): 0.81, D(G(z)): 0.39\n", + "Epoch [14/200], Step[600/600], d_loss: 0.8017, g_loss: 1.8073, D(x): 0.72, D(G(z)): 0.29\n", + "Epoch [15/200], Step[300/600], d_loss: 0.7838, g_loss: 2.8603, D(x): 0.70, D(G(z)): 0.18\n", + "Epoch [15/200], Step[600/600], d_loss: 1.7985, g_loss: 1.7794, D(x): 0.55, D(G(z)): 0.49\n", + "Epoch [16/200], Step[300/600], d_loss: 0.8829, g_loss: 1.2817, D(x): 0.72, D(G(z)): 0.32\n", + "Epoch [16/200], Step[600/600], d_loss: 1.0457, g_loss: 2.0245, D(x): 0.74, D(G(z)): 0.31\n", + "Epoch [17/200], Step[300/600], d_loss: 0.8169, g_loss: 3.1381, D(x): 0.71, D(G(z)): 0.20\n", + "Epoch 
[17/200], Step[600/600], d_loss: 0.8164, g_loss: 2.1566, D(x): 0.74, D(G(z)): 0.27\n", + "Epoch [18/200], Step[300/600], d_loss: 1.0738, g_loss: 2.7594, D(x): 0.68, D(G(z)): 0.25\n", + "Epoch [18/200], Step[600/600], d_loss: 0.7713, g_loss: 2.0212, D(x): 0.78, D(G(z)): 0.29\n", + "Epoch [19/200], Step[300/600], d_loss: 1.7861, g_loss: 1.1829, D(x): 0.56, D(G(z)): 0.46\n", + "Epoch [19/200], Step[600/600], d_loss: 0.6501, g_loss: 2.0299, D(x): 0.73, D(G(z)): 0.17\n", + "Epoch [20/200], Step[300/600], d_loss: 1.0020, g_loss: 2.9095, D(x): 0.69, D(G(z)): 0.21\n", + "Epoch [20/200], Step[600/600], d_loss: 0.7252, g_loss: 2.6028, D(x): 0.84, D(G(z)): 0.32\n", + "Epoch [21/200], Step[300/600], d_loss: 0.6441, g_loss: 1.8729, D(x): 0.82, D(G(z)): 0.26\n", + "Epoch [21/200], Step[600/600], d_loss: 0.9662, g_loss: 1.4863, D(x): 0.76, D(G(z)): 0.35\n", + "Epoch [22/200], Step[300/600], d_loss: 0.8941, g_loss: 1.3556, D(x): 0.73, D(G(z)): 0.27\n", + "Epoch [22/200], Step[600/600], d_loss: 0.6727, g_loss: 2.4101, D(x): 0.77, D(G(z)): 0.23\n", + "Epoch [23/200], Step[300/600], d_loss: 0.7678, g_loss: 1.7464, D(x): 0.77, D(G(z)): 0.30\n", + "Epoch [23/200], Step[600/600], d_loss: 0.8240, g_loss: 1.6250, D(x): 0.79, D(G(z)): 0.36\n", + "Epoch [24/200], Step[300/600], d_loss: 0.9504, g_loss: 1.9247, D(x): 0.68, D(G(z)): 0.28\n", + "Epoch [24/200], Step[600/600], d_loss: 0.7695, g_loss: 1.7126, D(x): 0.70, D(G(z)): 0.23\n", + "Epoch [25/200], Step[300/600], d_loss: 0.7339, g_loss: 1.7120, D(x): 0.71, D(G(z)): 0.23\n", + "Epoch [25/200], Step[600/600], d_loss: 0.9323, g_loss: 2.0295, D(x): 0.73, D(G(z)): 0.31\n", + "Epoch [26/200], Step[300/600], d_loss: 0.6988, g_loss: 2.3213, D(x): 0.80, D(G(z)): 0.25\n", + "Epoch [26/200], Step[600/600], d_loss: 0.6962, g_loss: 1.9388, D(x): 0.82, D(G(z)): 0.30\n", + "Epoch [27/200], Step[300/600], d_loss: 0.7207, g_loss: 2.4931, D(x): 0.77, D(G(z)): 0.26\n", + "Epoch [27/200], Step[600/600], d_loss: 0.7408, g_loss: 2.1665, D(x): 0.73, D(G(z)): 0.22\n", + "Epoch [28/200], Step[300/600], d_loss: 0.9663, g_loss: 2.0794, D(x): 0.68, D(G(z)): 0.25\n", + "Epoch [28/200], Step[600/600], d_loss: 0.8515, g_loss: 1.7461, D(x): 0.71, D(G(z)): 0.24\n", + "Epoch [29/200], Step[300/600], d_loss: 0.9394, g_loss: 1.8435, D(x): 0.66, D(G(z)): 0.24\n", + "Epoch [29/200], Step[600/600], d_loss: 0.9079, g_loss: 1.7545, D(x): 0.70, D(G(z)): 0.31\n", + "Epoch [30/200], Step[300/600], d_loss: 0.8705, g_loss: 1.5295, D(x): 0.76, D(G(z)): 0.30\n", + "Epoch [30/200], Step[600/600], d_loss: 0.9223, g_loss: 1.5076, D(x): 0.73, D(G(z)): 0.31\n", + "Epoch [31/200], Step[300/600], d_loss: 0.8254, g_loss: 1.9665, D(x): 0.73, D(G(z)): 0.23\n", + "Epoch [31/200], Step[600/600], d_loss: 0.5945, g_loss: 2.2866, D(x): 0.81, D(G(z)): 0.21\n", + "Epoch [32/200], Step[300/600], d_loss: 0.8853, g_loss: 1.8050, D(x): 0.78, D(G(z)): 0.31\n", + "Epoch [32/200], Step[600/600], d_loss: 1.0401, g_loss: 2.0798, D(x): 0.72, D(G(z)): 0.31\n", + "Epoch [33/200], Step[300/600], d_loss: 0.8667, g_loss: 2.6323, D(x): 0.68, D(G(z)): 0.18\n", + "Epoch [33/200], Step[600/600], d_loss: 0.8556, g_loss: 1.6299, D(x): 0.68, D(G(z)): 0.28\n", + "Epoch [34/200], Step[300/600], d_loss: 0.9350, g_loss: 1.7595, D(x): 0.75, D(G(z)): 0.35\n", + "Epoch [34/200], Step[600/600], d_loss: 0.9461, g_loss: 1.7809, D(x): 0.72, D(G(z)): 0.32\n", + "Epoch [35/200], Step[300/600], d_loss: 0.8312, g_loss: 1.8604, D(x): 0.70, D(G(z)): 0.26\n", + "Epoch [35/200], Step[600/600], d_loss: 0.9078, g_loss: 1.9198, D(x): 0.79, D(G(z)): 0.38\n", + "Epoch 
+ "Epoch [36/200], Step[600/600], d_loss: 1.0273, g_loss: 1.8539, D(x): 0.67, D(G(z)): 0.34\n",
+ "Epoch [37/200], Step[300/600], d_loss: 0.9818, g_loss: 2.2176, D(x): 0.68, D(G(z)): 0.28\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Start training\n",
+ "for epoch in range(200):\n",
+ "    for i, (images, _) in enumerate(data_loader):\n",
+ "        # Flatten each mini-batch of images into (batch_size, 784) vectors\n",
+ "        batch_size = images.size(0)\n",
+ "        images = to_var(images.view(batch_size, -1))\n",
+ "\n",
+ "        # Create the labels which are later used as input for the BCE loss\n",
+ "        real_labels = to_var(torch.ones(batch_size))\n",
+ "        fake_labels = to_var(torch.zeros(batch_size))\n",
+ "\n",
+ "        #============= Train the discriminator =============#\n",
+ "        # Compute BCE_Loss on real images, where BCE_Loss(x, y) = -y * log(x) - (1-y) * log(1 - x) with x = D(images)\n",
+ "        # The second term of the loss is always zero since real_labels == 1\n",
+ "        outputs = D(images)\n",
+ "        d_loss_real = criterion(outputs, real_labels)\n",
+ "        real_score = outputs\n",
+ "\n",
+ "        # Compute BCE_Loss on fake images\n",
+ "        # The first term of the loss is always zero since fake_labels == 0\n",
+ "        z = to_var(torch.randn(batch_size, 64))\n",
+ "        fake_images = G(z)\n",
+ "        outputs = D(fake_images)\n",
+ "        d_loss_fake = criterion(outputs, fake_labels)\n",
+ "        fake_score = outputs\n",
+ "\n",
+ "        # Backprop + optimize\n",
+ "        d_loss = d_loss_real + d_loss_fake\n",
+ "        D.zero_grad()\n",
+ "        d_loss.backward()\n",
+ "        d_optimizer.step()\n",
+ "\n",
+ "        #=============== Train the generator ===============#\n",
+ "        # Compute loss with fake images\n",
+ "        z = to_var(torch.randn(batch_size, 64))\n",
+ "        fake_images = G(z)\n",
+ "        outputs = D(fake_images)\n",
+ "\n",
+ "        # We train G to maximize log(D(G(z))) instead of minimizing log(1 - D(G(z)))\n",
+ "        # For the reasoning, see the last paragraph of Section 3 of https://arxiv.org/pdf/1406.2661.pdf\n",
+ "        g_loss = criterion(outputs, real_labels)\n",
+ "\n",
+ "        # Backprop + optimize\n",
+ "        D.zero_grad()\n",
+ "        G.zero_grad()\n",
+ "        g_loss.backward()\n",
+ "        g_optimizer.step()\n",
+ "\n",
+ "        if (i + 1) % 300 == 0:\n",
+ "            print('Epoch [%d/%d], Step[%d/%d], d_loss: %.4f, '\n",
+ "                  'g_loss: %.4f, D(x): %.2f, D(G(z)): %.2f' %\n",
+ "                  (epoch, 200, i + 1, 600, d_loss.item(), g_loss.item(),\n",
+ "                   real_score.data.mean(), fake_score.data.mean()))\n",
+ "\n",
+ "    # Save real images once, during the first epoch\n",
+ "    if (epoch + 1) == 1:\n",
+ "        images = images.view(images.size(0), 1, 28, 28)\n",
+ "        save_image(denorm(images.data), './data/real_images.png')\n",
+ "\n",
+ "    # Save sampled images at the end of every epoch\n",
+ "    fake_images = fake_images.view(fake_images.size(0), 1, 28, 28)\n",
+ "    save_image(\n",
+ "        denorm(fake_images.data), './data/fake_images-%d.png' % (epoch + 1))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Save the trained parameters"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "torch.save(G.state_dict(), './generator.pkl')\n",
+ "torch.save(D.state_dict(), './discriminator.pkl')"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.3"
+ },
+ "toc": {
+ "nav_menu": {},
+ "number_sections": true,
+ "sideBar": true,
+ "skip_h1_title": false,
+ "toc_cell": false,
+ "toc_position": {},
+ "toc_section_display": "block",
+ "toc_window_display": false
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
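Note on the truncated UserWarning captured in the stderr output above: it is PyTorch flagging that nn.BCELoss receives 1-D label tensors (torch.ones(batch_size)) while D returns outputs of shape (batch_size, 1). Below is a minimal, self-contained sketch of the same alternating GAN update with matching shapes. It is an illustration under stated assumptions, not the notebook's exact code: it assumes PyTorch >= 0.4 tensor semantics (no to_var/Variable), the stand-in G and D architectures and the lr value are hypothetical, and it uses detach() in the discriminator step where the notebook instead zeroes gradients and recomputes fake images.

    import torch
    import torch.nn as nn

    latent_dim, image_dim, batch_size = 64, 784, 100  # 784 = flattened 28x28 MNIST image

    # Hypothetical stand-ins for the notebook's G and D (its own definitions live in earlier cells).
    G = nn.Sequential(nn.Linear(latent_dim, 256), nn.ReLU(),
                      nn.Linear(256, image_dim), nn.Tanh())
    D = nn.Sequential(nn.Linear(image_dim, 256), nn.LeakyReLU(0.2),
                      nn.Linear(256, 1), nn.Sigmoid())

    criterion = nn.BCELoss()
    d_optimizer = torch.optim.Adam(D.parameters(), lr=0.0002)
    g_optimizer = torch.optim.Adam(G.parameters(), lr=0.0002)

    images = torch.randn(batch_size, image_dim)  # placeholder for one real mini-batch

    # Labels shaped (batch_size, 1) so they match D's output and avoid the
    # "Please ensure they have the same size" warning seen above.
    real_labels = torch.ones(batch_size, 1)
    fake_labels = torch.zeros(batch_size, 1)

    # Discriminator step: push D(real) toward 1 and D(fake) toward 0.
    z = torch.randn(batch_size, latent_dim)
    d_loss = (criterion(D(images), real_labels) +
              criterion(D(G(z).detach()), fake_labels))  # detach: no gradient flows into G here
    d_optimizer.zero_grad()
    d_loss.backward()
    d_optimizer.step()

    # Generator step: push D(G(z)) toward 1, i.e. maximize log(D(G(z)))
    # (the non-saturating loss from the last paragraph of Section 3 of the GAN paper).
    z = torch.randn(batch_size, latent_dim)
    g_loss = criterion(D(G(z)), real_labels)
    g_optimizer.zero_grad()
    g_loss.backward()
    g_optimizer.step()

In a full training loop these two steps repeat per mini-batch, exactly as in the notebook cell above; only the label shapes and the detach() call differ from the patched code.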