kdnet finished

fxia22 · fxia22 · commit 62c521d883e1 · 2017-05-31T16:33:40.000-07:00
diff --git a/playground.ipynb b/playground.ipynb
@@ -35,6 +35,28 @@
     "d = PartDataset(root = '../unsupervised3d/shapenetcore_partanno_segmentation_benchmark_v0', classification = True)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 151,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "16"
+      ]
+     },
+     "execution_count": 151,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(d.classes)"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 3,
@@ -60,7 +82,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 95,
+   "execution_count": 5,
    "metadata": {
     "collapsed": false
    },
@@ -95,7 +117,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 145,
+   "execution_count": 34,
    "metadata": {
     "collapsed": false
    },
@@ -104,8 +126,8 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "CPU times: user 1.23 s, sys: 12 ms, total: 1.24 s\n",
-      "Wall time: 1.24 s\n"
+      "CPU times: user 1.28 s, sys: 0 ns, total: 1.28 s\n",
+      "Wall time: 1.28 s\n"
      ]
     }
    ],
@@ -124,7 +146,7 @@
     "            tree[level+1].append(right_ps)\n",
     "            cutdim[level].append(dim)  \n",
     "            cutdim[level].append(dim)  \n",
-    "    cutdim = [Variable(torch.from_numpy(np.array(item).astype(np.int64))) for item in cutdim]\n",
+    "    cutdim = [(torch.from_numpy(np.array(item).astype(np.int64))) for item in cutdim]\n",
     "    points = torch.stack(tree[-1])\n",
     "    \n",
     "    \n",
@@ -133,33 +155,62 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 165,
+   "execution_count": 174,
    "metadata": {
     "collapsed": false
    },
    "outputs": [],
    "source": [
     "class KDNet(nn.Module):\n",
-    "    def __init__(self):\n",
+    "    def __init__(self, k = 16):\n",
     "        super(KDNet, self).__init__()\n",
     "        self.conv1 = nn.Conv1d(3,8 * 3,1,1)\n",
-    "        \n",
+    "        self.conv2 = nn.Conv1d(8,32 * 3,1,1)\n",
+    "        self.conv3 = nn.Conv1d(32,64 * 3,1,1)\n",
+    "        self.conv4 = nn.Conv1d(64,64 * 3,1,1)\n",
+    "        self.conv5 = nn.Conv1d(64,64 * 3,1,1)\n",
+    "        self.conv6 = nn.Conv1d(64,128 * 3,1,1)\n",
+    "        self.conv7 = nn.Conv1d(128,256 * 3,1,1)\n",
+    "        self.conv8 = nn.Conv1d(256,512 * 3,1,1)\n",
+    "        self.conv9 = nn.Conv1d(512,512 * 3,1,1)\n",
+    "        self.conv10 = nn.Conv1d(512,512 * 3,1,1)\n",
+    "        self.conv11 = nn.Conv1d(512,1024 * 3,1,1)      \n",
+    "        self.fc = nn.Linear(1024, k)\n",
     "\n",
     "    def forward(self, x, c):\n",
-    "        x1 = self.conv1(x)\n",
-    "        #x1 = x1.view(-1, 3, 8, 2048)\n",
-    "        #sel = c[-1]\n",
-    "        \n",
-    "        #x1 = torch.index_select(x1, dim = 1, index = sel)\n",
+    "        def kdconv(x, dim, featdim, sel, conv):\n",
+    "            x =  F.relu(conv(x))\n",
+    "            x = x.view(-1, featdim, 3, dim)\n",
+    "            x = x.view(-1, featdim, 3 * dim)\n",
+    "            sel = Variable(sel + (torch.arange(0,dim) * 3).long())\n",
+    "            if x.is_cuda:\n",
+    "                sel = sel.cuda()     \n",
+    "            x = torch.index_select(x, dim = 2, index = sel)\n",
+    "            x = x.view(-1, featdim, dim/2, 2)\n",
+    "            x = torch.squeeze(torch.max(x, dim = -1)[0], 3)\n",
+    "            return x      \n",
     "        \n",
-    "        return x1\n",
+    "        x1 = kdconv(x, 2048, 8, c[-1], self.conv1)\n",
+    "        x2 = kdconv(x1, 1024, 32, c[-2], self.conv2)\n",
+    "        x3 = kdconv(x2, 512, 64, c[-3], self.conv3)\n",
+    "        x4 = kdconv(x3, 256, 64, c[-4], self.conv4)\n",
+    "        x5 = kdconv(x4, 128, 64, c[-5], self.conv5)\n",
+    "        x6 = kdconv(x5, 64, 128, c[-6], self.conv6)\n",
+    "        x7 = kdconv(x6, 32, 256, c[-7], self.conv7)\n",
+    "        x8 = kdconv(x7, 16, 512, c[-8], self.conv8)\n",
+    "        x9 = kdconv(x8, 8, 512, c[-9], self.conv9)\n",
+    "        x10 = kdconv(x9, 4, 512, c[-10], self.conv10)\n",
+    "        x11 = kdconv(x10, 2, 1024, c[-11], self.conv11)\n",
+    "        x11 = x11.view(-1,1024)\n",
+    "        out = F.log_softmax(self.fc(x11))\n",
+    "        return out\n",
     "        \n",
     "net = KDNet()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 166,
+   "execution_count": 175,
    "metadata": {
     "collapsed": false
    },
@@ -170,42 +221,59 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 176,
    "metadata": {
-    "collapsed": true
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "torch.Size([1, 3, 2048])"
+      ]
+     },
+     "execution_count": 176,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "points_v.size()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 177,
+   "metadata": {
+    "collapsed": false
    },
    "outputs": [],
-   "source": []
+   "source": [
+    "torch.sum(x).backward()"
+   ]
   },
   {
    "cell_type": "code",
-   "execution_count": 167,
+   "execution_count": 178,
    "metadata": {
     "collapsed": false
    },
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "Variable containing:\n",
-       "( 0  ,.,.) = \n",
-       " -1.0363e+00 -1.0316e+00 -1.0316e+00  ...  -1.3211e-02 -1.3213e-02 -1.3207e-02\n",
-       "  3.9648e-01  3.9941e-01  3.9941e-01  ...  -7.5476e-01 -7.5476e-01 -7.5477e-01\n",
-       "  3.3743e-01  3.2779e-01  3.2779e-01  ...  -2.3070e-01 -2.3070e-01 -2.3070e-01\n",
-       "                 ...                   ⋱                   ...                \n",
-       " -8.4430e-01 -8.3342e-01 -8.3342e-01  ...  -1.9406e-01 -1.9406e-01 -1.9405e-01\n",
-       "  2.6654e-02  2.4352e-02  2.4351e-02  ...   1.4608e-01  1.4608e-01  1.4608e-01\n",
-       " -7.8591e-01 -7.9823e-01 -7.9823e-01  ...   2.9584e-02  2.9581e-02  2.9582e-02\n",
-       "[torch.FloatTensor of size 1x24x2048]"
+       "\n",
+       " 0\n",
+       "[torch.LongTensor of size 1]"
       ]
      },
-     "execution_count": 167,
+     "execution_count": 178,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "x"
+    "class_label"
    ]
   },
   {
diff --git a/train.py b/train.py
@@ -0,0 +1,124 @@
+from datasets import PartDataset
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.autograd import Variable
+import torch.optim as optim
+
+
+class KDNet(nn.Module):
+    """Kd-network classifier: pools 2048 kd-tree leaves pairwise into k log-probabilities."""
+
+    def __init__(self, k = 16):
+        """Create the eleven per-level 1x1 convolutions and the k-way linear head."""
+        super(KDNet, self).__init__()
+        # Every level emits 3x its feature width: one candidate set per split axis.
+        self.conv1 = nn.Conv1d(3,8 * 3,1,1)
+        self.conv2 = nn.Conv1d(8,32 * 3,1,1)
+        self.conv3 = nn.Conv1d(32,64 * 3,1,1)
+        self.conv4 = nn.Conv1d(64,64 * 3,1,1)
+        self.conv5 = nn.Conv1d(64,64 * 3,1,1)
+        self.conv6 = nn.Conv1d(64,128 * 3,1,1)
+        self.conv7 = nn.Conv1d(128,256 * 3,1,1)
+        self.conv8 = nn.Conv1d(256,512 * 3,1,1)
+        self.conv9 = nn.Conv1d(512,512 * 3,1,1)
+        self.conv10 = nn.Conv1d(512,512 * 3,1,1)
+        self.conv11 = nn.Conv1d(512,1024 * 3,1,1)
+        self.fc = nn.Linear(1024, k)
+
+    def forward(self, x, c):
+        """Map x (batch, 3, 2048) to log-probabilities; c[-i] holds level i's split axes."""
+        def kdconv(x, dim, featdim, sel, conv):
+            # Flatten the 3 candidates of each feature into one row of 3 blocks of dim nodes.
+            x = F.relu(conv(x)).view(-1, featdim, 3 * dim)
+            # Candidate s of node d lives at offset s * dim + d in that row; sel + 3 * d
+            # would address a (dim, 3) layout and pick values from unrelated nodes.
+            sel = Variable(sel * dim + torch.arange(0, dim).long())
+            if x.is_cuda:
+                sel = sel.cuda()
+            x = torch.index_select(x, dim = 2, index = sel)
+            # Max over adjacent node pairs. // keeps the size an int on Python 3; the
+            # closing view gives (batch, featdim, dim // 2) whether or not max keeps dims.
+            x = x.view(-1, featdim, dim // 2, 2)
+            return torch.max(x, dim = -1)[0].view(-1, featdim, dim // 2)
+
+        # Output width per level, leaves first; the node count halves from 2048 down to 2.
+        widths = [8, 32, 64, 64, 64, 128, 256, 512, 512, 512, 1024]
+        for level, featdim in enumerate(widths, 1):
+            conv = getattr(self, 'conv%d' % level)
+            x = kdconv(x, 2048 >> (level - 1), featdim, c[-level], conv)
+        x = x.view(-1,1024)
+        return F.log_softmax(self.fc(x))
+    
+def split_ps(point_set):
+    """Split a 2-D point tensor of N rows into two sets padded up to N // 2 rows.
+
+    The cut is the median along the axis with the largest extent. A side that
+    ends up short is padded with copies of the first point lying on the cut.
+
+    Returns (left_ps, right_ps, dim), where dim is the index of the cut axis.
+    """
+    # Floor division: the size is used as a repeat count below, so it must stay an int.
+    num_points = point_set.size()[0] // 2
+    diff = point_set.max(dim=0)[0] - point_set.min(dim=0)[0]
+    dim = torch.max(diff, dim = 1)[1][0,0]
+    cut = torch.median(point_set[:,dim])[0][0]
+    # NOTE: "left" holds the points above the cut and "right" those below it.
+    left_idx = torch.squeeze(torch.nonzero(point_set[:,dim] > cut))
+    right_idx = torch.squeeze(torch.nonzero(point_set[:,dim] < cut))
+    middle_idx = torch.squeeze(torch.nonzero(point_set[:,dim] == cut))
+    if torch.numel(left_idx) < num_points:
+        left_idx = torch.cat([left_idx, middle_idx[0:1].repeat(num_points - torch.numel(left_idx))], 0)
+    if torch.numel(right_idx) < num_points:
+        right_idx = torch.cat([right_idx, middle_idx[0:1].repeat(num_points - torch.numel(right_idx))], 0)
+
+    left_ps = torch.index_select(point_set, dim = 0, index = left_idx)
+    right_ps = torch.index_select(point_set, dim = 0, index = right_idx)
+    return left_ps, right_ps, dim
+
+
+
+
+# Train KDNet one sample per step on the first 1000 items of the ShapeNet part dataset.
+d = PartDataset(root = '../unsupervised3d/shapenetcore_partanno_segmentation_benchmark_v0', classification = True)
+
+print(len(d.classes))
+
+# kd-tree depth for 2048 leaves; rounded so float error cannot truncate the count.
+levels = int(round(np.log2(2048)))
+
+net = KDNet()
+optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
+
+
+for i in range(1000):
+    point_set, class_label = d[i]
+    target = Variable(class_label)
+
+    # tree[l] lists the point subsets at depth l; cutdim[l] records, once per
+    # child, the axis on which that child's parent at depth l was split.
+    tree = [[] for _ in range(levels + 1)]
+    cutdim = [[] for _ in range(levels)]
+    tree[0].append(point_set)
+    for level in range(levels):
+        for item in tree[level]:
+            left_ps, right_ps, dim = split_ps(item)
+            tree[level+1].append(left_ps)
+            tree[level+1].append(right_ps)
+            cutdim[level].append(dim)
+            cutdim[level].append(dim)
+    cutdim = [(torch.from_numpy(np.array(item).astype(np.int64))) for item in cutdim]
+    points = torch.stack(tree[-1])
+
+    # Stacked leaves become a single batch entry with the coordinate axis before the leaf axis.
+    points_v = Variable(torch.unsqueeze(torch.squeeze(points), 0)).transpose(2,1)
+
+    optimizer.zero_grad()
+    pred = net(points_v, cutdim)
+    loss = F.nll_loss(pred, target)
+    loss.backward()
+    optimizer.step()
+
+    print(loss)
+