Square images before converting them to tensors to prevent a GPU memory leak

justadudewhohacks · justadudewhohacks · commit 896371af2f5f · 2018-08-14T09:17:36.000+02:00
diff --git a/tools/train/tinyYolov2/train.html b/tools/train/tinyYolov2/train.html
@@ -24,11 +24,13 @@
 
     const weightsUrl = `/tmp/tmp__224_${startIdx224}__320_${startIdx320}__416_${startIdx416}__608_${startIdx608}.weights`
 
-    //const weightsUrl = '/tmp/tmp_2_count_41000.weights'
     const fromEpoch = 0
 
     const trainOnlyMultibox = false
 
+    const trainSizes = [160, 224, 320, 416]
+    //const trainSizes = [608]
+
     window.debug = false
     window.logTrainSteps = true
 
@@ -38,8 +40,8 @@
     window.noObjectScale = 1
     window.coordScale = 1
 
-    const rescaleEveryNthBatch = Infinity
-    window.saveEveryNthDataIdx = 100
+    const rescaleEveryNthBatch = 100
+    window.saveEveryNthDataIdx = trainSizes.length * rescaleEveryNthBatch
     window.trainSteps = 4000
     //window.optimizer = tf.train.sgd(0.001)
     window.optimizer = tf.train.adam(0.001, 0.9, 0.999, 1e-8)
@@ -78,26 +80,23 @@
       console.log('ready')
     }
 
-    //const trainSizes = [224, 320, 416]
-    const trainSizes = [608]
-
     function logLossChange(lossType) {
       const { currentLoss, prevLoss, detectionFilenames } = window
       log(`${lossType} : ${faceapi.round(currentLoss[lossType])} (avg: ${faceapi.round(currentLoss[lossType] / detectionFilenames.length)}) (delta: ${currentLoss[lossType] - prevLoss[lossType]})`)
     }
 
     window.count = 0
 
-    function _onBatchProcessed(dataIdx, inputSize) {
+    function onBatchProcessed(dataIdx, inputSize) {
       window.count++
       const idx = (dataIdx + 1) + (window.epoch * window.detectionFilenames.length)
       console.log('dataIdx', dataIdx)
       if ((window.count % saveEveryNthDataIdx) === 0) {
-        saveWeights(window.net, `tmp_2_count_${window.count}.weights`)
+        saveWeights(window.net, `tmp_multiscale_count_${window.count}.weights`)
       }
     }
 
-    function onBatchProcessed(dataIdx, inputSize) {
+    function _onBatchProcessed(dataIdx, inputSize) {
       const idx = (dataIdx + 1) + (window.epoch * window.detectionFilenames.length)
       console.log('idx', idx)
       if ((idx % saveEveryNthDataIdx) === 0) {
diff --git a/tools/train/tinyYolov2/train.js b/tools/train/tinyYolov2/train.js
@@ -1,6 +1,6 @@
 const batchIdx = 0
 
-function minimize(groundTruthBoxes, batchInput, inputSize, batch) {
+function minimize(groundTruthBoxes, batchInput, inputSize, batch, { reshapedImgDims, paddings }) {
   const filename = batch.filenames[batchIdx]
   const { dataIdx } = batch
 
@@ -16,8 +16,8 @@ function minimize(groundTruthBoxes, batchInput, inputSize, batch) {
     } = computeLoss(
       outTensor,
       groundTruthBoxes,
-      batchInput.getReshapedInputDimensions(batchIdx),
-      batchInput.getRelativePaddings(batchIdx)
+      reshapedImgDims,
+      paddings
     )
 
     const losses = {
@@ -47,6 +47,35 @@ function minimize(groundTruthBoxes, batchInput, inputSize, batch) {
   }, true)
 }
 
+// Draws img, scaled so its longer side is 608 px, at the top-left of a 608x608 canvas.
+function imageToSquare(img) {
+  const scale = 608 / Math.max(img.height, img.width)
+  const width = scale * img.width
+  const height = scale * img.height
+  const targetCanvas = faceapi.createCanvas({ width: 608, height: 608 })
+  // drawImage resamples to width x height; getImageData/putImageData only copied unscaled pixels
+  targetCanvas.getContext('2d').drawImage(faceapi.createCanvasFromMedia(img), 0, 0, width, height)
+  return targetCanvas
+}
+
+// Returns the image dims after scaling its longer side to inputSize (floored),
+// plus a Point of (longer side / width, longer side / height) as paddings.
+function getPaddingsAndReshapedSize(img, inputSize) {
+  const { height: imgHeight, width: imgWidth } = img
+  const longerSide = Math.max(imgHeight, imgWidth)
+  const resizeFactor = inputSize / longerSide
+
+  // both sides use the same factor, so the aspect ratio is kept up to flooring
+  const reshapedImgDims = {
+    height: Math.floor(imgHeight * resizeFactor),
+    width: Math.floor(imgWidth * resizeFactor)
+  }
+
+  const paddings = new faceapi.Point(longerSide / imgWidth, longerSide / imgHeight)
+
+  return { paddings, reshapedImgDims }
+}
+
 async function trainStep(batchCreators, inputSizes, rescaleEveryNthBatch, onBatchProcessed = () => {}) {
 
   async function step(currentBatchCreators) {
@@ -61,7 +90,11 @@ async function trainStep(batchCreators, inputSizes, rescaleEveryNthBatch, onBatc
         const batch = await batchCreator()
         const { imgs, groundTruthBoxes, filenames, dataIdx } = batch
 
-        const batchInput = await faceapi.toNetInput(imgs)
+        const img = imgs[0]
+        const { reshapedImgDims, paddings } = getPaddingsAndReshapedSize(img, inputSize)
+        const squareImg = imageToSquare(img)
+
+        const batchInput = await faceapi.toNetInput(squareImg)
 
         const [imgHeight, imgWidth] = batchInput.inputs[batchIdx].shape
 
@@ -90,7 +123,8 @@ async function trainStep(batchCreators, inputSizes, rescaleEveryNthBatch, onBatc
         }
 
         let ts = Date.now()
-        const loss = minimize(filteredGroundTruthBoxes, batchInput, inputSize, batch)
+        const loss = minimize(filteredGroundTruthBoxes, batchInput, inputSize, batch, { reshapedImgDims, paddings })
+
         ts = Date.now() - ts
         if (window.logTrainSteps) {
           log(`trainStep time for dataIdx ${dataIdx} (${inputSize}): ${ts} ms`)
diff --git a/tools/train/tinyYolov2/verify.html b/tools/train/tinyYolov2/verify.html
@@ -144,7 +144,8 @@
       const startIdx416 = 31050
       const startIdx608 = 16520
 
-      const weightsUrl = `/tmp/tmp__224_${startIdx224}__320_${startIdx320}__416_${startIdx416}__608_${startIdx608}.weights`
+      //const weightsUrl = `/tmp/tmp__224_${startIdx224}__320_${startIdx320}__416_${startIdx416}__608_${startIdx608}.weights`
+      const weightsUrl = `/tmp/overfit_count_1500.weights`
 
       const weights = await loadNetWeights(weightsUrl)
       window.net = new faceapi.TinyYolov2(true)