implemented yolo loss functions + mean subtraction for retrained model

justadudewhohacks · justadudewhohacks · commit ca47cbbdffc5 · 2018-08-04T19:39:37.000+02:00
diff --git a/src/NetInput.ts b/src/NetInput.ts
@@ -3,7 +3,7 @@ import * as tf from '@tensorflow/tfjs-core';
 import { isTensor3D, isTensor4D } from './commons/isTensor';
 import { padToSquare } from './padToSquare';
 import { Point } from './Point';
-import { TResolvedNetInput } from './types';
+import { TResolvedNetInput, Dimensions } from './types';
 import { createCanvasFromMedia } from './utils';
 
 export class NetInput {
@@ -14,6 +14,7 @@ export class NetInput {
 
   private _inputDimensions: number[][] = []
   private _paddings: Point[] = []
+  private _inputSize: number = 0
 
   constructor(
     inputs: tf.Tensor4D | Array<TResolvedNetInput>,
@@ -81,6 +82,10 @@ export class NetInput {
     return this._paddings
   }
 
+  public get inputSize(): number { // read-only accessor for _inputSize: 0 initially, overwritten on every toBatchTensor(inputSize, ...) call
+    return this._inputSize
+  }
+
   public getInputDimensions(batchIdx: number): number[] {
     return this._inputDimensions[batchIdx]
   }
@@ -97,8 +102,26 @@ export class NetInput {
     return this._paddings[batchIdx]
   }
 
+  public getRelativePaddings(batchIdx: number): Point { // returns, per axis, (padding + input extent) / input extent for the input at batchIdx
+    return new Point(
+      (this.getPaddings(batchIdx).x + this.getInputWidth(batchIdx)) / this.getInputWidth(batchIdx), // x component: horizontal ratio; NOTE(review): not finite when the input width is 0
+      (this.getPaddings(batchIdx).y + this.getInputHeight(batchIdx)) / this.getInputHeight(batchIdx) // y component: vertical ratio; same caveat when the input height is 0
+    )
+  }
+
+  public getReshapedInputDimensions(batchIdx: number): Dimensions { // scales the input's height and width by inputSize / max(height, width) and floors both
+    const [h, w] = [this.getInputHeight(batchIdx), this.getInputWidth(batchIdx)]
+    const f = this.inputSize / Math.max(h, w) // common scale factor for both axes; inputSize is still 0 if toBatchTensor has not been called, giving a 0x0 result
+    return {
+      height: Math.floor(h * f), // rounded down to a whole number
+      width: Math.floor(w * f) // rounded down to a whole number
+    }
+  }
+
   public toBatchTensor(inputSize: number, isCenterInputs: boolean = true): tf.Tensor4D {
 
+    this._inputSize = inputSize
+
     return tf.tidy(() => {
 
       const inputTensors = this._inputs.map((inputTensor: tf.Tensor3D) => {
diff --git a/src/commons/extractWeightsFactory.ts b/src/commons/extractWeightsFactory.ts
@@ -2,7 +2,6 @@ export function extractWeightsFactory(weights: Float32Array) {
   let remainingWeights = weights
 
   function extractWeights(numWeights: number): Float32Array {
-    console.log(numWeights)
     const ret = remainingWeights.slice(0, numWeights)
     remainingWeights = remainingWeights.slice(numWeights)
     return ret
diff --git a/src/commons/normalize.ts b/src/commons/normalize.ts
@@ -0,0 +1,13 @@
+import * as tf from '@tensorflow/tfjs-core';
+
+export function normalize(x: tf.Tensor4D, meanRgb: number[]): tf.Tensor4D { // returns x minus a tensor holding the r, g, b means in its last axis
+  return tf.tidy(() => { // per the tfjs docs, tidy cleans up intermediate tensors created in the callback and keeps the returned one
+    const [r, g, b] = meanRgb // only the first three entries of meanRgb are read
+    const avg_r = tf.fill([...x.shape.slice(0, 3), 1], r) // shape = first three dims of x plus a trailing 1, every element set to r
+    const avg_g = tf.fill([...x.shape.slice(0, 3), 1], g) // same shape, filled with g
+    const avg_b = tf.fill([...x.shape.slice(0, 3), 1], b) // same shape, filled with b
+    const avg_rgb = tf.concat([avg_r, avg_g, avg_b], 3) // joined along axis 3 in r, g, b order, so the last dim has size 3
+
+    return tf.sub(x, avg_rgb) // assumes the last dim of x holds 3 channels in r, g, b order — TODO confirm against callers
+  })
+}
diff --git a/src/faceDetectionNet/FaceDetectionNet.ts b/src/faceDetectionNet/FaceDetectionNet.ts
@@ -81,19 +81,18 @@ export class FaceDetectionNet extends NeuralNetwork<NetParams> {
       minConfidence
     )
 
-    const paddedHeightRelative = (netInput.getPaddings(0).y + netInput.getInputHeight(0)) / netInput.getInputHeight(0)
-    const paddedWidthRelative = (netInput.getPaddings(0).x + netInput.getInputWidth(0)) / netInput.getInputWidth(0)
+    const paddings = netInput.getRelativePaddings(0)
 
     const results = indices
       .map(idx => {
         const [top, bottom] = [
           Math.max(0, boxes.get(idx, 0)),
           Math.min(1.0, boxes.get(idx, 2))
-        ].map(val => val * paddedHeightRelative)
+        ].map(val => val * paddings.y)
         const [left, right] = [
           Math.max(0, boxes.get(idx, 1)),
           Math.min(1.0, boxes.get(idx, 3))
-        ].map(val => val * paddedWidthRelative)
+        ].map(val => val * paddings.x)
         return new FaceDetection(
           scoresData[idx],
           new Rect(
diff --git a/src/faceRecognitionNet/FaceRecognitionNet.ts b/src/faceRecognitionNet/FaceRecognitionNet.ts
@@ -1,13 +1,13 @@
 import * as tf from '@tensorflow/tfjs-core';
 
 import { NeuralNetwork } from '../commons/NeuralNetwork';
+import { normalize } from '../commons/normalize';
 import { NetInput } from '../NetInput';
 import { toNetInput } from '../toNetInput';
 import { TNetInput } from '../types';
 import { convDown } from './convLayer';
 import { extractParams } from './extractParams';
 import { loadQuantizedParams } from './loadQuantizedParams';
-import { normalize } from './normalize';
 import { residual, residualDown } from './residualLayer';
 import { NetParams } from './types';
 
@@ -28,7 +28,8 @@ export class FaceRecognitionNet extends NeuralNetwork<NetParams> {
     return tf.tidy(() => {
       const batchTensor = input.toBatchTensor(150, true)
 
-      const normalized = normalize(batchTensor)
+      const meanRgb = [122.782, 117.001, 104.298]
+      const normalized = normalize(batchTensor, meanRgb).div(tf.scalar(256)) as tf.Tensor4D
 
       let out = convDown(normalized, params.conv32_down)
       out = tf.maxPool(out, 3, 2, 'valid')
diff --git a/src/faceRecognitionNet/normalize.ts b/src/faceRecognitionNet/normalize.ts
diff --git a/src/index.ts b/src/index.ts
@@ -5,6 +5,7 @@ export {
 }
 
 
+export * from './BoundingBox';
 export * from './FaceDetection';
 export * from './FullFaceDescription';
 export * from './NetInput';
@@ -24,4 +25,6 @@ export * from './mtcnn';
 export * from './padToSquare';
 export * from './tinyYolov2';
 export * from './toNetInput';
-export * from './utils'
+export * from './utils';
+
+export * from './types';
diff --git a/src/tinyYolov2/TinyYolov2.ts b/src/tinyYolov2/TinyYolov2.ts
@@ -4,13 +4,14 @@ import { BoundingBox } from '../BoundingBox';
 import { convLayer } from '../commons/convLayer';
 import { NeuralNetwork } from '../commons/NeuralNetwork';
 import { nonMaxSuppression } from '../commons/nonMaxSuppression';
+import { normalize } from '../commons/normalize';
 import { FaceDetection } from '../FaceDetection';
 import { NetInput } from '../NetInput';
 import { Point } from '../Point';
 import { toNetInput } from '../toNetInput';
 import { TNetInput } from '../types';
 import { sigmoid } from '../utils';
-import { BOX_ANCHORS, BOX_ANCHORS_SEPARABLE, INPUT_SIZES, IOU_THRESHOLD, NUM_BOXES } from './config';
+import { BOX_ANCHORS, BOX_ANCHORS_SEPARABLE, INPUT_SIZES, IOU_THRESHOLD, MEAN_RGB, NUM_BOXES } from './config';
 import { convWithBatchNorm } from './convWithBatchNorm';
 import { extractParams } from './extractParams';
 import { getDefaultParams } from './getDefaultParams';
@@ -45,7 +46,12 @@ export class TinyYolov2 extends NeuralNetwork<NetParams> {
     }
 
     const out = tf.tidy(() => {
-      const batchTensor = input.toBatchTensor(inputSize, false).div(tf.scalar(255)) as tf.Tensor4D
+
+      let batchTensor = input.toBatchTensor(inputSize, false)
+      batchTensor = this.hasSeparableConvs
+        ? normalize(batchTensor, MEAN_RGB)
+        : batchTensor
+      batchTensor = batchTensor.div(tf.scalar(256)) as tf.Tensor4D
 
       let out = convWithBatchNorm(batchTensor, params.conv0)
       out = tf.maxPool(out, [2, 2], [2, 2], 'same')
@@ -87,22 +93,23 @@ export class TinyYolov2 extends NeuralNetwork<NetParams> {
 
     const netInput = await toNetInput(input, true)
     const out = await this.forwardInput(netInput, inputSize)
+    const out0 = tf.tidy(() => tf.unstack(out)[0].expandDims()) as tf.Tensor4D
+
+    console.log(out0.shape)
 
     const inputDimensions = {
       width: netInput.getInputWidth(0),
       height: netInput.getInputHeight(0)
     }
 
-    const paddings = new Point(
-      (netInput.getPaddings(0).x + netInput.getInputWidth(0)) / netInput.getInputWidth(0),
-      (netInput.getPaddings(0).y + netInput.getInputHeight(0)) / netInput.getInputHeight(0)
-    )
+    const paddings = netInput.getRelativePaddings(0)
 
-    const results = this.postProcess(out, { scoreThreshold, paddings })
+    const results = this.postProcess(out0, { scoreThreshold, paddings })
     const boxes = results.map(res => res.box)
     const scores = results.map(res => res.score)
 
     out.dispose()
+    out0.dispose()
 
     const indices = nonMaxSuppression(
       boxes.map(box => box.rescale(inputSize)),
diff --git a/src/tinyYolov2/config.ts b/src/tinyYolov2/config.ts
@@ -18,4 +18,6 @@ export const BOX_ANCHORS_SEPARABLE = [
   new Point(2.882459, 3.518061),
   new Point(4.266906, 5.178857),
   new Point(9.041765, 10.66308)
-]
+]
+
+export const MEAN_RGB = [117.001, 114.697, 97.404] // passed as meanRgb to normalize() by TinyYolov2 when hasSeparableConvs is set; normalize reads it as [r, g, b]
diff --git a/src/tinyYolov2/convWithBatchNorm.ts b/src/tinyYolov2/convWithBatchNorm.ts
@@ -8,7 +8,7 @@ export function convWithBatchNorm(x: tf.Tensor4D, params: ConvWithBatchNorm | Se
     let out = tf.pad(x, [[0, 0], [1, 1], [1, 1], [0, 0]]) as tf.Tensor4D
 
     if (params instanceof SeparableConvParams) {
-      out = tf.separableConv2d(out, params.depthwise_filter, params.pointwise_filter, [1, 1], 'same')
+      out = tf.separableConv2d(out, params.depthwise_filter, params.pointwise_filter, [1, 1], 'valid')
       out = tf.add(out, params.bias)
     } else {
       out = tf.conv2d(out, params.conv.filters, [1, 1], 'valid')
diff --git a/src/tinyYolov2/extractParams.ts b/src/tinyYolov2/extractParams.ts
@@ -31,11 +31,10 @@ function extractorsFactory(extractWeights: ExtractWeightsFunction, paramMappings
   }
 
   function extractSeparableConvParams(channelsIn: number, channelsOut: number, mappedPrefix: string): SeparableConvParams {
-    console.log(mappedPrefix)
     const depthwise_filter = tf.tensor4d(extractWeights(3 * 3 * channelsIn), [3, 3, channelsIn, 1])
     const pointwise_filter = tf.tensor4d(extractWeights(channelsIn * channelsOut), [1, 1, channelsIn, channelsOut])
     const bias = tf.tensor1d(extractWeights(channelsOut))
-    console.log('done')
+
     paramMappings.push(
       { paramPath: `${mappedPrefix}/depthwise_filter` },
       { paramPath: `${mappedPrefix}/pointwise_filter` },
diff --git a/tools/train/faceLandmarks/faceLandmarksTrain.js b/tools/train/faceLandmarks/faceLandmarksTrain.js
@@ -104,16 +104,4 @@ async function getTrainData() {
   const batch2 = await loadImagesInBatch(allLandmarks.slice(4000), 4000)
 
   return batch1.concat(batch2)
-}
-
-// https://stackoverflow.com/questions/6274339/how-can-i-shuffle-an-array
-function shuffle(a) {
-  var j, x, i;
-  for (i = a.length - 1; i > 0; i--) {
-      j = Math.floor(Math.random() * (i + 1));
-      x = a[i];
-      a[i] = a[j];
-      a[j] = x;
-  }
-  return a;
 }
diff --git a/tools/train/faceLandmarks/faceLandmarksUi.js b/tools/train/faceLandmarks/faceLandmarksUi.js
diff --git a/tools/train/faceLandmarks/train.html b/tools/train/faceLandmarks/train.html
@@ -10,6 +10,7 @@
   <script src="faceLandmarksUi.js"></script>
   <script src="faceLandmarksTrain.js"></script>
   <script src="FileSaver.js"></script>
+  <script src="trainUtils.js"></script>
 </head>
 <body>
   <div id="navbar"></div>
diff --git a/tools/train/faceLandmarks/trainUtils.js b/tools/train/faceLandmarks/trainUtils.js
@@ -0,0 +1,13 @@
+
+
+// https://stackoverflow.com/questions/6274339/how-can-i-shuffle-an-array
+function shuffle(a) { // Fisher-Yates style shuffle: reorders array a in place and returns that same array
+  var j, x, i;
+  for (i = a.length - 1; i > 0; i--) { // walk from the last index down to 1; arrays of length 0 or 1 skip the loop
+      j = Math.floor(Math.random() * (i + 1)); // random integer index in [0, i]
+      x = a[i]; // swap a[i] and a[j], using x as the temporary
+      a[i] = a[j];
+      a[j] = x;
+  }
+  return a; // same array object that was passed in, not a copy
+}
diff --git a/tools/train/serveFaceLandmarks.js b/tools/train/serveFaceLandmarks.js
@@ -1,14 +1,12 @@
-require('./.env')
+require('./faceLandmarks/.env')
 
 const express = require('express')
 const path = require('path')
 
 const app = express()
 
-const viewsDir = path.join(__dirname, 'views')
-app.use(express.static(viewsDir))
-app.use(express.static(path.join(__dirname, './public')))
-app.use(express.static(path.join(__dirname, './tmp')))
+const publicDir = path.join(__dirname, './faceLandmarks')
+app.use(express.static(publicDir))
 app.use(express.static(path.join(__dirname, './node_modules/file-saver')))
 app.use(express.static(path.join(__dirname, '../../examples/public')))
 app.use(express.static(path.join(__dirname, '../../weights')))
@@ -18,6 +16,6 @@ const trainDataPath = path.resolve(process.env.TRAIN_DATA_PATH)
 app.use(express.static(trainDataPath))
 
 app.get('/', (req, res) => res.redirect('/face_landmarks'))
-app.get('/face_landmarks', (req, res) => res.sendFile(path.join(viewsDir, 'faceLandmarks.html')))
+app.get('/face_landmarks', (req, res) => res.sendFile(path.join(publicDir, 'train.html')))
 
 app.listen(3000, () => console.log('Listening on port 3000!'))
diff --git a/tools/train/serveTinyYolov2.js b/tools/train/serveTinyYolov2.js
@@ -0,0 +1,22 @@
+require('./tinyYolov2/.env') // required for its side effects only; presumably sets process.env.TRAIN_DATA_PATH read below — verify
+
+const express = require('express')
+const path = require('path')
+
+const app = express()
+
+const publicDir = path.join(__dirname, './tinyYolov2') // directory holding the pages served by this script
+app.use(express.static(publicDir)) // static mounts are tried in the order they are registered here
+app.use(express.static(path.join(__dirname, './node_modules/file-saver')))
+app.use(express.static(path.join(__dirname, '../../examples/public')))
+app.use(express.static(path.join(__dirname, '../../weights')))
+app.use(express.static(path.join(__dirname, '../../dist')))
+
+const trainDataPath = path.resolve(process.env.TRAIN_DATA_PATH) // NOTE(review): path.resolve is expected to throw if TRAIN_DATA_PATH is unset — confirm
+app.use(express.static(trainDataPath)) // training data is served from the root URL path as well
+
+//app.get('/', (req, res) => res.sendFile(path.join(publicDir, 'train.html')))
+//app.get('/', (req, res) => res.sendFile(path.join(publicDir, 'tinyYolov2FaceDetectionVideo.html')))
+app.get('/', (req, res) => res.sendFile(path.join(publicDir, 'testLoss.html'))) // active landing page; the two commented-out routes above are alternatives for '/'
+
+app.listen(3000, () => console.log('Listening on port 3000!')) // port is hard-coded to 3000, the same port serveFaceLandmarks.js listens on
diff --git a/tools/train/tinyYolov2/initWeights.html b/tools/train/tinyYolov2/initWeights.html
diff --git a/tools/train/tinyYolov2/loss.js b/tools/train/tinyYolov2/loss.js
diff --git a/tools/train/tinyYolov2/train.html b/tools/train/tinyYolov2/train.html