
Commit 33296ef

init mtcnn + stage1 until bounding box extraction
1 parent ae742d9 commit 33296ef

File tree: 9 files changed, +136 additions, −76 deletions

src/index.ts

Lines changed: 1 addition & 0 deletions
@@ -17,6 +17,7 @@ export * from './faceDetectionNet';
 export * from './faceLandmarkNet';
 export * from './faceRecognitionNet';
 export * from './globalApi';
+export * from './mtcnn';
 export * from './padToSquare';
 export * from './toNetInput';
 export * from './utils'

src/mtcnn/Mtcnn.ts

Lines changed: 14 additions & 21 deletions
@@ -4,7 +4,9 @@ import { NeuralNetwork } from '../commons/NeuralNetwork';
 import { NetInput } from '../NetInput';
 import { toNetInput } from '../toNetInput';
 import { TNetInput } from '../types';
+import { bgrToRgbTensor } from './bgrToRgbTensor';
 import { extractParams } from './extractParams';
+import { pyramidDown } from './pyramidDown';
 import { stage1 } from './stage1';
 import { NetParams } from './types';

@@ -14,7 +16,12 @@ export class Mtcnn extends NeuralNetwork<NetParams> {
     super('Mtcnn')
   }

-  public forwardInput(input: NetInput, minFaceSize: number = 20, scaleFactor: number = 0.709): tf.Tensor2D {
+  public forwardInput(
+    input: NetInput,
+    minFaceSize: number = 20,
+    scaleFactor: number = 0.709,
+    scoreThresholds: number[] = [0.6, 0.7, 0.7]
+  ): tf.Tensor2D {

     const { params } = this

@@ -23,28 +30,14 @@ export class Mtcnn extends NeuralNetwork<NetParams> {
     }

     return tf.tidy(() => {
-      const imgTensor = tf.expandDims(input.inputs[0]).toFloat() as tf.Tensor4D
+      // TODO: expects bgr input?
+      let imgTensor = bgrToRgbTensor(
+        tf.expandDims(input.inputs[0]).toFloat() as tf.Tensor4D
+      )

-      function pyramidDown(minFaceSize: number, scaleFactor: number, dims: number[]): number[] {
+      const scales = pyramidDown(minFaceSize, scaleFactor, imgTensor.shape.slice(1))

-        const [height, width] = dims
-        const m = 12 / minFaceSize
-
-        const scales = []
-
-        let minLayer = Math.min(height, width) * m
-        let exp = 0
-        while (minLayer >= 12) {
-          scales.push(m * Math.pow(scaleFactor, exp))
-          minLayer = minLayer * scaleFactor
-          exp += 1
-        }
-
-        return scales
-      }
-
-      const scales = pyramidDown(minFaceSize, scaleFactor, imgTensor.shape)
-      const out1 = stage1(imgTensor, scales, params.pnet)
+      const out1 = stage1(imgTensor, scales, scoreThresholds[0], params.pnet)

       return tf.tensor2d([0], [1, 1])
     })

src/mtcnn/PNet.ts

Lines changed: 6 additions & 7 deletions
@@ -4,16 +4,15 @@ import { convLayer } from '../commons/convLayer';
 import { sharedLayer } from './sharedLayers';
 import { PNetParams } from './types';

-export function PNet(x: tf.Tensor4D, params: PNetParams): { prob: tf.Tensor3D, convOut: tf.Tensor4D } {
+export function PNet(x: tf.Tensor4D, params: PNetParams): { prob: tf.Tensor4D, regions: tf.Tensor4D } {
   return tf.tidy(() => {

-    let out = sharedLayer(x, params)
+    let out = sharedLayer(x, params, true)
     const conv = convLayer(out, params.conv4_1, 'valid')
-    // TODO: tf.reduce_max <=> tf.max ?
-    const logits = tf.sub(conv, tf.max(conv, 3))
-    const prob = tf.softmax(logits, 3) as tf.Tensor3D
-    const convOut = convLayer(out, params.conv4_2, 'valid')
+    const max = tf.expandDims(tf.max(conv, 3), 3)
+    const prob = tf.softmax(tf.sub(conv, max), 3) as tf.Tensor4D
+    const regions = convLayer(out, params.conv4_2, 'valid')

-    return { prob, convOut }
+    return { prob, regions }
   })
 }

src/mtcnn/bgrToRgbTensor.ts

Lines changed: 7 additions & 0 deletions
@@ -0,0 +1,7 @@
+import * as tf from '@tensorflow/tfjs-core';
+
+export function bgrToRgbTensor(tensor: tf.Tensor4D): tf.Tensor4D {
+  return tf.tidy(
+    () => tf.stack(tf.unstack(tensor, 3), 3)
+  ) as tf.Tensor4D
+}
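
Note: as committed, the helper unstacks and restacks the channels in their original order, so the channel order is returned unchanged (see the "// TODO: expects bgr input?" comment in Mtcnn.ts). For illustration only, a hedged sketch of how an actual BGR→RGB swap could look by reversing the unstacked channel list; this is not part of this commit:

import * as tf from '@tensorflow/tfjs-core';

// Illustration: reversing the per-channel tensors swaps B and R (BGR <-> RGB).
export function reverseChannels(tensor: tf.Tensor4D): tf.Tensor4D {
  return tf.tidy(
    () => tf.stack(tf.unstack(tensor, 3).reverse(), 3)
  ) as tf.Tensor4D
}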

src/mtcnn/config.ts

Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
+export const CELL_STRIDE = 2
+export const CELL_SIZE = 12

src/mtcnn/extractParams.ts

Lines changed: 5 additions & 1 deletion
@@ -40,7 +40,7 @@ function extractorsFactory(extractWeights: ExtractWeightsFunction, paramMappings

 function extractRNetParams(): RNetParams {

-  const sharedParams = extractSharedParams([3, 28, 48, 64], 'rnet')
+  const sharedParams = extractSharedParams([3, 28, 48, 64], 'rnet', true)
   const fc1 = extractFCParams(576, 128, 'rnet/fc1')
   const prelu4_alpha = extractPReluParams(128, 'rnet/prelu4_alpha')
   const fc2_1 = extractFCParams(128, 2, 'rnet/fc2_1')

@@ -90,5 +90,9 @@ export function extractParams(weights: Float32Array): { params: NetParams, param
   const rnet = extractRNetParams()
   const onet = extractONetParams()

+  if (getRemainingWeights().length !== 0) {
+    throw new Error(`weights remaining after extract: ${getRemainingWeights().length}`)
+  }
+
   return { params: { pnet, rnet, onet }, paramMappings }
 }

src/mtcnn/pyramidDown.ts

Lines changed: 19 additions & 0 deletions
@@ -0,0 +1,19 @@
+import { CELL_SIZE } from './config';
+
+export function pyramidDown(minFaceSize: number, scaleFactor: number, dims: number[]): number[] {
+
+  const [height, width] = dims
+  const m = CELL_SIZE / minFaceSize
+
+  const scales = []
+
+  let minLayer = Math.min(height, width) * m
+  let exp = 0
+  while (minLayer >= 12) {
+    scales.push(m * Math.pow(scaleFactor, exp))
+    minLayer = minLayer * scaleFactor
+    exp += 1
+  }
+
+  return scales
+}
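
Note: with the defaults used by Mtcnn.forwardInput (minFaceSize = 20, scaleFactor = 0.709), the first scale is m = CELL_SIZE / 20 = 0.6 and each further pyramid level shrinks by 0.709 until the scaled shorter side drops below 12 px. A small sketch, assuming the hypothetical 820×461 input shape that also appears in the stage1 debug code:

import { pyramidDown } from './pyramidDown';

// [height, width] = [820, 461]; the shorter side (461) bounds the pyramid depth.
const scales = pyramidDown(20, 0.709, [820, 461]);
// 0.6, 0.6 * 0.709, 0.6 * 0.709^2, ... — 10 levels here, since 461 * 0.6 * 0.709^10 < 12
console.log(scales);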

src/mtcnn/stage1.ts

Lines changed: 82 additions & 47 deletions
@@ -1,78 +1,113 @@
 import * as tf from '@tensorflow/tfjs-core';

+import { Point } from '../Point';
+import { CELL_SIZE, CELL_STRIDE } from './config';
 import { PNet } from './PNet';
 import { PNetParams } from './types';

 function rescaleAndNormalize(x: tf.Tensor4D, scale: number): tf.Tensor4D {
   return tf.tidy(() => {
-    const [height, width] = x.shape
-    const resized = tf.image.resizeBilinear(x, [height * scale, width * scale])

-    return tf.mul(tf.sub(resized, tf.scalar(127.5)), tf.scalar(0.0078125))
-
-    // TODO: ?
-    // img_x = np.expand_dims(scaled_image, 0)
-    // img_y = np.transpose(img_x, (0, 2, 1, 3))
+    const [height, width] = x.shape.slice(1)
+    const resized = tf.image.resizeBilinear(x, [Math.floor(height * scale), Math.floor(width * scale)])
+    const normalized = tf.mul(tf.sub(resized, tf.scalar(127.5)), tf.scalar(0.0078125))

+    return (tf.transpose(normalized, [0, 2, 1, 3]) as tf.Tensor4D)
   })
 }

-export function stage1(x: tf.Tensor4D, scales: number[], params: PNetParams) {
-  return tf.tidy(() => {

-    const boxes = scales.map((scale) => {
-      const resized = rescaleAndNormalize(x, scale)
-      const { prob, convOut } = PNet(resized, params)
-    })
+function extractBoundingBoxes(
+  scores: tf.Tensor2D,
+  regions: tf.Tensor3D,
+  scale: number,
+  scoreThreshold: number
+) {

-  })
-}
+  // TODO: fix this!, maybe better to use tf.gather here
+  const indices2D: Point[] = []
+  for (let y = 0; y < scores.shape[0]; y++) {
+    for (let x = 0; x < scores.shape[1]; x++) {
+      if (scores.get(y, x) >= scoreThreshold) {
+        indices2D.push(new Point(x, y))
+      }
+    }
+  }
+
+  if (!indices2D.length) {
+    return null
+  }
+
+  return tf.tidy(() => {

-/*
+    const indicesTensor = tf.tensor2d(
+      indices2D.map(pt => [pt.y, pt.x]),
+      [indices2D.length, 2]
+    )

-for scale in scales:
-    scaled_image = self.__scale_image(image, scale)
+    const cellsStart = tf.round(
+      indicesTensor.mul(tf.scalar(CELL_STRIDE)).add(tf.scalar(1)).div(tf.scalar(scale))
+    ) as tf.Tensor2D
+    const cellsEnd = tf.round(
+      indicesTensor.mul(tf.scalar(CELL_STRIDE)).add(tf.scalar(CELL_SIZE)).div(tf.scalar(scale))
+    ) as tf.Tensor2D

-    img_x = np.expand_dims(scaled_image, 0)
-    img_y = np.transpose(img_x, (0, 2, 1, 3))
+    const scoresTensor = tf.tensor1d(indices2D.map(pt => scores.get(pt.y, pt.x)))

-    out = self.__pnet.feed(img_y)
+    const candidateRegions = indices2D.map(c => ({
+      left: regions.get(c.y, c.x, 0),
+      top: regions.get(c.y, c.x, 1),
+      right: regions.get(c.y, c.x, 2),
+      bottom: regions.get(c.y, c.x, 3)
+    }))

-    out0 = np.transpose(out[0], (0, 2, 1, 3))
-    out1 = np.transpose(out[1], (0, 2, 1, 3))
+    const regionsTensor = tf.tensor2d(
+      candidateRegions.map(r => [r.left, r.top, r.right, r.bottom]),
+      [candidateRegions.length, 4]
+    )

-    boxes, _ = self.__generate_bounding_box(out1[0, :, :, 1].copy(),
-                                            out0[0, :, :, :].copy(), scale, self.__steps_threshold[0])
+    const boxesTensor = tf.concat2d([cellsStart, cellsEnd, scoresTensor.as2D(scoresTensor.size, 1), regionsTensor], 1)

-    # inter-scale nms
-    pick = self.__nms(boxes.copy(), 0.5, 'Union')
-    if boxes.size > 0 and pick.size > 0:
-        boxes = boxes[pick, :]
-        total_boxes = np.append(total_boxes, boxes, axis=0)
+    return boxesTensor
+  })
+}

+// TODO: debug
+declare const window: any

+export function stage1(x: tf.Tensor4D, scales: number[], scoreThreshold: number, params: PNetParams) {
+  return tf.tidy(() => {

+    const boxes = scales.map((scale, i) => {
+      let resized = i === 0
+        // TODO: debug
+        ? tf.tensor4d(window.resizedData, [1, 820, 461, 3])

-    numboxes = total_boxes.shape[0]
+        : rescaleAndNormalize(x, scale)

-    if numboxes > 0:
-        pick = self.__nms(total_boxes.copy(), 0.7, 'Union')
-        total_boxes = total_boxes[pick, :]
+      const { prob, regions } = PNet(resized, params)

-        regw = total_boxes[:, 2] - total_boxes[:, 0]
-        regh = total_boxes[:, 3] - total_boxes[:, 1]
+      const scores = tf.unstack(prob, 3)[1]
+      const [sh, sw] = scores.shape.slice(1)
+      const [rh, rw] = regions.shape.slice(1)

-        qq1 = total_boxes[:, 0] + total_boxes[:, 5] * regw
-        qq2 = total_boxes[:, 1] + total_boxes[:, 6] * regh
-        qq3 = total_boxes[:, 2] + total_boxes[:, 7] * regw
-        qq4 = total_boxes[:, 3] + total_boxes[:, 8] * regh

-        total_boxes = np.transpose(np.vstack([qq1, qq2, qq3, qq4, total_boxes[:, 4]]))
-        total_boxes = self.__rerec(total_boxes.copy())
+      const boxes = extractBoundingBoxes(
+        scores.as2D(sh, sw),
+        regions.as3D(rh, rw, 4),
+        scale,
+        scoreThreshold
+      )

-        total_boxes[:, 0:4] = np.fix(total_boxes[:, 0:4]).astype(np.int32)
-        status = StageStatus(self.__pad(total_boxes.copy(), stage_status.width, stage_status.height),
-                             width=stage_status.width, height=stage_status.height)
+      // TODO: debug
+      if (!boxes) {
+        console.log('no boxes for scale', scale)
+        return
+      }
+      // TODO: debug
+      i === 0 && (window.boxes = boxes.dataSync())

-    return total_boxes, status
-*/
+    })
+
+  })
+}
File renamed without changes.
