finalize architecture

justadudewhohacks · justadudewhohacks · commit 45c9d6f8757f · 2018-06-05T22:54:06.000+02:00
diff --git a/src/faceDetectionNet/index.ts b/src/faceDetectionNet/index.ts
@@ -15,7 +15,7 @@ function fromData(input: number[]): tf.Tensor4D {
     throw new Error(`invalid input size: ${dim}x${dim}x3 (array length: ${input.length})`)
   }
 
-  return tf.tensor4d(input as number[], [1, 580, 580, 3])
+  return tf.tensor4d(input as number[], [1, dim, dim, 3])
 }
 
 function fromImageData(input: ImageData[]) {
@@ -31,24 +31,30 @@ function fromImageData(input: ImageData[]) {
   return tf.cast(tf.concat(imgTensors, 0), 'float32')
 }
 
+// Turns any supported input into the network's image tensor: a single ImageData
+// or an ImageData array goes through fromImageData, anything else is passed to
+// fromData as a flat number array. Arrays are classified by their first element.
+function getImgTensor(input: ImageData|ImageData[]|number[]) {
+  return tf.tidy(() => {
+    // a lone ImageData is wrapped into a one-element array
+    if (input instanceof ImageData) {
+      return fromImageData([input])
+    }
+
+    if (input[0] instanceof ImageData) {
+      return fromImageData(input as ImageData[])
+    }
+
+    return fromData(input as number[])
+  })
+}
+
 export function faceDetectionNet(weights: Float32Array) {
   const params = extractParams(weights)
 
-  async function forward(input: ImageData|ImageData[]|number[]) {
+  function forwardTensor(imgTensor: tf.Tensor4D) {
     return tf.tidy(() => {
 
-      const imgDataArray = input instanceof ImageData
-        ? [input]
-        : (
-          input[0] instanceof ImageData
-            ? input as ImageData[]
-            : null
-        )
-
-      const imgTensor = imgDataArray !== null
-        ? fromImageData(imgDataArray)
-        : fromData(input as number[])
-
       const resized = resizeLayer(imgTensor) as tf.Tensor4D
       const features = mobileNetV1(resized, params.mobilenetv1_params)
 
@@ -57,14 +63,54 @@ export function faceDetectionNet(weights: Float32Array) {
         classPredictions
       } = predictionLayer(features.out, features.conv11, params.prediction_layer_params)
 
-      const decoded = outputLayer(boxPredictions, classPredictions, params.output_layer_params)
+      return outputLayer(boxPredictions, classPredictions, params.output_layer_params)
+    })
+  }
+
+  // TODO debug output (the tidy scope also frees the intermediate input tensor)
+  function forward(input: ImageData|ImageData[]|number[]) {
+    return tf.tidy(() => {
+      return forwardTensor(getImgTensor(input))
+    })
+  }
 
-      return decoded
+  // Runs the detector on `input` and resolves to a { score, box } entry for each
+  // prediction scoring above `minConfidence`; box edges are scaled to the input
+  // tensor size and clamped. Every tensor created here is disposed before returning.
+  async function locateFaces(input: ImageData|ImageData[]|number[], minConfidence: number = 0.8) {
+    const imgTensor = getImgTensor(input)
+    const [, height, width] = imgTensor.shape
+    const { boxes: _boxes, scores: _scores } = forwardTensor(imgTensor)
+    imgTensor.dispose()
+
+    // TODO batches: only the first image is reported, release the other results
+    const boxes = _boxes[0]
+    const scores = _scores[0]
+    _boxes.slice(1).forEach(t => t.dispose())
+    _scores.slice(1).forEach(t => t.dispose())
+
+    // TODO find a better way to filter by minConfidence
+    const data = await scores.data()
+    // NOTE(review): box columns are read as [left, right, top, bottom] - confirm
+    const faces = Array.from(data)
+      .map((score, idx) => ({ score, idx }))
+      .filter(({ score }) => minConfidence < score)
+      .map(({ score, idx }) => ({
+        score,
+        box: {
+          left: Math.max(0, width * boxes.get(idx, 0)),
+          right: Math.min(width, width * boxes.get(idx, 1)),
+          top: Math.max(0, height * boxes.get(idx, 2)),
+          bottom: Math.min(height, height * boxes.get(idx, 3))
+        }
+      }))
+    boxes.dispose()
+    scores.dispose()
+    return faces
-    })
   }
 
   return {
-    forward
+    forward,
+    locateFaces
   }
 }
diff --git a/src/faceDetectionNet/outputLayer.ts b/src/faceDetectionNet/outputLayer.ts
@@ -2,12 +2,6 @@ import * as tf from '@tensorflow/tfjs-core';
 
 import { FaceDetectionNet } from './types';
 
-
-function batchMultiClassNonMaxSuppressionLayer(x0: tf.Tensor2D, x1: tf.Tensor2D) {
-  // TODO
-  return x0
-}
-
 function getCenterCoordinatesAndSizesLayer(x: tf.Tensor2D) {
   const vec = tf.unstack(tf.transpose(x, [1, 0]))
 
@@ -27,7 +21,7 @@ function getCenterCoordinatesAndSizesLayer(x: tf.Tensor2D) {
   }
 }
 
-function decodeLayer(x0: tf.Tensor2D, x1: tf.Tensor2D) {
+function decodeBoxesLayer(x0: tf.Tensor2D, x1: tf.Tensor2D) {
   const {
     sizes,
     centers
@@ -61,15 +55,30 @@ export function outputLayer(
 
     const batchSize = boxPredictions.shape[0]
 
-    const decoded = decodeLayer(
+    let boxes = decodeBoxesLayer(
       tf.reshape(tf.tile(params.extra_dim, [batchSize, 1, 1]), [-1, 4]) as tf.Tensor2D,
       tf.reshape(boxPredictions, [-1, 4]) as tf.Tensor2D
     )
+    boxes = tf.reshape(
+      boxes,
+      [batchSize, (boxes.shape[0] / batchSize), 4]
+    )
+
+    const scoresAndClasses = tf.sigmoid(tf.slice(classPredictions, [0, 0, 1], [-1, -1, -1]))
+    let scores = tf.slice(scoresAndClasses, [0, 0, 0], [-1, -1, 1]) as tf.Tensor
+
+    scores = tf.reshape(
+      scores,
+      [batchSize, scores.shape[1]]
+    )
 
-    const in1 = tf.sigmoid(tf.slice(classPredictions, [0, 0, 1], [-1, -1, -1]))
-    const in2 = tf.expandDims(tf.reshape(decoded, [batchSize, 5118, 4]), 2)
+    const boxesByBatch = tf.unstack(boxes) as tf.Tensor2D[]
+    const scoresByBatch = tf.unstack(scores) as tf.Tensor1D[]
 
-    return decoded
+    return {
+      boxes: boxesByBatch,
+      scores: scoresByBatch
+    }
 
   })
 }
diff --git a/src/index.ts b/src/index.ts
@@ -2,10 +2,12 @@ import { euclideanDistance } from './euclideanDistance';
 import { faceDetectionNet } from './faceDetectionNet';
 import { faceRecognitionNet } from './faceRecognitionNet';
 import { normalize } from './normalize';
+import * as tf from '@tensorflow/tfjs-core';
 
 export {
   euclideanDistance,
   faceDetectionNet,
   faceRecognitionNet,
-  normalize
+  normalize,
+  tf
 }