implemented mobilenetv1 for face detector

justadudewhohacks · justadudewhohacks · commit 2efff4f7b489 · 2018-06-04T14:40:41.000+02:00
diff --git a/src/faceDetectionNet/extractParams.ts b/src/faceDetectionNet/extractParams.ts
@@ -5,27 +5,27 @@ import { FaceDetectionNet } from './types';
 function mobilenetV1WeightsExtractorsFactory(extractWeights: (numWeights: number) => Float32Array) {
 
   function extractDepthwiseConvParams(numChannels: number): FaceDetectionNet.MobileNetV1.DepthwiseConvParams {
-    const weights = tf.tensor4d(extractWeights(3 * 3 * numChannels), [3, 3, numChannels, 1])
-    const batch_norm_gamma = tf.tensor1d(extractWeights(numChannels))
-    const batch_norm_beta = tf.tensor1d(extractWeights(numChannels))
+    const filters = tf.tensor4d(extractWeights(3 * 3 * numChannels), [3, 3, numChannels, 1])
+    const batch_norm_scale = tf.tensor1d(extractWeights(numChannels))
+    const batch_norm_offset = tf.tensor1d(extractWeights(numChannels))
     const batch_norm_mean = tf.tensor1d(extractWeights(numChannels))
     const batch_norm_variance = tf.tensor1d(extractWeights(numChannels))
 
     return {
-      weights,
-      batch_norm_gamma,
-      batch_norm_beta,
+      filters,
+      batch_norm_scale,
+      batch_norm_offset,
       batch_norm_mean,
       batch_norm_variance
     }
   }
 
   function extractPointwiseConvParams(channelsIn: number, channelsOut: number): FaceDetectionNet.MobileNetV1.PointwiseConvParams {
-    const weights = tf.tensor4d(extractWeights(channelsIn * channelsOut), [1, 1, channelsIn, channelsOut])
+    const filters = tf.tensor4d(extractWeights(channelsIn * channelsOut), [1, 1, channelsIn, channelsOut])
     const batch_norm_offset = tf.tensor1d(extractWeights(channelsOut))
 
     return {
-      weights,
+      filters,
       batch_norm_offset
     }
   }
@@ -59,7 +59,7 @@ function extractorsFactory(extractWeights: (numWeights: number) => Float32Array)
   function extractMobilenetV1Params(): FaceDetectionNet.MobileNetV1.Params {
 
     const conv_0_params = {
-      weights: tf.tensor4d(extractWeights(3 * 3 * 3 * 32), [3, 3, 3, 32]),
+      filters: tf.tensor4d(extractWeights(3 * 3 * 3 * 32), [3, 3, 3, 32]),
       batch_norm_offset: tf.tensor1d(extractWeights(32))
 
     }
diff --git a/src/faceDetectionNet/index.ts b/src/faceDetectionNet/index.ts
@@ -1,25 +1,58 @@
 import * as tf from '@tensorflow/tfjs-core';
 
-import { resizeLayer } from './resizeLayer';
+import { isFloat } from '../utils';
 import { extractParams } from './extractParams';
 import { mobileNetV1 } from './mobileNetV1';
+import { resizeLayer } from './resizeLayer';
+
+/**
+ * Builds a single-image batch tensor from a flat array of pixel values.
+ *
+ * The array is interpreted as a square image with 3 channels, so its length
+ * must equal dim * dim * 3 for some positive integer dim.
+ *
+ * @param input Flat array of channel values.
+ * @returns A tensor of shape [1, dim, dim, 3].
+ * @throws Error if the array is empty or its length does not describe a
+ *   square 3-channel image.
+ */
+function fromData(input: number[]): tf.Tensor4D {
+  const pxPerChannel = input.length / 3
+  const dim = Math.sqrt(pxPerChannel)
+
+  // An empty array yields dim === 0, which passes the integer check but is not a usable image.
+  if (dim === 0 || isFloat(dim)) {
+    throw new Error(`invalid input size: ${dim}x${dim}x3 (array length: ${input.length})`)
+  }
+
+  // Use the validated side length instead of a fixed 580, so every square input that passes the check gets a matching shape.
+  return tf.tensor4d(input, [1, dim, dim, 3])
+}
+
+/**
+ * Converts a list of ImageData objects into one float32 batch tensor.
+ *
+ * Each image is turned into a tensor with tf.fromPixels, given a leading
+ * axis via tf.expandDims, and the results are concatenated along axis 0.
+ *
+ * @param input Images to batch together.
+ * @returns The concatenated tensor cast to 'float32'.
+ * @throws Error naming the first index whose entry is not an ImageData instance.
+ */
+function fromImageData(input: ImageData[]) {
+  // Validate every entry up front so no tensors are created for a bad batch.
+  for (let idx = 0; idx < input.length; idx++) {
+    if (!(input[idx] instanceof ImageData)) {
+      throw new Error(`expected input at index ${idx} to be instanceof ImageData`)
+    }
+  }
+
+  const batch = input.map(data => tf.expandDims(tf.fromPixels(data), 0)) as tf.Tensor4D[]
+  const stacked = tf.concat(batch, 0)
+
+  return tf.cast(stacked, 'float32')
+}
 
 export function faceDetectionNet(weights: Float32Array) {
   const params = extractParams(weights)
 
-  async function forward(input: ImageData|ImageData[]) {
+  async function forward(input: ImageData|ImageData[]|number[]) {
+    return tf.tidy(() => {
 
-    const imgTensors = (input instanceof ImageData ? [input] : input)
-      .map(data => tf.fromPixels(data))
-      .map(data => tf.expandDims(data, 0)) as tf.Tensor4D[]
+      const imgDataArray = input instanceof ImageData
+        ? [input]
+        : (
+          input[0] instanceof ImageData
+            ? input as ImageData[]
+            : null
+        )
 
-    const imgTensor = tf.cast(tf.concat(imgTensors, 0), 'float32')
+      const imgTensor = imgDataArray !== null
+        ? fromImageData(imgDataArray)
+        : fromData(input as number[])
 
-    let out = resizeLayer(imgTensor) as tf.Tensor4D
+      let out = resizeLayer(imgTensor) as tf.Tensor4D
+      out = mobileNetV1(out, params.mobilenetv1_params)
 
-    out = mobileNetV1(out, params.mobilenetv1_params)
+      return out
 
-    return out
+    })
   }
 
   return {
diff --git a/src/faceDetectionNet/mobileNetV1.ts b/src/faceDetectionNet/mobileNetV1.ts
@@ -2,6 +2,57 @@ import * as tf from '@tensorflow/tfjs-core';
 
 import { FaceDetectionNet } from './types';
 
+// Epsilon handed to tf.batchNormalization in depthwiseConvLayer.
+// NOTE(review): looks like 0.001 at float32 precision — confirm against the source model.
+const epsilon = 0.0010000000474974513
+
+/**
+ * Depthwise convolution block: depthwise conv -> batch normalization -> ReLU.
+ *
+ * @param x Input tensor.
+ * @param params Depthwise filters plus the batch-norm mean, variance, scale and offset.
+ * @param strides Strides passed to tf.depthwiseConv2d.
+ * @returns The activated output tensor.
+ */
+function depthwiseConvLayer(
+  x: tf.Tensor4D,
+  params: FaceDetectionNet.MobileNetV1.DepthwiseConvParams,
+  strides: [number, number]
+) {
+  // tf.tidy releases the intermediate tensors created inside the callback.
+  return tf.tidy(() => {
+    const convolved = tf.depthwiseConv2d(x, params.filters, strides, 'same')
+    const normalized = tf.batchNormalization<tf.Rank.R4>(
+      convolved,
+      params.batch_norm_mean,
+      params.batch_norm_variance,
+      epsilon,
+      params.batch_norm_scale,
+      params.batch_norm_offset
+    )
+
+    return tf.relu(normalized)
+  })
+}
+
+/**
+ * Convolution block: conv2d -> add the batch-norm offset -> ReLU.
+ *
+ * @param x Input tensor.
+ * @param params Convolution filters and the offset added to the convolution output.
+ * @param strides Strides passed to tf.conv2d.
+ * @returns The activated output tensor.
+ */
+function pointwiseConvLayer(
+  x: tf.Tensor4D,
+  params: FaceDetectionNet.MobileNetV1.PointwiseConvParams,
+  strides: [number, number]
+) {
+  // tf.tidy releases the intermediate tensors created inside the callback.
+  return tf.tidy(() => {
+    const convolved = tf.conv2d(x, params.filters, strides, 'same')
+    // Annotated so the generic result of tf.add keeps the 4D tensor type.
+    const shifted: tf.Tensor4D = tf.add(convolved, params.batch_norm_offset)
+
+    return tf.relu(shifted)
+  })
+}
+
+/**
+ * Returns the strides for the given layer index.
+ *
+ * @param layerIdx Layer index; 2, 4, 6 and 12 are the layers that use strides [2, 2].
+ * @returns [2, 2] for those layers, [1, 1] for every other index.
+ */
+function getStridesForLayerIdx(layerIdx: number): [number, number] {
+  switch (layerIdx) {
+    case 2:
+    case 4:
+    case 6:
+    case 12:
+      return [2, 2]
+    default:
+      return [1, 1]
+  }
+}
+
 export function mobileNetV1(x: tf.Tensor4D, params: FaceDetectionNet.MobileNetV1.Params) {
-  return x
+  return tf.tidy(() => {
+
+    let out = pointwiseConvLayer(x, params.conv_0_params, [2, 2])
+    params.conv_pair_params.forEach((param, i) => {
+      const depthwiseConvStrides = getStridesForLayerIdx(i + 1)
+      out = depthwiseConvLayer(out, param.depthwise_conv_params, depthwiseConvStrides)
+      out = pointwiseConvLayer(out, param.pointwise_conv_params, [1, 1])
+    })
+    return out
+
+  })
 }
diff --git a/src/faceDetectionNet/resizeLayer.ts b/src/faceDetectionNet/resizeLayer.ts
@@ -1,11 +1,14 @@
 import * as tf from '@tensorflow/tfjs-core';
 
-// TODO: hardcoded params
 const resizedImageSize = [512, 512] as [number, number]
 const weight = tf.scalar(0.007843137718737125)
 const bias = tf.scalar(1)
 
 export function resizeLayer(x: tf.Tensor4D) {
-  const resized = tf.image.resizeBilinear(x, resizedImageSize, false)
-  return tf.sub(tf.mul(resized, weight), bias)
+  return tf.tidy(() => {
+
+    const resized = tf.image.resizeBilinear(x, resizedImageSize, false)
+    return tf.sub(tf.mul(resized, weight), bias)
+
+  })
 }
diff --git a/src/faceDetectionNet/types.ts b/src/faceDetectionNet/types.ts
@@ -5,15 +5,15 @@ export namespace FaceDetectionNet {
   export namespace MobileNetV1 {
 
     export type DepthwiseConvParams = {
-      weights: tf.Tensor4D // [3, 3, ch, 1]
-      batch_norm_gamma: tf.Tensor1D
-      batch_norm_beta: tf.Tensor1D
+      filters: tf.Tensor4D
+      batch_norm_scale: tf.Tensor1D
+      batch_norm_offset: tf.Tensor1D
       batch_norm_mean: tf.Tensor1D
       batch_norm_variance: tf.Tensor1D
     }
 
     export type PointwiseConvParams = {
-      weights: tf.Tensor4D // [1, 1, ch_in, ch_out]
+      filters: tf.Tensor4D
       batch_norm_offset: tf.Tensor1D
     }
 
diff --git a/src/faceRecognitionNet/extractParams.ts b/src/faceRecognitionNet/extractParams.ts
@@ -1,11 +1,8 @@
 import * as tf from '@tensorflow/tfjs-core';
 
+import { isFloat } from '../utils';
 import { FaceRecognitionNet } from './types';
 
-function isFloat(num: number) {
-  return num % 1 !== 0
-}
-
 function extractorsFactory(extractWeights: (numWeights: number) => Float32Array) {
 
   function extractFilterValues(numFilterValues: number, numFilters: number, filterSize: number): tf.Tensor4D {
diff --git a/src/utils.ts b/src/utils.ts
@@ -0,0 +1,3 @@
+/**
+ * Reports whether a number has a fractional part.
+ *
+ * NaN and +/-Infinity also yield true, because their remainder modulo 1 is NaN.
+ *
+ * @param num Value to inspect.
+ * @returns true when `num % 1` is not 0, false otherwise.
+ */
+export function isFloat(num: number) {
+  const remainder = num % 1
+  return remainder !== 0
+}

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+export function isFloat(num: number) {`
	`2`	`+ return num % 1 !== 0`
	`3`	`+}`