
Commit 5e957ec

implemented stage2
1 parent 084cd30 commit 5e957ec

11 files changed, +223 -28 lines changed

src/NetInput.ts

Lines changed: 13 additions & 5 deletions
@@ -8,6 +8,7 @@ import { createCanvasFromMedia } from './utils';
 
 export class NetInput {
   private _inputs: tf.Tensor3D[] = []
+  private _canvases: HTMLCanvasElement[] = []
   private _isManaged: boolean = false
   private _isBatchInput: boolean = false
 
@@ -16,14 +17,15 @@ export class NetInput {
 
   constructor(
     inputs: tf.Tensor4D | Array<TResolvedNetInput>,
-    isBatchInput: boolean = false
+    isBatchInput: boolean = false,
+    keepCanvases: boolean = false
   ) {
     if (isTensor4D(inputs)) {
       this._inputs = tf.unstack(inputs as tf.Tensor4D) as tf.Tensor3D[]
     }
 
     if (Array.isArray(inputs)) {
-      this._inputs = inputs.map(input => {
+      this._inputs = inputs.map((input, idx) => {
         if (isTensor3D(input)) {
           // TODO: make sure not to dispose original tensors passed in by the user
           return tf.clone(input as tf.Tensor3D)
@@ -39,9 +41,11 @@ export class NetInput {
           return (input as tf.Tensor4D).reshape(shape.slice(1) as [number, number, number]) as tf.Tensor3D
         }
 
-        return tf.fromPixels(
-          input instanceof HTMLCanvasElement ? input : createCanvasFromMedia(input as HTMLImageElement | HTMLVideoElement)
-        )
+        const canvas = input instanceof HTMLCanvasElement ? input : createCanvasFromMedia(input as HTMLImageElement | HTMLVideoElement)
+        if (keepCanvases) {
+          this._canvases[idx] = canvas
+        }
+        return tf.fromPixels(canvas)
       })
     }
 
@@ -53,6 +57,10 @@ export class NetInput {
     return this._inputs
   }
 
+  public get canvases(): HTMLCanvasElement[] {
+    return this._canvases
+  }
+
   public get isManaged(): boolean {
     return this._isManaged
   }
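Note: the new keepCanvases flag makes NetInput keep the canvas each pixel tensor was created from, so later stages can read pixels back through a 2d context rather than the GPU. A minimal usage sketch (the video element is made up for illustration):

    import { toNetInput } from './toNetInput';

    async function example(video: HTMLVideoElement) {
      // the third argument (keepCanvases) is new in this commit
      const netInput = await toNetInput(video, true, true)
      const tensor = netInput.inputs[0]    // tf.Tensor3D created via tf.fromPixels
      const canvas = netInput.canvases[0]  // the HTMLCanvasElement backing that tensor
      return { tensor, canvas }
    }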

src/mtcnn/BoundingBox.ts

Lines changed: 34 additions & 0 deletions
@@ -55,4 +55,38 @@ export class BoundingBox {
       Math.round(this.bottom)
     )
   }
+
+  public padAtBorders(imageHeight: number, imageWidth: number) {
+    const w = this.width + 1
+    const h = this.height + 1
+
+    let dx = 1
+    let dy = 1
+    let edx = w
+    let edy = h
+
+    let x = this.left
+    let y = this.top
+    let ex = this.right
+    let ey = this.bottom
+
+    if (ex > imageWidth) {
+      edx = -ex + imageWidth + w
+      ex = imageWidth
+    }
+    if (ey > imageHeight) {
+      edy = -ey + imageHeight + h
+      ey = imageHeight
+    }
+    if (x < 1) {
+      edy = 2 - x
+      x = 1
+    }
+    if (y < 1) {
+      edy = 2 - y
+      y = 1
+    }
+
+    return { dy, edy, dx, edx, y, ey, x, ex, w, h }
+  }
 }
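Note: padAtBorders mirrors the padding step of the reference MTCNN implementation. It clips a proposal box to the image area (1-based coordinates) and also returns offsets (dx, edx, dy, edy) for copying the visible part of the box into a patch; stage2 below currently only uses the clipped coordinates. A rough sketch of the clipping (box and image sizes made up):

    import { BoundingBox } from './BoundingBox';

    // a 40x40 image, with a box extending past the left, right and bottom edges
    const box = new BoundingBox(-10, 5, 50, 60)
    const { x, y, ex, ey } = box.padAtBorders(40, 40)
    // x and y are clamped to >= 1, ex and ey to the image dimensions,
    // so the getImageData call in stage2 stays inside the canvas:
    // x === 1, y === 5, ex === 40, ey === 40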

src/mtcnn/Mtcnn.ts

Lines changed: 26 additions & 11 deletions
@@ -8,6 +8,7 @@ import { bgrToRgbTensor } from './bgrToRgbTensor';
 import { extractParams } from './extractParams';
 import { pyramidDown } from './pyramidDown';
 import { stage1 } from './stage1';
+import { stage2 } from './stage2';
 import { NetParams } from './types';
 
 export class Mtcnn extends NeuralNetwork<NetParams> {
@@ -16,31 +17,45 @@ export class Mtcnn extends NeuralNetwork<NetParams> {
     super('Mtcnn')
   }
 
-  public forwardInput(
+  public async forwardInput(
     input: NetInput,
     minFaceSize: number = 20,
     scaleFactor: number = 0.709,
     scoreThresholds: number[] = [0.6, 0.7, 0.7]
-  ): tf.Tensor2D {
+  ): Promise<tf.Tensor2D> {
 
     const { params } = this
 
     if (!params) {
       throw new Error('Mtcnn - load model before inference')
     }
 
-    return tf.tidy(() => {
-      // TODO: expects bgr input?
-      let imgTensor = bgrToRgbTensor(
-        tf.expandDims(input.inputs[0]).toFloat() as tf.Tensor4D
+    const inputTensor = input.inputs[0]
+    const inputCanvas = input.canvases[0]
+
+    if (!inputCanvas) {
+      throw new Error('Mtcnn - inputCanvas is not defined, note that passing tensors into Mtcnn.forwardInput is not supported yet.')
+    }
+
+    const imgTensor = tf.tidy(() =>
+      bgrToRgbTensor(
+        tf.expandDims(inputTensor).toFloat() as tf.Tensor4D
      )
+    )
+
+    const scales = pyramidDown(minFaceSize, scaleFactor, imgTensor.shape.slice(1))
+    const out1 = await stage1(imgTensor, scales, scoreThresholds[0], params.pnet)
+
+    // using the inputCanvas to extract and resize the image patches, since it is faster
+    // than doing this on the gpu
+    const out2 = await stage2(inputCanvas, out1, scoreThresholds[1], params.rnet)
+
 
-      const scales = pyramidDown(minFaceSize, scaleFactor, imgTensor.shape.slice(1))
 
-      const out1 = stage1(imgTensor, scales, scoreThresholds[0], params.pnet)
+    imgTensor.dispose()
+    input.dispose()
 
-      return tf.tensor2d([0], [1, 1])
-    })
+    return tf.tensor2d([0], [1, 1])
   }
 
   public async forward(
@@ -50,7 +65,7 @@ export class Mtcnn extends NeuralNetwork<NetParams> {
     scoreThresholds: number[] = [0.6, 0.7, 0.7]
   ): Promise<tf.Tensor2D> {
     return this.forwardInput(
-      await toNetInput(input, true),
+      await toNetInput(input, true, true),
      minFaceSize,
      scaleFactor,
      scoreThresholds
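Note: forwardInput now chains the first two MTCNN stages: the image pyramid plus PNet proposals from stage1, then RNet refinement on canvas-extracted patches in stage2. The returned tensor is still a placeholder until the remaining stage lands. A rough sketch of the intended call flow (weight loading is assumed and not part of this diff):

    import { Mtcnn } from './mtcnn/Mtcnn';

    async function detect(imageElement: HTMLImageElement) {
      const mtcnn = new Mtcnn()
      // ... load the PNet/RNet weights into mtcnn before calling forward ...

      // forward() converts the input to a NetInput with keepCanvases = true,
      // so stage2 can read the image patches back from the canvas
      return mtcnn.forward(imageElement, 20, 0.709, [0.6, 0.7, 0.7])
    }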

src/mtcnn/RNet.ts

Lines changed: 22 additions & 0 deletions
@@ -0,0 +1,22 @@
+import * as tf from '@tensorflow/tfjs-core';
+
+import { fullyConnectedLayer } from '../faceLandmarkNet/fullyConnectedLayer';
+import { prelu } from './prelu';
+import { sharedLayer } from './sharedLayers';
+import { RNetParams } from './types';
+
+export function RNet(x: tf.Tensor4D, params: RNetParams): { prob: tf.Tensor2D, regions: tf.Tensor2D } {
+  return tf.tidy(() => {
+
+    const convOut = sharedLayer(x, params)
+    const vectorized = tf.reshape(convOut, [convOut.shape[0], params.fc1.weights.shape[0]]) as tf.Tensor2D
+    const fc1 = fullyConnectedLayer(vectorized, params.fc1)
+    const prelu4 = prelu<tf.Tensor2D>(fc1, params.prelu4_alpha)
+    const fc2_1 = fullyConnectedLayer(prelu4, params.fc2_1)
+    const max = tf.expandDims(tf.max(fc2_1, 1), 1)
+    const prob = tf.softmax(tf.sub(fc2_1, max), 1) as tf.Tensor2D
+    const regions = fullyConnectedLayer(prelu4, params.fc2_2)
+
+    return { prob, regions }
+  })
+}
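Note: subtracting the row-wise max before the softmax is the usual numerical-stability trick; softmax is shift-invariant, so the probabilities are unchanged. A small standalone illustration:

    import * as tf from '@tensorflow/tfjs-core';

    const logits = tf.tensor2d([[4, 2]])
    const max = tf.expandDims(tf.max(logits, 1), 1)

    // both print the same probabilities, roughly [[0.8808, 0.1192]]
    tf.softmax(logits, 1).print()
    tf.softmax(tf.sub(logits, max), 1).print()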

src/mtcnn/bgrToRgbTensor.ts

Lines changed: 1 addition & 1 deletion
@@ -2,6 +2,6 @@ import * as tf from '@tensorflow/tfjs-core';
 
 export function bgrToRgbTensor(tensor: tf.Tensor4D): tf.Tensor4D {
   return tf.tidy(
-    () => tf.stack(tf.unstack(tensor, 3), 3)
+    () => tf.stack(tf.unstack(tensor, 3).reverse(), 3)
   ) as tf.Tensor4D
 }
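Note: tf.unstack(tensor, 3) yields the channel slices in stored order, so without .reverse() the stack call reassembled them unchanged and the BGR-to-RGB swap was a no-op. A tiny sanity check:

    import * as tf from '@tensorflow/tfjs-core';

    const bgr = tf.tensor4d([10, 20, 30], [1, 1, 1, 3])  // one pixel: B=10, G=20, R=30
    const rgb = tf.stack(tf.unstack(bgr, 3).reverse(), 3)
    rgb.print()                                          // [[[[30, 20, 10]]]]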

src/mtcnn/normalize.ts

Lines changed: 7 additions & 0 deletions
@@ -0,0 +1,7 @@
+import * as tf from '@tensorflow/tfjs-core';
+
+export function normalize(x: tf.Tensor4D): tf.Tensor4D {
+  return tf.tidy(
+    () => tf.mul(tf.sub(x, tf.scalar(127.5)), tf.scalar(0.0078125))
+  )
+}
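Note: 0.0078125 is 1/128, so normalize applies the standard MTCNN preprocessing, mapping pixel values from [0, 255] to roughly [-1, 1]. For example:

    import * as tf from '@tensorflow/tfjs-core';
    import { normalize } from './normalize';

    normalize(tf.tensor4d([0, 127.5, 255], [1, 1, 1, 3])).print()
    // -> approximately [[[[-0.996, 0, 0.996]]]]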

src/mtcnn/prelu.ts

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 import * as tf from '@tensorflow/tfjs-core';
 
-export function prelu(x: tf.Tensor4D, alpha: tf.Tensor1D): tf.Tensor4D {
+export function prelu<T extends tf.Tensor>(x: T, alpha: tf.Tensor1D): T {
   return tf.tidy(() =>
     tf.add(
       tf.relu(x),
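Note: making prelu generic lets the same helper run on the 2D fully connected outputs in RNet as well as the 4D conv outputs in the shared layers. Assuming the standard PReLU definition f(x) = max(0, x) + alpha * min(0, x) (the rest of the function body is not shown in this hunk), a 2D call would look like:

    import * as tf from '@tensorflow/tfjs-core';
    import { prelu } from './prelu';

    const fc: tf.Tensor2D = tf.tensor2d([[-1, 2]])
    const out = prelu<tf.Tensor2D>(fc, tf.tensor1d([0.25, 0.25]))
    out.print()  // expected [[-0.25, 2]] with alpha = 0.25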

src/mtcnn/sharedLayers.ts

Lines changed: 3 additions & 3 deletions
@@ -8,13 +8,13 @@ export function sharedLayer(x: tf.Tensor4D, params: SharedParams, isPnet: boolea
   return tf.tidy(() => {
 
     let out = convLayer(x, params.conv1, 'valid')
-    out = prelu(out, params.prelu1_alpha)
+    out = prelu<tf.Tensor4D>(out, params.prelu1_alpha)
     out = tf.maxPool(out, isPnet ? [2, 2]: [3, 3], [2, 2], 'same')
     out = convLayer(out, params.conv2, 'valid')
-    out = prelu(out, params.prelu2_alpha)
+    out = prelu<tf.Tensor4D>(out, params.prelu2_alpha)
     out = isPnet ? out : tf.maxPool(out, [3, 3], [2, 2], 'valid')
     out = convLayer(out, params.conv3, 'valid')
-    out = prelu(out, params.prelu3_alpha)
+    out = prelu<tf.Tensor4D>(out, params.prelu3_alpha)
 
     return out
   })

src/mtcnn/stage1.ts

Lines changed: 4 additions & 5 deletions
@@ -79,13 +79,12 @@ export function stage1(
       const { prob, regions } = PNet(resized, params)
 
 
-      const scores = tf.unstack(prob, 3)[1]
-      const [sh, sw] = scores.shape.slice(1)
-      const [rh, rw] = regions.shape.slice(1)
+      const scoresTensor = tf.unstack(tf.unstack(prob, 3)[1])[0] as tf.Tensor2D
+      const regionsTensor = tf.unstack(regions)[0] as tf.Tensor3D
 
       return {
-        scoresTensor: scores.as2D(sh, sw),
-        regionsTensor: regions.as3D(rh, rw, 4)
+        scoresTensor,
+        regionsTensor
       }
     })
 
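Note: the change here just drops the batch dimension explicitly. PNet runs on a single scaled image, so prob has shape [1, h, w, 2]; taking channel 1 and unstacking along the batch axis yields the plain [h, w] score map. Shape walk-through, assuming batch size 1:

    import * as tf from '@tensorflow/tfjs-core';

    const prob = tf.zeros([1, 3, 4, 2])              // [batch, height, width, 2]
    const faceScores = tf.unstack(prob, 3)[1]        // shape [1, 3, 4]
    const scoresTensor = tf.unstack(faceScores)[0]   // shape [3, 4]
    console.log(scoresTensor.shape)                  // [3, 4]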

src/mtcnn/stage2.ts

Lines changed: 109 additions & 0 deletions
@@ -0,0 +1,109 @@
+import * as tf from '@tensorflow/tfjs-core';
+
+import { createCanvas, getContext2dOrThrow } from '../utils';
+import { bgrToRgbTensor } from './bgrToRgbTensor';
+import { BoundingBox } from './BoundingBox';
+import { nms } from './nms';
+import { normalize } from './normalize';
+import { RNet } from './RNet';
+import { RNetParams } from './types';
+
+export async function stage2(
+  img: HTMLCanvasElement,
+  boxes: { box: BoundingBox, score: number }[],
+  scoreThreshold: number,
+  params: RNetParams
+) {
+
+  const { height, width } = img
+
+  const imgCtx = getContext2dOrThrow(img)
+
+  const bitmaps = await Promise.all(boxes.map(async ({ box }) => {
+    // TODO: correct padding
+    const { y, ey, x, ex } = box.padAtBorders(height, width)
+
+    const fromX = x - 1
+    const fromY = y - 1
+    const imgData = imgCtx.getImageData(fromX, fromY, (ex - fromX), (ey - fromY))
+
+    return createImageBitmap(imgData)
+  }))
+
+  const imagePatchesData: number[] = []
+
+  bitmaps.forEach(bmp => {
+    const patch = createCanvas({ width: 24, height: 24 })
+    const patchCtx = getContext2dOrThrow(patch)
+    patchCtx.drawImage(bmp, 0, 0, 24, 24)
+    const { data } = patchCtx.getImageData(0, 0, 24, 24)
+
+    for(let i = 0; i < data.length; i++) {
+      if ((i + 1) % 4 === 0) continue
+      imagePatchesData.push(data[i])
+    }
+  })
+
+  const rnetOut = tf.tidy(() => {
+    const imagePatchTensor = bgrToRgbTensor(tf.transpose(
+      tf.tensor4d(imagePatchesData, [boxes.length, 24, 24, 3]),
+      [0, 2, 1, 3]
+    ).toFloat()) as tf.Tensor4D
+
+    const normalized = normalize(imagePatchTensor)
+
+    const { prob, regions } = RNet(normalized, params)
+    return {
+      scores: tf.unstack(prob, 1)[1],
+      regions
+    }
+  })
+
+  const scores = Array.from(await rnetOut.scores.data())
+
+  const indices = scores
+    .map((score, idx) => ({ score, idx }))
+    .filter(c => c.score > scoreThreshold)
+    .map(({ idx }) => idx)
+
+  const filteredBoxes = indices.map(idx => boxes[idx].box)
+  const filteredScores = indices.map(idx => scores[idx])
+
+  let finalBoxes: BoundingBox[] = []
+  let finalScores: number[] = []
+
+  if (filteredBoxes.length > 0) {
+    const indicesNms = nms(
+      filteredBoxes,
+      filteredScores,
+      0.7
+    )
+
+    finalScores = indicesNms.map(idx => filteredScores[idx])
+    finalBoxes = indicesNms
+      .map(idx => {
+        const box = filteredBoxes[idx]
+        const [rleft, rtop, right, rbottom] = [
+          rnetOut.regions.get(indices[idx], 0),
+          rnetOut.regions.get(indices[idx], 1),
+          rnetOut.regions.get(indices[idx], 2),
+          rnetOut.regions.get(indices[idx], 3)
+        ]
+
+        return new BoundingBox(
+          box.left + (rleft * box.width),
+          box.top + (rtop * box.height),
+          box.right + (right * box.width),
+          box.bottom + (rbottom * box.height)
+        ).toSquare().round()
+      })
+  }
+
+  rnetOut.regions.dispose()
+  rnetOut.scores.dispose()
+
+  return {
+    finalBoxes,
+    finalScores
+  }
+}
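Note: stage2 reads each padded proposal region out of the input canvas, resizes it to a 24x24 patch, strips the alpha channel from the RGBA bytes returned by getImageData, and feeds the stacked patches through RNet; boxes above the score threshold are de-duplicated with NMS and adjusted by the RNet region regressions. The alpha-stripping loop in isolation (standalone sketch with made-up byte values):

    // getImageData returns RGBA bytes; skipping every 4th byte keeps RGB only,
    // so each 24x24 patch contributes 24 * 24 * 3 = 1728 values
    const rgba = [10, 20, 30, 255, 40, 50, 60, 255]  // two pixels
    const rgb: number[] = []
    for (let i = 0; i < rgba.length; i++) {
      if ((i + 1) % 4 === 0) continue  // drop the alpha channel
      rgb.push(rgba[i])
    }
    console.log(rgb)  // [10, 20, 30, 40, 50, 60]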

src/toNetInput.ts

Lines changed: 3 additions & 2 deletions
@@ -17,7 +17,8 @@ import { awaitMediaLoaded, resolveInput } from './utils';
  */
 export async function toNetInput(
   inputs: TNetInput,
-  manageCreatedInput: boolean = false
+  manageCreatedInput: boolean = false,
+  keepCanvases: boolean = false
 ): Promise<NetInput> {
   if (inputs instanceof NetInput) {
     return inputs
@@ -67,5 +68,5 @@ export async function toNetInput(
     inputArray.map(input => isMediaElement(input) && awaitMediaLoaded(input))
   )
 
-  return afterCreate(new NetInput(inputArray, Array.isArray(inputs)))
+  return afterCreate(new NetInput(inputArray, Array.isArray(inputs), keepCanvases))
 }
