running nets sequentially instead of in batches seems to be faster + gather runtime stats

justadudewhohacks · justadudewhohacks · commit 08aae43def9c · 2018-07-12T16:02:54.000+02:00
diff --git a/src/mtcnn/Mtcnn.ts b/src/mtcnn/Mtcnn.ts
@@ -10,11 +10,12 @@ import { TNetInput } from '../types';
 import { bgrToRgbTensor } from './bgrToRgbTensor';
 import { extractParams } from './extractParams';
 import { FaceLandmarks5 } from './FaceLandmarks5';
+import { getSizesForScale } from './getSizesForScale';
 import { pyramidDown } from './pyramidDown';
 import { stage1 } from './stage1';
 import { stage2 } from './stage2';
 import { stage3 } from './stage3';
-import { NetParams } from './types';
+import { MtcnnResult, NetParams } from './types';
 
 export class Mtcnn extends NeuralNetwork<NetParams> {
 
@@ -26,8 +27,9 @@ export class Mtcnn extends NeuralNetwork<NetParams> {
     input: NetInput,
     minFaceSize: number = 20,
     scaleFactor: number = 0.709,
+    maxNumScales: number = 10,
     scoreThresholds: number[] = [0.6, 0.7, 0.7]
-  ): Promise<any> {
+  ): Promise<{ results: MtcnnResult[], stats: any }> {
 
     const { params } = this
 
@@ -42,6 +44,10 @@ export class Mtcnn extends NeuralNetwork<NetParams> {
       throw new Error('Mtcnn - inputCanvas is not defined, note that passing tensors into Mtcnn.forwardInput is not supported yet.')
     }
 
+    const stats: any = {}
+
+    const tsTotal = Date.now()
+
     const imgTensor = tf.tidy(() =>
       bgrToRgbTensor(
         tf.expandDims(inputTensor).toFloat() as tf.Tensor4D
@@ -51,18 +57,47 @@ export class Mtcnn extends NeuralNetwork<NetParams> {
     const [height, width] = imgTensor.shape.slice(1)
 
     const scales = pyramidDown(minFaceSize, scaleFactor, [height, width])
-    const out1 = await stage1(imgTensor, scales, scoreThresholds[0], params.pnet)
+      .filter(scale => {
+        const sizes = getSizesForScale(scale, [height, width])
+        return Math.min(sizes.width, sizes.height) > 48
+      })
+      .slice(0, maxNumScales)
+
+    stats.scales = scales
+    stats.pyramid = scales.map(scale => getSizesForScale(scale, [height, width]))
+
+    let ts = Date.now()
+    const out1 = await stage1(imgTensor, scales, scoreThresholds[0], params.pnet, stats)
+    stats.total_stage1 = Date.now() - ts
+
+    if (!out1.boxes.length) {
+      stats.total = Date.now() - tsTotal
+      return { results: [], stats }
+    }
 
+    stats.stage2_numInputBoxes = out1.boxes.length
     // using the inputCanvas to extract and resize the image patches, since it is faster
     // than doing this on the gpu
-    const out2 = await stage2(inputCanvas, out1.boxes, scoreThresholds[1], params.rnet)
-    const out3 = await stage3(inputCanvas, out2.boxes, scoreThresholds[2], params.onet)
+    ts = Date.now()
+    const out2 = await stage2(inputCanvas, out1.boxes, scoreThresholds[1], params.rnet, stats)
+    stats.total_stage2 = Date.now() - ts
+
+    if (!out2.boxes.length) {
+      stats.total = Date.now() - tsTotal
+      return { results: [], stats }
+    }
+
+    stats.stage3_numInputBoxes = out2.boxes.length
+
+    ts = Date.now()
+    const out3 = await stage3(inputCanvas, out2.boxes, scoreThresholds[2], params.onet, stats)
+    stats.total_stage3 = Date.now() - ts
 
     imgTensor.dispose()
     input.dispose()
 
-    const faceDetections = out3.boxes.map((box, idx) =>
-      new FaceDetection(
+    const results = out3.boxes.map((box, idx) => ({
+      faceDetection: new FaceDetection(
         out3.scores[idx],
         new Rect(
           box.left / width,
@@ -74,32 +109,47 @@ export class Mtcnn extends NeuralNetwork<NetParams> {
           height,
           width
         }
-      )
-    )
-
-    const faceLandmarks = out3.points.map(pts =>
-      new FaceLandmarks5(
-        pts.map(pt => pt.div(new Point(width, height))),
+      ),
+      faceLandmarks: new FaceLandmarks5(
+        out3.points[idx].map(pt => pt.div(new Point(width, height))),
         { width, height }
       )
-    )
+    }))
 
-    return {
-      faceDetections,
-      faceLandmarks
-    }
+    stats.total = Date.now() - tsTotal
+    return { results, stats }
   }
 
   public async forward(
     input: TNetInput,
     minFaceSize: number = 20,
     scaleFactor: number = 0.709,
+    maxNumScales: number = 10, // passed through to forwardInput, which keeps at most this many pyramid scales
+    scoreThresholds: number[] = [0.6, 0.7, 0.7] // indexed 0..2 by forwardInput for stage1, stage2 and stage3
+  ): Promise<MtcnnResult[]> { // resolves to the detection results only, without the runtime stats
+    return (
+      await this.forwardInput(
+        await toNetInput(input, true, true), // convert the raw input into a NetInput before running the stages
+        minFaceSize,
+        scaleFactor,
+        maxNumScales,
+        scoreThresholds
+      )
+    ).results // forwardInput resolves to { results, stats }; this entry point drops the stats
+  }
+
+  public async forwardWithStats( // same pipeline as forward, but also returns the collected stats
+    input: TNetInput,
+    minFaceSize: number = 20,
+    scaleFactor: number = 0.709,
+    maxNumScales: number = 10, // passed through to forwardInput, which keeps at most this many pyramid scales
     scoreThresholds: number[] = [0.6, 0.7, 0.7]
-  ): Promise<tf.Tensor2D> {
+  ): Promise<{ results: MtcnnResult[], stats: any }> { // resolves to forwardInput's result object unchanged
     return this.forwardInput(
       await toNetInput(input, true, true),
       minFaceSize,
       scaleFactor,
+      maxNumScales, // must sit before scoreThresholds to match forwardInput's parameter order
       scoreThresholds
     )
   }
diff --git a/src/mtcnn/extractImagePatches.ts b/src/mtcnn/extractImagePatches.ts
@@ -10,7 +10,7 @@ export async function extractImagePatches(
   img: HTMLCanvasElement,
   boxes: BoundingBox[],
   { width, height }: Dimensions
-): Promise<tf.Tensor4D> {
+): Promise<tf.Tensor4D[]> {
 
 
   const imgCtx = getContext2dOrThrow(img)
@@ -26,26 +26,32 @@ export async function extractImagePatches(
     return createImageBitmap(imgData)
   }))
 
-  const imagePatchesData: number[] = []
+  const imagePatchesDatas: number[][] = []
 
   bitmaps.forEach(bmp => {
     const patch = createCanvas({ width, height })
     const patchCtx = getContext2dOrThrow(patch)
     patchCtx.drawImage(bmp, 0, 0, width, height)
     const { data } = patchCtx.getImageData(0, 0, width, height)
 
+    const currData = []
     for(let i = 0; i < data.length; i++) {
       if ((i + 1) % 4 === 0) continue
-      imagePatchesData.push(data[i])
+      currData.push(data[i])
     }
+    imagePatchesDatas.push(currData)
   })
 
-  return tf.tidy(() => {
-    const imagePatchTensor = bgrToRgbTensor(tf.transpose(
-      tf.tensor4d(imagePatchesData, [boxes.length, width, height, 3]),
-      [0, 2, 1, 3]
-    ).toFloat()) as tf.Tensor4D
 
-    return normalize(imagePatchTensor)
+  return imagePatchesDatas.map(data => {
+    const t = tf.tidy(() => {
+      const imagePatchTensor = bgrToRgbTensor(tf.transpose(
+        tf.tensor4d(data, [1, width, height, 3]),
+        [0, 2, 1, 3]
+      ).toFloat()) as tf.Tensor4D
+
+      return normalize(imagePatchTensor)
+    })
+    return t
   })
 }
diff --git a/src/mtcnn/getSizesForScale.ts b/src/mtcnn/getSizesForScale.ts
@@ -0,0 +1,6 @@
+export function getSizesForScale(scale: number, [height, width]: number[]) { // dimensions of a [height, width] image after scaling by `scale`
+  return {
+    height: Math.floor(height * scale), // rounded down to a whole number
+    width: Math.floor(width * scale) // rounded down to a whole number
+  }
+}
diff --git a/src/mtcnn/stage1.ts b/src/mtcnn/stage1.ts
@@ -7,12 +7,13 @@ import { nms } from './nms';
 import { normalize } from './normalize';
 import { PNet } from './PNet';
 import { PNetParams } from './types';
+import { getSizesForScale } from './getSizesForScale';
 
 function rescaleAndNormalize(x: tf.Tensor4D, scale: number): tf.Tensor4D {
   return tf.tidy(() => {
 
-    const [height, width] = x.shape.slice(1)
-    const resized = tf.image.resizeBilinear(x, [Math.floor(height * scale), Math.floor(width * scale)])
+    const { height, width } = getSizesForScale(scale, x.shape.slice(1))
+    const resized = tf.image.resizeBilinear(x, [height, width])
     const normalized = normalize(resized)
 
     return (tf.transpose(normalized, [0, 2, 1, 3]) as tf.Tensor4D)
@@ -67,17 +68,20 @@ export function stage1(
   imgTensor: tf.Tensor4D,
   scales: number[],
   scoreThreshold: number,
-  params: PNetParams
+  params: PNetParams,
+  stats: any
 ) {
+  stats.stage1 = []
 
-  const boxesForScale = scales.map((scale, i) => {
+  const boxesForScale = scales.map((scale) => {
+    const statsForScale: any = { scale }
 
     const { scoresTensor, regionsTensor } = tf.tidy(() => {
       const resized = rescaleAndNormalize(imgTensor, scale)
 
-
+      let ts = Date.now()
       const { prob, regions } = PNet(resized, params)
-
+      statsForScale.pnet = Date.now() - ts
 
       const scoresTensor = tf.unstack(tf.unstack(prob, 3)[1])[0] as tf.Tensor2D
       const regionsTensor = tf.unstack(regions)[0] as tf.Tensor3D
@@ -99,15 +103,20 @@ export function stage1(
     regionsTensor.dispose()
 
     if (!boundingBoxes.length) {
+      stats.stage1.push(statsForScale)
       return []
     }
 
+    let ts = Date.now()
     const indices = nms(
       boundingBoxes.map(bbox => bbox.cell),
       boundingBoxes.map(bbox => bbox.score),
       0.5
     )
+    statsForScale.nms = Date.now() - ts
+    statsForScale.numBoxes = indices.length
 
+    stats.stage1.push(statsForScale)
     return indices.map(boxIdx => boundingBoxes[boxIdx])
   })
 
@@ -119,11 +128,13 @@ export function stage1(
   let finalScores: number[] = []
 
   if (allBoxes.length > 0) {
+    let ts = Date.now()
     const indices = nms(
       allBoxes.map(bbox => bbox.cell),
       allBoxes.map(bbox => bbox.score),
       0.7
     )
+    stats.stage1_nms = Date.now() - ts
 
     finalScores = indices.map(idx => allBoxes[idx].score)
     finalBoxes = indices
diff --git a/src/mtcnn/stage2.ts b/src/mtcnn/stage2.ts
@@ -8,15 +8,26 @@ export async function stage2(
   img: HTMLCanvasElement,
   inputBoxes: BoundingBox[],
   scoreThreshold: number,
-  params: RNetParams
+  params: RNetParams,
+  stats: any
 ) {
 
-  const rnetInput = await extractImagePatches(img, inputBoxes, { width: 24, height: 24 })
-  const rnetOut = RNet(rnetInput, params)
+  let ts = Date.now()
+  const rnetInputs = await extractImagePatches(img, inputBoxes, { width: 24, height: 24 })
+  stats.stage2_extractImagePatches = Date.now() - ts
 
-  rnetInput.dispose()
+  ts = Date.now()
+  const rnetOuts = rnetInputs.map(
+    rnetInput => {
+      const out = RNet(rnetInput, params)
+      rnetInput.dispose()
+      return out
+    }
+  )
+  stats.stage2_rnet = Date.now() - ts
 
-  const scores = Array.from(await rnetOut.scores.data())
+  const scoreDatas = await Promise.all(rnetOuts.map(out => out.scores.data()))
+  const scores = scoreDatas.map(arr => Array.from(arr)).reduce((all, arr) => all.concat(arr))
   const indices = scores
     .map((score, idx) => ({ score, idx }))
     .filter(c => c.score > scoreThreshold)
@@ -29,27 +40,31 @@ export async function stage2(
   let finalScores: number[] = []
 
   if (filteredBoxes.length > 0) {
+    ts = Date.now()
     const indicesNms = nms(
       filteredBoxes,
       filteredScores,
       0.7
     )
+    stats.stage2_nms = Date.now() - ts
 
     const regions = indicesNms.map(idx =>
       new BoundingBox(
-        rnetOut.regions.get(indices[idx], 0),
-        rnetOut.regions.get(indices[idx], 1),
-        rnetOut.regions.get(indices[idx], 2),
-        rnetOut.regions.get(indices[idx], 3)
+        rnetOuts[indices[idx]].regions.get(0, 0),
+        rnetOuts[indices[idx]].regions.get(0, 1),
+        rnetOuts[indices[idx]].regions.get(0, 2),
+        rnetOuts[indices[idx]].regions.get(0, 3)
       )
     )
 
     finalScores = indicesNms.map(idx => filteredScores[idx])
     finalBoxes = indicesNms.map((idx, i) => filteredBoxes[idx].calibrate(regions[i]))
   }
 
-  rnetOut.regions.dispose()
-  rnetOut.scores.dispose()
+  rnetOuts.forEach(t => {
+    t.regions.dispose()
+    t.scores.dispose()
+  })
 
   return {
     boxes: finalBoxes,
diff --git a/src/mtcnn/stage3.ts b/src/mtcnn/stage3.ts
diff --git a/src/mtcnn/types.ts b/src/mtcnn/types.ts