
Commit 930f85b

face alignment from 5 point face landmarks + allFacesMtcnn
1 parent 049997b commit 930f85b

6 files changed: +155 -101 lines


src/FaceLandmarks.ts

Lines changed: 70 additions & 1 deletion
@@ -1,6 +1,14 @@
-import { Point } from './Point';
+import { getCenterPoint } from './commons/getCenterPoint';
+import { FaceDetection } from './FaceDetection';
+import { IPoint, Point } from './Point';
+import { Rect } from './Rect';
 import { Dimensions } from './types';
 
+// face alignment constants
+const relX = 0.5
+const relY = 0.43
+const relScale = 0.45
+
 export class FaceLandmarks {
   protected _imageWidth: number
   protected _imageHeight: number
@@ -42,4 +50,65 @@ export class FaceLandmarks {
       pt => pt.sub(this._shift).div(new Point(this._imageWidth, this._imageHeight))
     )
   }
+
+  public forSize<T extends FaceLandmarks>(width: number, height: number): T {
+    return new (this.constructor as any)(
+      this.getRelativePositions(),
+      { width, height }
+    )
+  }
+
+  public shift<T extends FaceLandmarks>(x: number, y: number): T {
+    return new (this.constructor as any)(
+      this.getRelativePositions(),
+      { width: this._imageWidth, height: this._imageHeight },
+      new Point(x, y)
+    )
+  }
+
+  public shiftByPoint<T extends FaceLandmarks>(pt: IPoint): T {
+    return this.shift(pt.x, pt.y)
+  }
+
+  /**
+   * Aligns the face landmarks after face detection from the relative positions of the faces
+   * bounding box, or it's current shift. This function should be used to align the face images
+   * after face detection has been performed, before they are passed to the face recognition net.
+   * This will make the computed face descriptor more accurate.
+   *
+   * @param detection (optional) The bounding box of the face or the face detection result. If
+   * no argument was passed the position of the face landmarks are assumed to be relative to
+   * it's current shift.
+   * @returns The bounding box of the aligned face.
+   */
+  public align(
+    detection?: FaceDetection | Rect
+  ): Rect {
+    if (detection) {
+      const box = detection instanceof FaceDetection
+        ? detection.getBox().floor()
+        : detection
+
+      return this.shift(box.x, box.y).align()
+    }
+
+    const centers = this.getRefPointsForAlignment()
+
+    const [leftEyeCenter, rightEyeCenter, mouthCenter] = centers
+    const distToMouth = (pt: Point) => mouthCenter.sub(pt).magnitude()
+    const eyeToMouthDist = (distToMouth(leftEyeCenter) + distToMouth(rightEyeCenter)) / 2
+
+    const size = Math.floor(eyeToMouthDist / relScale)
+
+    const refPoint = getCenterPoint(centers)
+    // TODO: pad in case rectangle is out of image bounds
+    const x = Math.floor(Math.max(0, refPoint.x - (relX * size)))
+    const y = Math.floor(Math.max(0, refPoint.y - (relY * size)))
+
+    return new Rect(x, y, Math.min(size, this._imageWidth - x), Math.min(size, this._imageHeight - y))
+  }
+
+  protected getRefPointsForAlignment(): Point[] {
+    throw new Error('getRefPointsForAlignment not implemented by base class')
+  }
 }
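
For intuition, here is a small worked example of the align() geometry introduced above; it is not part of the commit itself. The reference point values are made up, while Point, getCenterPoint and the relX/relY/relScale constants are the ones from this diff.

  import { getCenterPoint } from './commons/getCenterPoint';
  import { Point } from './Point';

  // hypothetical eye centers and mouth center, in image coordinates
  const leftEye = new Point(60, 60)
  const rightEye = new Point(100, 60)
  const mouth = new Point(80, 110)

  const dist = (a: Point, b: Point) => a.sub(b).magnitude()
  const eyeToMouthDist = (dist(mouth, leftEye) + dist(mouth, rightEye)) / 2  // ~53.85
  const size = Math.floor(eyeToMouthDist / 0.45)                             // 119 (relScale)
  const refPoint = getCenterPoint([leftEye, rightEye, mouth])                // (80, 76.67)
  const x = Math.floor(Math.max(0, refPoint.x - 0.5 * size))                 // 20 (relX)
  const y = Math.floor(Math.max(0, refPoint.y - 0.43 * size))                // 25 (relY)
  // align() would therefore return roughly Rect(20, 25, 119, 119), clipped to the image bounds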

src/FullFaceDescription.ts

Lines changed: 3 additions & 3 deletions
@@ -1,18 +1,18 @@
 import { FaceDetection } from './FaceDetection';
-import { FaceLandmarks68 } from './faceLandmarkNet/FaceLandmarks68';
+import { FaceLandmarks } from './FaceLandmarks';
 
 export class FullFaceDescription {
   constructor(
     private _detection: FaceDetection,
-    private _landmarks: FaceLandmarks68,
+    private _landmarks: FaceLandmarks,
     private _descriptor: Float32Array
   ) {}
 
   public get detection(): FaceDetection {
     return this._detection
   }
 
-  public get landmarks(): FaceLandmarks68 {
+  public get landmarks(): FaceLandmarks {
     return this._landmarks
   }
 
src/allFacesFactory.ts

Lines changed: 35 additions & 11 deletions
@@ -2,14 +2,16 @@ import { extractFaceTensors } from './extractFaceTensors';
 import { FaceDetectionNet } from './faceDetectionNet/FaceDetectionNet';
 import { FaceLandmarkNet } from './faceLandmarkNet/FaceLandmarkNet';
 import { FaceLandmarks68 } from './faceLandmarkNet/FaceLandmarks68';
-import { FaceRecognitionNet } from './faceRecognitionNet/FaceRecognitionNet';
 import { FullFaceDescription } from './FullFaceDescription';
+import { Mtcnn } from './mtcnn/Mtcnn';
+import { MtcnnForwardParams } from './mtcnn/types';
+import { Rect } from './Rect';
 import { TNetInput } from './types';
 
 export function allFacesFactory(
   detectionNet: FaceDetectionNet,
   landmarkNet: FaceLandmarkNet,
-  recognitionNet: FaceRecognitionNet
+  computeDescriptors: (input: TNetInput, alignedFaceBoxes: Rect[], useBatchProcessing: boolean) => Promise<Float32Array[]>
 ) {
   return async function(
     input: TNetInput,
@@ -32,20 +34,42 @@ export function allFacesFactory(
     const alignedFaceBoxes = faceLandmarksByFace.map(
       (landmarks, i) => landmarks.align(detections[i].getBox())
     )
-    const alignedFaceTensors = await extractFaceTensors(input, alignedFaceBoxes)
 
-    const descriptors = useBatchProcessing
-      ? await recognitionNet.computeFaceDescriptor(alignedFaceTensors) as Float32Array[]
-      : await Promise.all(alignedFaceTensors.map(
-          faceTensor => recognitionNet.computeFaceDescriptor(faceTensor)
-        )) as Float32Array[]
-
-    alignedFaceTensors.forEach(t => t.dispose())
+    const descriptors = await computeDescriptors(input, alignedFaceBoxes, useBatchProcessing)
 
     return detections.map((detection, i) =>
       new FullFaceDescription(
         detection,
-        faceLandmarksByFace[i].shiftByPoint(detection.getBox()),
+        faceLandmarksByFace[i].shiftByPoint<FaceLandmarks68>(detection.getBox()),
+        descriptors[i]
+      )
+    )
+
+  }
+}
+
+export function allFacesMtcnnFactory(
+  mtcnn: Mtcnn,
+  computeDescriptors: (input: TNetInput, alignedFaceBoxes: Rect[], useBatchProcessing: boolean) => Promise<Float32Array[]>
+) {
+  return async function(
+    input: TNetInput,
+    mtcnnForwardParams: MtcnnForwardParams,
+    useBatchProcessing: boolean = false
+  ): Promise<FullFaceDescription[]> {
+
+    const results = await mtcnn.forward(input, mtcnnForwardParams)
+
+    const alignedFaceBoxes = results.map(
+      ({ faceLandmarks }) => faceLandmarks.align()
+    )
+
+    const descriptors = await computeDescriptors(input, alignedFaceBoxes, useBatchProcessing)
+
+    return results.map(({ faceDetection, faceLandmarks }, i) =>
+      new FullFaceDescription(
+        faceDetection,
+        faceLandmarks,
         descriptors[i]
       )
     )
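
Note that both factories now receive the descriptor computation as an injected callback rather than a FaceRecognitionNet instance, so the SSD based and the MTCNN based pipeline can share a single implementation (computeDescriptorsFactory in globalApi.ts below). As a rough sketch of the expected callback shape, here is an illustrative stub; the zero filled descriptors and the length of 128 are assumptions for illustration only:

  import { Rect } from './Rect';
  import { TNetInput } from './types';

  // illustrative stub: returns dummy descriptors instead of running a recognition net
  const dummyComputeDescriptors = async (
    input: TNetInput,
    alignedFaceBoxes: Rect[],
    useBatchProcessing: boolean
  ): Promise<Float32Array[]> =>
    alignedFaceBoxes.map(() => new Float32Array(128))

  // it could then be plugged into either factory, e.g.:
  // const allFacesDummy = allFacesFactory(detectionNet, landmarkNet, dummyComputeDescriptors)
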
src/faceLandmarkNet/FaceLandmarks68.ts

Lines changed: 3 additions & 61 deletions
@@ -1,14 +1,9 @@
 import { getCenterPoint } from '../commons/getCenterPoint';
 import { FaceDetection } from '../FaceDetection';
 import { FaceLandmarks } from '../FaceLandmarks';
-import { IPoint, Point } from '../Point';
+import { Point } from '../Point';
 import { Rect } from '../Rect';
 
-// face alignment constants
-const relX = 0.5
-const relY = 0.43
-const relScale = 0.45
-
 export class FaceLandmarks68 extends FaceLandmarks {
   public getJawOutline(): Point[] {
     return this._faceLandmarks.slice(0, 17)
@@ -38,64 +33,11 @@ export class FaceLandmarks68 extends FaceLandmarks {
     return this._faceLandmarks.slice(48, 68)
   }
 
-  public forSize(width: number, height: number): FaceLandmarks68 {
-    return new FaceLandmarks68(
-      this.getRelativePositions(),
-      { width, height }
-    )
-  }
-
-  public shift(x: number, y: number): FaceLandmarks68 {
-    return new FaceLandmarks68(
-      this.getRelativePositions(),
-      { width: this._imageWidth, height: this._imageHeight },
-      new Point(x, y)
-    )
-  }
-
-  public shiftByPoint(pt: IPoint): FaceLandmarks68 {
-    return this.shift(pt.x, pt.y)
-  }
-
-  /**
-   * Aligns the face landmarks after face detection from the relative positions of the faces
-   * bounding box, or it's current shift. This function should be used to align the face images
-   * after face detection has been performed, before they are passed to the face recognition net.
-   * This will make the computed face descriptor more accurate.
-   *
-   * @param detection (optional) The bounding box of the face or the face detection result. If
-   * no argument was passed the position of the face landmarks are assumed to be relative to
-   * it's current shift.
-   * @returns The bounding box of the aligned face.
-   */
-  public align(
-    detection?: FaceDetection | Rect
-  ): Rect {
-    if (detection) {
-      const box = detection instanceof FaceDetection
-        ? detection.getBox().floor()
-        : detection
-
-      return this.shift(box.x, box.y).align()
-    }
-
-    const centers = [
+  protected getRefPointsForAlignment(): Point[] {
+    return [
       this.getLeftEye(),
       this.getRightEye(),
       this.getMouth()
     ].map(getCenterPoint)
-
-    const [leftEyeCenter, rightEyeCenter, mouthCenter] = centers
-    const distToMouth = (pt: Point) => mouthCenter.sub(pt).magnitude()
-    const eyeToMouthDist = (distToMouth(leftEyeCenter) + distToMouth(rightEyeCenter)) / 2
-
-    const size = Math.floor(eyeToMouthDist / relScale)
-
-    const refPoint = getCenterPoint(centers)
-    // TODO: pad in case rectangle is out of image bounds
-    const x = Math.floor(Math.max(0, refPoint.x - (relX * size)))
-    const y = Math.floor(Math.max(0, refPoint.y - (relY * size)))
-
-    return new Rect(x, y, size, size)
   }
 }

src/globalApi.ts

Lines changed: 35 additions & 7 deletions
@@ -1,16 +1,17 @@
 import * as tf from '@tensorflow/tfjs-core';
 
-import { allFacesFactory } from './allFacesFactory';
+import { allFacesFactory, allFacesMtcnnFactory } from './allFacesFactory';
+import { extractFaceTensors } from './extractFaceTensors';
 import { FaceDetection } from './FaceDetection';
 import { FaceDetectionNet } from './faceDetectionNet/FaceDetectionNet';
 import { FaceLandmarkNet } from './faceLandmarkNet/FaceLandmarkNet';
 import { FaceLandmarks68 } from './faceLandmarkNet/FaceLandmarks68';
 import { FaceRecognitionNet } from './faceRecognitionNet/FaceRecognitionNet';
 import { FullFaceDescription } from './FullFaceDescription';
-import { getDefaultMtcnnForwardParams } from './mtcnn/getDefaultMtcnnForwardParams';
 import { Mtcnn } from './mtcnn/Mtcnn';
 import { MtcnnForwardParams, MtcnnResult } from './mtcnn/types';
 import { NetInput } from './NetInput';
+import { Rect } from './Rect';
 import { TNetInput } from './types';
 
 export const detectionNet = new FaceDetectionNet()
@@ -22,7 +23,7 @@ export const recognitionNet = new FaceRecognitionNet()
 export const nets = {
   ssdMobilenet: detectionNet,
   faceLandmark68Net: landmarkNet,
-  faceNet: recognitionNet,
+  faceRecognitionNet: recognitionNet,
   mtcnn: new Mtcnn()
 }
 
@@ -35,7 +36,7 @@ export function loadFaceLandmarkModel(url: string) {
 }
 
 export function loadFaceRecognitionModel(url: string) {
-  return nets.faceNet.load(url)
+  return nets.faceRecognitionNet.load(url)
 }
 
 export function loadMtcnnModel(url: string) {
@@ -68,7 +69,7 @@ export function detectLandmarks(
 export function computeFaceDescriptor(
   input: TNetInput
 ): Promise<Float32Array | Float32Array[]> {
-  return nets.faceNet.computeFaceDescriptor(input)
+  return nets.faceRecognitionNet.computeFaceDescriptor(input)
 }
 
 export function mtcnn(
@@ -85,5 +86,32 @@ export const allFaces: (
 ) => Promise<FullFaceDescription[]> = allFacesFactory(
   detectionNet,
   landmarkNet,
-  recognitionNet
-)
+  computeDescriptorsFactory(nets.faceRecognitionNet)
+)
+
+export const allFacesMtcnn: (
+  input: tf.Tensor | NetInput | TNetInput,
+  mtcnnForwardParams: MtcnnForwardParams,
+  useBatchProcessing?: boolean
+) => Promise<FullFaceDescription[]> = allFacesMtcnnFactory(
+  nets.mtcnn,
+  computeDescriptorsFactory(nets.faceRecognitionNet)
+)
+
+function computeDescriptorsFactory(
+  recognitionNet: FaceRecognitionNet
+) {
+  return async function(input: TNetInput, alignedFaceBoxes: Rect[], useBatchProcessing: boolean) {
+    const alignedFaceTensors = await extractFaceTensors(input, alignedFaceBoxes)
+
+    const descriptors = useBatchProcessing
+      ? await recognitionNet.computeFaceDescriptor(alignedFaceTensors) as Float32Array[]
+      : await Promise.all(alignedFaceTensors.map(
+          faceTensor => recognitionNet.computeFaceDescriptor(faceTensor)
+        )) as Float32Array[]
+
+    alignedFaceTensors.forEach(t => t.dispose())
+
+    return descriptors
+  }
+}
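
Putting the new global API together, here is a rough usage sketch of allFacesMtcnn. The package import, the model URL, the input element, the minFaceSize parameter and the descriptor getter on FullFaceDescription are assumptions for illustration and are not shown in this diff:

  import * as faceapi from 'face-api.js'

  async function run(input: HTMLImageElement) {
    await faceapi.loadMtcnnModel('/models')
    await faceapi.loadFaceRecognitionModel('/models')

    // detect faces and 5 point landmarks with MTCNN, align them, then compute descriptors
    const fullFaceDescriptions = await faceapi.allFacesMtcnn(input, { minFaceSize: 50 })

    fullFaceDescriptions.forEach(({ detection, landmarks, descriptor }) => {
      console.log(detection.getBox(), landmarks.getPositions(), descriptor)
    })
  }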

src/mtcnn/FaceLandmarks5.ts

Lines changed: 9 additions & 18 deletions
@@ -1,24 +1,15 @@
+import { getCenterPoint } from '../commons/getCenterPoint';
 import { FaceLandmarks } from '../FaceLandmarks';
-import { IPoint, Point } from '../Point';
+import { Point } from '../Point';
 
 export class FaceLandmarks5 extends FaceLandmarks {
 
-  public forSize(width: number, height: number): FaceLandmarks5 {
-    return new FaceLandmarks5(
-      this.getRelativePositions(),
-      { width, height }
-    )
-  }
-
-  public shift(x: number, y: number): FaceLandmarks5 {
-    return new FaceLandmarks5(
-      this.getRelativePositions(),
-      { width: this._imageWidth, height: this._imageHeight },
-      new Point(x, y)
-    )
-  }
-
-  public shiftByPoint(pt: IPoint): FaceLandmarks5 {
-    return this.shift(pt.x, pt.y)
+  protected getRefPointsForAlignment(): Point[] {
+    const pts = this.getPositions()
+    return [
+      pts[0],
+      pts[1],
+      getCenterPoint([pts[3], pts[4]])
+    ]
   }
 }
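
getRefPointsForAlignment indexes into the raw 5 point landmarks, which assumes the usual MTCNN ordering (not spelled out in this diff): the two eyes first, then the nose, then the two mouth corners. A minimal sketch of that mapping, assuming a FaceLandmarks5 instance faceLandmarks5 and the getCenterPoint helper from src/commons:

  // assumed ordering: [leftEye, rightEye, nose, leftMouthCorner, rightMouthCorner]
  const [leftEye, rightEye, , leftMouth, rightMouth] = faceLandmarks5.getPositions()
  const refPoints = [leftEye, rightEye, getCenterPoint([leftMouth, rightMouth])]
  // these are the same reference points getRefPointsForAlignment() returns, so
  // FaceLandmarks.align() applies the same eyes-and-mouth geometry to the 5 point
  // landmarks as it does to the 68 point landmarks of FaceLandmarks68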
