final landmark net implementation

justadudewhohacks · justadudewhohacks · commit ec095c075c12 · 2018-06-16T12:28:41.000+02:00
diff --git a/src/NetInput.ts b/src/NetInput.ts
@@ -1,5 +1,5 @@
 import { Dimensions, TMediaElement, TNetInput } from './types';
-import { createCanvas, getContext2dOrThrow, getElement, getMediaDimensions } from './utils';
+import { createCanvasFromMedia, getContext2dOrThrow, getElement, getMediaDimensions } from './utils';
 
 export class NetInput {
   private _canvases: HTMLCanvasElement[]
@@ -40,11 +40,8 @@ export class NetInput {
     }
 
     // if input is batch type, make sure every canvas has the same dimensions
-    const { width, height } = this.dims || dims || getMediaDimensions(media)
-
-    const canvas = createCanvas({ width, height })
-    getContext2dOrThrow(canvas).drawImage(media, 0, 0, width, height)
-    this._canvases.push(canvas)
+    const canvasDims = this.dims || dims
+    this._canvases.push(createCanvasFromMedia(media, canvasDims))
   }
 
   public get canvases() : HTMLCanvasElement[] {
diff --git a/src/Point.ts b/src/Point.ts
@@ -0,0 +1,9 @@
+export class Point { // Plain value holder for a 2D coordinate pair.
+  public x: number // horizontal coordinate, stored exactly as passed to the constructor
+  public y: number // vertical coordinate, stored exactly as passed to the constructor
+
+  constructor(x: number, y: number) { // copies both arguments onto the instance without validation
+    this.x = x
+    this.y = y
+  }
+}
diff --git a/src/commons/convLayer.ts b/src/commons/convLayer.ts
@@ -5,12 +5,15 @@ import { ConvParams } from './types';
 export function convLayer(
   x: tf.Tensor4D,
   params: ConvParams,
-  padding: 'valid' | 'same' = 'same'
+  padding: 'valid' | 'same' = 'same', // passed straight through to tf.conv2d
+  withRelu: boolean = false // when true, tf.relu is applied to the biased convolution output
 ): tf.Tensor4D {
-  return tf.tidy(() =>
-    tf.add(
+  return tf.tidy(() => { // block body so the sum can be named before the optional activation
+    const out = tf.add( // convolution with strides [1, 1] plus params.bias
       tf.conv2d(x, params.filters, [1, 1], padding),
       params.bias
-    )
-  )
+    ) as tf.Tensor4D // cast back to Tensor4D, the declared return type
+
+    return withRelu ? tf.relu(out) : out // with the default (false) the un-activated sum is returned, as before this change
+  })
 }
diff --git a/src/faceLandmarkNet/FaceLandmarks.ts b/src/faceLandmarkNet/FaceLandmarks.ts
@@ -0,0 +1,34 @@
+import { Point } from '../Point';
+import { Dimensions } from '../types';
+
+export class FaceLandmarks { // Holds landmark points scaled to a given image size.
+  private _faceLandmarks: Point[] // landmark positions after multiplying the inputs by image width/height
+  private _imageWidth: number // width used for scaling; also the divisor in getRelativePositions
+  private _imageHeight: number // height used for scaling; also the divisor in getRelativePositions
+
+  constructor(
+    relativeFaceLandmarkPositions: Point[], // presumably normalized coordinates; they are multiplied by width/height below
+    imageDims: Dimensions
+  ) {
+    const { width, height } = imageDims
+    this._imageWidth = width
+    this._imageHeight = height
+    this._faceLandmarks = relativeFaceLandmarkPositions.map(
+      pt => new Point(pt.x * width, pt.y * height) // scale each input point by the image size
+    )
+  }
+
+  public getPositions() { // returns the internal array itself (no copy), so callers share its contents
+    return this._faceLandmarks
+  }
+
+  public getRelativePositions() { // builds new Points by dividing the stored positions by the stored image size
+    return this._faceLandmarks.map(
+      pt => new Point(pt.x / this._imageWidth, pt.y / this._imageHeight) // NOTE(review): a zero width/height makes this division non-finite
+    )
+  }
+
+  public forSize(width: number, height: number): FaceLandmarks { // new instance with the same relative positions scaled to width x height
+    return new FaceLandmarks(this.getRelativePositions(), { width, height })
+  }
+}
diff --git a/src/faceLandmarkNet/index.ts b/src/faceLandmarkNet/index.ts
@@ -1,45 +1,75 @@
 import * as tf from '@tensorflow/tfjs-core';
 
+import { convLayer } from '../commons/convLayer';
+import { ConvParams } from '../commons/types';
 import { getImageTensor } from '../getImageTensor';
 import { NetInput } from '../NetInput';
 import { padToSquare } from '../padToSquare';
-import { TNetInput } from '../types';
+import { Dimensions, TNetInput } from '../types';
 import { extractParams } from './extractParams';
-import { convLayer } from '../commons/convLayer';
+import { FaceLandmarks } from './FaceLandmarks';
 import { fullyConnectedLayer } from './fullyConnectedLayer';
 
+function conv(x: tf.Tensor4D, params: ConvParams): tf.Tensor4D { // shorthand for the convolution steps of the landmark net below
+  return convLayer(x, params, 'valid', true) // 'valid' padding, with the ReLU flag enabled
+}
+
+function maxPool(x: tf.Tensor4D, strides: [number, number] = [2, 2]): tf.Tensor4D { // strides default to [2, 2] when omitted
+  return tf.maxPool(x, [2, 2], strides, 'valid') // fixed [2, 2] window, 'valid' padding
+}
+
 export function faceLandmarkNet(weights: Float32Array) {
   const params = extractParams(weights)
 
-  function forward(input: tf.Tensor | NetInput | TNetInput) {
-    return tf.tidy(() => {
+  async function detectLandmarks(input: tf.Tensor | NetInput | TNetInput) { // runs the net and resolves to a FaceLandmarks built from its output
+    let adjustRelativeX = 0 // offset later added to every x output; assigned inside the tidy callback
+    let adjustRelativeY = 0 // offset later added to every y output; assigned inside the tidy callback
+    let imageDimensions: Dimensions | undefined // assigned inside the tidy callback from the image tensor shape
+
+    const outTensor = tf.tidy(() => {
+      let imgTensor = getImageTensor(input)
+      const [height, width] = imgTensor.shape.slice(1) // assumes shape is [batch, height, width, ...] -- TODO confirm against getImageTensor
+      imageDimensions = { width, height } // dimensions before padding/resizing
+
+      imgTensor = padToSquare(imgTensor, true)
+      adjustRelativeX = (height > width) ? imgTensor.shape[2] / (2 * width) : 0 // non-zero only when height > width; NOTE(review): presumably compensates for padToSquare -- verify the formula
+      adjustRelativeY = (width > height) ? imgTensor.shape[1] / (2 * height) : 0 // non-zero only when width > height; NOTE(review): same caveat as the x offset
 
-      let x = padToSquare(getImageTensor(input), true)
       // work with 128 x 128 sized face images
-      if (x.shape[1] !== 128 || x.shape[2] !== 128) {
-        x = tf.image.resizeBilinear(x, [128, 128])
+      if (imgTensor.shape[1] !== 128 || imgTensor.shape[2] !== 128) { // the resize is skipped when the tensor is already 128 x 128
+        imgTensor = tf.image.resizeBilinear(imgTensor, [128, 128])
       }
 
-      let out = convLayer(x, params.conv0_params, 'valid')
-      out = tf.maxPool(out, [2, 2], [2, 2], 'valid')
-      out = convLayer(out, params.conv1_params, 'valid')
-      out = convLayer(out, params.conv2_params, 'valid')
-      out = tf.maxPool(out, [2, 2], [2, 2], 'valid')
-      out = convLayer(out, params.conv3_params, 'valid')
-      out = convLayer(out, params.conv4_params, 'valid')
-      out = tf.maxPool(out, [2, 2], [2, 2], 'valid')
-      out = convLayer(out, params.conv5_params, 'valid')
-      out = convLayer(out, params.conv6_params, 'valid')
-      out = tf.maxPool(out, [2, 2], [1, 1], 'valid')
-      out = convLayer(out, params.conv7_params, 'valid')
-      const fc0 = fullyConnectedLayer(out.as2D(out.shape[0], -1), params.fc0_params)
+      let out = conv(imgTensor, params.conv0_params) // conv() applies ReLU, unlike the removed convLayer(..., 'valid') calls
+      out = maxPool(out)
+      out = conv(out, params.conv1_params)
+      out = conv(out, params.conv2_params)
+      out = maxPool(out)
+      out = conv(out, params.conv3_params)
+      out = conv(out, params.conv4_params)
+      out = maxPool(out)
+      out = conv(out, params.conv5_params)
+      out = conv(out, params.conv6_params)
+      out = maxPool(out, [1, 1]) // the only pooling step that overrides the default [2, 2] strides
+      out = conv(out, params.conv7_params)
+      const fc0 = tf.relu(fullyConnectedLayer(out.as2D(out.shape[0], -1), params.fc0_params)) // ReLU on the first fully connected layer; fc1 below has none
       const fc1 = fullyConnectedLayer(fc0, params.fc1_params)
 
       return fc1
     })
+
+    const faceLandmarksArray = Array.from(await outTensor.data()) // copy the output values into a plain array
+    const xCoords = faceLandmarksArray.filter((c, i) => (i - 1) % 2).map(x => x + adjustRelativeX) // (i - 1) % 2 is non-zero for even i, so this keeps indices 0, 2, 4, ...
+    const yCoords = faceLandmarksArray.filter((c, i) => i % 2).map(y => y + adjustRelativeY) // i % 2 is non-zero for odd i, so this keeps indices 1, 3, 5, ...
+    outTensor.dispose() // NOTE(review): not reached if outTensor.data() rejects
+
+    return new FaceLandmarks(
+      Array(68).fill(0).map((_, i) => ({ x: xCoords[i], y: yCoords[i] })), // always 68 entries; plain { x, y } objects rather than Point instances
+      imageDimensions as Dimensions // cast drops undefined; the value is assigned inside the tidy callback
+    )
   }
 
   return {
-    forward
+    detectLandmarks
   }
 }
diff --git a/src/types.ts b/src/types.ts
@@ -10,15 +10,25 @@ export type Dimensions = {
 }
 
 export type DrawBoxOptions = {
-  lineWidth: number
-  color: string
+  lineWidth?: number // optional; drawBox merges the given options over getDefaultDrawOptions()
+  color?: string // optional; drawBox merges the given options over getDefaultDrawOptions()
 }
 
 export type DrawTextOptions = {
+  lineWidth?: number // optional; drawText adds it to the text padding
+  fontSize?: number // optional; drawText uses it in the ctx.font string and the vertical text offset
+  fontStyle?: string // optional; drawText uses it in the ctx.font string
+  color?: string // optional; drawText uses it as the fill style
+}
+
+export type DrawLandmarksOptions = {
+  lineWidth?: number // optional; drawLandmarks uses it as the side length of each landmark square
+  color?: string // optional; drawLandmarks uses it as the fill style
+}
+
+export type DrawOptions = { // standalone type with every field required; no longer an intersection of the box and text option types
   lineWidth: number
   fontSize: number
   fontStyle: string
   color: string
-}
-
-export type DrawOptions = DrawBoxOptions & DrawTextOptions
+}
diff --git a/src/utils.ts b/src/utils.ts
@@ -1,5 +1,6 @@
 import { FaceDetectionNet } from './faceDetectionNet/types';
-import { Dimensions, DrawBoxOptions, DrawOptions, DrawTextOptions } from './types';
+import { FaceLandmarks } from './faceLandmarkNet/FaceLandmarks';
+import { Dimensions, DrawBoxOptions, DrawLandmarksOptions, DrawOptions, DrawTextOptions } from './types';
 
 export function isFloat(num: number) {
   return num % 1 !== 0
@@ -24,16 +25,17 @@ export function getContext2dOrThrow(canvas: HTMLCanvasElement): CanvasRenderingC
   return ctx
 }
 
-export function createCanvas({ width, height}: Dimensions): HTMLCanvasElement {
+export function createCanvas({ width, height }: Dimensions): HTMLCanvasElement { // creates a new canvas element and sets its width and height
   const canvas = document.createElement('canvas')
   canvas.width = width
   canvas.height = height
   return canvas
 }
 
-export function createCanvasWithImageData({ width, height}: Dimensions, buf: Uint8ClampedArray): HTMLCanvasElement {
+export function createCanvasFromMedia(media: HTMLImageElement | HTMLVideoElement, dims?: Dimensions): HTMLCanvasElement { // draws the media onto a newly created canvas and returns that canvas
+  const { width, height } = dims || getMediaDimensions(media) // explicit dims win; otherwise fall back to getMediaDimensions(media)
   const canvas = createCanvas({ width, height })
-  getContext2dOrThrow(canvas).putImageData(new ImageData(buf, width, height), 0, 0)
+  getContext2dOrThrow(canvas).drawImage(media, 0, 0, width, height) // drawn at the origin with destination size width x height
   return canvas
 }
 
@@ -82,8 +84,13 @@ export function drawBox(
   h: number,
   options: DrawBoxOptions
 ) {
-  ctx.strokeStyle = options.color
-  ctx.lineWidth = options.lineWidth
+  const drawOptions = Object.assign(
+    getDefaultDrawOptions(),
+    (options || {})
+  )
+
+  ctx.strokeStyle = drawOptions.color
+  ctx.lineWidth = drawOptions.lineWidth
   ctx.strokeRect(x, y, w, h)
 }
 
@@ -94,11 +101,16 @@ export function drawText(
   text: string,
   options: DrawTextOptions
 ) {
-  const padText = 2 + options.lineWidth
+  const drawOptions = Object.assign(
+    getDefaultDrawOptions(),
+    (options || {})
+  )
+
+  const padText = 2 + drawOptions.lineWidth
 
-  ctx.fillStyle = options.color
-  ctx.font = `${options.fontSize}px ${options.fontStyle}`
-  ctx.fillText(text, x + padText, y + padText + (options.fontSize * 0.6))
+  ctx.fillStyle = drawOptions.color
+  ctx.font = `${drawOptions.fontSize}px ${drawOptions.fontStyle}`
+  ctx.fillText(text, x + padText, y + padText + (drawOptions.fontSize * 0.6))
 }
 
 export function drawDetection(
@@ -154,4 +166,28 @@ export function drawDetection(
       )
     }
   })
+}
+
+export function drawLandmarks( // fills one small square per landmark position on the target canvas
+  canvasArg: string | HTMLCanvasElement, // resolved via getElement; NOTE(review): a string is presumably an element id -- confirm in getElement
+  faceLandmarks: FaceLandmarks,
+  options?: DrawLandmarksOptions & { drawLines: boolean } // NOTE(review): drawLines is required by this type whenever options is passed
+) {
+  const canvas = getElement(canvasArg)
+  if (!(canvas instanceof HTMLCanvasElement)) {
+    throw new Error('drawLandmarks - expected canvas to be of type: HTMLCanvasElement')
+  }
+
+    const drawOptions = Object.assign( // defaults first, then caller-supplied options override them
+      getDefaultDrawOptions(),
+      (options || {})
+    )
+
+    const { drawLines } = Object.assign({ drawLines: false }, (options || {})) // NOTE(review): drawLines is never read afterwards in this function
+
+    const ctx = getContext2dOrThrow(canvas)
+    const { lineWidth,color } = drawOptions
+    ctx.fillStyle = color
+    const ptOffset = lineWidth / 2 // half the square size, so each square is centred on its landmark
+    faceLandmarks.getPositions().forEach(pt => ctx.fillRect(pt.x - ptOffset, pt.y - ptOffset, lineWidth, lineWidth))
 }