Skip to content

Commit 1c56acb

Browse files
Merge pull request justadudewhohacks#93 from justadudewhohacks/fixes-and-performance-improvements
fixed landmark postprocessing + resize input canvases instead of tensors to net input size, which is much more performant
2 parents f512f8b + 8b1d577 commit 1c56acb

39 files changed

+723
-562
lines changed

examples/views/detectAndDrawFaces.html

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -84,12 +84,9 @@
8484
const detections = await faceapi.locateFaces(input, minConfidence)
8585
faceapi.drawDetection('overlay', detections.map(det => det.forSize(width, height)))
8686

87-
const faceImages = await faceapi.extractFaces(input.inputs[0], detections)
87+
const faceImages = await faceapi.extractFaces(inputImgEl, detections)
8888
$('#facesContainer').empty()
8989
faceImages.forEach(canvas => $('#facesContainer').append(canvas))
90-
91-
// free memory for input tensors
92-
input.dispose()
9390
}
9491

9592
async function onSelectionChanged(uri) {

examples/views/detectAndDrawLandmarks.html

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -89,11 +89,8 @@
8989
const input = await faceapi.toNetInput(inputImgEl)
9090
const locations = await faceapi.locateFaces(input, minConfidence)
9191

92-
const faceTensors = (await faceapi.extractFaceTensors(input, locations))
93-
let landmarksByFace = await Promise.all(faceTensors.map(t => faceapi.detectLandmarks(t)))
94-
95-
// free memory for face image tensors after we computed their descriptors
96-
faceTensors.forEach(t => t.dispose())
92+
const faces = await faceapi.extractFaces(input, locations)
93+
let landmarksByFace = await Promise.all(faces.map(face => faceapi.detectLandmarks(face)))
9794

9895
// shift and scale the face landmarks to the face image position in the canvas
9996
landmarksByFace = landmarksByFace.map((landmarks, i) => {
@@ -103,9 +100,6 @@
103100

104101
faceapi.drawLandmarks(canvas, landmarksByFace, { lineWidth: drawLines ? 2 : 4, drawLines, color: 'red' })
105102
faceapi.drawDetection('overlay', locations.map(det => det.forSize(width, height)))
106-
107-
// free memory for input tensors
108-
input.dispose()
109103
}
110104

111105
async function run() {

examples/views/faceAlignment.html

Lines changed: 5 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -86,26 +86,17 @@
8686
}
8787

8888
async function locateAndAlignFacesWithMtcnn(inputImgEl) {
89-
const input = await faceapi.toNetInput(
90-
inputImgEl,
91-
// dispose input manually
92-
false,
93-
// keep canvases (required for mtcnn)
94-
true
95-
)
89+
const input = await faceapi.toNetInput(inputImgEl)
9690

9791
const results = await faceapi.mtcnn(input, { minFaceSize: 100 })
9892

99-
const unalignedFaceImages = await faceapi.extractFaces(input.inputs[0], results.map(res => res.faceDetection))
93+
const unalignedFaceImages = await faceapi.extractFaces(input.getInput(0), results.map(res => res.faceDetection))
10094

10195
const alignedFaceBoxes = results
10296
.filter(res => res.faceDetection.score > minConfidence)
10397
.map(res => res.faceLandmarks.align())
10498

105-
const alignedFaceImages = await faceapi.extractFaces(input.inputs[0], alignedFaceBoxes)
106-
107-
// free memory for input tensors
108-
input.dispose()
99+
const alignedFaceImages = await faceapi.extractFaces(input.getInput(0), alignedFaceBoxes)
109100

110101
return {
111102
unalignedFaceImages,
@@ -118,7 +109,7 @@
118109

119110
const locations = await faceapi.locateFaces(input, minConfidence)
120111

121-
const unalignedFaceImages = await faceapi.extractFaces(input.inputs[0], locations)
112+
const unalignedFaceImages = await faceapi.extractFaces(input.getInput(0), locations)
122113

123114
// detect landmarks and get the aligned face image bounding boxes
124115
const alignedFaceBoxes = await Promise.all(unalignedFaceImages.map(
@@ -127,10 +118,7 @@
127118
return faceLandmarks.align(locations[i])
128119
}
129120
))
130-
const alignedFaceImages = await faceapi.extractFaces(input.inputs[0], alignedFaceBoxes)
131-
132-
// free memory for input tensors
133-
input.dispose()
121+
const alignedFaceImages = await faceapi.extractFaces(input.getInput(0), alignedFaceBoxes)
134122

135123
return {
136124
unalignedFaceImages,

package-lock.json

Lines changed: 8 additions & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@
2222
"license": "MIT",
2323
"dependencies": {
2424
"@tensorflow/tfjs-core": "^0.12.14",
25-
"tfjs-image-recognition-base": "^0.0.0",
26-
"tfjs-tiny-yolov2": "0.0.2",
25+
"tfjs-image-recognition-base": "0.1.0",
26+
"tfjs-tiny-yolov2": "0.1.0",
2727
"tslib": "^1.9.3"
2828
},
2929
"devDependencies": {

src/allFacesFactory.ts

Lines changed: 13 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@ import { TinyYolov2 } from '.';
55
import { FaceDetection } from './classes/FaceDetection';
66
import { FaceLandmarks68 } from './classes/FaceLandmarks68';
77
import { FullFaceDescription } from './classes/FullFaceDescription';
8-
import { extractFaceTensors } from './dom';
8+
import { extractFaces } from './dom';
99
import { FaceDetectionNet } from './faceDetectionNet/FaceDetectionNet';
10-
import { FaceLandmarkNet } from './faceLandmarkNet/FaceLandmarkNet';
10+
import { FaceLandmark68Net } from './faceLandmarkNet/FaceLandmark68Net';
1111
import { FaceRecognitionNet } from './faceRecognitionNet/FaceRecognitionNet';
1212
import { Mtcnn } from './mtcnn/Mtcnn';
1313
import { MtcnnForwardParams } from './mtcnn/types';
@@ -16,23 +16,21 @@ function computeDescriptorsFactory(
1616
recognitionNet: FaceRecognitionNet
1717
) {
1818
return async function(input: TNetInput, alignedFaceBoxes: Rect[], useBatchProcessing: boolean) {
19-
const alignedFaceTensors = await extractFaceTensors(input, alignedFaceBoxes)
19+
const alignedFaceCanvases = await extractFaces(input, alignedFaceBoxes)
2020

2121
const descriptors = useBatchProcessing
22-
? await recognitionNet.computeFaceDescriptor(alignedFaceTensors) as Float32Array[]
23-
: await Promise.all(alignedFaceTensors.map(
24-
faceTensor => recognitionNet.computeFaceDescriptor(faceTensor)
22+
? await recognitionNet.computeFaceDescriptor(alignedFaceCanvases) as Float32Array[]
23+
: await Promise.all(alignedFaceCanvases.map(
24+
canvas => recognitionNet.computeFaceDescriptor(canvas)
2525
)) as Float32Array[]
2626

27-
alignedFaceTensors.forEach(t => t.dispose())
28-
2927
return descriptors
3028
}
3129
}
3230

3331
function allFacesFactory(
3432
detectFaces: (input: TNetInput) => Promise<FaceDetection[]>,
35-
landmarkNet: FaceLandmarkNet,
33+
landmarkNet: FaceLandmark68Net,
3634
recognitionNet: FaceRecognitionNet
3735
) {
3836
const computeDescriptors = computeDescriptorsFactory(recognitionNet)
@@ -43,15 +41,14 @@ function allFacesFactory(
4341
): Promise<FullFaceDescription[]> {
4442

4543
const detections = await detectFaces(input)
46-
const faceTensors = await extractFaceTensors(input, detections)
44+
const faceCanvases = await extractFaces(input, detections)
4745

4846
const faceLandmarksByFace = useBatchProcessing
49-
? await landmarkNet.detectLandmarks(faceTensors) as FaceLandmarks68[]
50-
: await Promise.all(faceTensors.map(
51-
faceTensor => landmarkNet.detectLandmarks(faceTensor)
47+
? await landmarkNet.detectLandmarks(faceCanvases) as FaceLandmarks68[]
48+
: await Promise.all(faceCanvases.map(
49+
canvas => landmarkNet.detectLandmarks(canvas)
5250
)) as FaceLandmarks68[]
5351

54-
faceTensors.forEach(t => t.dispose())
5552

5653
const alignedFaceBoxes = faceLandmarksByFace.map(
5754
(landmarks, i) => landmarks.align(detections[i].getBox())
@@ -74,7 +71,7 @@ function allFacesFactory(
7471

7572
export function allFacesSsdMobilenetv1Factory(
7673
ssdMobilenetv1: FaceDetectionNet,
77-
landmarkNet: FaceLandmarkNet,
74+
landmarkNet: FaceLandmark68Net,
7875
recognitionNet: FaceRecognitionNet
7976
) {
8077
return async function(
@@ -90,7 +87,7 @@ export function allFacesSsdMobilenetv1Factory(
9087

9188
export function allFacesTinyYolov2Factory(
9289
tinyYolov2: TinyYolov2,
93-
landmarkNet: FaceLandmarkNet,
90+
landmarkNet: FaceLandmark68Net,
9491
recognitionNet: FaceRecognitionNet
9592
) {
9693
return async function(

src/dom/extractFaceTensors.ts

Lines changed: 7 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import * as tf from '@tensorflow/tfjs-core';
2-
import { Rect, TNetInput, toNetInput } from 'tfjs-image-recognition-base';
2+
import { isTensor4D, Rect } from 'tfjs-image-recognition-base';
33

44
import { FaceDetection } from '../classes/FaceDetection';
55

@@ -9,28 +9,21 @@ import { FaceDetection } from '../classes/FaceDetection';
99
* Using this method is faster than extracting a canvas for each face and
1010
* converting them to tensors individually.
1111
*
12-
* @param input The image that face detection has been performed on.
12+
* @param imageTensor The image tensor that face detection has been performed on.
1313
* @param detections The face detection results or face bounding boxes for that image.
1414
* @returns Tensors of the corresponding image region for each detected face.
1515
*/
1616
export async function extractFaceTensors(
17-
input: TNetInput,
17+
imageTensor: tf.Tensor3D | tf.Tensor4D,
1818
detections: Array<FaceDetection | Rect>
19-
): Promise<tf.Tensor4D[]> {
19+
): Promise<tf.Tensor3D[]> {
2020

21-
const netInput = await toNetInput(input, true)
22-
23-
if (netInput.batchSize > 1) {
24-
if (netInput.isManaged) {
25-
netInput.dispose()
26-
}
21+
if (isTensor4D(imageTensor) && imageTensor.shape[0] > 1) {
2722
throw new Error('extractFaceTensors - batchSize > 1 not supported')
2823
}
2924

3025
return tf.tidy(() => {
31-
const imgTensor = netInput.inputs[0].expandDims().toFloat() as tf.Tensor4D
32-
33-
const [imgHeight, imgWidth, numChannels] = imgTensor.shape.slice(1)
26+
const [imgHeight, imgWidth, numChannels] = imageTensor.shape.slice(isTensor4D(imageTensor) ? 1 : 0)
3427

3528
const boxes = detections.map(
3629
det => det instanceof FaceDetection
@@ -40,12 +33,9 @@ export async function extractFaceTensors(
4033
.map(box => box.clipAtImageBorders(imgWidth, imgHeight))
4134

4235
const faceTensors = boxes.map(({ x, y, width, height }) =>
43-
tf.slice4d(imgTensor, [0, y, x, 0], [1, height, width, numChannels])
36+
tf.slice3d(imageTensor.as3D(imgHeight, imgWidth, numChannels), [y, x, 0], [height, width, numChannels])
4437
)
4538

46-
if (netInput.isManaged) {
47-
netInput.dispose()
48-
}
4939
return faceTensors
5040
})
5141
}

src/dom/extractFaces.ts

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -24,24 +24,19 @@ export async function extractFaces(
2424
let canvas = input as HTMLCanvasElement
2525

2626
if (!(input instanceof HTMLCanvasElement)) {
27-
const netInput = await toNetInput(input, true)
27+
const netInput = await toNetInput(input)
2828

2929
if (netInput.batchSize > 1) {
30-
if (netInput.isManaged) {
31-
netInput.dispose()
32-
}
3330
throw new Error('extractFaces - batchSize > 1 not supported')
3431
}
3532

36-
canvas = await imageTensorToCanvas(netInput.inputs[0])
37-
38-
if (netInput.isManaged) {
39-
netInput.dispose()
40-
}
33+
const tensorOrCanvas = netInput.getInput(0)
34+
canvas = tensorOrCanvas instanceof HTMLCanvasElement
35+
? tensorOrCanvas
36+
: await imageTensorToCanvas(tensorOrCanvas)
4137
}
4238

4339
const ctx = getContext2dOrThrow(canvas)
44-
4540
const boxes = detections.map(
4641
det => det instanceof FaceDetection
4742
? det.forSize(canvas.width, canvas.height).getBox().floor()

src/faceDetectionNet/FaceDetectionNet.ts

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ export class FaceDetectionNet extends NeuralNetwork<NetParams> {
2525
}
2626

2727
return tf.tidy(() => {
28-
const batchTensor = input.toBatchTensor(512, false)
28+
const batchTensor = input.toBatchTensor(512, false).toFloat()
2929

3030
const x = tf.sub(tf.mul(batchTensor, tf.scalar(0.007843137718737125)), tf.scalar(1)) as tf.Tensor4D
3131
const features = mobileNetV1(x, params.mobilenetv1)
@@ -40,7 +40,7 @@ export class FaceDetectionNet extends NeuralNetwork<NetParams> {
4040
}
4141

4242
public async forward(input: TNetInput) {
43-
return this.forwardInput(await toNetInput(input, true))
43+
return this.forwardInput(await toNetInput(input))
4444
}
4545

4646
public async locateFaces(
@@ -49,7 +49,7 @@ export class FaceDetectionNet extends NeuralNetwork<NetParams> {
4949
maxResults: number = 100
5050
): Promise<FaceDetection[]> {
5151

52-
const netInput = await toNetInput(input, true)
52+
const netInput = await toNetInput(input)
5353

5454
const {
5555
boxes: _boxes,
@@ -77,18 +77,21 @@ export class FaceDetectionNet extends NeuralNetwork<NetParams> {
7777
minConfidence
7878
)
7979

80-
const paddings = netInput.getRelativePaddings(0)
80+
const reshapedDims = netInput.getReshapedInputDimensions(0)
81+
const inputSize = netInput.inputSize as number
82+
const padX = inputSize / reshapedDims.width
83+
const padY = inputSize / reshapedDims.height
8184

8285
const results = indices
8386
.map(idx => {
8487
const [top, bottom] = [
8588
Math.max(0, boxes.get(idx, 0)),
8689
Math.min(1.0, boxes.get(idx, 2))
87-
].map(val => val * paddings.y)
90+
].map(val => val * padY)
8891
const [left, right] = [
8992
Math.max(0, boxes.get(idx, 1)),
9093
Math.min(1.0, boxes.get(idx, 3))
91-
].map(val => val * paddings.x)
94+
].map(val => val * padX)
9295
return new FaceDetection(
9396
scoresData[idx],
9497
new Rect(

0 commit comments

Comments
 (0)