
Commit 33296ef

init mtcnn + stage1 until bounding box extraction
1 parent ae742d9 commit 33296ef

File tree: 9 files changed, +136 additions, −76 deletions

src/index.ts

Lines changed: 1 addition & 0 deletions
@@ -17,6 +17,7 @@ export * from './faceDetectionNet';
 export * from './faceLandmarkNet';
 export * from './faceRecognitionNet';
 export * from './globalApi';
+export * from './mtcnn';
 export * from './padToSquare';
 export * from './toNetInput';
 export * from './utils'

src/mtcnn/Mtcnn.ts

Lines changed: 14 additions & 21 deletions
@@ -4,7 +4,9 @@ import { NeuralNetwork } from '../commons/NeuralNetwork';
 import { NetInput } from '../NetInput';
 import { toNetInput } from '../toNetInput';
 import { TNetInput } from '../types';
+import { bgrToRgbTensor } from './bgrToRgbTensor';
 import { extractParams } from './extractParams';
+import { pyramidDown } from './pyramidDown';
 import { stage1 } from './stage1';
 import { NetParams } from './types';

@@ -14,7 +16,12 @@ export class Mtcnn extends NeuralNetwork<NetParams> {
     super('Mtcnn')
   }

-  public forwardInput(input: NetInput, minFaceSize: number = 20, scaleFactor: number = 0.709): tf.Tensor2D {
+  public forwardInput(
+    input: NetInput,
+    minFaceSize: number = 20,
+    scaleFactor: number = 0.709,
+    scoreThresholds: number[] = [0.6, 0.7, 0.7]
+  ): tf.Tensor2D {

     const { params } = this

@@ -23,28 +30,14 @@ export class Mtcnn extends NeuralNetwork<NetParams> {
     }

     return tf.tidy(() => {
-      const imgTensor = tf.expandDims(input.inputs[0]).toFloat() as tf.Tensor4D
+      // TODO: expects bgr input?
+      let imgTensor = bgrToRgbTensor(
+        tf.expandDims(input.inputs[0]).toFloat() as tf.Tensor4D
+      )

-      function pyramidDown(minFaceSize: number, scaleFactor: number, dims: number[]): number[] {
+      const scales = pyramidDown(minFaceSize, scaleFactor, imgTensor.shape.slice(1))

-        const [height, width] = dims
-        const m = 12 / minFaceSize
-
-        const scales = []
-
-        let minLayer = Math.min(height, width) * m
-        let exp = 0
-        while (minLayer >= 12) {
-          scales.push(m * Math.pow(scaleFactor, exp))
-          minLayer = minLayer * scaleFactor
-          exp += 1
-        }
-
-        return scales
-      }
-
-      const scales = pyramidDown(minFaceSize, scaleFactor, imgTensor.shape)
-      const out1 = stage1(imgTensor, scales, params.pnet)
+      const out1 = stage1(imgTensor, scales, scoreThresholds[0], params.pnet)

       return tf.tensor2d([0], [1, 1])
     })

src/mtcnn/PNet.ts

Lines changed: 6 additions & 7 deletions
@@ -4,16 +4,15 @@ import { convLayer } from '../commons/convLayer';
 import { sharedLayer } from './sharedLayers';
 import { PNetParams } from './types';

-export function PNet(x: tf.Tensor4D, params: PNetParams): { prob: tf.Tensor3D, convOut: tf.Tensor4D } {
+export function PNet(x: tf.Tensor4D, params: PNetParams): { prob: tf.Tensor4D, regions: tf.Tensor4D } {
   return tf.tidy(() => {

-    let out = sharedLayer(x, params)
+    let out = sharedLayer(x, params, true)
     const conv = convLayer(out, params.conv4_1, 'valid')
-    // TODO: tf.reduce_max <=> tf.max ?
-    const logits = tf.sub(conv, tf.max(conv, 3))
-    const prob = tf.softmax(logits, 3) as tf.Tensor3D
-    const convOut = convLayer(out, params.conv4_2, 'valid')
+    const max = tf.expandDims(tf.max(conv, 3), 3)
+    const prob = tf.softmax(tf.sub(conv, max), 3) as tf.Tensor4D
+    const regions = convLayer(out, params.conv4_2, 'valid')

-    return { prob, convOut }
+    return { prob, regions }
   })
 }

src/mtcnn/bgrToRgbTensor.ts

Lines changed: 7 additions & 0 deletions
@@ -0,0 +1,7 @@
+import * as tf from '@tensorflow/tfjs-core';
+
+export function bgrToRgbTensor(tensor: tf.Tensor4D): tf.Tensor4D {
+  return tf.tidy(
+    () => tf.stack(tf.unstack(tensor, 3), 3)
+  ) as tf.Tensor4D
+}
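
Note: as committed, the helper unstacks and restacks the channels in their original order, so the channel order is returned unchanged (see the "// TODO: expects bgr input?" comment in Mtcnn.ts). For illustration only, a hedged sketch of how an actual BGR→RGB swap could look by reversing the unstacked channel list; this is not part of this commit:

import * as tf from '@tensorflow/tfjs-core';

// Illustration: reversing the per-channel tensors swaps B and R (BGR <-> RGB).
export function reverseChannels(tensor: tf.Tensor4D): tf.Tensor4D {
  return tf.tidy(
    () => tf.stack(tf.unstack(tensor, 3).reverse(), 3)
  ) as tf.Tensor4D
}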

src/mtcnn/config.ts

Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
+export const CELL_STRIDE = 2
+export const CELL_SIZE = 12

src/mtcnn/extractParams.ts

Lines changed: 5 additions & 1 deletion
@@ -40,7 +40,7 @@ function extractorsFactory(extractWeights: ExtractWeightsFunction, paramMappings

 function extractRNetParams(): RNetParams {

-  const sharedParams = extractSharedParams([3, 28, 48, 64], 'rnet')
+  const sharedParams = extractSharedParams([3, 28, 48, 64], 'rnet', true)
   const fc1 = extractFCParams(576, 128, 'rnet/fc1')
   const prelu4_alpha = extractPReluParams(128, 'rnet/prelu4_alpha')
   const fc2_1 = extractFCParams(128, 2, 'rnet/fc2_1')

@@ -90,5 +90,9 @@ export function extractParams(weights: Float32Array): { params: NetParams, param
   const rnet = extractRNetParams()
   const onet = extractONetParams()

+  if (getRemainingWeights().length !== 0) {
+    throw new Error(`weights remaining after extract: ${getRemainingWeights().length}`)
+  }
+
   return { params: { pnet, rnet, onet }, paramMappings }
 }

src/mtcnn/pyramidDown.ts

Lines changed: 19 additions & 0 deletions
@@ -0,0 +1,19 @@
+import { CELL_SIZE } from './config';
+
+export function pyramidDown(minFaceSize: number, scaleFactor: number, dims: number[]): number[] {
+
+  const [height, width] = dims
+  const m = CELL_SIZE / minFaceSize
+
+  const scales = []
+
+  let minLayer = Math.min(height, width) * m
+  let exp = 0
+  while (minLayer >= 12) {
+    scales.push(m * Math.pow(scaleFactor, exp))
+    minLayer = minLayer * scaleFactor
+    exp += 1
+  }
+
+  return scales
+}
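
Note: with the defaults used by Mtcnn.forwardInput (minFaceSize = 20, scaleFactor = 0.709), the first scale is m = CELL_SIZE / 20 = 0.6 and each further pyramid level shrinks by 0.709 until the scaled shorter side drops below 12 px. A small sketch, assuming the hypothetical 820×461 input shape that also appears in the stage1 debug code:

import { pyramidDown } from './pyramidDown';

// [height, width] = [820, 461]; the shorter side (461) bounds the pyramid depth.
const scales = pyramidDown(20, 0.709, [820, 461]);
// 0.6, 0.6 * 0.709, 0.6 * 0.709^2, ... — 10 levels here, since 461 * 0.6 * 0.709^10 < 12
console.log(scales);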

src/mtcnn/stage1.ts

Lines changed: 82 additions & 47 deletions
@@ -1,78 +1,113 @@
 import * as tf from '@tensorflow/tfjs-core';

+import { Point } from '../Point';
+import { CELL_SIZE, CELL_STRIDE } from './config';
 import { PNet } from './PNet';
 import { PNetParams } from './types';

 function rescaleAndNormalize(x: tf.Tensor4D, scale: number): tf.Tensor4D {
   return tf.tidy(() => {
-    const [height, width] = x.shape
-    const resized = tf.image.resizeBilinear(x, [height * scale, width * scale])

-    return tf.mul(tf.sub(resized, tf.scalar(127.5)), tf.scalar(0.0078125))
-
-    // TODO: ?
-    // img_x = np.expand_dims(scaled_image, 0)
-    // img_y = np.transpose(img_x, (0, 2, 1, 3))
+    const [height, width] = x.shape.slice(1)
+    const resized = tf.image.resizeBilinear(x, [Math.floor(height * scale), Math.floor(width * scale)])
+    const normalized = tf.mul(tf.sub(resized, tf.scalar(127.5)), tf.scalar(0.0078125))

+    return (tf.transpose(normalized, [0, 2, 1, 3]) as tf.Tensor4D)
   })
 }

-export function stage1(x: tf.Tensor4D, scales: number[], params: PNetParams) {
-  return tf.tidy(() => {

-    const boxes = scales.map((scale) => {
-      const resized = rescaleAndNormalize(x, scale)
-      const { prob, convOut } = PNet(resized, params)
-    })
+function extractBoundingBoxes(
+  scores: tf.Tensor2D,
+  regions: tf.Tensor3D,
+  scale: number,
+  scoreThreshold: number
+) {

-  })
-}
+  // TODO: fix this!, maybe better to use tf.gather here
+  const indices2D: Point[] = []
+  for (let y = 0; y < scores.shape[0]; y++) {
+    for (let x = 0; x < scores.shape[1]; x++) {
+      if (scores.get(y, x) >= scoreThreshold) {
+        indices2D.push(new Point(x, y))
+      }
+    }
+  }
+
+  if (!indices2D.length) {
+    return null
+  }
+
+  return tf.tidy(() => {

-/*
+    const indicesTensor = tf.tensor2d(
+      indices2D.map(pt => [pt.y, pt.x]),
+      [indices2D.length, 2]
+    )

-for scale in scales:
-    scaled_image = self.__scale_image(image, scale)
+    const cellsStart = tf.round(
+      indicesTensor.mul(tf.scalar(CELL_STRIDE)).add(tf.scalar(1)).div(tf.scalar(scale))
+    ) as tf.Tensor2D
+    const cellsEnd = tf.round(
+      indicesTensor.mul(tf.scalar(CELL_STRIDE)).add(tf.scalar(CELL_SIZE)).div(tf.scalar(scale))
+    ) as tf.Tensor2D

-    img_x = np.expand_dims(scaled_image, 0)
-    img_y = np.transpose(img_x, (0, 2, 1, 3))
+    const scoresTensor = tf.tensor1d(indices2D.map(pt => scores.get(pt.y, pt.x)))

-    out = self.__pnet.feed(img_y)
+    const candidateRegions = indices2D.map(c => ({
+      left: regions.get(c.y, c.x, 0),
+      top: regions.get(c.y, c.x, 1),
+      right: regions.get(c.y, c.x, 2),
+      bottom: regions.get(c.y, c.x, 3)
+    }))

-    out0 = np.transpose(out[0], (0, 2, 1, 3))
-    out1 = np.transpose(out[1], (0, 2, 1, 3))
+    const regionsTensor = tf.tensor2d(
+      candidateRegions.map(r => [r.left, r.top, r.right, r.bottom]),
+      [candidateRegions.length, 4]
+    )

-    boxes, _ = self.__generate_bounding_box(out1[0, :, :, 1].copy(),
-                                            out0[0, :, :, :].copy(), scale, self.__steps_threshold[0])
+    const boxesTensor = tf.concat2d([cellsStart, cellsEnd, scoresTensor.as2D(scoresTensor.size, 1), regionsTensor], 1)

-    # inter-scale nms
-    pick = self.__nms(boxes.copy(), 0.5, 'Union')
-    if boxes.size > 0 and pick.size > 0:
-        boxes = boxes[pick, :]
-        total_boxes = np.append(total_boxes, boxes, axis=0)
+    return boxesTensor
+  })
+}

+// TODO: debug
+declare const window: any

+export function stage1(x: tf.Tensor4D, scales: number[], scoreThreshold: number, params: PNetParams) {
+  return tf.tidy(() => {

+    const boxes = scales.map((scale, i) => {
+      let resized = i === 0
+        // TODO: debug
+        ? tf.tensor4d(window.resizedData, [1, 820, 461, 3])

-    numboxes = total_boxes.shape[0]
+        : rescaleAndNormalize(x, scale)

-    if numboxes > 0:
-        pick = self.__nms(total_boxes.copy(), 0.7, 'Union')
-        total_boxes = total_boxes[pick, :]
+      const { prob, regions } = PNet(resized, params)

-        regw = total_boxes[:, 2] - total_boxes[:, 0]
-        regh = total_boxes[:, 3] - total_boxes[:, 1]
+      const scores = tf.unstack(prob, 3)[1]
+      const [sh, sw] = scores.shape.slice(1)
+      const [rh, rw] = regions.shape.slice(1)

-        qq1 = total_boxes[:, 0] + total_boxes[:, 5] * regw
-        qq2 = total_boxes[:, 1] + total_boxes[:, 6] * regh
-        qq3 = total_boxes[:, 2] + total_boxes[:, 7] * regw
-        qq4 = total_boxes[:, 3] + total_boxes[:, 8] * regh

-        total_boxes = np.transpose(np.vstack([qq1, qq2, qq3, qq4, total_boxes[:, 4]]))
-        total_boxes = self.__rerec(total_boxes.copy())
+      const boxes = extractBoundingBoxes(
+        scores.as2D(sh, sw),
+        regions.as3D(rh, rw, 4),
+        scale,
+        scoreThreshold
+      )

-        total_boxes[:, 0:4] = np.fix(total_boxes[:, 0:4]).astype(np.int32)
-        status = StageStatus(self.__pad(total_boxes.copy(), stage_status.width, stage_status.height),
-                             width=stage_status.width, height=stage_status.height)
+      // TODO: debug
+      if (!boxes) {
+        console.log('no boxes for scale', scale)
+        return
+      }
+      // TODO: debug
+      i === 0 && (window.boxes = boxes.dataSync())

-    return total_boxes, status
-*/
+    })
+
+  })
+}
File renamed without changes.
