Skip to content

Commit ca47cbb

Browse files
implemented yolo loss functions + mean subtraction for retrained model
1 parent 542dc68 commit ca47cbb

20 files changed

+363
-50
lines changed

src/NetInput.ts

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ import * as tf from '@tensorflow/tfjs-core';
33
import { isTensor3D, isTensor4D } from './commons/isTensor';
44
import { padToSquare } from './padToSquare';
55
import { Point } from './Point';
6-
import { TResolvedNetInput } from './types';
6+
import { TResolvedNetInput, Dimensions } from './types';
77
import { createCanvasFromMedia } from './utils';
88

99
export class NetInput {
@@ -14,6 +14,7 @@ export class NetInput {
1414

1515
private _inputDimensions: number[][] = []
1616
private _paddings: Point[] = []
17+
private _inputSize: number = 0
1718

1819
constructor(
1920
inputs: tf.Tensor4D | Array<TResolvedNetInput>,
@@ -81,6 +82,10 @@ export class NetInput {
8182
return this._paddings
8283
}
8384

85+
/**
 * The value stored in `_inputSize`. The field is initialised to 0 and is
 * overwritten by `toBatchTensor` with the `inputSize` argument it receives,
 * so this getter reflects the most recent `toBatchTensor` call.
 */
public get inputSize(): number {
86+
return this._inputSize
87+
}
88+
8489
/**
 * Returns the entry of `_inputDimensions` stored for the given batch index.
 * No bounds check is performed on `batchIdx`.
 *
 * @param batchIdx Index of the input within the batch.
 */
public getInputDimensions(batchIdx: number): number[] {
8590
return this._inputDimensions[batchIdx]
8691
}
@@ -97,8 +102,26 @@ export class NetInput {
97102
return this._paddings[batchIdx]
98103
}
99104

105+
/**
 * Computes, per axis, the ratio of the padded extent to the original extent
 * of the input at `batchIdx`: `(padding + size) / size`.
 *
 * Note that the result is a scale factor rather than a padding amount;
 * callers multiply relative box coordinates by it.
 *
 * @param batchIdx Index of the input within the batch.
 * @returns A `Point` whose `x` is the width ratio and whose `y` is the height ratio.
 */
public getRelativePaddings(batchIdx: number): Point {
  const padding = this.getPaddings(batchIdx)
  const width = this.getInputWidth(batchIdx)
  const height = this.getInputHeight(batchIdx)

  const relativeX = (padding.x + width) / width
  const relativeY = (padding.y + height) / height

  return new Point(relativeX, relativeY)
}
111+
112+
/**
 * Scales the dimensions of the input at `batchIdx` by
 * `inputSize / max(height, width)` and floors both results.
 *
 * `inputSize` is the value recorded by the last `toBatchTensor` call and is 0
 * until that method has been called.
 *
 * @param batchIdx Index of the input within the batch.
 * @returns The floored, scaled `height` and `width`.
 */
public getReshapedInputDimensions(batchIdx: number): Dimensions {
  const height = this.getInputHeight(batchIdx)
  const width = this.getInputWidth(batchIdx)
  const scale = this.inputSize / Math.max(height, width)

  return {
    height: Math.floor(height * scale),
    width: Math.floor(width * scale)
  }
}
120+
100121
public toBatchTensor(inputSize: number, isCenterInputs: boolean = true): tf.Tensor4D {
101122

123+
this._inputSize = inputSize
124+
102125
return tf.tidy(() => {
103126

104127
const inputTensors = this._inputs.map((inputTensor: tf.Tensor3D) => {

src/commons/extractWeightsFactory.ts

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ export function extractWeightsFactory(weights: Float32Array) {
22
let remainingWeights = weights
33

44
function extractWeights(numWeights: number): Float32Array {
5-
console.log(numWeights)
65
const ret = remainingWeights.slice(0, numWeights)
76
remainingWeights = remainingWeights.slice(numWeights)
87
return ret

src/commons/normalize.ts

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
import * as tf from '@tensorflow/tfjs-core';
2+
3+
/**
 * Subtracts a per-channel constant from a batch tensor.
 *
 * The first three entries of `meanRgb` are subtracted from the values at
 * indices 0, 1 and 2 of the last axis (axis 3) of `x`, respectively.
 *
 * @param x Rank-4 input tensor; axis 3 is treated as the channel axis.
 * @param meanRgb Values to subtract; only the first three entries are used.
 * @returns A new tensor holding `x` minus the per-channel values. Intermediate
 *   tensors are released by the surrounding `tf.tidy`.
 */
export function normalize(x: tf.Tensor4D, meanRgb: number[]): tf.Tensor4D {
  return tf.tidy(() => {
    // A rank-1 tensor of length 3 broadcasts along the trailing axis of x, so
    // there is no need to fill and concatenate one full-size tensor per channel.
    const mean = tf.tensor1d(meanRgb.slice(0, 3))

    return tf.sub(x, mean)
  })
}

src/faceDetectionNet/FaceDetectionNet.ts

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -81,19 +81,18 @@ export class FaceDetectionNet extends NeuralNetwork<NetParams> {
8181
minConfidence
8282
)
8383

84-
const paddedHeightRelative = (netInput.getPaddings(0).y + netInput.getInputHeight(0)) / netInput.getInputHeight(0)
85-
const paddedWidthRelative = (netInput.getPaddings(0).x + netInput.getInputWidth(0)) / netInput.getInputWidth(0)
84+
const paddings = netInput.getRelativePaddings(0)
8685

8786
const results = indices
8887
.map(idx => {
8988
const [top, bottom] = [
9089
Math.max(0, boxes.get(idx, 0)),
9190
Math.min(1.0, boxes.get(idx, 2))
92-
].map(val => val * paddedHeightRelative)
91+
].map(val => val * paddings.y)
9392
const [left, right] = [
9493
Math.max(0, boxes.get(idx, 1)),
9594
Math.min(1.0, boxes.get(idx, 3))
96-
].map(val => val * paddedWidthRelative)
95+
].map(val => val * paddings.x)
9796
return new FaceDetection(
9897
scoresData[idx],
9998
new Rect(

src/faceRecognitionNet/FaceRecognitionNet.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
import * as tf from '@tensorflow/tfjs-core';
22

33
import { NeuralNetwork } from '../commons/NeuralNetwork';
4+
import { normalize } from '../commons/normalize';
45
import { NetInput } from '../NetInput';
56
import { toNetInput } from '../toNetInput';
67
import { TNetInput } from '../types';
78
import { convDown } from './convLayer';
89
import { extractParams } from './extractParams';
910
import { loadQuantizedParams } from './loadQuantizedParams';
10-
import { normalize } from './normalize';
1111
import { residual, residualDown } from './residualLayer';
1212
import { NetParams } from './types';
1313

@@ -28,7 +28,8 @@ export class FaceRecognitionNet extends NeuralNetwork<NetParams> {
2828
return tf.tidy(() => {
2929
const batchTensor = input.toBatchTensor(150, true)
3030

31-
const normalized = normalize(batchTensor)
31+
const meanRgb = [122.782, 117.001, 104.298]
32+
const normalized = normalize(batchTensor, meanRgb).div(tf.scalar(256)) as tf.Tensor4D
3233

3334
let out = convDown(normalized, params.conv32_down)
3435
out = tf.maxPool(out, 3, 2, 'valid')

src/faceRecognitionNet/normalize.ts

Lines changed: 0 additions & 12 deletions
This file was deleted.

src/index.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ export {
55
}
66

77

8+
export * from './BoundingBox';
89
export * from './FaceDetection';
910
export * from './FullFaceDescription';
1011
export * from './NetInput';
@@ -24,4 +25,6 @@ export * from './mtcnn';
2425
export * from './padToSquare';
2526
export * from './tinyYolov2';
2627
export * from './toNetInput';
27-
export * from './utils'
28+
export * from './utils';
29+
30+
export * from './types';

src/tinyYolov2/TinyYolov2.ts

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,14 @@ import { BoundingBox } from '../BoundingBox';
44
import { convLayer } from '../commons/convLayer';
55
import { NeuralNetwork } from '../commons/NeuralNetwork';
66
import { nonMaxSuppression } from '../commons/nonMaxSuppression';
7+
import { normalize } from '../commons/normalize';
78
import { FaceDetection } from '../FaceDetection';
89
import { NetInput } from '../NetInput';
910
import { Point } from '../Point';
1011
import { toNetInput } from '../toNetInput';
1112
import { TNetInput } from '../types';
1213
import { sigmoid } from '../utils';
13-
import { BOX_ANCHORS, BOX_ANCHORS_SEPARABLE, INPUT_SIZES, IOU_THRESHOLD, NUM_BOXES } from './config';
14+
import { BOX_ANCHORS, BOX_ANCHORS_SEPARABLE, INPUT_SIZES, IOU_THRESHOLD, MEAN_RGB, NUM_BOXES } from './config';
1415
import { convWithBatchNorm } from './convWithBatchNorm';
1516
import { extractParams } from './extractParams';
1617
import { getDefaultParams } from './getDefaultParams';
@@ -45,7 +46,12 @@ export class TinyYolov2 extends NeuralNetwork<NetParams> {
4546
}
4647

4748
const out = tf.tidy(() => {
48-
const batchTensor = input.toBatchTensor(inputSize, false).div(tf.scalar(255)) as tf.Tensor4D
49+
50+
let batchTensor = input.toBatchTensor(inputSize, false)
51+
batchTensor = this.hasSeparableConvs
52+
? normalize(batchTensor, MEAN_RGB)
53+
: batchTensor
54+
batchTensor = batchTensor.div(tf.scalar(256)) as tf.Tensor4D
4955

5056
let out = convWithBatchNorm(batchTensor, params.conv0)
5157
out = tf.maxPool(out, [2, 2], [2, 2], 'same')
@@ -87,22 +93,23 @@ export class TinyYolov2 extends NeuralNetwork<NetParams> {
8793

8894
const netInput = await toNetInput(input, true)
8995
const out = await this.forwardInput(netInput, inputSize)
96+
const out0 = tf.tidy(() => tf.unstack(out)[0].expandDims()) as tf.Tensor4D
97+
98+
console.log(out0.shape)
9099

91100
const inputDimensions = {
92101
width: netInput.getInputWidth(0),
93102
height: netInput.getInputHeight(0)
94103
}
95104

96-
const paddings = new Point(
97-
(netInput.getPaddings(0).x + netInput.getInputWidth(0)) / netInput.getInputWidth(0),
98-
(netInput.getPaddings(0).y + netInput.getInputHeight(0)) / netInput.getInputHeight(0)
99-
)
105+
const paddings = netInput.getRelativePaddings(0)
100106

101-
const results = this.postProcess(out, { scoreThreshold, paddings })
107+
const results = this.postProcess(out0, { scoreThreshold, paddings })
102108
const boxes = results.map(res => res.box)
103109
const scores = results.map(res => res.score)
104110

105111
out.dispose()
112+
out0.dispose()
106113

107114
const indices = nonMaxSuppression(
108115
boxes.map(box => box.rescale(inputSize)),

src/tinyYolov2/config.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,4 +18,6 @@ export const BOX_ANCHORS_SEPARABLE = [
1818
new Point(2.882459, 3.518061),
1919
new Point(4.266906, 5.178857),
2020
new Point(9.041765, 10.66308)
21-
]
21+
]
22+
23+
/**
 * Per-channel values handed to `normalize` by `TinyYolov2` when
 * `hasSeparableConvs` is set; `normalize` reads the entries in [r, g, b] order
 * and subtracts them from the input batch before it is divided by 256.
 */
export const MEAN_RGB = [117.001, 114.697, 97.404]

src/tinyYolov2/convWithBatchNorm.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ export function convWithBatchNorm(x: tf.Tensor4D, params: ConvWithBatchNorm | Se
88
let out = tf.pad(x, [[0, 0], [1, 1], [1, 1], [0, 0]]) as tf.Tensor4D
99

1010
if (params instanceof SeparableConvParams) {
11-
out = tf.separableConv2d(out, params.depthwise_filter, params.pointwise_filter, [1, 1], 'same')
11+
out = tf.separableConv2d(out, params.depthwise_filter, params.pointwise_filter, [1, 1], 'valid')
1212
out = tf.add(out, params.bias)
1313
} else {
1414
out = tf.conv2d(out, params.conv.filters, [1, 1], 'valid')

0 commit comments

Comments
 (0)