Skip to content

Commit ca47cbb

Browse files
implemented yolo loss functions + mean subtraction for retrained model
1 parent 542dc68 commit ca47cbb

20 files changed

+363
-50
lines changed

src/NetInput.ts

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ import * as tf from '@tensorflow/tfjs-core';
33
import { isTensor3D, isTensor4D } from './commons/isTensor';
44
import { padToSquare } from './padToSquare';
55
import { Point } from './Point';
6-
import { TResolvedNetInput } from './types';
6+
import { TResolvedNetInput, Dimensions } from './types';
77
import { createCanvasFromMedia } from './utils';
88

99
export class NetInput {
@@ -14,6 +14,7 @@ export class NetInput {
1414

1515
private _inputDimensions: number[][] = []
1616
private _paddings: Point[] = []
17+
private _inputSize: number = 0
1718

1819
constructor(
1920
inputs: tf.Tensor4D | Array<TResolvedNetInput>,
@@ -81,6 +82,10 @@ export class NetInput {
8182
return this._paddings
8283
}
8384

85+
/**
 * The value stored in `_inputSize`: 0 initially, then the `inputSize`
 * argument of the most recent `toBatchTensor` call, which assigns it.
 */
public get inputSize(): number {
86+
return this._inputSize
87+
}
88+
8489
/**
 * Returns the stored dimensions entry for one input of the batch.
 *
 * @param batchIdx Index into the `_inputDimensions` array.
 * @returns The `number[]` stored at that index; `undefined` at runtime if the
 * index is out of range, since this is a plain array lookup.
 * NOTE(review): the layout of the entry (presumably [height, width, channels])
 * is set where `_inputDimensions` is filled — confirm there.
 */
public getInputDimensions(batchIdx: number): number[] {
8590
return this._inputDimensions[batchIdx]
8691
}
@@ -97,8 +102,26 @@ export class NetInput {
97102
return this._paddings[batchIdx]
98103
}
99104

105+
/**
 * Computes, for one input of the batch, the ratio of the padded extent to the
 * original extent along each axis: (padding + size) / size.
 *
 * @param batchIdx Index of the input within the batch.
 * @returns A Point whose x is the ratio for the width and whose y is the
 * ratio for the height.
 */
public getRelativePaddings(batchIdx: number): Point {
  const { x: padX, y: padY } = this.getPaddings(batchIdx)
  const width = this.getInputWidth(batchIdx)
  const height = this.getInputHeight(batchIdx)

  const relativeX = (padX + width) / width
  const relativeY = (padY + height) / height

  return new Point(relativeX, relativeY)
}
111+
112+
/**
 * Scales the dimensions of one input so that its longer side maps to
 * `inputSize`, keeping the aspect ratio; both results are rounded down.
 *
 * `inputSize` stays 0 until `toBatchTensor` has been called, in which case
 * both returned dimensions are 0.
 *
 * @param batchIdx Index of the input within the batch.
 * @returns The scaled height and width.
 */
public getReshapedInputDimensions(batchIdx: number): Dimensions {
  const height = this.getInputHeight(batchIdx)
  const width = this.getInputWidth(batchIdx)

  // One common factor for both axes, derived from the longer side.
  const scale = this.inputSize / Math.max(height, width)

  const reshaped: Dimensions = {
    height: Math.floor(height * scale),
    width: Math.floor(width * scale)
  }
  return reshaped
}
120+
100121
public toBatchTensor(inputSize: number, isCenterInputs: boolean = true): tf.Tensor4D {
101122

123+
this._inputSize = inputSize
124+
102125
return tf.tidy(() => {
103126

104127
const inputTensors = this._inputs.map((inputTensor: tf.Tensor3D) => {

src/commons/extractWeightsFactory.ts

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ export function extractWeightsFactory(weights: Float32Array) {
22
let remainingWeights = weights
33

44
function extractWeights(numWeights: number): Float32Array {
5-
console.log(numWeights)
65
const ret = remainingWeights.slice(0, numWeights)
76
remainingWeights = remainingWeights.slice(numWeights)
87
return ret

src/commons/normalize.ts

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
import * as tf from '@tensorflow/tfjs-core';
2+
3+
/**
 * Subtracts a per-channel mean from a batch tensor.
 *
 * The mean is built as a 3-element tensor and broadcast by `tf.sub` along the
 * last axis of `x`, so no full-size constant tensors have to be allocated and
 * concatenated. Intermediate tensors are released by `tf.tidy`.
 *
 * @param x Batch tensor; the mean is applied along its last axis.
 * @param meanRgb Mean values; only the first three entries are used, in order.
 * @returns A new tensor holding `x` minus the per-channel mean.
 */
export function normalize(x: tf.Tensor4D, meanRgb: number[]): tf.Tensor4D {
  return tf.tidy(() => {
    const [r, g, b] = meanRgb
    const mean = tf.tensor1d([r, g, b])

    // Shape [3] broadcasts against the trailing axis of x.
    return tf.sub(x, mean)
  })
}

src/faceDetectionNet/FaceDetectionNet.ts

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -81,19 +81,18 @@ export class FaceDetectionNet extends NeuralNetwork<NetParams> {
8181
minConfidence
8282
)
8383

84-
const paddedHeightRelative = (netInput.getPaddings(0).y + netInput.getInputHeight(0)) / netInput.getInputHeight(0)
85-
const paddedWidthRelative = (netInput.getPaddings(0).x + netInput.getInputWidth(0)) / netInput.getInputWidth(0)
84+
const paddings = netInput.getRelativePaddings(0)
8685

8786
const results = indices
8887
.map(idx => {
8988
const [top, bottom] = [
9089
Math.max(0, boxes.get(idx, 0)),
9190
Math.min(1.0, boxes.get(idx, 2))
92-
].map(val => val * paddedHeightRelative)
91+
].map(val => val * paddings.y)
9392
const [left, right] = [
9493
Math.max(0, boxes.get(idx, 1)),
9594
Math.min(1.0, boxes.get(idx, 3))
96-
].map(val => val * paddedWidthRelative)
95+
].map(val => val * paddings.x)
9796
return new FaceDetection(
9897
scoresData[idx],
9998
new Rect(

src/faceRecognitionNet/FaceRecognitionNet.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
import * as tf from '@tensorflow/tfjs-core';
22

33
import { NeuralNetwork } from '../commons/NeuralNetwork';
4+
import { normalize } from '../commons/normalize';
45
import { NetInput } from '../NetInput';
56
import { toNetInput } from '../toNetInput';
67
import { TNetInput } from '../types';
78
import { convDown } from './convLayer';
89
import { extractParams } from './extractParams';
910
import { loadQuantizedParams } from './loadQuantizedParams';
10-
import { normalize } from './normalize';
1111
import { residual, residualDown } from './residualLayer';
1212
import { NetParams } from './types';
1313

@@ -28,7 +28,8 @@ export class FaceRecognitionNet extends NeuralNetwork<NetParams> {
2828
return tf.tidy(() => {
2929
const batchTensor = input.toBatchTensor(150, true)
3030

31-
const normalized = normalize(batchTensor)
31+
const meanRgb = [122.782, 117.001, 104.298]
32+
const normalized = normalize(batchTensor, meanRgb).div(tf.scalar(256)) as tf.Tensor4D
3233

3334
let out = convDown(normalized, params.conv32_down)
3435
out = tf.maxPool(out, 3, 2, 'valid')

src/faceRecognitionNet/normalize.ts

Lines changed: 0 additions & 12 deletions
This file was deleted.

src/index.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ export {
55
}
66

77

8+
export * from './BoundingBox';
89
export * from './FaceDetection';
910
export * from './FullFaceDescription';
1011
export * from './NetInput';
@@ -24,4 +25,6 @@ export * from './mtcnn';
2425
export * from './padToSquare';
2526
export * from './tinyYolov2';
2627
export * from './toNetInput';
27-
export * from './utils'
28+
export * from './utils';
29+
30+
export * from './types';

src/tinyYolov2/TinyYolov2.ts

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,14 @@ import { BoundingBox } from '../BoundingBox';
44
import { convLayer } from '../commons/convLayer';
55
import { NeuralNetwork } from '../commons/NeuralNetwork';
66
import { nonMaxSuppression } from '../commons/nonMaxSuppression';
7+
import { normalize } from '../commons/normalize';
78
import { FaceDetection } from '../FaceDetection';
89
import { NetInput } from '../NetInput';
910
import { Point } from '../Point';
1011
import { toNetInput } from '../toNetInput';
1112
import { TNetInput } from '../types';
1213
import { sigmoid } from '../utils';
13-
import { BOX_ANCHORS, BOX_ANCHORS_SEPARABLE, INPUT_SIZES, IOU_THRESHOLD, NUM_BOXES } from './config';
14+
import { BOX_ANCHORS, BOX_ANCHORS_SEPARABLE, INPUT_SIZES, IOU_THRESHOLD, MEAN_RGB, NUM_BOXES } from './config';
1415
import { convWithBatchNorm } from './convWithBatchNorm';
1516
import { extractParams } from './extractParams';
1617
import { getDefaultParams } from './getDefaultParams';
@@ -45,7 +46,12 @@ export class TinyYolov2 extends NeuralNetwork<NetParams> {
4546
}
4647

4748
const out = tf.tidy(() => {
48-
const batchTensor = input.toBatchTensor(inputSize, false).div(tf.scalar(255)) as tf.Tensor4D
49+
50+
let batchTensor = input.toBatchTensor(inputSize, false)
51+
batchTensor = this.hasSeparableConvs
52+
? normalize(batchTensor, MEAN_RGB)
53+
: batchTensor
54+
batchTensor = batchTensor.div(tf.scalar(256)) as tf.Tensor4D
4955

5056
let out = convWithBatchNorm(batchTensor, params.conv0)
5157
out = tf.maxPool(out, [2, 2], [2, 2], 'same')
@@ -87,22 +93,23 @@ export class TinyYolov2 extends NeuralNetwork<NetParams> {
8793

8894
const netInput = await toNetInput(input, true)
8995
const out = await this.forwardInput(netInput, inputSize)
96+
const out0 = tf.tidy(() => tf.unstack(out)[0].expandDims()) as tf.Tensor4D
97+
98+
console.log(out0.shape)
9099

91100
const inputDimensions = {
92101
width: netInput.getInputWidth(0),
93102
height: netInput.getInputHeight(0)
94103
}
95104

96-
const paddings = new Point(
97-
(netInput.getPaddings(0).x + netInput.getInputWidth(0)) / netInput.getInputWidth(0),
98-
(netInput.getPaddings(0).y + netInput.getInputHeight(0)) / netInput.getInputHeight(0)
99-
)
105+
const paddings = netInput.getRelativePaddings(0)
100106

101-
const results = this.postProcess(out, { scoreThreshold, paddings })
107+
const results = this.postProcess(out0, { scoreThreshold, paddings })
102108
const boxes = results.map(res => res.box)
103109
const scores = results.map(res => res.score)
104110

105111
out.dispose()
112+
out0.dispose()
106113

107114
const indices = nonMaxSuppression(
108115
boxes.map(box => box.rescale(inputSize)),

src/tinyYolov2/config.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,4 +18,6 @@ export const BOX_ANCHORS_SEPARABLE = [
1818
new Point(2.882459, 3.518061),
1919
new Point(4.266906, 5.178857),
2020
new Point(9.041765, 10.66308)
21-
]
21+
]
22+
23+
export const MEAN_RGB = [117.001, 114.697, 97.404]

src/tinyYolov2/convWithBatchNorm.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ export function convWithBatchNorm(x: tf.Tensor4D, params: ConvWithBatchNorm | Se
88
let out = tf.pad(x, [[0, 0], [1, 1], [1, 1], [0, 0]]) as tf.Tensor4D
99

1010
if (params instanceof SeparableConvParams) {
11-
out = tf.separableConv2d(out, params.depthwise_filter, params.pointwise_filter, [1, 1], 'same')
11+
out = tf.separableConv2d(out, params.depthwise_filter, params.pointwise_filter, [1, 1], 'valid')
1212
out = tf.add(out, params.bias)
1313
} else {
1414
out = tf.conv2d(out, params.conv.filters, [1, 1], 'valid')

src/tinyYolov2/extractParams.ts

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,10 @@ function extractorsFactory(extractWeights: ExtractWeightsFunction, paramMappings
3131
}
3232

3333
function extractSeparableConvParams(channelsIn: number, channelsOut: number, mappedPrefix: string): SeparableConvParams {
34-
console.log(mappedPrefix)
3534
const depthwise_filter = tf.tensor4d(extractWeights(3 * 3 * channelsIn), [3, 3, channelsIn, 1])
3635
const pointwise_filter = tf.tensor4d(extractWeights(channelsIn * channelsOut), [1, 1, channelsIn, channelsOut])
3736
const bias = tf.tensor1d(extractWeights(channelsOut))
38-
console.log('done')
37+
3938
paramMappings.push(
4039
{ paramPath: `${mappedPrefix}/depthwise_filter` },
4140
{ paramPath: `${mappedPrefix}/pointwise_filter` },

tools/train/public/faceLandmarksTrain.js renamed to tools/train/faceLandmarks/faceLandmarksTrain.js

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -104,16 +104,4 @@ async function getTrainData() {
104104
const batch2 = await loadImagesInBatch(allLandmarks.slice(4000), 4000)
105105

106106
return batch1.concat(batch2)
107-
}
108-
109-
// https://stackoverflow.com/questions/6274339/how-can-i-shuffle-an-array
110-
function shuffle(a) {
111-
var j, x, i;
112-
for (i = a.length - 1; i > 0; i--) {
113-
j = Math.floor(Math.random() * (i + 1));
114-
x = a[i];
115-
a[i] = a[j];
116-
a[j] = x;
117-
}
118-
return a;
119107
}

tools/train/views/faceLandmarks.html renamed to tools/train/faceLandmarks/train.html

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
<script src="faceLandmarksUi.js"></script>
1111
<script src="faceLandmarksTrain.js"></script>
1212
<script src="FileSaver.js"></script>
13+
<script src="trainUtils.js"></script>
1314
</head>
1415
<body>
1516
<div id="navbar"></div>
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
2+
3+
// https://stackoverflow.com/questions/6274339/how-can-i-shuffle-an-array
4+
function shuffle(a) {
5+
var j, x, i;
6+
for (i = a.length - 1; i > 0; i--) {
7+
j = Math.floor(Math.random() * (i + 1));
8+
x = a[i];
9+
a[i] = a[j];
10+
a[j] = x;
11+
}
12+
return a;
13+
}
Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,12 @@
1-
require('./.env')
1+
require('./faceLandmarks/.env')
22

33
const express = require('express')
44
const path = require('path')
55

66
const app = express()
77

8-
const viewsDir = path.join(__dirname, 'views')
9-
app.use(express.static(viewsDir))
10-
app.use(express.static(path.join(__dirname, './public')))
11-
app.use(express.static(path.join(__dirname, './tmp')))
8+
const publicDir = path.join(__dirname, './faceLandmarks')
9+
app.use(express.static(publicDir))
1210
app.use(express.static(path.join(__dirname, './node_modules/file-saver')))
1311
app.use(express.static(path.join(__dirname, '../../examples/public')))
1412
app.use(express.static(path.join(__dirname, '../../weights')))
@@ -18,6 +16,6 @@ const trainDataPath = path.resolve(process.env.TRAIN_DATA_PATH)
1816
app.use(express.static(trainDataPath))
1917

2018
app.get('/', (req, res) => res.redirect('/face_landmarks'))
21-
app.get('/face_landmarks', (req, res) => res.sendFile(path.join(viewsDir, 'faceLandmarks.html')))
19+
app.get('/face_landmarks', (req, res) => res.sendFile(path.join(publicDir, 'train.html')))
2220

2321
app.listen(3000, () => console.log('Listening on port 3000!'))

tools/train/serveTinyYolov2.js

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
// Local express server for the tinyYolov2 tooling: serves several static
// directories and sends one HTML page for '/'.
// NOTE(review): './tinyYolov2/.env' is loaded as a module before
// TRAIN_DATA_PATH is read; presumably it populates process.env — confirm.
require('./tinyYolov2/.env')

const express = require('express')
const path = require('path')

const app = express()

// Static roots, registered in this order.
const publicDir = path.join(__dirname, './tinyYolov2')
app.use(express.static(publicDir))
app.use(express.static(path.join(__dirname, './node_modules/file-saver')))
app.use(express.static(path.join(__dirname, '../../examples/public')))
app.use(express.static(path.join(__dirname, '../../weights')))
app.use(express.static(path.join(__dirname, '../../dist')))

// path.resolve(undefined) throws a TypeError that does not mention the
// variable, so fail with an explicit message instead.
if (typeof process.env.TRAIN_DATA_PATH !== 'string') {
  throw new Error('TRAIN_DATA_PATH environment variable is not set')
}
const trainDataPath = path.resolve(process.env.TRAIN_DATA_PATH)
app.use(express.static(trainDataPath))

// Exactly one of the following pages is served for '/'; the others are kept
// commented out so they can be switched in by hand.
//app.get('/', (req, res) => res.sendFile(path.join(publicDir, 'train.html')))
//app.get('/', (req, res) => res.sendFile(path.join(publicDir, 'tinyYolov2FaceDetectionVideo.html')))
app.get('/', (req, res) => res.sendFile(path.join(publicDir, 'testLoss.html')))

app.listen(3000, () => console.log('Listening on port 3000!'))

0 commit comments

Comments
 (0)