Skip to content

Commit 99cbd7e

Browse files
fixed masks and coordinate loss
1 parent 459067c commit 99cbd7e

File tree

14 files changed

+543
-48
lines changed

14 files changed

+543
-48
lines changed

tools/train/faceLandmarks/faceLandmarksTrain.js

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,15 @@ async function trainStep(batchCreators) {
22
await promiseSequential(batchCreators.map((batchCreator, dataIdx) => async () => {
33

44
const { batchInput, landmarksBatchTensor } = await batchCreator()
5+
56
let ts = Date.now()
67
const cost = optimizer.minimize(() => {
78
const out = window.trainNet.forwardInput(batchInput.managed())
89
const loss = lossFunction(
910
landmarksBatchTensor,
1011
out
1112
)
12-
return loss
13+
return tf.sum(out)
1314
}, true)
1415

1516
ts = Date.now() - ts
@@ -19,6 +20,7 @@ async function trainStep(batchCreators) {
1920
cost.dispose()
2021

2122
await tf.nextFrame()
23+
console.log(tf.memory())
2224
}))
2325
}
2426

@@ -63,7 +65,7 @@ function landmarkPositionsToArray(landmarks) {
6365
}
6466

6567
function toFaceLandmarks(landmarks, { naturalWidth, naturalHeight }) {
66-
return new faceapi.FaceLandmarks(
68+
return new faceapi.FaceLandmarks68(
6769
landmarks.map(l => new faceapi.Point(l.x / naturalWidth, l.y / naturalHeight)),
6870
{ width: naturalWidth, height: naturalHeight }
6971
)
@@ -90,8 +92,11 @@ async function getTrainData() {
9092
(_, i) => landmarksJson[i]
9193
)
9294

95+
return await loadImagesInBatch(allLandmarks.slice(0, 100))
96+
/**
9397
const batch1 = await loadImagesInBatch(allLandmarks.slice(0, 4000))
9498
const batch2 = await loadImagesInBatch(allLandmarks.slice(4000), 4000)
9599
96100
return batch1.concat(batch2)
101+
*/
97102
}

tools/train/faceLandmarks/faceLandmarksUi.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ async function init() {
6060
//window.nets.push(await loadNet('retrained/landmarks_v0.weights'))
6161
//window.nets.push(await loadNet('retrained/landmarks_v2.weights'))
6262

63-
window.trainNet = await loadNet('retrained/landmarks_v6.weights')
63+
window.trainNet = await loadNet('/tmp/retrained/landmarks_v9.weights')
6464
window.nets.push(trainNet)
6565

6666
$('#loader').hide()

tools/train/faceLandmarks/train.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@
6161
await train()
6262
}
6363

64-
async function train(batchSize = 10) {
64+
async function train(batchSize = 1) {
6565
for (let i = 0; i < trainSteps; i++) {
6666
console.log('step', i)
6767
const batchCreators = createBatchCreators(shuffle(window.trainData), batchSize)
@@ -70,7 +70,7 @@
7070
ts = Date.now() - ts
7171
console.log('step %s done (%s ms)', i, ts)
7272
if (((i + 1) % saveEveryNthIteration) === 0) {
73-
saveWeights(window.trainNet, 'landmark_trained_weights_' + idx + '.weights')
73+
//saveWeights(window.trainNet, 'landmark_trained_weights_' + idx + '.weights')
7474
}
7575
}
7676
}

tools/train/karma.conf.js

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
module.exports = function(config) {
2+
config.set({
3+
frameworks: ['jasmine', 'karma-typescript'],
4+
files: [
5+
'tinyYolov2/**/*.ts'
6+
],
7+
preprocessors: {
8+
'**/*.ts': ['karma-typescript']
9+
},
10+
karmaTypescriptConfig: {
11+
tsconfig: './tsconfig.test.json'
12+
},
13+
browsers: ['Chrome'],
14+
browserNoActivityTimeout: 60000,
15+
client: {
16+
jasmine: {
17+
timeoutInterval: 30000
18+
}
19+
}
20+
})
21+
}

tools/train/package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
{
22
"scripts": {
3-
"start": "node server.js"
3+
"start": "node server.js",
4+
"test": "karma start"
45
},
56
"author": "justadudewhohacks",
67
"license": "MIT",

tools/train/serveTinyYolov2.js

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ const detectionFilenames = fs.readdirSync(detectionsPath)
2525
app.use(express.static(trainDataPath))
2626

2727
app.get('/detection_filenames', (req, res) => res.status(202).send(detectionFilenames))
28-
app.get('/', (req, res) => res.sendFile(path.join(publicDir, 'train.html')))
28+
app.get('/', (req, res) => res.sendFile(path.join(publicDir, 'overfit.html')))
29+
app.get('/verify', (req, res) => res.sendFile(path.join(publicDir, 'verify.html')))
2930

3031
app.listen(3000, () => console.log('Listening on port 3000!'))

tools/train/shared/trainUtils.js

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,4 +27,6 @@ function saveWeights(net, filename = 'train_tmp') {
2727
.reduce((flat, arr) => flat.concat(arr))
2828
)
2929
saveAs(new Blob([binaryWeights]), filename)
30-
}
30+
}
31+
32+
/**
 * console.log wrapper that prefixes the message with the current local time
 * as "[HH:MM:SS]".
 *
 * @param {string} [str] - message / format string; any falsy value is printed as ''
 * @param {...*} args - forwarded unchanged to console.log after the message
 */
const log = (str, ...args) => {
  // toTimeString() starts with "HH:MM:SS"; slice replaces the deprecated substr
  const timestamp = new Date().toTimeString().slice(0, 8)
  console.log(`[${timestamp}] ${str || ''}`, ...args)
}

tools/train/tinyYolov2/loss.js

Lines changed: 142 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,17 @@
1-
// hyper parameters
2-
const objectScale = 1
3-
const noObjectScale = 0.5
4-
const coordScale = 5
5-
61
const CELL_SIZE = 32
72

83
// Divides the input size by CELL_SIZE, i.e. the number of grid cells along one
// side of the input. The result is not rounded.
const getNumCells = (inputSize) => {
  return inputSize / CELL_SIZE
}
94

5+
// Logit function: log(x / (1 - x)).
// Yields -Infinity at x = 0, Infinity at x = 1 and NaN for x outside [0, 1].
const inverseSigmoid = (x) => {
  const odds = x / (1 - x)
  return Math.log(odds)
}
6+
107
// Returns the `anchors` property of the net stored in window.net.
function getAnchors() {
  const { anchors } = window.net
  return anchors
}
1310

11+
// Computes sum((mask * lossTensors)^2) inside a tf.tidy scope.
// The mask is multiplied in BEFORE squaring, so mask values other than 0 / 1
// would themselves be squared.
function squaredSumOverMask(lossTensors, mask) {
  return tf.tidy(() => {
    const masked = tf.mul(mask, lossTensors)
    const squared = tf.square(masked)
    return tf.sum(squared)
  })
}
14+
1415
function assignBoxesToAnchors(groundTruthBoxes, reshapedImgDims) {
1516

1617
const inputSize = Math.max(reshapedImgDims.width, reshapedImgDims.height)
@@ -53,6 +54,30 @@ function getGroundTruthMask(groundTruthBoxes, inputSize) {
5354
return mask
5455
}
5556

57+
// Builds two [numCells, numCells, 25] masks over the 25 output channels of
// every grid cell (5 anchors x 5 values per anchor):
//   - coordMask: 1 at the first four channels of each anchor
//   - scoreMask: 1 at the fifth channel of each anchor
// NOTE(review): the values are written through `tensor.buffer()`; this assumes
// writes to that buffer are reflected in the tensor itself - confirm for the
// tfjs version in use.
function getCoordAndScoreMasks(inputSize) {
  const valuesPerAnchor = 5
  const scoreOffset = 4
  const numChannels = 25

  const numCells = getNumCells(inputSize)

  const coordMask = tf.zeros([numCells, numCells, numChannels])
  const scoreMask = tf.zeros([numCells, numCells, numChannels])
  const coordBuf = coordMask.buffer()
  const scoreBuf = scoreMask.buffer()

  for (let row = 0; row < numCells; row++) {
    for (let col = 0; col < numCells; col++) {
      for (let channel = 0; channel < numChannels; channel++) {
        // the last channel of each group of five goes to the score mask,
        // the other four to the coordinate mask
        const isScoreChannel = channel % valuesPerAnchor === scoreOffset
        const target = isScoreChannel ? scoreBuf : coordBuf
        target.set(1, row, col, channel)
      }
    }
  }

  return { coordMask, scoreMask }
}
80+
5681
function computeBoxAdjustments(groundTruthBoxes, reshapedImgDims) {
5782

5883
const inputSize = Math.max(reshapedImgDims.width, reshapedImgDims.height)
@@ -66,10 +91,14 @@ function computeBoxAdjustments(groundTruthBoxes, reshapedImgDims) {
6691

6792
const centerX = (left + right) / 2
6893
const centerY = (top + bottom) / 2
69-
const dx = (centerX - (col * CELL_SIZE + (CELL_SIZE / 2))) / inputSize
70-
const dy = (centerY - (row * CELL_SIZE + (CELL_SIZE / 2))) / inputSize
71-
const dw = Math.log(width / getAnchors()[anchor].x)
72-
const dh = Math.log(height / getAnchors()[anchor].y)
94+
95+
const dCenterX = centerX - (col * CELL_SIZE + (CELL_SIZE / 2))
96+
const dCenterY = centerY - (row * CELL_SIZE + (CELL_SIZE / 2))
97+
98+
const dx = inverseSigmoid(dCenterX / inputSize)
99+
const dy = inverseSigmoid(dCenterY / inputSize)
100+
const dw = Math.log((width / CELL_SIZE) / getAnchors()[anchor].x)
101+
const dh = Math.log((height / CELL_SIZE) / getAnchors()[anchor].y)
73102

74103
const anchorOffset = anchor * 5
75104
buf.set(dx, row, col, anchorOffset + 0)
@@ -83,7 +112,8 @@ function computeBoxAdjustments(groundTruthBoxes, reshapedImgDims) {
83112

84113
function computeIous(predBoxes, groundTruthBoxes, reshapedImgDims) {
85114

86-
const numCells = getNumCells(Math.max(reshapedImgDims.width, reshapedImgDims.height))
115+
const inputSize = Math.max(reshapedImgDims.width, reshapedImgDims.height)
116+
const numCells = getNumCells(inputSize)
87117

88118
const isSameAnchor = p1 => p2 =>
89119
p1.row === p2.row
@@ -104,44 +134,130 @@ function computeIous(predBoxes, groundTruthBoxes, reshapedImgDims) {
104134

105135
const iou = faceapi.iou(
106136
box.rescale(reshapedImgDims),
107-
predBox.box.rescale(reshapedImgDims)
137+
predBox.box
108138
)
109139

140+
if (window.debug) {
141+
console.log('ground thruth box:', box.rescale(reshapedImgDims))
142+
console.log('predicted box:', predBox.box)
143+
console.log(iou)
144+
}
145+
110146
const anchorOffset = anchor * 5
111147
buf.set(iou, row, col, anchorOffset + 4)
112148
})
113149

114150
return ious
115151
}
116152

117-
function computeNoObjectLoss(outTensor) {
118-
return tf.tidy(() => tf.square(tf.sigmoid(outTensor)))
153+
// No-object loss: the sigmoid of the raw network output, restricted to `mask`
// and reduced via squaredSumOverMask. Attached to `window` rather than
// declared as a plain function.
window.computeNoObjectLoss = function(outTensor, mask) {
  return tf.tidy(() => squaredSumOverMask(tf.sigmoid(outTensor), mask))
}
120159

121-
function computeObjectLoss(outTensor, groundTruthBoxes, reshapedImgDims, paddings) {
160+
/**
 * Object loss: the difference between the IOUs of the predicted boxes with the
 * ground truth boxes and the sigmoid of the raw network output, restricted to
 * `mask` and reduced via squaredSumOverMask.
 *
 * @param outTensor - raw network output, passed to window.net.postProcess and tf.sigmoid
 * @param groundTruthBoxes - ground truth boxes, forwarded to computeIous
 * @param reshapedImgDims - object with `width` and `height`; the larger one determines the grid size
 * @param paddings - forwarded to window.net.postProcess as `{ paddings }`
 * @param mask - mask handed to squaredSumOverMask
 * @returns the value returned by squaredSumOverMask
 * @throws {Error} if postProcess does not return exactly one box per cell and
 *   anchor, or if a box lacks a valid row / col / anchor
 */
function computeObjectLoss(outTensor, groundTruthBoxes, reshapedImgDims, paddings, mask) {
  return tf.tidy(() => {
    const predBoxes = window.net.postProcess(
      outTensor,
      { paddings }
    )

    if (window.debug) {
      console.log(predBoxes)
      console.log(predBoxes.filter(b => b.score > 0.1))
    }

    // sanity check: one predicted box per grid cell and anchor
    const numCells = getNumCells(Math.max(reshapedImgDims.width, reshapedImgDims.height))
    const expectedNumBoxes = numCells * numCells * getAnchors().length
    if (predBoxes.length !== expectedNumBoxes) {
      throw new Error(
        `predBoxes.length (${predBoxes.length}) !== numCells * numCells * numAnchors (${expectedNumBoxes})`
      )
    }

    // sanity check: every box must carry its grid position; 0 is a valid
    // index, so only undefined / null / NaN are rejected
    const isInvalid = num => !num && num !== 0

    predBoxes.forEach(({ row, col, anchor }) => {
      if ([row, col, anchor].some(isInvalid)) {
        console.log(row, col, anchor)
        throw new Error('row, col, anchor invalid')
      }
    })

    const ious = computeIous(
      predBoxes,
      groundTruthBoxes,
      reshapedImgDims
    )

    const lossTensor = tf.sub(ious, tf.sigmoid(outTensor))

    return squaredSumOverMask(lossTensor, mask)
  })
}
136203

137-
function computeCoordLoss(groundTruthBoxes, outTensor, reshapedImgDims) {
204+
/**
 * Coordinate loss: the difference between the ground truth box adjustments
 * (from computeBoxAdjustments) and the raw network output, restricted to
 * `mask` and reduced via squaredSumOverMask.
 *
 * When window.debug is truthy, the masked ground truth / output values are
 * additionally logged to the console.
 *
 * @param groundTruthBoxes - ground truth boxes, forwarded to computeBoxAdjustments
 * @param outTensor - raw network output
 * @param reshapedImgDims - object with `width` and `height`
 * @param mask - mask handed to squaredSumOverMask
 * @param paddings - object with `x` and `y`; only used by the debug output
 * @returns the value returned by squaredSumOverMask
 */
function computeCoordLoss(groundTruthBoxes, outTensor, reshapedImgDims, mask, paddings) {
  return tf.tidy(() => {
    const boxAdjustments = computeBoxAdjustments(
      groundTruthBoxes,
      reshapedImgDims
    )

    if (window.debug) {
      logCoordLossDebugInfo(boxAdjustments, outTensor, reshapedImgDims, mask, paddings)
    }

    const lossTensor = tf.sub(boxAdjustments, outTensor)

    return squaredSumOverMask(lossTensor, mask)
  })
}

// Debug helper of computeCoordLoss: logs the ground truth and predicted values
// at every position selected by the mask and, if at least four values are
// selected, decodes the first four of them into a box for both sides.
function logCoordLossDebugInfo(boxAdjustments, outTensor, reshapedImgDims, mask, paddings) {
  const numCells = outTensor.shape[1]

  // flat index -> grid position, assuming 25 channels per cell laid out as
  // 5 anchors x 5 values (same layout as the loop bound of the original code)
  const indToPos = []
  for (let row = 0; row < numCells; row++) {
    for (let col = 0; col < numCells; col++) {
      for (let channel = 0; channel < 25; channel++) {
        indToPos.push({ row, col, anchor: Math.floor(channel / 5) })
      }
    }
  }

  const maskValues = Array.from(mask.dataSync())
  const maskedIndices = maskValues
    .map((val, idx) => ({ val, idx }))
    .filter(entry => entry.val !== 0)
    .map(entry => entry.idx)
  const gt = Array.from(boxAdjustments.dataSync())
  const out = Array.from(outTensor.dataSync())

  const comp = maskedIndices.map(i => (
    {
      pos: indToPos[i],
      gt: gt[i],
      out: out[i]
    }
  ))
  console.log(comp)
  console.log(comp.map(c => `gt: ${c.gt}, out: ${c.out}`))

  // decoding a box reads comp[0..3]; with fewer masked values (e.g. an empty
  // mask) there is nothing to decode and indexing would throw
  if (comp.length < 4) {
    return
  }

  const printBbox = (which) => {
    const { col, row, anchor } = comp[0].pos
    console.log(col, row, anchor)
    const ctX = ((col + faceapi.sigmoid(comp[0][which])) / numCells) * paddings.x
    const ctY = ((row + faceapi.sigmoid(comp[1][which])) / numCells) * paddings.y
    const width = ((Math.exp(comp[2][which]) * getAnchors()[anchor].x) / numCells) * paddings.x
    const height = ((Math.exp(comp[3][which]) * getAnchors()[anchor].y) / numCells) * paddings.y

    const x = (ctX - (width / 2))
    const y = (ctY - (height / 2))
    console.log(which, x * reshapedImgDims.width, y * reshapedImgDims.height, width * reshapedImgDims.width, height * reshapedImgDims.height)
  }

  printBbox('out')
  printBbox('gt')
}
147263

@@ -160,29 +276,30 @@ function computeLoss(outTensor, groundTruth, reshapedImgDims, paddings) {
160276
reshapedImgDims
161277
)
162278

163-
const mask = getGroundTruthMask(
164-
groundTruthBoxes,
165-
inputSize
166-
)
167-
const inverseMask = tf.tidy(() => tf.sub(tf.scalar(1), mask))
279+
const groundTruthMask = getGroundTruthMask(groundTruthBoxes, inputSize)
280+
const { coordMask, scoreMask } = getCoordAndScoreMasks(inputSize)
281+
282+
const noObjectLossMask = tf.tidy(() => tf.mul(scoreMask, tf.sub(tf.scalar(1), groundTruthMask)))
283+
const objectLossMask = tf.tidy(() => tf.mul(scoreMask, groundTruthMask))
284+
const coordLossMask = tf.tidy(() => tf.mul(coordMask, groundTruthMask))
168285

169286
const noObjectLoss = tf.tidy(() =>
170287
tf.mul(
171288
tf.scalar(noObjectScale),
172-
tf.sum(tf.mul(inverseMask, computeNoObjectLoss(outTensor)))
289+
computeNoObjectLoss(outTensor, noObjectLossMask)
173290
)
174291
)
175292
const objectLoss = tf.tidy(() =>
176293
tf.mul(
177294
tf.scalar(objectScale),
178-
tf.sum(tf.mul(mask, computeObjectLoss(outTensor, groundTruthBoxes, reshapedImgDims, paddings)))
295+
computeObjectLoss(outTensor, groundTruthBoxes, reshapedImgDims, paddings, objectLossMask)
179296
)
180297
)
181298

182299
const coordLoss = tf.tidy(() =>
183300
tf.mul(
184301
tf.scalar(coordScale),
185-
tf.sum(tf.mul(mask, computeCoordLoss(groundTruthBoxes, outTensor, reshapedImgDims)))
302+
computeCoordLoss(groundTruthBoxes, outTensor, reshapedImgDims, coordLossMask, paddings)
186303
)
187304
)
188305

0 commit comments

Comments
 (0)