1
- // hyper parameters
2
- const objectScale = 1
3
- const noObjectScale = 0.5
4
- const coordScale = 5
5
-
6
1
// Side length of one grid cell, in the same pixel units as the network input size.
const CELL_SIZE = 32

/**
 * Number of grid cells along one side of a square input.
 *
 * @param {number} inputSize - side length of the (square) network input
 * @returns {number} inputSize divided by CELL_SIZE (not rounded)
 */
const getNumCells = inputSize => inputSize / CELL_SIZE

/**
 * Inverse of the logistic sigmoid (the logit): log(x / (1 - x)).
 *
 * Only defined for x strictly between 0 and 1. It yields -Infinity at 0,
 * +Infinity at 1 and NaN outside of [0, 1].
 *
 * @param {number} x - value in (0, 1)
 * @returns {number} the value y for which sigmoid(y) === x
 */
const inverseSigmoid = x => Math.log(x / (1 - x))
6
+
10
7
/**
 * Returns the anchors of the network registered globally as `window.net`.
 *
 * Callers in this file read `.x` and `.y` from each entry and use `.length`
 * as the number of anchors per grid cell.
 *
 * @returns {Array} the `anchors` property of `window.net`
 */
function getAnchors() {
  return window.net.anchors
}
13
10
11
/**
 * Sum of squared, masked loss values: sum((mask * lossTensors)^2).
 *
 * The mask is applied before squaring, so a 0/1 mask simply selects which
 * entries contribute. The computation runs inside tf.tidy, so the
 * intermediate tensors are disposed and only the result is kept.
 *
 * @param {tf.Tensor} lossTensors - per-element (unsquared) loss values
 * @param {tf.Tensor} mask - multiplied element-wise with lossTensors
 * @returns {tf.Tensor} the result of tf.sum over all elements
 */
function squaredSumOverMask(lossTensors, mask) {
  return tf.tidy(() => tf.sum(tf.square(tf.mul(mask, lossTensors))))
}
14
+
14
15
function assignBoxesToAnchors ( groundTruthBoxes , reshapedImgDims ) {
15
16
16
17
const inputSize = Math . max ( reshapedImgDims . width , reshapedImgDims . height )
@@ -53,6 +54,30 @@ function getGroundTruthMask(groundTruthBoxes, inputSize) {
53
54
return mask
54
55
}
55
56
57
/**
 * Builds two constant 0/1 masks over the [numCells, numCells, 25] output grid.
 *
 * The last dimension holds 5 values for each of 5 anchors. Within each group
 * of 5, the first 4 channels are selected by `coordMask` and the 5th channel
 * is selected by `scoreMask`.
 *
 * The masks are written into fresh `tf.buffer`s and converted with
 * `toTensor()`. This avoids mutating the buffer of an existing tensor, which
 * relies on the buffer aliasing the tensor's storage.
 *
 * @param {number} inputSize - side length of the (square) network input
 * @returns {{ coordMask: tf.Tensor, scoreMask: tf.Tensor }} the two masks
 */
function getCoordAndScoreMasks(inputSize) {
  const NUM_ANCHORS = 5
  const VALUES_PER_ANCHOR = 5
  const NUM_COORDS = 4

  const numCells = getNumCells(inputSize)
  const shape = [numCells, numCells, NUM_ANCHORS * VALUES_PER_ANCHOR]

  // tf.buffer starts out zero-filled, matching the original tf.zeros masks
  const coordBuf = tf.buffer(shape)
  const scoreBuf = tf.buffer(shape)

  for (let row = 0; row < numCells; row++) {
    for (let col = 0; col < numCells; col++) {
      for (let anchor = 0; anchor < NUM_ANCHORS; anchor++) {
        const anchorOffset = VALUES_PER_ANCHOR * anchor
        for (let i = 0; i < NUM_COORDS; i++) {
          coordBuf.set(1, row, col, anchorOffset + i)
        }
        scoreBuf.set(1, row, col, anchorOffset + NUM_COORDS)
      }
    }
  }

  return {
    coordMask: coordBuf.toTensor(),
    scoreMask: scoreBuf.toTensor()
  }
}
80
+
56
81
function computeBoxAdjustments ( groundTruthBoxes , reshapedImgDims ) {
57
82
58
83
const inputSize = Math . max ( reshapedImgDims . width , reshapedImgDims . height )
@@ -66,10 +91,14 @@ function computeBoxAdjustments(groundTruthBoxes, reshapedImgDims) {
66
91
67
92
const centerX = ( left + right ) / 2
68
93
const centerY = ( top + bottom ) / 2
69
- const dx = ( centerX - ( col * CELL_SIZE + ( CELL_SIZE / 2 ) ) ) / inputSize
70
- const dy = ( centerY - ( row * CELL_SIZE + ( CELL_SIZE / 2 ) ) ) / inputSize
71
- const dw = Math . log ( width / getAnchors ( ) [ anchor ] . x )
72
- const dh = Math . log ( height / getAnchors ( ) [ anchor ] . y )
94
+
95
+ const dCenterX = centerX - ( col * CELL_SIZE + ( CELL_SIZE / 2 ) )
96
+ const dCenterY = centerY - ( row * CELL_SIZE + ( CELL_SIZE / 2 ) )
97
+
98
+ const dx = inverseSigmoid ( dCenterX / inputSize )
99
+ const dy = inverseSigmoid ( dCenterY / inputSize )
100
+ const dw = Math . log ( ( width / CELL_SIZE ) / getAnchors ( ) [ anchor ] . x )
101
+ const dh = Math . log ( ( height / CELL_SIZE ) / getAnchors ( ) [ anchor ] . y )
73
102
74
103
const anchorOffset = anchor * 5
75
104
buf . set ( dx , row , col , anchorOffset + 0 )
@@ -83,7 +112,8 @@ function computeBoxAdjustments(groundTruthBoxes, reshapedImgDims) {
83
112
84
113
function computeIous ( predBoxes , groundTruthBoxes , reshapedImgDims ) {
85
114
86
- const numCells = getNumCells ( Math . max ( reshapedImgDims . width , reshapedImgDims . height ) )
115
+ const inputSize = Math . max ( reshapedImgDims . width , reshapedImgDims . height )
116
+ const numCells = getNumCells ( inputSize )
87
117
88
118
const isSameAnchor = p1 => p2 =>
89
119
p1 . row === p2 . row
@@ -104,44 +134,130 @@ function computeIous(predBoxes, groundTruthBoxes, reshapedImgDims) {
104
134
105
135
const iou = faceapi . iou (
106
136
box . rescale ( reshapedImgDims ) ,
107
- predBox . box . rescale ( reshapedImgDims )
137
+ predBox . box
108
138
)
109
139
140
+ if ( window . debug ) {
141
+ console . log ( 'ground thruth box:' , box . rescale ( reshapedImgDims ) )
142
+ console . log ( 'predicted box:' , predBox . box )
143
+ console . log ( iou )
144
+ }
145
+
110
146
const anchorOffset = anchor * 5
111
147
buf . set ( iou , row , col , anchorOffset + 4 )
112
148
} )
113
149
114
150
return ious
115
151
}
116
152
117
/**
 * No-object loss: the masked sum of squared sigmoid activations,
 * i.e. sum((mask * sigmoid(outTensor))^2).
 *
 * Exposed on `window`, so it is reachable as a global from other scripts.
 *
 * @param {tf.Tensor} outTensor - raw network output
 * @param {tf.Tensor} mask - selects the entries that contribute to the loss
 * @returns {tf.Tensor} the summed loss returned by squaredSumOverMask
 */
window.computeNoObjectLoss = function (outTensor, mask) {
  return tf.tidy(() => {
    const lossTensor = tf.sigmoid(outTensor)
    return squaredSumOverMask(lossTensor, mask)
  })
}
120
159
121
/**
 * Object loss: the masked sum of squared differences between the IOUs of the
 * predicted boxes and the sigmoid of the raw network output,
 * i.e. sum((mask * (ious - sigmoid(outTensor)))^2).
 *
 * @param {tf.Tensor} outTensor - raw network output
 * @param {Array} groundTruthBoxes - ground truth boxes, forwarded to computeIous
 * @param {{ width: number, height: number }} reshapedImgDims - image dimensions after resizing
 * @param {Object} paddings - forwarded to `window.net.postProcess` as `{ paddings }`
 * @param {tf.Tensor} mask - selects the entries that contribute to the loss
 * @returns {tf.Tensor} the summed loss returned by squaredSumOverMask
 * @throws {Error} if postProcess does not return one box per cell and anchor,
 *   or if a returned box has an invalid row, col or anchor
 */
function computeObjectLoss(outTensor, groundTruthBoxes, reshapedImgDims, paddings, mask) {
  return tf.tidy(() => {
    const predBoxes = window.net.postProcess(
      outTensor,
      { paddings }
    )

    if (window.debug) {
      console.log(predBoxes)
      console.log(predBoxes.filter(b => b.score > 0.1))
    }

    // sanity check: exactly one predicted box per grid cell and anchor
    const numCells = getNumCells(Math.max(reshapedImgDims.width, reshapedImgDims.height))
    const expectedNumBoxes = numCells * numCells * getAnchors().length
    if (predBoxes.length !== expectedNumBoxes) {
      console.log(predBoxes.length)
      throw new Error(
        `predBoxes.length (${predBoxes.length}) !== numCells * numCells * numAnchors (${expectedNumBoxes})`
      )
    }

    // sanity check: NaN, undefined and null are rejected, 0 is a valid index
    const isInvalid = num => !num && num !== 0

    predBoxes.forEach(({ row, col, anchor }) => {
      if ([row, col, anchor].some(isInvalid)) {
        console.log(row, col, anchor)
        throw new Error('row, col, anchor invalid')
      }
    })

    const ious = computeIous(
      predBoxes,
      groundTruthBoxes,
      reshapedImgDims
    )

    const lossTensor = tf.sub(ious, tf.sigmoid(outTensor))

    return squaredSumOverMask(lossTensor, mask)
  })
}
136
203
137
/**
 * Debug helper: logs, for every entry selected by the mask, the ground truth
 * box adjustment next to the network output, then the boxes decoded from the
 * first four selected entries.
 *
 * NOTE(review): reads numCells from outTensor.shape[1], so it presumably
 * expects outTensor to be [batch, numCells, numCells, 25] - confirm.
 */
function logCoordLossDebugInfo(boxAdjustments, outTensor, reshapedImgDims, mask, paddings) {
  const numCells = outTensor.shape[1]

  // flat index -> grid position; every anchor owns 5 consecutive channels
  const indToPos = []
  for (let row = 0; row < numCells; row++) {
    for (let col = 0; col < numCells; col++) {
      for (let channel = 0; channel < 25; channel++) {
        indToPos.push({ row, col, anchor: Math.floor(channel / 5) })
      }
    }
  }

  const maskedIndices = Array.from(mask.dataSync())
    .map((val, ind) => ({ val, ind }))
    .filter(v => v.val !== 0)
    .map(v => v.ind)
  const gt = Array.from(boxAdjustments.dataSync())
  const out = Array.from(outTensor.dataSync())

  const comp = maskedIndices.map(i => ({
    pos: indToPos[i],
    gt: gt[i],
    out: out[i]
  }))
  console.log(comp)
  console.log(comp.map(c => `gt: ${c.gt}, out: ${c.out}`))

  // decoding a box needs its 4 coordinate entries; with fewer selected
  // entries (e.g. no ground truth boxes) there is nothing to decode
  if (comp.length < 4) {
    return
  }

  const printBbox = (which) => {
    const { col, row, anchor } = comp[0].pos
    console.log(col, row, anchor)
    const ctX = ((col + faceapi.sigmoid(comp[0][which])) / numCells) * paddings.x
    const ctY = ((row + faceapi.sigmoid(comp[1][which])) / numCells) * paddings.y
    const width = ((Math.exp(comp[2][which]) * getAnchors()[anchor].x) / numCells) * paddings.x
    const height = ((Math.exp(comp[3][which]) * getAnchors()[anchor].y) / numCells) * paddings.y

    const x = (ctX - (width / 2))
    const y = (ctY - (height / 2))
    console.log(which, x * reshapedImgDims.width, y * reshapedImgDims.height, width * reshapedImgDims.width, height * reshapedImgDims.height)
  }

  printBbox('out')
  printBbox('gt')
}

/**
 * Coordinate loss: the masked sum of squared differences between the ground
 * truth box adjustments and the raw network output,
 * i.e. sum((mask * (boxAdjustments - outTensor))^2).
 *
 * @param {Array} groundTruthBoxes - ground truth boxes, forwarded to computeBoxAdjustments
 * @param {tf.Tensor} outTensor - raw network output
 * @param {{ width: number, height: number }} reshapedImgDims - image dimensions after resizing
 * @param {tf.Tensor} mask - selects the entries that contribute to the loss
 * @param {{ x: number, y: number }} paddings - only used for the debug output
 * @returns {tf.Tensor} the summed loss returned by squaredSumOverMask
 */
function computeCoordLoss(groundTruthBoxes, outTensor, reshapedImgDims, mask, paddings) {
  return tf.tidy(() => {
    const boxAdjustments = computeBoxAdjustments(
      groundTruthBoxes,
      reshapedImgDims
    )

    if (window.debug) {
      logCoordLossDebugInfo(boxAdjustments, outTensor, reshapedImgDims, mask, paddings)
    }

    const lossTensor = tf.sub(boxAdjustments, outTensor)

    return squaredSumOverMask(lossTensor, mask)
  })
}
147
263
@@ -160,29 +276,30 @@ function computeLoss(outTensor, groundTruth, reshapedImgDims, paddings) {
160
276
reshapedImgDims
161
277
)
162
278
163
- const mask = getGroundTruthMask (
164
- groundTruthBoxes ,
165
- inputSize
166
- )
167
- const inverseMask = tf . tidy ( ( ) => tf . sub ( tf . scalar ( 1 ) , mask ) )
279
+ const groundTruthMask = getGroundTruthMask ( groundTruthBoxes , inputSize )
280
+ const { coordMask, scoreMask } = getCoordAndScoreMasks ( inputSize )
281
+
282
+ const noObjectLossMask = tf . tidy ( ( ) => tf . mul ( scoreMask , tf . sub ( tf . scalar ( 1 ) , groundTruthMask ) ) )
283
+ const objectLossMask = tf . tidy ( ( ) => tf . mul ( scoreMask , groundTruthMask ) )
284
+ const coordLossMask = tf . tidy ( ( ) => tf . mul ( coordMask , groundTruthMask ) )
168
285
169
286
const noObjectLoss = tf . tidy ( ( ) =>
170
287
tf . mul (
171
288
tf . scalar ( noObjectScale ) ,
172
- tf . sum ( tf . mul ( inverseMask , computeNoObjectLoss ( outTensor ) ) )
289
+ computeNoObjectLoss ( outTensor , noObjectLossMask )
173
290
)
174
291
)
175
292
const objectLoss = tf . tidy ( ( ) =>
176
293
tf . mul (
177
294
tf . scalar ( objectScale ) ,
178
- tf . sum ( tf . mul ( mask , computeObjectLoss ( outTensor , groundTruthBoxes , reshapedImgDims , paddings ) ) )
295
+ computeObjectLoss ( outTensor , groundTruthBoxes , reshapedImgDims , paddings , objectLossMask )
179
296
)
180
297
)
181
298
182
299
const coordLoss = tf . tidy ( ( ) =>
183
300
tf . mul (
184
301
tf . scalar ( coordScale ) ,
185
- tf . sum ( tf . mul ( mask , computeCoordLoss ( groundTruthBoxes , outTensor , reshapedImgDims ) ) )
302
+ computeCoordLoss ( groundTruthBoxes , outTensor , reshapedImgDims , coordLossMask , paddings )
186
303
)
187
304
)
188
305
0 commit comments