@@ -6,19 +6,34 @@ import { NeuralNetwork } from '../commons/NeuralNetwork';
6
6
import { nonMaxSuppression } from '../commons/nonMaxSuppression' ;
7
7
import { FaceDetection } from '../FaceDetection' ;
8
8
import { NetInput } from '../NetInput' ;
9
+ import { Point } from '../Point' ;
9
10
import { toNetInput } from '../toNetInput' ;
10
11
import { TNetInput } from '../types' ;
11
- import { BOX_ANCHORS , INPUT_SIZES , IOU_THRESHOLD , NUM_BOXES } from './config' ;
12
+ import { sigmoid } from '../utils' ;
13
+ import { BOX_ANCHORS , BOX_ANCHORS_SEPARABLE , INPUT_SIZES , IOU_THRESHOLD , NUM_BOXES } from './config' ;
12
14
import { convWithBatchNorm } from './convWithBatchNorm' ;
13
15
import { extractParams } from './extractParams' ;
14
16
import { getDefaultParams } from './getDefaultParams' ;
15
17
import { loadQuantizedParams } from './loadQuantizedParams' ;
16
- import { NetParams , TinyYolov2ForwardParams } from './types' ;
18
+ import { NetParams , PostProcessingParams , TinyYolov2ForwardParams } from './types' ;
17
19
18
20
export class TinyYolov2 extends NeuralNetwork < NetParams > {
19
21
20
- constructor ( ) {
22
+ private _hasSeparableConvs : boolean
23
+ private _anchors : Point [ ]
24
+
25
+ constructor ( hasSeparableConvs : boolean = false ) {
21
26
super ( 'TinyYolov2' )
27
+ this . _hasSeparableConvs = hasSeparableConvs
28
+ this . _anchors = hasSeparableConvs ? BOX_ANCHORS_SEPARABLE : BOX_ANCHORS
29
+ }
30
+
31
+ public get hasSeparableConvs ( ) : boolean {
32
+ return this . _hasSeparableConvs
33
+ }
34
+
35
+ public get anchors ( ) : Point [ ] {
36
+ return this . _anchors
22
37
}
23
38
24
39
public forwardInput ( input : NetInput , inputSize : number ) : tf . Tensor4D {
@@ -30,7 +45,7 @@ export class TinyYolov2 extends NeuralNetwork<NetParams> {
30
45
}
31
46
32
47
const out = tf . tidy ( ( ) => {
33
- const batchTensor = input . toBatchTensor ( inputSize , false ) . div ( tf . scalar ( 255 ) ) . toFloat ( ) as tf . Tensor4D
48
+ const batchTensor = input . toBatchTensor ( inputSize , false ) . div ( tf . scalar ( 255 ) ) as tf . Tensor4D
34
49
35
50
let out = convWithBatchNorm ( batchTensor , params . conv0 )
36
51
out = tf . maxPool ( out , [ 2 , 2 ] , [ 2 , 2 ] , 'same' )
@@ -72,39 +87,74 @@ export class TinyYolov2 extends NeuralNetwork<NetParams> {
72
87
73
88
const netInput = await toNetInput ( input , true )
74
89
const out = await this . forwardInput ( netInput , inputSize )
75
- const numCells = out . shape [ 1 ]
90
+
91
+ const inputDimensions = {
92
+ width : netInput . getInputWidth ( 0 ) ,
93
+ height : netInput . getInputHeight ( 0 )
94
+ }
95
+
96
+ const paddings = new Point (
97
+ ( netInput . getPaddings ( 0 ) . x + netInput . getInputWidth ( 0 ) ) / netInput . getInputWidth ( 0 ) ,
98
+ ( netInput . getPaddings ( 0 ) . y + netInput . getInputHeight ( 0 ) ) / netInput . getInputHeight ( 0 )
99
+ )
100
+
101
+ const results = this . postProcess ( out , { scoreThreshold, paddings } )
102
+ const boxes = results . map ( res => res . box )
103
+ const scores = results . map ( res => res . score )
104
+
105
+ out . dispose ( )
106
+
107
+ const indices = nonMaxSuppression (
108
+ boxes . map ( box => box . rescale ( inputSize ) ) ,
109
+ scores ,
110
+ IOU_THRESHOLD ,
111
+ true
112
+ )
113
+
114
+ const detections = indices . map ( idx =>
115
+ new FaceDetection (
116
+ scores [ idx ] ,
117
+ boxes [ idx ] . toRect ( ) ,
118
+ inputDimensions
119
+ )
120
+ )
121
+
122
+ return detections
123
+ }
124
+
125
+ public postProcess ( outputTensor : tf . Tensor4D , { scoreThreshold, paddings } : PostProcessingParams ) {
126
+
127
+ const numCells = outputTensor . shape [ 1 ]
76
128
77
129
const [ boxesTensor , scoresTensor ] = tf . tidy ( ( ) => {
78
- const reshaped = out . reshape ( [ numCells , numCells , NUM_BOXES , 6 ] )
79
- out . dispose ( )
130
+ const reshaped = outputTensor . reshape ( [ numCells , numCells , NUM_BOXES , this . hasSeparableConvs ? 5 : 6 ] )
80
131
81
132
const boxes = reshaped . slice ( [ 0 , 0 , 0 , 0 ] , [ numCells , numCells , NUM_BOXES , 4 ] )
82
133
const scores = reshaped . slice ( [ 0 , 0 , 0 , 4 ] , [ numCells , numCells , NUM_BOXES , 1 ] )
83
134
return [ boxes , scores ]
84
135
} )
85
136
86
- const expit = ( x : number ) : number => 1 / ( 1 + Math . exp ( - x ) )
87
-
88
- const paddedHeightRelative = ( netInput . getPaddings ( 0 ) . y + netInput . getInputHeight ( 0 ) ) / netInput . getInputHeight ( 0 )
89
- const paddedWidthRelative = ( netInput . getPaddings ( 0 ) . x + netInput . getInputWidth ( 0 ) ) / netInput . getInputWidth ( 0 )
90
-
91
- const boxes : BoundingBox [ ] = [ ]
92
- const scores : number [ ] = [ ]
137
+ const results = [ ]
93
138
94
139
for ( let row = 0 ; row < numCells ; row ++ ) {
95
140
for ( let col = 0 ; col < numCells ; col ++ ) {
96
- for ( let box = 0 ; box < NUM_BOXES ; box ++ ) {
97
- const score = expit ( scoresTensor . get ( row , col , box , 0 ) )
141
+ for ( let anchor = 0 ; anchor < NUM_BOXES ; anchor ++ ) {
142
+ const score = sigmoid ( scoresTensor . get ( row , col , anchor , 0 ) )
98
143
if ( score > scoreThreshold ) {
99
- const ctX = ( ( col + expit ( boxesTensor . get ( row , col , box , 0 ) ) ) / numCells ) * paddedWidthRelative
100
- const ctY = ( ( row + expit ( boxesTensor . get ( row , col , box , 1 ) ) ) / numCells ) * paddedHeightRelative
101
- const width = ( ( Math . exp ( boxesTensor . get ( row , col , box , 2 ) ) * BOX_ANCHORS [ box ] . x ) / numCells ) * paddedWidthRelative
102
- const height = ( ( Math . exp ( boxesTensor . get ( row , col , box , 3 ) ) * BOX_ANCHORS [ box ] . y ) / numCells ) * paddedHeightRelative
144
+ const ctX = ( ( col + sigmoid ( boxesTensor . get ( row , col , anchor , 0 ) ) ) / numCells ) * paddings . x
145
+ const ctY = ( ( row + sigmoid ( boxesTensor . get ( row , col , anchor , 1 ) ) ) / numCells ) * paddings . y
146
+ const width = ( ( Math . exp ( boxesTensor . get ( row , col , anchor , 2 ) ) * this . anchors [ anchor ] . x ) / numCells ) * paddings . x
147
+ const height = ( ( Math . exp ( boxesTensor . get ( row , col , anchor , 3 ) ) * this . anchors [ anchor ] . y ) / numCells ) * paddings . y
103
148
104
149
const x = ( ctX - ( width / 2 ) )
105
150
const y = ( ctY - ( height / 2 ) )
106
- boxes . push ( new BoundingBox ( x , y , x + width , y + height ) )
107
- scores . push ( score )
151
+ results . push ( {
152
+ box : new BoundingBox ( x , y , x + width , y + height ) ,
153
+ score,
154
+ row,
155
+ col,
156
+ anchor
157
+ } )
108
158
}
109
159
}
110
160
}
@@ -113,34 +163,14 @@ export class TinyYolov2 extends NeuralNetwork<NetParams> {
113
163
boxesTensor . dispose ( )
114
164
scoresTensor . dispose ( )
115
165
116
- const indices = nonMaxSuppression (
117
- boxes . map ( box => new BoundingBox (
118
- box . left * inputSize ,
119
- box . top * inputSize ,
120
- box . right * inputSize ,
121
- box . bottom * inputSize
122
- ) ) ,
123
- scores ,
124
- IOU_THRESHOLD ,
125
- true
126
- )
127
-
128
- const detections = indices . map ( idx =>
129
- new FaceDetection (
130
- scores [ idx ] ,
131
- boxes [ idx ] . toRect ( ) ,
132
- { width : netInput . getInputWidth ( 0 ) , height : netInput . getInputHeight ( 0 ) }
133
- )
134
- )
135
-
136
- return detections
166
+ return results
137
167
}
138
168
139
169
protected loadQuantizedParams ( uri : string | undefined ) {
140
170
return loadQuantizedParams ( uri )
141
171
}
142
172
143
173
protected extractParams ( weights : Float32Array ) {
144
- return extractParams ( weights )
174
+ return extractParams ( weights , this . hasSeparableConvs )
145
175
}
146
176
}
0 commit comments