@@ -2,12 +2,12 @@ import * as tf from '@tensorflow/tfjs-core';
2
2
3
3
import { isFloat } from '../utils' ;
4
4
import { extractParams } from './extractParams' ;
5
+ import { FaceDetectionResult } from './FaceDetectionResult' ;
5
6
import { mobileNetV1 } from './mobileNetV1' ;
6
- import { resizeLayer } from './resizeLayer' ;
7
- import { predictionLayer } from './predictionLayer' ;
8
- import { outputLayer } from './outputLayer' ;
9
7
import { nonMaxSuppression } from './nonMaxSuppression' ;
10
- import { FaceDetectionNet } from './types' ;
8
+ import { outputLayer } from './outputLayer' ;
9
+ import { predictionLayer } from './predictionLayer' ;
10
+ import { resizeLayer } from './resizeLayer' ;
11
11
12
12
function fromData ( input : number [ ] ) : tf . Tensor4D {
13
13
const pxPerChannel = input . length / 3
@@ -21,34 +21,53 @@ function fromData(input: number[]): tf.Tensor4D {
21
21
}
22
22
23
23
function fromImageData ( input : ImageData [ ] ) {
24
- const idx = input . findIndex ( data => ! ( data instanceof ImageData ) )
25
- if ( idx !== - 1 ) {
26
- throw new Error ( `expected input at index ${ idx } to be instanceof ImageData` )
27
- }
24
+ return tf . tidy ( ( ) => {
25
+ const idx = input . findIndex ( data => ! ( data instanceof ImageData ) )
26
+ if ( idx !== - 1 ) {
27
+ throw new Error ( `expected input at index ${ idx } to be instanceof ImageData` )
28
+ }
28
29
29
- const imgTensors = input
30
- . map ( data => tf . fromPixels ( data ) )
31
- . map ( data => tf . expandDims ( data , 0 ) ) as tf . Tensor4D [ ]
30
+ const imgTensors = input
31
+ . map ( data => tf . fromPixels ( data ) )
32
+ . map ( data => tf . expandDims ( data , 0 ) ) as tf . Tensor4D [ ]
32
33
33
- return tf . cast ( tf . concat ( imgTensors , 0 ) , 'float32' )
34
+ return tf . cast ( tf . concat ( imgTensors , 0 ) , 'float32' )
35
+ } )
34
36
}
35
37
36
38
function padToSquare ( imgTensor : tf . Tensor4D ) : tf . Tensor4D {
37
- const [ _ , height , width ] = imgTensor . shape
38
- if ( height === width ) {
39
- return imgTensor
40
- }
39
+ return tf . tidy ( ( ) => {
41
40
42
- if ( height > width ) {
43
- const pad = tf . fill ( [ 1 , height , height - width , 3 ] , 0 ) as tf . Tensor4D
44
- return tf . concat ( [ imgTensor , pad ] , 2 )
45
- }
46
- const pad = tf . fill ( [ 1 , width - height , width , 3 ] , 0 ) as tf . Tensor4D
47
- return tf . concat ( [ imgTensor , pad ] , 1 )
41
+ const [ _ , height , width ] = imgTensor . shape
42
+ if ( height === width ) {
43
+ return imgTensor
44
+ }
45
+
46
+ if ( height > width ) {
47
+ const pad = tf . fill ( [ 1 , height , height - width , 3 ] , 0 ) as tf . Tensor4D
48
+ return tf . concat ( [ imgTensor , pad ] , 2 )
49
+ }
50
+ const pad = tf . fill ( [ 1 , width - height , width , 3 ] , 0 ) as tf . Tensor4D
51
+ return tf . concat ( [ imgTensor , pad ] , 1 )
52
+ } )
48
53
}
49
54
50
- function getImgTensor ( input : ImageData | ImageData [ ] | number [ ] ) {
55
+ function getImgTensor ( input : tf . Tensor | HTMLCanvasElement | ImageData | ImageData [ ] | number [ ] ) {
51
56
return tf . tidy ( ( ) => {
57
+ if ( input instanceof HTMLCanvasElement ) {
58
+ return tf . cast (
59
+ tf . expandDims ( tf . fromPixels ( input ) , 0 ) , 'float32'
60
+ ) as tf . Tensor4D
61
+ }
62
+ if ( input instanceof tf . Tensor ) {
63
+ const rank = input . shape . length
64
+ if ( rank !== 3 && rank !== 4 ) {
65
+ throw new Error ( 'input tensor must be of rank 3 or 4' )
66
+ }
67
+ return tf . cast (
68
+ rank === 3 ? tf . expandDims ( input , 0 ) : input , 'float32'
69
+ ) as tf . Tensor4D
70
+ }
52
71
53
72
const imgDataArray = input instanceof ImageData
54
73
? [ input ]
@@ -58,11 +77,9 @@ function getImgTensor(input: ImageData|ImageData[]|number[]) {
58
77
: null
59
78
)
60
79
61
- return padToSquare (
62
- imgDataArray !== null
63
- ? fromImageData ( imgDataArray )
64
- : fromData ( input as number [ ] )
65
- )
80
+ return imgDataArray !== null
81
+ ? fromImageData ( imgDataArray )
82
+ : fromData ( input as number [ ] )
66
83
67
84
} )
68
85
}
@@ -85,31 +102,47 @@ export function faceDetectionNet(weights: Float32Array) {
85
102
} )
86
103
}
87
104
88
- function forward ( input : ImageData | ImageData [ ] | number [ ] ) {
105
+ function forward ( input : tf . Tensor | ImageData | ImageData [ ] | number [ ] ) {
89
106
return tf . tidy (
90
107
( ) => forwardTensor ( padToSquare ( getImgTensor ( input ) ) )
91
108
)
92
109
}
93
110
94
111
async function locateFaces (
95
- input : ImageData | ImageData [ ] | number [ ] ,
112
+ input : tf . Tensor | HTMLCanvasElement | ImageData | ImageData [ ] | number [ ] ,
96
113
minConfidence : number = 0.8 ,
97
114
maxResults : number = 100 ,
98
- ) : Promise < FaceDetectionNet . Detection [ ] > {
99
- const imgTensor = getImgTensor ( input )
100
- const [ _ , height , width ] = imgTensor . shape
115
+ ) : Promise < FaceDetectionResult [ ] > {
116
+
117
+ let paddedHeightRelative = 1 , paddedWidthRelative = 1
101
118
102
119
const {
103
120
boxes : _boxes ,
104
121
scores : _scores
105
- } = forwardTensor ( imgTensor )
122
+ } = tf . tidy ( ( ) => {
123
+
124
+ let imgTensor = getImgTensor ( input )
125
+ const [ _ , height , width ] = imgTensor . shape
126
+
127
+ imgTensor = padToSquare ( imgTensor )
128
+ paddedHeightRelative = imgTensor . shape [ 1 ] / height
129
+ paddedWidthRelative = imgTensor . shape [ 2 ] / width
130
+
131
+ return forwardTensor ( imgTensor )
132
+ } )
106
133
107
134
// TODO batches
108
135
const boxes = _boxes [ 0 ]
109
136
const scores = _scores [ 0 ]
137
+ for ( let i = 1 ; i < _boxes . length ; i ++ ) {
138
+ _boxes [ i ] . dispose ( )
139
+ _scores [ i ] . dispose ( )
140
+ }
110
141
111
142
// TODO find a better way to filter by minConfidence
143
+ //const ts = Date.now()
112
144
const scoresData = Array . from ( await scores . data ( ) )
145
+ //console.log('await data:', (Date.now() - ts))
113
146
114
147
const iouThreshold = 0.5
115
148
const indices = nonMaxSuppression (
@@ -120,17 +153,19 @@ export function faceDetectionNet(weights: Float32Array) {
120
153
minConfidence
121
154
)
122
155
123
- return indices
124
- . map ( idx => ( {
125
- score : scoresData [ idx ] ,
126
- box : {
127
- top : Math . max ( 0 , height * boxes . get ( idx , 0 ) ) ,
128
- left : Math . max ( 0 , width * boxes . get ( idx , 1 ) ) ,
129
- bottom : Math . min ( height , height * boxes . get ( idx , 2 ) ) ,
130
- right : Math . min ( width , width * boxes . get ( idx , 3 ) )
131
- }
132
- } ) )
156
+ const results = indices
157
+ . map ( idx => new FaceDetectionResult (
158
+ scoresData [ idx ] ,
159
+ boxes . get ( idx , 0 ) * paddedHeightRelative ,
160
+ boxes . get ( idx , 1 ) * paddedWidthRelative ,
161
+ boxes . get ( idx , 2 ) * paddedHeightRelative ,
162
+ boxes . get ( idx , 3 ) * paddedWidthRelative
163
+ ) )
164
+
165
+ boxes . dispose ( )
166
+ scores . dispose ( )
133
167
168
+ return results
134
169
}
135
170
136
171
return {
0 commit comments