@@ -10,14 +10,14 @@ using namespace cv;
10
10
using namespace cv ::ml;
11
11
using namespace std ;
12
12
13
- void get_svm_detector ( const Ptr< SVM > & svm, vector< float > & hog_detector );
13
+ vector< float > get_svm_detector ( const Ptr< SVM >& svm );
14
14
void convert_to_ml ( const std::vector< Mat > & train_samples, Mat& trainData );
15
15
void load_images ( const String & dirname, vector< Mat > & img_lst, bool showImages );
16
16
void sample_neg ( const vector< Mat > & full_neg_lst, vector< Mat > & neg_lst, const Size & size );
17
- void computeHOGs ( const Size wsize, const vector< Mat > & img_lst, vector< Mat > & gradient_lst );
18
- int test_trained_detector ( String obj_det_filename, String test_dir, String videofilename );
17
+ void computeHOGs ( const Size wsize, const vector< Mat > & img_lst, vector< Mat > & gradient_lst, bool use_flip );
18
+ void test_trained_detector ( String obj_det_filename, String test_dir, String videofilename );
19
19
20
- void get_svm_detector ( const Ptr< SVM >& svm, vector< float > & hog_detector )
20
+ vector< float > get_svm_detector ( const Ptr< SVM >& svm )
21
21
{
22
22
// get the support vectors
23
23
Mat sv = svm->getSupportVectors ();
@@ -30,11 +30,11 @@ void get_svm_detector( const Ptr< SVM >& svm, vector< float > & hog_detector )
30
30
CV_Assert ( (alpha.type () == CV_64F && alpha.at <double >(0 ) == 1 .) ||
31
31
(alpha.type () == CV_32F && alpha.at <float >(0 ) == 1 .f ) );
32
32
CV_Assert ( sv.type () == CV_32F );
33
- hog_detector.clear ();
34
33
35
- hog_detector. resize ( sv.cols + 1 );
34
+ vector< float > hog_detector ( sv.cols + 1 );
36
35
memcpy ( &hog_detector[0 ], sv.ptr (), sv.cols *sizeof ( hog_detector[0 ] ) );
37
36
hog_detector[sv.cols ] = (float )-rho;
37
+ return hog_detector;
38
38
}
39
39
40
40
/*
@@ -101,35 +101,44 @@ void sample_neg( const vector< Mat > & full_neg_lst, vector< Mat > & neg_lst, co
101
101
srand ( (unsigned int )time ( NULL ) );
102
102
103
103
for ( size_t i = 0 ; i < full_neg_lst.size (); i++ )
104
- {
105
- box.x = rand () % ( full_neg_lst[i].cols - size_x );
106
- box.y = rand () % ( full_neg_lst[i].rows - size_y );
107
- Mat roi = full_neg_lst[i]( box );
108
- neg_lst.push_back ( roi.clone () );
109
- }
104
+ if ( full_neg_lst[i].cols >= box.width && full_neg_lst[i].rows >= box.height )
105
+ {
106
+ box.x = rand () % ( full_neg_lst[i].cols - size_x );
107
+ box.y = rand () % ( full_neg_lst[i].rows - size_y );
108
+ Mat roi = full_neg_lst[i]( box );
109
+ neg_lst.push_back ( roi.clone () );
110
+ }
110
111
}
111
112
112
- void computeHOGs ( const Size wsize, const vector< Mat > & img_lst, vector< Mat > & gradient_lst )
113
+ void computeHOGs ( const Size wsize, const vector< Mat > & img_lst, vector< Mat > & gradient_lst, bool use_flip )
113
114
{
114
115
HOGDescriptor hog;
115
116
hog.winSize = wsize;
116
-
117
- Rect r = Rect ( 0 , 0 , wsize.width , wsize.height );
118
- r.x += ( img_lst[0 ].cols - r.width ) / 2 ;
119
- r.y += ( img_lst[0 ].rows - r.height ) / 2 ;
120
-
121
117
Mat gray;
122
118
vector< float > descriptors;
123
119
124
- for ( size_t i= 0 ; i< img_lst.size (); i++ )
120
+ for ( size_t i = 0 ; i < img_lst.size (); i++ )
125
121
{
126
- cvtColor ( img_lst[i](r), gray, COLOR_BGR2GRAY );
127
- hog.compute ( gray, descriptors, Size ( 8 , 8 ), Size ( 0 , 0 ) );
128
- gradient_lst.push_back ( Mat ( descriptors ).clone () );
122
+ if ( img_lst[i].cols >= wsize.width && img_lst[i].rows >= wsize.height )
123
+ {
124
+ Rect r = Rect (( img_lst[i].cols - wsize.width ) / 2 ,
125
+ ( img_lst[i].rows - wsize.height ) / 2 ,
126
+ wsize.width ,
127
+ wsize.height );
128
+ cvtColor ( img_lst[i](r), gray, COLOR_BGR2GRAY );
129
+ hog.compute ( gray, descriptors, Size ( 8 , 8 ), Size ( 0 , 0 ) );
130
+ gradient_lst.push_back ( Mat ( descriptors ).clone () );
131
+ if ( use_flip )
132
+ {
133
+ flip ( gray, gray, 1 );
134
+ hog.compute ( gray, descriptors, Size ( 8 , 8 ), Size ( 0 , 0 ) );
135
+ gradient_lst.push_back ( Mat ( descriptors ).clone () );
136
+ }
137
+ }
129
138
}
130
139
}
131
140
132
- int test_trained_detector ( String obj_det_filename, String test_dir, String videofilename )
141
+ void test_trained_detector ( String obj_det_filename, String test_dir, String videofilename )
133
142
{
134
143
cout << " Testing trained detector..." << endl;
135
144
HOGDescriptor hog;
@@ -143,7 +152,10 @@ int test_trained_detector( String obj_det_filename, String test_dir, String vide
143
152
144
153
if ( videofilename != " " )
145
154
{
146
- cap.open ( videofilename );
155
+ if ( videofilename.size () == 1 && isdigit ( videofilename[0 ] ) )
156
+ cap.open ( videofilename[0 ] - ' 0' );
157
+ else
158
+ cap.open ( videofilename );
147
159
}
148
160
149
161
obj_det_filename = " testing " + obj_det_filename;
@@ -165,7 +177,7 @@ int test_trained_detector( String obj_det_filename, String test_dir, String vide
165
177
166
178
if ( img.empty () )
167
179
{
168
- return 0 ;
180
+ return ;
169
181
}
170
182
171
183
vector< Rect > detections;
@@ -180,12 +192,11 @@ int test_trained_detector( String obj_det_filename, String test_dir, String vide
180
192
181
193
imshow ( obj_det_filename, img );
182
194
183
- if ( 27 == waitKey ( delay ) )
195
+ if ( waitKey ( delay ) == 27 )
184
196
{
185
- return 0 ;
197
+ return ;
186
198
}
187
199
}
188
- return 0 ;
189
200
}
190
201
191
202
int main ( int argc, char ** argv )
@@ -199,6 +210,7 @@ int main( int argc, char** argv )
199
210
" {tv | | test video file name}"
200
211
" {dw | | width of the detector}"
201
212
" {dh | | height of the detector}"
213
+ " {f |false| indicates if the program will generate and use mirrored samples or not}"
202
214
" {d |false| train twice}"
203
215
" {t |false| test a trained detector}"
204
216
" {v |false| visualize training steps}"
@@ -223,6 +235,7 @@ int main( int argc, char** argv )
223
235
bool test_detector = parser.get < bool >( " t" );
224
236
bool train_twice = parser.get < bool >( " d" );
225
237
bool visualization = parser.get < bool >( " v" );
238
+ bool flip_samples = parser.get < bool >( " f" );
226
239
227
240
if ( test_detector )
228
241
{
@@ -234,8 +247,8 @@ int main( int argc, char** argv )
234
247
{
235
248
parser.printMessage ();
236
249
cout << " Wrong number of parameters.\n\n "
237
- << " Example command line:\n " << argv[0 ] << " -pd=/INRIAPerson/96X160H96/Train/pos -nd=/INRIAPerson/neg -td=/INRIAPerson/Test/pos -fn=HOGpedestrian96x160.yml -d\n "
238
- << " \n Example command line for testing trained detector:\n " << argv[0 ] << " -t -dw=96 -dh=160 - fn=HOGpedestrian96x160.yml -td=/INRIAPerson/Test/pos" ;
250
+ << " Example command line:\n " << argv[0 ] << " -dw=64 -dh=128 - pd=/INRIAPerson/96X160H96/Train/pos -nd=/INRIAPerson/neg -td=/INRIAPerson/Test/pos -fn=HOGpedestrian64x128.xml -d\n "
251
+ << " \n Example command line for testing trained detector:\n " << argv[0 ] << " -t -fn=HOGpedestrian64x128.xml -td=/INRIAPerson/Test/pos" ;
239
252
exit ( 1 );
240
253
}
241
254
@@ -256,40 +269,40 @@ int main( int argc, char** argv )
256
269
257
270
Size pos_image_size = pos_lst[0 ].size ();
258
271
259
- for ( size_t i = 0 ; i < pos_lst.size (); ++i )
260
- {
261
- if ( pos_lst[i].size () != pos_image_size )
262
- {
263
- cout << " All positive images should be same size!" << endl;
264
- exit ( 1 );
265
- }
266
- }
267
-
268
- pos_image_size = pos_image_size / 8 * 8 ;
269
-
270
272
if ( detector_width && detector_height )
271
273
{
272
274
pos_image_size = Size ( detector_width, detector_height );
273
275
}
274
-
275
- labels.assign ( pos_lst.size (), +1 );
276
- const unsigned int old = (unsigned int )labels.size ();
276
+ else
277
+ {
278
+ for ( size_t i = 0 ; i < pos_lst.size (); ++i )
279
+ {
280
+ if ( pos_lst[i].size () != pos_image_size )
281
+ {
282
+ cout << " All positive images should be same size!" << endl;
283
+ exit ( 1 );
284
+ }
285
+ }
286
+ pos_image_size = pos_image_size / 8 * 8 ;
287
+ }
277
288
278
289
clog << " Negative images are being loaded..." ;
279
290
load_images ( neg_dir, full_neg_lst, false );
280
291
sample_neg ( full_neg_lst, neg_lst, pos_image_size );
281
292
clog << " ...[done]" << endl;
282
293
283
- labels.insert ( labels.end (), neg_lst.size (), -1 );
284
- CV_Assert ( old < labels.size () );
285
-
286
294
clog << " Histogram of Gradients are being calculated for positive images..." ;
287
- computeHOGs ( pos_image_size, pos_lst, gradient_lst );
288
- clog << " ...[done]" << endl;
295
+ computeHOGs ( pos_image_size, pos_lst, gradient_lst, flip_samples );
296
+ size_t positive_count = gradient_lst.size ();
297
+ labels.assign ( positive_count, +1 );
298
+ clog << " ...[done] ( positive count : " << positive_count << " )" << endl;
289
299
290
300
clog << " Histogram of Gradients are being calculated for negative images..." ;
291
- computeHOGs ( pos_image_size, neg_lst, gradient_lst );
292
- clog << " ...[done]" << endl;
301
+ computeHOGs ( pos_image_size, neg_lst, gradient_lst, flip_samples );
302
+ size_t negative_count = gradient_lst.size () - positive_count;
303
+ labels.insert ( labels.end (), negative_count, -1 );
304
+ CV_Assert ( positive_count < labels.size () );
305
+ clog << " ...[done] ( negative count : " << negative_count << " )" << endl;
293
306
294
307
Mat train_data;
295
308
convert_to_ml ( gradient_lst, train_data );
@@ -306,7 +319,7 @@ int main( int argc, char** argv )
306
319
svm->setP ( 0.1 ); // for EPSILON_SVR, epsilon in loss function?
307
320
svm->setC ( 0.01 ); // From paper, soft classifier
308
321
svm->setType ( SVM::EPS_SVR ); // C_SVC; // EPSILON_SVR; // may be also NU_SVR; // do regression task
309
- svm->train ( train_data, ROW_SAMPLE, Mat ( labels ) );
322
+ svm->train ( train_data, ROW_SAMPLE, labels );
310
323
clog << " ...[done]" << endl;
311
324
312
325
if ( train_twice )
@@ -316,22 +329,25 @@ int main( int argc, char** argv )
316
329
my_hog.winSize = pos_image_size;
317
330
318
331
// Set the trained svm to my_hog
319
- vector< float > hog_detector;
320
- get_svm_detector ( svm, hog_detector );
321
- my_hog.setSVMDetector ( hog_detector );
332
+ my_hog.setSVMDetector ( get_svm_detector ( svm ) );
322
333
323
334
vector< Rect > detections;
324
335
vector< double > foundWeights;
325
336
326
337
for ( size_t i = 0 ; i < full_neg_lst.size (); i++ )
327
338
{
328
- my_hog.detectMultiScale ( full_neg_lst[i], detections, foundWeights );
339
+ if ( full_neg_lst[i].cols >= pos_image_size.width && full_neg_lst[i].rows >= pos_image_size.height )
340
+ my_hog.detectMultiScale ( full_neg_lst[i], detections, foundWeights );
341
+ else
342
+ detections.clear ();
343
+
329
344
for ( size_t j = 0 ; j < detections.size (); j++ )
330
345
{
331
346
Mat detection = full_neg_lst[i]( detections[j] ).clone ();
332
347
resize ( detection, detection, pos_image_size );
333
348
neg_lst.push_back ( detection );
334
349
}
350
+
335
351
if ( visualization )
336
352
{
337
353
for ( size_t j = 0 ; j < detections.size (); j++ )
@@ -344,30 +360,30 @@ int main( int argc, char** argv )
344
360
}
345
361
clog << " ...[done]" << endl;
346
362
347
- labels.clear ();
348
- labels.assign ( pos_lst.size (), +1 );
349
- labels.insert ( labels.end (), neg_lst.size (), -1 );
350
-
351
363
gradient_lst.clear ();
352
364
clog << " Histogram of Gradients are being calculated for positive images..." ;
353
- computeHOGs ( pos_image_size, pos_lst, gradient_lst );
354
- clog << " ...[done]" << endl;
365
+ computeHOGs ( pos_image_size, pos_lst, gradient_lst, flip_samples );
366
+ positive_count = gradient_lst.size ();
367
+ clog << " ...[done] ( positive count : " << positive_count << " )" << endl;
355
368
356
369
clog << " Histogram of Gradients are being calculated for negative images..." ;
357
- computeHOGs ( pos_image_size, neg_lst, gradient_lst );
358
- clog << " ...[done]" << endl;
370
+ computeHOGs ( pos_image_size, neg_lst, gradient_lst, flip_samples );
371
+ negative_count = gradient_lst.size () - positive_count;
372
+ clog << " ...[done] ( negative count : " << negative_count << " )" << endl;
373
+
374
+ labels.clear ();
375
+ labels.assign (positive_count, +1 );
376
+ labels.insert (labels.end (), negative_count, -1 );
359
377
360
378
clog << " Training SVM again..." ;
361
379
convert_to_ml ( gradient_lst, train_data );
362
- svm->train ( train_data, ROW_SAMPLE, Mat ( labels ) );
380
+ svm->train ( train_data, ROW_SAMPLE, labels );
363
381
clog << " ...[done]" << endl;
364
382
}
365
383
366
- vector< float > hog_detector;
367
- get_svm_detector ( svm, hog_detector );
368
384
HOGDescriptor hog;
369
385
hog.winSize = pos_image_size;
370
- hog.setSVMDetector ( hog_detector );
386
+ hog.setSVMDetector ( get_svm_detector ( svm ) );
371
387
hog.save ( obj_det_filename );
372
388
373
389
test_trained_detector ( obj_det_filename, test_dir, videofilename );
0 commit comments