Skip to content

Commit a3ec2ac

Browse files
committed
Merge pull request opencv#10176 from sturkmen72:update_train_hog
2 parents 69830b1 + 2aa3807 commit a3ec2ac

File tree

1 file changed

+84
-68
lines changed

1 file changed

+84
-68
lines changed

samples/cpp/train_HOG.cpp

Lines changed: 84 additions & 68 deletions
Original file line number | Diff line number | Diff line change
@@ -10,14 +10,14 @@ using namespace cv;
1010
using namespace cv::ml;
1111
using namespace std;
1212

13-
void get_svm_detector( const Ptr< SVM > & svm, vector< float > & hog_detector );
13+
vector< float > get_svm_detector( const Ptr< SVM >& svm );
1414
void convert_to_ml( const std::vector< Mat > & train_samples, Mat& trainData );
1515
void load_images( const String & dirname, vector< Mat > & img_lst, bool showImages );
1616
void sample_neg( const vector< Mat > & full_neg_lst, vector< Mat > & neg_lst, const Size & size );
17-
void computeHOGs( const Size wsize, const vector< Mat > & img_lst, vector< Mat > & gradient_lst );
18-
int test_trained_detector( String obj_det_filename, String test_dir, String videofilename );
17+
void computeHOGs( const Size wsize, const vector< Mat > & img_lst, vector< Mat > & gradient_lst, bool use_flip );
18+
void test_trained_detector( String obj_det_filename, String test_dir, String videofilename );
1919

20-
void get_svm_detector( const Ptr< SVM >& svm, vector< float > & hog_detector )
20+
vector< float > get_svm_detector( const Ptr< SVM >& svm )
2121
{
2222
// get the support vectors
2323
Mat sv = svm->getSupportVectors();
@@ -30,11 +30,11 @@ void get_svm_detector( const Ptr< SVM >& svm, vector< float > & hog_detector )
3030
CV_Assert( (alpha.type() == CV_64F && alpha.at<double>(0) == 1.) ||
3131
(alpha.type() == CV_32F && alpha.at<float>(0) == 1.f) );
3232
CV_Assert( sv.type() == CV_32F );
33-
hog_detector.clear();
3433

35-
hog_detector.resize(sv.cols + 1);
34+
vector< float > hog_detector( sv.cols + 1 );
3635
memcpy( &hog_detector[0], sv.ptr(), sv.cols*sizeof( hog_detector[0] ) );
3736
hog_detector[sv.cols] = (float)-rho;
37+
return hog_detector;
3838
}
3939

4040
/*
@@ -101,35 +101,44 @@ void sample_neg( const vector< Mat > & full_neg_lst, vector< Mat > & neg_lst, co
101101
srand( (unsigned int)time( NULL ) );
102102

103103
for ( size_t i = 0; i < full_neg_lst.size(); i++ )
104-
{
105-
box.x = rand() % ( full_neg_lst[i].cols - size_x );
106-
box.y = rand() % ( full_neg_lst[i].rows - size_y );
107-
Mat roi = full_neg_lst[i]( box );
108-
neg_lst.push_back( roi.clone() );
109-
}
104+
if ( full_neg_lst[i].cols >= box.width && full_neg_lst[i].rows >= box.height )
105+
{
106+
box.x = rand() % ( full_neg_lst[i].cols - size_x );
107+
box.y = rand() % ( full_neg_lst[i].rows - size_y );
108+
Mat roi = full_neg_lst[i]( box );
109+
neg_lst.push_back( roi.clone() );
110+
}
110111
}
111112

112-
void computeHOGs( const Size wsize, const vector< Mat > & img_lst, vector< Mat > & gradient_lst )
113+
void computeHOGs( const Size wsize, const vector< Mat > & img_lst, vector< Mat > & gradient_lst, bool use_flip )
113114
{
114115
HOGDescriptor hog;
115116
hog.winSize = wsize;
116-
117-
Rect r = Rect( 0, 0, wsize.width, wsize.height );
118-
r.x += ( img_lst[0].cols - r.width ) / 2;
119-
r.y += ( img_lst[0].rows - r.height ) / 2;
120-
121117
Mat gray;
122118
vector< float > descriptors;
123119

124-
for( size_t i=0 ; i< img_lst.size(); i++ )
120+
for( size_t i = 0 ; i < img_lst.size(); i++ )
125121
{
126-
cvtColor( img_lst[i](r), gray, COLOR_BGR2GRAY );
127-
hog.compute( gray, descriptors, Size( 8, 8 ), Size( 0, 0 ) );
128-
gradient_lst.push_back( Mat( descriptors ).clone() );
122+
if ( img_lst[i].cols >= wsize.width && img_lst[i].rows >= wsize.height )
123+
{
124+
Rect r = Rect(( img_lst[i].cols - wsize.width ) / 2,
125+
( img_lst[i].rows - wsize.height ) / 2,
126+
wsize.width,
127+
wsize.height);
128+
cvtColor( img_lst[i](r), gray, COLOR_BGR2GRAY );
129+
hog.compute( gray, descriptors, Size( 8, 8 ), Size( 0, 0 ) );
130+
gradient_lst.push_back( Mat( descriptors ).clone() );
131+
if ( use_flip )
132+
{
133+
flip( gray, gray, 1 );
134+
hog.compute( gray, descriptors, Size( 8, 8 ), Size( 0, 0 ) );
135+
gradient_lst.push_back( Mat( descriptors ).clone() );
136+
}
137+
}
129138
}
130139
}
131140

132-
int test_trained_detector( String obj_det_filename, String test_dir, String videofilename )
141+
void test_trained_detector( String obj_det_filename, String test_dir, String videofilename )
133142
{
134143
cout << "Testing trained detector..." << endl;
135144
HOGDescriptor hog;
@@ -143,7 +152,10 @@ int test_trained_detector( String obj_det_filename, String test_dir, String vide
143152

144153
if ( videofilename != "" )
145154
{
146-
cap.open( videofilename );
155+
if ( videofilename.size() == 1 && isdigit( videofilename[0] ) )
156+
cap.open( videofilename[0] - '0' );
157+
else
158+
cap.open( videofilename );
147159
}
148160

149161
obj_det_filename = "testing " + obj_det_filename;
@@ -165,7 +177,7 @@ int test_trained_detector( String obj_det_filename, String test_dir, String vide
165177

166178
if ( img.empty() )
167179
{
168-
return 0;
180+
return;
169181
}
170182

171183
vector< Rect > detections;
@@ -180,12 +192,11 @@ int test_trained_detector( String obj_det_filename, String test_dir, String vide
180192

181193
imshow( obj_det_filename, img );
182194

183-
if( 27 == waitKey( delay ) )
195+
if( waitKey( delay ) == 27 )
184196
{
185-
return 0;
197+
return;
186198
}
187199
}
188-
return 0;
189200
}
190201

191202
int main( int argc, char** argv )
@@ -199,6 +210,7 @@ int main( int argc, char** argv )
199210
"{tv | | test video file name}"
200211
"{dw | | width of the detector}"
201212
"{dh | | height of the detector}"
213+
"{f |false| indicates if the program will generate and use mirrored samples or not}"
202214
"{d |false| train twice}"
203215
"{t |false| test a trained detector}"
204216
"{v |false| visualize training steps}"
@@ -223,6 +235,7 @@ int main( int argc, char** argv )
223235
bool test_detector = parser.get< bool >( "t" );
224236
bool train_twice = parser.get< bool >( "d" );
225237
bool visualization = parser.get< bool >( "v" );
238+
bool flip_samples = parser.get< bool >( "f" );
226239

227240
if ( test_detector )
228241
{
@@ -234,8 +247,8 @@ int main( int argc, char** argv )
234247
{
235248
parser.printMessage();
236249
cout << "Wrong number of parameters.\n\n"
237-
<< "Example command line:\n" << argv[0] << " -pd=/INRIAPerson/96X160H96/Train/pos -nd=/INRIAPerson/neg -td=/INRIAPerson/Test/pos -fn=HOGpedestrian96x160.yml -d\n"
238-
<< "\nExample command line for testing trained detector:\n" << argv[0] << " -t -dw=96 -dh=160 -fn=HOGpedestrian96x160.yml -td=/INRIAPerson/Test/pos";
250+
<< "Example command line:\n" << argv[0] << " -dw=64 -dh=128 -pd=/INRIAPerson/96X160H96/Train/pos -nd=/INRIAPerson/neg -td=/INRIAPerson/Test/pos -fn=HOGpedestrian64x128.xml -d\n"
251+
<< "\nExample command line for testing trained detector:\n" << argv[0] << " -t -fn=HOGpedestrian64x128.xml -td=/INRIAPerson/Test/pos";
239252
exit( 1 );
240253
}
241254

@@ -256,40 +269,40 @@ int main( int argc, char** argv )
256269

257270
Size pos_image_size = pos_lst[0].size();
258271

259-
for ( size_t i = 0; i < pos_lst.size(); ++i )
260-
{
261-
if( pos_lst[i].size() != pos_image_size )
262-
{
263-
cout << "All positive images should be same size!" << endl;
264-
exit( 1 );
265-
}
266-
}
267-
268-
pos_image_size = pos_image_size / 8 * 8;
269-
270272
if ( detector_width && detector_height )
271273
{
272274
pos_image_size = Size( detector_width, detector_height );
273275
}
274-
275-
labels.assign( pos_lst.size(), +1 );
276-
const unsigned int old = (unsigned int)labels.size();
276+
else
277+
{
278+
for ( size_t i = 0; i < pos_lst.size(); ++i )
279+
{
280+
if( pos_lst[i].size() != pos_image_size )
281+
{
282+
cout << "All positive images should be same size!" << endl;
283+
exit( 1 );
284+
}
285+
}
286+
pos_image_size = pos_image_size / 8 * 8;
287+
}
277288

278289
clog << "Negative images are being loaded...";
279290
load_images( neg_dir, full_neg_lst, false );
280291
sample_neg( full_neg_lst, neg_lst, pos_image_size );
281292
clog << "...[done]" << endl;
282293

283-
labels.insert( labels.end(), neg_lst.size(), -1 );
284-
CV_Assert( old < labels.size() );
285-
286294
clog << "Histogram of Gradients are being calculated for positive images...";
287-
computeHOGs( pos_image_size, pos_lst, gradient_lst );
288-
clog << "...[done]" << endl;
295+
computeHOGs( pos_image_size, pos_lst, gradient_lst, flip_samples );
296+
size_t positive_count = gradient_lst.size();
297+
labels.assign( positive_count, +1 );
298+
clog << "...[done] ( positive count : " << positive_count << " )" << endl;
289299

290300
clog << "Histogram of Gradients are being calculated for negative images...";
291-
computeHOGs( pos_image_size, neg_lst, gradient_lst );
292-
clog << "...[done]" << endl;
301+
computeHOGs( pos_image_size, neg_lst, gradient_lst, flip_samples );
302+
size_t negative_count = gradient_lst.size() - positive_count;
303+
labels.insert( labels.end(), negative_count, -1 );
304+
CV_Assert( positive_count < labels.size() );
305+
clog << "...[done] ( negative count : " << negative_count << " )" << endl;
293306

294307
Mat train_data;
295308
convert_to_ml( gradient_lst, train_data );
@@ -306,7 +319,7 @@ int main( int argc, char** argv )
306319
svm->setP( 0.1 ); // for EPSILON_SVR, epsilon in loss function?
307320
svm->setC( 0.01 ); // From paper, soft classifier
308321
svm->setType( SVM::EPS_SVR ); // C_SVC; // EPSILON_SVR; // may be also NU_SVR; // do regression task
309-
svm->train( train_data, ROW_SAMPLE, Mat( labels ) );
322+
svm->train( train_data, ROW_SAMPLE, labels );
310323
clog << "...[done]" << endl;
311324

312325
if ( train_twice )
@@ -316,22 +329,25 @@ int main( int argc, char** argv )
316329
my_hog.winSize = pos_image_size;
317330

318331
// Set the trained svm to my_hog
319-
vector< float > hog_detector;
320-
get_svm_detector( svm, hog_detector );
321-
my_hog.setSVMDetector( hog_detector );
332+
my_hog.setSVMDetector( get_svm_detector( svm ) );
322333

323334
vector< Rect > detections;
324335
vector< double > foundWeights;
325336

326337
for ( size_t i = 0; i < full_neg_lst.size(); i++ )
327338
{
328-
my_hog.detectMultiScale( full_neg_lst[i], detections, foundWeights );
339+
if ( full_neg_lst[i].cols >= pos_image_size.width && full_neg_lst[i].rows >= pos_image_size.height )
340+
my_hog.detectMultiScale( full_neg_lst[i], detections, foundWeights );
341+
else
342+
detections.clear();
343+
329344
for ( size_t j = 0; j < detections.size(); j++ )
330345
{
331346
Mat detection = full_neg_lst[i]( detections[j] ).clone();
332347
resize( detection, detection, pos_image_size );
333348
neg_lst.push_back( detection );
334349
}
350+
335351
if ( visualization )
336352
{
337353
for ( size_t j = 0; j < detections.size(); j++ )
@@ -344,30 +360,30 @@ int main( int argc, char** argv )
344360
}
345361
clog << "...[done]" << endl;
346362

347-
labels.clear();
348-
labels.assign( pos_lst.size(), +1 );
349-
labels.insert( labels.end(), neg_lst.size(), -1);
350-
351363
gradient_lst.clear();
352364
clog << "Histogram of Gradients are being calculated for positive images...";
353-
computeHOGs( pos_image_size, pos_lst, gradient_lst );
354-
clog << "...[done]" << endl;
365+
computeHOGs( pos_image_size, pos_lst, gradient_lst, flip_samples );
366+
positive_count = gradient_lst.size();
367+
clog << "...[done] ( positive count : " << positive_count << " )" << endl;
355368

356369
clog << "Histogram of Gradients are being calculated for negative images...";
357-
computeHOGs( pos_image_size, neg_lst, gradient_lst );
358-
clog << "...[done]" << endl;
370+
computeHOGs( pos_image_size, neg_lst, gradient_lst, flip_samples );
371+
negative_count = gradient_lst.size() - positive_count;
372+
clog << "...[done] ( negative count : " << negative_count << " )" << endl;
373+
374+
labels.clear();
375+
labels.assign(positive_count, +1);
376+
labels.insert(labels.end(), negative_count, -1);
359377

360378
clog << "Training SVM again...";
361379
convert_to_ml( gradient_lst, train_data );
362-
svm->train( train_data, ROW_SAMPLE, Mat( labels ) );
380+
svm->train( train_data, ROW_SAMPLE, labels );
363381
clog << "...[done]" << endl;
364382
}
365383

366-
vector< float > hog_detector;
367-
get_svm_detector( svm, hog_detector );
368384
HOGDescriptor hog;
369385
hog.winSize = pos_image_size;
370-
hog.setSVMDetector( hog_detector );
386+
hog.setSVMDetector( get_svm_detector( svm ) );
371387
hog.save( obj_det_filename );
372388

373389
test_trained_detector( obj_det_filename, test_dir, videofilename );

0 commit comments

Comments
 (0)