@@ -13,7 +13,6 @@ using namespace std;
13
13
14
14
const size_t inWidth = 300 ;
15
15
const size_t inHeight = 300 ;
16
- const float WHRatio = inWidth / (float )inHeight;
17
16
const float inScaleFactor = 0 .007843f ;
18
17
const float meanVal = 127.5 ;
19
18
const char * classNames[] = {" background" ,
@@ -23,13 +22,6 @@ const char* classNames[] = {"background",
23
22
" motorbike" , " person" , " pottedplant" ,
24
23
" sheep" , " sofa" , " train" , " tvmonitor" };
25
24
26
- const char * about = " This sample uses MobileNet Single-Shot Detector "
27
- " (https://arxiv.org/abs/1704.04861) "
28
- " to detect objects on camera/video/image.\n "
29
- " .caffemodel model's file is available here: "
30
- " https://github.com/chuanqi305/MobileNet-SSD\n "
31
- " Default network is 300x300 and 20-classes VOC.\n " ;
32
-
33
25
const char * params
34
26
= " { help | false | print usage }"
35
27
" { proto | MobileNetSSD_deploy.prototxt | model configuration }"
@@ -44,16 +36,22 @@ const char* params
44
36
int main (int argc, char ** argv)
45
37
{
46
38
CommandLineParser parser (argc, argv, params);
47
-
48
- if (parser.get <bool >(" help" ))
39
+ parser.about (" This sample uses MobileNet Single-Shot Detector "
40
+ " (https://arxiv.org/abs/1704.04861) "
41
+ " to detect objects on camera/video/image.\n "
42
+ " .caffemodel model's file is available here: "
43
+ " https://github.com/chuanqi305/MobileNet-SSD\n "
44
+ " Default network is 300x300 and 20-classes VOC.\n " );
45
+
46
+ if (parser.get <bool >(" help" ) || argc == 1 )
49
47
{
50
- cout << about << endl;
51
48
parser.printMessage ();
52
49
return 0 ;
53
50
}
54
51
55
52
String modelConfiguration = parser.get <string>(" proto" );
56
53
String modelBinary = parser.get <string>(" model" );
54
+ CV_Assert (!modelConfiguration.empty () && !modelBinary.empty ());
57
55
58
56
// ! [Initialize network]
59
57
dnn::Net net = readNetFromCaffe (modelConfiguration, modelBinary);
@@ -75,7 +73,7 @@ int main(int argc, char** argv)
75
73
}
76
74
77
75
VideoCapture cap;
78
- if (parser.get <String> (" video" ). empty ( ))
76
+ if (! parser.has (" video" ))
79
77
{
80
78
int cameraDevice = parser.get <int >(" camera_device" );
81
79
cap = VideoCapture (cameraDevice);
@@ -95,32 +93,16 @@ int main(int argc, char** argv)
95
93
}
96
94
}
97
95
98
- Size inVideoSize;
99
- inVideoSize = Size ((int ) cap.get (CV_CAP_PROP_FRAME_WIDTH), // Acquire input size
100
- (int ) cap.get (CV_CAP_PROP_FRAME_HEIGHT));
101
-
102
- Size cropSize;
103
- if (inVideoSize.width / (float )inVideoSize.height > WHRatio)
104
- {
105
- cropSize = Size (static_cast <int >(inVideoSize.height * WHRatio),
106
- inVideoSize.height );
107
- }
108
- else
109
- {
110
- cropSize = Size (inVideoSize.width ,
111
- static_cast <int >(inVideoSize.width / WHRatio));
112
- }
113
-
114
- Rect crop (Point ((inVideoSize.width - cropSize.width ) / 2 ,
115
- (inVideoSize.height - cropSize.height ) / 2 ),
116
- cropSize);
96
+ // Acquire input size
97
+ Size inVideoSize ((int ) cap.get (CV_CAP_PROP_FRAME_WIDTH),
98
+ (int ) cap.get (CV_CAP_PROP_FRAME_HEIGHT));
117
99
118
100
double fps = cap.get (CV_CAP_PROP_FPS);
119
101
int fourcc = static_cast <int >(cap.get (CV_CAP_PROP_FOURCC));
120
102
VideoWriter outputVideo;
121
103
outputVideo.open (parser.get <String>(" out" ) ,
122
104
(fourcc != 0 ? fourcc : VideoWriter::fourcc (' M' ,' J' ,' P' ,' G' )),
123
- (fps != 0 ? fps : 10.0 ), cropSize , true );
105
+ (fps != 0 ? fps : 10.0 ), inVideoSize , true );
124
106
125
107
for (;;)
126
108
{
@@ -138,15 +120,17 @@ int main(int argc, char** argv)
138
120
139
121
// ! [Prepare blob]
140
122
Mat inputBlob = blobFromImage (frame, inScaleFactor,
141
- Size (inWidth, inHeight), meanVal, false ); // Convert Mat to batch of images
123
+ Size (inWidth, inHeight),
124
+ Scalar (meanVal, meanVal, meanVal),
125
+ false , false ); // Convert Mat to batch of images
142
126
// ! [Prepare blob]
143
127
144
128
// ! [Set input blob]
145
- net.setInput (inputBlob, " data " ); // set the network input
129
+ net.setInput (inputBlob); // set the network input
146
130
// ! [Set input blob]
147
131
148
132
// ! [Make forward pass]
149
- Mat detection = net.forward (" detection_out " ); // compute output
133
+ Mat detection = net.forward (); // compute output
150
134
// ! [Make forward pass]
151
135
152
136
vector<double > layersTimings;
@@ -155,13 +139,10 @@ int main(int argc, char** argv)
155
139
156
140
Mat detectionMat (detection.size [2 ], detection.size [3 ], CV_32F, detection.ptr <float >());
157
141
158
- frame = frame (crop);
159
-
160
- ostringstream ss;
161
142
if (!outputVideo.isOpened ())
162
143
{
163
- ss << " FPS: " << 1000 /time << " ; time: " << time << " ms " ;
164
- putText (frame, ss. str (), Point (20 ,20 ), 0 , 0.5 , Scalar (0 ,0 ,255 ));
144
+ putText (frame, format ( " FPS: %.2f ; time: %.2f ms " , 1000 . f / time, time),
145
+ Point (20 ,20 ), 0 , 0.5 , Scalar (0 ,0 ,255 ));
165
146
}
166
147
else
167
148
cout << " Inference time, ms: " << time << endl;
@@ -175,27 +156,20 @@ int main(int argc, char** argv)
175
156
{
176
157
size_t objectClass = (size_t )(detectionMat.at <float >(i, 1 ));
177
158
178
- int xLeftBottom = static_cast <int >(detectionMat.at <float >(i, 3 ) * frame.cols );
179
- int yLeftBottom = static_cast <int >(detectionMat.at <float >(i, 4 ) * frame.rows );
180
- int xRightTop = static_cast <int >(detectionMat.at <float >(i, 5 ) * frame.cols );
181
- int yRightTop = static_cast <int >(detectionMat.at <float >(i, 6 ) * frame.rows );
182
-
183
- ss.str (" " );
184
- ss << confidence;
185
- String conf (ss.str ());
186
-
187
- Rect object ((int )xLeftBottom, (int )yLeftBottom,
188
- (int )(xRightTop - xLeftBottom),
189
- (int )(yRightTop - yLeftBottom));
159
+ int left = static_cast <int >(detectionMat.at <float >(i, 3 ) * frame.cols );
160
+ int top = static_cast <int >(detectionMat.at <float >(i, 4 ) * frame.rows );
161
+ int right = static_cast <int >(detectionMat.at <float >(i, 5 ) * frame.cols );
162
+ int bottom = static_cast <int >(detectionMat.at <float >(i, 6 ) * frame.rows );
190
163
191
- rectangle (frame, object , Scalar (0 , 255 , 0 ));
192
- String label = String (classNames[objectClass]) + " : " + conf ;
164
+ rectangle (frame, Point (left, top), Point (right, bottom) , Scalar (0 , 255 , 0 ));
165
+ String label = format ( " %s: %.2f " , classNames[objectClass], confidence) ;
193
166
int baseLine = 0 ;
194
167
Size labelSize = getTextSize (label, FONT_HERSHEY_SIMPLEX, 0.5 , 1 , &baseLine);
195
- rectangle (frame, Rect (Point (xLeftBottom, yLeftBottom - labelSize.height ),
196
- Size (labelSize.width , labelSize.height + baseLine)),
168
+ top = max (top, labelSize.height );
169
+ rectangle (frame, Point (left, top - labelSize.height ),
170
+ Point (left + labelSize.width , top + baseLine),
197
171
Scalar (255 , 255 , 255 ), CV_FILLED);
198
- putText (frame, label, Point (xLeftBottom, yLeftBottom ),
172
+ putText (frame, label, Point (left, top ),
199
173
FONT_HERSHEY_SIMPLEX, 0.5 , Scalar (0 ,0 ,0 ));
200
174
}
201
175
}
0 commit comments