@@ -40,15 +40,26 @@ static Mat preprocess(const Mat& frame)
40
40
return preprocessed;
41
41
}
42
42
43
// Label table indexed by the class id read from column 1 of each detection row
// (used as classNames[objectClass] in main). Holds 21 entries: the background
// label at index 0 followed by 20 object labels.
+ const char * classNames[] = {" background" ,
44
+ " aeroplane" , " bicycle" , " bird" , " boat" ,
45
+ " bottle" , " bus" , " car" , " cat" , " chair" ,
46
+ " cow" , " diningtable" , " dog" , " horse" ,
47
+ " motorbike" , " person" , " pottedplant" ,
48
+ " sheep" , " sofa" , " train" , " tvmonitor" };
49
+
43
50
// Banner text that main writes to stdout, ahead of the parser's usage message,
// when the "help" option is set.
const char * about = " This sample uses Single-Shot Detector "
44
- " (https://arxiv.org/abs/1512.02325)"
45
- " to detect objects on image\n " ; // TODO: link
51
+ " (https://arxiv.org/abs/1512.02325) "
52
+ " to detect objects on camera/video/image.\n "
53
+ " .caffemodel model's file is available here: "
54
+ " https://github.com/weiliu89/caffe/tree/ssd#models\n "
55
+ " Default network is 300x300 and 20-classes VOC.\n " ;
46
56
47
57
// Command-line option table; each entry has the form
// { name | default value | description }.
// main reads "help", "camera_device", "video" and "min_confidence" through
// `parser`; an empty "video" value makes main open the camera selected by
// "camera_device" instead. The parser's construction and the use of
// "proto"/"model" are outside this view — presumably they select the network
// definition and weights; confirm against the full file.
const char * params
48
58
= " { help | false | print usage }"
49
59
" { proto | | model configuration }"
50
60
" { model | | model weights }"
51
- " { image | | image for detection }"
61
+ " { camera_device | 0 | camera device number}"
62
+ " { video | | video or image for detection}"
52
63
" { min_confidence | 0.5 | min confidence }" ;
53
64
54
65
int main (int argc, char ** argv)
@@ -57,7 +68,7 @@ int main(int argc, char** argv)
57
68
58
69
if (parser.get <bool >(" help" ))
59
70
{
60
- std:: cout << about << std:: endl;
71
+ cout << about << endl;
61
72
parser.printMessage ();
62
73
return 0 ;
63
74
}
@@ -79,58 +90,101 @@ int main(int argc, char** argv)
79
90
exit (-1 );
80
91
}
81
92
82
- cv::Mat frame = cv::imread (parser.get <string>(" image" ), -1 );
83
-
84
- if (frame.channels () == 4 )
85
- cvtColor (frame, frame, COLOR_BGRA2BGR);
86
- // ! [Prepare blob]
87
- Mat preprocessedFrame = preprocess (frame);
88
-
89
- Mat inputBlob = blobFromImage (preprocessedFrame, 1 .0f , Size (), Scalar (), false ); // Convert Mat to batch of images
90
- // ! [Prepare blob]
93
+ VideoCapture cap;
94
+ if (parser.get <String>(" video" ).empty ())
95
+ {
96
+ int cameraDevice = parser.get <int >(" camera_device" );
97
+ cap = VideoCapture (cameraDevice);
98
+ if (!cap.isOpened ())
99
+ {
100
+ cout << " Couldn't find camera: " << cameraDevice << endl;
101
+ return -1 ;
102
+ }
103
+ }
104
+ else
105
+ {
106
+ cap.open (parser.get <String>(" video" ));
107
+ if (!cap.isOpened ())
108
+ {
109
+ cout << " Couldn't open image or video: " << parser.get <String>(" video" ) << endl;
110
+ return -1 ;
111
+ }
112
+ }
91
113
92
- // ! [Set input blob]
93
- net.setInput (inputBlob, " data" ); // set the network input
94
- // ! [Set input blob]
114
+ for (;;)
115
+ {
116
+ cv::Mat frame;
117
+ cap >> frame; // get a new frame from camera/video or read image
95
118
96
- // ! [Make forward pass]
97
- Mat detection = net.forward (" detection_out" ); // compute output
98
- // ! [Make forward pass]
119
+ if (frame.empty ())
120
+ {
121
+ waitKey ();
122
+ break ;
123
+ }
99
124
100
- Mat detectionMat (detection.size [2 ], detection.size [3 ], CV_32F, detection.ptr <float >());
125
+ if (frame.channels () == 4 )
126
+ cvtColor (frame, frame, COLOR_BGRA2BGR);
101
127
102
- float confidenceThreshold = parser.get <float >(" min_confidence" );
103
- for (int i = 0 ; i < detectionMat.rows ; i++)
104
- {
105
- float confidence = detectionMat.at <float >(i, 2 );
128
+ // ! [Prepare blob]
129
+ Mat preprocessedFrame = preprocess (frame);
106
130
107
- if (confidence > confidenceThreshold)
108
- {
109
- size_t objectClass = (size_t )(detectionMat.at <float >(i, 1 ));
131
+ Mat inputBlob = blobFromImage (preprocessedFrame, 1 .0f , Size (), Scalar (), false ); // Convert Mat to batch of images
132
+ // ! [Prepare blob]
110
133
111
- float xLeftBottom = detectionMat.at <float >(i, 3 ) * frame.cols ;
112
- float yLeftBottom = detectionMat.at <float >(i, 4 ) * frame.rows ;
113
- float xRightTop = detectionMat.at <float >(i, 5 ) * frame.cols ;
114
- float yRightTop = detectionMat.at <float >(i, 6 ) * frame.rows ;
134
+ // ! [Set input blob]
135
+ net.setInput (inputBlob, " data" ); // set the network input
136
+ // ! [Set input blob]
115
137
116
- std::cout << " Class: " << objectClass << std::endl;
117
- std::cout << " Confidence: " << confidence << std::endl;
138
+ // ! [Make forward pass]
139
+ Mat detection = net.forward (" detection_out" ); // compute output
140
+ // ! [Make forward pass]
118
141
119
- std::cout << " " << xLeftBottom
120
- << " " << yLeftBottom
121
- << " " << xRightTop
122
- << " " << yRightTop << std::endl;
142
+ vector<double > layersTimings;
143
+ double freq = getTickFrequency () / 1000 ;
144
+ double time = net.getPerfProfile (layersTimings) / freq;
145
+ ostringstream ss;
146
+ ss << " FPS: " << 1000 /time << " ; time: " << time << " ms" ;
147
+ putText (frame, ss.str (), Point (20 ,20 ), 0 , 0.5 , Scalar (0 ,0 ,255 ));
123
148
124
- Rect object ((int )xLeftBottom, (int )yLeftBottom,
125
- (int )(xRightTop - xLeftBottom),
126
- (int )(yRightTop - yLeftBottom));
149
+ Mat detectionMat (detection.size [2 ], detection.size [3 ], CV_32F, detection.ptr <float >());
127
150
128
- rectangle (frame, object, Scalar (0 , 255 , 0 ));
151
+ float confidenceThreshold = parser.get <float >(" min_confidence" );
152
+ for (int i = 0 ; i < detectionMat.rows ; i++)
153
+ {
154
+ float confidence = detectionMat.at <float >(i, 2 );
155
+
156
// Draw one detection: only rows whose confidence (column 2) is strictly above
// the min_confidence threshold get a box and a label.
+ if (confidence > confidenceThreshold)
157
+ {
// Column 1 holds the class id as a float; it is truncated to an index.
158
+ size_t objectClass = (size_t )(detectionMat.at <float >(i, 1 ));
159
+
// Columns 3..6 are scaled by the frame width/height to get pixel coordinates
// (presumably the network emits them normalized to [0, 1] — confirm).
160
+ int xLeftBottom = static_cast <int >(detectionMat.at <float >(i, 3 ) * frame.cols );
161
+ int yLeftBottom = static_cast <int >(detectionMat.at <float >(i, 4 ) * frame.rows );
162
+ int xRightTop = static_cast <int >(detectionMat.at <float >(i, 5 ) * frame.cols );
163
+ int yRightTop = static_cast <int >(detectionMat.at <float >(i, 6 ) * frame.rows );
164
+
// Reuse the stream that already formatted the FPS overlay: replace its buffer,
// then format the confidence value for the label.
165
+ ss.str (" " );
166
+ ss << confidence;
167
+ String conf (ss.str ());
168
+
// Rect takes (x, y, width, height), so the second corner is converted to a size.
169
+ Rect object (xLeftBottom, yLeftBottom,
170
+ xRightTop - xLeftBottom,
171
+ yRightTop - yLeftBottom);
172
+
173
+ rectangle (frame, object, Scalar (0 , 255 , 0 ));
// NOTE(review): objectClass comes straight from the network output and indexes
// classNames (21 entries) with no range check. A model that reports a class id
// of 21 or more would read past the end of the array — consider guarding the
// lookup before building the label.
174
+ String label = String (classNames[objectClass]) + " : " + conf;
175
+ int baseLine = 0 ;
176
+ Size labelSize = getTextSize (label, FONT_HERSHEY_SIMPLEX, 0.5 , 1 , &baseLine);
// Filled white box sized to the label text (height plus baseline), drawn before
// the black text is rendered on top of it at the box's first corner.
177
+ rectangle (frame, Rect (Point (xLeftBottom, yLeftBottom - labelSize.height ),
178
+ Size (labelSize.width , labelSize.height + baseLine)),
179
+ Scalar (255 , 255 , 255 ), CV_FILLED);
180
+ putText (frame, label, Point (xLeftBottom, yLeftBottom),
181
+ FONT_HERSHEY_SIMPLEX, 0.5 , Scalar (0 ,0 ,0 ));
182
+ }
129
183
}
130
- }
131
184
132
- imshow (" detections" , frame);
133
- waitKey ();
185
+ imshow (" detections" , frame);
186
+ if (waitKey (1 ) >= 0 ) break ;
187
+ }
134
188
135
189
return 0 ;
136
190
} // main
0 commit comments