@@ -199,125 +199,6 @@ struct LayerPin
     }
 };
 
-// An object of this class manages wrappers: one and only one wrapper for every
-// CPU memory pointer and shape. For now it supports a single backend and target.
-class BackendWrapManager
-{
-public:
-    Ptr<BackendWrapper> wrap(const Mat& m, int backendId, int targetId)
-    {
-        CV_TRACE_FUNCTION();
-
-        CV_Assert(backendId != DNN_BACKEND_DEFAULT);
-
-        std::map<void*, Ptr<BackendWrapper> >::iterator hostsIt;
-        // Check that the same CPU memory was previously wrapped.
-        hostsIt = hostWrappers.find(m.data);
-        if (hostsIt == hostWrappers.end())
-        {
-            // If not wrapped before.
-            return (hostWrappers[m.data] = wrapHost(m, backendId, targetId));
-        }
-        else
-        {
-            // Check whether a wrapper for this host and shape was created before.
-            std::map<std::pair<void*, MatSize>, Ptr<BackendWrapper> >::iterator it;
-            std::pair<void*, MatSize> key(m.data, m.size);
-            it = extraWrappers.find(key);
-            if (it == extraWrappers.end())
-            {
-                MatShape shape(m.dims);
-                for (int i = 0; i < m.dims; ++i)
-                    shape[i] = m.size.p[i];
-                return (extraWrappers[key] = wrapUser(hostsIt->second, shape));
-            }
-            else
-                return it->second;
-        }
-    }
-
-    std::vector<Ptr<BackendWrapper> > wrap(const std::vector<Mat*>& mats,
-                                           int backendId, int targetId)
-    {
-        const int num = mats.size();
-        std::vector<Ptr<BackendWrapper> > dst(num);
-        for (int i = 0; i < num; ++i)
-        {
-            dst[i] = wrap(*mats[i], backendId, targetId);
-        }
-        return dst;
-    }
-
-    std::vector<Ptr<BackendWrapper> > wrap(const std::vector<Mat>& mats,
-                                           int backendId, int targetId)
-    {
-        const int num = mats.size();
-        std::vector<Ptr<BackendWrapper> > dst(num);
-        for (int i = 0; i < num; ++i)
-        {
-            dst[i] = wrap(mats[i], backendId, targetId);
-        }
-        return dst;
-    }
-
-    void reset()
-    {
-        CV_TRACE_FUNCTION();
-
-        hostWrappers.clear();
-        extraWrappers.clear();
-    }
-
-private:
-    // Backend-specific wrapping function.
-    Ptr<BackendWrapper> wrapHost(const Mat& m, int backendId, int targetId)
-    {
-        if (backendId == DNN_BACKEND_DEFAULT)
-        {
-            return Ptr<BackendWrapper>();
-        }
-        else if (backendId == DNN_BACKEND_HALIDE)
-        {
-            CV_Assert(haveHalide());
-#ifdef HAVE_HALIDE
-            return Ptr<BackendWrapper>(new HalideBackendWrapper(targetId, m));
-#endif  // HAVE_HALIDE
-        }
-        else
-        {
-            CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
-        }
-        return Ptr<BackendWrapper>();
-    }
-
-    // Backend-specific wrapping function.
-    Ptr<BackendWrapper> wrapUser(const Ptr<BackendWrapper>& host, const MatShape& shape)
-    {
-        int backendId = host->backendId;
-        if (backendId == DNN_BACKEND_DEFAULT)
-        {
-            return Ptr<BackendWrapper>();
-        }
-        else if (backendId == DNN_BACKEND_HALIDE)
-        {
-            CV_Assert(haveHalide());
-#ifdef HAVE_HALIDE
-            return Ptr<BackendWrapper>(new HalideBackendWrapper(host, shape));
-#endif  // HAVE_HALIDE
-        }
-        else
-        {
-            CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
-        }
-        return Ptr<BackendWrapper>();
-    }
-
-    // Wrappers that are initialized for memory hosts (first wrapping of CPU data).
-    std::map<void*, Ptr<BackendWrapper> > hostWrappers;
-    // The rest of the wrappers, initialized for non-host cv::Mat.
-    std::map<std::pair<void*, MatSize>, Ptr<BackendWrapper> > extraWrappers;
-};
-
 struct LayerData
 {
     LayerData() : id(-1), flag(0) {}
@@ -340,6 +221,8 @@ struct LayerData
     std::set<int> inputLayersId;
     std::set<int> requiredOutputs;
     std::vector<LayerPin> consumers;
+    std::vector<Ptr<BackendWrapper> > outputBlobsWrappers;
+    std::vector<Ptr<BackendWrapper> > inputBlobsWrappers;
 
     Ptr<Layer> layerInstance;
     std::vector<Mat> outputBlobs;
@@ -618,6 +501,24 @@ struct BlobManager
     std::map<LayerPin, Mat> memHosts;
 };
 
+static Ptr<BackendWrapper> wrapMat(int backendId, int targetId, const cv::Mat& m)
+{
+    if (backendId == DNN_BACKEND_DEFAULT)
+    {
+        return Ptr<BackendWrapper>();
+    }
+    else if (backendId == DNN_BACKEND_HALIDE)
+    {
+        CV_Assert(haveHalide());
+#ifdef HAVE_HALIDE
+        return Ptr<BackendWrapper>(new HalideBackendWrapper(targetId, m));
+#endif  // HAVE_HALIDE
+    }
+    else
+        CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
+    return Ptr<BackendWrapper>();
+}
+
 struct Net::Impl
 {
     typedef std::map<int, LayerShapes> LayersShapesMap;
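wrapMat above is the single point that dispatches on the backend identifier; the Net::Impl::wrap added in the next hunk layers a pointer-keyed cache over such a factory. Below is a standalone sketch of that cache-over-factory shape, with made-up Wrapper and makeWrapper names (the patch additionally rebuilds a shape-specific view when a cached buffer is reused with a new shape, which this sketch omits):

    #include <map>
    #include <memory>

    struct Wrapper {};  // stand-in for a backend-specific wrapper

    // Stand-in for wrapMat: creates a fresh wrapper for host memory.
    std::shared_ptr<Wrapper> makeWrapper(void* /*data*/)
    {
        return std::make_shared<Wrapper>();
    }

    // Mirrors Impl::backendWrappers: one entry per distinct host pointer.
    std::map<void*, std::shared_ptr<Wrapper> > cache;

    std::shared_ptr<Wrapper> wrap(void* data)
    {
        std::map<void*, std::shared_ptr<Wrapper> >::iterator it = cache.find(data);
        if (it != cache.end())
            return it->second;                   // same host memory: reuse
        return cache[data] = makeWrapper(data);  // first wrap of this memory
    }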
@@ -650,15 +551,71 @@ struct Net::Impl
     int preferableBackend;
     int preferableTarget;
     String halideConfigFile;
-    // Backend-specific wrapping manager.
-    BackendWrapManager backendWrapper;
+    // Maps host data pointers to backend-specific wrappers.
+    std::map<void*, Ptr<BackendWrapper> > backendWrappers;
 
     int lastLayerId;
 
     bool netWasAllocated;
     bool fusion;
     std::vector<int64> layersTimings;
 
+    Ptr<BackendWrapper> wrap(const Mat& host)
+    {
+        if (preferableBackend == DNN_BACKEND_DEFAULT)
+            return Ptr<BackendWrapper>();
+
+        MatShape shape(host.dims);
+        for (int i = 0; i < host.dims; ++i)
+            shape[i] = host.size[i];
+
+        void* data = host.data;
+        if (backendWrappers.find(data) != backendWrappers.end())
+        {
+            Ptr<BackendWrapper> baseBuffer = backendWrappers[data];
+            if (preferableBackend == DNN_BACKEND_HALIDE)
+            {
+                CV_Assert(haveHalide());
+#ifdef HAVE_HALIDE
+                return Ptr<BackendWrapper>(new HalideBackendWrapper(baseBuffer, shape));
+#endif  // HAVE_HALIDE
+            }
+            else
+                CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
+        }
+
+        Ptr<BackendWrapper> wrapper = wrapMat(preferableBackend, preferableTarget, host);
+        backendWrappers[data] = wrapper;
+        return wrapper;
+    }
+
+    class HalideCompiler : public ParallelLoopBody
+    {
+    public:
+        HalideCompiler(const MapIdToLayerData& layers_, int preferableTarget_)
+            : layers(&layers_), preferableTarget(preferableTarget_) {}
+
+        void operator()(const Range& r) const
+        {
+            MapIdToLayerData::const_iterator it = layers->begin();
+            for (int i = 0; i < r.start && it != layers->end(); ++i, ++it) {}
+            for (int i = r.start; i < r.end && it != layers->end(); ++i, ++it)
+            {
+                const LayerData& ld = it->second;
+                Ptr<Layer> layer = ld.layerInstance;
+                bool skip = ld.skipFlags.find(DNN_BACKEND_HALIDE)->second;
+                if (layer->supportBackend(DNN_BACKEND_HALIDE) && !skip)
+                {
+                    Ptr<BackendNode> node = ld.backendNodes.find(DNN_BACKEND_HALIDE)->second;
+                    dnn::compileHalide(ld.outputBlobs, node, preferableTarget);
+                }
+            }
+        }
+    private:
+        const MapIdToLayerData* layers;
+        int preferableTarget;
+    };
+
     void compileHalide()
     {
         CV_TRACE_FUNCTION();
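The HalideCompiler added above follows OpenCV's stock parallel_for_ idiom: a ParallelLoopBody subclass processes a disjoint sub-range of items on each worker thread. A self-contained illustration of just that idiom (SquareBody and its data are invented; only cv::parallel_for_, cv::Range, and cv::ParallelLoopBody are real OpenCV APIs):

    #include <opencv2/core.hpp>
    #include <vector>

    class SquareBody : public cv::ParallelLoopBody
    {
    public:
        SquareBody(std::vector<int>& data_) : data(&data_) {}
        void operator()(const cv::Range& r) const
        {
            // Each worker receives a disjoint [r.start, r.end) slice.
            for (int i = r.start; i < r.end; ++i)
                (*data)[i] *= (*data)[i];
        }
    private:
        std::vector<int>* data;
    };

    int main()
    {
        std::vector<int> v(1000, 3);
        cv::parallel_for_(cv::Range(0, (int)v.size()), SquareBody(v));
        return 0;
    }

Note that HalideCompiler has to advance a map iterator up to r.start by hand because MapIdToLayerData is a std::map without random access.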
@@ -682,10 +639,9 @@ struct Net::Impl
                                                 ld.inputBlobs, ld.outputBlobs,
                                                 preferableTarget);
                 }
-                dnn::compileHalide(ld.outputBlobs, ld.backendNodes[DNN_BACKEND_HALIDE],
-                                   preferableTarget);
             }
         }
+        parallel_for_(Range(0, layers.size()), HalideCompiler(layers, preferableTarget));
     }
 
     void clear()
@@ -917,7 +873,6 @@ struct Net::Impl
     {
         CV_TRACE_FUNCTION();
 
-        backendWrapper.reset();
         if (preferableBackend == DNN_BACKEND_DEFAULT)
         {
             CV_Assert(preferableTarget == DNN_TARGET_CPU);
@@ -967,12 +922,10 @@ struct Net::Impl
             }
             // No layers fusion.
             ldTop.skipFlags[preferableBackend] = false;
-            std::vector<Ptr<BackendWrapper> > inputs =
-                backendWrapper.wrap(ldTop.inputBlobs, preferableBackend,
-                                    preferableTarget);
             if (preferableBackend == DNN_BACKEND_HALIDE)
             {
-                ldTop.backendNodes[DNN_BACKEND_HALIDE] = layerTop->initHalide(inputs);
+                ldTop.backendNodes[DNN_BACKEND_HALIDE] =
+                    layerTop->initHalide(ldTop.inputBlobsWrappers);
                 baseIt = it;
             }
             else
@@ -1021,12 +974,14 @@ struct Net::Impl
 
         // bind inputs
         ld.inputBlobs.resize(ninputs);
+        ld.inputBlobsWrappers.resize(ninputs);
         for (size_t i = 0; i < ninputs; i++)
         {
             LayerPin from = ld.inputBlobsId[i];
             CV_Assert(from.valid());
             CV_DbgAssert(layers.count(from.lid) && (int)layers[from.lid].outputBlobs.size() > from.oid);
             ld.inputBlobs[i] = &layers[from.lid].outputBlobs[from.oid];
+            ld.inputBlobsWrappers[i] = layers[from.lid].outputBlobsWrappers[from.oid];
         }
 
         LayersShapesMap::const_iterator layerShapesIt = layersShapes.find(lid);
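Copying outputBlobsWrappers[from.oid] into the consumer's inputBlobsWrappers[i] shares a single Ptr between producer and consumer, so a later setHostDirty() or copyToHost() on either side acts on the same object. A minimal standalone sketch of that aliasing with cv::Ptr (the Buf struct is invented):

    #include <opencv2/core.hpp>

    struct Buf
    {
        bool hostDirty;
        Buf() : hostDirty(false) {}
    };

    int main()
    {
        cv::Ptr<Buf> producerOut = cv::makePtr<Buf>();
        cv::Ptr<Buf> consumerIn = producerOut;  // shared handle, not a copy
        producerOut->hostDirty = true;          // producer marks host data fresh
        return consumerIn->hostDirty ? 0 : 1;   // consumer sees the same flag
    }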
@@ -1036,6 +991,11 @@ struct Net::Impl
         std::vector<LayerPin> pinsForInternalBlobs;
         bool maximizeReuse = preferableBackend == DNN_BACKEND_HALIDE;
         blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs, maximizeReuse);
+        ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
+        for (int i = 0; i < ld.outputBlobs.size(); ++i)
+        {
+            ld.outputBlobsWrappers[i] = wrap(ld.outputBlobs[i]);
+        }
 
         Ptr<Layer> layerPtr = ld.getLayerInstance();
         {
@@ -1256,6 +1216,8 @@ struct Net::Impl
         getLayersShapes(inputShapes, layersShapes);
 
         blobManager.reset();
+        backendWrappers.clear();
+        blobManager.addReference(LayerPin(0, 0));
         for (it = layers.begin(); it != layers.end(); ++it)
         {
             const LayerData& ld = it->second;
@@ -1291,18 +1253,28 @@ struct Net::Impl
             !layer->supportBackend(preferableBackend))
         {
             if (!ld.skipFlags[DNN_BACKEND_DEFAULT])
+            {
+                for (int i = 0, n = ld.inputBlobsWrappers.size(); i < n; ++i)
+                {
+                    if (!ld.inputBlobsWrappers[i].empty())
+                        ld.inputBlobsWrappers[i]->copyToHost();
+                }
                 layer->forward(ld.inputBlobs, ld.outputBlobs, ld.internals);
+                for (int i = 0, n = ld.outputBlobsWrappers.size(); i < n; ++i)
+                {
+                    if (!ld.outputBlobsWrappers[i].empty())
+                        ld.outputBlobsWrappers[i]->setHostDirty();
+                }
+            }
             else
                 tm.reset();
         }
         else if (!ld.skipFlags[preferableBackend])
         {
-            std::vector<Ptr<BackendWrapper> > outputs =
-                backendWrapper.wrap(ld.outputBlobs, preferableBackend, preferableTarget);
             Ptr<BackendNode> node = ld.backendNodes[preferableBackend];
             if (preferableBackend == DNN_BACKEND_HALIDE)
             {
-                forwardHalide(outputs, node);
+                forwardHalide(ld.outputBlobsWrappers, node);
             }
             else
             {
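The copyToHost()/setHostDirty() calls added above implement a lazy synchronization contract: device data is pulled back only when a CPU fallback actually needs to read it, and CPU writes merely mark the host copy as the newest. The toy class below illustrates that contract in isolation; it is not the dnn BackendWrapper interface, only the dirty-flag pattern it follows:

    #include <cstdio>

    class ToyWrapper
    {
    public:
        ToyWrapper() : hostDirty(false), deviceDirty(false) {}

        // Called before a CPU layer reads: sync device -> host if needed.
        void copyToHost()
        {
            if (deviceDirty) { std::puts("device -> host"); deviceDirty = false; }
        }

        // Called after a CPU layer writes: the host copy is now the newest.
        void setHostDirty() { hostDirty = true; }

        // Called before a backend reads: sync host -> device if needed.
        void copyToDevice()
        {
            if (hostDirty) { std::puts("host -> device"); hostDirty = false; }
        }

    private:
        bool hostDirty, deviceDirty;
    };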
@@ -1423,11 +1395,10 @@ struct Net::Impl
             CV_Error(Error::StsOutOfRange, "Layer \"" + ld.name + "\" produces only " + toString(ld.outputBlobs.size()) +
                      " outputs, the #" + toString(pin.oid) + " was requested");
         }
-        if (preferableBackend != DNN_BACKEND_DEFAULT)
+        if (preferableBackend != DNN_TARGET_CPU)
         {
             // Transfer data to CPU if it's required.
-            backendWrapper.wrap(ld.outputBlobs[pin.oid], preferableBackend,
-                                preferableTarget)->copyToHost();
+            ld.outputBlobsWrappers[pin.oid]->copyToHost();
         }
         else
         {
@@ -1635,13 +1606,18 @@ void Net::setInput(const Mat &blob_, const String& name)
 
     LayerData &ld = impl->layers[pin.lid];
     ld.outputBlobs.resize(std::max(pin.oid + 1, (int)ld.requiredOutputs.size()));
+    ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
     MatShape prevShape = shape(ld.outputBlobs[pin.oid]);
     bool oldShape = prevShape == shape(blob_);
     if (oldShape)
         blob_.copyTo(ld.outputBlobs[pin.oid]);
     else
         ld.outputBlobs[pin.oid] = blob_.clone();
 
+    if (!ld.outputBlobsWrappers[pin.oid].empty())
+    {
+        ld.outputBlobsWrappers[pin.oid]->setHostDirty();
+    }
     impl->netWasAllocated = impl->netWasAllocated && oldShape;
 }
 