Skip to content

Commit a9807d8

Browse files
committed
Allocate new memory for optimized concat to prevent collisions.
Add a flag to disable memory reusing in dnn module.
1 parent 3542c98 commit a9807d8

File tree

2 files changed

+52
-49
lines changed

2 files changed

+52
-49
lines changed

modules/dnn/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,3 +97,8 @@ if(BUILD_PERF_TESTS)
9797
endif()
9898
endif()
9999
endif()
100+
101+
ocv_option(${the_module}_REUSE_MEMORY "Enable reusing strategy of memory management" ON)
102+
if (${the_module}_REUSE_MEMORY)
103+
add_definitions(-DREUSE_DNN_MEMORY=1)
104+
endif()

modules/dnn/src/dnn.cpp

Lines changed: 47 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -367,43 +367,42 @@ struct BlobManager
367367
}
368368
}
369369

370-
void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst, bool force)
370+
void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst)
371371
{
372+
#ifdef REUSE_DNN_MEMORY
372373
Mat bestBlob;
373374
LayerPin bestBlobPin;
374375

375-
if( !force )
376-
{
377-
std::map<LayerPin, Mat>::iterator hostIt;
378-
std::map<LayerPin, int>::iterator refIt;
376+
std::map<LayerPin, Mat>::iterator hostIt;
377+
std::map<LayerPin, int>::iterator refIt;
379378

380-
const int targetTotal = total(shape);
381-
int bestBlobTotal = INT_MAX;
379+
const int targetTotal = total(shape);
380+
int bestBlobTotal = INT_MAX;
382381

383-
for (hostIt = memHosts.begin(); hostIt != memHosts.end(); ++hostIt)
382+
for (hostIt = memHosts.begin(); hostIt != memHosts.end(); ++hostIt)
383+
{
384+
refIt = refCounter.find(hostIt->first);
385+
// Use only blobs that had references before because if not,
386+
// it might be used as output.
387+
if (refIt != refCounter.end() && refIt->second == 0)
384388
{
385-
refIt = refCounter.find(hostIt->first);
386-
// Use only blobs that had references before because if not,
387-
// it might be used as output.
388-
if (refIt != refCounter.end() && refIt->second == 0)
389+
Mat& unusedBlob = hostIt->second;
390+
if (unusedBlob.total() >= targetTotal &&
391+
unusedBlob.total() < bestBlobTotal)
389392
{
390-
Mat& unusedBlob = hostIt->second;
391-
if (unusedBlob.total() >= targetTotal &&
392-
unusedBlob.total() < bestBlobTotal)
393-
{
394-
bestBlobPin = hostIt->first;
395-
bestBlob = unusedBlob;
396-
bestBlobTotal = unusedBlob.total();
397-
}
393+
bestBlobPin = hostIt->first;
394+
bestBlob = unusedBlob;
395+
bestBlobTotal = unusedBlob.total();
398396
}
399397
}
400398
}
401399
if (!bestBlob.empty())
402400
{
403401
reuse(bestBlobPin, lp);
404-
dst = Mat(shape, CV_32F, bestBlob.data);
402+
dst = bestBlob.reshape(1, 1).colRange(0, targetTotal).reshape(1, shape);
405403
}
406404
else
405+
#endif // REUSE_DNN_MEMORY
407406
{
408407
// if dst already has been allocated with total(shape) elements,
409408
// it won't be recreated and the pointer of dst.data remains the same.
@@ -412,34 +411,32 @@ struct BlobManager
412411
}
413412
}
414413

415-
void reuseOrCreate(const MatShape& shape, const LayerPin& lp, UMat &umat_dst, bool force)
414+
void reuseOrCreate(const MatShape& shape, const LayerPin& lp, UMat &umat_dst)
416415
{
416+
#ifdef REUSE_DNN_MEMORY
417417
UMat bestBlob;
418418
LayerPin bestBlobPin;
419419

420-
if( !force )
421-
{
422-
std::map<LayerPin, UMat>::iterator hostIt;
423-
std::map<LayerPin, int>::iterator refIt;
420+
std::map<LayerPin, UMat>::iterator hostIt;
421+
std::map<LayerPin, int>::iterator refIt;
424422

425-
const int targetTotal = total(shape);
426-
int bestBlobTotal = INT_MAX;
423+
const int targetTotal = total(shape);
424+
int bestBlobTotal = INT_MAX;
427425

428-
for (hostIt = umat_memHosts.begin(); hostIt != umat_memHosts.end(); ++hostIt)
426+
for (hostIt = umat_memHosts.begin(); hostIt != umat_memHosts.end(); ++hostIt)
427+
{
428+
refIt = refCounter.find(hostIt->first);
429+
// Use only blobs that had references before because if not,
430+
// it might be used as output.
431+
if (refIt != refCounter.end() && refIt->second == 0)
429432
{
430-
refIt = refCounter.find(hostIt->first);
431-
// Use only blobs that had references before because if not,
432-
// it might be used as output.
433-
if (refIt != refCounter.end() && refIt->second == 0)
433+
UMat& unusedBlob = hostIt->second;
434+
if (unusedBlob.total() >= targetTotal &&
435+
unusedBlob.total() < bestBlobTotal)
434436
{
435-
UMat& unusedBlob = hostIt->second;
436-
if (unusedBlob.total() >= targetTotal &&
437-
unusedBlob.total() < bestBlobTotal)
438-
{
439-
bestBlobPin = hostIt->first;
440-
bestBlob = unusedBlob;
441-
bestBlobTotal = unusedBlob.total();
442-
}
437+
bestBlobPin = hostIt->first;
438+
bestBlob = unusedBlob;
439+
bestBlobTotal = unusedBlob.total();
443440
}
444441
}
445442
}
@@ -449,6 +446,7 @@ struct BlobManager
449446
umat_dst.create(shape, CV_32F);
450447
}
451448
else
449+
#endif // REUSE_DNN_MEMORY
452450
{
453451
// if dst already has been allocated with total(shape) elements,
454452
// it won't be recreated and the pointer of dst.data remains the same.
@@ -458,8 +456,7 @@ struct BlobManager
458456
}
459457

460458
void allocateBlobsForLayer(LayerData &ld, const LayerShapes& layerShapes,
461-
std::vector<LayerPin>& pinsForInternalBlobs,
462-
bool maximizeReuse)
459+
std::vector<LayerPin>& pinsForInternalBlobs)
463460
{
464461
CV_TRACE_FUNCTION();
465462
bool use_umat = (preferableBackend == DNN_BACKEND_DEFAULT &&
@@ -530,7 +527,6 @@ struct BlobManager
530527
}
531528

532529
std::map<int, std::vector<int> >::reverse_iterator it;
533-
bool force = !maximizeReuse && ld.inputBlobsId.size() > 1;
534530
for(it = idxSizes.rbegin(); it != idxSizes.rend(); it++)
535531
{
536532
for(int j = 0; j < it->second.size(); j++)
@@ -539,7 +535,7 @@ struct BlobManager
539535
if (total(shapes[index]))
540536
{
541537
LayerPin blobPin(ld.id, index);
542-
if (index < outShapes.size() && inPlace && !force)
538+
if (index < outShapes.size() && inPlace)
543539
{
544540
if (use_umat)
545541
{
@@ -558,9 +554,9 @@ struct BlobManager
558554
else
559555
{
560556
if (use_umat)
561-
reuseOrCreate(shapes[index], blobPin, *umat_blobs[index], force);
557+
reuseOrCreate(shapes[index], blobPin, *umat_blobs[index]);
562558
else
563-
reuseOrCreate(shapes[index], blobPin, *blobs[index], force);
559+
reuseOrCreate(shapes[index], blobPin, *blobs[index]);
564560
}
565561
}
566562
}
@@ -1111,8 +1107,7 @@ struct Net::Impl
11111107
CV_Assert(layerShapesIt != layersShapes.end());
11121108

11131109
std::vector<LayerPin> pinsForInternalBlobs;
1114-
bool maximizeReuse = preferableBackend == DNN_BACKEND_HALIDE;
1115-
blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs, maximizeReuse);
1110+
blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs);
11161111
ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
11171112
for (int i = 0; i < ld.outputBlobs.size(); ++i)
11181113
{
@@ -1415,6 +1410,9 @@ struct Net::Impl
14151410

14161411
if( i >= ninputs )
14171412
{
1413+
// Allocate new memory to prevent collisions during memory
1414+
// reusing (see https://github.com/opencv/opencv/pull/10456).
1415+
output = output.clone();
14181416
Range chrange[] = { Range::all(), Range::all(), Range::all(), Range::all() };
14191417
int ofs = 0;
14201418
for( i = 0; i < ninputs; i++ )

0 commit comments

Comments
 (0)