@@ -367,43 +367,42 @@ struct BlobManager
         }
     }
 
-    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst, bool force)
+    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst)
     {
+#ifdef REUSE_DNN_MEMORY
         Mat bestBlob;
         LayerPin bestBlobPin;
 
-        if( !force )
-        {
-            std::map<LayerPin, Mat>::iterator hostIt;
-            std::map<LayerPin, int>::iterator refIt;
+        std::map<LayerPin, Mat>::iterator hostIt;
+        std::map<LayerPin, int>::iterator refIt;
 
-            const int targetTotal = total(shape);
-            int bestBlobTotal = INT_MAX;
+        const int targetTotal = total(shape);
+        int bestBlobTotal = INT_MAX;
 
-            for (hostIt = memHosts.begin(); hostIt != memHosts.end(); ++hostIt)
+        for (hostIt = memHosts.begin(); hostIt != memHosts.end(); ++hostIt)
+        {
+            refIt = refCounter.find(hostIt->first);
+            // Use only blobs that had references before because if not,
+            // it might be used as output.
+            if (refIt != refCounter.end() && refIt->second == 0)
             {
-                refIt = refCounter.find(hostIt->first);
-                // Use only blobs that had references before because if not,
-                // it might be used as output.
-                if (refIt != refCounter.end() && refIt->second == 0)
+                Mat& unusedBlob = hostIt->second;
+                if (unusedBlob.total() >= targetTotal &&
+                    unusedBlob.total() < bestBlobTotal)
                 {
-                    Mat& unusedBlob = hostIt->second;
-                    if (unusedBlob.total() >= targetTotal &&
-                        unusedBlob.total() < bestBlobTotal)
-                    {
-                        bestBlobPin = hostIt->first;
-                        bestBlob = unusedBlob;
-                        bestBlobTotal = unusedBlob.total();
-                    }
+                    bestBlobPin = hostIt->first;
+                    bestBlob = unusedBlob;
+                    bestBlobTotal = unusedBlob.total();
                 }
             }
         }
         if (!bestBlob.empty())
         {
             reuse(bestBlobPin, lp);
-            dst = Mat(shape, CV_32F, bestBlob.data);
+            dst = bestBlob.reshape(1, 1).colRange(0, targetTotal).reshape(1, shape);
         }
         else
+#endif  // REUSE_DNN_MEMORY
         {
             // if dst already has been allocated with total(shape) elements,
             // it won't be recrreated and pointer of dst.data remains the same.
@@ -412,34 +411,32 @@ struct BlobManager
         }
     }
 
-    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, UMat &umat_dst, bool force)
+    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, UMat &umat_dst)
     {
+#ifdef REUSE_DNN_MEMORY
         UMat bestBlob;
         LayerPin bestBlobPin;
 
-        if( !force )
-        {
-            std::map<LayerPin, UMat>::iterator hostIt;
-            std::map<LayerPin, int>::iterator refIt;
+        std::map<LayerPin, UMat>::iterator hostIt;
+        std::map<LayerPin, int>::iterator refIt;
 
-            const int targetTotal = total(shape);
-            int bestBlobTotal = INT_MAX;
+        const int targetTotal = total(shape);
+        int bestBlobTotal = INT_MAX;
 
-            for (hostIt = umat_memHosts.begin(); hostIt != umat_memHosts.end(); ++hostIt)
+        for (hostIt = umat_memHosts.begin(); hostIt != umat_memHosts.end(); ++hostIt)
+        {
+            refIt = refCounter.find(hostIt->first);
+            // Use only blobs that had references before because if not,
+            // it might be used as output.
+            if (refIt != refCounter.end() && refIt->second == 0)
             {
-                refIt = refCounter.find(hostIt->first);
-                // Use only blobs that had references before because if not,
-                // it might be used as output.
-                if (refIt != refCounter.end() && refIt->second == 0)
+                UMat& unusedBlob = hostIt->second;
+                if (unusedBlob.total() >= targetTotal &&
+                    unusedBlob.total() < bestBlobTotal)
                 {
-                    UMat& unusedBlob = hostIt->second;
-                    if (unusedBlob.total() >= targetTotal &&
-                        unusedBlob.total() < bestBlobTotal)
-                    {
-                        bestBlobPin = hostIt->first;
-                        bestBlob = unusedBlob;
-                        bestBlobTotal = unusedBlob.total();
-                    }
+                    bestBlobPin = hostIt->first;
+                    bestBlob = unusedBlob;
+                    bestBlobTotal = unusedBlob.total();
                 }
             }
         }
@@ -449,6 +446,7 @@ struct BlobManager
             umat_dst.create(shape, CV_32F);
         }
         else
+#endif  // REUSE_DNN_MEMORY
         {
             // if dst already has been allocated with total(shape) elements,
             // it won't be recrreated and pointer of dst.data remains the same.
@@ -458,8 +456,7 @@ struct BlobManager
     }
 
     void allocateBlobsForLayer(LayerData &ld, const LayerShapes& layerShapes,
-                               std::vector<LayerPin>& pinsForInternalBlobs,
-                               bool maximizeReuse)
+                               std::vector<LayerPin>& pinsForInternalBlobs)
     {
         CV_TRACE_FUNCTION();
         bool use_umat = (preferableBackend == DNN_BACKEND_DEFAULT &&
@@ -530,7 +527,6 @@ struct BlobManager
         }
 
         std::map<int, std::vector<int> >::reverse_iterator it;
-        bool force = !maximizeReuse && ld.inputBlobsId.size() > 1;
         for (it = idxSizes.rbegin(); it != idxSizes.rend(); it++)
         {
             for (int j = 0; j < it->second.size(); j++)
@@ -539,7 +535,7 @@ struct BlobManager
                 if (total(shapes[index]))
                 {
                     LayerPin blobPin(ld.id, index);
-                    if (index < outShapes.size() && inPlace && !force)
+                    if (index < outShapes.size() && inPlace)
                     {
                         if (use_umat)
                         {
@@ -558,9 +554,9 @@ struct BlobManager
                     else
                     {
                         if (use_umat)
-                            reuseOrCreate(shapes[index], blobPin, *umat_blobs[index], force);
+                            reuseOrCreate(shapes[index], blobPin, *umat_blobs[index]);
                         else
-                            reuseOrCreate(shapes[index], blobPin, *blobs[index], force);
+                            reuseOrCreate(shapes[index], blobPin, *blobs[index]);
                     }
                 }
             }
@@ -1111,8 +1107,7 @@ struct Net::Impl
         CV_Assert(layerShapesIt != layersShapes.end());
 
         std::vector<LayerPin> pinsForInternalBlobs;
-        bool maximizeReuse = preferableBackend == DNN_BACKEND_HALIDE;
-        blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs, maximizeReuse);
+        blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs);
         ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
         for (int i = 0; i < ld.outputBlobs.size(); ++i)
         {
@@ -1415,6 +1410,9 @@ struct Net::Impl
 
                 if( i >= ninputs )
                 {
+                    // Allocate new memory to prevent collisions during memory
+                    // reusing (see https://github.com/opencv/opencv/pull/10456).
+                    output = output.clone();
                     Range chrange[] = { Range::all(), Range::all(), Range::all(), Range::all() };
                     int ofs = 0;
                     for( i = 0; i < ninputs; i++ )