Skip to content

Commit babd21c

Browse files
committed
Merge pull request opencv#9823 from alalek:dnn_halide_bypass_tbb_threads
2 parents 1ea1ff1 + 3935e13 commit babd21c

File tree

1 file changed

+28
-32
lines changed

1 file changed

+28
-32
lines changed

modules/dnn/src/dnn.cpp

Lines changed: 28 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -589,42 +589,16 @@ struct Net::Impl
589589
return wrapper;
590590
}
591591

592-
// Loop body (derives from ParallelLoopBody) that compiles the Halide backend
// node of every eligible layer whose position in the layer map falls inside
// the Range it is invoked with.
class HalideCompiler : public ParallelLoopBody
593-
{
594-
public:
595-
// layers_           - map of layer data to walk; only its address is stored,
//                     so the map has to outlive this object.
// preferableTarget_ - target id passed through to dnn::compileHalide().
HalideCompiler(const MapIdToLayerData& layers_, int preferableTarget_)
596-
: layers(&layers_), preferableTarget(preferableTarget_) {}
597-
598-
// Processes the map entries at positions [r.start, r.end) in the map's
// iteration order.
void operator()(const Range& r) const
599-
{
600-
MapIdToLayerData::const_iterator it = layers->begin();
601-
// The iterator is stepped one element at a time from begin() until it
// reaches position r.start (or the end of the map); the loop body is empty.
for (int i = 0; i < r.start && it != layers->end(); ++i, ++it) {}
602-
for (int i = r.start; i < r.end && it != layers->end(); ++i, ++it)
603-
{
604-
const LayerData &ld = it->second;
605-
Ptr<Layer> layer = ld.layerInstance;
606-
// NOTE(review): the result of find() is dereferenced without comparing it
// to end(); this presumably relies on skipFlags always holding a
// DNN_BACKEND_HALIDE entry - confirm against the code that fills it.
bool skip = ld.skipFlags.find(DNN_BACKEND_HALIDE)->second;
607-
// Compile only layers that report Halide support and are not flagged to skip.
if (layer->supportBackend(DNN_BACKEND_HALIDE) && !skip)
608-
{
609-
// NOTE(review): same unchecked find() as above, here on backendNodes - confirm
// an entry for DNN_BACKEND_HALIDE is guaranteed at this point.
Ptr<BackendNode> node = ld.backendNodes.find(DNN_BACKEND_HALIDE)->second;
610-
dnn::compileHalide(ld.outputBlobs, node, preferableTarget);
611-
}
612-
}
613-
}
614-
private:
615-
// Pointer to the layer map supplied to the constructor.
const MapIdToLayerData* layers;
616-
// Target id forwarded to dnn::compileHalide().
int preferableTarget;
617-
};
618-
592+
#ifdef HAVE_HALIDE
619593
void compileHalide()
620594
{
621595
CV_TRACE_FUNCTION();
622596

623597
CV_Assert(preferableBackend == DNN_BACKEND_HALIDE);
624598

625599
HalideScheduler scheduler(halideConfigFile);
626-
MapIdToLayerData::iterator it;
627-
for (it = layers.begin(); it != layers.end(); ++it)
600+
std::vector< std::reference_wrapper<LayerData> > compileList; compileList.reserve(64);
601+
for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it)
628602
{
629603
LayerData &ld = it->second;
630604
Ptr<Layer> layer = ld.layerInstance;
@@ -639,10 +613,30 @@ struct Net::Impl
639613
ld.inputBlobs, ld.outputBlobs,
640614
preferableTarget);
641615
}
616+
compileList.emplace_back(ld);
642617
}
643618
}
644-
parallel_for_(Range(0, layers.size()), HalideCompiler(layers, preferableTarget));
619+
std::atomic<int> progress(0);
620+
auto fn = ([&] () -> void
621+
{
622+
for (;;)
623+
{
624+
int id = progress.fetch_add(1);
625+
if ((size_t)id >= compileList.size())
626+
return;
627+
const LayerData& ld = compileList[id].get();
628+
Ptr<BackendNode> node = ld.backendNodes.find(DNN_BACKEND_HALIDE)->second;
629+
dnn::compileHalide(ld.outputBlobs, node, preferableTarget);
630+
}
631+
});
632+
size_t num_threads = std::min(compileList.size(), (size_t)std::thread::hardware_concurrency());
633+
num_threads = std::max((size_t)1u, std::min((size_t)8u, num_threads));
634+
std::vector<std::thread> threads(num_threads - 1);
635+
for (auto& t: threads) t = std::thread(fn);
636+
fn(); // process own tasks
637+
for (auto& t: threads) t.join();
645638
}
639+
#endif
646640

647641
void clear()
648642
{
@@ -692,10 +686,12 @@ struct Net::Impl
692686

693687
if (!netWasAllocated )
694688
{
695-
// If user didn't call compileHalide() between
696-
// setPreferableBackend(DNN_BACKEND_HALIDE) and forward().
689+
#ifdef HAVE_HALIDE
697690
if (preferableBackend == DNN_BACKEND_HALIDE)
698691
compileHalide();
692+
#else
693+
CV_Assert(preferableBackend != DNN_BACKEND_HALIDE);
694+
#endif
699695
}
700696

701697
netWasAllocated = true;

0 commit comments

Comments
 (0)