@@ -589,42 +589,16 @@ struct Net::Impl
589
589
return wrapper;
590
590
}
591
591
592
- class HalideCompiler : public ParallelLoopBody
593
- {
594
- public:
595
- HalideCompiler (const MapIdToLayerData& layers_, int preferableTarget_)
596
- : layers(&layers_), preferableTarget(preferableTarget_) {}
597
-
598
- void operator ()(const Range& r) const
599
- {
600
- MapIdToLayerData::const_iterator it = layers->begin ();
601
- for (int i = 0 ; i < r.start && it != layers->end (); ++i, ++it) {}
602
- for (int i = r.start ; i < r.end && it != layers->end (); ++i, ++it)
603
- {
604
- const LayerData &ld = it->second ;
605
- Ptr<Layer> layer = ld.layerInstance ;
606
- bool skip = ld.skipFlags .find (DNN_BACKEND_HALIDE)->second ;
607
- if (layer->supportBackend (DNN_BACKEND_HALIDE) && !skip)
608
- {
609
- Ptr<BackendNode> node = ld.backendNodes .find (DNN_BACKEND_HALIDE)->second ;
610
- dnn::compileHalide (ld.outputBlobs , node, preferableTarget);
611
- }
612
- }
613
- }
614
- private:
615
- const MapIdToLayerData* layers;
616
- int preferableTarget;
617
- };
618
-
592
+ #ifdef HAVE_HALIDE
619
593
void compileHalide ()
620
594
{
621
595
CV_TRACE_FUNCTION ();
622
596
623
597
CV_Assert (preferableBackend == DNN_BACKEND_HALIDE);
624
598
625
599
HalideScheduler scheduler (halideConfigFile);
626
- MapIdToLayerData::iterator it ;
627
- for (it = layers.begin (); it != layers.end (); ++it)
600
+ std::vector< std::reference_wrapper<LayerData> > compileList; compileList. reserve ( 64 ) ;
601
+ for (MapIdToLayerData::iterator it = layers.begin (); it != layers.end (); ++it)
628
602
{
629
603
LayerData &ld = it->second ;
630
604
Ptr<Layer> layer = ld.layerInstance ;
@@ -639,10 +613,30 @@ struct Net::Impl
639
613
ld.inputBlobs , ld.outputBlobs ,
640
614
preferableTarget);
641
615
}
616
+ compileList.emplace_back (ld);
642
617
}
643
618
}
644
- parallel_for_ (Range (0 , layers.size ()), HalideCompiler (layers, preferableTarget));
619
+ std::atomic<int > progress (0 );
620
+ auto fn = ([&] () -> void
621
+ {
622
+ for (;;)
623
+ {
624
+ int id = progress.fetch_add (1 );
625
+ if ((size_t )id >= compileList.size ())
626
+ return ;
627
+ const LayerData& ld = compileList[id].get ();
628
+ Ptr<BackendNode> node = ld.backendNodes .find (DNN_BACKEND_HALIDE)->second ;
629
+ dnn::compileHalide (ld.outputBlobs , node, preferableTarget);
630
+ }
631
+ });
632
+ size_t num_threads = std::min (compileList.size (), (size_t )std::thread::hardware_concurrency ());
633
+ num_threads = std::max ((size_t )1u , std::min ((size_t )8u , num_threads));
634
+ std::vector<std::thread> threads (num_threads - 1 );
635
+ for (auto & t: threads) t = std::thread (fn);
636
+ fn (); // process own tasks
637
+ for (auto & t: threads) t.join ();
645
638
}
639
+ #endif
646
640
647
641
void clear ()
648
642
{
@@ -692,10 +686,12 @@ struct Net::Impl
692
686
693
687
if (!netWasAllocated )
694
688
{
695
- // If user didn't call compileHalide() between
696
- // setPreferableBackend(DNN_BACKEND_HALIDE) and forward().
689
+ #ifdef HAVE_HALIDE
697
690
if (preferableBackend == DNN_BACKEND_HALIDE)
698
691
compileHalide ();
692
+ #else
693
+ CV_Assert (preferableBackend != DNN_BACKEND_HALIDE);
694
+ #endif
699
695
}
700
696
701
697
netWasAllocated = true ;
0 commit comments