@@ -812,86 +812,46 @@ class finalPass : public ParallelLoopBody
812
812
813
813
// Destructor with an empty body: no explicit cleanup is performed here.
~finalPass () {}
814
814
815
// Removed side of the diff: this copy-assignment copied nothing and only returned *this.
- finalPass& operator =(const finalPass&) {return *this ;}
816
-
817
815
// Processes the rows in [boundaries.start, boundaries.end): for each row i it reads row i + 1
// of `map` (after skipping a leading column offset) and writes one byte per column into row i
// of `dst`.
// NOTE(review): this text looks like a rendered diff -- bare numbers are old/new line numbers,
// lines starting with '-' are the removed version and '+' the added version; confirm against
// the real source file before relying on exact statement order.
void operator ()(const Range &boundaries) const
818
816
{
819
817
// the final pass, form the final image
820
818
for (int i = boundaries.start ; i < boundaries.end ; i++)
821
819
{
822
820
int j = 0 ;
823
821
uchar *pdst = dst.ptr <uchar>(i);
824
// pmap points into row i + 1 of map; the added version makes it const and initializes it at
// the declaration, then applies the column offset below.
- uchar *pmap;
822
+ const uchar *pmap = map. ptr <uchar>(i + 1 ) ;
825
823
#if CV_SIMD128
826
824
// Column offset: CV_MALLOC_SIMD128 when haveSIMD is set, 1 otherwise.
if (haveSIMD)
827
- pmap = (uchar*)map. ptr <uchar>(i + 1 ) + CV_MALLOC_SIMD128;
825
+ pmap += CV_MALLOC_SIMD128;
828
826
else
829
827
#endif
830
- pmap = (uchar*)map. ptr <uchar>(i + 1 ) + 1 ;
828
+ pmap += 1 ;
831
829
#if CV_SIMD128
832
830
if (haveSIMD) {
833
// Removed version: 32 bytes per iteration; each byte was widened to 16 bits, shifted right
// by 1, packed back to 8 bits and negated (zero minus value) before being stored.
- const v_int8x16 v_zero = v_setzero_s8 ();
834
-
835
- for (; j <= dst.cols - 32 ; j += 32 ) {
836
- v_uint8x16 v_pmap1 = v_load_aligned ((const unsigned char *)(pmap + j));
837
- v_uint8x16 v_pmap2 = v_load_aligned ((const unsigned char *)(pmap + j + 16 ));
838
-
839
- v_uint16x8 v_pmaplo1, v_pmaphi1, v_pmaplo2, v_pmaphi2;
840
- v_expand (v_pmap1, v_pmaplo1, v_pmaphi1);
841
- v_expand (v_pmap2, v_pmaplo2, v_pmaphi2);
842
-
843
- v_pmaplo1 = v_pmaplo1 >> 1 ;
844
- v_pmaphi1 = v_pmaphi1 >> 1 ;
845
- v_pmaplo2 = v_pmaplo2 >> 1 ;
846
- v_pmaphi2 = v_pmaphi2 >> 1 ;
847
-
848
- v_pmap1 = v_pack (v_pmaplo1, v_pmaphi1);
849
- v_pmap2 = v_pack (v_pmaplo2, v_pmaphi2);
850
-
851
- v_pmap1 = v_reinterpret_as_u8 (v_zero - v_reinterpret_as_s8 (v_pmap1));
852
- v_pmap2 = v_reinterpret_as_u8 (v_zero - v_reinterpret_as_s8 (v_pmap2));
831
// Added version: constants for the compare-and-select below -- all-zero lanes, all-bits-set
// lanes (complement of zero) and lanes that all hold the value 2.
+ const v_uint8x16 v_zero = v_setzero_u8 ();
832
+ const v_uint8x16 v_ff = ~v_zero;
833
+ const v_uint8x16 v_two (2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 );
853
834
854
- v_store ((pdst + j), v_pmap1);
855
- v_store ((pdst + j + 16 ), v_pmap2);
856
- }
857
-
858
- if (j <= dst.cols - 16 ) {
835
// Added version: a single loop that handles 16 bytes per iteration (the removed version
// ran this step at most once and advanced j by hand).
+ for (; j <= dst.cols - 16 ; j += 16 )
836
+ {
859
837
v_uint8x16 v_pmap = v_load_aligned ((const unsigned char *)(pmap + j));
860
-
861
- v_uint16x8 v_pmaplo;
862
- v_uint16x8 v_pmaphi;
863
- v_expand (v_pmap, v_pmaplo, v_pmaphi);
864
-
865
- v_pmaplo = v_pmaplo >> 1 ;
866
- v_pmaphi = v_pmaphi >> 1 ;
867
-
868
- v_pmap = v_pack (v_pmaplo, v_pmaphi);
869
- v_pmap = v_reinterpret_as_u8 (v_zero - v_reinterpret_as_s8 (v_pmap));
870
-
838
// Lanes equal to 2 become v_ff (0xFF); every other lane becomes v_zero (0).
+ v_pmap = v_select (v_pmap == v_two, v_ff, v_zero);
871
839
v_store ((pdst + j), v_pmap);
872
- j += 16 ;
873
840
}
874
841
875
- if (j <= dst.cols - 8 ) {
876
- v_uint8x16 v_pmap = v_load_halves ((const unsigned char *)(pmap + j), (const unsigned char *)(pmap + j));
877
-
878
- v_uint16x8 v_pmaplo;
879
- v_uint16x8 v_pmaphi;
880
- v_expand (v_pmap, v_pmaplo, v_pmaphi);
881
-
882
- v_pmaplo = v_pmaplo >> 1 ;
883
- v_pmaphi = v_pmaphi >> 1 ;
884
-
885
- v_pmap = v_pack (v_pmaplo, v_pmaphi);
886
- v_pmap = v_reinterpret_as_u8 (v_zero - v_reinterpret_as_s8 (v_pmap));
887
-
842
// At most one 8-byte step: the added version loads with v_load_low, applies the same
// select, and stores with v_store_low before advancing j by 8.
+ if (j <= dst.cols - 8 )
843
+ {
844
+ v_uint8x16 v_pmap = v_load_low ((const unsigned char *)(pmap + j));
845
+ v_pmap = v_select (v_pmap == v_two, v_ff, v_zero);
888
846
v_store_low ((pdst + j), v_pmap);
889
847
j += 8 ;
890
848
}
891
849
}
892
850
#endif
893
851
// Scalar loop over the columns not handled above (all columns when the SIMD branch is not
// taken): stores the negation of (pmap[j] >> 1) truncated to uchar, i.e. 0xFF when the
// shifted value is 1 and 0 when it is 0.
for (; j < dst.cols ; j++)
852
+ {
894
853
pdst[j] = (uchar)-(pmap[j] >> 1 );
854
+ }
895
855
}
896
856
}
897
857
@@ -901,6 +861,9 @@ class finalPass : public ParallelLoopBody
901
861
#if CV_SIMD128
902
862
bool haveSIMD;
903
863
#endif
864
+
865
// Copy constructor and copy assignment are declared here without a body; the trailing
// "= delete" notes mark the intent that instances are not meant to be copied.
finalPass (const finalPass&); // = delete
866
finalPass& operator =(const finalPass&); // = delete
904
867
};
905
868
906
869
#ifdef HAVE_OPENVX
0 commit comments