Skip to content

Commit 11330b9

Browse files
committed
Merge pull request opencv#10095 from alalek:fix_canny_intrinsics
2 parents 8254169 + baff521 commit 11330b9

File tree

1 file changed

+18
-55
lines changed

1 file changed

+18
-55
lines changed

modules/imgproc/src/canny.cpp

Lines changed: 18 additions & 55 deletions
Original file line number | Diff line number | Diff line change
@@ -812,86 +812,46 @@ class finalPass : public ParallelLoopBody
812812

813813
~finalPass() {}
814814

815-
finalPass& operator=(const finalPass&) {return *this;}
816-
817815
void operator()(const Range &boundaries) const
818816
{
819817
// the final pass, form the final image
820818
for (int i = boundaries.start; i < boundaries.end; i++)
821819
{
822820
int j = 0;
823821
uchar *pdst = dst.ptr<uchar>(i);
824-
uchar *pmap;
822+
const uchar *pmap = map.ptr<uchar>(i + 1);
825823
#if CV_SIMD128
826824
if(haveSIMD)
827-
pmap = (uchar*)map.ptr<uchar>(i + 1) + CV_MALLOC_SIMD128;
825+
pmap += CV_MALLOC_SIMD128;
828826
else
829827
#endif
830-
pmap = (uchar*)map.ptr<uchar>(i + 1) + 1;
828+
pmap += 1;
831829
#if CV_SIMD128
832830
if(haveSIMD) {
833-
const v_int8x16 v_zero = v_setzero_s8();
834-
835-
for(; j <= dst.cols - 32; j += 32) {
836-
v_uint8x16 v_pmap1 = v_load_aligned((const unsigned char*)(pmap + j));
837-
v_uint8x16 v_pmap2 = v_load_aligned((const unsigned char*)(pmap + j + 16));
838-
839-
v_uint16x8 v_pmaplo1, v_pmaphi1, v_pmaplo2, v_pmaphi2;
840-
v_expand(v_pmap1, v_pmaplo1, v_pmaphi1);
841-
v_expand(v_pmap2, v_pmaplo2, v_pmaphi2);
842-
843-
v_pmaplo1 = v_pmaplo1 >> 1;
844-
v_pmaphi1 = v_pmaphi1 >> 1;
845-
v_pmaplo2 = v_pmaplo2 >> 1;
846-
v_pmaphi2 = v_pmaphi2 >> 1;
847-
848-
v_pmap1 = v_pack(v_pmaplo1, v_pmaphi1);
849-
v_pmap2 = v_pack(v_pmaplo2, v_pmaphi2);
850-
851-
v_pmap1 = v_reinterpret_as_u8(v_zero - v_reinterpret_as_s8(v_pmap1));
852-
v_pmap2 = v_reinterpret_as_u8(v_zero - v_reinterpret_as_s8(v_pmap2));
831+
const v_uint8x16 v_zero = v_setzero_u8();
832+
const v_uint8x16 v_ff = ~v_zero;
833+
const v_uint8x16 v_two(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
853834

854-
v_store((pdst + j), v_pmap1);
855-
v_store((pdst + j + 16), v_pmap2);
856-
}
857-
858-
if(j <= dst.cols - 16) {
835+
for (; j <= dst.cols - 16; j += 16)
836+
{
859837
v_uint8x16 v_pmap = v_load_aligned((const unsigned char*)(pmap + j));
860-
861-
v_uint16x8 v_pmaplo;
862-
v_uint16x8 v_pmaphi;
863-
v_expand(v_pmap, v_pmaplo, v_pmaphi);
864-
865-
v_pmaplo = v_pmaplo >> 1;
866-
v_pmaphi = v_pmaphi >> 1;
867-
868-
v_pmap = v_pack(v_pmaplo, v_pmaphi);
869-
v_pmap = v_reinterpret_as_u8(v_zero - v_reinterpret_as_s8(v_pmap));
870-
838+
v_pmap = v_select(v_pmap == v_two, v_ff, v_zero);
871839
v_store((pdst + j), v_pmap);
872-
j += 16;
873840
}
874841

875-
if(j <= dst.cols - 8) {
876-
v_uint8x16 v_pmap = v_load_halves((const unsigned char*)(pmap + j), (const unsigned char*)(pmap + j));
877-
878-
v_uint16x8 v_pmaplo;
879-
v_uint16x8 v_pmaphi;
880-
v_expand(v_pmap, v_pmaplo, v_pmaphi);
881-
882-
v_pmaplo = v_pmaplo >> 1;
883-
v_pmaphi = v_pmaphi >> 1;
884-
885-
v_pmap = v_pack(v_pmaplo, v_pmaphi);
886-
v_pmap = v_reinterpret_as_u8(v_zero - v_reinterpret_as_s8(v_pmap));
887-
842+
if (j <= dst.cols - 8)
843+
{
844+
v_uint8x16 v_pmap = v_load_low((const unsigned char*)(pmap + j));
845+
v_pmap = v_select(v_pmap == v_two, v_ff, v_zero);
888846
v_store_low((pdst + j), v_pmap);
889847
j += 8;
890848
}
891849
}
892850
#endif
893851
for (; j < dst.cols; j++)
852+
{
894853
pdst[j] = (uchar)-(pmap[j] >> 1);
854+
}
895855
}
896856
}
897857

@@ -901,6 +861,9 @@ class finalPass : public ParallelLoopBody
901861
#if CV_SIMD128
902862
bool haveSIMD;
903863
#endif
864+
865+
finalPass(const finalPass&); // = delete
866+
finalPass& operator=(const finalPass&); // = delete
904867
};
905868

906869
#ifdef HAVE_OPENVX

0 commit comments

Comments
 (0)