
Commit 9439872

Merge pull request opencv#9021 from terfendail:corner_avx
2 parents f6dd549 + 1ed9a58

3 files changed: +263 -134 lines changed

modules/imgproc/src/corner.avx.cpp

Lines changed: 181 additions & 0 deletions
@@ -0,0 +1,181 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2014-2015, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"
#include "opencv2/core/hal/intrin.hpp"
#include "corner.hpp"

namespace cv
{

// load three 8-packed float vector and deinterleave
// probably it's better to write down somewhere else
static void load_deinterleave(const float* ptr, __m256& a, __m256& b, __m256& c)
{
    __m256 s0 = _mm256_loadu_ps(ptr);                    // a0, b0, c0, a1, b1, c1, a2, b2,
    __m256 s1 = _mm256_loadu_ps(ptr + 8);                // c2, a3, b3, c3, a4, b4, c4, a5,
    __m256 s2 = _mm256_loadu_ps(ptr + 16);               // b5, c5, a6, b6, c6, a7, b7, c7,
    __m256 s3 = _mm256_permute2f128_ps(s1, s2, 0x21);    // a4, b4, c4, a5, b5, c5, a6, b6,
    __m256 s4 = _mm256_permute2f128_ps(s2, s2, 0x33);    // c6, a7, b7, c7, c6, a7, b7, c7,

    __m256 v00 = _mm256_unpacklo_ps(s0, s3);             // a0, a4, b0, b4, b1, b5, c1, c5,
    __m256 v01 = _mm256_unpackhi_ps(s0, s3);             // c0, c4, a1, a5, a2, a6, b2, b6,
    __m256 v02 = _mm256_unpacklo_ps(s1, s4);             // c2, c6, a3, a7, x, x, x, x,
    __m256 v03 = _mm256_unpackhi_ps(s1, s4);             // b3, b7, c3, c7, x, x, x, x,
    __m256 v04 = _mm256_permute2f128_ps(v02, v03, 0x20); // c2, c6, a3, a7, b3, b7, c3, c7,
    __m256 v05 = _mm256_permute2f128_ps(v01, v03, 0x21); // a2, a6, b2, b6, b3, b7, c3, c7,

    __m256 v10 = _mm256_unpacklo_ps(v00, v05);           // a0, a2, a4, a6, b1, b3, b5, b7,
    __m256 v11 = _mm256_unpackhi_ps(v00, v05);           // b0, b2, b4, b6, c1, c3, c5, c7,
    __m256 v12 = _mm256_unpacklo_ps(v01, v04);           // c0, c2, c4, c6, x, x, x, x,
    __m256 v13 = _mm256_unpackhi_ps(v01, v04);           // a1, a3, a5, a7, x, x, x, x,
    __m256 v14 = _mm256_permute2f128_ps(v11, v12, 0x20); // b0, b2, b4, b6, c0, c2, c4, c6,
    __m256 v15 = _mm256_permute2f128_ps(v10, v11, 0x31); // b1, b3, b5, b7, c1, c3, c5, c7,

    __m256 v20 = _mm256_unpacklo_ps(v14, v15);           // b0, b1, b2, b3, c0, c1, c2, c3,
    __m256 v21 = _mm256_unpackhi_ps(v14, v15);           // b4, b5, b6, b7, c4, c5, c6, c7,
    __m256 v22 = _mm256_unpacklo_ps(v10, v13);           // a0, a1, a2, a3, x, x, x, x,
    __m256 v23 = _mm256_unpackhi_ps(v10, v13);           // a4, a5, a6, a7, x, x, x, x,

    a = _mm256_permute2f128_ps(v22, v23, 0x20);          // a0, a1, a2, a3, a4, a5, a6, a7,
    b = _mm256_permute2f128_ps(v20, v21, 0x20);          // b0, b1, b2, b3, b4, b5, b6, b7,
    c = _mm256_permute2f128_ps(v20, v21, 0x31);          // c0, c1, c2, c3, c4, c5, c6, c7,
}

// realign four 3-packed vector to three 4-packed vector
static void v_pack4x3to3x4(const __m128i& s0, const __m128i& s1, const __m128i& s2, const __m128i& s3, __m128i& d0, __m128i& d1, __m128i& d2)
{
    d0 = _mm_or_si128(s0, _mm_slli_si128(s1, 12));
    d1 = _mm_or_si128(_mm_srli_si128(s1, 4), _mm_slli_si128(s2, 8));
    d2 = _mm_or_si128(_mm_srli_si128(s2, 8), _mm_slli_si128(s3, 4));
}

// separate high and low 128 bit and cast to __m128i
static void v_separate_lo_hi(const __m256& src, __m128i& lo, __m128i& hi)
{
    lo = _mm_castps_si128(_mm256_castps256_ps128(src));
    hi = _mm_castps_si128(_mm256_extractf128_ps(src, 1));
}

// interleave three 8-float vector and store
static void store_interleave(float* ptr, const __m256& a, const __m256& b, const __m256& c)
{
    __m128i a0, a1, b0, b1, c0, c1;
    v_separate_lo_hi(a, a0, a1);
    v_separate_lo_hi(b, b0, b1);
    v_separate_lo_hi(c, c0, c1);

    v_uint32x4 z = v_setzero_u32();
    v_uint32x4 u0, u1, u2, u3;
    v_transpose4x4(v_uint32x4(a0), v_uint32x4(b0), v_uint32x4(c0), z, u0, u1, u2, u3);
    v_pack4x3to3x4(u0.val, u1.val, u2.val, u3.val, a0, b0, c0);
    v_transpose4x4(v_uint32x4(a1), v_uint32x4(b1), v_uint32x4(c1), z, u0, u1, u2, u3);
    v_pack4x3to3x4(u0.val, u1.val, u2.val, u3.val, a1, b1, c1);

#if !defined(__GNUC__) || defined(__INTEL_COMPILER)
    _mm256_storeu_ps(ptr, _mm256_setr_m128(_mm_castsi128_ps(a0), _mm_castsi128_ps(b0)));
    _mm256_storeu_ps(ptr + 8, _mm256_setr_m128(_mm_castsi128_ps(c0), _mm_castsi128_ps(a1)));
    _mm256_storeu_ps(ptr + 16, _mm256_setr_m128(_mm_castsi128_ps(b1), _mm_castsi128_ps(c1)));
#else
    // GCC: workaround for missing AVX intrinsic: "_mm256_setr_m128()"
    _mm256_storeu_ps(ptr, _mm256_insertf128_ps(_mm256_castps128_ps256(_mm_castsi128_ps(a0)), _mm_castsi128_ps(b0), 1));
    _mm256_storeu_ps(ptr + 8, _mm256_insertf128_ps(_mm256_castps128_ps256(_mm_castsi128_ps(c0)), _mm_castsi128_ps(a1), 1));
    _mm256_storeu_ps(ptr + 16, _mm256_insertf128_ps(_mm256_castps128_ps256(_mm_castsi128_ps(b1)), _mm_castsi128_ps(c1), 1));
#endif
}

int calcMinEigenValLine_AVX(const float* cov, float* dst, int width)
{
    int j = 0;
    __m256 half = _mm256_set1_ps(0.5f);
    for (; j <= width - 8; j += 8)
    {
        __m256 v_a, v_b, v_c, v_t;
        load_deinterleave(cov + j * 3, v_a, v_b, v_c);
        v_a = _mm256_mul_ps(v_a, half);
        v_c = _mm256_mul_ps(v_c, half);
        v_t = _mm256_sub_ps(v_a, v_c);
        v_t = _mm256_add_ps(_mm256_mul_ps(v_b, v_b), _mm256_mul_ps(v_t, v_t));
        _mm256_storeu_ps(dst + j, _mm256_sub_ps(_mm256_add_ps(v_a, v_c), _mm256_sqrt_ps(v_t)));
    }
    return j;
}

int calcHarrisLine_AVX(const float* cov, float* dst, double k, int width)
{
    int j = 0;
    __m256 v_k = _mm256_set1_ps((float)k);

    for (; j <= width - 8; j += 8)
    {
        __m256 v_a, v_b, v_c;
        load_deinterleave(cov + j * 3, v_a, v_b, v_c);

        __m256 v_ac_bb = _mm256_sub_ps(_mm256_mul_ps(v_a, v_c), _mm256_mul_ps(v_b, v_b));
        __m256 v_ac = _mm256_add_ps(v_a, v_c);
        __m256 v_dst = _mm256_sub_ps(v_ac_bb, _mm256_mul_ps(v_k, _mm256_mul_ps(v_ac, v_ac)));
        _mm256_storeu_ps(dst + j, v_dst);
    }
    return j;
}

int cornerEigenValsVecsLine_AVX(const float* dxdata, const float* dydata, float* cov_data, int width)
{
    int j = 0;
    for (; j <= width - 8; j += 8)
    {
        __m256 v_dx = _mm256_loadu_ps(dxdata + j);
        __m256 v_dy = _mm256_loadu_ps(dydata + j);

        __m256 v_dst0, v_dst1, v_dst2;
        v_dst0 = _mm256_mul_ps(v_dx, v_dx);
        v_dst1 = _mm256_mul_ps(v_dx, v_dy);
        v_dst2 = _mm256_mul_ps(v_dy, v_dy);

        store_interleave(cov_data + j * 3, v_dst0, v_dst1, v_dst2);
    }
    return j;
}

}
/* End of file */
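
For reference, the per-pixel arithmetic that calcMinEigenValLine_AVX and calcHarrisLine_AVX vectorize can be written as a plain scalar sketch. This sketch is illustrative only and not part of the commit; the *_scalar names are hypothetical, and cov is the interleaved (dx*dx, dx*dy, dy*dy) triplet buffer that cornerEigenValsVecsLine_AVX produces.

#include <cmath>

// Scalar sketch of calcMinEigenValLine_AVX: the smaller eigenvalue of the 2x2
// structure tensor M = [[A, B], [B, C]], computed as
// (A + C)/2 - sqrt(((A - C)/2)^2 + B^2), with A = cov[3j], B = cov[3j+1], C = cov[3j+2].
static void calcMinEigenValLine_scalar(const float* cov, float* dst, int width)
{
    for (int j = 0; j < width; j++)
    {
        float a = cov[j * 3] * 0.5f;
        float b = cov[j * 3 + 1];
        float c = cov[j * 3 + 2] * 0.5f;
        dst[j] = (a + c) - std::sqrt((a - c) * (a - c) + b * b);
    }
}

// Scalar sketch of calcHarrisLine_AVX: the Harris response det(M) - k * trace(M)^2.
static void calcHarrisLine_scalar(const float* cov, float* dst, double k, int width)
{
    for (int j = 0; j < width; j++)
    {
        float a = cov[j * 3], b = cov[j * 3 + 1], c = cov[j * 3 + 2];
        dst[j] = (float)(a * c - b * b - k * (a + c) * (a + c));
    }
}

Both AVX line functions return the first unprocessed index (a multiple of 8), so the callers in corner.cpp can finish the at most 7 remaining pixels per row with their existing SIMD128 or scalar code.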

modules/imgproc/src/corner.cpp

Lines changed: 22 additions & 134 deletions
@@ -44,94 +44,17 @@
 #include "precomp.hpp"
 #include "opencl_kernels_imgproc.hpp"
 #include "opencv2/core/hal/intrin.hpp"
+#include "corner.hpp"

 namespace cv
 {

-#if CV_AVX
-// load three 8-packed float vector and deinterleave
-// probably it's better to write down somewhere else
-static inline void load_deinterleave(const float* ptr, __m256& a, __m256& b, __m256& c)
-{
-    __m256 s0 = _mm256_loadu_ps(ptr);                    // a0, b0, c0, a1, b1, c1, a2, b2,
-    __m256 s1 = _mm256_loadu_ps(ptr + 8);                // c2, a3, b3, c3, a4, b4, c4, a5,
-    __m256 s2 = _mm256_loadu_ps(ptr + 16);               // b5, c5, a6, b6, c6, a7, b7, c7,
-    __m256 s3 = _mm256_permute2f128_ps(s1, s2, 0x21);    // a4, b4, c4, a5, b5, c5, a6, b6,
-    __m256 s4 = _mm256_permute2f128_ps(s2, s2, 0x33);    // c6, a7, b7, c7, c6, a7, b7, c7,
-
-    __m256 v00 = _mm256_unpacklo_ps(s0, s3);             // a0, a4, b0, b4, b1, b5, c1, c5,
-    __m256 v01 = _mm256_unpackhi_ps(s0, s3);             // c0, c4, a1, a5, a2, a6, b2, b6,
-    __m256 v02 = _mm256_unpacklo_ps(s1, s4);             // c2, c6, a3, a7, x, x, x, x,
-    __m256 v03 = _mm256_unpackhi_ps(s1, s4);             // b3, b7, c3, c7, x, x, x, x,
-    __m256 v04 = _mm256_permute2f128_ps(v02, v03, 0x20); // c2, c6, a3, a7, b3, b7, c3, c7,
-    __m256 v05 = _mm256_permute2f128_ps(v01, v03, 0x21); // a2, a6, b2, b6, b3, b7, c3, c7,
-
-    __m256 v10 = _mm256_unpacklo_ps(v00, v05);           // a0, a2, a4, a6, b1, b3, b5, b7,
-    __m256 v11 = _mm256_unpackhi_ps(v00, v05);           // b0, b2, b4, b6, c1, c3, c5, c7,
-    __m256 v12 = _mm256_unpacklo_ps(v01, v04);           // c0, c2, c4, c6, x, x, x, x,
-    __m256 v13 = _mm256_unpackhi_ps(v01, v04);           // a1, a3, a5, a7, x, x, x, x,
-    __m256 v14 = _mm256_permute2f128_ps(v11, v12, 0x20); // b0, b2, b4, b6, c0, c2, c4, c6,
-    __m256 v15 = _mm256_permute2f128_ps(v10, v11, 0x31); // b1, b3, b5, b7, c1, c3, c5, c7,
-
-    __m256 v20 = _mm256_unpacklo_ps(v14, v15);           // b0, b1, b2, b3, c0, c1, c2, c3,
-    __m256 v21 = _mm256_unpackhi_ps(v14, v15);           // b4, b5, b6, b7, c4, c5, c6, c7,
-    __m256 v22 = _mm256_unpacklo_ps(v10, v13);           // a0, a1, a2, a3, x, x, x, x,
-    __m256 v23 = _mm256_unpackhi_ps(v10, v13);           // a4, a5, a6, a7, x, x, x, x,
-
-    a = _mm256_permute2f128_ps(v22, v23, 0x20);          // a0, a1, a2, a3, a4, a5, a6, a7,
-    b = _mm256_permute2f128_ps(v20, v21, 0x20);          // b0, b1, b2, b3, b4, b5, b6, b7,
-    c = _mm256_permute2f128_ps(v20, v21, 0x31);          // c0, c1, c2, c3, c4, c5, c6, c7,
-}
-
-// realign four 3-packed vector to three 4-packed vector
-static inline void v_pack4x3to3x4(const __m128i& s0, const __m128i& s1, const __m128i& s2, const __m128i& s3, __m128i& d0, __m128i& d1, __m128i& d2)
-{
-    d0 = _mm_or_si128(s0, _mm_slli_si128(s1, 12));
-    d1 = _mm_or_si128(_mm_srli_si128(s1, 4), _mm_slli_si128(s2, 8));
-    d2 = _mm_or_si128(_mm_srli_si128(s2, 8), _mm_slli_si128(s3, 4));
-}
-
-// separate high and low 128 bit and cast to __m128i
-static inline void v_separate_lo_hi(const __m256& src, __m128i& lo, __m128i& hi)
-{
-    lo = _mm_castps_si128(_mm256_castps256_ps128(src));
-    hi = _mm_castps_si128(_mm256_extractf128_ps(src, 1));
-}
-
-// interleave three 8-float vector and store
-static inline void store_interleave(float* ptr, const __m256& a, const __m256& b, const __m256& c)
-{
-    __m128i a0, a1, b0, b1, c0, c1;
-    v_separate_lo_hi(a, a0, a1);
-    v_separate_lo_hi(b, b0, b1);
-    v_separate_lo_hi(c, c0, c1);
-
-    v_uint32x4 z = v_setzero_u32();
-    v_uint32x4 u0, u1, u2, u3;
-    v_transpose4x4(v_uint32x4(a0), v_uint32x4(b0), v_uint32x4(c0), z, u0, u1, u2, u3);
-    v_pack4x3to3x4(u0.val, u1.val, u2.val, u3.val, a0, b0, c0);
-    v_transpose4x4(v_uint32x4(a1), v_uint32x4(b1), v_uint32x4(c1), z, u0, u1, u2, u3);
-    v_pack4x3to3x4(u0.val, u1.val, u2.val, u3.val, a1, b1, c1);
-
-#if !defined(__GNUC__) || defined(__INTEL_COMPILER)
-    _mm256_storeu_ps(ptr, _mm256_setr_m128(_mm_castsi128_ps(a0), _mm_castsi128_ps(b0)));
-    _mm256_storeu_ps(ptr + 8, _mm256_setr_m128(_mm_castsi128_ps(c0), _mm_castsi128_ps(a1)));
-    _mm256_storeu_ps(ptr + 16, _mm256_setr_m128(_mm_castsi128_ps(b1), _mm_castsi128_ps(c1)));
-#else
-    // GCC: workaround for missing AVX intrinsic: "_mm256_setr_m128()"
-    _mm256_storeu_ps(ptr, _mm256_insertf128_ps(_mm256_castps128_ps256(_mm_castsi128_ps(a0)), _mm_castsi128_ps(b0), 1));
-    _mm256_storeu_ps(ptr + 8, _mm256_insertf128_ps(_mm256_castps128_ps256(_mm_castsi128_ps(c0)), _mm_castsi128_ps(a1), 1));
-    _mm256_storeu_ps(ptr + 16, _mm256_insertf128_ps(_mm256_castps128_ps256(_mm_castsi128_ps(b1)), _mm_castsi128_ps(c1), 1));
-#endif
-}
-#endif // CV_AVX
-
 static void calcMinEigenVal( const Mat& _cov, Mat& _dst )
 {
     int i, j;
     Size size = _cov.size();
-#if CV_AVX
-    bool haveAvx = checkHardwareSupport(CV_CPU_AVX);
+#if CV_TRY_AVX
+    bool haveAvx = CV_CPU_HAS_SUPPORT_AVX;
 #endif
 #if CV_SIMD128
     bool haveSimd = hasSIMD128();
@@ -147,23 +70,12 @@ static void calcMinEigenVal( const Mat& _cov, Mat& _dst )
     {
         const float* cov = _cov.ptr<float>(i);
         float* dst = _dst.ptr<float>(i);
-        j = 0;
-#if CV_AVX
+#if CV_TRY_AVX
         if( haveAvx )
-        {
-            __m256 half = _mm256_set1_ps(0.5f);
-            for( ; j <= size.width - 8; j += 8 )
-            {
-                __m256 v_a, v_b, v_c, v_t;
-                load_deinterleave(cov + j*3, v_a, v_b, v_c);
-                v_a = _mm256_mul_ps(v_a, half);
-                v_c = _mm256_mul_ps(v_c, half);
-                v_t = _mm256_sub_ps(v_a, v_c);
-                v_t = _mm256_add_ps(_mm256_mul_ps(v_b, v_b), _mm256_mul_ps(v_t, v_t));
-                _mm256_storeu_ps(dst + j, _mm256_sub_ps(_mm256_add_ps(v_a, v_c), _mm256_sqrt_ps(v_t)));
-            }
-        }
-#endif // CV_AVX
+            j = calcMinEigenValLine_AVX(cov, dst, size.width);
+        else
+#endif // CV_TRY_AVX
+            j = 0;

 #if CV_SIMD128
         if( haveSimd )
@@ -197,8 +109,8 @@ static void calcHarris( const Mat& _cov, Mat& _dst, double k )
 {
     int i, j;
     Size size = _cov.size();
-#if CV_AVX
-    bool haveAvx = checkHardwareSupport(CV_CPU_AVX);
+#if CV_TRY_AVX
+    bool haveAvx = CV_CPU_HAS_SUPPORT_AVX;
 #endif
 #if CV_SIMD128
     bool haveSimd = hasSIMD128();
@@ -214,25 +126,13 @@ static void calcHarris( const Mat& _cov, Mat& _dst, double k )
     {
         const float* cov = _cov.ptr<float>(i);
         float* dst = _dst.ptr<float>(i);
-        j = 0;

-#if CV_AVX
+#if CV_TRY_AVX
         if( haveAvx )
-        {
-            __m256 v_k = _mm256_set1_ps((float)k);
-
-            for( ; j <= size.width - 8; j += 8 )
-            {
-                __m256 v_a, v_b, v_c;
-                load_deinterleave(cov + j * 3, v_a, v_b, v_c);
-
-                __m256 v_ac_bb = _mm256_sub_ps(_mm256_mul_ps(v_a, v_c), _mm256_mul_ps(v_b, v_b));
-                __m256 v_ac = _mm256_add_ps(v_a, v_c);
-                __m256 v_dst = _mm256_sub_ps(v_ac_bb, _mm256_mul_ps(v_k, _mm256_mul_ps(v_ac, v_ac)));
-                _mm256_storeu_ps(dst + j, v_dst);
-            }
-        }
-#endif // CV_AVX
+            j = calcHarrisLine_AVX(cov, dst, k, size.width);
+        else
+#endif // CV_TRY_AVX
+            j = 0;

 #if CV_SIMD128
         if( haveSimd )
@@ -351,8 +251,8 @@ cornerEigenValsVecs( const Mat& src, Mat& eigenv, int block_size,
     if (tegra::useTegra() && tegra::cornerEigenValsVecs(src, eigenv, block_size, aperture_size, op_type, k, borderType))
         return;
 #endif
-#if CV_AVX
-    bool haveAvx = checkHardwareSupport(CV_CPU_AVX);
+#if CV_TRY_AVX
+    bool haveAvx = CV_CPU_HAS_SUPPORT_AVX;
 #endif
 #if CV_SIMD128
     bool haveSimd = hasSIMD128();
@@ -389,25 +289,13 @@ cornerEigenValsVecs( const Mat& src, Mat& eigenv, int block_size,
         float* cov_data = cov.ptr<float>(i);
         const float* dxdata = Dx.ptr<float>(i);
         const float* dydata = Dy.ptr<float>(i);
-        j = 0;

-#if CV_AVX
+#if CV_TRY_AVX
         if( haveAvx )
-        {
-            for( ; j <= size.width - 8; j += 8 )
-            {
-                __m256 v_dx = _mm256_loadu_ps(dxdata + j);
-                __m256 v_dy = _mm256_loadu_ps(dydata + j);
-
-                __m256 v_dst0, v_dst1, v_dst2;
-                v_dst0 = _mm256_mul_ps(v_dx, v_dx);
-                v_dst1 = _mm256_mul_ps(v_dx, v_dy);
-                v_dst2 = _mm256_mul_ps(v_dy, v_dy);
-
-                store_interleave(cov_data + j * 3, v_dst0, v_dst1, v_dst2);
-            }
-        }
-#endif // CV_AVX
+            j = cornerEigenValsVecsLine_AVX(dxdata, dydata, cov_data, size.width);
+        else
+#endif // CV_TRY_AVX
+            j = 0;

 #if CV_SIMD128
         if( haveSimd )
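
All three call sites now share the same dispatch shape: the compile-time CV_AVX guard and runtime checkHardwareSupport(CV_CPU_AVX) test become CV_TRY_AVX and CV_CPU_HAS_SUPPORT_AVX, the inlined intrinsics become a call into corner.avx.cpp, and the returned index tells the generic code where the per-row tail starts. A condensed sketch of one call site follows, assuming the declarations from corner.hpp (the third changed file); it is illustrative only, not a verbatim excerpt of the commit.

#include "corner.hpp" // assumed to declare cornerEigenValsVecsLine_AVX() and friends

// Hypothetical helper showing the dispatch shape for one row of cornerEigenValsVecs().
static void cornerCovRow_sketch(const float* dxdata, const float* dydata, float* cov_data,
                                int width, bool haveAvx)
{
    int j;
#if CV_TRY_AVX
    if( haveAvx )
        j = cornerEigenValsVecsLine_AVX(dxdata, dydata, cov_data, width); // first width - width%8 pixels
    else
#endif
        j = 0;

    for( ; j < width; j++ ) // tail; the real code also runs a CV_SIMD128 path before falling back to scalar
    {
        float dx = dxdata[j], dy = dydata[j];
        cov_data[j * 3] = dx * dx;
        cov_data[j * 3 + 1] = dx * dy;
        cov_data[j * 3 + 2] = dy * dy;
    }
}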
