Skip to content

Commit 704c688

Browse files
committed
OCL code fixed, fix for NEON added
1 parent 6c71988 commit 704c688

File tree

2 files changed

+45
-21
lines changed

2 files changed

+45
-21
lines changed

modules/imgproc/src/color.cpp

Lines changed: 12 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -6608,6 +6608,14 @@ struct RGB2Luv_f
66086608
{
66096609
float32x4x3_t v_src = vld3q_f32(src);
66106610

6611+
v_src.val[0] = vmaxq_f32(v_src.val[0], vdupq_n_f32(0));
6612+
v_src.val[1] = vmaxq_f32(v_src.val[1], vdupq_n_f32(0));
6613+
v_src.val[2] = vmaxq_f32(v_src.val[2], vdupq_n_f32(0));
6614+
6615+
v_src.val[0] = vminq_f32(v_src.val[0], vdupq_n_f32(1));
6616+
v_src.val[1] = vminq_f32(v_src.val[1], vdupq_n_f32(1));
6617+
v_src.val[2] = vminq_f32(v_src.val[2], vdupq_n_f32(1));
6618+
66116619
if( gammaTab )
66126620
{
66136621
v_src.val[0] = vmulq_f32(v_src.val[0], vdupq_n_f32(gscale));
@@ -8574,7 +8582,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
85748582
coeffs[j] + coeffs[j + 1] + coeffs[j + 2] < 1.5f*(lab ? LabCbrtTabScale : 1) );
85758583
}
85768584

8577-
float d = 1.f/(_whitept[0] + _whitept[1]*15 + _whitept[2]*3);
8585+
float d = 1.f/std::max(_whitept[0] + _whitept[1]*15 + _whitept[2]*3, FLT_EPSILON);
85788586
un = 13*4*_whitept[0]*d;
85798587
vn = 13*9*_whitept[1]*d;
85808588

@@ -8641,9 +8649,9 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
86418649
coeffs[i+bidx*3] = _coeffs[i+6] * (lab ? _whitept[i] : 1);
86428650
}
86438651

8644-
float d = 1.f/(_whitept[0] + _whitept[1]*15 + _whitept[2]*3);
8645-
un = 4*_whitept[0]*d;
8646-
vn = 9*_whitept[1]*d;
8652+
float d = 1.f/std::max(_whitept[0] + _whitept[1]*15 + _whitept[2]*3, FLT_EPSILON);
8653+
un = 4*13*_whitept[0]*d;
8654+
vn = 9*13*_whitept[1]*d;
86478655

86488656
Mat(1, 9, CV_32FC1, coeffs).copyTo(ucoeffs);
86498657
}

modules/imgproc/src/opencl/cvtcolor.cl

Lines changed: 33 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -1963,6 +1963,10 @@ __kernel void BGR2Luv(__global const uchar * srcptr, int src_step, int src_offse
19631963

19641964
float R = src[0], G = src[1], B = src[2];
19651965

1966+
R = clamp(R, 0.f, 1.f);
1967+
G = clamp(G, 0.f, 1.f);
1968+
B = clamp(B, 0.f, 1.f);
1969+
19661970
#ifdef SRGB
19671971
R = splineInterpolate(R*GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
19681972
G = splineInterpolate(G*GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
@@ -2067,15 +2071,21 @@ __kernel void Luv2BGR(__global const uchar * srcptr, int src_step, int src_offse
20672071
__global const float * src = (__global const float *)(srcptr + src_index);
20682072
__global float * dst = (__global float *)(dstptr + dst_index);
20692073

2070-
float L = src[0], u = src[1], v = src[2], d, X, Y, Z;
2071-
Y = (L + 16.f) * (1.f/116.f);
2072-
Y = Y*Y*Y;
2073-
d = (1.f/13.f)/L;
2074-
u = fma(u, d, _un);
2075-
v = fma(v, d, _vn);
2076-
float iv = 1.f/v;
2077-
X = 2.25f * u * Y * iv;
2078-
Z = (12 - fma(3.0f, u, 20.0f * v)) * Y * 0.25f * iv;
2074+
float L = src[0], u = src[1], v = src[2], X, Y, Z;
2075+
if(L >= 8)
2076+
{
2077+
Y = fma(L, 1.f/116.f, 16.f/116.f);
2078+
Y = Y*Y*Y;
2079+
}
2080+
else
2081+
{
2082+
Y = L * (1.0f/903.3f); // L*(3./29.)^3
2083+
}
2084+
float up = 3.f*fma(L, _un, u);
2085+
float vp = 0.25f/fma(L, _vn, v);
2086+
vp = clamp(vp, -0.25f, 0.25f);
2087+
X = 3.f*Y*up*vp;
2088+
Z = Y*fma(fma(12.f*13.f, L, -up), vp, -5.f);
20792089

20802090
float R = fma(X, coeffs[0], fma(Y, coeffs[1], Z * coeffs[2]));
20812091
float G = fma(X, coeffs[3], fma(Y, coeffs[4], Z * coeffs[5]));
@@ -2129,14 +2139,20 @@ __kernel void Luv2BGR(__global const uchar * src, int src_step, int src_offset,
21292139
float L = src[0]*(100.f/255.f);
21302140
float u = fma(convert_float(src[1]), 1.388235294117647f, -134.f);
21312141
float v = fma(convert_float(src[2]), 1.027450980392157f, - 140.f);
2132-
Y = (L + 16.f) * (1.f/116.f);
2133-
Y = Y*Y*Y;
2134-
d = (1.f/13.f)/L;
2135-
u = fma(u, d, _un);
2136-
v = fma(v, d, _vn);
2137-
float iv = 1.f/v;
2138-
X = 2.25f * u * Y * iv ;
2139-
Z = (12 - fma(3.0f, u, 20.0f * v)) * Y * 0.25f * iv;
2142+
if(L >= 8)
2143+
{
2144+
Y = fma(L, 1.f/116.f, 16.f/116.f);
2145+
Y = Y*Y*Y;
2146+
}
2147+
else
2148+
{
2149+
Y = L * (1.0f/903.3f); // L*(3./29.)^3
2150+
}
2151+
float up = 3.f*fma(L, _un, u);
2152+
float vp = 0.25f/fma(L, _vn, v);
2153+
vp = clamp(vp, -0.25f, 0.25f);
2154+
X = 3.f*Y*up*vp;
2155+
Z = Y*fma(fma(12.f*13.f, L, -up), vp, -5.f);
21402156

21412157
float R = fma(X, coeffs[0], fma(Y, coeffs[1], Z * coeffs[2]));
21422158
float G = fma(X, coeffs[3], fma(Y, coeffs[4], Z * coeffs[5]));

0 commit comments

Comments
 (0)