Skip to content

Commit fe7fd4c

Browse files
committed
Merge pull request opencv#9098 from savuor:fix/luv_div
2 parents 431e2e6 + aa621d6 commit fe7fd4c

File tree

2 files changed

+136
-51
lines changed

2 files changed

+136
-51
lines changed

modules/imgproc/src/color.cpp

Lines changed: 99 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -6495,7 +6495,7 @@ struct RGB2Luv_f
64956495
coeffs[i*3] + coeffs[i*3+1] + coeffs[i*3+2] < 1.5f );
64966496
}
64976497

6498-
float d = 1.f/(whitept[0] + whitept[1]*15 + whitept[2]*3);
6498+
float d = 1.f/std::max(whitept[0] + whitept[1]*15 + whitept[2]*3, FLT_EPSILON);
64996499
un = 4*whitept[0]*d*13;
65006500
vn = 9*whitept[1]*d*13;
65016501

@@ -6607,6 +6607,15 @@ struct RGB2Luv_f
66076607
for( ; i <= n - 12; i += 12, src += scn * 4 )
66086608
{
66096609
float32x4x3_t v_src = vld3q_f32(src);
6610+
6611+
v_src.val[0] = vmaxq_f32(v_src.val[0], vdupq_n_f32(0));
6612+
v_src.val[1] = vmaxq_f32(v_src.val[1], vdupq_n_f32(0));
6613+
v_src.val[2] = vmaxq_f32(v_src.val[2], vdupq_n_f32(0));
6614+
6615+
v_src.val[0] = vminq_f32(v_src.val[0], vdupq_n_f32(1));
6616+
v_src.val[1] = vminq_f32(v_src.val[1], vdupq_n_f32(1));
6617+
v_src.val[2] = vminq_f32(v_src.val[2], vdupq_n_f32(1));
6618+
66106619
if( gammaTab )
66116620
{
66126621
v_src.val[0] = vmulq_f32(v_src.val[0], vdupq_n_f32(gscale));
@@ -6627,6 +6636,15 @@ struct RGB2Luv_f
66276636
for( ; i <= n - 12; i += 12, src += scn * 4 )
66286637
{
66296638
float32x4x4_t v_src = vld4q_f32(src);
6639+
6640+
v_src.val[0] = vmaxq_f32(v_src.val[0], vdupq_n_f32(0));
6641+
v_src.val[1] = vmaxq_f32(v_src.val[1], vdupq_n_f32(0));
6642+
v_src.val[2] = vmaxq_f32(v_src.val[2], vdupq_n_f32(0));
6643+
6644+
v_src.val[0] = vminq_f32(v_src.val[0], vdupq_n_f32(1));
6645+
v_src.val[1] = vminq_f32(v_src.val[1], vdupq_n_f32(1));
6646+
v_src.val[2] = vminq_f32(v_src.val[2], vdupq_n_f32(1));
6647+
66306648
if( gammaTab )
66316649
{
66326650
v_src.val[0] = vmulq_f32(v_src.val[0], vdupq_n_f32(gscale));
@@ -6670,6 +6688,20 @@ struct RGB2Luv_f
66706688
_mm_deinterleave_ps(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1, v_a0, v_a1);
66716689
}
66726690

6691+
v_r0 = _mm_max_ps(v_r0, _mm_setzero_ps());
6692+
v_r1 = _mm_max_ps(v_r1, _mm_setzero_ps());
6693+
v_g0 = _mm_max_ps(v_g0, _mm_setzero_ps());
6694+
v_g1 = _mm_max_ps(v_g1, _mm_setzero_ps());
6695+
v_b0 = _mm_max_ps(v_b0, _mm_setzero_ps());
6696+
v_b1 = _mm_max_ps(v_b1, _mm_setzero_ps());
6697+
6698+
v_r0 = _mm_min_ps(v_r0, _mm_set1_ps(1.f));
6699+
v_r1 = _mm_min_ps(v_r1, _mm_set1_ps(1.f));
6700+
v_g0 = _mm_min_ps(v_g0, _mm_set1_ps(1.f));
6701+
v_g1 = _mm_min_ps(v_g1, _mm_set1_ps(1.f));
6702+
v_b0 = _mm_min_ps(v_b0, _mm_set1_ps(1.f));
6703+
v_b1 = _mm_min_ps(v_b1, _mm_set1_ps(1.f));
6704+
66736705
if ( gammaTab )
66746706
{
66756707
__m128 v_gscale = _mm_set1_ps(gscale);
@@ -6704,6 +6736,9 @@ struct RGB2Luv_f
67046736
for( ; i < n; i += 3, src += scn )
67056737
{
67066738
float R = src[0], G = src[1], B = src[2];
6739+
R = std::min(std::max(R, 0.f), 1.f);
6740+
G = std::min(std::max(G, 0.f), 1.f);
6741+
B = std::min(std::max(B, 0.f), 1.f);
67076742
if( gammaTab )
67086743
{
67096744
R = splineInterpolate(R*gscale, gammaTab, GAMMA_TAB_SIZE);
@@ -6755,9 +6790,9 @@ struct Luv2RGB_f
67556790
coeffs[i+blueIdx*3] = _coeffs[i+6];
67566791
}
67576792

6758-
float d = 1.f/(whitept[0] + whitept[1]*15 + whitept[2]*3);
6759-
un = 4*whitept[0]*d;
6760-
vn = 9*whitept[1]*d;
6793+
float d = 1.f/std::max(whitept[0] + whitept[1]*15 + whitept[2]*3, FLT_EPSILON);
6794+
un = 4*13*whitept[0]*d;
6795+
vn = 9*13*whitept[1]*d;
67616796
#if CV_SSE2
67626797
haveSIMD = checkHardwareSupport(CV_CPU_SSE2);
67636798
#endif
@@ -6769,23 +6804,42 @@ struct Luv2RGB_f
67696804
void process(__m128& v_l0, __m128& v_l1, __m128& v_u0,
67706805
__m128& v_u1, __m128& v_v0, __m128& v_v1) const
67716806
{
6772-
__m128 v_y0 = _mm_mul_ps(_mm_add_ps(v_l0, _mm_set1_ps(16.0f)), _mm_set1_ps(1.f/116.f));
6773-
__m128 v_y1 = _mm_mul_ps(_mm_add_ps(v_l1, _mm_set1_ps(16.0f)), _mm_set1_ps(1.f/116.f));
6774-
v_y0 = _mm_mul_ps(_mm_mul_ps(v_y0, v_y0), v_y0);
6775-
v_y1 = _mm_mul_ps(_mm_mul_ps(v_y1, v_y1), v_y1);
6776-
__m128 v_d0 = _mm_div_ps(_mm_set1_ps(1.f/13.f), v_l0);
6777-
__m128 v_d1 = _mm_div_ps(_mm_set1_ps(1.f/13.f), v_l1);
6778-
v_u0 = _mm_mul_ps(_mm_add_ps(_mm_mul_ps(v_u0, v_d0), _mm_set1_ps(un)), _mm_set1_ps(3.f));
6779-
v_u1 = _mm_mul_ps(_mm_add_ps(_mm_mul_ps(v_u1, v_d1), _mm_set1_ps(un)), _mm_set1_ps(3.f));
6780-
v_v0 = _mm_add_ps(_mm_mul_ps(v_v0, v_d0), _mm_set1_ps(vn));
6781-
v_v1 = _mm_add_ps(_mm_mul_ps(v_v1, v_d1), _mm_set1_ps(vn));
6782-
__m128 v_iv0 = _mm_div_ps(_mm_set1_ps(0.25f), v_v0);
6783-
__m128 v_iv1 = _mm_div_ps(_mm_set1_ps(0.25f), v_v1);
6784-
__m128 v_x0 = _mm_mul_ps(_mm_mul_ps(_mm_set1_ps(3.f), v_u0), v_iv0);
6785-
__m128 v_x1 = _mm_mul_ps(_mm_mul_ps(_mm_set1_ps(3.f), v_u1), v_iv1);
6786-
__m128 v_z0 = _mm_mul_ps(_mm_sub_ps(_mm_sub_ps(_mm_set1_ps(12.f), v_u0), _mm_mul_ps(_mm_set1_ps(20.f), v_v0)), v_iv0);
6787-
__m128 v_z1 = _mm_mul_ps(_mm_sub_ps(_mm_sub_ps(_mm_set1_ps(12.f), v_u1), _mm_mul_ps(_mm_set1_ps(20.f), v_v1)), v_iv1);
6788-
6807+
// L*(3./29.)^3
6808+
__m128 v_y00 = _mm_mul_ps(v_l0, _mm_set1_ps(1.0f/903.3f));
6809+
__m128 v_y01 = _mm_mul_ps(v_l1, _mm_set1_ps(1.0f/903.3f));
6810+
// ((L + 16)/116)^3
6811+
__m128 v_y10 = _mm_mul_ps(_mm_add_ps(v_l0, _mm_set1_ps(16.0f)), _mm_set1_ps(1.f/116.f));
6812+
__m128 v_y11 = _mm_mul_ps(_mm_add_ps(v_l1, _mm_set1_ps(16.0f)), _mm_set1_ps(1.f/116.f));
6813+
v_y10 = _mm_mul_ps(_mm_mul_ps(v_y10, v_y10), v_y10);
6814+
v_y11 = _mm_mul_ps(_mm_mul_ps(v_y11, v_y11), v_y11);
6815+
// Y = (L < 8) ? Y0 : Y1;  (strict less-than, matching the scalar path's `L >= 8` test)
6816+
__m128 v_cmpl0 = _mm_cmplt_ps(v_l0, _mm_set1_ps(8.f));
6817+
__m128 v_cmpl1 = _mm_cmplt_ps(v_l1, _mm_set1_ps(8.f));
6818+
v_y00 = _mm_and_ps(v_cmpl0, v_y00);
6819+
v_y01 = _mm_and_ps(v_cmpl1, v_y01);
6820+
v_y10 = _mm_andnot_ps(v_cmpl0, v_y10);
6821+
v_y11 = _mm_andnot_ps(v_cmpl1, v_y11);
6822+
__m128 v_y0 = _mm_or_ps(v_y00, v_y10);
6823+
__m128 v_y1 = _mm_or_ps(v_y01, v_y11);
6824+
// up = 3*(u + L*_un);
6825+
__m128 v_up0 = _mm_mul_ps(_mm_set1_ps(3.f), _mm_add_ps(v_u0, _mm_mul_ps(v_l0, _mm_set1_ps(un))));
6826+
__m128 v_up1 = _mm_mul_ps(_mm_set1_ps(3.f), _mm_add_ps(v_u1, _mm_mul_ps(v_l1, _mm_set1_ps(un))));
6827+
// vp = 0.25/(v + L*_vn);
6828+
__m128 v_vp0 = _mm_div_ps(_mm_set1_ps(0.25f), _mm_add_ps(v_v0, _mm_mul_ps(v_l0, _mm_set1_ps(vn))));
6829+
__m128 v_vp1 = _mm_div_ps(_mm_set1_ps(0.25f), _mm_add_ps(v_v1, _mm_mul_ps(v_l1, _mm_set1_ps(vn))));
6830+
// vp = max(-0.25, min(0.25, vp));
6831+
v_vp0 = _mm_max_ps(v_vp0, _mm_set1_ps(-0.25f));
6832+
v_vp1 = _mm_max_ps(v_vp1, _mm_set1_ps(-0.25f));
6833+
v_vp0 = _mm_min_ps(v_vp0, _mm_set1_ps( 0.25f));
6834+
v_vp1 = _mm_min_ps(v_vp1, _mm_set1_ps( 0.25f));
6835+
//X = 3*up*vp; // (*Y) is done later
6836+
__m128 v_x0 = _mm_mul_ps(_mm_set1_ps(3.f), _mm_mul_ps(v_up0, v_vp0));
6837+
__m128 v_x1 = _mm_mul_ps(_mm_set1_ps(3.f), _mm_mul_ps(v_up1, v_vp1));
6838+
//Z = ((12*13*L - up)*vp - 5); // (*Y) is done later
6839+
__m128 v_z0 = _mm_sub_ps(_mm_mul_ps(_mm_sub_ps(_mm_mul_ps(_mm_set1_ps(12.f*13.f), v_l0), v_up0), v_vp0), _mm_set1_ps(5.f));
6840+
__m128 v_z1 = _mm_sub_ps(_mm_mul_ps(_mm_sub_ps(_mm_mul_ps(_mm_set1_ps(12.f*13.f), v_l1), v_up1), v_vp1), _mm_set1_ps(5.f));
6841+
6842+
// R = (X*C0 + C1 + Z*C2)*Y; // here (*Y) is done
67896843
v_l0 = _mm_mul_ps(v_x0, _mm_set1_ps(coeffs[0]));
67906844
v_l1 = _mm_mul_ps(v_x1, _mm_set1_ps(coeffs[0]));
67916845
v_u0 = _mm_mul_ps(v_x0, _mm_set1_ps(coeffs[3]));
@@ -6902,15 +6956,22 @@ struct Luv2RGB_f
69026956
#endif
69036957
for( ; i < n; i += 3, dst += dcn )
69046958
{
6905-
float L = src[i], u = src[i+1], v = src[i+2], d, X, Y, Z;
6906-
Y = (L + 16.f) * (1.f/116.f);
6907-
Y = Y*Y*Y;
6908-
d = (1.f/13.f)/L;
6909-
u = u*d + _un;
6910-
v = v*d + _vn;
6911-
float iv = 1.f/v;
6912-
X = 2.25f * u * Y * iv ;
6913-
Z = (12 - 3 * u - 20 * v) * Y * 0.25f * iv;
6959+
float L = src[i], u = src[i+1], v = src[i+2], X, Y, Z;
6960+
if(L >= 8)
6961+
{
6962+
Y = (L + 16.f) * (1.f/116.f);
6963+
Y = Y*Y*Y;
6964+
}
6965+
else
6966+
{
6967+
Y = L * (1.0f/903.3f); // L*(3./29.)^3
6968+
}
6969+
float up = 3.f*(u + L*_un);
6970+
float vp = 0.25f/(v + L*_vn);
6971+
if(vp > 0.25f) vp = 0.25f;
6972+
if(vp < -0.25f) vp = -0.25f;
6973+
X = Y*3.f*up*vp;
6974+
Z = Y*(((12.f*13.f)*L - up)*vp - 5.f);
69146975

69156976
float R = X*C0 + Y*C1 + Z*C2;
69166977
float G = X*C3 + Y*C4 + Z*C5;
@@ -6950,6 +7011,8 @@ struct RGB2Luv_b
69507011
const float* _whitept, bool _srgb )
69517012
: srccn(_srccn), cvt(3, blueIdx, _coeffs, _whitept, _srgb)
69527013
{
7014+
//0.72033 = 255/(220+134), 96.525 = 134*255/(220+134)
7015+
//0.9732 = 255/(140+122), 136.259 = 140*255/(140+122)
69537016
#if CV_NEON
69547017
v_scale_inv = vdupq_n_f32(1.f/255.f);
69557018
v_scale = vdupq_n_f32(2.55f);
@@ -7150,6 +7213,8 @@ struct Luv2RGB_b
71507213
const float* _whitept, bool _srgb )
71517214
: dstcn(_dstcn), cvt(3, blueIdx, _coeffs, _whitept, _srgb )
71527215
{
7216+
// 1.388235294117647 = (220+134)/255
7217+
// 1.027450980392157 = (140+122)/255
71537218
#if CV_NEON
71547219
v_scale_inv = vdupq_n_f32(100.f/255.f);
71557220
v_coeff1 = vdupq_n_f32(1.388235294117647f);
@@ -8521,7 +8586,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
85218586
coeffs[j] + coeffs[j + 1] + coeffs[j + 2] < 1.5f*(lab ? LabCbrtTabScale : 1) );
85228587
}
85238588

8524-
float d = 1.f/(_whitept[0] + _whitept[1]*15 + _whitept[2]*3);
8589+
float d = 1.f/std::max(_whitept[0] + _whitept[1]*15 + _whitept[2]*3, FLT_EPSILON);
85258590
un = 13*4*_whitept[0]*d;
85268591
vn = 13*9*_whitept[1]*d;
85278592

@@ -8588,9 +8653,9 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
85888653
coeffs[i+bidx*3] = _coeffs[i+6] * (lab ? _whitept[i] : 1);
85898654
}
85908655

8591-
float d = 1.f/(_whitept[0] + _whitept[1]*15 + _whitept[2]*3);
8592-
un = 4*_whitept[0]*d;
8593-
vn = 9*_whitept[1]*d;
8656+
float d = 1.f/std::max(_whitept[0] + _whitept[1]*15 + _whitept[2]*3, FLT_EPSILON);
8657+
un = 4*13*_whitept[0]*d;
8658+
vn = 9*13*_whitept[1]*d;
85948659

85958660
Mat(1, 9, CV_32FC1, coeffs).copyTo(ucoeffs);
85968661
}

modules/imgproc/src/opencl/cvtcolor.cl

Lines changed: 37 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1963,6 +1963,10 @@ __kernel void BGR2Luv(__global const uchar * srcptr, int src_step, int src_offse
19631963

19641964
float R = src[0], G = src[1], B = src[2];
19651965

1966+
R = clamp(R, 0.f, 1.f);
1967+
G = clamp(G, 0.f, 1.f);
1968+
B = clamp(B, 0.f, 1.f);
1969+
19661970
#ifdef SRGB
19671971
R = splineInterpolate(R*GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
19681972
G = splineInterpolate(G*GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
@@ -2031,7 +2035,9 @@ __kernel void BGR2Luv(__global const uchar * src, int src_step, int src_offset,
20312035
float v = L*fma(2.25f, Y*d, -_vn);
20322036

20332037
dst[0] = SAT_CAST(L * 2.55f);
2038+
//0.72033 = 255/(220+134), 96.525 = 134*255/(220+134)
20342039
dst[1] = SAT_CAST(fma(u, 0.72033898305084743f, 96.525423728813564f));
2040+
//0.9732 = 255/(140+122), 136.259 = 140*255/(140+122)
20352041
dst[2] = SAT_CAST(fma(v, 0.9732824427480916f, 136.259541984732824f));
20362042

20372043
++y;
@@ -2067,15 +2073,21 @@ __kernel void Luv2BGR(__global const uchar * srcptr, int src_step, int src_offse
20672073
__global const float * src = (__global const float *)(srcptr + src_index);
20682074
__global float * dst = (__global float *)(dstptr + dst_index);
20692075

2070-
float L = src[0], u = src[1], v = src[2], d, X, Y, Z;
2071-
Y = (L + 16.f) * (1.f/116.f);
2072-
Y = Y*Y*Y;
2073-
d = (1.f/13.f)/L;
2074-
u = fma(u, d, _un);
2075-
v = fma(v, d, _vn);
2076-
float iv = 1.f/v;
2077-
X = 2.25f * u * Y * iv;
2078-
Z = (12 - fma(3.0f, u, 20.0f * v)) * Y * 0.25f * iv;
2076+
float L = src[0], u = src[1], v = src[2], X, Y, Z;
2077+
if(L >= 8)
2078+
{
2079+
Y = fma(L, 1.f/116.f, 16.f/116.f);
2080+
Y = Y*Y*Y;
2081+
}
2082+
else
2083+
{
2084+
Y = L * (1.0f/903.3f); // L*(3./29.)^3
2085+
}
2086+
float up = 3.f*fma(L, _un, u);
2087+
float vp = 0.25f/fma(L, _vn, v);
2088+
vp = clamp(vp, -0.25f, 0.25f);
2089+
X = 3.f*Y*up*vp;
2090+
Z = Y*fma(fma(12.f*13.f, L, -up), vp, -5.f);
20792091

20802092
float R = fma(X, coeffs[0], fma(Y, coeffs[1], Z * coeffs[2]));
20812093
float G = fma(X, coeffs[3], fma(Y, coeffs[4], Z * coeffs[5]));
@@ -2127,16 +2139,24 @@ __kernel void Luv2BGR(__global const uchar * src, int src_step, int src_offset,
21272139
{
21282140
float d, X, Y, Z;
21292141
float L = src[0]*(100.f/255.f);
2142+
// 1.388235294117647 = (220+134)/255
21302143
float u = fma(convert_float(src[1]), 1.388235294117647f, -134.f);
2144+
// 1.027450980392157 = (140+122)/255
21312145
float v = fma(convert_float(src[2]), 1.027450980392157f, - 140.f);
2132-
Y = (L + 16.f) * (1.f/116.f);
2133-
Y = Y*Y*Y;
2134-
d = (1.f/13.f)/L;
2135-
u = fma(u, d, _un);
2136-
v = fma(v, d, _vn);
2137-
float iv = 1.f/v;
2138-
X = 2.25f * u * Y * iv ;
2139-
Z = (12 - fma(3.0f, u, 20.0f * v)) * Y * 0.25f * iv;
2146+
if(L >= 8)
2147+
{
2148+
Y = fma(L, 1.f/116.f, 16.f/116.f);
2149+
Y = Y*Y*Y;
2150+
}
2151+
else
2152+
{
2153+
Y = L * (1.0f/903.3f); // L*(3./29.)^3
2154+
}
2155+
float up = 3.f*fma(L, _un, u);
2156+
float vp = 0.25f/fma(L, _vn, v);
2157+
vp = clamp(vp, -0.25f, 0.25f);
2158+
X = 3.f*Y*up*vp;
2159+
Z = Y*fma(fma(12.f*13.f, L, -up), vp, -5.f);
21402160

21412161
float R = fma(X, coeffs[0], fma(Y, coeffs[1], Z * coeffs[2]));
21422162
float G = fma(X, coeffs[3], fma(Y, coeffs[4], Z * coeffs[5]));

0 commit comments

Comments
 (0)