@@ -6495,7 +6495,7 @@ struct RGB2Luv_f
6495
6495
coeffs[i*3 ] + coeffs[i*3 +1 ] + coeffs[i*3 +2 ] < 1 .5f );
6496
6496
}
6497
6497
6498
- float d = 1 .f /(whitept[0 ] + whitept[1 ]*15 + whitept[2 ]*3 );
6498
+ float d = 1 .f /std::max (whitept[0 ] + whitept[1 ]*15 + whitept[2 ]*3 , FLT_EPSILON );
6499
6499
un = 4 *whitept[0 ]*d*13 ;
6500
6500
vn = 9 *whitept[1 ]*d*13 ;
6501
6501
@@ -6607,6 +6607,15 @@ struct RGB2Luv_f
6607
6607
for ( ; i <= n - 12 ; i += 12 , src += scn * 4 )
6608
6608
{
6609
6609
float32x4x3_t v_src = vld3q_f32 (src);
6610
+
6611
+ v_src.val [0 ] = vmaxq_f32 (v_src.val [0 ], vdupq_n_f32 (0 ));
6612
+ v_src.val [1 ] = vmaxq_f32 (v_src.val [1 ], vdupq_n_f32 (0 ));
6613
+ v_src.val [2 ] = vmaxq_f32 (v_src.val [2 ], vdupq_n_f32 (0 ));
6614
+
6615
+ v_src.val [0 ] = vminq_f32 (v_src.val [0 ], vdupq_n_f32 (1 ));
6616
+ v_src.val [1 ] = vminq_f32 (v_src.val [1 ], vdupq_n_f32 (1 ));
6617
+ v_src.val [2 ] = vminq_f32 (v_src.val [2 ], vdupq_n_f32 (1 ));
6618
+
6610
6619
if ( gammaTab )
6611
6620
{
6612
6621
v_src.val [0 ] = vmulq_f32 (v_src.val [0 ], vdupq_n_f32 (gscale));
@@ -6627,6 +6636,15 @@ struct RGB2Luv_f
6627
6636
for ( ; i <= n - 12 ; i += 12 , src += scn * 4 )
6628
6637
{
6629
6638
float32x4x4_t v_src = vld4q_f32 (src);
6639
+
6640
+ v_src.val [0 ] = vmaxq_f32 (v_src.val [0 ], vdupq_n_f32 (0 ));
6641
+ v_src.val [1 ] = vmaxq_f32 (v_src.val [1 ], vdupq_n_f32 (0 ));
6642
+ v_src.val [2 ] = vmaxq_f32 (v_src.val [2 ], vdupq_n_f32 (0 ));
6643
+
6644
+ v_src.val [0 ] = vminq_f32 (v_src.val [0 ], vdupq_n_f32 (1 ));
6645
+ v_src.val [1 ] = vminq_f32 (v_src.val [1 ], vdupq_n_f32 (1 ));
6646
+ v_src.val [2 ] = vminq_f32 (v_src.val [2 ], vdupq_n_f32 (1 ));
6647
+
6630
6648
if ( gammaTab )
6631
6649
{
6632
6650
v_src.val [0 ] = vmulq_f32 (v_src.val [0 ], vdupq_n_f32 (gscale));
@@ -6670,6 +6688,20 @@ struct RGB2Luv_f
6670
6688
_mm_deinterleave_ps (v_r0, v_r1, v_g0, v_g1, v_b0, v_b1, v_a0, v_a1);
6671
6689
}
6672
6690
6691
+ v_r0 = _mm_max_ps (v_r0, _mm_setzero_ps ());
6692
+ v_r1 = _mm_max_ps (v_r1, _mm_setzero_ps ());
6693
+ v_g0 = _mm_max_ps (v_g0, _mm_setzero_ps ());
6694
+ v_g1 = _mm_max_ps (v_g1, _mm_setzero_ps ());
6695
+ v_b0 = _mm_max_ps (v_b0, _mm_setzero_ps ());
6696
+ v_b1 = _mm_max_ps (v_b1, _mm_setzero_ps ());
6697
+
6698
+ v_r0 = _mm_min_ps (v_r0, _mm_set1_ps (1 .f ));
6699
+ v_r1 = _mm_min_ps (v_r1, _mm_set1_ps (1 .f ));
6700
+ v_g0 = _mm_min_ps (v_g0, _mm_set1_ps (1 .f ));
6701
+ v_g1 = _mm_min_ps (v_g1, _mm_set1_ps (1 .f ));
6702
+ v_b0 = _mm_min_ps (v_b0, _mm_set1_ps (1 .f ));
6703
+ v_b1 = _mm_min_ps (v_b1, _mm_set1_ps (1 .f ));
6704
+
6673
6705
if ( gammaTab )
6674
6706
{
6675
6707
__m128 v_gscale = _mm_set1_ps (gscale);
@@ -6704,6 +6736,9 @@ struct RGB2Luv_f
6704
6736
for ( ; i < n; i += 3 , src += scn )
6705
6737
{
6706
6738
float R = src[0 ], G = src[1 ], B = src[2 ];
6739
+ R = std::min (std::max (R, 0 .f ), 1 .f );
6740
+ G = std::min (std::max (G, 0 .f ), 1 .f );
6741
+ B = std::min (std::max (B, 0 .f ), 1 .f );
6707
6742
if ( gammaTab )
6708
6743
{
6709
6744
R = splineInterpolate (R*gscale, gammaTab, GAMMA_TAB_SIZE);
@@ -6755,9 +6790,9 @@ struct Luv2RGB_f
6755
6790
coeffs[i+blueIdx*3 ] = _coeffs[i+6 ];
6756
6791
}
6757
6792
6758
- float d = 1 .f /(whitept[0 ] + whitept[1 ]*15 + whitept[2 ]*3 );
6759
- un = 4 *whitept[0 ]*d;
6760
- vn = 9 *whitept[1 ]*d;
6793
+ float d = 1 .f /std::max (whitept[0 ] + whitept[1 ]*15 + whitept[2 ]*3 , FLT_EPSILON );
6794
+ un = 4 *13 * whitept[0 ]*d;
6795
+ vn = 9 *13 * whitept[1 ]*d;
6761
6796
#if CV_SSE2
6762
6797
haveSIMD = checkHardwareSupport (CV_CPU_SSE2);
6763
6798
#endif
@@ -6769,23 +6804,42 @@ struct Luv2RGB_f
6769
6804
void process (__m128& v_l0, __m128& v_l1, __m128& v_u0,
6770
6805
__m128& v_u1, __m128& v_v0, __m128& v_v1) const
6771
6806
{
6772
- __m128 v_y0 = _mm_mul_ps (_mm_add_ps (v_l0, _mm_set1_ps (16 .0f )), _mm_set1_ps (1 .f /116 .f ));
6773
- __m128 v_y1 = _mm_mul_ps (_mm_add_ps (v_l1, _mm_set1_ps (16 .0f )), _mm_set1_ps (1 .f /116 .f ));
6774
- v_y0 = _mm_mul_ps (_mm_mul_ps (v_y0, v_y0), v_y0);
6775
- v_y1 = _mm_mul_ps (_mm_mul_ps (v_y1, v_y1), v_y1);
6776
- __m128 v_d0 = _mm_div_ps (_mm_set1_ps (1 .f /13 .f ), v_l0);
6777
- __m128 v_d1 = _mm_div_ps (_mm_set1_ps (1 .f /13 .f ), v_l1);
6778
- v_u0 = _mm_mul_ps (_mm_add_ps (_mm_mul_ps (v_u0, v_d0), _mm_set1_ps (un)), _mm_set1_ps (3 .f ));
6779
- v_u1 = _mm_mul_ps (_mm_add_ps (_mm_mul_ps (v_u1, v_d1), _mm_set1_ps (un)), _mm_set1_ps (3 .f ));
6780
- v_v0 = _mm_add_ps (_mm_mul_ps (v_v0, v_d0), _mm_set1_ps (vn));
6781
- v_v1 = _mm_add_ps (_mm_mul_ps (v_v1, v_d1), _mm_set1_ps (vn));
6782
- __m128 v_iv0 = _mm_div_ps (_mm_set1_ps (0 .25f ), v_v0);
6783
- __m128 v_iv1 = _mm_div_ps (_mm_set1_ps (0 .25f ), v_v1);
6784
- __m128 v_x0 = _mm_mul_ps (_mm_mul_ps (_mm_set1_ps (3 .f ), v_u0), v_iv0);
6785
- __m128 v_x1 = _mm_mul_ps (_mm_mul_ps (_mm_set1_ps (3 .f ), v_u1), v_iv1);
6786
- __m128 v_z0 = _mm_mul_ps (_mm_sub_ps (_mm_sub_ps (_mm_set1_ps (12 .f ), v_u0), _mm_mul_ps (_mm_set1_ps (20 .f ), v_v0)), v_iv0);
6787
- __m128 v_z1 = _mm_mul_ps (_mm_sub_ps (_mm_sub_ps (_mm_set1_ps (12 .f ), v_u1), _mm_mul_ps (_mm_set1_ps (20 .f ), v_v1)), v_iv1);
6788
-
6807
+ // L*(3./29.)^3
6808
+ __m128 v_y00 = _mm_mul_ps (v_l0, _mm_set1_ps (1 .0f /903 .3f ));
6809
+ __m128 v_y01 = _mm_mul_ps (v_l1, _mm_set1_ps (1 .0f /903 .3f ));
6810
+ // ((L + 16)/116)^3
6811
+ __m128 v_y10 = _mm_mul_ps (_mm_add_ps (v_l0, _mm_set1_ps (16 .0f )), _mm_set1_ps (1 .f /116 .f ));
6812
+ __m128 v_y11 = _mm_mul_ps (_mm_add_ps (v_l1, _mm_set1_ps (16 .0f )), _mm_set1_ps (1 .f /116 .f ));
6813
+ v_y10 = _mm_mul_ps (_mm_mul_ps (v_y10, v_y10), v_y10);
6814
+ v_y11 = _mm_mul_ps (_mm_mul_ps (v_y11, v_y11), v_y11);
6815
+ // Y = (L < 8) ? Y0 : Y1;
6816
+ __m128 v_cmpl0 = _mm_cmplt_ps (v_l0, _mm_set1_ps (8 .f ));
6817
+ __m128 v_cmpl1 = _mm_cmplt_ps (v_l1, _mm_set1_ps (8 .f ));
6818
+ v_y00 = _mm_and_ps (v_cmpl0, v_y00);
6819
+ v_y01 = _mm_and_ps (v_cmpl1, v_y01);
6820
+ v_y10 = _mm_andnot_ps (v_cmpl0, v_y10);
6821
+ v_y11 = _mm_andnot_ps (v_cmpl1, v_y11);
6822
+ __m128 v_y0 = _mm_or_ps (v_y00, v_y10);
6823
+ __m128 v_y1 = _mm_or_ps (v_y01, v_y11);
6824
+ // up = 3*(u + L*_un);
6825
+ __m128 v_up0 = _mm_mul_ps (_mm_set1_ps (3 .f ), _mm_add_ps (v_u0, _mm_mul_ps (v_l0, _mm_set1_ps (un))));
6826
+ __m128 v_up1 = _mm_mul_ps (_mm_set1_ps (3 .f ), _mm_add_ps (v_u1, _mm_mul_ps (v_l1, _mm_set1_ps (un))));
6827
+ // vp = 0.25/(v + L*_vn);
6828
+ __m128 v_vp0 = _mm_div_ps (_mm_set1_ps (0 .25f ), _mm_add_ps (v_v0, _mm_mul_ps (v_l0, _mm_set1_ps (vn))));
6829
+ __m128 v_vp1 = _mm_div_ps (_mm_set1_ps (0 .25f ), _mm_add_ps (v_v1, _mm_mul_ps (v_l1, _mm_set1_ps (vn))));
6830
+ // vp = max(-0.25, min(0.25, vp));
6831
+ v_vp0 = _mm_max_ps (v_vp0, _mm_set1_ps (-0 .25f ));
6832
+ v_vp1 = _mm_max_ps (v_vp1, _mm_set1_ps (-0 .25f ));
6833
+ v_vp0 = _mm_min_ps (v_vp0, _mm_set1_ps ( 0 .25f ));
6834
+ v_vp1 = _mm_min_ps (v_vp1, _mm_set1_ps ( 0 .25f ));
6835
+ // X = 3*up*vp; // (*Y) is done later
6836
+ __m128 v_x0 = _mm_mul_ps (_mm_set1_ps (3 .f ), _mm_mul_ps (v_up0, v_vp0));
6837
+ __m128 v_x1 = _mm_mul_ps (_mm_set1_ps (3 .f ), _mm_mul_ps (v_up1, v_vp1));
6838
+ // Z = ((12*13*L - up)*vp - 5); // (*Y) is done later
6839
+ __m128 v_z0 = _mm_sub_ps (_mm_mul_ps (_mm_sub_ps (_mm_mul_ps (_mm_set1_ps (12 .f *13 .f ), v_l0), v_up0), v_vp0), _mm_set1_ps (5 .f ));
6840
+ __m128 v_z1 = _mm_sub_ps (_mm_mul_ps (_mm_sub_ps (_mm_mul_ps (_mm_set1_ps (12 .f *13 .f ), v_l1), v_up1), v_vp1), _mm_set1_ps (5 .f ));
6841
+
6842
+ // R = (X*C0 + C1 + Z*C2)*Y; // here (*Y) is done
6789
6843
v_l0 = _mm_mul_ps (v_x0, _mm_set1_ps (coeffs[0 ]));
6790
6844
v_l1 = _mm_mul_ps (v_x1, _mm_set1_ps (coeffs[0 ]));
6791
6845
v_u0 = _mm_mul_ps (v_x0, _mm_set1_ps (coeffs[3 ]));
@@ -6902,15 +6956,22 @@ struct Luv2RGB_f
6902
6956
#endif
6903
6957
for ( ; i < n; i += 3 , dst += dcn )
6904
6958
{
6905
- float L = src[i], u = src[i+1 ], v = src[i+2 ], d, X, Y, Z;
6906
- Y = (L + 16 .f ) * (1 .f /116 .f );
6907
- Y = Y*Y*Y;
6908
- d = (1 .f /13 .f )/L;
6909
- u = u*d + _un;
6910
- v = v*d + _vn;
6911
- float iv = 1 .f /v;
6912
- X = 2 .25f * u * Y * iv ;
6913
- Z = (12 - 3 * u - 20 * v) * Y * 0 .25f * iv;
6959
+ float L = src[i], u = src[i+1 ], v = src[i+2 ], X, Y, Z;
6960
+ if (L >= 8 )
6961
+ {
6962
+ Y = (L + 16 .f ) * (1 .f /116 .f );
6963
+ Y = Y*Y*Y;
6964
+ }
6965
+ else
6966
+ {
6967
+ Y = L * (1 .0f /903 .3f ); // L*(3./29.)^3
6968
+ }
6969
+ float up = 3 .f *(u + L*_un);
6970
+ float vp = 0 .25f /(v + L*_vn);
6971
+ if (vp > 0 .25f ) vp = 0 .25f ;
6972
+ if (vp < -0 .25f ) vp = -0 .25f ;
6973
+ X = Y*3 .f *up*vp;
6974
+ Z = Y*(((12 .f *13 .f )*L - up)*vp - 5 .f );
6914
6975
6915
6976
float R = X*C0 + Y*C1 + Z*C2;
6916
6977
float G = X*C3 + Y*C4 + Z*C5;
@@ -6950,6 +7011,8 @@ struct RGB2Luv_b
6950
7011
const float * _whitept, bool _srgb )
6951
7012
: srccn(_srccn), cvt(3 , blueIdx, _coeffs, _whitept, _srgb)
6952
7013
{
7014
+ // 0.72033 = 255/(220+134), 96.525 = 134*255/(220+134)
7015
+ // 0.9732 = 255/(140+122), 136.259 = 140*255/(140+122)
6953
7016
#if CV_NEON
6954
7017
v_scale_inv = vdupq_n_f32 (1 .f /255 .f );
6955
7018
v_scale = vdupq_n_f32 (2 .55f );
@@ -7150,6 +7213,8 @@ struct Luv2RGB_b
7150
7213
const float * _whitept, bool _srgb )
7151
7214
: dstcn(_dstcn), cvt(3 , blueIdx, _coeffs, _whitept, _srgb )
7152
7215
{
7216
+ // 1.388235294117647 = (220+134)/255
7217
+ // 1.027450980392157 = (140+122)/255
7153
7218
#if CV_NEON
7154
7219
v_scale_inv = vdupq_n_f32 (100 .f /255 .f );
7155
7220
v_coeff1 = vdupq_n_f32 (1 .388235294117647f );
@@ -8521,7 +8586,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
8521
8586
coeffs[j] + coeffs[j + 1 ] + coeffs[j + 2 ] < 1 .5f *(lab ? LabCbrtTabScale : 1 ) );
8522
8587
}
8523
8588
8524
- float d = 1 .f /(_whitept[0 ] + _whitept[1 ]*15 + _whitept[2 ]*3 );
8589
+ float d = 1 .f /std::max (_whitept[0 ] + _whitept[1 ]*15 + _whitept[2 ]*3 , FLT_EPSILON );
8525
8590
un = 13 *4 *_whitept[0 ]*d;
8526
8591
vn = 13 *9 *_whitept[1 ]*d;
8527
8592
@@ -8588,9 +8653,9 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
8588
8653
coeffs[i+bidx*3 ] = _coeffs[i+6 ] * (lab ? _whitept[i] : 1 );
8589
8654
}
8590
8655
8591
- float d = 1 .f /(_whitept[0 ] + _whitept[1 ]*15 + _whitept[2 ]*3 );
8592
- un = 4 *_whitept[0 ]*d;
8593
- vn = 9 *_whitept[1 ]*d;
8656
+ float d = 1 .f /std::max (_whitept[0 ] + _whitept[1 ]*15 + _whitept[2 ]*3 , FLT_EPSILON );
8657
+ un = 4 *13 * _whitept[0 ]*d;
8658
+ vn = 9 *13 * _whitept[1 ]*d;
8594
8659
8595
8660
Mat (1 , 9 , CV_32FC1, coeffs).copyTo (ucoeffs);
8596
8661
}
0 commit comments