Skip to content

Commit 15a2c77

Browse files
committed
Merge pull request opencv#8743 from tomoaki0705:featureConvertFp16UMat
2 parents 1ce61f3 + d81cdb8 commit 15a2c77

File tree

3 files changed

+162
-3
lines changed

3 files changed

+162
-3
lines changed

modules/core/src/convert.cpp

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5371,6 +5371,34 @@ static bool ocl_convertScaleAbs( InputArray _src, OutputArray _dst, double alpha
53715371
return k.run(2, globalsize, NULL, false);
53725372
}
53735373

5374+
// OpenCL implementation of the half <-> single precision conversion used by
// cv::convertFp16().
//
//   _src   - input array; treated as float data when ddepth == CV_16S and as
//            half data otherwise.
//   _dst   - output array; (re)allocated with the size of _src, depth ddepth
//            and the channel count of _src.
//   ddepth - destination depth. CV_16S selects the float -> half kernel
//            variant (FLOAT_TO_HALF), any other value selects half -> float.
//
// Returns false when the kernel could not be created; otherwise returns the
// result of ocl::Kernel::run(). _dst is left untouched when the kernel could
// not be created.
static bool ocl_convertFp16( InputArray _src, OutputArray _dst, int ddepth )
{
    const int cn = CV_MAT_CN(_src.type());
    const int kercn = 1;      // elements processed per work-item along x
    const int rowsPerWI = 1;  // rows processed per work-item along y
    const bool floatToHalf = (ddepth == CV_16S);

    // Build the kernel first so that a failure does not touch the output.
    String build_opt = format("-D HALF_SUPPORT -D dstT=%s -D srcT=%s -D rowsPerWI=%d%s",
                              floatToHalf ? "half" : "float",
                              floatToHalf ? "float" : "half",
                              rowsPerWI,
                              floatToHalf ? " -D FLOAT_TO_HALF " : "");
    ocl::Kernel k("convertFp16", ocl::core::halfconvert_oclsrc, build_opt);
    if (k.empty())
        return false;

    // Take the source handle BEFORE the destination is (re)allocated: source
    // and destination depths always differ, so if both arguments refer to the
    // same UMat the allocation below would otherwise replace the input buffer
    // before it has been read.
    UMat src = _src.getUMat();
    _dst.createSameSize( _src, CV_MAKETYPE(ddepth, cn) );
    UMat dst = _dst.getUMat();

    ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
                   dstarg = ocl::KernelArg::WriteOnly(dst, cn, kercn);

    k.args(srcarg, dstarg);

    // One work-item per scalar element (all channels flattened along x).
    size_t globalsize[2] = { (size_t)src.cols * cn / kercn,
                             ((size_t)src.rows + rowsPerWI - 1) / rowsPerWI };
    return k.run(2, globalsize, NULL, false);
}
5401+
53745402
#endif
53755403

53765404
}
@@ -5411,10 +5439,8 @@ void cv::convertFp16( InputArray _src, OutputArray _dst)
54115439
{
54125440
CV_INSTRUMENT_REGION()
54135441

5414-
Mat src = _src.getMat();
54155442
int ddepth = 0;
5416-
5417-
switch( src.depth() )
5443+
switch( _src.depth() )
54185444
{
54195445
case CV_32F:
54205446
ddepth = CV_16S;
@@ -5427,6 +5453,11 @@ void cv::convertFp16( InputArray _src, OutputArray _dst)
54275453
return;
54285454
}
54295455

5456+
CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
5457+
ocl_convertFp16(_src, _dst, ddepth))
5458+
5459+
Mat src = _src.getMat();
5460+
54305461
int type = CV_MAKETYPE(ddepth, src.channels());
54315462
_dst.create( src.dims, src.size, type );
54325463
Mat dst = _dst.getMat();
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
/*M///////////////////////////////////////////////////////////////////////////////////////
2+
//
3+
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4+
//
5+
// By downloading, copying, installing or using the software you agree to this license.
6+
// If you do not agree to this license, do not download, install,
7+
// copy or use the software.
8+
//
9+
//
10+
// License Agreement
11+
// For Open Source Computer Vision Library
12+
//
13+
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
14+
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
15+
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
16+
// Third party copyrights are property of their respective owners.
17+
//
18+
// Redistribution and use in source and binary forms, with or without modification,
19+
// are permitted provided that the following conditions are met:
20+
//
21+
// * Redistribution's of source code must retain the above copyright notice,
22+
// this list of conditions and the following disclaimer.
23+
//
24+
// * Redistribution's in binary form must reproduce the above copyright notice,
25+
// this list of conditions and the following disclaimer in the documentation
26+
// and/or other materials provided with the distribution.
27+
//
28+
// * The name of the copyright holders may not be used to endorse or promote products
29+
// derived from this software without specific prior written permission.
30+
//
31+
// This software is provided by the copyright holders and contributors as is and
32+
// any express or implied warranties, including, but not limited to, the implied
33+
// warranties of merchantability and fitness for a particular purpose are disclaimed.
34+
// In no event shall the copyright holders or contributors be liable for any direct,
35+
// indirect, incidental, special, exemplary, or consequential damages
36+
// (including, but not limited to, procurement of substitute goods or services;
37+
// loss of use, data, or profits; or business interruption) however caused
38+
// and on any theory of liability, whether in contract, strict liability,
39+
// or tort (including negligence or otherwise) arising in any way out of
40+
// the use of this software, even if advised of the possibility of such damage.
41+
//
42+
//M*/
43+
44+
#ifdef HALF_SUPPORT
45+
#ifdef cl_khr_fp16
46+
#pragma OPENCL EXTENSION cl_khr_fp16:enable
47+
#endif
48+
#endif
49+
50+
// Converts one scalar element per column index between half and float.
//
// srcT, dstT and rowsPerWI are supplied as build-time macros; FLOAT_TO_HALF
// selects the float -> half direction (vstore_half), otherwise half values
// are expanded to float (vload_half).
//
//   srcptr/src_step/src_offset - source buffer, row stride and start offset in bytes
//   dstptr/dst_step/dst_offset - destination buffer, row stride and start offset in bytes
//   dst_rows/dst_cols          - bounds used to clip the work-items
__kernel void convertFp16(__global const uchar * srcptr, int src_step, int src_offset,
                          __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols)
{
    const int col = get_global_id(0);
    const int first_row = get_global_id(1) * rowsPerWI;

    // Work-items past the last column have nothing to do.
    if (col >= dst_cols)
        return;

    const int end_row = min(dst_rows, first_row + rowsPerWI);

    // Byte offsets of the first element handled by this work-item.
    int src_pos = mad24(first_row, src_step, mad24(col, (int)sizeof(srcT), src_offset));
    int dst_pos = mad24(first_row, dst_step, mad24(col, (int)sizeof(dstT), dst_offset));

    for (int row = first_row; row < end_row; ++row)
    {
        __global const srcT * in = (__global const srcT *)(srcptr + src_pos);
        __global dstT * out = (__global dstT *)(dstptr + dst_pos);

#ifdef FLOAT_TO_HALF
        vstore_half(in[0], 0, out);
#else
        out[0] = vload_half(0, in);
#endif

        // Advance both cursors to the same column of the next row.
        src_pos += src_step;
        dst_pos += dst_step;
    }
}

modules/core/test/ocl/test_arithm.cpp

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1614,6 +1614,60 @@ OCL_TEST_P(ConvertScaleAbs, Mat)
16141614
}
16151615
}
16161616

1617+
//////////////////////////////// ConvertFp16 ////////////////////////////////////////////////
1618+
1619+
// Fixture for the OpenCL cv::convertFp16() test.
// Parameters: channel count, conversion direction.
PARAM_TEST_CASE(ConvertFp16, Channels, bool)
{
    int cn;
    // true  : source holds half values (CV_16S storage), destination is CV_32F
    // false : source is CV_32F, destination holds half values (CV_16S storage)
    bool fromHalf;

    TEST_DECLARE_INPUT_PARAMETER(src);
    TEST_DECLARE_OUTPUT_PARAMETER(dst);

    virtual void SetUp()
    {
        cn = GET_PARAM(0);
        fromHalf = GET_PARAM(1);
    }

    // Creates a random-sized source/destination pair for the selected
    // direction and uploads both to their UMat counterparts.
    void generateTestData()
    {
        const int halfType = CV_MAKE_TYPE(CV_16S, cn);
        const int floatType = CV_MAKE_TYPE(CV_32F, cn);
        // The flag name now matches the selected types: "from half" means the
        // input is the 16-bit representation.
        const int stype = fromHalf ? halfType : floatType;
        const int dtype = fromHalf ? floatType : halfType;

        Size roiSize = randomSize(1, MAX_VALUE);
        Border srcBorder = randomBorder(0, 0);
        randomSubMat(src, src_roi, roiSize, srcBorder, stype, -11, 11); // FIXIT: Test with minV, maxV

        Border dstBorder = randomBorder(0, 0);
        randomSubMat(dst, dst_roi, roiSize, dstBorder, dtype, 5, 16);

        UMAT_UPLOAD_INPUT_PARAMETER(src);
        UMAT_UPLOAD_OUTPUT_PARAMETER(dst);
    }

    // Compares the CPU and OpenCL destination matrices within `threshold`.
    void Near(double threshold = 0.)
    {
        OCL_EXPECT_MATS_NEAR(dst, threshold);
    }

};
1656+
1657+
1658+
// Runs cv::convertFp16() on the same random input through the plain (OCL_OFF)
// and the OpenCL (OCL_ON) path and checks that the results agree within a
// threshold of 1.
OCL_TEST_P(ConvertFp16, Mat)
{
    int iteration = 0;
    while (iteration < test_loop_times)
    {
        generateTestData();

        OCL_OFF(cv::convertFp16(src_roi, dst_roi));
        OCL_ON(cv::convertFp16(usrc_roi, udst_roi));

        Near(1);

        ++iteration;
    }
}
1670+
16171671
//////////////////////////////// ScaleAdd ////////////////////////////////////////////////
16181672

16191673
typedef ArithmTestBase ScaleAdd;
@@ -1844,6 +1898,7 @@ OCL_INSTANTIATE_TEST_CASE_P(Arithm, Sqrt, Combine(::testing::Values(CV_32F, CV_6
18441898
OCL_INSTANTIATE_TEST_CASE_P(Arithm, Normalize, Combine(OCL_ALL_DEPTHS, Values(Channels(1)), Bool()));
18451899
OCL_INSTANTIATE_TEST_CASE_P(Arithm, InRange, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool(), Bool()));
18461900
OCL_INSTANTIATE_TEST_CASE_P(Arithm, ConvertScaleAbs, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
1901+
OCL_INSTANTIATE_TEST_CASE_P(Arithm, ConvertFp16, Combine(OCL_ALL_CHANNELS, Bool()));
18471902
OCL_INSTANTIATE_TEST_CASE_P(Arithm, ScaleAdd, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
18481903
OCL_INSTANTIATE_TEST_CASE_P(Arithm, PatchNaNs, Combine(OCL_ALL_CHANNELS, Bool()));
18491904
OCL_INSTANTIATE_TEST_CASE_P(Arithm, Psnr, Combine(::testing::Values((MatDepth)CV_8U), OCL_ALL_CHANNELS, Bool()));

0 commit comments

Comments
 (0)