
Commit f071a48

Merge pull request opencv#10143 from pengli:ocl4dnn

2 parents: a1479cc + 636d636

9 files changed: +72 -36 lines

modules/dnn/include/opencv2/dnn/dnn.hpp (2 additions, 2 deletions)

@@ -410,13 +410,13 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
         * @param outputName name for layer which output is needed to get
         * @details If @p outputName is empty, runs forward pass for the whole network.
         */
-        CV_WRAP void forward(std::vector<Mat>& outputBlobs, const String& outputName = String());
+        CV_WRAP void forward(OutputArrayOfArrays outputBlobs, const String& outputName = String());

        /** @brief Runs forward pass to compute outputs of layers listed in @p outBlobNames.
         * @param outputBlobs contains blobs for first outputs of specified layers.
         * @param outBlobNames names for layers which outputs are needed to get
         */
-        CV_WRAP void forward(std::vector<Mat>& outputBlobs,
+        CV_WRAP void forward(OutputArrayOfArrays outputBlobs,
                              const std::vector<String>& outBlobNames);

        /** @brief Runs forward pass to compute outputs of layers listed in @p outBlobNames.
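The practical effect of the switch from std::vector<Mat>& to OutputArrayOfArrays is that callers may now hand forward() a Mat, a UMat, a std::vector<Mat>, or a std::vector<UMat>. A minimal usage sketch — the model files and the output layer name "prob" are hypothetical placeholders:

    #include <opencv2/dnn.hpp>
    #include <opencv2/imgcodecs.hpp>

    using namespace cv;
    using namespace cv::dnn;

    int main()
    {
        // Hypothetical Caffe model; any loader that yields a Net works the same way.
        Net net = readNetFromCaffe("deploy.prototxt", "weights.caffemodel");
        net.setInput(blobFromImage(imread("input.jpg")));

        std::vector<Mat> mats;
        net.forward(mats, "prob");   // results are copied to host memory

        std::vector<UMat> umats;
        net.forward(umats, "prob");  // results may stay in OpenCL device memory
        return 0;
    }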

modules/dnn/src/dnn.cpp (63 additions, 11 deletions)

@@ -1638,7 +1638,7 @@ struct Net::Impl
             CV_Error(Error::StsOutOfRange, "Layer \"" + ld.name + "\" produce only " + toString(ld.outputBlobs.size()) +
                                            " outputs, the #" + toString(pin.oid) + " was requsted");
         }
-        if (preferableBackend != DNN_TARGET_CPU)
+        if (preferableBackend != DNN_BACKEND_DEFAULT)
         {
             // Transfer data to CPU if it's require.
             ld.outputBlobsWrappers[pin.oid]->copyToHost();
@@ -1654,10 +1654,35 @@ struct Net::Impl
         return ld.outputBlobs[pin.oid];
     }

+    void getBlob(UMat& umat, const LayerPin& pin)
+    {
+        CV_TRACE_FUNCTION();
+
+        if (!pin.valid())
+            CV_Error(Error::StsObjectNotFound, "Requested blob not found");
+
+        LayerData &ld = layers[pin.lid];
+        if ((size_t)pin.oid >= ld.outputBlobs.size())
+        {
+            CV_Error(Error::StsOutOfRange, "Layer \"" + ld.name + "\" produce only " + toString(ld.outputBlobs.size()) +
+                                           " outputs, the #" + toString(pin.oid) + " was requsted");
+        }
+
+        if (ld.umat_outputBlobs.size() > 0 && !ld.umat_outputBlobs[pin.oid].empty())
+            umat = ld.umat_outputBlobs[pin.oid];
+        else
+            umat = UMat();
+    }
+
     Mat getBlob(String outputName)
     {
         return getBlob(getPinByAlias(outputName));
     }
+
+    void getBlob(UMat& umat, String outputName)
+    {
+        getBlob(umat, getPinByAlias(outputName));
+    }
 };

 Net::Net() : impl(new Net::Impl)
@@ -1735,7 +1760,7 @@ Mat Net::forward(const String& outputName)
     return impl->getBlob(layerName);
 }

-void Net::forward(std::vector<Mat>& outputBlobs, const String& outputName)
+void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName)
 {
     CV_TRACE_FUNCTION();

@@ -1751,24 +1776,48 @@ void Net::forward(std::vector<Mat>& outputBlobs, const String& outputName)
     LayerPin pin = impl->getPinByAlias(layerName);
     LayerData &ld = impl->layers[pin.lid];

-    if (ld.umat_outputBlobs.size() > 0)
+    if (outputBlobs.isUMat())
     {
-        for (int i = 0; i < ld.umat_outputBlobs.size(); i++)
-            ld.umat_outputBlobs[i].copyTo(ld.outputBlobs[i]);
+        if (ld.umat_outputBlobs.size() > 0)
+        {
+            UMat umat;
+            impl->getBlob(umat, layerName);
+            outputBlobs.assign(umat);
+        }
+    }
+    else if (outputBlobs.isMat())
+    {
+        outputBlobs.assign(impl->getBlob(layerName));
+    }
+    else if (outputBlobs.isMatVector())
+    {
+        if (ld.umat_outputBlobs.size() > 0)
+        {
+            for (int i = 0; i < ld.umat_outputBlobs.size(); i++)
+                ld.umat_outputBlobs[i].copyTo(ld.outputBlobs[i]);
+        }
+        std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
+        outputvec = ld.outputBlobs;
+    }
+    else if (outputBlobs.isUMatVector())
+    {
+        if (ld.umat_outputBlobs.size() > 0)
+        {
+            std::vector<UMat> & outputvec = *(std::vector<UMat> *)outputBlobs.getObj();
+            outputvec = ld.umat_outputBlobs;
+        }
     }
-
-    outputBlobs = ld.outputBlobs;
 }

-void Net::forward(std::vector<Mat>& outputBlobs,
+void Net::forward(OutputArrayOfArrays outputBlobs,
                   const std::vector<String>& outBlobNames)
 {
     CV_TRACE_FUNCTION();

     std::vector<LayerPin> pins;
     for (int i = 0; i < outBlobNames.size(); i++)
     {
-        pins.push_back(impl->getPinByAlias(outBlobNames[i]));
+        pins.push_back(impl->getPinByAlias(outBlobNames[i]));
     }

     impl->setUpNet(pins);
@@ -1777,11 +1826,14 @@ void Net::forward(std::vector<Mat>& outputBlobs,

     impl->forwardToLayer(impl->getLayerData(out.lid));

-    outputBlobs.clear();
+    std::vector<Mat> matvec;
     for (int i = 0; i < pins.size(); i++)
     {
-        outputBlobs.push_back(impl->getBlob(pins[i]));
+        matvec.push_back(impl->getBlob(pins[i]));
     }
+
+    std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
+    outputvec = matvec;
 }

 void Net::forward(std::vector<std::vector<Mat> >& outputBlobs,
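For reference, a condensed sketch of the _OutputArray dispatch idiom the new forward() relies on: the proxy records which container type the caller passed, and the callee branches on its kind. The helper function below is hypothetical; the cast pattern mirrors the one in the diff:

    #include <opencv2/core.hpp>

    // Hypothetical helper demonstrating the kind-based dispatch used above.
    static void publishBlobs(cv::OutputArrayOfArrays dst, const std::vector<cv::Mat>& blobs)
    {
        if (dst.isMatVector())
        {
            // Same pattern as in Net::forward(): getObj() exposes the caller's
            // std::vector<cv::Mat>, so it can be assigned directly (shallow copies).
            std::vector<cv::Mat>& vec = *(std::vector<cv::Mat>*)dst.getObj();
            vec = blobs;
        }
        else if (dst.isMat())
        {
            dst.assign(blobs[0]);  // a single Mat receives the first blob only
        }
    }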

modules/dnn/src/layers/fully_connected_layer.cpp (7 additions, 2 deletions)

@@ -286,8 +286,13 @@ class FullyConnectedLayerImpl : public InnerProductLayer
         UMat biasOnesMat = UMat::ones(outerSize, 1, umat_blobs[0].type());
         for (size_t i = 0; i < inputs.size(); i++)
         {
-            UMat& srcMat = inputs[i];
-            UMat& dstMat = outputs[i];
+            MatShape inshape, outshape;
+            inshape = shape(outerSize, innerSize);
+            outshape = shape(outerSize, numOutput);
+
+            UMat srcMat, dstMat;
+            srcMat = inputs[i].reshape(1, inshape.size(), &inshape[0]);
+            dstMat = outputs[i].reshape(1, outshape.size(), &outshape[0]);
             dstMat.setTo(0.0f);

             if (!innerProductOp->Forward(srcMat, umat_blobs[0], (bias) ? umat_blobs[1] : UMat(), dstMat))
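This change flattens each N-dimensional input blob into the 2-D outerSize x innerSize matrix the GEMM kernel expects; reshape() only reinterprets the header, so no device data moves. A small sketch under assumed sizes:

    #include <opencv2/core.hpp>

    int main()
    {
        // Assumed NCHW blob: 2 images, 16 channels, 4x4 spatial size.
        int dims[] = { 2, 16, 4, 4 };
        cv::UMat blob(4, dims, CV_32F);

        // Flatten to the 2-D view an inner-product GEMM consumes:
        // outerSize = batch (2), innerSize = 16*4*4 = 256.
        int flat[] = { 2, 16 * 4 * 4 };
        cv::UMat twoD = blob.reshape(1, 2, flat);  // header change only, no copy
        CV_Assert(twoD.rows == 2 && twoD.cols == 256);
        return 0;
    }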

modules/dnn/src/ocl4dnn/src/math_functions.cpp (0 additions, 8 deletions)

@@ -65,8 +65,6 @@ ocl::Image2D ocl4dnnGEMMCopyBufferToImage(UMat buffer, int offset,
                                           int padded_width, int height,
                                           int width, int ld)
 {
-    ocl::Context ctx = ocl::Context::getDefault();
-    ocl::Queue queue = ocl::Queue::getDefault();
     ocl::Image2D image;

     if (!is_matrix_a && transpose)
@@ -192,9 +190,6 @@ static bool ocl4dnnFastImageGEMM(const CBLAS_TRANSPOSE TransA,
     // just padding one line is enough as the sub group block read
     // will clamp to edge according to the spec.

-    ocl::Context ctx = ocl::Context::getDefault();
-    ocl::Queue queue = ocl::Queue::getDefault();
-
     ocl::Image2D ImA;
     ocl::Image2D ImB;

@@ -446,7 +441,6 @@ bool ocl4dnnGEMV<float>(const CBLAS_TRANSPOSE TransA,
                         const int32_t offx, const float beta, UMat y,
                         const int32_t offy)
 {
-    ocl::Queue queue = ocl::Queue::getDefault();
     bool ret = false;

     if (TransA == CblasNoTrans)
@@ -507,8 +501,6 @@ bool ocl4dnnAXPY(const int32_t N, const Dtype alpha,
                  const UMat X, const int32_t offX, UMat Y,
                  const int32_t offY)
 {
-    ocl::Context ctx = ocl::Context::getDefault();
-
     ocl::Kernel oclk_axpy(CL_KERNEL_SELECT("axpy"), cv::ocl::dnn::math_oclsrc);
     if (oclk_axpy.empty())
         return false;
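The deletions above (and the matching ones in the lrn, pool, and softmax files below) drop locals that were never used: cv::ocl::Kernel::run() takes an optional Queue argument and falls back to the default queue when given an empty one, so fetching Queue::getDefault() by hand is redundant. A hedged sketch of that behavior, with a hypothetical wrapper function:

    #include <opencv2/core/ocl.hpp>

    // Hypothetical 1-D launcher: relies on run()'s defaulted Queue() argument,
    // which makes OpenCV use ocl::Queue::getDefault() internally.
    static bool launch1D(cv::ocl::Kernel& kernel, size_t items)
    {
        size_t global[] = { items };
        return kernel.run(1, global, NULL, false /* async */);
    }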

modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp (0 additions, 2 deletions)

@@ -198,8 +198,6 @@ void OCL4DNNConvSpatial<Dtype>::collectCommonInformation()
     addDef("as_Dtype2", "as_float2");
     addDef("as_Dtype4", "as_float4");
     addDef("as_Dtype8", "as_float8");
-    addDef("Dtype_ID", (int)CV_32F);
-    addDef("Dtype_SIZE", (int)sizeof(Dtype));
 }

 typedef enum {

modules/dnn/src/ocl4dnn/src/ocl4dnn_lrn.cpp (0 additions, 1 deletion)

@@ -92,7 +92,6 @@ bool OCL4DNNLRN<Dtype>::Forward(const UMat& bottom, UMat& top)
 template<typename Dtype>
 bool OCL4DNNLRN<Dtype>::crossChannelForward(const UMat& bottom, UMat& top)
 {
-    ocl::Queue queue = ocl::Queue::getDefault();
     CHECK_EQ(phase_test_, true) << "Only support forward inference.";

     cl_uint argIdx = 0;

modules/dnn/src/ocl4dnn/src/ocl4dnn_pool.cpp (0 additions, 1 deletion)

@@ -97,7 +97,6 @@ bool OCL4DNNPool<Dtype>::Forward(const UMat& bottom,
                                  UMat& top_mask)
 {
     bool ret = true;
-    ocl::Queue queue = ocl::Queue::getDefault();
     size_t global[] = { 128 * 128 };
     size_t local[] = { 128 };
     cl_uint argIdx = 0;

modules/dnn/src/ocl4dnn/src/ocl4dnn_softmax.cpp (0 additions, 1 deletion)

@@ -83,7 +83,6 @@ template<typename Dtype>
 bool OCL4DNNSoftmax<Dtype>::Forward(const UMat& bottom, UMat& top)
 {
     bool ret = false;
-    ocl::Queue queue = ocl::Queue::getDefault();
     bool intel_subgroup = ocl::Device::getDefault().intelSubgroupsSupport();
     if (intel_subgroup && inner_num_ < 128)
     {

modules/dnn/src/opencl/conv_layer_spatial.cl (0 additions, 8 deletions)

@@ -91,7 +91,6 @@
 #define LOOP(N, VAR, STMT) CAT(LOOP, N)((VAR), (STMT))

 #if defined(convolve_simd) || defined(Conv_Interleaved)
-#if Dtype_SIZE == 4
 #define INT_TYPE uint
 #define INT_TYPE2 uint2
 #define INT_TYPE4 uint4
@@ -100,9 +99,6 @@
 #define SUB_GROUP_BLOCK_READ4 intel_sub_group_block_read4
 #define SUB_GROUP_BLOCK_READ8 intel_sub_group_block_read8
 #define SUB_GROUP_BLOCK_READ intel_sub_group_block_read
-#else
-#error "Unsupported type"
-#endif
 #endif

 #ifdef KERNEL_BASIC
@@ -186,11 +182,7 @@ __kernel void ConvolveBasic(

 #elif defined KERNEL_IDLF

-#if TYPE == TYPE_HALF
-#define VLOAD4(_v, _p) do { (_v).s0 = *(_p); (_v).s1 = *(_p + 1); (_v).s2 = *(_p + 2); (_v).s3 = *(_p + 3); } while(0)
-#else
 #define VLOAD4(_v, _p) do { _v = vload4(0, _p); } while(0)
-#endif

 // Each work-item computes a OUT_BLOCK_WIDTH * OUT_BLOCK_HEIGHT region of one output map.
 // Each work-group (which will be mapped to 1 SIMD16/SIMD8 EU thread) will compute 16/8 different feature maps, but each feature map is for the same region of the imput image.
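This kernel-side cleanup pairs with the collectCommonInformation() change above: once the host stops emitting Dtype_ID/Dtype_SIZE and only ever builds these kernels for float, the #if Dtype_SIZE == 4 and TYPE == TYPE_HALF guards are dead code. A sketch of how such defines reach the kernel through OpenCL build options; the helper below is hypothetical and stands in for the real addDef() mechanism:

    #include <sstream>
    #include <string>

    // Hypothetical equivalent of the addDef() calls: each definition becomes a
    // -D flag in the OpenCL program's build-options string.
    static std::string floatBuildOptions()
    {
        std::ostringstream opts;
        opts << "-DDtype=float"
             << " -Das_Dtype2=as_float2"
             << " -Das_Dtype4=as_float4"
             << " -Das_Dtype8=as_float8";  // Dtype_ID / Dtype_SIZE no longer emitted
        return opts.str();
    }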
