Skip to content

Commit ab648b5

Browse files
committed
GpuMatND - GpuData, offset, getDevicePtr(), license
1 parent ddebc2c commit ab648b5

File tree

4 files changed

+99
-44
lines changed

4 files changed

+99
-44
lines changed

modules/core/include/opencv2/core/cuda.hpp

Lines changed: 33 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -340,23 +340,24 @@ class CV_EXPORTS_W GpuMat
340340
Allocator* allocator;
341341
};
342342

343-
class CV_EXPORTS_W GpuMatND
343+
struct CV_EXPORTS_W GpuData
344344
{
345-
public:
346-
struct CV_EXPORTS_W DevicePtr
347-
{
348-
explicit DevicePtr(size_t _size);
349-
~DevicePtr();
345+
explicit GpuData(size_t _size);
346+
~GpuData();
350347

351-
DevicePtr(const DevicePtr&) = delete;
352-
DevicePtr& operator=(const DevicePtr&) = delete;
348+
GpuData(const GpuData&) = delete;
349+
GpuData& operator=(const GpuData&) = delete;
353350

354-
DevicePtr(DevicePtr&&) = delete;
355-
DevicePtr& operator=(DevicePtr&&) = delete;
351+
GpuData(GpuData&&) = delete;
352+
GpuData& operator=(GpuData&&) = delete;
356353

357-
uchar* data;
358-
};
354+
uchar* data;
355+
size_t size;
356+
};
359357

358+
class CV_EXPORTS_W GpuMatND
359+
{
360+
public:
360361
using SizeArray = std::vector<int>;
361362
using StepArray = std::vector<size_t>;
362363
using IndexArray = std::vector<int>;
@@ -466,9 +467,11 @@ class CV_EXPORTS_W GpuMatND
466467
//! returns true if data is null
467468
bool empty() const;
468469

469-
//! returns true if points to external(user-allocated) gpu memory
470+
//! returns true if not empty and points to external(user-allocated) gpu memory
470471
bool external() const;
471472

473+
uchar* getDevicePtr() const;
474+
472475
//! returns the total number of array elements
473476
size_t total() const;
474477

@@ -493,13 +496,6 @@ class CV_EXPORTS_W GpuMatND
493496

494497
int dims;
495498

496-
/*! pointer to the data
497-
If this is a submatrix of a larger matrix, this points to the first
498-
element of the submatrix, and it can be different from data_->data.
499-
If this is not a submatrix, then data is always equal to data_->data.
500-
*/
501-
uchar* data;
502-
503499
//! shape of this array
504500
SizeArray size;
505501

@@ -509,8 +505,23 @@ class CV_EXPORTS_W GpuMatND
509505
StepArray step;
510506

511507
private:
512-
//! internal use
513-
std::shared_ptr<DevicePtr> data_;
508+
/*! internal use
509+
If this GpuMatND holds external memory, this is empty.
510+
*/
511+
std::shared_ptr<GpuData> data_;
512+
513+
/*! internal use
514+
If this GpuMatND manages memory with reference counting, this value is
515+
always equal to data_->data. If this GpuMatND holds external memory,
516+
data_ is empty and data points to the external memory.
517+
*/
518+
uchar* data;
519+
520+
/*! internal use
521+
If this GpuMatND is a sub-matrix of a larger matrix, this value is the
522+
difference of the first byte between the sub-matrix and the whole matrix.
523+
*/
524+
size_t offset;
514525
};
515526

516527
/** @brief Creates a continuous matrix.

modules/core/include/opencv2/core/cuda.inl.hpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -389,13 +389,13 @@ void swap(GpuMat& a, GpuMat& b)
389389

390390
inline
391391
GpuMatND::GpuMatND() :
392-
flags(0), dims(0), data(nullptr)
392+
flags(0), dims(0), data(nullptr), offset(0)
393393
{
394394
}
395395

396396
inline
397397
GpuMatND::GpuMatND(SizeArray _size, int _type) :
398-
flags(0), dims(0), data(nullptr)
398+
flags(0), dims(0), data(nullptr), offset(0)
399399
{
400400
create(std::move(_size), _type);
401401
}
@@ -442,6 +442,12 @@ bool GpuMatND::external() const
442442
return !empty() && data_.use_count() == 0;
443443
}
444444

445+
inline
446+
uchar* GpuMatND::getDevicePtr() const
447+
{
448+
return data + offset;
449+
}
450+
445451
inline
446452
size_t GpuMatND::total() const
447453
{

modules/core/src/cuda/gpu_mat_nd.cu

Lines changed: 21 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
// This file is part of OpenCV project.
2+
// It is subject to the license terms in the LICENSE file found in the top-level directory
3+
// of this distribution and at http://opencv.org/license.html.
4+
15
#include "opencv2/opencv_modules.hpp"
26

37
#ifndef HAVE_OPENCV_CUDEV
@@ -12,13 +16,13 @@
1216
using namespace cv;
1317
using namespace cv::cuda;
1418

15-
GpuMatND::DevicePtr::DevicePtr(const size_t _size)
16-
: data(nullptr)
19+
GpuData::GpuData(const size_t _size)
20+
: data(nullptr), size(_size)
1721
{
1822
CV_CUDEV_SAFE_CALL(cudaMalloc(&data, _size));
1923
}
2024

21-
GpuMatND::DevicePtr::~DevicePtr()
25+
GpuData::~GpuData()
2226
{
2327
CV_CUDEV_SAFE_CALL(cudaFree(data));
2428
}
@@ -45,7 +49,7 @@ void GpuMatND::create(SizeArray _size, int _type)
4549

4650
setFields(std::move(_size), _type);
4751

48-
data_ = std::make_shared<DevicePtr>(totalMemSize());
52+
data_ = std::make_shared<GpuData>(totalMemSize());
4953
data = data_->data;
5054
}
5155

@@ -57,7 +61,7 @@ void GpuMatND::release()
5761
data = nullptr;
5862
data_.reset();
5963

60-
flags = dims = 0;
64+
flags = dims = offset = 0;
6165
size.clear();
6266
step.clear();
6367
}
@@ -71,7 +75,7 @@ GpuMatND GpuMatND::clone() const
7175

7276
if (isContinuous())
7377
{
74-
CV_CUDEV_SAFE_CALL(cudaMemcpy(ret.data, data, totalMemSize(), cudaMemcpyDeviceToDevice));
78+
CV_CUDEV_SAFE_CALL(cudaMemcpy(ret.getDevicePtr(), getDevicePtr(), totalMemSize(), cudaMemcpyDeviceToDevice));
7579
}
7680
else
7781
{
@@ -80,7 +84,7 @@ GpuMatND GpuMatND::clone() const
8084
if (dims == 2)
8185
{
8286
CV_CUDEV_SAFE_CALL(
83-
cudaMemcpy2D(ret.data, ret.step[0], data, step[0],
87+
cudaMemcpy2D(ret.getDevicePtr(), ret.step[0], getDevicePtr(), step[0],
8488
size[1]*step[1], size[0], cudaMemcpyDeviceToDevice)
8589
);
8690
}
@@ -90,8 +94,8 @@ GpuMatND GpuMatND::clone() const
9094

9195
bool end = false;
9296

93-
uchar* d = ret.data;
94-
uchar* s = data;
97+
uchar* d = ret.getDevicePtr();
98+
const uchar* s = getDevicePtr();
9599

96100
// iterate each 2D plane
97101
do
@@ -142,7 +146,7 @@ GpuMatND GpuMatND::clone(Stream& stream) const
142146

143147
if (isContinuous())
144148
{
145-
CV_CUDEV_SAFE_CALL(cudaMemcpyAsync(ret.data, data, totalMemSize(), cudaMemcpyDeviceToDevice, _stream));
149+
CV_CUDEV_SAFE_CALL(cudaMemcpyAsync(ret.getDevicePtr(), getDevicePtr(), totalMemSize(), cudaMemcpyDeviceToDevice, _stream));
146150
}
147151
else
148152
{
@@ -151,7 +155,7 @@ GpuMatND GpuMatND::clone(Stream& stream) const
151155
if (dims == 2)
152156
{
153157
CV_CUDEV_SAFE_CALL(
154-
cudaMemcpy2DAsync(ret.data, ret.step[0], data, step[0],
158+
cudaMemcpy2DAsync(ret.getDevicePtr(), ret.step[0], getDevicePtr(), step[0],
155159
size[1]*step[1], size[0], cudaMemcpyDeviceToDevice, _stream)
156160
);
157161
}
@@ -161,8 +165,8 @@ GpuMatND GpuMatND::clone(Stream& stream) const
161165

162166
bool end = false;
163167

164-
uchar* d = ret.data;
165-
uchar* s = data;
168+
uchar* d = ret.getDevicePtr();
169+
const uchar* s = getDevicePtr();
166170

167171
// iterate each 2D plane
168172
do
@@ -222,7 +226,7 @@ void GpuMatND::upload(InputArray src)
222226

223227
create(std::move(_size), mat.type());
224228

225-
CV_CUDEV_SAFE_CALL(cudaMemcpy(data, mat.data, totalMemSize(), cudaMemcpyHostToDevice));
229+
CV_CUDEV_SAFE_CALL(cudaMemcpy(getDevicePtr(), mat.data, totalMemSize(), cudaMemcpyHostToDevice));
226230
}
227231

228232
void GpuMatND::upload(InputArray src, Stream& stream)
@@ -240,7 +244,7 @@ void GpuMatND::upload(InputArray src, Stream& stream)
240244
create(std::move(_size), mat.type());
241245

242246
cudaStream_t _stream = StreamAccessor::getStream(stream);
243-
CV_CUDEV_SAFE_CALL(cudaMemcpyAsync(data, mat.data, totalMemSize(), cudaMemcpyHostToDevice, _stream));
247+
CV_CUDEV_SAFE_CALL(cudaMemcpyAsync(getDevicePtr(), mat.data, totalMemSize(), cudaMemcpyHostToDevice, _stream));
244248
}
245249

246250
/////////////////////////////////////////////////////
@@ -258,7 +262,7 @@ void GpuMatND::download(OutputArray dst) const
258262
if (!gmat.isContinuous())
259263
gmat = gmat.clone();
260264

261-
CV_CUDEV_SAFE_CALL(cudaMemcpy(mat.data, gmat.data, gmat.totalMemSize(), cudaMemcpyDeviceToHost));
265+
CV_CUDEV_SAFE_CALL(cudaMemcpy(mat.data, gmat.getDevicePtr(), gmat.totalMemSize(), cudaMemcpyDeviceToHost));
262266
}
263267

264268
void GpuMatND::download(OutputArray dst, Stream& stream) const
@@ -274,7 +278,7 @@ void GpuMatND::download(OutputArray dst, Stream& stream) const
274278
gmat = gmat.clone(stream);
275279

276280
cudaStream_t _stream = StreamAccessor::getStream(stream);
277-
CV_CUDEV_SAFE_CALL(cudaMemcpyAsync(mat.data, gmat.data, gmat.totalMemSize(), cudaMemcpyDeviceToHost, _stream));
281+
CV_CUDEV_SAFE_CALL(cudaMemcpyAsync(mat.data, gmat.getDevicePtr(), gmat.totalMemSize(), cudaMemcpyDeviceToHost, _stream));
278282
}
279283

280284
#endif

modules/core/src/cuda_gpu_mat_nd.cpp

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,14 @@
1+
// This file is part of OpenCV project.
2+
// It is subject to the license terms in the LICENSE file found in the top-level directory
3+
// of this distribution and at http://opencv.org/license.html.
4+
15
#include "precomp.hpp"
26

37
using namespace cv;
48
using namespace cv::cuda;
59

610
GpuMatND::GpuMatND(SizeArray _size, int _type, void* _data, StepArray _step) :
7-
flags(0), dims(0), data(static_cast<uchar*>(_data))
11+
flags(0), dims(0), data(static_cast<uchar*>(_data)), offset(0)
812
{
913
CV_Assert(_step.empty() || _size.size() == _step.size() + 1);
1014

@@ -28,7 +32,7 @@ GpuMatND GpuMatND::operator()(const std::vector<Range>& ranges) const
2832
Range r = ranges[i];
2933
if (r != Range::all() && r != Range(0, ret.size[i]))
3034
{
31-
ret.data += r.start * ret.step[i];
35+
ret.offset += r.start * ret.step[i];
3236
ret.size[i] = r.size();
3337
ret.flags |= Mat::SUBMATRIX_FLAG;
3438
}
@@ -63,7 +67,7 @@ GpuMat GpuMatND::createGpuMatHeader() const
6367
};
6468
CV_Assert(Effectively2D(*this));
6569

66-
return GpuMat(size[dims-2], size[dims-1], type(), data, step[dims-2]);
70+
return GpuMat(size[dims-2], size[dims-1], type(), getDevicePtr(), step[dims-2]);
6771
}
6872

6973
GpuMat GpuMatND::operator()(IndexArray idx, Range rowRange, Range colRange) const
@@ -111,6 +115,16 @@ void GpuMatND::setFields(SizeArray _size, int _type, StepArray _step)
111115

112116
#ifndef HAVE_CUDA
113117

118+
GpuData::GpuData(const size_t _size)
119+
: data(nullptr), size(_size)
120+
{
121+
throw_no_cuda();
122+
}
123+
124+
GpuData::~GpuData()
125+
{
126+
}
127+
114128
void GpuMatND::create(SizeArray _size, int _type)
115129
{
116130
CV_UNUSED(_size);
@@ -128,16 +142,36 @@ GpuMatND GpuMatND::clone() const
128142
throw_no_cuda();
129143
}
130144

145+
GpuMatND GpuMatND::clone(Stream& stream) const
146+
{
147+
CV_UNUSED(stream);
148+
throw_no_cuda();
149+
}
150+
131151
void GpuMatND::upload(InputArray src)
132152
{
133153
CV_UNUSED(src);
134154
throw_no_cuda();
135155
}
136156

157+
void GpuMatND::upload(InputArray src, Stream& stream)
158+
{
159+
CV_UNUSED(src);
160+
CV_UNUSED(stream);
161+
throw_no_cuda();
162+
}
163+
137164
void GpuMatND::download(OutputArray dst) const
138165
{
139166
CV_UNUSED(dst);
140167
throw_no_cuda();
141168
}
142169

170+
void GpuMatND::download(OutputArray dst, Stream& stream) const
171+
{
172+
CV_UNUSED(dst);
173+
CV_UNUSED(stream);
174+
throw_no_cuda();
175+
}
176+
143177
#endif

0 commit comments

Comments
 (0)