GpuMatND - GpuData, offset, getDevicePtr(), license

cognex-nalee · cognex-nalee · commit ab648b55f027 · 2021-01-18T23:42:15.000+09:00
diff --git a/modules/core/include/opencv2/core/cuda.hpp b/modules/core/include/opencv2/core/cuda.hpp
@@ -340,23 +340,24 @@ class CV_EXPORTS_W GpuMat
     Allocator* allocator;
 };
 
-class CV_EXPORTS_W GpuMatND
+struct CV_EXPORTS_W GpuData
 {
-public:
-    struct CV_EXPORTS_W DevicePtr
-    {
-        explicit DevicePtr(size_t _size);
-         ~DevicePtr();
+    explicit GpuData(size_t _size);
+     ~GpuData();
 
-        DevicePtr(const DevicePtr&) = delete;
-        DevicePtr& operator=(const DevicePtr&) = delete;
+    GpuData(const GpuData&) = delete;
+    GpuData& operator=(const GpuData&) = delete;
 
-        DevicePtr(DevicePtr&&) = delete;
-        DevicePtr& operator=(DevicePtr&&) = delete;
+    GpuData(GpuData&&) = delete;
+    GpuData& operator=(GpuData&&) = delete;
 
-        uchar* data;
-    };
+    uchar* data;
+    size_t size;
+};
 
+class CV_EXPORTS_W GpuMatND
+{
+public:
     using SizeArray = std::vector<int>;
     using StepArray = std::vector<size_t>;
     using IndexArray = std::vector<int>;
@@ -466,9 +467,11 @@ class CV_EXPORTS_W GpuMatND
     //! returns true if data is null
     bool empty() const;
 
-    //! returns true if points to external(user-allocated) gpu memory
+    //! returns true if not empty and points to external (user-allocated) GPU memory
     bool external() const;
 
+    uchar* getDevicePtr() const;
+
     //! returns the total number of array elements
     size_t total() const;
 
@@ -493,13 +496,6 @@ class CV_EXPORTS_W GpuMatND
 
     int dims;
 
-    /*! pointer to the data
-    If this is a submatrix of a larger matrix, this points to the first
-    element of the submatrix, and it can be different from data_->data.
-    If this is not a submatrix, then data is always equal to data_->data.
-    */
-    uchar* data;
-
     //! shape of this array
     SizeArray size;
 
@@ -509,8 +505,23 @@ class CV_EXPORTS_W GpuMatND
     StepArray step;
 
 private:
-    //! internal use
-    std::shared_ptr<DevicePtr> data_;
+    /*! internal use
+    Shared owner of the GPU allocation. If this GpuMatND holds external memory, this is empty.
+    */
+    std::shared_ptr<GpuData> data_;
+
+    /*! internal use
+    Base pointer of the whole matrix; it does not include offset. If this GpuMatND
+    manages memory with reference counting, this is always equal to data_->data.
+    If it holds external memory, data_ is empty and data points to that memory.
+    */
+    uchar* data;
+
+    /*! internal use
+    If this GpuMatND is a sub-matrix of a larger matrix, this is the offset in bytes
+    from data to the first element of the sub-matrix; otherwise it is 0.
+    */
+    size_t offset;
 };
 
 /** @brief Creates a continuous matrix.
diff --git a/modules/core/include/opencv2/core/cuda.inl.hpp b/modules/core/include/opencv2/core/cuda.inl.hpp
@@ -389,13 +389,13 @@ void swap(GpuMat& a, GpuMat& b)
 
 inline
 GpuMatND::GpuMatND() :
-    flags(0), dims(0), data(nullptr)
+    flags(0), dims(0), data(nullptr), offset(0)
 {
 }
 
 inline
 GpuMatND::GpuMatND(SizeArray _size, int _type) :
-    flags(0), dims(0), data(nullptr)
+    flags(0), dims(0), data(nullptr), offset(0)
 {
     create(std::move(_size), _type);
 }
@@ -442,6 +442,12 @@ bool GpuMatND::external() const
     return !empty() && data_.use_count() == 0;
 }
 
+inline
+uchar* GpuMatND::getDevicePtr() const
+{
+    return data + offset;
+}
+
 inline
 size_t GpuMatND::total() const
 {
diff --git a/modules/core/src/cuda/gpu_mat_nd.cu b/modules/core/src/cuda/gpu_mat_nd.cu
@@ -1,3 +1,7 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
 #include "opencv2/opencv_modules.hpp"
 
 #ifndef HAVE_OPENCV_CUDEV
@@ -12,13 +16,13 @@
 using namespace cv;
 using namespace cv::cuda;
 
-GpuMatND::DevicePtr::DevicePtr(const size_t _size)
-    : data(nullptr)
+GpuData::GpuData(const size_t _size)
+    : data(nullptr), size(_size)
 {
     CV_CUDEV_SAFE_CALL(cudaMalloc(&data, _size));
 }
 
-GpuMatND::DevicePtr::~DevicePtr()
+GpuData::~GpuData()
 {
     CV_CUDEV_SAFE_CALL(cudaFree(data));
 }
@@ -45,7 +49,7 @@ void GpuMatND::create(SizeArray _size, int _type)
 
     setFields(std::move(_size), _type);
 
-    data_ = std::make_shared<DevicePtr>(totalMemSize());
+    data_ = std::make_shared<GpuData>(totalMemSize());
     data = data_->data;
 }
 
@@ -57,7 +61,7 @@ void GpuMatND::release()
     data = nullptr;
     data_.reset();
 
-    flags = dims = 0;
+    flags = dims = offset = 0;
     size.clear();
     step.clear();
 }
@@ -71,7 +75,7 @@ GpuMatND GpuMatND::clone() const
 
     if (isContinuous())
     {
-        CV_CUDEV_SAFE_CALL(cudaMemcpy(ret.data, data, totalMemSize(), cudaMemcpyDeviceToDevice));
+        CV_CUDEV_SAFE_CALL(cudaMemcpy(ret.getDevicePtr(), getDevicePtr(), totalMemSize(), cudaMemcpyDeviceToDevice));
     }
     else
     {
@@ -80,7 +84,7 @@ GpuMatND GpuMatND::clone() const
         if (dims == 2)
         {
             CV_CUDEV_SAFE_CALL(
-                cudaMemcpy2D(ret.data, ret.step[0], data, step[0],
+                cudaMemcpy2D(ret.getDevicePtr(), ret.step[0], getDevicePtr(), step[0],
                     size[1]*step[1], size[0], cudaMemcpyDeviceToDevice)
             );
         }
@@ -90,8 +94,8 @@ GpuMatND GpuMatND::clone() const
 
             bool end = false;
 
-            uchar* d = ret.data;
-            uchar* s = data;
+            uchar* d = ret.getDevicePtr();
+            const uchar* s = getDevicePtr();
 
             // iterate each 2D plane
             do
@@ -142,7 +146,7 @@ GpuMatND GpuMatND::clone(Stream& stream) const
 
     if (isContinuous())
     {
-        CV_CUDEV_SAFE_CALL(cudaMemcpyAsync(ret.data, data, totalMemSize(), cudaMemcpyDeviceToDevice, _stream));
+        CV_CUDEV_SAFE_CALL(cudaMemcpyAsync(ret.getDevicePtr(), getDevicePtr(), totalMemSize(), cudaMemcpyDeviceToDevice, _stream));
     }
     else
     {
@@ -151,7 +155,7 @@ GpuMatND GpuMatND::clone(Stream& stream) const
         if (dims == 2)
         {
             CV_CUDEV_SAFE_CALL(
-                cudaMemcpy2DAsync(ret.data, ret.step[0], data, step[0],
+                cudaMemcpy2DAsync(ret.getDevicePtr(), ret.step[0], getDevicePtr(), step[0],
                     size[1]*step[1], size[0], cudaMemcpyDeviceToDevice, _stream)
             );
         }
@@ -161,8 +165,8 @@ GpuMatND GpuMatND::clone(Stream& stream) const
 
             bool end = false;
 
-            uchar* d = ret.data;
-            uchar* s = data;
+            uchar* d = ret.getDevicePtr();
+            const uchar* s = getDevicePtr();
 
             // iterate each 2D plane
             do
@@ -222,7 +226,7 @@ void GpuMatND::upload(InputArray src)
 
     create(std::move(_size), mat.type());
 
-    CV_CUDEV_SAFE_CALL(cudaMemcpy(data, mat.data, totalMemSize(), cudaMemcpyHostToDevice));
+    CV_CUDEV_SAFE_CALL(cudaMemcpy(getDevicePtr(), mat.data, totalMemSize(), cudaMemcpyHostToDevice));
 }
 
 void GpuMatND::upload(InputArray src, Stream& stream)
@@ -240,7 +244,7 @@ void GpuMatND::upload(InputArray src, Stream& stream)
     create(std::move(_size), mat.type());
 
     cudaStream_t _stream = StreamAccessor::getStream(stream);
-    CV_CUDEV_SAFE_CALL(cudaMemcpyAsync(data, mat.data, totalMemSize(), cudaMemcpyHostToDevice, _stream));
+    CV_CUDEV_SAFE_CALL(cudaMemcpyAsync(getDevicePtr(), mat.data, totalMemSize(), cudaMemcpyHostToDevice, _stream));
 }
 
 /////////////////////////////////////////////////////
@@ -258,7 +262,7 @@ void GpuMatND::download(OutputArray dst) const
     if (!gmat.isContinuous())
         gmat = gmat.clone();
 
-    CV_CUDEV_SAFE_CALL(cudaMemcpy(mat.data, gmat.data, gmat.totalMemSize(), cudaMemcpyDeviceToHost));
+    CV_CUDEV_SAFE_CALL(cudaMemcpy(mat.data, gmat.getDevicePtr(), gmat.totalMemSize(), cudaMemcpyDeviceToHost));
 }
 
 void GpuMatND::download(OutputArray dst, Stream& stream) const
@@ -274,7 +278,7 @@ void GpuMatND::download(OutputArray dst, Stream& stream) const
         gmat = gmat.clone(stream);
 
     cudaStream_t _stream = StreamAccessor::getStream(stream);
-    CV_CUDEV_SAFE_CALL(cudaMemcpyAsync(mat.data, gmat.data, gmat.totalMemSize(), cudaMemcpyDeviceToHost, _stream));
+    CV_CUDEV_SAFE_CALL(cudaMemcpyAsync(mat.data, gmat.getDevicePtr(), gmat.totalMemSize(), cudaMemcpyDeviceToHost, _stream));
 }
 
 #endif
diff --git a/modules/core/src/cuda_gpu_mat_nd.cpp b/modules/core/src/cuda_gpu_mat_nd.cpp
@@ -1,10 +1,14 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
 #include "precomp.hpp"
 
 using namespace cv;
 using namespace cv::cuda;
 
 GpuMatND::GpuMatND(SizeArray _size, int _type, void* _data, StepArray _step) :
-    flags(0), dims(0), data(static_cast<uchar*>(_data))
+    flags(0), dims(0), data(static_cast<uchar*>(_data)), offset(0)
 {
     CV_Assert(_step.empty() || _size.size() == _step.size() + 1);
 
@@ -28,7 +32,7 @@ GpuMatND GpuMatND::operator()(const std::vector<Range>& ranges) const
         Range r = ranges[i];
         if (r != Range::all() && r != Range(0, ret.size[i]))
         {
-            ret.data += r.start * ret.step[i];
+            ret.offset += r.start * ret.step[i];
             ret.size[i] = r.size();
             ret.flags |= Mat::SUBMATRIX_FLAG;
         }
@@ -63,7 +67,7 @@ GpuMat GpuMatND::createGpuMatHeader() const
     };
     CV_Assert(Effectively2D(*this));
 
-    return GpuMat(size[dims-2], size[dims-1], type(), data, step[dims-2]);
+    return GpuMat(size[dims-2], size[dims-1], type(), getDevicePtr(), step[dims-2]);
 }
 
 GpuMat GpuMatND::operator()(IndexArray idx, Range rowRange, Range colRange) const
@@ -111,6 +115,16 @@ void GpuMatND::setFields(SizeArray _size, int _type, StepArray _step)
 
 #ifndef HAVE_CUDA
 
+GpuData::GpuData(const size_t _size)
+    : data(nullptr), size(_size)
+{
+    throw_no_cuda();
+}
+
+GpuData::~GpuData()
+{
+}
+
 void GpuMatND::create(SizeArray _size, int _type)
 {
     CV_UNUSED(_size);
@@ -128,16 +142,36 @@ GpuMatND GpuMatND::clone() const
     throw_no_cuda();
 }
 
+GpuMatND GpuMatND::clone(Stream& stream) const
+{
+    CV_UNUSED(stream);
+    throw_no_cuda();
+}
+
 void GpuMatND::upload(InputArray src)
 {
     CV_UNUSED(src);
     throw_no_cuda();
 }
 
+void GpuMatND::upload(InputArray src, Stream& stream)
+{
+    CV_UNUSED(src);
+    CV_UNUSED(stream);
+    throw_no_cuda();
+}
+
 void GpuMatND::download(OutputArray dst) const
 {
     CV_UNUSED(dst);
     throw_no_cuda();
 }
 
+void GpuMatND::download(OutputArray dst, Stream& stream) const
+{
+    CV_UNUSED(dst);
+    CV_UNUSED(stream);
+    throw_no_cuda();
+}
+
 #endif

Original file line number	Diff line number	Diff line change
`@@ -389,13 +389,13 @@ void swap(GpuMat& a, GpuMat& b)`
`389`	`389`
`390`	`390`	`inline`
`391`	`391`	`GpuMatND::GpuMatND() :`
`392`		`- flags(0), dims(0), data(nullptr)`
	`392`	`+ flags(0), dims(0), data(nullptr), offset(0)`
`393`	`393`	`{`
`394`	`394`	`}`
`395`	`395`
`396`	`396`	`inline`
`397`	`397`	`GpuMatND::GpuMatND(SizeArray _size, int _type) :`
`398`		`- flags(0), dims(0), data(nullptr)`
	`398`	`+ flags(0), dims(0), data(nullptr), offset(0)`
`399`	`399`	`{`
`400`	`400`	`create(std::move(_size), _type);`
`401`	`401`	`}`
`@@ -442,6 +442,12 @@ bool GpuMatND::external() const`
`442`	`442`	`return !empty() && data_.use_count() == 0;`
`443`	`443`	`}`
`444`	`444`
	`445`	`+inline`
	`446`	`+uchar* GpuMatND::getDevicePtr() const`
	`447`	`+{`
	`448`	`+ return data + offset;`
	`449`	`+}`
	`450`	`+`
`445`	`451`	`inline`
`446`	`452`	`size_t GpuMatND::total() const`
`447`	`453`	`{`
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,7 @@`
	`1`	`+// This file is part of OpenCV project.`
	`2`	`+// It is subject to the license terms in the LICENSE file found in the top-level directory`
	`3`	`+// of this distribution and at http://opencv.org/license.html.`
	`4`	`+`
`1`	`5`	`#include "opencv2/opencv_modules.hpp"`
`2`	`6`
`3`	`7`	`#ifndef HAVE_OPENCV_CUDEV`
`@@ -12,13 +16,13 @@`
`12`	`16`	`using namespace cv;`
`13`	`17`	`using namespace cv::cuda;`
`14`	`18`
`15`		`-GpuMatND::DevicePtr::DevicePtr(const size_t _size)`
`16`		`- : data(nullptr)`
	`19`	`+GpuData::GpuData(const size_t _size)`
	`20`	`+ : data(nullptr), size(_size)`
`17`	`21`	`{`
`18`	`22`	`CV_CUDEV_SAFE_CALL(cudaMalloc(&data, _size));`
`19`	`23`	`}`
`20`	`24`
`21`		`-GpuMatND::DevicePtr::~DevicePtr()`
	`25`	`+GpuData::~GpuData()`
`22`	`26`	`{`
`23`	`27`	`CV_CUDEV_SAFE_CALL(cudaFree(data));`
`24`	`28`	`}`
`@@ -45,7 +49,7 @@ void GpuMatND::create(SizeArray _size, int _type)`
`45`	`49`
`46`	`50`	`setFields(std::move(_size), _type);`
`47`	`51`
`48`		`- data_ = std::make_shared<DevicePtr>(totalMemSize());`
	`52`	`+ data_ = std::make_shared<GpuData>(totalMemSize());`
`49`	`53`	`data = data_->data;`
`50`	`54`	`}`
`51`	`55`
`@@ -57,7 +61,7 @@ void GpuMatND::release()`
`57`	`61`	`data = nullptr;`
`58`	`62`	`data_.reset();`
`59`	`63`
`60`		`- flags = dims = 0;`
	`64`	`+ flags = dims = offset = 0;`
`61`	`65`	`size.clear();`
`62`	`66`	`step.clear();`
`63`	`67`	`}`
`@@ -71,7 +75,7 @@ GpuMatND GpuMatND::clone() const`
`71`	`75`
`72`	`76`	`if (isContinuous())`
`73`	`77`	`{`
`74`		`- CV_CUDEV_SAFE_CALL(cudaMemcpy(ret.data, data, totalMemSize(), cudaMemcpyDeviceToDevice));`
	`78`	`+ CV_CUDEV_SAFE_CALL(cudaMemcpy(ret.getDevicePtr(), getDevicePtr(), totalMemSize(), cudaMemcpyDeviceToDevice));`
`75`	`79`	`}`
`76`	`80`	`else`
`77`	`81`	`{`
`@@ -80,7 +84,7 @@ GpuMatND GpuMatND::clone() const`
`80`	`84`	`if (dims == 2)`
`81`	`85`	`{`
`82`	`86`	`CV_CUDEV_SAFE_CALL(`
`83`		`- cudaMemcpy2D(ret.data, ret.step[0], data, step[0],`
	`87`	`+ cudaMemcpy2D(ret.getDevicePtr(), ret.step[0], getDevicePtr(), step[0],`
`84`	`88`	`size[1]*step[1], size[0], cudaMemcpyDeviceToDevice)`
`85`	`89`	`);`
`86`	`90`	`}`
`@@ -90,8 +94,8 @@ GpuMatND GpuMatND::clone() const`
`90`	`94`
`91`	`95`	`bool end = false;`
`92`	`96`
`93`		`- uchar* d = ret.data;`
`94`		`- uchar* s = data;`
	`97`	`+ uchar* d = ret.getDevicePtr();`
	`98`	`+ const uchar* s = getDevicePtr();`
`95`	`99`
`96`	`100`	`// iterate each 2D plane`
`97`	`101`	`do`
`@@ -142,7 +146,7 @@ GpuMatND GpuMatND::clone(Stream& stream) const`
`142`	`146`
`143`	`147`	`if (isContinuous())`
`144`	`148`	`{`
`145`		`- CV_CUDEV_SAFE_CALL(cudaMemcpyAsync(ret.data, data, totalMemSize(), cudaMemcpyDeviceToDevice, _stream));`
	`149`	`+ CV_CUDEV_SAFE_CALL(cudaMemcpyAsync(ret.getDevicePtr(), getDevicePtr(), totalMemSize(), cudaMemcpyDeviceToDevice, _stream));`
`146`	`150`	`}`
`147`	`151`	`else`
`148`	`152`	`{`
`@@ -151,7 +155,7 @@ GpuMatND GpuMatND::clone(Stream& stream) const`
`151`	`155`	`if (dims == 2)`
`152`	`156`	`{`
`153`	`157`	`CV_CUDEV_SAFE_CALL(`
`154`		`- cudaMemcpy2DAsync(ret.data, ret.step[0], data, step[0],`
	`158`	`+ cudaMemcpy2DAsync(ret.getDevicePtr(), ret.step[0], getDevicePtr(), step[0],`
`155`	`159`	`size[1]*step[1], size[0], cudaMemcpyDeviceToDevice, _stream)`
`156`	`160`	`);`
`157`	`161`	`}`
`@@ -161,8 +165,8 @@ GpuMatND GpuMatND::clone(Stream& stream) const`
`161`	`165`
`162`	`166`	`bool end = false;`
`163`	`167`
`164`		`- uchar* d = ret.data;`
`165`		`- uchar* s = data;`
	`168`	`+ uchar* d = ret.getDevicePtr();`
	`169`	`+ const uchar* s = getDevicePtr();`
`166`	`170`
`167`	`171`	`// iterate each 2D plane`
`168`	`172`	`do`
`@@ -222,7 +226,7 @@ void GpuMatND::upload(InputArray src)`
`222`	`226`
`223`	`227`	`create(std::move(_size), mat.type());`
`224`	`228`
`225`		`- CV_CUDEV_SAFE_CALL(cudaMemcpy(data, mat.data, totalMemSize(), cudaMemcpyHostToDevice));`
	`229`	`+ CV_CUDEV_SAFE_CALL(cudaMemcpy(getDevicePtr(), mat.data, totalMemSize(), cudaMemcpyHostToDevice));`
`226`	`230`	`}`
`227`	`231`
`228`	`232`	`void GpuMatND::upload(InputArray src, Stream& stream)`
`@@ -240,7 +244,7 @@ void GpuMatND::upload(InputArray src, Stream& stream)`
`240`	`244`	`create(std::move(_size), mat.type());`
`241`	`245`
`242`	`246`	`cudaStream_t _stream = StreamAccessor::getStream(stream);`
`243`		`- CV_CUDEV_SAFE_CALL(cudaMemcpyAsync(data, mat.data, totalMemSize(), cudaMemcpyHostToDevice, _stream));`
	`247`	`+ CV_CUDEV_SAFE_CALL(cudaMemcpyAsync(getDevicePtr(), mat.data, totalMemSize(), cudaMemcpyHostToDevice, _stream));`
`244`	`248`	`}`
`245`	`249`
`246`	`250`	`/////////////////////////////////////////////////////`
`@@ -258,7 +262,7 @@ void GpuMatND::download(OutputArray dst) const`
`258`	`262`	`if (!gmat.isContinuous())`
`259`	`263`	`gmat = gmat.clone();`
`260`	`264`
`261`		`- CV_CUDEV_SAFE_CALL(cudaMemcpy(mat.data, gmat.data, gmat.totalMemSize(), cudaMemcpyDeviceToHost));`
	`265`	`+ CV_CUDEV_SAFE_CALL(cudaMemcpy(mat.data, gmat.getDevicePtr(), gmat.totalMemSize(), cudaMemcpyDeviceToHost));`
`262`	`266`	`}`
`263`	`267`
`264`	`268`	`void GpuMatND::download(OutputArray dst, Stream& stream) const`
`@@ -274,7 +278,7 @@ void GpuMatND::download(OutputArray dst, Stream& stream) const`
`274`	`278`	`gmat = gmat.clone(stream);`
`275`	`279`
`276`	`280`	`cudaStream_t _stream = StreamAccessor::getStream(stream);`
`277`		`- CV_CUDEV_SAFE_CALL(cudaMemcpyAsync(mat.data, gmat.data, gmat.totalMemSize(), cudaMemcpyDeviceToHost, _stream));`
	`281`	`+ CV_CUDEV_SAFE_CALL(cudaMemcpyAsync(mat.data, gmat.getDevicePtr(), gmat.totalMemSize(), cudaMemcpyDeviceToHost, _stream));`
`278`	`282`	`}`
`279`	`283`
`280`	`284`	`#endif`