Skip to content

Commit 554c47e

Browse files
committed
parallelize non linear diffusion computation
1 parent 4ea5bbd commit 554c47e

File tree

1 file changed

+32
-9
lines changed

1 file changed

+32
-9
lines changed

modules/features2d/src/kaze/AKAZEFeatures.cpp

Lines changed: 32 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -112,16 +112,18 @@ static inline int getGaussianKernelSize(float sigma) {
112112
/* ************************************************************************* */
113113
/**
114114
* @brief This function computes a scalar non-linear diffusion step
115-
* @param Ld Base image in the evolution
116-
* @param c Conductivity image
115+
* @param Lt Base image in the evolution
116+
* @param Lf Conductivity image
117117
* @param Lstep Output image that gives the difference between the current
118118
* Lt and the next Lt being evolved
119+
* @param row_begin first row to process (inclusive)
120+
* @param row_end one past the last row to process; the processed range is [row_begin, row_end).
119121
* @note Forward Euler Scheme 3x3 stencil
120122
* The function c is a scalar value that depends on the gradient norm
121123
* dL_by_ds = d(c dL_by_dx)_by_dx + d(c dL_by_dy)_by_dy
122124
*/
123125
static inline void
124-
nld_step_scalar_one_lane(const cv::Mat& Lt, const cv::Mat& Lf, cv::Mat& Lstep, int idx, int skip)
126+
nld_step_scalar_one_lane(const Mat& Lt, const Mat& Lf, Mat& Lstep, int row_begin, int row_end)
125127
{
126128
CV_INSTRUMENT_REGION()
127129
/* The labeling scheme for this five star stencil:
@@ -132,7 +134,7 @@ nld_step_scalar_one_lane(const cv::Mat& Lt, const cv::Mat& Lf, cv::Mat& Lstep, i
132134

133135
Lstep.create(Lt.size(), Lt.type());
134136
const int cols = Lt.cols - 2;
135-
int row = idx;
137+
int row = row_begin;
136138

137139
const float *lt_a, *lt_c, *lt_b;
138140
const float *lf_a, *lf_c, *lf_b;
@@ -151,11 +153,12 @@ nld_step_scalar_one_lane(const cv::Mat& Lt, const cv::Mat& Lf, cv::Mat& Lstep, i
151153
(lf_c[j] + lf_c[j - 1])*(lt_c[j - 1] - lt_c[j]) +
152154
(lf_c[j] + lf_b[j ])*(lt_b[j ] - lt_c[j]);
153155
}
154-
row += skip;
156+
++row;
155157
}
156158

157159
// Process the middle rows
158-
for (; row < Lt.rows - 1; row += skip)
160+
int middle_end = std::min(Lt.rows - 1, row_end);
161+
for (; row < middle_end; ++row)
159162
{
160163
lt_a = Lt.ptr<float>(row - 1);
161164
lf_a = Lf.ptr<float>(row - 1);
@@ -189,8 +192,8 @@ nld_step_scalar_one_lane(const cv::Mat& Lt, const cv::Mat& Lf, cv::Mat& Lstep, i
189192
(lf_c[cols] + lf_a[cols ])*(lt_a[cols ] - lt_c[cols]);
190193
}
191194

192-
// Process the bottom row
193-
if (row == Lt.rows - 1) {
195+
// Process the bottom row (row == Lt.rows - 1)
196+
if (row_end == Lt.rows) {
194197
lt_a = Lt.ptr<float>(row - 1) + 1; /* Skip the left-most column by +1 */
195198
lf_a = Lf.ptr<float>(row - 1) + 1;
196199
lt_c = Lt.ptr<float>(row ) + 1;
@@ -205,6 +208,24 @@ nld_step_scalar_one_lane(const cv::Mat& Lt, const cv::Mat& Lf, cv::Mat& Lstep, i
205208
}
206209
}
207210

211+
class NonLinearScalarDiffusionStep : public ParallelLoopBody
212+
{
213+
public:
214+
NonLinearScalarDiffusionStep(const Mat& Lt, const Mat& Lf, Mat& Lstep)
215+
: Lt_(&Lt), Lf_(&Lf), Lstep_(&Lstep)
216+
{}
217+
218+
void operator()(const Range& range) const
219+
{
220+
nld_step_scalar_one_lane(*Lt_, *Lf_, *Lstep_, range.start, range.end);
221+
}
222+
223+
private:
224+
const Mat* Lt_;
225+
const Mat* Lf_;
226+
Mat* Lstep_;
227+
};
228+
208229
/**
209230
* @brief This method creates the nonlinear scale space for a given image
210231
* @param img Input image for which the nonlinear scale space needs to be created
@@ -288,7 +309,9 @@ int AKAZEFeatures::Create_Nonlinear_Scale_Space(const Mat& img)
288309
// Perform Fast Explicit Diffusion on Lt
289310
std::vector<float> &tsteps = tsteps_[i - 1];
290311
for (size_t j = 0; j < tsteps.size(); j++) {
291-
nld_step_scalar_one_lane(e.Lt, Lflow, Lstep, 0, 1);
312+
// Lstep must be preallocated before this parallel loop
313+
parallel_for_(Range(0, e.Lt.rows), NonLinearScalarDiffusionStep(e.Lt, Lflow, Lstep),
314+
(double)e.Lt.total()/(1 << 16));
292315
const float step_size = tsteps[j];
293316
e.Lt += Lstep * (0.5f * step_size);
294317
}

0 commit comments

Comments
 (0)