Skip to content

Commit c133518

Browse files
committed
do multiplication right in the nlp diffusivity kernel
1 parent 554c47e commit c133518

File tree

1 file changed

+25
-19
lines changed

1 file changed

+25
-19
lines changed

modules/features2d/src/kaze/AKAZEFeatures.cpp

Lines changed: 25 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ static inline int getGaussianKernelSize(float sigma) {
123123
* dL_by_ds = d(c dL_by_dx)_by_dx + d(c dL_by_dy)_by_dy
124124
*/
125125
static inline void
126-
nld_step_scalar_one_lane(const Mat& Lt, const Mat& Lf, Mat& Lstep, int row_begin, int row_end)
126+
nld_step_scalar_one_lane(const Mat& Lt, const Mat& Lf, Mat& Lstep, float step_size, int row_begin, int row_end)
127127
{
128128
CV_INSTRUMENT_REGION()
129129
/* The labeling scheme for this five star stencil:
@@ -139,6 +139,7 @@ nld_step_scalar_one_lane(const Mat& Lt, const Mat& Lf, Mat& Lstep, int row_begin
139139
const float *lt_a, *lt_c, *lt_b;
140140
const float *lf_a, *lf_c, *lf_b;
141141
float *dst;
142+
float step_r = 0.f;
142143

143144
// Process the top row
144145
if (row == 0) {
@@ -149,9 +150,10 @@ nld_step_scalar_one_lane(const Mat& Lt, const Mat& Lf, Mat& Lstep, int row_begin
149150
dst = Lstep.ptr<float>(0) + 1;
150151

151152
for (int j = 0; j < cols; j++) {
152-
dst[j] = (lf_c[j] + lf_c[j + 1])*(lt_c[j + 1] - lt_c[j]) +
153-
(lf_c[j] + lf_c[j - 1])*(lt_c[j - 1] - lt_c[j]) +
154-
(lf_c[j] + lf_b[j ])*(lt_b[j ] - lt_c[j]);
153+
step_r = (lf_c[j] + lf_c[j + 1])*(lt_c[j + 1] - lt_c[j]) +
154+
(lf_c[j] + lf_c[j - 1])*(lt_c[j - 1] - lt_c[j]) +
155+
(lf_c[j] + lf_b[j ])*(lt_b[j ] - lt_c[j]);
156+
dst[j] = step_r * step_size;
155157
}
156158
++row;
157159
}
@@ -169,9 +171,10 @@ nld_step_scalar_one_lane(const Mat& Lt, const Mat& Lf, Mat& Lstep, int row_begin
169171
dst = Lstep.ptr<float>(row);
170172

171173
// The left-most column
172-
dst[0] = (lf_c[0] + lf_c[1])*(lt_c[1] - lt_c[0]) +
174+
step_r = (lf_c[0] + lf_c[1])*(lt_c[1] - lt_c[0]) +
173175
(lf_c[0] + lf_b[0])*(lt_b[0] - lt_c[0]) +
174176
(lf_c[0] + lf_a[0])*(lt_a[0] - lt_c[0]);
177+
dst[0] = step_r * step_size;
175178

176179
lt_a++; lt_c++; lt_b++;
177180
lf_a++; lf_c++; lf_b++;
@@ -180,16 +183,18 @@ nld_step_scalar_one_lane(const Mat& Lt, const Mat& Lf, Mat& Lstep, int row_begin
180183
// The middle columns
181184
for (int j = 0; j < cols; j++)
182185
{
183-
dst[j] = (lf_c[j] + lf_c[j + 1])*(lt_c[j + 1] - lt_c[j]) +
186+
step_r = (lf_c[j] + lf_c[j + 1])*(lt_c[j + 1] - lt_c[j]) +
184187
(lf_c[j] + lf_c[j - 1])*(lt_c[j - 1] - lt_c[j]) +
185188
(lf_c[j] + lf_b[j ])*(lt_b[j ] - lt_c[j]) +
186189
(lf_c[j] + lf_a[j ])*(lt_a[j ] - lt_c[j]);
190+
dst[j] = step_r * step_size;
187191
}
188192

189193
// The right-most column
190-
dst[cols] = (lf_c[cols] + lf_c[cols - 1])*(lt_c[cols - 1] - lt_c[cols]) +
191-
(lf_c[cols] + lf_b[cols ])*(lt_b[cols ] - lt_c[cols]) +
192-
(lf_c[cols] + lf_a[cols ])*(lt_a[cols ] - lt_c[cols]);
194+
step_r = (lf_c[cols] + lf_c[cols - 1])*(lt_c[cols - 1] - lt_c[cols]) +
195+
(lf_c[cols] + lf_b[cols ])*(lt_b[cols ] - lt_c[cols]) +
196+
(lf_c[cols] + lf_a[cols ])*(lt_a[cols ] - lt_c[cols]);
197+
dst[cols] = step_r * step_size;
193198
}
194199

195200
// Process the bottom row (row == Lt.rows - 1)
@@ -201,29 +206,31 @@ nld_step_scalar_one_lane(const Mat& Lt, const Mat& Lf, Mat& Lstep, int row_begin
201206
dst = Lstep.ptr<float>(row) +1;
202207

203208
for (int j = 0; j < cols; j++) {
204-
dst[j] = (lf_c[j] + lf_c[j + 1])*(lt_c[j + 1] - lt_c[j]) +
205-
(lf_c[j] + lf_c[j - 1])*(lt_c[j - 1] - lt_c[j]) +
206-
(lf_c[j] + lf_a[j ])*(lt_a[j ] - lt_c[j]);
209+
step_r = (lf_c[j] + lf_c[j + 1])*(lt_c[j + 1] - lt_c[j]) +
210+
(lf_c[j] + lf_c[j - 1])*(lt_c[j - 1] - lt_c[j]) +
211+
(lf_c[j] + lf_a[j ])*(lt_a[j ] - lt_c[j]);
212+
dst[j] = step_r * step_size;
207213
}
208214
}
209215
}
210216

211217
class NonLinearScalarDiffusionStep : public ParallelLoopBody
212218
{
213219
public:
214-
NonLinearScalarDiffusionStep(const Mat& Lt, const Mat& Lf, Mat& Lstep)
215-
: Lt_(&Lt), Lf_(&Lf), Lstep_(&Lstep)
220+
NonLinearScalarDiffusionStep(const Mat& Lt, const Mat& Lf, Mat& Lstep, float step_size)
221+
: Lt_(&Lt), Lf_(&Lf), Lstep_(&Lstep), step_size_(step_size)
216222
{}
217223

218224
void operator()(const Range& range) const
219225
{
220-
nld_step_scalar_one_lane(*Lt_, *Lf_, *Lstep_, range.start, range.end);
226+
nld_step_scalar_one_lane(*Lt_, *Lf_, *Lstep_, step_size_, range.start, range.end);
221227
}
222228

223229
private:
224230
const Mat* Lt_;
225231
const Mat* Lf_;
226232
Mat* Lstep_;
233+
float step_size_;
227234
};
228235

229236
/**
@@ -310,10 +317,9 @@ int AKAZEFeatures::Create_Nonlinear_Scale_Space(const Mat& img)
310317
std::vector<float> &tsteps = tsteps_[i - 1];
311318
for (size_t j = 0; j < tsteps.size(); j++) {
312319
// Lstep must be preallocated before this parallel loop
313-
parallel_for_(Range(0, e.Lt.rows), NonLinearScalarDiffusionStep(e.Lt, Lflow, Lstep),
314-
(double)e.Lt.total()/(1 << 16));
315-
const float step_size = tsteps[j];
316-
e.Lt += Lstep * (0.5f * step_size);
320+
const float step_size = tsteps[j] * 0.5f;
321+
parallel_for_(Range(0, e.Lt.rows), NonLinearScalarDiffusionStep(e.Lt, Lflow, Lstep, step_size));
322+
e.Lt += Lstep;
317323
}
318324
}
319325

0 commit comments

Comments
 (0)