Skip to content

Commit fca8463

Browse files
committed
rework diffusity stencil
* use one pass stencil for diffusity from https://github.com/h2suzuki/fast_akaze * improve locality in Create_Scale_Space
1 parent 7f8e9c2 commit fca8463

File tree

2 files changed

+144
-29
lines changed

2 files changed

+144
-29
lines changed

modules/features2d/src/kaze/AKAZEFeatures.cpp

Lines changed: 142 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -62,18 +62,16 @@ void AKAZEFeatures::Allocate_Memory_Evolution(void) {
6262

6363
for (int j = 0; j < options_.nsublevels; j++) {
6464
TEvolution step;
65-
Size size(level_width, level_height);
65+
step.size = Size(level_width, level_height);
6666
// TODO: investigate why these 2 need to be explicitly zero
6767
step.Lx = Mat::zeros(level_height, level_width, CV_32F);
6868
step.Ly = Mat::zeros(level_height, level_width, CV_32F);
6969

70-
step.Lxx.create(size, CV_32F);
71-
step.Lxy.create(size, CV_32F);
72-
step.Lyy.create(size, CV_32F);
73-
step.Lt.create(size, CV_32F);
74-
step.Ldet.create(size, CV_32F);
75-
step.Lflow.create(size, CV_32F);
76-
step.Lstep.create(size, CV_32F);
70+
step.Lxx.create(step.size, CV_32F);
71+
step.Lxy.create(step.size, CV_32F);
72+
step.Lyy.create(step.size, CV_32F);
73+
step.Lt.create(step.size, CV_32F);
74+
step.Ldet.create(step.size, CV_32F);
7775

7876
step.esigma = options_.soffset*pow(2.f, (float)(j) / (float)(options_.nsublevels) + i);
7977
step.sigma_size = fRound(step.esigma * options_.derivative_factor / power); // In fact sigma_size only depends on j
@@ -111,6 +109,102 @@ static inline int getGaussianKernelSize(float sigma) {
111109
return ksize;
112110
}
113111

112+
/* ************************************************************************* */
113+
/**
114+
* @brief This function computes a scalar non-linear diffusion step
115+
* @param Ld Base image in the evolution
116+
* @param c Conductivity image
117+
* @param Lstep Output image that gives the difference between the current
118+
* Ld and the next Ld being evolved
119+
* @note Forward Euler Scheme 3x3 stencil
120+
* The function c is a scalar value that depends on the gradient norm
121+
* dL_by_ds = d(c dL_by_dx)_by_dx + d(c dL_by_dy)_by_dy
122+
*/
123+
static inline void
124+
nld_step_scalar_one_lane(const cv::Mat& Lt, const cv::Mat& Lf, cv::Mat& Lstep, int idx, int skip)
125+
{
126+
CV_INSTRUMENT_REGION()
127+
/* The labeling scheme for this five star stencil:
128+
[ a ]
129+
[ -1 c +1 ]
130+
[ b ]
131+
*/
132+
133+
Lstep.create(Lt.size(), Lt.type());
134+
const int cols = Lt.cols - 2;
135+
int row = idx;
136+
137+
const float *lt_a, *lt_c, *lt_b;
138+
const float *lf_a, *lf_c, *lf_b;
139+
float *dst;
140+
141+
// Process the top row
142+
if (row == 0) {
143+
lt_c = Lt.ptr<float>(0) + 1; /* Skip the left-most column by +1 */
144+
lf_c = Lf.ptr<float>(0) + 1;
145+
lt_b = Lt.ptr<float>(1) + 1;
146+
lf_b = Lf.ptr<float>(1) + 1;
147+
dst = Lstep.ptr<float>(0) + 1;
148+
149+
for (int j = 0; j < cols; j++) {
150+
dst[j] = (lf_c[j] + lf_c[j + 1])*(lt_c[j + 1] - lt_c[j]) +
151+
(lf_c[j] + lf_c[j - 1])*(lt_c[j - 1] - lt_c[j]) +
152+
(lf_c[j] + lf_b[j ])*(lt_b[j ] - lt_c[j]);
153+
}
154+
row += skip;
155+
}
156+
157+
// Process the middle rows
158+
for (; row < Lt.rows - 1; row += skip)
159+
{
160+
lt_a = Lt.ptr<float>(row - 1);
161+
lf_a = Lf.ptr<float>(row - 1);
162+
lt_c = Lt.ptr<float>(row );
163+
lf_c = Lf.ptr<float>(row );
164+
lt_b = Lt.ptr<float>(row + 1);
165+
lf_b = Lf.ptr<float>(row + 1);
166+
dst = Lstep.ptr<float>(row);
167+
168+
// The left-most column
169+
dst[0] = (lf_c[0] + lf_c[1])*(lt_c[1] - lt_c[0]) +
170+
(lf_c[0] + lf_b[0])*(lt_b[0] - lt_c[0]) +
171+
(lf_c[0] + lf_a[0])*(lt_a[0] - lt_c[0]);
172+
173+
lt_a++; lt_c++; lt_b++;
174+
lf_a++; lf_c++; lf_b++;
175+
dst++;
176+
177+
// The middle columns
178+
for (int j = 0; j < cols; j++)
179+
{
180+
dst[j] = (lf_c[j] + lf_c[j + 1])*(lt_c[j + 1] - lt_c[j]) +
181+
(lf_c[j] + lf_c[j - 1])*(lt_c[j - 1] - lt_c[j]) +
182+
(lf_c[j] + lf_b[j ])*(lt_b[j ] - lt_c[j]) +
183+
(lf_c[j] + lf_a[j ])*(lt_a[j ] - lt_c[j]);
184+
}
185+
186+
// The right-most column
187+
dst[cols] = (lf_c[cols] + lf_c[cols - 1])*(lt_c[cols - 1] - lt_c[cols]) +
188+
(lf_c[cols] + lf_b[cols ])*(lt_b[cols ] - lt_c[cols]) +
189+
(lf_c[cols] + lf_a[cols ])*(lt_a[cols ] - lt_c[cols]);
190+
}
191+
192+
// Process the bottom row
193+
if (row == Lt.rows - 1) {
194+
lt_a = Lt.ptr<float>(row - 1) + 1; /* Skip the left-most column by +1 */
195+
lf_a = Lf.ptr<float>(row - 1) + 1;
196+
lt_c = Lt.ptr<float>(row ) + 1;
197+
lf_c = Lf.ptr<float>(row ) + 1;
198+
dst = Lstep.ptr<float>(row) +1;
199+
200+
for (int j = 0; j < cols; j++) {
201+
dst[j] = (lf_c[j] + lf_c[j + 1])*(lt_c[j + 1] - lt_c[j]) +
202+
(lf_c[j] + lf_c[j - 1])*(lt_c[j - 1] - lt_c[j]) +
203+
(lf_c[j] + lf_a[j ])*(lt_a[j ] - lt_c[j]);
204+
}
205+
}
206+
}
207+
114208
/**
115209
* @brief This method creates the nonlinear scale space for a given image
116210
* @param img Input image for which the nonlinear scale space needs to be created
@@ -132,50 +226,71 @@ int AKAZEFeatures::Create_Nonlinear_Scale_Space(const Mat& img)
132226
}
133227

134228
// First compute the kcontrast factor
135-
options_.kcontrast = compute_k_percentile(img, options_.kcontrast_percentile, 1.0f, options_.kcontrast_nbins, 0, 0);
229+
float kcontrast = compute_k_percentile(img, options_.kcontrast_percentile, 1.0f, options_.kcontrast_nbins, 0, 0);
230+
231+
// temporaries for diffusity computation, to reuse the same memory to improve locality
232+
Size base_size = evolution_[0].Lt.size();
233+
// buffers
234+
Mat Lx_buf (base_size, CV_32F);
235+
Mat Ly_buf (base_size, CV_32F);
236+
Mat Lflow_buf (base_size, CV_32F);
237+
Mat Lstep_buf (base_size, CV_32F);
238+
// views pointing to buffers
239+
Mat Lx (base_size, CV_32F, Lx_buf.data);
240+
Mat Ly (base_size, CV_32F, Ly_buf.data);
241+
Mat Lflow (base_size, CV_32F, Lflow_buf.data);
242+
Mat Lstep (base_size, CV_32F, Lstep_buf.data);
136243

137244
// Now generate the rest of evolution levels
138245
for (size_t i = 1; i < evolution_.size(); i++) {
139-
140-
if (evolution_[i].octave > evolution_[i - 1].octave) {
141-
Size half_size = evolution_[i - 1].Lt.size();
142-
half_size.width /= 2;
143-
half_size.height /= 2;
144-
resize(evolution_[i - 1].Lt, evolution_[i].Lt, half_size, 0, 0, INTER_AREA);
145-
options_.kcontrast = options_.kcontrast*0.75f;
246+
TEvolution &e = evolution_[i];
247+
248+
if (e.octave > evolution_[i - 1].octave) {
249+
// new octave will be half the size
250+
resize(evolution_[i - 1].Lt, e.Lt, e.size, 0, 0, INTER_AREA);
251+
kcontrast *= 0.75f;
252+
253+
// resize temporary views to buffers to prevent reallocation
254+
Lx = Mat(e.size, CV_32F, Lx_buf.data);
255+
Ly = Mat(e.size, CV_32F, Ly_buf.data);
256+
Lflow = Mat(e.size, CV_32F, Lflow_buf.data);
257+
Lstep = Mat(e.size, CV_32F, Lstep_buf.data);
146258
}
147259
else {
148-
evolution_[i - 1].Lt.copyTo(evolution_[i].Lt);
260+
evolution_[i - 1].Lt.copyTo(e.Lt);
149261
}
150262

151-
GaussianBlur(evolution_[i].Lt, evolution_[i].Lsmooth, Size(5, 5), 1.0f, 1.0f, BORDER_REPLICATE);
263+
GaussianBlur(e.Lt, e.Lsmooth, Size(5, 5), 1.0f, 1.0f, BORDER_REPLICATE);
152264

153265
// Compute the Gaussian derivatives Lx and Ly
154-
Scharr(evolution_[i].Lsmooth, evolution_[i].Lx, CV_32F, 1, 0, 1.0, 0, BORDER_DEFAULT);
155-
Scharr(evolution_[i].Lsmooth, evolution_[i].Ly, CV_32F, 0, 1, 1.0, 0, BORDER_DEFAULT);
266+
Scharr(e.Lsmooth, Lx, CV_32F, 1, 0, 1.0, 0, BORDER_DEFAULT);
267+
Scharr(e.Lsmooth, Ly, CV_32F, 0, 1, 1.0, 0, BORDER_DEFAULT);
156268

157269
// Compute the conductivity equation
158270
switch (options_.diffusivity) {
159271
case KAZE::DIFF_PM_G1:
160-
pm_g1(evolution_[i].Lx, evolution_[i].Ly, evolution_[i].Lflow, options_.kcontrast);
272+
pm_g1(Lx, Ly, Lflow, kcontrast);
161273
break;
162274
case KAZE::DIFF_PM_G2:
163-
pm_g2(evolution_[i].Lx, evolution_[i].Ly, evolution_[i].Lflow, options_.kcontrast);
275+
pm_g2(Lx, Ly, Lflow, kcontrast);
164276
break;
165277
case KAZE::DIFF_WEICKERT:
166-
weickert_diffusivity(evolution_[i].Lx, evolution_[i].Ly, evolution_[i].Lflow, options_.kcontrast);
278+
weickert_diffusivity(Lx, Ly, Lflow, kcontrast);
167279
break;
168280
case KAZE::DIFF_CHARBONNIER:
169-
charbonnier_diffusivity(evolution_[i].Lx, evolution_[i].Ly, evolution_[i].Lflow, options_.kcontrast);
281+
charbonnier_diffusivity(Lx, Ly, Lflow, kcontrast);
170282
break;
171283
default:
172284
CV_Error(options_.diffusivity, "Diffusivity is not supported");
173285
break;
174286
}
175287

176-
// Perform FED n inner steps
177-
for (int j = 0; j < nsteps_[i - 1]; j++) {
178-
nld_step_scalar(evolution_[i].Lt, evolution_[i].Lflow, evolution_[i].Lstep, tsteps_[i - 1][j]);
288+
// Perform Fast Explicit Diffusion on Lt
289+
std::vector<float> &tsteps = tsteps_[i - 1];
290+
for (size_t j = 0; j < tsteps.size(); j++) {
291+
nld_step_scalar_one_lane(e.Lt, Lflow, Lstep, 0, 1);
292+
const float step_size = tsteps[j];
293+
e.Lt += Lstep * (0.5f * step_size);
179294
}
180295
}
181296

modules/features2d/src/kaze/TEvolution.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,8 @@ struct TEvolution
2828
Mat Lt; ///< Evolution image
2929
Mat Lsmooth; ///< Smoothed image
3030
Mat Ldet; ///< Detector response
31-
Mat Lflow;
32-
Mat Lstep;
31+
32+
Size size; ///< Size of the layer
3333
float etime; ///< Evolution time
3434
float esigma; ///< Evolution sigma. For linear diffusion t = sigma^2 / 2
3535
int octave; ///< Image octave

0 commit comments

Comments
 (0)