Skip to content

Fix bounds check for FAST Nonmax #3357

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions src/backend/cpu/kernel/fast.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,10 @@ void non_maximal(CParam<float> score, CParam<float> x_in, CParam<float> y_in,
unsigned x = static_cast<unsigned>(round(x_in_ptr[k]));
unsigned y = static_cast<unsigned>(round(y_in_ptr[k]));

if (y >= score_dims[0] - edge - 1 || y <= edge + 1 ||
x >= score_dims[1] - edge - 1 || x <= edge + 1)
continue;

float v = score_ptr[y + score_dims[0] * x];
float max_v;
max_v = std::max(score_ptr[y - 1 + score_dims[0] * (x - 1)],
Expand All @@ -193,10 +197,6 @@ void non_maximal(CParam<float> score, CParam<float> x_in, CParam<float> y_in,
max_v = std::max(max_v, score_ptr[y + 1 + score_dims[0] * (x)]);
max_v = std::max(max_v, score_ptr[y + 1 + score_dims[0] * (x + 1)]);

if (y >= score_dims[1] - edge - 1 || y <= edge + 1 ||
x >= score_dims[0] - edge - 1 || x <= edge + 1)
continue;

// Stores keypoint to feat_out if its response is maximum compared to
// its 8-neighborhood
if (v > max_v) {
Expand Down
23 changes: 12 additions & 11 deletions src/backend/cpu/kernel/susan.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,20 +62,21 @@ void non_maximal(Param<float> xcoords, Param<float> ycoords,
// Responses on the border don't have 8-neighbors to compare, discard them
const unsigned r = border_len + 1;

for (dim_t y = r; y < idim1 - r; y++) {
for (dim_t x = r; x < idim0 - r; x++) {
const T v = resp_in[y * idim0 + x];
for (dim_t y = r; y < idim0 - r; y++) {
for (dim_t x = r; x < idim1 - r; x++) {
const T v = resp_in[x * idim0 + y];

// Find maximum neighborhood response
T max_v;
max_v = std::max(resp_in[(y - 1) * idim0 + x - 1],
resp_in[y * idim0 + x - 1]);
max_v = std::max(max_v, resp_in[(y + 1) * idim0 + x - 1]);
max_v = std::max(max_v, resp_in[(y - 1) * idim0 + x]);
max_v = std::max(max_v, resp_in[(y + 1) * idim0 + x]);
max_v = std::max(max_v, resp_in[(y - 1) * idim0 + x + 1]);
max_v = std::max(max_v, resp_in[(y)*idim0 + x + 1]);
max_v = std::max(max_v, resp_in[(y + 1) * idim0 + x + 1]);
max_v = std::max(resp_in[(x - 1) * idim0 + (y - 1)],
resp_in[(x - 1) * idim0 + y]);
max_v = std::max(max_v, resp_in[(x - 1) * idim0 + (y + 1)]);
max_v = std::max(max_v, resp_in[x * idim0 + (y - 1)]);
max_v = std::max(max_v, resp_in[x * idim0 + (y + 1)]);
max_v = std::max(max_v, resp_in[(x + 1) * idim0 + (y - 1)]);

max_v = std::max(max_v, resp_in[(x + 1) * idim0 + y]);
max_v = std::max(max_v, resp_in[(x + 1) * idim0 + (y + 1)]);

// Stores corner to {x,y,resp}_out if its response is maximum
// compared to its 8-neighborhood and greater or equal minimum
Expand Down
28 changes: 14 additions & 14 deletions src/backend/cuda/kernel/susan.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ __global__ void susan(T* out, const T* in, const unsigned idim0,
const unsigned gy = blockDim.y * blockIdx.y + ly + edge;

const unsigned nucleusIdx = (ly + radius) * shrdLen + lx + radius;
shrdMem[nucleusIdx] = gx < idim0 && gy < idim1 ? in[gy * idim0 + gx] : 0;
shrdMem[nucleusIdx] = gx < idim1 && gy < idim0 ? in[gx * idim0 + gy] : 0;
T m_0 = shrdMem[nucleusIdx];

#pragma unroll
Expand All @@ -54,13 +54,13 @@ __global__ void susan(T* out, const T* in, const unsigned idim0,
for (int a = lx, gx2 = gx; a < shrdLen; a += BLOCK_X, gx2 += BLOCK_X) {
int i = gx2 - radius;
shrdMem[b * shrdLen + a] =
(i < idim0 && j < idim1 ? in[j * idim0 + i] : m_0);
(i < idim1 && j < idim0 ? in[i * idim0 + j] : m_0);
}
}
__syncthreads();

if (gx < idim0 - edge && gy < idim1 - edge) {
unsigned idx = gy * idim0 + gx;
if (gx < idim1 - edge && gy < idim0 - edge) {
unsigned idx = gx * idim0 + gy;
float nM = 0.0f;
#pragma unroll
for (int p = 0; p < windLen; ++p) {
Expand Down Expand Up @@ -94,19 +94,19 @@ __global__ void nonMax(float* x_out, float* y_out, float* resp_out,
const unsigned gx = blockDim.x * blockIdx.x + threadIdx.x + r;
const unsigned gy = blockDim.y * blockIdx.y + threadIdx.y + r;

if (gx < idim0 - r && gy < idim1 - r) {
const T v = resp_in[gy * idim0 + gx];
if (gx < idim1 - r && gy < idim0 - r) {
const T v = resp_in[gx * idim0 + gy];

// Find maximum neighborhood response
T max_v;
max_v = max_val(resp_in[(gy - 1) * idim0 + gx - 1],
resp_in[gy * idim0 + gx - 1]);
max_v = max_val(max_v, resp_in[(gy + 1) * idim0 + gx - 1]);
max_v = max_val(max_v, resp_in[(gy - 1) * idim0 + gx]);
max_v = max_val(max_v, resp_in[(gy + 1) * idim0 + gx]);
max_v = max_val(max_v, resp_in[(gy - 1) * idim0 + gx + 1]);
max_v = max_val(max_v, resp_in[(gy)*idim0 + gx + 1]);
max_v = max_val(max_v, resp_in[(gy + 1) * idim0 + gx + 1]);
max_v = max_val(resp_in[(gx - 1) * idim0 + (gy - 1)],
resp_in[(gx - 1) * idim0 + gy]);
max_v = max_val(max_v, resp_in[(gx - 1) * idim0 + (gy + 1)]);
max_v = max_val(max_v, resp_in[gx * idim0 + (gy - 1)]);
max_v = max_val(max_v, resp_in[gx * idim0 + (gy + 1)]);
max_v = max_val(max_v, resp_in[(gx + 1) * idim0 + (gy - 1)]);
max_v = max_val(max_v, resp_in[(gx + 1) * idim0 + gy]);
max_v = max_val(max_v, resp_in[(gx + 1) * idim0 + (gy + 1)]);

// Stores corner to {x,y,resp}_out if its response is maximum compared
// to its 8-neighborhood and greater than or equal to the minimum response
Expand Down
8 changes: 4 additions & 4 deletions src/backend/cuda/kernel/susan.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ void susan_responses(T* out, const T* in, const unsigned idim0,
{{DefineValue(BLOCK_X), DefineValue(BLOCK_Y)}});

dim3 threads(BLOCK_X, BLOCK_Y);
dim3 blocks(divup(idim0 - edge * 2, BLOCK_X),
divup(idim1 - edge * 2, BLOCK_Y));
dim3 blocks(divup(idim1 - edge * 2, BLOCK_X),
divup(idim0 - edge * 2, BLOCK_Y));
const size_t SMEM_SIZE =
(BLOCK_X + 2 * radius) * (BLOCK_Y + 2 * radius) * sizeof(T);

Expand All @@ -52,8 +52,8 @@ void nonMaximal(float* x_out, float* y_out, float* resp_out, unsigned* count,
TemplateArgs(TemplateTypename<T>()));

dim3 threads(BLOCK_X, BLOCK_Y);
dim3 blocks(divup(idim0 - edge * 2, BLOCK_X),
divup(idim1 - edge * 2, BLOCK_Y));
dim3 blocks(divup(idim1 - edge * 2, BLOCK_X),
divup(idim0 - edge * 2, BLOCK_Y));

auto d_corners_found = memAlloc<unsigned>(1);
CUDA_CHECK(cudaMemsetAsync(d_corners_found.get(), 0, sizeof(unsigned),
Expand Down
4 changes: 2 additions & 2 deletions src/backend/opencl/kernel/harris.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,9 +144,9 @@ void harris(unsigned *corners_out, Param &x_out, Param &y_out, Param &resp_out,

// Harris responses kernel sizes
unsigned blk_x_hr =
divup(in.info.dims[0] - border_len * 2, HARRIS_THREADS_X);
divup(in.info.dims[1] - border_len * 2, HARRIS_THREADS_X);
unsigned blk_y_hr =
divup(in.info.dims[1] - border_len * 2, HARRIS_THREADS_Y);
divup(in.info.dims[0] - border_len * 2, HARRIS_THREADS_Y);
const NDRange local_hr(HARRIS_THREADS_X, HARRIS_THREADS_Y);
const NDRange global_hr(blk_x_hr * HARRIS_THREADS_X,
blk_y_hr * HARRIS_THREADS_Y);
Expand Down
32 changes: 16 additions & 16 deletions src/backend/opencl/kernel/susan.cl
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ kernel void susan_responses(global T* out, global const T* in_,
const unsigned gy = get_global_id(1) + edge;

const unsigned nucleusIdx = (ly + RADIUS) * shrdLen + lx + RADIUS;
if (gx < idim0 && gy < idim1)
localMem[nucleusIdx] = in[gy * idim0 + gx];
if (gx < idim1 && gy < idim0)
localMem[nucleusIdx] = in[gx * idim0 + gy];
else
localMem[nucleusIdx] = 0;
T m_0 = localMem[nucleusIdx];
Expand All @@ -39,16 +39,16 @@ kernel void susan_responses(global T* out, global const T* in_,
#pragma unroll
for (int a = lx, gx2 = gx; a < shrdLen; a += BLOCK_X, gx2 += BLOCK_X) {
int i = gx2 - RADIUS;
if (i < idim0 && j < idim1)
localMem[b * shrdLen + a] = in[i + idim0 * j];
if (i < idim1 && j < idim0)
localMem[b * shrdLen + a] = in[j + idim0 * i];
else
localMem[b * shrdLen + a] = m_0;
}
}
barrier(CLK_LOCAL_MEM_FENCE);

if (gx < idim0 - edge && gy < idim1 - edge) {
unsigned idx = gx + idim0 * gy;
if (gx < idim1 - edge && gy < idim0 - edge) {
unsigned idx = gy + idim0 * gx;
float nM = 0.0f;
#pragma unroll
for (int p = 0; p < windLen; ++p) {
Expand Down Expand Up @@ -84,19 +84,19 @@ kernel void non_maximal(global float* x_out, global float* y_out,
const unsigned gx = get_global_id(0) + r;
const unsigned gy = get_global_id(1) + r;

if (gx < idim0 - r && gy < idim1 - r) {
const T v = resp_in[gy * idim0 + gx];
if (gx < idim1 - r && gy < idim0 - r) {
const T v = resp_in[gx * idim0 + gy];

// Find maximum neighborhood response
T max_v;
max_v = MAX_VAL(resp_in[(gy - 1) * idim0 + gx - 1],
resp_in[gy * idim0 + gx - 1]);
max_v = MAX_VAL(max_v, resp_in[(gy + 1) * idim0 + gx - 1]);
max_v = MAX_VAL(max_v, resp_in[(gy - 1) * idim0 + gx]);
max_v = MAX_VAL(max_v, resp_in[(gy + 1) * idim0 + gx]);
max_v = MAX_VAL(max_v, resp_in[(gy - 1) * idim0 + gx + 1]);
max_v = MAX_VAL(max_v, resp_in[(gy)*idim0 + gx + 1]);
max_v = MAX_VAL(max_v, resp_in[(gy + 1) * idim0 + gx + 1]);
max_v = MAX_VAL(resp_in[(gx - 1) * idim0 + (gy - 1)],
resp_in[(gx - 1) * idim0 + gy]);
max_v = MAX_VAL(max_v, resp_in[(gx - 1) * idim0 + (gy + 1)]);
max_v = MAX_VAL(max_v, resp_in[gx * idim0 + (gy - 1)]);
max_v = MAX_VAL(max_v, resp_in[gx * idim0 + (gy + 1)]);
max_v = MAX_VAL(max_v, resp_in[(gx + 1) * idim0 + (gy - 1)]);
max_v = MAX_VAL(max_v, resp_in[(gx + 1) * idim0 + gy]);
max_v = MAX_VAL(max_v, resp_in[(gx + 1) * idim0 + (gy + 1)]);

// Stores corner to {x,y,resp}_out if its response is maximum compared
// to its 8-neighborhood and greater than or equal to the minimum response
Expand Down
8 changes: 4 additions & 4 deletions src/backend/opencl/kernel/susan.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ void susan(cl::Buffer* out, const cl::Buffer* in, const unsigned in_off,
compileOpts);

cl::NDRange local(SUSAN_THREADS_X, SUSAN_THREADS_Y);
cl::NDRange global(divup(idim0 - 2 * edge, local[0]) * local[0],
divup(idim1 - 2 * edge, local[1]) * local[1]);
cl::NDRange global(divup(idim1 - 2 * edge, local[0]) * local[0],
divup(idim0 - 2 * edge, local[1]) * local[1]);

susan(cl::EnqueueArgs(getQueue(), global, local), *out, *in, in_off, idim0,
idim1, t, g, edge);
Expand Down Expand Up @@ -84,8 +84,8 @@ unsigned nonMaximal(cl::Buffer* x_out, cl::Buffer* y_out, cl::Buffer* resp_out,
sizeof(unsigned));

cl::NDRange local(SUSAN_THREADS_X, SUSAN_THREADS_Y);
cl::NDRange global(divup(idim0 - 2 * edge, local[0]) * local[0],
divup(idim1 - 2 * edge, local[1]) * local[1]);
cl::NDRange global(divup(idim1 - 2 * edge, local[0]) * local[0],
divup(idim0 - 2 * edge, local[1]) * local[1]);

nonMax(cl::EnqueueArgs(getQueue(), global, local), *x_out, *y_out,
*resp_out, *d_corners_found, idim0, idim1, *resp_in, edge,
Expand Down