Skip to content

Commit ad1413f

Browse files
committed
Fixes sub-array (cpu, cuda, opencl, oneapi) support for orb
1 parent 2e72a4d commit ad1413f

File tree

5 files changed

+101
-34
lines changed

5 files changed

+101
-34
lines changed

src/backend/cpu/kernel/orb.hpp

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -119,8 +119,9 @@ void harris_response(float* x_out, float* y_out, float* score_out,
119119
unsigned* usable_feat, CParam<T> image,
120120
const unsigned block_size, const float k_thr,
121121
const unsigned patch_size) {
122-
const af::dim4 idims = image.dims();
123-
const T* image_ptr = image.get();
122+
const af::dim4 idims = image.dims();
123+
const af::dim4 istrides = image.strides();
124+
const T* image_ptr = image.get();
124125
for (unsigned f = 0; f < total_feat; f++) {
125126
unsigned x, y;
126127
float scl = 1.f;
@@ -154,10 +155,12 @@ void harris_response(float* x_out, float* y_out, float* score_out,
154155
int j = k % block_size - r;
155156

156157
// Calculate local x and y derivatives
157-
float ix = image_ptr[(x + i + 1) * idims[0] + y + j] -
158-
image_ptr[(x + i - 1) * idims[0] + y + j];
159-
float iy = image_ptr[(x + i) * idims[0] + y + j + 1] -
160-
image_ptr[(x + i) * idims[0] + y + j - 1];
158+
float ix =
159+
image_ptr[(x + i + 1) * istrides[1] + (y + j) * istrides[0]] -
160+
image_ptr[(x + i - 1) * istrides[1] + (y + j) * istrides[0]];
161+
float iy =
162+
image_ptr[(x + i) * istrides[1] + (y + j + 1) * istrides[0]] -
163+
image_ptr[(x + i) * istrides[1] + (y + j - 1) * istrides[0]];
161164

162165
// Accumulate second order derivatives
163166
ixx += ix * ix;
@@ -189,8 +192,9 @@ template<typename T>
189192
void centroid_angle(const float* x_in, const float* y_in,
190193
float* orientation_out, const unsigned total_feat,
191194
CParam<T> image, const unsigned patch_size) {
192-
const af::dim4 idims = image.dims();
193-
const T* image_ptr = image.get();
195+
const af::dim4 idims = image.dims();
196+
const af::dim4 istrides = image.strides();
197+
const T* image_ptr = image.get();
194198
for (unsigned f = 0; f < total_feat; f++) {
195199
unsigned x = (unsigned)round(x_in[f]);
196200
unsigned y = (unsigned)round(y_in[f]);
@@ -205,7 +209,7 @@ void centroid_angle(const float* x_in, const float* y_in,
205209
int j = k % patch_size - r;
206210

207211
// Calculate first order moments
208-
T p = image_ptr[(x + i) * idims[0] + y + j];
212+
T p = image_ptr[(x + i) * istrides[1] + (y + j) * istrides[0]];
209213
m01 += j * p;
210214
m10 += i * p;
211215
}
@@ -219,17 +223,17 @@ template<typename T>
219223
inline T get_pixel(unsigned x, unsigned y, const float ori, const unsigned size,
220224
const int dist_x, const int dist_y, CParam<T> image,
221225
const unsigned patch_size) {
222-
const af::dim4 idims = image.dims();
223-
const T* image_ptr = image.get();
224-
float ori_sin = sin(ori);
225-
float ori_cos = cos(ori);
226-
float patch_scl = (float)size / (float)patch_size;
226+
const af::dim4 istrides = image.strides();
227+
const T* image_ptr = image.get();
228+
float ori_sin = sin(ori);
229+
float ori_cos = cos(ori);
230+
float patch_scl = (float)size / (float)patch_size;
227231

228232
// Calculate point coordinates based on orientation and size
229233
x += round(dist_x * patch_scl * ori_cos - dist_y * patch_scl * ori_sin);
230234
y += round(dist_x * patch_scl * ori_sin + dist_y * patch_scl * ori_cos);
231235

232-
return image_ptr[x * idims[0] + y];
236+
return image_ptr[x * istrides[1] + y * istrides[0]];
233237
}
234238

235239
template<typename T>

src/backend/cuda/kernel/orb.hpp

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -125,10 +125,14 @@ __global__ void harris_response(float* score_out, float* size_out,
125125
int j = k % block_size - r;
126126

127127
// Calculate local x and y derivatives
128-
float ix = image.ptr[(x + i + 1) * image.dims[0] + y + j] -
129-
image.ptr[(x + i - 1) * image.dims[0] + y + j];
130-
float iy = image.ptr[(x + i) * image.dims[0] + y + j + 1] -
131-
image.ptr[(x + i) * image.dims[0] + y + j - 1];
128+
float ix = image.ptr[(x + i + 1) * image.strides[1] +
129+
(y + j) * image.strides[0]] -
130+
image.ptr[(x + i - 1) * image.strides[1] +
131+
(y + j) * image.strides[0]];
132+
float iy = image.ptr[(x + i) * image.strides[1] +
133+
(y + j + 1) * image.strides[0]] -
134+
image.ptr[(x + i) * image.strides[1] +
135+
(y + j - 1) * image.strides[0]];
132136

133137
// Accumulate second order derivatives
134138
ixx += ix * ix;
@@ -181,7 +185,8 @@ __global__ void centroid_angle(const float* x_in, const float* y_in,
181185
int j = k % patch_size - r;
182186

183187
// Calculate first order moments
184-
T p = image.ptr[(x + i) * image.dims[0] + y + j];
188+
T p = image.ptr[(x + i) * image.strides[1] +
189+
(y + j) * image.strides[0]];
185190
m01 += j * p;
186191
m10 += i * p;
187192
}
@@ -209,7 +214,7 @@ inline __device__ T get_pixel(unsigned x, unsigned y, const float ori,
209214
x += round(dist_x * patch_scl * ori_cos - dist_y * patch_scl * ori_sin);
210215
y += round(dist_x * patch_scl * ori_sin + dist_y * patch_scl * ori_cos);
211216

212-
return image.ptr[x * image.dims[0] + y];
217+
return image.ptr[x * image.strides[1] + y * image.strides[0]];
213218
}
214219

215220
inline __device__ int lookup(const int n, cudaTextureObject_t tex) {

src/backend/opencl/kernel/orb.cl

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ kernel void harris_response(
128128
local float data[BLOCK_SIZE * BLOCK_SIZE];
129129

130130
unsigned f = get_global_id(0);
131+
image += iInfo.offset;
131132

132133
unsigned x, y;
133134
float ixx = 0.f, iyy = 0.f, ixy = 0.f;
@@ -155,10 +156,10 @@ kernel void harris_response(
155156
int j = k % block_size - r;
156157

157158
// Calculate local x and y derivatives
158-
float ix = image[(x + i + 1) * iInfo.dims[0] + y + j] -
159-
image[(x + i - 1) * iInfo.dims[0] + y + j];
160-
float iy = image[(x + i) * iInfo.dims[0] + y + j + 1] -
161-
image[(x + i) * iInfo.dims[0] + y + j - 1];
159+
float ix = image[(x + i + 1) * iInfo.strides[1] + (y + j) * iInfo.strides[0]] -
160+
image[(x + i - 1) * iInfo.strides[1] + (y + j) * iInfo.strides[0]] ;
161+
float iy = image[(x + i) * iInfo.strides[1] + (y + j + 1) * iInfo.strides[0]] -
162+
image[(x + i) * iInfo.strides[1] + (y + j - 1) * iInfo.strides[0]];
162163

163164
// Accumulate second order derivatives
164165
ixx += ix * ix;
@@ -219,7 +220,7 @@ kernel void centroid_angle(global const float* x_in,
219220
int j = k % patch_size - r;
220221

221222
// Calculate first order moments
222-
T p = image[(x + i) * iInfo.dims[0] + y + j];
223+
T p = image[(x + i) * iInfo.strides[1] + (y + j) * iInfo.strides[0] + iInfo.offset];
223224
m01 += j * p;
224225
m10 += i * p;
225226
}
@@ -246,7 +247,7 @@ inline T get_pixel(unsigned x, unsigned y, const float ori, const unsigned size,
246247
x += round(dist_x * patch_scl * ori_cos - dist_y * patch_scl * ori_sin);
247248
y += round(dist_x * patch_scl * ori_sin + dist_y * patch_scl * ori_cos);
248249

249-
return image[x * iInfo.dims[0] + y];
250+
return image[x * iInfo.strides[1] + y * iInfo.strides[0] + iInfo.offset];
250251
}
251252

252253
kernel void extract_orb(global unsigned* desc_out, const unsigned n_feat,

src/backend/opencl/kernel/orb.hpp

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -327,13 +327,14 @@ void orb(unsigned* out_feat, Param& x_out, Param& y_out, Param& score_out,
327327
Param lvl_tmp;
328328

329329
if (blur_img) {
330-
lvl_filt = lvl_img;
331-
lvl_tmp = lvl_img;
332-
333-
lvl_filt.data = bufferAlloc(lvl_filt.info.dims[0] *
334-
lvl_filt.info.dims[1] * sizeof(T));
335-
lvl_tmp.data = bufferAlloc(lvl_tmp.info.dims[0] *
336-
lvl_tmp.info.dims[1] * sizeof(T));
330+
const dim_t pixels = lvl_img.info.dims[0] * lvl_img.info.dims[1];
331+
lvl_filt.info = {{lvl_img.info.dims[0], lvl_img.info.dims[1], 1, 1},
332+
{1, lvl_img.info.dims[0], pixels, pixels},
333+
0};
334+
lvl_filt.data = bufferAlloc(pixels * sizeof(T));
335+
336+
lvl_tmp.info = lvl_filt.info;
337+
lvl_tmp.data = bufferAlloc(pixels * sizeof(T));
337338

338339
// Calculate a separable Gaussian kernel
339340
if (h_gauss == nullptr) {

test/orb.cpp

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -326,3 +326,59 @@ TEST(ORB, CPP) {
326326
delete[] outSize;
327327
delete[] outDesc;
328328
}
329+
330+
// Defines TEST(TEMP_FORMAT, form): checks that orb() produces the same result
// for an image passed through toTempFormat(form, ...) as for the image itself.
//
// Steps performed by the generated test body:
//   1. Invokes UNSUPPORTED_BACKEND(AF_BACKEND_ONEAPI) and
//      IMAGEIO_ENABLED_CHECK() (presumably these skip the test when the
//      backend / image I/O is unavailable - confirm against their definitions).
//   2. Reads orb/square.test and loads the first image file it lists.
//   3. Runs orb() twice with identical parameters (20.0f, MAX_FEATURES, 1.2f,
//      8, true): once on toTempFormat(form, in) and once on `in` directly.
//   4. Asserts that fewer than MAX_FEATURES features were returned for the
//      temp-format run.
//   5. Orders both result sets by the hash
//      (x * inDims[0].dims[1] + y) * score * orientation * size
//      and expects x, y, score, orientation, size and the descriptors to be
//      equal between the two runs.
//
// NOTE(review): goldFeat and goldDesc are filled by
// readImageFeaturesDescriptors but are not compared against here; the
// reference is the second orb() call on the unmodified image.
#define TEST_TEMP_FORMATS(form) \
331+
TEST(TEMP_FORMAT, form) { \
332+
UNSUPPORTED_BACKEND(AF_BACKEND_ONEAPI); \
333+
IMAGEIO_ENABLED_CHECK(); \
334+
\
335+
constexpr size_t MAX_FEATURES = 400; \
336+
\
337+
vector<dim4> inDims; \
338+
vector<string> inFiles; \
339+
vector<vector<float>> goldFeat; \
340+
vector<vector<unsigned>> goldDesc; \
341+
\
342+
readImageFeaturesDescriptors<unsigned>( \
343+
string(TEST_DIR "/orb/square.test"), inDims, inFiles, goldFeat, \
344+
goldDesc); \
345+
inFiles[0].insert(0, string(TEST_DIR "/orb/")); \
346+
array in = loadImage(inFiles[0].c_str(), false); \
347+
\
348+
features feat; \
349+
array desc; \
350+
orb(feat, desc, toTempFormat(form, in), 20.0f, MAX_FEATURES, 1.2f, 8, \
351+
true); \
352+
features gfeat; \
353+
array gdesc; \
354+
orb(gfeat, gdesc, in, 20.0f, MAX_FEATURES, 1.2f, 8, true); \
355+
\
356+
/* The clipping of the features is dependent on threads runtime, so \
357+
* capture them all. */ \
358+
ASSERT_GT(MAX_FEATURES, feat.getNumFeatures()) \
359+
<< "Please increase MAX_FEATURES to capture all features"; \
360+
\
361+
/* The results from orb are dependent on threads runtime, so sort \
362+
* by very simple hash on all columns of feat before comparing */ \
363+
array score = (feat.getX() * inDims[0].dims[1] + feat.getY()) * \
364+
feat.getScore() * feat.getOrientation() * \
365+
feat.getSize(); \
366+
array idx, score_sorted; \
367+
sort(score_sorted, idx, score); \
368+
\
369+
array gscore = (gfeat.getX() * inDims[0].dims[1] + gfeat.getY()) * \
370+
gfeat.getScore() * gfeat.getOrientation() * \
371+
gfeat.getSize(); \
372+
array gidx, gscore_sorted; \
373+
sort(gscore_sorted, gidx, gscore); \
374+
\
375+
EXPECT_ARRAYS_EQ(feat.getX()(idx), gfeat.getX()(gidx)); \
376+
EXPECT_ARRAYS_EQ(feat.getY()(idx), gfeat.getY()(gidx)); \
377+
EXPECT_ARRAYS_EQ(feat.getScore()(idx), gfeat.getScore()(gidx)); \
378+
EXPECT_ARRAYS_EQ(feat.getOrientation()(idx), \
379+
gfeat.getOrientation()(gidx)); \
380+
EXPECT_ARRAYS_EQ(feat.getSize()(idx), gfeat.getSize()(gidx)); \
381+
EXPECT_ARRAYS_EQ(desc(af::span, idx), gdesc(af::span, gidx)); \
382+
}
383+
384+
// Passes TEST_TEMP_FORMATS to FOREACH_TEMP_FORMAT; presumably this expands the
// test macro once per available temporary format - confirm against the
// definition of FOREACH_TEMP_FORMAT, which is not visible here.
FOREACH_TEMP_FORMAT(TEST_TEMP_FORMATS)

0 commit comments

Comments
 (0)