@@ -258,9 +258,9 @@ inline void GetPatch(image2d_t J, float x, float y,
258
258
* b2 = mad (diff , * Dy , * b2 );
259
259
}
260
260
261
- inline void GetError (image2d_t J , const float x , const float y , const float * Pch , float * errval )
261
+ inline void GetError (image2d_t J , const float x , const float y , const float * Pch , float * errval , float w )
262
262
{
263
- float diff = (((read_imagef (J , sampler , (float2 )(x ,y )).x * 16384 ) + 256 ) / 512 ) - (((* Pch * 16384 ) + 256 ) /512 );
263
+ float diff = (((( read_imagef (J , sampler , (float2 )(x ,y )).x * 16384 ) + 256 ) / 512 ) - (((* Pch * 16384 ) + 256 ) /512 )) * w ;
264
264
* errval += fabs (diff );
265
265
}
266
266
@@ -310,10 +310,34 @@ __kernel void lkSparse(image2d_t I, image2d_t J,
310
310
int xsize = get_local_size (0 );
311
311
int ysize = get_local_size (1 );
312
312
int k ;
313
+
314
+ #ifdef CPU
315
+ float wx0 = 1.0f ;
316
+ float wy0 = 1.0f ;
313
317
int xBase = mad24 (xsize , 2 , xid );
314
318
int yBase = mad24 (ysize , 2 , yid );
315
- float wx = (xBase < c_winSize_x ) ? 1 : 0 ;
316
- float wy = (yBase < c_winSize_y ) ? 1 : 0 ;
319
+ float wx1 = (xBase < c_winSize_x ) ? 1 : 0 ;
320
+ float wy1 = (yBase < c_winSize_y ) ? 1 : 0 ;
321
+ #else
322
+ #if WSX == 1
323
+ float wx0 = 1.0f ;
324
+ int xBase = mad24 (xsize , 2 , xid );
325
+ float wx1 = (xBase < c_winSize_x ) ? 1 : 0 ;
326
+ #else
327
+ int xBase = mad24 (xsize , 1 , xid );
328
+ float wx0 = (xBase < c_winSize_x ) ? 1 : 0 ;
329
+ float wx1 = 0.0f ;
330
+ #endif
331
+ #if WSY == 1
332
+ float wy0 = 1.0f ;
333
+ int yBase = mad24 (ysize , 2 , yid );
334
+ float wy1 = (yBase < c_winSize_y ) ? 1 : 0 ;
335
+ #else
336
+ int yBase = mad24 (ysize , 1 , yid );
337
+ float wy0 = (yBase < c_winSize_y ) ? 1 : 0 ;
338
+ float wy1 = 0.0f ;
339
+ #endif
340
+ #endif
317
341
318
342
float2 c_halfWin = (float2 )((c_winSize_x - 1 )>>1 , (c_winSize_y - 1 )>>1 );
319
343
@@ -354,39 +378,39 @@ __kernel void lkSparse(image2d_t I, image2d_t J,
354
378
355
379
SetPatch (IPatchLocal , 0 , 1 ,
356
380
& I_patch [0 ][1 ], & dIdx_patch [0 ][1 ], & dIdy_patch [0 ][1 ],
357
- & A11 , & A12 , & A22 ,1 );
381
+ & A11 , & A12 , & A22 ,wx0 );
358
382
359
383
SetPatch (IPatchLocal , 0 , 2 ,
360
384
& I_patch [0 ][2 ], & dIdx_patch [0 ][2 ], & dIdy_patch [0 ][2 ],
361
- & A11 , & A12 , & A22 ,wx );
385
+ & A11 , & A12 , & A22 ,wx1 );
362
386
}
363
387
{
364
388
SetPatch (IPatchLocal , 1 , 0 ,
365
389
& I_patch [1 ][0 ], & dIdx_patch [1 ][0 ], & dIdy_patch [1 ][0 ],
366
- & A11 , & A12 , & A22 ,1 );
390
+ & A11 , & A12 , & A22 ,wy0 );
367
391
368
392
369
393
SetPatch (IPatchLocal , 1 ,1 ,
370
394
& I_patch [1 ][1 ], & dIdx_patch [1 ][1 ], & dIdy_patch [1 ][1 ],
371
- & A11 , & A12 , & A22 ,1 );
395
+ & A11 , & A12 , & A22 ,wx0 * wy0 );
372
396
373
397
SetPatch (IPatchLocal , 1 ,2 ,
374
398
& I_patch [1 ][2 ], & dIdx_patch [1 ][2 ], & dIdy_patch [1 ][2 ],
375
- & A11 , & A12 , & A22 ,wx );
399
+ & A11 , & A12 , & A22 ,wx1 * wy0 );
376
400
}
377
401
{
378
402
SetPatch (IPatchLocal , 2 ,0 ,
379
403
& I_patch [2 ][0 ], & dIdx_patch [2 ][0 ], & dIdy_patch [2 ][0 ],
380
- & A11 , & A12 , & A22 ,wy );
404
+ & A11 , & A12 , & A22 ,wy1 );
381
405
382
406
383
407
SetPatch (IPatchLocal , 2 ,1 ,
384
408
& I_patch [2 ][1 ], & dIdx_patch [2 ][1 ], & dIdy_patch [2 ][1 ],
385
- & A11 , & A12 , & A22 ,wy );
409
+ & A11 , & A12 , & A22 ,wx0 * wy1 );
386
410
387
411
SetPatch (IPatchLocal , 2 ,2 ,
388
412
& I_patch [2 ][2 ], & dIdx_patch [2 ][2 ], & dIdy_patch [2 ][2 ],
389
- & A11 , & A12 , & A22 ,wx * wy );
413
+ & A11 , & A12 , & A22 ,wx1 * wy1 );
390
414
}
391
415
392
416
@@ -496,24 +520,24 @@ __kernel void lkSparse(image2d_t I, image2d_t J,
496
520
if (calcErr )
497
521
{
498
522
{
499
- GetError (J , loc0 .x , loc0 .y , & I_patch [0 ][0 ], & D );
500
- GetError (J , loc1 .x , loc0 .y , & I_patch [0 ][1 ], & D );
523
+ GetError (J , loc0 .x , loc0 .y , & I_patch [0 ][0 ], & D , 1 );
524
+ GetError (J , loc1 .x , loc0 .y , & I_patch [0 ][1 ], & D , wx0 );
501
525
}
502
526
{
503
- GetError (J , loc0 .x , loc1 .y , & I_patch [1 ][0 ], & D );
504
- GetError (J , loc1 .x , loc1 .y , & I_patch [1 ][1 ], & D );
527
+ GetError (J , loc0 .x , loc1 .y , & I_patch [1 ][0 ], & D , wy0 );
528
+ GetError (J , loc1 .x , loc1 .y , & I_patch [1 ][1 ], & D , wx0 * wy0 );
505
529
}
506
530
if (xBase < c_winSize_x )
507
531
{
508
- GetError (J , loc2 .x , loc0 .y , & I_patch [0 ][2 ], & D );
509
- GetError (J , loc2 .x , loc1 .y , & I_patch [1 ][2 ], & D );
532
+ GetError (J , loc2 .x , loc0 .y , & I_patch [0 ][2 ], & D , wx1 );
533
+ GetError (J , loc2 .x , loc1 .y , & I_patch [1 ][2 ], & D , wx1 * wy0 );
510
534
}
511
535
if (yBase < c_winSize_y )
512
536
{
513
- GetError (J , loc0 .x , loc2 .y , & I_patch [2 ][0 ], & D );
514
- GetError (J , loc1 .x , loc2 .y , & I_patch [2 ][1 ], & D );
537
+ GetError (J , loc0 .x , loc2 .y , & I_patch [2 ][0 ], & D , wy1 );
538
+ GetError (J , loc1 .x , loc2 .y , & I_patch [2 ][1 ], & D , wx0 * wy1 );
515
539
if (xBase < c_winSize_x )
516
- GetError (J , loc2 .x , loc2 .y , & I_patch [2 ][2 ], & D );
540
+ GetError (J , loc2 .x , loc2 .y , & I_patch [2 ][2 ], & D , wx1 * wy1 );
517
541
}
518
542
519
543
reduce1 (D , smem1 , tid );
0 commit comments