@@ -494,37 +494,29 @@ DISOpticalFlowImpl::PatchInverseSearch_ParBody::PatchInverseSearch_ParBody(DISOp
494
494
v_float32x4 w10v = v_setall_f32(w10); \
495
495
v_float32x4 w11v = v_setall_f32(w11); \
496
496
\
497
- v_uint8x16 I0_row_16, I1_row_16, I1_row_shifted_16, I1_row_next_16, I1_row_next_shifted_16; \
498
497
v_uint16x8 I0_row_8, I1_row_8, I1_row_shifted_8, I1_row_next_8, I1_row_next_shifted_8, tmp; \
499
498
v_uint32x4 I0_row_4_left, I1_row_4_left, I1_row_shifted_4_left, I1_row_next_4_left, I1_row_next_shifted_4_left; \
500
499
v_uint32x4 I0_row_4_right, I1_row_4_right, I1_row_shifted_4_right, I1_row_next_4_right, \
501
500
I1_row_next_shifted_4_right; \
502
501
v_float32x4 I_diff_left, I_diff_right; \
503
502
\
504
503
/* Preload and expand the first row of I1: */ \
505
- I1_row_16 = v_load(I1_ptr); \
506
- I1_row_shifted_16 = v_extract<1 >(I1_row_16, I1_row_16); \
507
- v_expand (I1_row_16, I1_row_8, tmp); \
508
- v_expand (I1_row_shifted_16, I1_row_shifted_8, tmp); \
504
+ I1_row_8 = v_load_expand(I1_ptr); \
505
+ I1_row_shifted_8 = v_load_expand(I1_ptr + 1 ); \
509
506
v_expand (I1_row_8, I1_row_4_left, I1_row_4_right); \
510
507
v_expand (I1_row_shifted_8, I1_row_shifted_4_left, I1_row_shifted_4_right); \
511
508
I1_ptr += I1_stride;
512
509
513
510
#define HAL_PROCESS_BILINEAR_8x8_PATCH_EXTRACTION \
514
511
/* Load the next row of I1: */ \
515
- I1_row_next_16 = v_load(I1_ptr); \
516
- /* Circular shift left by 1 element: */ \
517
- I1_row_next_shifted_16 = v_extract<1 >(I1_row_next_16, I1_row_next_16); \
518
- /* Expand to 8 ushorts (we only need the first 8 values): */ \
519
- v_expand (I1_row_next_16, I1_row_next_8, tmp); \
520
- v_expand (I1_row_next_shifted_16, I1_row_next_shifted_8, tmp); \
512
+ I1_row_next_8 = v_load_expand(I1_ptr); \
513
+ I1_row_next_shifted_8 = v_load_expand(I1_ptr + 1 ); \
521
514
/* Separate the left and right halves: */ \
522
515
v_expand (I1_row_next_8, I1_row_next_4_left, I1_row_next_4_right); \
523
516
v_expand (I1_row_next_shifted_8, I1_row_next_shifted_4_left, I1_row_next_shifted_4_right); \
524
517
\
525
518
/* Load current row of I0: */ \
526
- I0_row_16 = v_load(I0_ptr); \
527
- v_expand (I0_row_16, I0_row_8, tmp); \
519
+ I0_row_8 = v_load_expand(I0_ptr); \
528
520
v_expand (I0_row_8, I0_row_4_left, I0_row_4_right); \
529
521
\
530
522
/* Compute diffs between I0 and bilinearly interpolated I1: */ \
0 commit comments