@@ -443,104 +443,6 @@ void hlineResizeCn<uint8_t, ufixedpoint16, 2, true, 2>(uint8_t* src, int, int *o
443
443
}
444
444
}
445
445
template <>
446
- void hlineResizeCn<uint8_t , ufixedpoint16, 2 , true , 3 >(uint8_t * src, int , int *ofst, ufixedpoint16* m, ufixedpoint16* dst, int dst_min, int dst_max, int dst_width)
447
- {
448
- int i = 0 ;
449
- ufixedpoint16 srccn[3 ] = { src[0 ], src[1 ], src[2 ] };
450
- v_uint16x8 v_srccn0 = v_setall_u16 (((uint16_t *)srccn)[0 ]);
451
- v_uint16x8 v_srccn1 = v_setall_u16 (((uint16_t *)srccn)[1 ]);
452
- v_uint16x8 v_srccn2 = v_setall_u16 (((uint16_t *)srccn)[2 ]);
453
- for (; i < dst_min - 7 ; i += 8 , m += 16 , dst += 24 ) // Points that fall left from src image so became equal to leftmost src point
454
- {
455
- v_store_interleave ((uint16_t *)dst, v_srccn0, v_srccn1, v_srccn2);
456
- }
457
- for (; i < dst_min; i++, m += 2 )
458
- {
459
- *(dst++) = srccn[0 ];
460
- *(dst++) = srccn[1 ];
461
- *(dst++) = srccn[2 ];
462
- }
463
- for (; i < dst_max - 7 && ofst[i + 7 ] + 15 <= ofst[dst_width - 1 ]; i += 8 , m += 16 , dst += 24 )
464
- {
465
- v_uint8x16 v_src0_c1, v_src0_c2, v_src0_c3;
466
- v_load_deinterleave (src + 3 * ofst[i ], v_src0_c1, v_src0_c2, v_src0_c3);
467
- v_uint8x16 v_src1_c1, v_src1_c2, v_src1_c3;
468
- v_load_deinterleave (src + 3 * ofst[i + 1 ], v_src1_c1, v_src1_c2, v_src1_c3);
469
- v_uint8x16 v_src2_c1, v_src2_c2, v_src2_c3;
470
- v_load_deinterleave (src + 3 * ofst[i + 2 ], v_src2_c1, v_src2_c2, v_src2_c3);
471
- v_uint8x16 v_src3_c1, v_src3_c2, v_src3_c3;
472
- v_load_deinterleave (src + 3 * ofst[i + 3 ], v_src3_c1, v_src3_c2, v_src3_c3);
473
-
474
- v_uint16x8 v_tmp0, v_tmp1, v_tmp2;
475
- v_uint16x8 v_src0123_c1, v_src0123_c2, v_src0123_c3;
476
- v_zip (v_reinterpret_as_u16 (v_src0_c1), v_reinterpret_as_u16 (v_src2_c1), v_tmp0, v_tmp1);
477
- v_zip (v_reinterpret_as_u16 (v_src1_c1), v_reinterpret_as_u16 (v_src3_c1), v_tmp1, v_tmp2);
478
- v_zip (v_tmp0, v_tmp1, v_src0123_c1, v_tmp2);
479
- v_zip (v_reinterpret_as_u16 (v_src0_c2), v_reinterpret_as_u16 (v_src2_c2), v_tmp0, v_tmp1);
480
- v_zip (v_reinterpret_as_u16 (v_src1_c2), v_reinterpret_as_u16 (v_src3_c2), v_tmp1, v_tmp2);
481
- v_zip (v_tmp0, v_tmp1, v_src0123_c2, v_tmp2);
482
- v_zip (v_reinterpret_as_u16 (v_src0_c3), v_reinterpret_as_u16 (v_src2_c3), v_tmp0, v_tmp1);
483
- v_zip (v_reinterpret_as_u16 (v_src1_c3), v_reinterpret_as_u16 (v_src3_c3), v_tmp1, v_tmp2);
484
- v_zip (v_tmp0, v_tmp1, v_src0123_c3, v_tmp2);
485
-
486
- v_load_deinterleave (src + 3 * ofst[i + 4 ], v_src0_c1, v_src0_c2, v_src0_c3);
487
- v_load_deinterleave (src + 3 * ofst[i + 5 ], v_src1_c1, v_src1_c2, v_src1_c3);
488
- v_load_deinterleave (src + 3 * ofst[i + 6 ], v_src2_c1, v_src2_c2, v_src2_c3);
489
- v_load_deinterleave (src + 3 * ofst[i + 7 ], v_src3_c1, v_src3_c2, v_src3_c3);
490
-
491
- v_uint16x8 v_src4567_c1, v_src4567_c2, v_src4567_c3;
492
- v_zip (v_reinterpret_as_u16 (v_src0_c1), v_reinterpret_as_u16 (v_src2_c1), v_tmp0, v_tmp1);
493
- v_zip (v_reinterpret_as_u16 (v_src1_c1), v_reinterpret_as_u16 (v_src3_c1), v_tmp1, v_tmp2);
494
- v_zip (v_tmp0, v_tmp1, v_src4567_c1, v_tmp2);
495
- v_zip (v_reinterpret_as_u16 (v_src0_c2), v_reinterpret_as_u16 (v_src2_c2), v_tmp0, v_tmp1);
496
- v_zip (v_reinterpret_as_u16 (v_src1_c2), v_reinterpret_as_u16 (v_src3_c2), v_tmp1, v_tmp2);
497
- v_zip (v_tmp0, v_tmp1, v_src4567_c2, v_tmp2);
498
- v_zip (v_reinterpret_as_u16 (v_src0_c3), v_reinterpret_as_u16 (v_src2_c3), v_tmp0, v_tmp1);
499
- v_zip (v_reinterpret_as_u16 (v_src1_c3), v_reinterpret_as_u16 (v_src3_c3), v_tmp1, v_tmp2);
500
- v_zip (v_tmp0, v_tmp1, v_src4567_c3, v_tmp2);
501
-
502
- v_expand (v_reinterpret_as_u8 (v_combine_low (v_src0123_c1, v_src4567_c1)),
503
- v_src0123_c1, v_src4567_c1
504
- );
505
- v_expand (v_reinterpret_as_u8 (v_combine_low (v_src0123_c2, v_src4567_c2)),
506
- v_src0123_c2, v_src4567_c2
507
- );
508
- v_expand (v_reinterpret_as_u8 (v_combine_low (v_src0123_c3, v_src4567_c3)),
509
- v_src0123_c3, v_src4567_c3
510
- );
511
- v_int16x8 v_mul0123 = v_load ((int16_t *)m);
512
- v_int16x8 v_mul4567 = v_load ((int16_t *)m+8 );
513
- v_uint32x4 v_res0123_c1 = v_reinterpret_as_u32 (v_dotprod (v_reinterpret_as_s16 (v_src0123_c1), v_mul0123));
514
- v_uint32x4 v_res0123_c2 = v_reinterpret_as_u32 (v_dotprod (v_reinterpret_as_s16 (v_src0123_c2), v_mul0123));
515
- v_uint32x4 v_res0123_c3 = v_reinterpret_as_u32 (v_dotprod (v_reinterpret_as_s16 (v_src0123_c3), v_mul0123));
516
- v_uint32x4 v_res4567_c1 = v_reinterpret_as_u32 (v_dotprod (v_reinterpret_as_s16 (v_src4567_c1), v_mul4567));
517
- v_uint32x4 v_res4567_c2 = v_reinterpret_as_u32 (v_dotprod (v_reinterpret_as_s16 (v_src4567_c2), v_mul4567));
518
- v_uint32x4 v_res4567_c3 = v_reinterpret_as_u32 (v_dotprod (v_reinterpret_as_s16 (v_src4567_c3), v_mul4567));
519
-
520
- v_store_interleave ((uint16_t *)dst , v_pack (v_res0123_c1, v_res4567_c1), v_pack (v_res0123_c2, v_res4567_c2), v_pack (v_res0123_c3, v_res4567_c3));
521
- }
522
- for (; i < dst_max; i += 1 , m += 2 )
523
- {
524
- uint8_t * px = src + 3 * ofst[i];
525
- *(dst++) = m[0 ] * px[0 ] + m[1 ] * px[3 ];
526
- *(dst++) = m[0 ] * px[1 ] + m[1 ] * px[4 ];
527
- *(dst++) = m[0 ] * px[2 ] + m[1 ] * px[5 ];
528
- }
529
- srccn[0 ] = (src + 3 * ofst[dst_width - 1 ])[0 ]; v_srccn0 = v_setall_u16 (((uint16_t *)srccn)[0 ]);
530
- srccn[1 ] = (src + 3 * ofst[dst_width - 1 ])[1 ]; v_srccn1 = v_setall_u16 (((uint16_t *)srccn)[1 ]);
531
- srccn[2 ] = (src + 3 * ofst[dst_width - 1 ])[2 ]; v_srccn2 = v_setall_u16 (((uint16_t *)srccn)[2 ]);
532
- for (; i < dst_width - 7 ; i += 8 , dst += 24 ) // Points that fall left from src image so became equal to leftmost src point
533
- {
534
- v_store_interleave ((uint16_t *)dst, v_srccn0, v_srccn1, v_srccn2);
535
- }
536
- for (; i < dst_width; i++)
537
- {
538
- *(dst++) = srccn[0 ];
539
- *(dst++) = srccn[1 ];
540
- *(dst++) = srccn[2 ];
541
- }
542
- }
543
- template <>
544
446
void hlineResizeCn<uint8_t , ufixedpoint16, 2 , true , 4 >(uint8_t * src, int , int *ofst, ufixedpoint16* m, ufixedpoint16* dst, int dst_min, int dst_max, int dst_width)
545
447
{
546
448
int i = 0 ;
0 commit comments