-
Notifications
You must be signed in to change notification settings - Fork 15k
[LV] Improve the test coverage for strided access. nfc #155981
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-backend-risc-v Author: Mel Chen (Mel-Chen) ChangesAdd tests for strided access with UF > 1, and introduce a new test case @constant_stride_reinterpret. Patch is 71.58 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/155981.diff 1 Files Affected:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
index 251e014dbb795..4e687d7992b56 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=loop-vectorize -mtriple=riscv64 -mattr=+v -S | FileCheck --check-prefixes=CHECK,NOSTRIDED %s
+; RUN: opt < %s -passes=loop-vectorize -mtriple=riscv64 -mattr=+v -force-vector-interleave=2 -S | FileCheck --check-prefixes=CHECK-UF2,NOSTRIDED-UF2 %s
; RUN: opt < %s -passes=loop-vectorize -mtriple=riscv64 -mattr=+v -lv-strided-pointer-ivs=true -laa-speculate-unit-stride=false -S | FileCheck --check-prefixes=CHECK,STRIDED %s
+; RUN: opt < %s -passes=loop-vectorize -mtriple=riscv64 -mattr=+v -lv-strided-pointer-ivs=true -laa-speculate-unit-stride=false -force-vector-interleave=2 -S | FileCheck --check-prefixes=CHECK-UF2,STRIDED-UF2 %s
define void @single_constant_stride_int_scaled(ptr %p) {
@@ -46,6 +48,62 @@ define void @single_constant_stride_int_scaled(ptr %p) {
; CHECK: exit:
; CHECK-NEXT: ret void
;
+; CHECK-UF2-LABEL: @single_constant_stride_int_scaled(
+; CHECK-UF2-NEXT: entry:
+; CHECK-UF2-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-UF2-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
+; CHECK-UF2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 1024, [[TMP1]]
+; CHECK-UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-UF2: vector.ph:
+; CHECK-UF2-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-UF2-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-UF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP3]], i64 0
+; CHECK-UF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-UF2-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2
+; CHECK-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP4]]
+; CHECK-UF2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
+; CHECK-UF2-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i64 [[TMP4]], i64 [[N_MOD_VF]]
+; CHECK-UF2-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[TMP6]]
+; CHECK-UF2-NEXT: [[TMP7:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
+; CHECK-UF2-NEXT: [[TMP8:%.*]] = mul <vscale x 4 x i64> [[TMP7]], splat (i64 1)
+; CHECK-UF2-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP8]]
+; CHECK-UF2-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK-UF2: vector.body:
+; CHECK-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-UF2-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-UF2-NEXT: [[STEP_ADD:%.*]] = add <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; CHECK-UF2-NEXT: [[TMP9:%.*]] = mul nuw nsw <vscale x 4 x i64> [[VEC_IND]], splat (i64 8)
+; CHECK-UF2-NEXT: [[TMP10:%.*]] = mul nuw nsw <vscale x 4 x i64> [[STEP_ADD]], splat (i64 8)
+; CHECK-UF2-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[P:%.*]], <vscale x 4 x i64> [[TMP9]]
+; CHECK-UF2-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[P]], <vscale x 4 x i64> [[TMP10]]
+; CHECK-UF2-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP11]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> poison)
+; CHECK-UF2-NEXT: [[WIDE_MASKED_GATHER1:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP12]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> poison)
+; CHECK-UF2-NEXT: [[TMP13:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], splat (i32 1)
+; CHECK-UF2-NEXT: [[TMP14:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER1]], splat (i32 1)
+; CHECK-UF2-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP13]], <vscale x 4 x ptr> [[TMP11]], i32 4, <vscale x 4 x i1> splat (i1 true))
+; CHECK-UF2-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP14]], <vscale x 4 x ptr> [[TMP12]], i32 4, <vscale x 4 x i1> splat (i1 true))
+; CHECK-UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
+; CHECK-UF2-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT]]
+; CHECK-UF2-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-UF2-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-UF2: middle.block:
+; CHECK-UF2-NEXT: br label [[SCALAR_PH]]
+; CHECK-UF2: scalar.ph:
+; CHECK-UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-UF2-NEXT: br label [[LOOP:%.*]]
+; CHECK-UF2: loop:
+; CHECK-UF2-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; CHECK-UF2-NEXT: [[OFFSET:%.*]] = mul nuw nsw i64 [[I]], 8
+; CHECK-UF2-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET]]
+; CHECK-UF2-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4
+; CHECK-UF2-NEXT: [[Y0:%.*]] = add i32 [[X0]], 1
+; CHECK-UF2-NEXT: store i32 [[Y0]], ptr [[Q0]], align 4
+; CHECK-UF2-NEXT: [[NEXTI]] = add i64 [[I]], 1
+; CHECK-UF2-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; CHECK-UF2-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-UF2: exit:
+; CHECK-UF2-NEXT: ret void
+;
entry:
br label %loop
loop:
@@ -108,6 +166,63 @@ define void @single_constant_stride_int_iv(ptr %p) {
; CHECK: exit:
; CHECK-NEXT: ret void
;
+; CHECK-UF2-LABEL: @single_constant_stride_int_iv(
+; CHECK-UF2-NEXT: entry:
+; CHECK-UF2-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-UF2-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
+; CHECK-UF2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
+; CHECK-UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-UF2: vector.ph:
+; CHECK-UF2-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-UF2-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-UF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP3]], i64 0
+; CHECK-UF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-UF2-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2
+; CHECK-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP4]]
+; CHECK-UF2-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
+; CHECK-UF2-NEXT: [[TMP5:%.*]] = mul i64 [[N_VEC]], 64
+; CHECK-UF2-NEXT: [[TMP6:%.*]] = mul <vscale x 4 x i64> [[BROADCAST_SPLAT]], splat (i64 64)
+; CHECK-UF2-NEXT: [[TMP7:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
+; CHECK-UF2-NEXT: [[TMP8:%.*]] = mul <vscale x 4 x i64> [[TMP7]], splat (i64 64)
+; CHECK-UF2-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP8]]
+; CHECK-UF2-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK-UF2: vector.body:
+; CHECK-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-UF2-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-UF2-NEXT: [[STEP_ADD:%.*]] = add <vscale x 4 x i64> [[VEC_IND]], [[TMP6]]
+; CHECK-UF2-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[P:%.*]], <vscale x 4 x i64> [[VEC_IND]]
+; CHECK-UF2-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[P]], <vscale x 4 x i64> [[STEP_ADD]]
+; CHECK-UF2-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP9]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> poison)
+; CHECK-UF2-NEXT: [[WIDE_MASKED_GATHER1:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP10]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> poison)
+; CHECK-UF2-NEXT: [[TMP11:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], splat (i32 1)
+; CHECK-UF2-NEXT: [[TMP12:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER1]], splat (i32 1)
+; CHECK-UF2-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP11]], <vscale x 4 x ptr> [[TMP9]], i32 4, <vscale x 4 x i1> splat (i1 true))
+; CHECK-UF2-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP12]], <vscale x 4 x ptr> [[TMP10]], i32 4, <vscale x 4 x i1> splat (i1 true))
+; CHECK-UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
+; CHECK-UF2-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[STEP_ADD]], [[TMP6]]
+; CHECK-UF2-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-UF2-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-UF2: middle.block:
+; CHECK-UF2-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
+; CHECK-UF2-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK-UF2: scalar.ph:
+; CHECK-UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-UF2-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; CHECK-UF2-NEXT: br label [[LOOP:%.*]]
+; CHECK-UF2: loop:
+; CHECK-UF2-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; CHECK-UF2-NEXT: [[OFFSET:%.*]] = phi i64 [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[OFFSET_NEXT:%.*]], [[LOOP]] ]
+; CHECK-UF2-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET]]
+; CHECK-UF2-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4
+; CHECK-UF2-NEXT: [[Y0:%.*]] = add i32 [[X0]], 1
+; CHECK-UF2-NEXT: store i32 [[Y0]], ptr [[Q0]], align 4
+; CHECK-UF2-NEXT: [[OFFSET_NEXT]] = add nuw nsw i64 [[OFFSET]], 64
+; CHECK-UF2-NEXT: [[NEXTI]] = add i64 [[I]], 1
+; CHECK-UF2-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; CHECK-UF2-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-UF2: exit:
+; CHECK-UF2-NEXT: ret void
+;
entry:
br label %loop
loop:
@@ -168,6 +283,69 @@ define void @single_constant_stride_ptr_iv(ptr %p) {
; CHECK: exit:
; CHECK-NEXT: ret void
;
+; CHECK-UF2-LABEL: @single_constant_stride_ptr_iv(
+; CHECK-UF2-NEXT: entry:
+; CHECK-UF2-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-UF2-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
+; CHECK-UF2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 1024, [[TMP1]]
+; CHECK-UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-UF2: vector.ph:
+; CHECK-UF2-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-UF2-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-UF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP3]], i64 0
+; CHECK-UF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-UF2-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2
+; CHECK-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP4]]
+; CHECK-UF2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
+; CHECK-UF2-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i64 [[TMP4]], i64 [[N_MOD_VF]]
+; CHECK-UF2-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[TMP6]]
+; CHECK-UF2-NEXT: [[TMP7:%.*]] = mul i64 [[N_VEC]], 8
+; CHECK-UF2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[TMP7]]
+; CHECK-UF2-NEXT: [[TMP9:%.*]] = mul <vscale x 4 x i64> [[BROADCAST_SPLAT]], splat (i64 8)
+; CHECK-UF2-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK-UF2: vector.body:
+; CHECK-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-UF2-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
+; CHECK-UF2-NEXT: [[TMP10:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
+; CHECK-UF2-NEXT: [[TMP11:%.*]] = mul <vscale x 4 x i64> [[TMP10]], splat (i64 8)
+; CHECK-UF2-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 4 x i64> [[TMP11]]
+; CHECK-UF2-NEXT: [[STEP_ADD:%.*]] = getelementptr i8, <vscale x 4 x ptr> [[VECTOR_GEP]], <vscale x 4 x i64> [[TMP9]]
+; CHECK-UF2-NEXT: [[TMP12:%.*]] = extractelement <vscale x 4 x ptr> [[VECTOR_GEP]], i32 0
+; CHECK-UF2-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP12]], align 4
+; CHECK-UF2-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
+; CHECK-UF2-NEXT: [[TMP13:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
+; CHECK-UF2-NEXT: [[TMP14:%.*]] = extractelement <vscale x 4 x ptr> [[STEP_ADD]], i32 0
+; CHECK-UF2-NEXT: [[WIDE_VEC1:%.*]] = load <vscale x 8 x i32>, ptr [[TMP14]], align 4
+; CHECK-UF2-NEXT: [[STRIDED_VEC2:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC1]])
+; CHECK-UF2-NEXT: [[TMP15:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC2]], 0
+; CHECK-UF2-NEXT: [[TMP16:%.*]] = add <vscale x 4 x i32> [[TMP13]], splat (i32 1)
+; CHECK-UF2-NEXT: [[TMP17:%.*]] = add <vscale x 4 x i32> [[TMP15]], splat (i32 1)
+; CHECK-UF2-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP16]], <vscale x 4 x ptr> [[VECTOR_GEP]], i32 4, <vscale x 4 x i1> splat (i1 true))
+; CHECK-UF2-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP17]], <vscale x 4 x ptr> [[STEP_ADD]], i32 4, <vscale x 4 x i1> splat (i1 true))
+; CHECK-UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
+; CHECK-UF2-NEXT: [[TMP18:%.*]] = mul i64 8, [[TMP4]]
+; CHECK-UF2-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP18]]
+; CHECK-UF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-UF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-UF2: middle.block:
+; CHECK-UF2-NEXT: br label [[SCALAR_PH]]
+; CHECK-UF2: scalar.ph:
+; CHECK-UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-UF2-NEXT: [[BC_RESUME_VAL3:%.*]] = phi ptr [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ [[P]], [[ENTRY]] ]
+; CHECK-UF2-NEXT: br label [[LOOP:%.*]]
+; CHECK-UF2: loop:
+; CHECK-UF2-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; CHECK-UF2-NEXT: [[PTR:%.*]] = phi ptr [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ], [ [[PTR_NEXT:%.*]], [[LOOP]] ]
+; CHECK-UF2-NEXT: [[X0:%.*]] = load i32, ptr [[PTR]], align 4
+; CHECK-UF2-NEXT: [[Y0:%.*]] = add i32 [[X0]], 1
+; CHECK-UF2-NEXT: store i32 [[Y0]], ptr [[PTR]], align 4
+; CHECK-UF2-NEXT: [[PTR_NEXT]] = getelementptr inbounds i8, ptr [[PTR]], i64 8
+; CHECK-UF2-NEXT: [[NEXTI]] = add i64 [[I]], 1
+; CHECK-UF2-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; CHECK-UF2-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-UF2: exit:
+; CHECK-UF2-NEXT: ret void
+;
entry:
br label %loop
loop:
@@ -227,6 +405,58 @@ define void @single_stride_int_scaled(ptr %p, i64 %stride) {
; NOSTRIDED: exit:
; NOSTRIDED-NEXT: ret void
;
+; NOSTRIDED-UF2-LABEL: @single_stride_int_scaled(
+; NOSTRIDED-UF2-NEXT: entry:
+; NOSTRIDED-UF2-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; NOSTRIDED-UF2-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
+; NOSTRIDED-UF2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
+; NOSTRIDED-UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
+; NOSTRIDED-UF2: vector.scevcheck:
+; NOSTRIDED-UF2-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[STRIDE:%.*]], 1
+; NOSTRIDED-UF2-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; NOSTRIDED-UF2: vector.ph:
+; NOSTRIDED-UF2-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; NOSTRIDED-UF2-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
+; NOSTRIDED-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
+; NOSTRIDED-UF2-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
+; NOSTRIDED-UF2-NEXT: br label [[VECTOR_BODY:%.*]]
+; NOSTRIDED-UF2: vector.body:
+; NOSTRIDED-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; NOSTRIDED-UF2-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[INDEX]]
+; NOSTRIDED-UF2-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; NOSTRIDED-UF2-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2
+; NOSTRIDED-UF2-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP4]], i64 [[TMP6]]
+; NOSTRIDED-UF2-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP4]], align 4
+; NOSTRIDED-UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[TMP7]], align 4
+; NOSTRIDED-UF2-NEXT: [[TMP8:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 1)
+; NOSTRIDED-UF2-NEXT: [[TMP9:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD1]], splat (i32 1)
+; NOSTRIDED-UF2-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
+; NOSTRIDED-UF2-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 2
+; NOSTRIDED-UF2-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP4]], i64 [[TMP11]]
+; NOSTRIDED-UF2-NEXT: store <vscale x 4 x i32> [[TMP8]], ptr [[TMP4]], align 4
+; NOSTRIDED-UF2-NEXT: store <vscale x 4 x i32> [[TMP9]], ptr [[TMP12]], align 4
+; NOSTRIDED-UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
+; NOSTRIDED-UF2-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; NOSTRIDED-UF2-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; NOSTRIDED-UF2: middle.block:
+; NOSTRIDED-UF2-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
+; NOSTRIDED-UF2-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; NOSTRIDED-UF2: scalar.ph:
+; NOSTRIDED-UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; NOSTRIDED-UF2-NEXT: br label [[LOOP:%.*]]
+; NOSTRIDED-UF2: loop:
+; NOSTRIDED-UF2-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; NOSTRIDED-UF2-NEXT: [[OFFSET:%.*]] = mul nuw nsw i64 [[I]], [[STRIDE]]
+; NOSTRIDED-UF2-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET]]
+; NOSTRIDED-UF2-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4
+; NOSTRIDED-UF2-NEXT: [[Y0:%.*]] = add i32 [[X0]], 1
+; NOSTRIDED-UF2-NEXT: store i32 [[Y0]], ptr [[Q0]], align 4
+; NOSTRIDED-UF2-NEXT: [[NEXTI]] = add i64 [[I]], 1
+; NOSTRIDED-UF2-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; NOSTRIDED-UF2-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
+; NOSTRIDED-UF2: exit:
+; NOSTRIDED-UF2-NEXT: ret void
+;
; STRIDED-LABEL: @single_stride_int_scaled(
; STRIDED-NEXT: entry:
; STRIDED-NEXT: br label [[LOOP:%.*]]
@@ -243,6 +473,22 @@ define void @single_stride_int_scaled(ptr %p, i64 %stride) {
; STRIDED: exit:
; STRIDED-NEXT: ret void
;
+; STRIDED-UF2-LABEL: @single_stride_int_scaled(
+; STRIDED-UF2-NEXT: entry:
+; STRIDED-UF2-NEXT: br label [[LOOP:%.*]]
+; STRIDED-UF2: loop:
+; STRIDED-UF2-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; STRIDED-UF2-NEXT: [[OFFSET:%.*]] = mul nuw nsw i64 [[I]], [[STRIDE:%.*]]
+; STRIDED-UF2-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[OFFSET]]
+; STRIDED-UF2-NEXT: [[X0...
[truncated]
|
5d6d713
to
227fa6f
Compare
; CHECK-UF2-LABEL: @single_stride_ptr_iv( | ||
; CHECK-UF2-NEXT: entry: | ||
; CHECK-UF2-NEXT: br label [[LOOP:%.*]] | ||
; CHECK-UF2: loop: | ||
; CHECK-UF2-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] | ||
; CHECK-UF2-NEXT: [[PTR:%.*]] = phi ptr [ [[P:%.*]], [[ENTRY]] ], [ [[PTR_NEXT:%.*]], [[LOOP]] ] | ||
; CHECK-UF2-NEXT: [[X0:%.*]] = load i32, ptr [[PTR]], align 4 | ||
; CHECK-UF2-NEXT: [[Y0:%.*]] = add i32 [[X0]], 1 | ||
; CHECK-UF2-NEXT: store i32 [[Y0]], ptr [[PTR]], align 4 | ||
; CHECK-UF2-NEXT: [[PTR_NEXT]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[STRIDE:%.*]] | ||
; CHECK-UF2-NEXT: [[NEXTI]] = add i64 [[I]], 1 | ||
; CHECK-UF2-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 | ||
; CHECK-UF2-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[LOOP]] | ||
; CHECK-UF2: exit: | ||
; CHECK-UF2-NEXT: ret void |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you add a COMMON prefix so we have a single set of check lines when the IR is the same for both?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added COMMON and STRIDED-COMMON
250e026445f659997ff6d5f869dcba9835f1a48a
@@ -729,3 +1417,213 @@ loop: | |||
exit: | |||
ret void | |||
} | |||
|
|||
define void @constant_stride_reinterpret(ptr noalias %in, ptr noalias %out) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
might be good to add a comment to say that the ptr for %in
strides by 32 bit, but its load accesses 64 bit?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated the comment.
20c2aea0c4fa0a0e072694d5cff9c1fbc97aff1d
227fa6f
to
20c2aea
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks
20c2aea
to
f7147d2
Compare
Add tests for strided access with UF > 1, and introduce a new test case @constant_stride_reinterpret.