Skip to content

Commit c50ed05

Browse files
authored
[AMDGPU][True16][CodeGen] use vgpr16 for zext patterns (reopen #153894) (#154211)
Recreate this patch from #153894. It seems ISel silently ignores the `i64 = zext i16` pattern when it is written with a chained `reg_sequence`, which causes a selection failure in a HIP test. This recreates the patch with an alternative pattern and adds an ll test, global-extload-gfx11plus.ll.
1 parent 2f23767 commit c50ed05

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

46 files changed

+12689
-14019
lines changed

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3078,6 +3078,8 @@ def : GCNPat<
30783078
}
30793079
} // AddedComplexity = 1
30803080

3081+
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
3082+
let True16Predicate = p in {
30813083
def : GCNPat<
30823084
(i32 (DivergentUnaryFrag<zext> i16:$src)),
30833085
(V_AND_B32_e64 (S_MOV_B32 (i32 0xffff)), $src)
@@ -3092,7 +3094,28 @@ def : GCNPat<
30923094

30933095
def : GCNPat<
30943096
(i32 (zext (i16 (bitconvert fp16_zeros_high_16bits:$src)))),
3095-
(COPY VSrc_b16:$src)>;
3097+
(COPY VSrc_b16:$src)
3098+
>;
3099+
}
3100+
3101+
let True16Predicate = UseRealTrue16Insts in {
3102+
def : GCNPat<
3103+
(i32 (DivergentUnaryFrag<zext> i16:$src)),
3104+
(REG_SEQUENCE VGPR_32, VGPR_16:$src, lo16, (V_MOV_B16_t16_e64 0, (i16 0), 0), hi16)
3105+
>;
3106+
3107+
def : GCNPat<
3108+
(i64 (DivergentUnaryFrag<zext> i16:$src)),
3109+
(REG_SEQUENCE VReg_64,
3110+
(INSERT_SUBREG (i32 (V_MOV_B32_e32 (i32 0))), VGPR_16:$src, lo16), sub0,
3111+
(S_MOV_B32 (i32 0)), sub1)
3112+
>;
3113+
3114+
def : GCNPat<
3115+
(i32 (zext (i16 (bitconvert fp16_zeros_high_16bits:$src)))),
3116+
(REG_SEQUENCE VGPR_32, VGPR_16:$src, lo16, (V_MOV_B16_t16_e64 0, (i16 0), 0), hi16)
3117+
>;
3118+
}
30963119

30973120
def : GCNPat <
30983121
(i32 (trunc i64:$a)),

llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ define zeroext i16 @v_mul_i16_zeroext(i16 zeroext %num, i16 zeroext %den) {
164164
; GFX11-TRUE16: ; %bb.0:
165165
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
166166
; GFX11-TRUE16-NEXT: v_mul_lo_u16 v0.l, v0.l, v1.l
167-
; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
167+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, 0
168168
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
169169
;
170170
; GFX11-FAKE16-LABEL: v_mul_i16_zeroext:

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll

Lines changed: 5636 additions & 6265 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.128bit.ll

Lines changed: 546 additions & 602 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.256bit.ll

Lines changed: 620 additions & 700 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.320bit.ll

Lines changed: 1352 additions & 1534 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.32bit.ll

Lines changed: 108 additions & 132 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll

Lines changed: 2537 additions & 2877 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.64bit.ll

Lines changed: 287 additions & 350 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.96bit.ll

Lines changed: 283 additions & 311 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)