Skip to content

Commit 936483f

Browse files
committed
GlobalISel: Implement lower for G_BITCAST
Bitcast only really applies between scalars and vectors. Implement as an unmerge and remerge. The test needs to tolerate failure since one of the unmerges currently fails to legalize.
1 parent bd7658a commit 936483f

File tree

4 files changed

+191
-2
lines changed

4 files changed

+191
-2
lines changed

llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,7 @@ class LegalizerHelper {
221221
LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
222222
LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
223223

224+
LegalizeResult lowerBitcast(MachineInstr &MI);
224225
LegalizeResult lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
225226

226227
LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI);

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1941,6 +1941,39 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
19411941
}
19421942
}
19431943

1944+
/// Split \p Src into pieces of type \p Ty by building an unmerge through
/// \p B, then append every resulting register to \p Pieces in result order.
static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
                             MachineIRBuilder &B, Register Src, LLT Ty) {
  auto Unmerge = B.buildUnmerge(Ty, Src);

  // The unmerge lists its results first and the value being split last, so
  // every operand except the final one is a piece to collect.
  const int NumPieces = Unmerge->getNumOperands() - 1;
  int PieceIdx = 0;
  while (PieceIdx != NumPieces) {
    Pieces.push_back(Unmerge.getReg(PieceIdx));
    ++PieceIdx;
  }
}
1950+
1951+
/// Lower a G_BITCAST between a scalar and a vector as an unmerge followed by
/// a merge.
///
/// - vector -> scalar: the source is split into its element type and the
///   elements are merged into the scalar destination.
/// - scalar -> vector: the source is split into pieces of the destination's
///   element type and the pieces are merged into the vector destination.
///
/// \returns Legalized after erasing \p MI when exactly one of the two types
/// is a vector; UnableToLegalize otherwise (scalar<->scalar and
/// vector<->vector casts are not handled here).
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitcast(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);

  // Only the mixed scalar/vector combinations are supported.
  if (SrcTy.isVector() == DstTy.isVector())
    return UnableToLegalize;

  // Whichever side is the vector dictates the size of the pieces.
  LLT PieceTy =
      SrcTy.isVector() ? SrcTy.getElementType() : DstTy.getElementType();

  SmallVector<Register, 8> Parts;
  getUnmergePieces(Parts, MIRBuilder, SrcReg, PieceTy);
  MIRBuilder.buildMerge(DstReg, Parts);
  MI.eraseFromParent();
  return Legalized;
}
1976+
19441977
LegalizerHelper::LegalizeResult
19451978
LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
19461979
using namespace TargetOpcode;
@@ -1949,6 +1982,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
19491982
switch(MI.getOpcode()) {
19501983
default:
19511984
return UnableToLegalize;
1985+
case TargetOpcode::G_BITCAST:
1986+
return lowerBitcast(MI);
19521987
case TargetOpcode::G_SREM:
19531988
case TargetOpcode::G_UREM: {
19541989
Register QuotReg = MRI.createGenericVirtualRegister(Ty);

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,9 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
308308
// Don't worry about the size constraint.
309309
.legalIf(all(isRegisterType(0), isRegisterType(1)))
310310
// FIXME: Testing hack
311-
.legalForCartesianProduct({S16, LLT::vector(2, 8), });
311+
.legalForCartesianProduct({S16, LLT::vector(2, 8), })
312+
.lower();
313+
312314

313315
getActionDefinitionsBuilder(G_FCONSTANT)
314316
.legalFor({S32, S64, S16})

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir

Lines changed: 152 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2-
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck %s
2+
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck %s
33

44
---
55
name: test_bitcast_s32_to_v2s16
@@ -282,3 +282,154 @@ body: |
282282
%1:_(<8 x s64>) = G_BITCAST %0
283283
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1
284284
...
285+
286+
---
287+
name: test_bitcast_s24_to_v3s8
288+
body: |
289+
bb.0:
290+
liveins: $vgpr0
291+
292+
; CHECK-LABEL: name: test_bitcast_s24_to_v3s8
293+
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
294+
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
295+
; CHECK: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[C]](s64)
296+
; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[TRUNC]](s48)
297+
; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16777215
298+
; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32)
299+
; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C1]]
300+
; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[AND]], [[TRUNC1]](s32)
301+
; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[SHL]](s64)
302+
; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]], [[COPY1]]
303+
; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
304+
; CHECK: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[C2]](s64)
305+
; CHECK: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[TRUNC2]](s48)
306+
; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY [[OR]](s64)
307+
; CHECK: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[COPY2]], [[TRUNC3]](s32)
308+
; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY [[SHL1]](s64)
309+
; CHECK: [[OR1:%[0-9]+]]:_(s64) = G_OR [[COPY2]], [[COPY3]]
310+
; CHECK: [[TRUNC4:%[0-9]+]]:_(s48) = G_TRUNC [[OR1]](s64)
311+
; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[TRUNC4]](s48)
312+
; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16)
313+
; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16)
314+
; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16)
315+
; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ANYEXT1]](s32)
316+
; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[ANYEXT2]](s32)
317+
; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ANYEXT3]](s32)
318+
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32)
319+
; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
320+
%0:_(s32) = COPY $vgpr0
321+
%1:_(s24) = G_TRUNC %0
322+
%2:_(<3 x s8>) = G_BITCAST %1
323+
%3:_(<3 x s32>) = G_ANYEXT %2
324+
$vgpr0_vgpr1_vgpr2 = COPY %3
325+
...
326+
327+
---
328+
name: test_bitcast_s48_to_v3s16
329+
body: |
330+
bb.0:
331+
liveins: $vgpr0_vgpr1
332+
333+
; CHECK-LABEL: name: test_bitcast_s48_to_v3s16
334+
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
335+
; CHECK: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[COPY]](s64)
336+
; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[TRUNC]](s48)
337+
; CHECK: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
338+
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16)
339+
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[UV2]](s16), [[DEF]](s16)
340+
; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
341+
; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0
342+
; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
343+
; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
344+
; CHECK: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
345+
; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
346+
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
347+
; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
348+
; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
349+
; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
350+
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
351+
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
352+
; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
353+
; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
354+
; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR2]](<3 x s32>)
355+
%0:_(s64) = COPY $vgpr0_vgpr1
356+
%1:_(s48) = G_TRUNC %0
357+
%2:_(<3 x s16>) = G_BITCAST %1
358+
%3:_(<3 x s32>) = G_ANYEXT %2
359+
$vgpr0_vgpr1_vgpr2 = COPY %3
360+
...
361+
362+
---
363+
name: test_bitcast_v3s8_to_s24
364+
body: |
365+
bb.0:
366+
liveins: $vgpr0_vgpr1_vgpr2
367+
368+
; CHECK-LABEL: name: test_bitcast_v3s8_to_s24
369+
; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
370+
; CHECK: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY]](<3 x s32>)
371+
; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[TRUNC]](<3 x s8>)
372+
; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
373+
; CHECK: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8)
374+
; CHECK: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8)
375+
; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
376+
; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C]](s16)
377+
; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]]
378+
; CHECK: [[ZEXT2:%[0-9]+]]:_(s16) = G_ZEXT [[UV2]](s8)
379+
; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
380+
; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
381+
; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
382+
; CHECK: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND]], [[C]](s16)
383+
; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[ZEXT2]], [[SHL1]]
384+
; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
385+
; CHECK: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
386+
; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
387+
; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT4]], [[C2]](s32)
388+
; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT3]], [[SHL2]]
389+
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
390+
; CHECK: $vgpr0 = COPY [[COPY1]](s32)
391+
%0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
392+
%1:_(<3 x s8>) = G_TRUNC %0
393+
%2:_(s24) = G_BITCAST %1
394+
%3:_(s32) = G_ANYEXT %2
395+
$vgpr0 = COPY %3
396+
...
397+
398+
---
399+
name: test_bitcast_v3s16_to_s48
400+
body: |
401+
bb.0:
402+
liveins: $vgpr0_vgpr1_vgpr2
403+
404+
; CHECK-LABEL: name: test_bitcast_v3s16_to_s48
405+
; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
406+
; CHECK: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[COPY]](<3 x s32>)
407+
; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
408+
; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0
409+
; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
410+
; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
411+
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
412+
; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
413+
; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
414+
; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
415+
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
416+
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
417+
; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
418+
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
419+
; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
420+
; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
421+
; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
422+
; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
423+
; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
424+
; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
425+
; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
426+
; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
427+
; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
428+
; CHECK: [[COPY4:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
429+
; CHECK: $vgpr0_vgpr1 = COPY [[COPY4]](s64)
430+
%0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
431+
%1:_(<3 x s16>) = G_TRUNC %0
432+
%2:_(s48) = G_BITCAST %1
433+
%3:_(s64) = G_ANYEXT %2
434+
$vgpr0_vgpr1 = COPY %3
435+
...

0 commit comments

Comments
 (0)