@@ -856,177 +856,6 @@ bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
   return Ret;
 }
 
-static bool isZero(Register Reg, MachineRegisterInfo &MRI) {
-  int64_t C;
-  if (mi_match(Reg, MRI, m_ICst(C)) && C == 0)
-    return true;
-
-  // FIXME: matcher should ignore copies
-  return mi_match(Reg, MRI, m_Copy(m_ICst(C))) && C == 0;
-}
-
-static unsigned extractGLC(unsigned AuxiliaryData) {
-  return AuxiliaryData & 1;
-}
-
-static unsigned extractSLC(unsigned AuxiliaryData) {
-  return (AuxiliaryData >> 1) & 1;
-}
-
-static unsigned extractDLC(unsigned AuxiliaryData) {
-  return (AuxiliaryData >> 2) & 1;
-}
-
-static unsigned extractSWZ(unsigned AuxiliaryData) {
-  return (AuxiliaryData >> 3) & 1;
-}
-
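For reference, the four extract* helpers being removed above imply a bit layout for the buffer intrinsics' auxiliary (cache-policy) operand. The sketch below only illustrates that layout; it is not part of the tree, and the struct and function names are made up for exposition.

#include <cassert>
#include <cstdint>

// Assumed bit layout of AuxiliaryData, mirroring extractGLC/SLC/DLC/SWZ above:
// bit 0 = glc, bit 1 = slc, bit 2 = dlc, bit 3 = swz.
struct BufferCachePolicy {
  unsigned GLC, SLC, DLC, SWZ;
};

static BufferCachePolicy decodeAuxiliaryData(uint32_t Aux) {
  return {Aux & 1u,          // glc: globally coherent
          (Aux >> 1) & 1u,   // slc: system level coherent
          (Aux >> 2) & 1u,   // dlc: device level coherent (gfx10)
          (Aux >> 3) & 1u};  // swz: swizzled
}

int main() {
  BufferCachePolicy P = decodeAuxiliaryData(0b0101u); // glc and dlc set
  assert(P.GLC == 1 && P.SLC == 0 && P.DLC == 1 && P.SWZ == 0);
  return 0;
}
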
-static unsigned getBufferStoreOpcode(LLT Ty,
-                                     const unsigned MemSize,
-                                     const bool Offen) {
-  const int Size = Ty.getSizeInBits();
-  switch (8 * MemSize) {
-  case 8:
-    return Offen ? AMDGPU::BUFFER_STORE_BYTE_OFFEN_exact :
-                   AMDGPU::BUFFER_STORE_BYTE_OFFSET_exact;
-  case 16:
-    return Offen ? AMDGPU::BUFFER_STORE_SHORT_OFFEN_exact :
-                   AMDGPU::BUFFER_STORE_SHORT_OFFSET_exact;
-  default:
-    unsigned Opc = Offen ? AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact :
-                           AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact;
-    if (Size > 32)
-      Opc = AMDGPU::getMUBUFOpcode(Opc, Size / 32);
-    return Opc;
-  }
-}
-
-static unsigned getBufferStoreFormatOpcode(LLT Ty,
-                                           const unsigned MemSize,
-                                           const bool Offen) {
-  bool IsD16Packed = Ty.getScalarSizeInBits() == 16;
-  bool IsD16Unpacked = 8 * MemSize < Ty.getSizeInBits();
-  int NumElts = Ty.isVector() ? Ty.getNumElements() : 1;
-
-  if (IsD16Packed) {
-    switch (NumElts) {
-    case 1:
-      return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFEN_exact :
-                     AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFSET_exact;
-    case 2:
-      return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact :
-                     AMDGPU::BUFFER_STORE_FORMAT_D16_XY_OFFSET_exact;
-    case 3:
-      return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_OFFEN_exact :
-                     AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_OFFSET_exact;
-    case 4:
-      return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact :
-                     AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_OFFSET_exact;
-    default:
-      return -1;
-    }
-  }
-
-  if (IsD16Unpacked) {
-    switch (NumElts) {
-    case 1:
-      return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFEN_exact :
-                     AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFSET_exact;
-    case 2:
-      return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact :
-                     AMDGPU::BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFSET_exact;
-    case 3:
-      return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_gfx80_OFFEN_exact :
-                     AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_gfx80_OFFSET_exact;
-    case 4:
-      return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact :
-                     AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFSET_exact;
-    default:
-      return -1;
-    }
-  }
-
-  switch (NumElts) {
-  case 1:
-    return Offen ? AMDGPU::BUFFER_STORE_FORMAT_X_OFFEN_exact :
-                   AMDGPU::BUFFER_STORE_FORMAT_X_OFFSET_exact;
-  case 2:
-    return Offen ? AMDGPU::BUFFER_STORE_FORMAT_XY_OFFEN_exact :
-                   AMDGPU::BUFFER_STORE_FORMAT_XY_OFFSET_exact;
-  case 3:
-    return Offen ? AMDGPU::BUFFER_STORE_FORMAT_XYZ_OFFEN_exact :
-                   AMDGPU::BUFFER_STORE_FORMAT_XYZ_OFFSET_exact;
-  case 4:
-    return Offen ? AMDGPU::BUFFER_STORE_FORMAT_XYZW_OFFEN_exact :
-                   AMDGPU::BUFFER_STORE_FORMAT_XYZW_OFFSET_exact;
-  default:
-    return -1;
-  }
-
-  llvm_unreachable("unhandled buffer store");
-}
-
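The format-store helper above chooses between three opcode families before picking a component count. A minimal standalone sketch of just that classification follows, under the same assumptions the removed code makes (d16 "packed" when the register type has 16-bit elements, d16 "unpacked"/gfx80 variants when the in-memory size is smaller than the register size). The enum and function names are illustrative only, not LLVM API.

// Illustrative classification only; not part of the LLVM tree.
enum class FormatStoreKind { Plain, D16Packed, D16Unpacked };

FormatStoreKind classifyFormatStore(unsigned ScalarBits,    // element size in bits
                                    unsigned RegSizeBits,   // register type size in bits
                                    unsigned MemSizeBytes) { // size of the memory operand
  if (ScalarBits == 16)
    return FormatStoreKind::D16Packed;   // e.g. <4 x s16> stored as two dwords
  if (8 * MemSizeBytes < RegSizeBits)
    return FormatStoreKind::D16Unpacked; // the *_gfx80 unpacked d16 opcodes
  return FormatStoreKind::Plain;         // 32-bit components: X/XY/XYZ/XYZW
}
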
-// TODO: Move this to combiner
-// Returns base register, imm offset, total constant offset.
-std::tuple<Register, unsigned, unsigned>
-AMDGPUInstructionSelector::splitBufferOffsets(MachineIRBuilder &B,
-                                              Register OrigOffset) const {
-  const unsigned MaxImm = 4095;
-  Register BaseReg;
-  unsigned TotalConstOffset;
-  MachineInstr *OffsetDef;
-
-  std::tie(BaseReg, TotalConstOffset, OffsetDef)
-    = AMDGPU::getBaseWithConstantOffset(*MRI, OrigOffset);
-
-  unsigned ImmOffset = TotalConstOffset;
-
-  // If the immediate value is too big for the immoffset field, put the value
-  // and -4096 into the immoffset field so that the value that is copied/added
-  // for the voffset field is a multiple of 4096, and it stands more chance
-  // of being CSEd with the copy/add for another similar load/store.f
-  // However, do not do that rounding down to a multiple of 4096 if that is a
-  // negative number, as it appears to be illegal to have a negative offset
-  // in the vgpr, even if adding the immediate offset makes it positive.
-  unsigned Overflow = ImmOffset & ~MaxImm;
-  ImmOffset -= Overflow;
-  if ((int32_t)Overflow < 0) {
-    Overflow += ImmOffset;
-    ImmOffset = 0;
-  }
-
-  if (Overflow != 0) {
-    // In case this is in a waterfall loop, insert offset code at the def point
-    // of the offset, not inside the loop.
-    MachineBasicBlock::iterator OldInsPt = B.getInsertPt();
-    MachineBasicBlock &OldMBB = B.getMBB();
-    B.setInstr(*OffsetDef);
-
-    if (!BaseReg) {
-      BaseReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
-      B.buildInstr(AMDGPU::V_MOV_B32_e32)
-        .addDef(BaseReg)
-        .addImm(Overflow);
-    } else {
-      Register OverflowVal = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
-      B.buildInstr(AMDGPU::V_MOV_B32_e32)
-        .addDef(OverflowVal)
-        .addImm(Overflow);
-
-      Register NewBaseReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
-      TII.getAddNoCarry(B.getMBB(), B.getInsertPt(), B.getDebugLoc(), NewBaseReg)
-        .addReg(BaseReg)
-        .addReg(OverflowVal, RegState::Kill)
-        .addImm(0);
-      BaseReg = NewBaseReg;
-    }
-
-    B.setInsertPt(OldMBB, OldInsPt);
-  }
-
-  return std::make_tuple(BaseReg, ImmOffset, TotalConstOffset);
-}
-
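The comment block in the removed splitBufferOffsets is easier to follow with concrete numbers. Below is a standalone sketch of just the offset arithmetic (the register materialization and insert-point handling are omitted); the function name and the pair return shape are made up for the example and are not the removed method itself.

#include <cassert>
#include <cstdint>
#include <utility>

// Keep at most 12 bits (<= 4095) in the MUBUF immediate field and move the
// rest, rounded to a multiple of 4096, into the voffset register so the
// materialized add is more likely to be CSEd; if that rounding would leave a
// negative register offset, everything goes to the register instead.
std::pair<uint32_t, uint32_t> splitOffset(uint32_t TotalConstOffset) {
  const uint32_t MaxImm = 4095;
  uint32_t ImmOffset = TotalConstOffset;
  uint32_t Overflow = ImmOffset & ~MaxImm; // part that does not fit in 12 bits
  ImmOffset -= Overflow;
  if ((int32_t)Overflow < 0) { // negative vgpr offset is not allowed
    Overflow += ImmOffset;
    ImmOffset = 0;
  }
  return {ImmOffset, Overflow}; // {immediate field, amount added to voffset}
}

int main() {
  auto R = splitOffset(4095);        // fits entirely in the immediate field
  assert(R.first == 4095u && R.second == 0u);
  R = splitOffset(4100);             // 4096 moves to the register, 4 stays immediate
  assert(R.first == 4u && R.second == 4096u);
  R = splitOffset(0x80000004u);      // rounding would go negative: all to the register
  assert(R.first == 0u && R.second == 0x80000004u);
  return 0;
}
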
 bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {
   // FIXME: Manually selecting to avoid dealiing with the SReg_1 trick
   // SelectionDAG uses for wave32 vs wave64.
@@ -1042,60 +871,6 @@ bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {
   return true;
 }
 
-bool AMDGPUInstructionSelector::selectStoreIntrinsic(MachineInstr &MI,
-                                                     bool IsFormat) const {
-  MachineIRBuilder B(MI);
-  MachineFunction &MF = B.getMF();
-  Register VData = MI.getOperand(1).getReg();
-  LLT Ty = MRI->getType(VData);
-
-  int Size = Ty.getSizeInBits();
-  if (Size % 32 != 0)
-    return false;
-
-  // FIXME: Verifier should enforce 1 MMO for these intrinsics.
-  MachineMemOperand *MMO = *MI.memoperands_begin();
-  const int MemSize = MMO->getSize();
-
-  Register RSrc = MI.getOperand(2).getReg();
-  Register VOffset = MI.getOperand(3).getReg();
-  Register SOffset = MI.getOperand(4).getReg();
-  unsigned AuxiliaryData = MI.getOperand(5).getImm();
-  unsigned ImmOffset;
-  unsigned TotalOffset;
-
-  std::tie(VOffset, ImmOffset, TotalOffset) = splitBufferOffsets(B, VOffset);
-  if (TotalOffset != 0)
-    MMO = MF.getMachineMemOperand(MMO, TotalOffset, MemSize);
-
-  const bool Offen = !isZero(VOffset, *MRI);
-
-  int Opc = IsFormat ? getBufferStoreFormatOpcode(Ty, MemSize, Offen) :
-                       getBufferStoreOpcode(Ty, MemSize, Offen);
-  if (Opc == -1)
-    return false;
-
-  MachineInstrBuilder MIB = B.buildInstr(Opc)
-    .addUse(VData);
-
-  if (Offen)
-    MIB.addUse(VOffset);
-
-  MIB.addUse(RSrc)
-     .addUse(SOffset)
-     .addImm(ImmOffset)
-     .addImm(extractGLC(AuxiliaryData))
-     .addImm(extractSLC(AuxiliaryData))
-     .addImm(0) // tfe: FIXME: Remove from inst
-     .addImm(extractDLC(AuxiliaryData))
-     .addImm(extractSWZ(AuxiliaryData))
-     .addMemOperand(MMO);
-
-  MI.eraseFromParent();
-
-  return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
-}
-
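For readers tracing the MI.getOperand(N) calls in the removed selectStoreIntrinsic above, this is the operand layout it assumed for the raw buffer store G_INTRINSIC_W_SIDE_EFFECTS node. The enum below is purely descriptive, its names are invented for this note, and it does not exist in the tree.

// Descriptive only; mirrors the operand indices used by the removed code.
enum RawBufferStoreOperandIdx : unsigned {
  StoreVData = 1,   // value to store
  StoreRSrc = 2,    // 128-bit buffer resource descriptor
  StoreVOffset = 3, // per-lane (VGPR) byte offset
  StoreSOffset = 4, // scalar (SGPR) byte offset
  StoreAux = 5      // cache-policy bits decoded by extractGLC/SLC/DLC/SWZ
};
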
 static unsigned getDSShaderTypeValue(const MachineFunction &MF) {
   switch (MF.getFunction().getCallingConv()) {
   case CallingConv::AMDGPU_PS:
@@ -1325,10 +1100,6 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
   switch (IntrinsicID) {
   case Intrinsic::amdgcn_end_cf:
     return selectEndCfIntrinsic(I);
-  case Intrinsic::amdgcn_raw_buffer_store:
-    return selectStoreIntrinsic(I, false);
-  case Intrinsic::amdgcn_raw_buffer_store_format:
-    return selectStoreIntrinsic(I, true);
   case Intrinsic::amdgcn_ds_ordered_add:
   case Intrinsic::amdgcn_ds_ordered_swap:
     return selectDSOrderedIntrinsic(I, IntrinsicID);