diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index a0b5f67c2e6c7..e8dc9280e8e2b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -7981,8 +7981,6 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N,
   // If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
   // then add in the carry.
-  // TODO: If we can't split it in half, we might be able to split into 3 or
-  // more pieces using a smaller bit width.
   if (HalfMaxPlus1.urem(Divisor).isOne()) {
     assert(!LL == !LH && "Expected both input halves or no input halves!");
     if (!LL)
@@ -8030,6 +8028,83 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N,
                            DAG.getConstant(0, dl, HiLoVT));
       Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry);
     }
+
+  } else {
+    // If we cannot split into two halves, look for a smaller chunk width
+    // where (1 << ChunkWidth) % Divisor == 1. Each chunk's place value is
+    // then congruent to 1 modulo Divisor, so the sum of the chunks is
+    // congruent to the original value modulo Divisor.
+    unsigned BitWidth = VT.getScalarSizeInBits();
+    unsigned BestChunkWidth = 0;
+
+    // Restrict to small chunk widths (<= 32 bits) so that arithmetic on
+    // the chunks remains legal on most targets.
+    unsigned MaxChunk = std::min(32u, BitWidth - 1);
+    for (unsigned i = MaxChunk; i >= 1; --i) {
+      APInt ChunkMaxPlus1 = APInt::getOneBitSet(BitWidth, i);
+      if (ChunkMaxPlus1.urem(Divisor).isOne()) {
+        BestChunkWidth = i;
+        break;
+      }
+    }
+
+    // If we found a good chunk width, slice the number and sum the pieces.
+    if (BestChunkWidth > 0) {
+      EVT ChunkVT = EVT::getIntegerVT(*DAG.getContext(), BestChunkWidth);
+
+      if (!LL)
+        std::tie(LL, LH) =
+            DAG.SplitScalar(N->getOperand(0), dl, HiLoVT, HiLoVT);
+      SDValue In = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
+
+      // Split the value into fixed-size chunks.
+      SmallVector<SDValue, 8> Parts;
+      for (unsigned i = 0; i < BitWidth; i += BestChunkWidth) {
+        SDValue Shift = DAG.getShiftAmountConstant(i, VT, dl);
+        SDValue Chunk = DAG.getNode(ISD::SRL, dl, VT, In, Shift);
+        Chunk = DAG.getNode(ISD::TRUNCATE, dl, ChunkVT, Chunk);
+        Parts.push_back(Chunk);
+      }
+      if (Parts.empty())
+        return false;
+      Sum = Parts[0];
+
+      // Use uaddo_carry if we can, otherwise use a compare to detect
+      // overflow; this mirrors the two-halves case above. Because
+      // (1 << BestChunkWidth) % Divisor == 1, each addition's wraparound
+      // carry can be folded back into the running sum without changing
+      // its value modulo Divisor.
+      EVT SetCCType =
+          getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ChunkVT);
+      for (unsigned i = 1; i < Parts.size(); ++i) {
+        if (isOperationLegalOrCustom(ISD::UADDO_CARRY, ChunkVT)) {
+          SDVTList VTList = DAG.getVTList(ChunkVT, SetCCType);
+          SDValue Zero = DAG.getConstant(0, dl, ChunkVT);
+          SDValue UAdd = DAG.getNode(ISD::UADDO, dl, VTList, Sum, Parts[i]);
+          Sum = DAG.getNode(ISD::UADDO_CARRY, dl, VTList, UAdd, Zero,
+                            UAdd.getValue(1));
+        } else {
+          SDValue Add = DAG.getNode(ISD::ADD, dl, ChunkVT, Sum, Parts[i]);
+          SDValue Carry = DAG.getSetCC(dl, SetCCType, Add, Sum, ISD::SETULT);
+
+          if (getBooleanContents(ChunkVT) ==
+              TargetLoweringBase::ZeroOrOneBooleanContent)
+            Carry = DAG.getZExtOrTrunc(Carry, dl, ChunkVT);
+          else
+            Carry = DAG.getSelect(dl, ChunkVT, Carry,
+                                  DAG.getConstant(1, dl, ChunkVT),
+                                  DAG.getConstant(0, dl, ChunkVT));
+
+          // Folding the carry back in cannot overflow again: if the first
+          // add wrapped, its result is at most (1 << BestChunkWidth) - 2.
+          Sum = DAG.getNode(ISD::ADD, dl, ChunkVT, Add, Carry);
+        }
+      }
+
+      Sum = DAG.getNode(ISD::ZERO_EXTEND, dl, HiLoVT, Sum);
+    } else {
+      return false;
+    }
   }
 
   // If we didn't find a sum, we can't do the expansion.
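The expansion above rests on a simple modular identity: when (1 << ChunkWidth) % Divisor == 1, every ChunkWidth-bit chunk of the dividend has a place value congruent to 1 modulo Divisor, so summing the chunks (folding each addition's wraparound carry back into the sum) yields a small value with the same remainder. Below is a minimal standalone sketch of that reduction on plain 64-bit integers, independent of the SelectionDAG code; the function name is made up for illustration, and the fixed 30-bit chunk width is the one the search selects for divisor 7, which is why the RISC-V checks below mask with 0x3FFFFFFF (2^30 - 1).

#include <cassert>
#include <cstdint>

// Reduce a 64-bit value modulo 7 by summing 30-bit chunks.
// This works because (1 << 30) % 7 == 1: each chunk's place value is
// congruent to 1 modulo 7, so the chunk sum has the same remainder.
uint64_t chunkSumMod7(uint64_t V) {
  const unsigned ChunkWidth = 30; // largest w <= 32 with (1 << w) % 7 == 1
  const uint64_t Mask = (uint64_t(1) << ChunkWidth) - 1;
  uint64_t Sum = 0;
  for (unsigned I = 0; I < 64; I += ChunkWidth) {
    Sum += (V >> I) & Mask;
    // End-around carry: 1 << ChunkWidth is congruent to 1 modulo 7, so a
    // wrap past ChunkWidth bits folds back in as +1.
    Sum = (Sum & Mask) + (Sum >> ChunkWidth);
  }
  return Sum; // Sum % 7 == V % 7, and Sum fits in just over 30 bits.
}

int main() {
  const uint64_t Samples[] = {0, 6, 12345678901234567ULL, ~uint64_t(0),
                              (uint64_t(15) << 60) |
                                  ((uint64_t(1) << 30) - 1)};
  for (uint64_t V : Samples)
    assert(chunkSumMod7(V) % 7 == V % 7);
  return 0;
}

Dropping any of the carry folds breaks the congruence, which is why both legs of the summation loop in the patch reintroduce the carry into the running sum.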
diff --git a/llvm/test/CodeGen/RISCV/div-by-constant.ll b/llvm/test/CodeGen/RISCV/div-by-constant.ll index ea8b04d727acf..f2e1979fc4057 100644 --- a/llvm/test/CodeGen/RISCV/div-by-constant.ll +++ b/llvm/test/CodeGen/RISCV/div-by-constant.ll @@ -115,16 +115,76 @@ define i64 @udiv64_constant_no_add(i64 %a) nounwind { } define i64 @udiv64_constant_add(i64 %a) nounwind { -; RV32-LABEL: udiv64_constant_add: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: li a2, 7 -; RV32-NEXT: li a3, 0 -; RV32-NEXT: call __udivdi3 -; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret +; RV32IM-LABEL: udiv64_constant_add: +; RV32IM: # %bb.0: +; RV32IM-NEXT: lui a2, 262144 +; RV32IM-NEXT: slli a3, a1, 2 +; RV32IM-NEXT: srli a4, a0, 30 +; RV32IM-NEXT: srli a5, a1, 28 +; RV32IM-NEXT: lui a6, 149797 +; RV32IM-NEXT: addi a2, a2, -1 +; RV32IM-NEXT: or a3, a4, a3 +; RV32IM-NEXT: and a4, a0, a2 +; RV32IM-NEXT: add a3, a0, a3 +; RV32IM-NEXT: add a5, a3, a5 +; RV32IM-NEXT: and a3, a3, a2 +; RV32IM-NEXT: sltu a3, a3, a4 +; RV32IM-NEXT: lui a4, 449390 +; RV32IM-NEXT: add a3, a5, a3 +; RV32IM-NEXT: lui a5, 748983 +; RV32IM-NEXT: addi a6, a6, -1755 +; RV32IM-NEXT: addi a4, a4, -1171 +; RV32IM-NEXT: addi a5, a5, -585 +; RV32IM-NEXT: and a2, a3, a2 +; RV32IM-NEXT: mulhu a3, a2, a6 +; RV32IM-NEXT: slli a6, a3, 3 +; RV32IM-NEXT: add a2, a2, a3 +; RV32IM-NEXT: sub a2, a2, a6 +; RV32IM-NEXT: sub a3, a0, a2 +; RV32IM-NEXT: sltu a0, a0, a2 +; RV32IM-NEXT: mul a2, a3, a4 +; RV32IM-NEXT: mulhu a4, a3, a5 +; RV32IM-NEXT: sub a1, a1, a0 +; RV32IM-NEXT: add a2, a4, a2 +; RV32IM-NEXT: mul a1, a1, a5 +; RV32IM-NEXT: add a1, a2, a1 +; RV32IM-NEXT: mul a0, a3, a5 +; RV32IM-NEXT: ret +; +; RV32IMZB-LABEL: udiv64_constant_add: +; RV32IMZB: # %bb.0: +; RV32IMZB-NEXT: srli a2, a0, 30 +; RV32IMZB-NEXT: srli a3, a1, 28 +; RV32IMZB-NEXT: lui a4, 786432 +; RV32IMZB-NEXT: slli a5, a0, 2 +; RV32IMZB-NEXT: lui a6, 149797 +; RV32IMZB-NEXT: sh2add a2, a1, a2 +; RV32IMZB-NEXT: srli a5, a5, 2 +; RV32IMZB-NEXT: add a2, a0, a2 +; RV32IMZB-NEXT: add a3, a2, a3 +; RV32IMZB-NEXT: andn a2, a2, a4 +; RV32IMZB-NEXT: sltu a2, a2, a5 +; RV32IMZB-NEXT: lui a5, 449390 +; RV32IMZB-NEXT: add a2, a3, a2 +; RV32IMZB-NEXT: lui a3, 748983 +; RV32IMZB-NEXT: addi a6, a6, -1755 +; RV32IMZB-NEXT: addi a5, a5, -1171 +; RV32IMZB-NEXT: addi a3, a3, -585 +; RV32IMZB-NEXT: andn a2, a2, a4 +; RV32IMZB-NEXT: mulhu a4, a2, a6 +; RV32IMZB-NEXT: slli a6, a4, 3 +; RV32IMZB-NEXT: add a2, a2, a4 +; RV32IMZB-NEXT: sub a2, a2, a6 +; RV32IMZB-NEXT: sub a4, a0, a2 +; RV32IMZB-NEXT: sltu a0, a0, a2 +; RV32IMZB-NEXT: mul a2, a4, a5 +; RV32IMZB-NEXT: mulhu a5, a4, a3 +; RV32IMZB-NEXT: sub a1, a1, a0 +; RV32IMZB-NEXT: add a2, a5, a2 +; RV32IMZB-NEXT: mul a1, a1, a3 +; RV32IMZB-NEXT: add a1, a2, a1 +; RV32IMZB-NEXT: mul a0, a4, a3 +; RV32IMZB-NEXT: ret ; ; RV64-LABEL: udiv64_constant_add: ; RV64: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/split-udiv-by-constant.ll b/llvm/test/CodeGen/RISCV/split-udiv-by-constant.ll index eb70d7f43c0ef..8250fc3a176e2 100644 --- a/llvm/test/CodeGen/RISCV/split-udiv-by-constant.ll +++ b/llvm/test/CodeGen/RISCV/split-udiv-by-constant.ll @@ -117,24 +117,89 @@ define iXLen2 @test_udiv_5(iXLen2 %x) nounwind { define iXLen2 @test_udiv_7(iXLen2 %x) nounwind { ; RV32-LABEL: test_udiv_7: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: li a2, 7 -; RV32-NEXT: li a3, 0 -; RV32-NEXT: call __udivdi3 -; 
RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: lui a2, 262144 +; RV32-NEXT: slli a3, a1, 2 +; RV32-NEXT: srli a4, a0, 30 +; RV32-NEXT: srli a5, a1, 28 +; RV32-NEXT: lui a6, 149797 +; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: or a3, a4, a3 +; RV32-NEXT: and a4, a0, a2 +; RV32-NEXT: add a3, a0, a3 +; RV32-NEXT: add a5, a3, a5 +; RV32-NEXT: and a3, a3, a2 +; RV32-NEXT: sltu a3, a3, a4 +; RV32-NEXT: lui a4, 449390 +; RV32-NEXT: add a3, a5, a3 +; RV32-NEXT: lui a5, 748983 +; RV32-NEXT: addi a6, a6, -1755 +; RV32-NEXT: addi a4, a4, -1171 +; RV32-NEXT: addi a5, a5, -585 +; RV32-NEXT: and a2, a3, a2 +; RV32-NEXT: mulhu a3, a2, a6 +; RV32-NEXT: slli a6, a3, 3 +; RV32-NEXT: add a2, a2, a3 +; RV32-NEXT: sub a2, a2, a6 +; RV32-NEXT: sub a3, a0, a2 +; RV32-NEXT: sltu a0, a0, a2 +; RV32-NEXT: mul a2, a3, a4 +; RV32-NEXT: mulhu a4, a3, a5 +; RV32-NEXT: sub a1, a1, a0 +; RV32-NEXT: add a2, a4, a2 +; RV32-NEXT: mul a1, a1, a5 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: mul a0, a3, a5 ; RV32-NEXT: ret ; ; RV64-LABEL: test_udiv_7: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64-NEXT: li a2, 7 -; RV64-NEXT: li a3, 0 -; RV64-NEXT: call __udivti3 -; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: slli a2, a1, 4 +; RV64-NEXT: srli a3, a0, 60 +; RV64-NEXT: slli a4, a1, 34 +; RV64-NEXT: srli a5, a0, 30 +; RV64-NEXT: lui a6, 262144 +; RV64-NEXT: srli a7, a1, 26 +; RV64-NEXT: or a2, a3, a2 +; RV64-NEXT: lui a3, 748983 +; RV64-NEXT: or a4, a5, a4 +; RV64-NEXT: addi a6, a6, -1 +; RV64-NEXT: addi a3, a3, -585 +; RV64-NEXT: add a4, a0, a4 +; RV64-NEXT: slli a5, a3, 33 +; RV64-NEXT: add a3, a3, a5 +; RV64-NEXT: and a5, a0, a6 +; RV64-NEXT: add a2, a4, a2 +; RV64-NEXT: and a4, a4, a6 +; RV64-NEXT: sltu a5, a4, a5 +; RV64-NEXT: add a5, a2, a5 +; RV64-NEXT: and a2, a2, a6 +; RV64-NEXT: sltu a2, a2, a4 +; RV64-NEXT: srli a4, a1, 56 +; RV64-NEXT: add a2, a2, a4 +; RV64-NEXT: lui a4, %hi(.LCPI2_0) +; RV64-NEXT: add a7, a5, a7 +; RV64-NEXT: and a5, a5, a6 +; RV64-NEXT: add a2, a7, a2 +; RV64-NEXT: and a7, a7, a6 +; RV64-NEXT: sltu a5, a7, a5 +; RV64-NEXT: lui a7, %hi(.LCPI2_1) +; RV64-NEXT: ld a4, %lo(.LCPI2_0)(a4) +; RV64-NEXT: ld a7, %lo(.LCPI2_1)(a7) +; RV64-NEXT: add a2, a2, a5 +; RV64-NEXT: and a2, a2, a6 +; RV64-NEXT: mulhu a4, a2, a4 +; RV64-NEXT: slli a5, a4, 3 +; RV64-NEXT: add a2, a2, a4 +; RV64-NEXT: sub a2, a2, a5 +; RV64-NEXT: sub a4, a0, a2 +; RV64-NEXT: sltu a0, a0, a2 +; RV64-NEXT: mul a2, a4, a7 +; RV64-NEXT: mulhu a5, a4, a3 +; RV64-NEXT: sub a1, a1, a0 +; RV64-NEXT: add a2, a5, a2 +; RV64-NEXT: mul a1, a1, a3 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: mul a0, a4, a3 ; RV64-NEXT: ret %a = udiv iXLen2 %x, 7 ret iXLen2 %a @@ -143,24 +208,86 @@ define iXLen2 @test_udiv_7(iXLen2 %x) nounwind { define iXLen2 @test_udiv_9(iXLen2 %x) nounwind { ; RV32-LABEL: test_udiv_9: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: li a2, 9 -; RV32-NEXT: li a3, 0 -; RV32-NEXT: call __udivdi3 -; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: lui a2, 262144 +; RV32-NEXT: slli a3, a1, 2 +; RV32-NEXT: srli a4, a0, 30 +; RV32-NEXT: srli a5, a1, 28 +; RV32-NEXT: lui a6, 233017 +; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: or a3, a4, a3 +; RV32-NEXT: and a4, a0, a2 +; RV32-NEXT: add a3, a0, a3 +; RV32-NEXT: add a5, a3, a5 +; RV32-NEXT: and a3, a3, a2 +; RV32-NEXT: sltu a3, a3, a4 +; 
RV32-NEXT: lui a4, 582542 +; RV32-NEXT: addi a6, a6, -455 +; RV32-NEXT: addi a4, a4, 910 +; RV32-NEXT: add a3, a5, a3 +; RV32-NEXT: and a2, a3, a2 +; RV32-NEXT: mulhu a3, a2, a6 +; RV32-NEXT: srli a3, a3, 1 +; RV32-NEXT: slli a5, a3, 3 +; RV32-NEXT: sub a2, a2, a3 +; RV32-NEXT: sub a2, a2, a5 +; RV32-NEXT: sub a3, a0, a2 +; RV32-NEXT: sltu a0, a0, a2 +; RV32-NEXT: mul a2, a3, a4 +; RV32-NEXT: mulhu a4, a3, a6 +; RV32-NEXT: sub a1, a1, a0 +; RV32-NEXT: add a2, a4, a2 +; RV32-NEXT: mul a1, a1, a6 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: mul a0, a3, a6 ; RV32-NEXT: ret ; ; RV64-LABEL: test_udiv_9: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64-NEXT: li a2, 9 -; RV64-NEXT: li a3, 0 -; RV64-NEXT: call __udivti3 -; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: slli a2, a1, 4 +; RV64-NEXT: srli a3, a0, 60 +; RV64-NEXT: slli a4, a1, 34 +; RV64-NEXT: srli a5, a0, 30 +; RV64-NEXT: lui a6, 262144 +; RV64-NEXT: srli a7, a1, 26 +; RV64-NEXT: or a2, a3, a2 +; RV64-NEXT: srli a3, a1, 56 +; RV64-NEXT: or a4, a5, a4 +; RV64-NEXT: addi a6, a6, -1 +; RV64-NEXT: add a4, a0, a4 +; RV64-NEXT: and a5, a0, a6 +; RV64-NEXT: add a2, a4, a2 +; RV64-NEXT: and a4, a4, a6 +; RV64-NEXT: sltu a5, a4, a5 +; RV64-NEXT: add a5, a2, a5 +; RV64-NEXT: and a2, a2, a6 +; RV64-NEXT: sltu a2, a2, a4 +; RV64-NEXT: lui a4, %hi(.LCPI3_0) +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: lui a3, %hi(.LCPI3_1) +; RV64-NEXT: add a7, a5, a7 +; RV64-NEXT: and a5, a5, a6 +; RV64-NEXT: add a2, a7, a2 +; RV64-NEXT: and a7, a7, a6 +; RV64-NEXT: sltu a5, a7, a5 +; RV64-NEXT: lui a7, %hi(.LCPI3_2) +; RV64-NEXT: ld a4, %lo(.LCPI3_0)(a4) +; RV64-NEXT: ld a3, %lo(.LCPI3_1)(a3) +; RV64-NEXT: ld a7, %lo(.LCPI3_2)(a7) +; RV64-NEXT: add a2, a2, a5 +; RV64-NEXT: and a2, a2, a6 +; RV64-NEXT: mulhu a4, a2, a4 +; RV64-NEXT: slli a5, a4, 3 +; RV64-NEXT: sub a2, a2, a4 +; RV64-NEXT: sub a2, a2, a5 +; RV64-NEXT: sub a4, a0, a2 +; RV64-NEXT: sltu a0, a0, a2 +; RV64-NEXT: mul a2, a4, a3 +; RV64-NEXT: mulhu a3, a4, a7 +; RV64-NEXT: sub a1, a1, a0 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: mul a1, a1, a7 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: mul a0, a4, a7 ; RV64-NEXT: ret %a = udiv iXLen2 %x, 9 ret iXLen2 %a diff --git a/llvm/test/CodeGen/RISCV/split-urem-by-constant.ll b/llvm/test/CodeGen/RISCV/split-urem-by-constant.ll index bc4a99a00ac64..1680ea7d8da30 100644 --- a/llvm/test/CodeGen/RISCV/split-urem-by-constant.ll +++ b/llvm/test/CodeGen/RISCV/split-urem-by-constant.ll @@ -79,24 +79,63 @@ define iXLen2 @test_urem_5(iXLen2 %x) nounwind { define iXLen2 @test_urem_7(iXLen2 %x) nounwind { ; RV32-LABEL: test_urem_7: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: li a2, 7 -; RV32-NEXT: li a3, 0 -; RV32-NEXT: call __umoddi3 -; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: lui a2, 262144 +; RV32-NEXT: slli a3, a1, 2 +; RV32-NEXT: srli a4, a0, 30 +; RV32-NEXT: srli a1, a1, 28 +; RV32-NEXT: lui a5, 149797 +; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: or a3, a4, a3 +; RV32-NEXT: addi a4, a5, -1755 +; RV32-NEXT: and a5, a0, a2 +; RV32-NEXT: add a0, a0, a3 +; RV32-NEXT: and a3, a0, a2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: sltu a1, a3, a5 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: mulhu a1, a0, a4 +; RV32-NEXT: slli a2, a1, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: sub a0, a0, a2 +; RV32-NEXT: li a1, 0 ; RV32-NEXT: ret ; ; 
RV64-LABEL: test_urem_7: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64-NEXT: li a2, 7 -; RV64-NEXT: li a3, 0 -; RV64-NEXT: call __umodti3 -; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: slli a2, a1, 4 +; RV64-NEXT: srli a3, a0, 60 +; RV64-NEXT: slli a4, a1, 34 +; RV64-NEXT: srli a5, a0, 30 +; RV64-NEXT: lui a6, 262144 +; RV64-NEXT: or a2, a3, a2 +; RV64-NEXT: srli a3, a1, 26 +; RV64-NEXT: srli a1, a1, 56 +; RV64-NEXT: or a4, a5, a4 +; RV64-NEXT: lui a5, %hi(.LCPI2_0) +; RV64-NEXT: addi a6, a6, -1 +; RV64-NEXT: ld a5, %lo(.LCPI2_0)(a5) +; RV64-NEXT: add a4, a0, a4 +; RV64-NEXT: and a0, a0, a6 +; RV64-NEXT: add a2, a4, a2 +; RV64-NEXT: and a4, a4, a6 +; RV64-NEXT: sltu a0, a4, a0 +; RV64-NEXT: add a0, a2, a0 +; RV64-NEXT: and a2, a2, a6 +; RV64-NEXT: sltu a2, a2, a4 +; RV64-NEXT: and a4, a0, a6 +; RV64-NEXT: add a0, a0, a3 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: and a2, a0, a6 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: sltu a1, a2, a4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: and a0, a0, a6 +; RV64-NEXT: mulhu a1, a0, a5 +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: sub a0, a0, a2 +; RV64-NEXT: li a1, 0 ; RV64-NEXT: ret %a = urem iXLen2 %x, 7 ret iXLen2 %a @@ -105,24 +144,64 @@ define iXLen2 @test_urem_7(iXLen2 %x) nounwind { define iXLen2 @test_urem_9(iXLen2 %x) nounwind { ; RV32-LABEL: test_urem_9: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: li a2, 9 -; RV32-NEXT: li a3, 0 -; RV32-NEXT: call __umoddi3 -; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: lui a2, 262144 +; RV32-NEXT: slli a3, a1, 2 +; RV32-NEXT: srli a4, a0, 30 +; RV32-NEXT: srli a1, a1, 28 +; RV32-NEXT: lui a5, 233017 +; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: or a3, a4, a3 +; RV32-NEXT: addi a4, a5, -455 +; RV32-NEXT: and a5, a0, a2 +; RV32-NEXT: add a0, a0, a3 +; RV32-NEXT: and a3, a0, a2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: sltu a1, a3, a5 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: mulhu a1, a0, a4 +; RV32-NEXT: srli a1, a1, 1 +; RV32-NEXT: slli a2, a1, 3 +; RV32-NEXT: sub a0, a0, a1 +; RV32-NEXT: sub a0, a0, a2 +; RV32-NEXT: li a1, 0 ; RV32-NEXT: ret ; ; RV64-LABEL: test_urem_9: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64-NEXT: li a2, 9 -; RV64-NEXT: li a3, 0 -; RV64-NEXT: call __umodti3 -; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: slli a2, a1, 4 +; RV64-NEXT: srli a3, a0, 60 +; RV64-NEXT: slli a4, a1, 34 +; RV64-NEXT: srli a5, a0, 30 +; RV64-NEXT: lui a6, 262144 +; RV64-NEXT: or a2, a3, a2 +; RV64-NEXT: srli a3, a1, 26 +; RV64-NEXT: srli a1, a1, 56 +; RV64-NEXT: or a4, a5, a4 +; RV64-NEXT: lui a5, %hi(.LCPI3_0) +; RV64-NEXT: addi a6, a6, -1 +; RV64-NEXT: ld a5, %lo(.LCPI3_0)(a5) +; RV64-NEXT: add a4, a0, a4 +; RV64-NEXT: and a0, a0, a6 +; RV64-NEXT: add a2, a4, a2 +; RV64-NEXT: and a4, a4, a6 +; RV64-NEXT: sltu a0, a4, a0 +; RV64-NEXT: add a0, a2, a0 +; RV64-NEXT: and a2, a2, a6 +; RV64-NEXT: sltu a2, a2, a4 +; RV64-NEXT: and a4, a0, a6 +; RV64-NEXT: add a0, a0, a3 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: and a2, a0, a6 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: sltu a1, a2, a4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: and a0, a0, a6 +; RV64-NEXT: mulhu a1, a0, a5 +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: sub a0, a0, a1 +; 
RV64-NEXT: sub a0, a0, a2 +; RV64-NEXT: li a1, 0 ; RV64-NEXT: ret %a = urem iXLen2 %x, 9 ret iXLen2 %a diff --git a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll index 3ef9f3f945108..77a026669c51d 100644 --- a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll +++ b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll @@ -862,51 +862,61 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind { ; RV32IM-NEXT: sw s5, 20(sp) # 4-byte Folded Spill ; RV32IM-NEXT: sw s6, 16(sp) # 4-byte Folded Spill ; RV32IM-NEXT: sw s7, 12(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s8, 8(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw s1, 16(a1) -; RV32IM-NEXT: lw s2, 20(a1) -; RV32IM-NEXT: lw s3, 24(a1) -; RV32IM-NEXT: lw s4, 28(a1) -; RV32IM-NEXT: lw a3, 0(a1) -; RV32IM-NEXT: lw a4, 4(a1) -; RV32IM-NEXT: lw s5, 8(a1) -; RV32IM-NEXT: lw s6, 12(a1) ; RV32IM-NEXT: mv s0, a0 +; RV32IM-NEXT: lw a2, 16(a1) +; RV32IM-NEXT: lw a4, 20(a1) +; RV32IM-NEXT: lw s1, 24(a1) +; RV32IM-NEXT: lw s2, 28(a1) +; RV32IM-NEXT: lw a0, 0(a1) +; RV32IM-NEXT: lw a3, 4(a1) +; RV32IM-NEXT: lw s3, 8(a1) +; RV32IM-NEXT: lw s4, 12(a1) +; RV32IM-NEXT: lui a1, 1024 +; RV32IM-NEXT: lui a5, 45590 +; RV32IM-NEXT: addi a1, a1, -1 +; RV32IM-NEXT: addi a5, a5, 1069 +; RV32IM-NEXT: slli a6, a4, 10 +; RV32IM-NEXT: srli a7, a2, 22 +; RV32IM-NEXT: or a6, a7, a6 +; RV32IM-NEXT: and a7, a2, a1 +; RV32IM-NEXT: srli a4, a4, 12 +; RV32IM-NEXT: add a2, a2, a6 +; RV32IM-NEXT: and a6, a2, a1 +; RV32IM-NEXT: add a2, a2, a4 +; RV32IM-NEXT: sltu a4, a6, a7 +; RV32IM-NEXT: add a2, a2, a4 +; RV32IM-NEXT: and a1, a2, a1 +; RV32IM-NEXT: mulhu a2, a1, a5 +; RV32IM-NEXT: li a4, 23 +; RV32IM-NEXT: mul a2, a2, a4 +; RV32IM-NEXT: sub s7, a1, a2 ; RV32IM-NEXT: li a2, 1 -; RV32IM-NEXT: mv a0, a3 -; RV32IM-NEXT: mv a1, a4 -; RV32IM-NEXT: li a3, 0 -; RV32IM-NEXT: call __umoddi3 -; RV32IM-NEXT: mv s7, a0 -; RV32IM-NEXT: mv s8, a1 -; RV32IM-NEXT: li a2, 654 -; RV32IM-NEXT: mv a0, s5 -; RV32IM-NEXT: mv a1, s6 +; RV32IM-NEXT: mv a1, a3 ; RV32IM-NEXT: li a3, 0 ; RV32IM-NEXT: call __umoddi3 ; RV32IM-NEXT: mv s5, a0 ; RV32IM-NEXT: mv s6, a1 -; RV32IM-NEXT: li a2, 23 -; RV32IM-NEXT: mv a0, s1 -; RV32IM-NEXT: mv a1, s2 +; RV32IM-NEXT: li a2, 654 +; RV32IM-NEXT: mv a0, s3 +; RV32IM-NEXT: mv a1, s4 ; RV32IM-NEXT: li a3, 0 ; RV32IM-NEXT: call __umoddi3 -; RV32IM-NEXT: mv s1, a0 -; RV32IM-NEXT: mv s2, a1 +; RV32IM-NEXT: mv s3, a0 +; RV32IM-NEXT: mv s4, a1 ; RV32IM-NEXT: lui a0, 1 ; RV32IM-NEXT: addi a2, a0, 1327 -; RV32IM-NEXT: mv a0, s3 -; RV32IM-NEXT: mv a1, s4 +; RV32IM-NEXT: mv a0, s1 +; RV32IM-NEXT: mv a1, s2 ; RV32IM-NEXT: li a3, 0 ; RV32IM-NEXT: call __umoddi3 -; RV32IM-NEXT: sw s1, 16(s0) -; RV32IM-NEXT: sw s2, 20(s0) +; RV32IM-NEXT: sw s7, 16(s0) +; RV32IM-NEXT: sw zero, 20(s0) ; RV32IM-NEXT: sw a0, 24(s0) ; RV32IM-NEXT: sw a1, 28(s0) -; RV32IM-NEXT: sw s7, 0(s0) -; RV32IM-NEXT: sw s8, 4(s0) -; RV32IM-NEXT: sw s5, 8(s0) -; RV32IM-NEXT: sw s6, 12(s0) +; RV32IM-NEXT: sw s5, 0(s0) +; RV32IM-NEXT: sw s6, 4(s0) +; RV32IM-NEXT: sw s3, 8(s0) +; RV32IM-NEXT: sw s4, 12(s0) ; RV32IM-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32IM-NEXT: lw s0, 40(sp) # 4-byte Folded Reload ; RV32IM-NEXT: lw s1, 36(sp) # 4-byte Folded Reload @@ -916,7 +926,6 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind { ; RV32IM-NEXT: lw s5, 20(sp) # 4-byte Folded Reload ; RV32IM-NEXT: lw s6, 16(sp) # 4-byte Folded Reload ; RV32IM-NEXT: lw s7, 12(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s8, 8(sp) # 4-byte Folded Reload ; RV32IM-NEXT: addi sp, sp, 48 ; RV32IM-NEXT: ret 
; diff --git a/llvm/test/CodeGen/X86/divide-by-constant.ll b/llvm/test/CodeGen/X86/divide-by-constant.ll index ac78136b9d8ea..f4f99749969e9 100644 --- a/llvm/test/CodeGen/X86/divide-by-constant.ll +++ b/llvm/test/CodeGen/X86/divide-by-constant.ll @@ -294,19 +294,47 @@ entry: define i64 @PR23590(i64 %x) nounwind { ; X86-LABEL: PR23590: ; X86: # %bb.0: # %entry -; X86-NEXT: subl $12, %esp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi ; X86-NEXT: pushl $0 ; X86-NEXT: pushl $12345 # imm = 0x3039 ; X86-NEXT: pushl {{[0-9]+}}(%esp) ; X86-NEXT: pushl {{[0-9]+}}(%esp) ; X86-NEXT: calll __umoddi3 ; X86-NEXT: addl $16, %esp -; X86-NEXT: pushl $0 -; X86-NEXT: pushl $7 -; X86-NEXT: pushl %edx -; X86-NEXT: pushl %eax -; X86-NEXT: calll __udivdi3 -; X86-NEXT: addl $28, %esp +; X86-NEXT: movl %eax, %esi +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: movl %edx, %eax +; X86-NEXT: shldl $2, %esi, %eax +; X86-NEXT: addl %esi, %eax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: andl $1073741823, %edx # imm = 0x3FFFFFFF +; X86-NEXT: movl %ecx, %edi +; X86-NEXT: shrl $28, %edi +; X86-NEXT: movl %esi, %ebx +; X86-NEXT: andl $1073741823, %ebx # imm = 0x3FFFFFFF +; X86-NEXT: cmpl %ebx, %edx +; X86-NEXT: adcl %eax, %edi +; X86-NEXT: andl $1073741823, %edi # imm = 0x3FFFFFFF +; X86-NEXT: movl $613566757, %edx # imm = 0x24924925 +; X86-NEXT: movl %edi, %eax +; X86-NEXT: mull %edx +; X86-NEXT: leal (,%edx,8), %eax +; X86-NEXT: subl %eax, %edx +; X86-NEXT: addl %edi, %edx +; X86-NEXT: subl %edx, %esi +; X86-NEXT: sbbl $0, %ecx +; X86-NEXT: movl $-1227133513, %edx # imm = 0xB6DB6DB7 +; X86-NEXT: movl %esi, %eax +; X86-NEXT: mull %edx +; X86-NEXT: imull $1840700269, %esi, %esi # imm = 0x6DB6DB6D +; X86-NEXT: addl %esi, %edx +; X86-NEXT: imull $-1227133513, %ecx, %ecx # imm = 0xB6DB6DB7 +; X86-NEXT: addl %ecx, %edx +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx ; X86-NEXT: retl ; ; X64-FAST-LABEL: PR23590: diff --git a/llvm/test/CodeGen/X86/divmod128.ll b/llvm/test/CodeGen/X86/divmod128.ll index 3796dd796eaf9..16865030cfc36 100644 --- a/llvm/test/CodeGen/X86/divmod128.ll +++ b/llvm/test/CodeGen/X86/divmod128.ll @@ -67,25 +67,76 @@ define i64 @div128(i128 %x) nounwind { define i64 @umod128(i128 %x) nounwind { ; X86-64-LABEL: umod128: ; X86-64: # %bb.0: -; X86-64-NEXT: pushq %rax -; X86-64-NEXT: movl $11, %edx -; X86-64-NEXT: xorl %ecx, %ecx -; X86-64-NEXT: callq __umodti3@PLT -; X86-64-NEXT: popq %rcx +; X86-64-NEXT: movq %rsi, %rax +; X86-64-NEXT: shldq $4, %rdi, %rax +; X86-64-NEXT: movq %rdi, %rcx +; X86-64-NEXT: shrq $30, %rcx +; X86-64-NEXT: addl %edi, %ecx +; X86-64-NEXT: movl %ecx, %edx +; X86-64-NEXT: andl $1073741823, %edx # imm = 0x3FFFFFFF +; X86-64-NEXT: andl $1073741823, %edi # imm = 0x3FFFFFFF +; X86-64-NEXT: cmpl %edi, %edx +; X86-64-NEXT: movl %ecx, %edi +; X86-64-NEXT: adcl %eax, %edi +; X86-64-NEXT: addl %eax, %ecx +; X86-64-NEXT: andl $1073741823, %ecx # imm = 0x3FFFFFFF +; X86-64-NEXT: movq %rsi, %rax +; X86-64-NEXT: shrq $26, %rax +; X86-64-NEXT: cmpl %edx, %ecx +; X86-64-NEXT: movl %edi, %ecx +; X86-64-NEXT: adcl %eax, %ecx +; X86-64-NEXT: movl %edi, %edx +; X86-64-NEXT: andl $1073741823, %edx # imm = 0x3FFFFFFF +; X86-64-NEXT: addl %eax, %edi +; X86-64-NEXT: andl $1073741823, %edi # imm = 0x3FFFFFFF +; X86-64-NEXT: shrq $56, %rsi +; X86-64-NEXT: cmpl %edx, %edi +; X86-64-NEXT: adcl %esi, %ecx +; X86-64-NEXT: andl $1073741823, %ecx # imm = 0x3FFFFFFF +; X86-64-NEXT: movabsq $1676976733973595602, %rdx # imm = 0x1745D1745D1745D2 +; X86-64-NEXT: movq %rcx, %rax +; 
X86-64-NEXT: mulq %rdx +; X86-64-NEXT: leaq (%rdx,%rdx,4), %rax +; X86-64-NEXT: leaq (%rdx,%rax,2), %rax +; X86-64-NEXT: subq %rax, %rcx +; X86-64-NEXT: movq %rcx, %rax ; X86-64-NEXT: retq ; ; WIN64-LABEL: umod128: ; WIN64: # %bb.0: -; WIN64-NEXT: subq $72, %rsp -; WIN64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) -; WIN64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) -; WIN64-NEXT: movq $11, {{[0-9]+}}(%rsp) -; WIN64-NEXT: movq $0, {{[0-9]+}}(%rsp) -; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx -; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx -; WIN64-NEXT: callq __umodti3 -; WIN64-NEXT: movq %xmm0, %rax -; WIN64-NEXT: addq $72, %rsp +; WIN64-NEXT: movq %rdx, %rax +; WIN64-NEXT: shldq $4, %rcx, %rax +; WIN64-NEXT: movq %rcx, %r8 +; WIN64-NEXT: shrq $30, %r8 +; WIN64-NEXT: addl %ecx, %r8d +; WIN64-NEXT: movl %r8d, %r9d +; WIN64-NEXT: andl $1073741823, %r9d # imm = 0x3FFFFFFF +; WIN64-NEXT: andl $1073741823, %ecx # imm = 0x3FFFFFFF +; WIN64-NEXT: cmpl %ecx, %r9d +; WIN64-NEXT: movl %r8d, %r10d +; WIN64-NEXT: adcl %eax, %r10d +; WIN64-NEXT: addl %eax, %r8d +; WIN64-NEXT: andl $1073741823, %r8d # imm = 0x3FFFFFFF +; WIN64-NEXT: movq %rdx, %rax +; WIN64-NEXT: shrq $26, %rax +; WIN64-NEXT: cmpl %r9d, %r8d +; WIN64-NEXT: movl %r10d, %ecx +; WIN64-NEXT: adcl %eax, %ecx +; WIN64-NEXT: movl %r10d, %r8d +; WIN64-NEXT: andl $1073741823, %r8d # imm = 0x3FFFFFFF +; WIN64-NEXT: addl %eax, %r10d +; WIN64-NEXT: andl $1073741823, %r10d # imm = 0x3FFFFFFF +; WIN64-NEXT: shrq $56, %rdx +; WIN64-NEXT: cmpl %r8d, %r10d +; WIN64-NEXT: adcl %edx, %ecx +; WIN64-NEXT: andl $1073741823, %ecx # imm = 0x3FFFFFFF +; WIN64-NEXT: movabsq $1676976733973595602, %rdx # imm = 0x1745D1745D1745D2 +; WIN64-NEXT: movq %rcx, %rax +; WIN64-NEXT: mulq %rdx +; WIN64-NEXT: leaq (%rdx,%rdx,4), %rax +; WIN64-NEXT: leaq (%rdx,%rax,2), %rax +; WIN64-NEXT: subq %rax, %rcx +; WIN64-NEXT: movq %rcx, %rax ; WIN64-NEXT: retq diff --git a/llvm/test/CodeGen/X86/uint128-div-const.ll b/llvm/test/CodeGen/X86/uint128-div-const.ll new file mode 100644 index 0000000000000..60dddad952679 --- /dev/null +++ b/llvm/test/CodeGen/X86/uint128-div-const.ll @@ -0,0 +1,210 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -O2 | FileCheck %s + +define i128 @div_by_7(i128 %x) { +; CHECK-LABEL: div_by_7: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: shldq $4, %rdi, %rax +; CHECK-NEXT: movq %rdi, %rcx +; CHECK-NEXT: shrq $30, %rcx +; CHECK-NEXT: addl %edi, %ecx +; CHECK-NEXT: movl %ecx, %edx +; CHECK-NEXT: andl $1073741823, %edx # imm = 0x3FFFFFFF +; CHECK-NEXT: movl %edi, %r8d +; CHECK-NEXT: andl $1073741823, %r8d # imm = 0x3FFFFFFF +; CHECK-NEXT: cmpl %r8d, %edx +; CHECK-NEXT: movl %ecx, %r8d +; CHECK-NEXT: adcl %eax, %r8d +; CHECK-NEXT: addl %eax, %ecx +; CHECK-NEXT: andl $1073741823, %ecx # imm = 0x3FFFFFFF +; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: shrq $26, %rax +; CHECK-NEXT: cmpl %edx, %ecx +; CHECK-NEXT: movl %r8d, %edx +; CHECK-NEXT: adcl %eax, %edx +; CHECK-NEXT: movl %r8d, %r9d +; CHECK-NEXT: andl $1073741823, %r9d # imm = 0x3FFFFFFF +; CHECK-NEXT: addl %eax, %r8d +; CHECK-NEXT: andl $1073741823, %r8d # imm = 0x3FFFFFFF +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: shrq $56, %rcx +; CHECK-NEXT: cmpl %r9d, %r8d +; CHECK-NEXT: adcl %edx, %ecx +; CHECK-NEXT: andl $1073741823, %ecx # imm = 0x3FFFFFFF +; CHECK-NEXT: movabsq $2635249153387078803, %rdx # imm = 0x2492492492492493 +; CHECK-NEXT: movq %rcx, %rax +; CHECK-NEXT: mulq %rdx +; CHECK-NEXT: leal (,%rdx,8), %eax +; 
CHECK-NEXT: subq %rax, %rdx +; CHECK-NEXT: addq %rcx, %rdx +; CHECK-NEXT: subq %rdx, %rdi +; CHECK-NEXT: sbbq $0, %rsi +; CHECK-NEXT: movabsq $-5270498306774157605, %rcx # imm = 0xB6DB6DB6DB6DB6DB +; CHECK-NEXT: imulq %rdi, %rcx +; CHECK-NEXT: movabsq $7905747460161236407, %r8 # imm = 0x6DB6DB6DB6DB6DB7 +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: mulq %r8 +; CHECK-NEXT: addq %rcx, %rdx +; CHECK-NEXT: imulq %rsi, %r8 +; CHECK-NEXT: addq %r8, %rdx +; CHECK-NEXT: retq +entry: + %div = udiv i128 %x, 7 + ret i128 %div +} + +define i128 @div_by_9(i128 %x) { +; CHECK-LABEL: div_by_9: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: shldq $4, %rdi, %rax +; CHECK-NEXT: movq %rdi, %rcx +; CHECK-NEXT: shrq $30, %rcx +; CHECK-NEXT: addl %edi, %ecx +; CHECK-NEXT: movl %ecx, %edx +; CHECK-NEXT: andl $1073741823, %edx # imm = 0x3FFFFFFF +; CHECK-NEXT: movl %edi, %r8d +; CHECK-NEXT: andl $1073741823, %r8d # imm = 0x3FFFFFFF +; CHECK-NEXT: cmpl %r8d, %edx +; CHECK-NEXT: movl %ecx, %r8d +; CHECK-NEXT: adcl %eax, %r8d +; CHECK-NEXT: addl %eax, %ecx +; CHECK-NEXT: andl $1073741823, %ecx # imm = 0x3FFFFFFF +; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: shrq $26, %rax +; CHECK-NEXT: cmpl %edx, %ecx +; CHECK-NEXT: movl %r8d, %edx +; CHECK-NEXT: adcl %eax, %edx +; CHECK-NEXT: movl %r8d, %r9d +; CHECK-NEXT: andl $1073741823, %r9d # imm = 0x3FFFFFFF +; CHECK-NEXT: addl %eax, %r8d +; CHECK-NEXT: andl $1073741823, %r8d # imm = 0x3FFFFFFF +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: shrq $56, %rcx +; CHECK-NEXT: cmpl %r9d, %r8d +; CHECK-NEXT: adcl %edx, %ecx +; CHECK-NEXT: andl $1073741823, %ecx # imm = 0x3FFFFFFF +; CHECK-NEXT: movabsq $2049638230412172402, %rdx # imm = 0x1C71C71C71C71C72 +; CHECK-NEXT: movq %rcx, %rax +; CHECK-NEXT: mulq %rdx +; CHECK-NEXT: leaq (%rdx,%rdx,8), %rax +; CHECK-NEXT: subq %rax, %rcx +; CHECK-NEXT: subq %rcx, %rdi +; CHECK-NEXT: sbbq $0, %rsi +; CHECK-NEXT: movabsq $4099276460824344803, %rcx # imm = 0x38E38E38E38E38E3 +; CHECK-NEXT: imulq %rdi, %rcx +; CHECK-NEXT: movabsq $-8198552921648689607, %r8 # imm = 0x8E38E38E38E38E39 +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: mulq %r8 +; CHECK-NEXT: addq %rcx, %rdx +; CHECK-NEXT: imulq %rsi, %r8 +; CHECK-NEXT: addq %r8, %rdx +; CHECK-NEXT: retq +entry: + %div = udiv i128 %x, 9 + ret i128 %div +} + +define i128 @div_by_25(i128 %x) { +; CHECK-LABEL: div_by_25: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: shldq $24, %rdi, %rax +; CHECK-NEXT: movq %rdi, %rcx +; CHECK-NEXT: shrq $20, %rcx +; CHECK-NEXT: addl %edi, %ecx +; CHECK-NEXT: movl %ecx, %edx +; CHECK-NEXT: andl $1048575, %edx # imm = 0xFFFFF +; CHECK-NEXT: movl %edi, %r8d +; CHECK-NEXT: andl $1048575, %r8d # imm = 0xFFFFF +; CHECK-NEXT: cmpl %r8d, %edx +; CHECK-NEXT: movl %ecx, %r8d +; CHECK-NEXT: adcl %eax, %r8d +; CHECK-NEXT: addl %eax, %ecx +; CHECK-NEXT: andl $1048575, %ecx # imm = 0xFFFFF +; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: shldq $4, %rdi, %rax +; CHECK-NEXT: cmpl %edx, %ecx +; CHECK-NEXT: movl %r8d, %ecx +; CHECK-NEXT: adcl %eax, %ecx +; CHECK-NEXT: movl %r8d, %edx +; CHECK-NEXT: andl $1048575, %edx # imm = 0xFFFFF +; CHECK-NEXT: addl %eax, %r8d +; CHECK-NEXT: andl $1048575, %r8d # imm = 0xFFFFF +; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: shrq $16, %rax +; CHECK-NEXT: cmpl %edx, %r8d +; CHECK-NEXT: movl %ecx, %edx +; CHECK-NEXT: adcl %eax, %edx +; CHECK-NEXT: movl %ecx, %r8d +; CHECK-NEXT: andl $1048575, %r8d # imm = 0xFFFFF +; CHECK-NEXT: addl %eax, %ecx +; CHECK-NEXT: andl $1048575, %ecx # imm = 0xFFFFF +; 
CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: shrq $36, %rax +; CHECK-NEXT: cmpl %r8d, %ecx +; CHECK-NEXT: movl %edx, %r8d +; CHECK-NEXT: adcl %eax, %r8d +; CHECK-NEXT: movl %edx, %r9d +; CHECK-NEXT: andl $1048575, %r9d # imm = 0xFFFFF +; CHECK-NEXT: addl %eax, %edx +; CHECK-NEXT: andl $1048575, %edx # imm = 0xFFFFF +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: shrq $56, %rcx +; CHECK-NEXT: cmpl %r9d, %edx +; CHECK-NEXT: adcl %r8d, %ecx +; CHECK-NEXT: andl $1048575, %ecx # imm = 0xFFFFF +; CHECK-NEXT: movabsq $737869762948382065, %rdx # imm = 0xA3D70A3D70A3D71 +; CHECK-NEXT: movq %rcx, %rax +; CHECK-NEXT: mulq %rdx +; CHECK-NEXT: leaq (%rdx,%rdx,4), %rax +; CHECK-NEXT: leaq (%rax,%rax,4), %rax +; CHECK-NEXT: subq %rax, %rcx +; CHECK-NEXT: subq %rcx, %rdi +; CHECK-NEXT: sbbq $0, %rsi +; CHECK-NEXT: movabsq $2951479051793528258, %rcx # imm = 0x28F5C28F5C28F5C2 +; CHECK-NEXT: imulq %rdi, %rcx +; CHECK-NEXT: movabsq $-8116567392432202711, %r8 # imm = 0x8F5C28F5C28F5C29 +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: mulq %r8 +; CHECK-NEXT: addq %rcx, %rdx +; CHECK-NEXT: imulq %rsi, %r8 +; CHECK-NEXT: addq %r8, %rdx +; CHECK-NEXT: retq +entry: + %div = udiv i128 %x, 25 + ret i128 %div +} + +define i128 @div_by_14(i128 %x) { +; CHECK-LABEL: div_by_14: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: movl $14, %edx +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: callq __udivti3@PLT +; CHECK-NEXT: popq %rcx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +entry: + %div = udiv i128 %x, 14 + ret i128 %div +} + +define i128 @div_by_22(i128 %x) { +; CHECK-LABEL: div_by_22: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: movl $22, %edx +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: callq __udivti3@PLT +; CHECK-NEXT: popq %rcx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +entry: + %div = udiv i128 %x, 22 + ret i128 %div +}
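A closing note on div_by_14 and div_by_22: they still lower to a __udivti3 libcall because for an even divisor, (1 << k) % Divisor is itself even and can never equal 1, so no chunk width qualifies and expandDIVREMByConstant bails out. The standalone sketch below mirrors, rather than calls, the search loop in the patch (the helper name and driver are illustrative only); it reports the chunk width each test divisor gets.

#include <cstdint>
#include <cstdio>

// Mirror the patch's search: the largest chunk width w <= 32 with
// (1 << w) % Divisor == 1. Returns 0 when no width qualifies, which is
// always the case for even divisors.
unsigned bestChunkWidth(uint64_t Divisor) {
  for (unsigned W = 32; W >= 1; --W) {
    // Compute (1 << W) % Divisor without overflowing 64 bits.
    uint64_t Rem = 1;
    for (unsigned I = 0; I < W; ++I)
      Rem = (Rem * 2) % Divisor;
    if (Rem == 1)
      return W;
  }
  return 0;
}

int main() {
  for (uint64_t D : {7, 9, 25, 14, 22})
    std::printf("divisor %llu -> chunk width %u\n",
                (unsigned long long)D, bestChunkWidth(D));
  // Expected: 7 -> 30, 9 -> 30, 25 -> 20, 14 -> 0, 22 -> 0, matching the
  // 0x3FFFFFFF and 0xFFFFF masks in the expanded tests and the libcall
  // fallback for 14 and 22.
  return 0;
}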