-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[RISCV][GlobalISel] Lower G_ATOMICRMW_SUB via G_ATOMICRMW_ADD #155972
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-llvm-globalisel Author: Kane Wang (ReVe1uv) Changes: RISCV does not provide a native atomic subtract instruction, so this patch lowers G_ATOMICRMW_SUB via G_ATOMICRMW_ADD. For example, lowering
on riscv32a produces:
On riscv64a, where the RHS type is narrower than XLEN, it currently produces:
There is still a constant-folding or InstCombiner gap. For instance, lowering
generates:
This sequence could be optimized further to eliminate the redundant neg. Addressing this may require improvements in the Combiner or Peephole Optimizer in future work. Patch is 96.88 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/155972.diff 13 Files Affected:
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 008c18837a522..1600594c955d1 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -26,6 +26,7 @@
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/RuntimeLibcallUtil.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -37,6 +38,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/LowerAtomic.h"
#include <numeric>
#include <optional>
@@ -4773,6 +4775,18 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return lowerVectorReduction(MI);
case G_VAARG:
return lowerVAArg(MI);
+ case G_ATOMICRMW_SUB: {
+ auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs();
+ MachineMemOperand *MMO = *MI.memoperands_begin();
+
+ auto VNeg = MIRBuilder.buildNeg(ValLLT, Val);
+ auto NewRMW =
+ MIRBuilder.buildAtomicRMW(G_ATOMICRMW_ADD, RetLLT, Mem, VNeg, *MMO);
+
+ MIRBuilder.buildCopy(Ret, NewRMW);
+ MI.eraseFromParent();
+ return Legalized;
+ }
}
}
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index d6ae58ac890aa..ff733334f5d60 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -699,6 +699,11 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
.libcallFor(!ST.hasStdExtA(), {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}})
.clampScalar(0, sXLen, sXLen);
+ getActionDefinitionsBuilder(G_ATOMICRMW_SUB)
+ .libcallFor(!ST.hasStdExtA(), {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}})
+ .clampScalar(0, sXLen, sXLen)
+ .lower();
+
getLegacyLegalizerInfo().computeTables();
verify(*ST.getInstrInfo());
}
@@ -737,6 +742,7 @@ bool RISCVLegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
return true;
}
case Intrinsic::riscv_masked_atomicrmw_add:
+ case Intrinsic::riscv_masked_atomicrmw_sub:
return true;
}
}
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/atomicrmw-add-sub.ll b/llvm/test/CodeGen/RISCV/GlobalISel/atomicrmw-add-sub.ll
new file mode 100644
index 0000000000000..21b2bbfc59241
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/atomicrmw-add-sub.ll
@@ -0,0 +1,930 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+a,+zabha -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32IA-ZABHA
+; RUN: llc -mtriple=riscv32 -mattr=+a -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32IA
+; RUN: llc -mtriple=riscv32 -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32I
+; RUN: llc -mtriple=riscv64 -mattr=+a,+zabha -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64IA-ZABHA
+; RUN: llc -mtriple=riscv64 -mattr=+a -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64IA
+; RUN: llc -mtriple=riscv64 -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64I
+
+define i8 @atomicrmw_add_i8(ptr %ptr, i8 %rhs) nounwind {
+; RV32IA-ZABHA-LABEL: atomicrmw_add_i8:
+; RV32IA-ZABHA: # %bb.0:
+; RV32IA-ZABHA-NEXT: amoadd.b.aqrl a0, a1, (a0)
+; RV32IA-ZABHA-NEXT: ret
+;
+; RV32IA-LABEL: atomicrmw_add_i8:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: li a2, 255
+; RV32IA-NEXT: andi a3, a0, -4
+; RV32IA-NEXT: andi a0, a0, 3
+; RV32IA-NEXT: zext.b a1, a1
+; RV32IA-NEXT: slli a0, a0, 3
+; RV32IA-NEXT: sll a2, a2, a0
+; RV32IA-NEXT: sll a1, a1, a0
+; RV32IA-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT: lr.w.aqrl a4, (a3)
+; RV32IA-NEXT: add a5, a4, a1
+; RV32IA-NEXT: xor a5, a4, a5
+; RV32IA-NEXT: and a5, a5, a2
+; RV32IA-NEXT: xor a5, a4, a5
+; RV32IA-NEXT: sc.w.rl a5, a5, (a3)
+; RV32IA-NEXT: bnez a5, .LBB0_1
+; RV32IA-NEXT: # %bb.2:
+; RV32IA-NEXT: srl a0, a4, a0
+; RV32IA-NEXT: ret
+;
+; RV32I-LABEL: atomicrmw_add_i8:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a2, 5
+; RV32I-NEXT: call __atomic_fetch_add_1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64IA-ZABHA-LABEL: atomicrmw_add_i8:
+; RV64IA-ZABHA: # %bb.0:
+; RV64IA-ZABHA-NEXT: amoadd.b.aqrl a0, a1, (a0)
+; RV64IA-ZABHA-NEXT: ret
+;
+; RV64IA-LABEL: atomicrmw_add_i8:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: li a2, 255
+; RV64IA-NEXT: andi a3, a0, -4
+; RV64IA-NEXT: andi a0, a0, 3
+; RV64IA-NEXT: zext.b a1, a1
+; RV64IA-NEXT: slli a0, a0, 3
+; RV64IA-NEXT: sllw a2, a2, a0
+; RV64IA-NEXT: sllw a1, a1, a0
+; RV64IA-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-NEXT: lr.w.aqrl a4, (a3)
+; RV64IA-NEXT: add a5, a4, a1
+; RV64IA-NEXT: xor a5, a4, a5
+; RV64IA-NEXT: and a5, a5, a2
+; RV64IA-NEXT: xor a5, a4, a5
+; RV64IA-NEXT: sc.w.rl a5, a5, (a3)
+; RV64IA-NEXT: bnez a5, .LBB0_1
+; RV64IA-NEXT: # %bb.2:
+; RV64IA-NEXT: srlw a0, a4, a0
+; RV64IA-NEXT: ret
+;
+; RV64I-LABEL: atomicrmw_add_i8:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a2, 5
+; RV64I-NEXT: call __atomic_fetch_add_1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %res = atomicrmw add ptr %ptr, i8 %rhs seq_cst
+ ret i8 %res
+}
+
+define i16 @atomicrmw_add_i16(ptr %ptr, i16 %rhs) nounwind {
+; RV32IA-ZABHA-LABEL: atomicrmw_add_i16:
+; RV32IA-ZABHA: # %bb.0:
+; RV32IA-ZABHA-NEXT: amoadd.h.aqrl a0, a1, (a0)
+; RV32IA-ZABHA-NEXT: ret
+;
+; RV32IA-LABEL: atomicrmw_add_i16:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: lui a2, 16
+; RV32IA-NEXT: andi a3, a0, -4
+; RV32IA-NEXT: andi a0, a0, 3
+; RV32IA-NEXT: addi a2, a2, -1
+; RV32IA-NEXT: slli a0, a0, 3
+; RV32IA-NEXT: sll a4, a2, a0
+; RV32IA-NEXT: and a1, a1, a2
+; RV32IA-NEXT: sll a1, a1, a0
+; RV32IA-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT: lr.w.aqrl a2, (a3)
+; RV32IA-NEXT: add a5, a2, a1
+; RV32IA-NEXT: xor a5, a2, a5
+; RV32IA-NEXT: and a5, a5, a4
+; RV32IA-NEXT: xor a5, a2, a5
+; RV32IA-NEXT: sc.w.rl a5, a5, (a3)
+; RV32IA-NEXT: bnez a5, .LBB1_1
+; RV32IA-NEXT: # %bb.2:
+; RV32IA-NEXT: srl a0, a2, a0
+; RV32IA-NEXT: ret
+;
+; RV32I-LABEL: atomicrmw_add_i16:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a2, 5
+; RV32I-NEXT: call __atomic_fetch_add_2
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64IA-ZABHA-LABEL: atomicrmw_add_i16:
+; RV64IA-ZABHA: # %bb.0:
+; RV64IA-ZABHA-NEXT: amoadd.h.aqrl a0, a1, (a0)
+; RV64IA-ZABHA-NEXT: ret
+;
+; RV64IA-LABEL: atomicrmw_add_i16:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: lui a2, 16
+; RV64IA-NEXT: andi a3, a0, -4
+; RV64IA-NEXT: andi a0, a0, 3
+; RV64IA-NEXT: addi a2, a2, -1
+; RV64IA-NEXT: slli a0, a0, 3
+; RV64IA-NEXT: sllw a4, a2, a0
+; RV64IA-NEXT: and a1, a1, a2
+; RV64IA-NEXT: sllw a1, a1, a0
+; RV64IA-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-NEXT: lr.w.aqrl a2, (a3)
+; RV64IA-NEXT: add a5, a2, a1
+; RV64IA-NEXT: xor a5, a2, a5
+; RV64IA-NEXT: and a5, a5, a4
+; RV64IA-NEXT: xor a5, a2, a5
+; RV64IA-NEXT: sc.w.rl a5, a5, (a3)
+; RV64IA-NEXT: bnez a5, .LBB1_1
+; RV64IA-NEXT: # %bb.2:
+; RV64IA-NEXT: srlw a0, a2, a0
+; RV64IA-NEXT: ret
+;
+; RV64I-LABEL: atomicrmw_add_i16:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a2, 5
+; RV64I-NEXT: call __atomic_fetch_add_2
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %res = atomicrmw add ptr %ptr, i16 %rhs seq_cst
+ ret i16 %res
+}
+
+define i32 @atomicrmw_add_i32(ptr %ptr, i32 %rhs) nounwind {
+; RV32IA-ZABHA-LABEL: atomicrmw_add_i32:
+; RV32IA-ZABHA: # %bb.0:
+; RV32IA-ZABHA-NEXT: amoadd.w.aqrl a0, a1, (a0)
+; RV32IA-ZABHA-NEXT: ret
+;
+; RV32IA-LABEL: atomicrmw_add_i32:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: amoadd.w.aqrl a0, a1, (a0)
+; RV32IA-NEXT: ret
+;
+; RV32I-LABEL: atomicrmw_add_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a2, 5
+; RV32I-NEXT: call __atomic_fetch_add_4
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64IA-ZABHA-LABEL: atomicrmw_add_i32:
+; RV64IA-ZABHA: # %bb.0:
+; RV64IA-ZABHA-NEXT: amoadd.w.aqrl a0, a1, (a0)
+; RV64IA-ZABHA-NEXT: ret
+;
+; RV64IA-LABEL: atomicrmw_add_i32:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: amoadd.w.aqrl a0, a1, (a0)
+; RV64IA-NEXT: ret
+;
+; RV64I-LABEL: atomicrmw_add_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a2, 5
+; RV64I-NEXT: call __atomic_fetch_add_4
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %res = atomicrmw add ptr %ptr, i32 %rhs seq_cst
+ ret i32 %res
+}
+
+define i64 @atomicrmw_add_i64(ptr %ptr, i64 %rhs) nounwind {
+; RV32IA-ZABHA-LABEL: atomicrmw_add_i64:
+; RV32IA-ZABHA: # %bb.0:
+; RV32IA-ZABHA-NEXT: addi sp, sp, -16
+; RV32IA-ZABHA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-ZABHA-NEXT: li a3, 5
+; RV32IA-ZABHA-NEXT: call __atomic_fetch_add_8
+; RV32IA-ZABHA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-ZABHA-NEXT: addi sp, sp, 16
+; RV32IA-ZABHA-NEXT: ret
+;
+; RV32IA-LABEL: atomicrmw_add_i64:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -16
+; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: li a3, 5
+; RV32IA-NEXT: call __atomic_fetch_add_8
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: ret
+;
+; RV32I-LABEL: atomicrmw_add_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a3, 5
+; RV32I-NEXT: call __atomic_fetch_add_8
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64IA-ZABHA-LABEL: atomicrmw_add_i64:
+; RV64IA-ZABHA: # %bb.0:
+; RV64IA-ZABHA-NEXT: amoadd.d.aqrl a0, a1, (a0)
+; RV64IA-ZABHA-NEXT: ret
+;
+; RV64IA-LABEL: atomicrmw_add_i64:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: amoadd.d.aqrl a0, a1, (a0)
+; RV64IA-NEXT: ret
+;
+; RV64I-LABEL: atomicrmw_add_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a2, 5
+; RV64I-NEXT: call __atomic_fetch_add_8
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %res = atomicrmw add ptr %ptr, i64 %rhs seq_cst
+ ret i64 %res
+}
+
+define i8 @atomicrmw_sub_i8(ptr %ptr, i8 %rhs) nounwind {
+; RV32IA-ZABHA-LABEL: atomicrmw_sub_i8:
+; RV32IA-ZABHA: # %bb.0:
+; RV32IA-ZABHA-NEXT: neg a1, a1
+; RV32IA-ZABHA-NEXT: amoadd.b.aqrl a0, a1, (a0)
+; RV32IA-ZABHA-NEXT: ret
+;
+; RV32IA-LABEL: atomicrmw_sub_i8:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: li a2, 255
+; RV32IA-NEXT: andi a3, a0, -4
+; RV32IA-NEXT: andi a0, a0, 3
+; RV32IA-NEXT: zext.b a1, a1
+; RV32IA-NEXT: slli a0, a0, 3
+; RV32IA-NEXT: sll a2, a2, a0
+; RV32IA-NEXT: sll a1, a1, a0
+; RV32IA-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT: lr.w.aqrl a4, (a3)
+; RV32IA-NEXT: sub a5, a4, a1
+; RV32IA-NEXT: xor a5, a4, a5
+; RV32IA-NEXT: and a5, a5, a2
+; RV32IA-NEXT: xor a5, a4, a5
+; RV32IA-NEXT: sc.w.rl a5, a5, (a3)
+; RV32IA-NEXT: bnez a5, .LBB4_1
+; RV32IA-NEXT: # %bb.2:
+; RV32IA-NEXT: srl a0, a4, a0
+; RV32IA-NEXT: ret
+;
+; RV32I-LABEL: atomicrmw_sub_i8:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a2, 5
+; RV32I-NEXT: call __atomic_fetch_sub_1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64IA-ZABHA-LABEL: atomicrmw_sub_i8:
+; RV64IA-ZABHA: # %bb.0:
+; RV64IA-ZABHA-NEXT: neg a1, a1
+; RV64IA-ZABHA-NEXT: amoadd.b.aqrl a0, a1, (a0)
+; RV64IA-ZABHA-NEXT: ret
+;
+; RV64IA-LABEL: atomicrmw_sub_i8:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: li a2, 255
+; RV64IA-NEXT: andi a3, a0, -4
+; RV64IA-NEXT: andi a0, a0, 3
+; RV64IA-NEXT: zext.b a1, a1
+; RV64IA-NEXT: slli a0, a0, 3
+; RV64IA-NEXT: sllw a2, a2, a0
+; RV64IA-NEXT: sllw a1, a1, a0
+; RV64IA-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-NEXT: lr.w.aqrl a4, (a3)
+; RV64IA-NEXT: sub a5, a4, a1
+; RV64IA-NEXT: xor a5, a4, a5
+; RV64IA-NEXT: and a5, a5, a2
+; RV64IA-NEXT: xor a5, a4, a5
+; RV64IA-NEXT: sc.w.rl a5, a5, (a3)
+; RV64IA-NEXT: bnez a5, .LBB4_1
+; RV64IA-NEXT: # %bb.2:
+; RV64IA-NEXT: srlw a0, a4, a0
+; RV64IA-NEXT: ret
+;
+; RV64I-LABEL: atomicrmw_sub_i8:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a2, 5
+; RV64I-NEXT: call __atomic_fetch_sub_1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %res = atomicrmw sub ptr %ptr, i8 %rhs seq_cst
+ ret i8 %res
+}
+
+define i16 @atomicrmw_sub_i16(ptr %ptr, i16 %rhs) nounwind {
+; RV32IA-ZABHA-LABEL: atomicrmw_sub_i16:
+; RV32IA-ZABHA: # %bb.0:
+; RV32IA-ZABHA-NEXT: neg a1, a1
+; RV32IA-ZABHA-NEXT: amoadd.h.aqrl a0, a1, (a0)
+; RV32IA-ZABHA-NEXT: ret
+;
+; RV32IA-LABEL: atomicrmw_sub_i16:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: lui a2, 16
+; RV32IA-NEXT: andi a3, a0, -4
+; RV32IA-NEXT: andi a0, a0, 3
+; RV32IA-NEXT: addi a2, a2, -1
+; RV32IA-NEXT: slli a0, a0, 3
+; RV32IA-NEXT: sll a4, a2, a0
+; RV32IA-NEXT: and a1, a1, a2
+; RV32IA-NEXT: sll a1, a1, a0
+; RV32IA-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT: lr.w.aqrl a2, (a3)
+; RV32IA-NEXT: sub a5, a2, a1
+; RV32IA-NEXT: xor a5, a2, a5
+; RV32IA-NEXT: and a5, a5, a4
+; RV32IA-NEXT: xor a5, a2, a5
+; RV32IA-NEXT: sc.w.rl a5, a5, (a3)
+; RV32IA-NEXT: bnez a5, .LBB5_1
+; RV32IA-NEXT: # %bb.2:
+; RV32IA-NEXT: srl a0, a2, a0
+; RV32IA-NEXT: ret
+;
+; RV32I-LABEL: atomicrmw_sub_i16:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a2, 5
+; RV32I-NEXT: call __atomic_fetch_sub_2
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64IA-ZABHA-LABEL: atomicrmw_sub_i16:
+; RV64IA-ZABHA: # %bb.0:
+; RV64IA-ZABHA-NEXT: neg a1, a1
+; RV64IA-ZABHA-NEXT: amoadd.h.aqrl a0, a1, (a0)
+; RV64IA-ZABHA-NEXT: ret
+;
+; RV64IA-LABEL: atomicrmw_sub_i16:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: lui a2, 16
+; RV64IA-NEXT: andi a3, a0, -4
+; RV64IA-NEXT: andi a0, a0, 3
+; RV64IA-NEXT: addi a2, a2, -1
+; RV64IA-NEXT: slli a0, a0, 3
+; RV64IA-NEXT: sllw a4, a2, a0
+; RV64IA-NEXT: and a1, a1, a2
+; RV64IA-NEXT: sllw a1, a1, a0
+; RV64IA-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-NEXT: lr.w.aqrl a2, (a3)
+; RV64IA-NEXT: sub a5, a2, a1
+; RV64IA-NEXT: xor a5, a2, a5
+; RV64IA-NEXT: and a5, a5, a4
+; RV64IA-NEXT: xor a5, a2, a5
+; RV64IA-NEXT: sc.w.rl a5, a5, (a3)
+; RV64IA-NEXT: bnez a5, .LBB5_1
+; RV64IA-NEXT: # %bb.2:
+; RV64IA-NEXT: srlw a0, a2, a0
+; RV64IA-NEXT: ret
+;
+; RV64I-LABEL: atomicrmw_sub_i16:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a2, 5
+; RV64I-NEXT: call __atomic_fetch_sub_2
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %res = atomicrmw sub ptr %ptr, i16 %rhs seq_cst
+ ret i16 %res
+}
+
+define i32 @atomicrmw_sub_i32(ptr %ptr, i32 %rhs) nounwind {
+; RV32IA-ZABHA-LABEL: atomicrmw_sub_i32:
+; RV32IA-ZABHA: # %bb.0:
+; RV32IA-ZABHA-NEXT: neg a1, a1
+; RV32IA-ZABHA-NEXT: amoadd.w.aqrl a0, a1, (a0)
+; RV32IA-ZABHA-NEXT: ret
+;
+; RV32IA-LABEL: atomicrmw_sub_i32:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: neg a1, a1
+; RV32IA-NEXT: amoadd.w.aqrl a0, a1, (a0)
+; RV32IA-NEXT: ret
+;
+; RV32I-LABEL: atomicrmw_sub_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a2, 5
+; RV32I-NEXT: call __atomic_fetch_sub_4
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64IA-ZABHA-LABEL: atomicrmw_sub_i32:
+; RV64IA-ZABHA: # %bb.0:
+; RV64IA-ZABHA-NEXT: neg a1, a1
+; RV64IA-ZABHA-NEXT: amoadd.w.aqrl a0, a1, (a0)
+; RV64IA-ZABHA-NEXT: ret
+;
+; RV64IA-LABEL: atomicrmw_sub_i32:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: neg a1, a1
+; RV64IA-NEXT: amoadd.w.aqrl a0, a1, (a0)
+; RV64IA-NEXT: ret
+;
+; RV64I-LABEL: atomicrmw_sub_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a2, 5
+; RV64I-NEXT: call __atomic_fetch_sub_4
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %res = atomicrmw sub ptr %ptr, i32 %rhs seq_cst
+ ret i32 %res
+}
+
+define i64 @atomicrmw_sub_i64(ptr %ptr, i64 %rhs) nounwind {
+; RV32IA-ZABHA-LABEL: atomicrmw_sub_i64:
+; RV32IA-ZABHA: # %bb.0:
+; RV32IA-ZABHA-NEXT: addi sp, sp, -16
+; RV32IA-ZABHA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-ZABHA-NEXT: li a3, 5
+; RV32IA-ZABHA-NEXT: call __atomic_fetch_sub_8
+; RV32IA-ZABHA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-ZABHA-NEXT: addi sp, sp, 16
+; RV32IA-ZABHA-NEXT: ret
+;
+; RV32IA-LABEL: atomicrmw_sub_i64:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -16
+; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: li a3, 5
+; RV32IA-NEXT: call __atomic_fetch_sub_8
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: ret
+;
+; RV32I-LABEL: atomicrmw_sub_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a3, 5
+; RV32I-NEXT: call __atomic_fetch_sub_8
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64IA-ZABHA-LABEL: atomicrmw_sub_i64:
+; RV64IA-ZABHA: # %bb.0:
+; RV64IA-ZABHA-NEXT: neg a1, a1
+; RV64IA-ZABHA-NEXT: amoadd.d.aqrl a0, a1, (a0)
+; RV64IA-ZABHA-NEXT: ret
+;
+; RV64IA-LABEL: atomicrmw_sub_i64:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: neg a1, a1
+; RV64IA-NEXT: amoadd.d.aqrl a0, a1, (a0)
+; RV64IA-NEXT: ret
+;
+; RV64I-LABEL: atomicrmw_sub_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a2, 5
+; RV64I-NEXT: call __atomic_fetch_sub_8
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %res = atomicrmw sub ptr %ptr, i64 %rhs seq_cst
+ ret i64 %res
+}
+
+define i16 @atomicrmw_sub_i16_constant(ptr %a) nounwind {
+; RV32IA-ZABHA-LABEL: atomicrmw_sub_i16_constant:
+; RV32IA-...
[truncated]
|
@llvm/pr-subscribers-backend-risc-v Author: Kane Wang (ReVe1uv) Changes: RISCV does not provide a native atomic subtract instruction, so this patch lowers G_ATOMICRMW_SUB via G_ATOMICRMW_ADD. For example, lowering
on riscv32a produces:
On riscv64a, where the RHS type is narrower than XLEN, it currently produces:
There is still a constant-folding or InstCombiner gap. For instance, lowering
generates:
This sequence could be optimized further to eliminate the redundant neg. Addressing this may require improvements in the Combiner or Peephole Optimizer in future work. Patch is 96.88 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/155972.diff 13 Files Affected:
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 008c18837a522..1600594c955d1 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -26,6 +26,7 @@
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/RuntimeLibcallUtil.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -37,6 +38,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/LowerAtomic.h"
#include <numeric>
#include <optional>
@@ -4773,6 +4775,18 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return lowerVectorReduction(MI);
case G_VAARG:
return lowerVAArg(MI);
+ case G_ATOMICRMW_SUB: {
+ auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs();
+ MachineMemOperand *MMO = *MI.memoperands_begin();
+
+ auto VNeg = MIRBuilder.buildNeg(ValLLT, Val);
+ auto NewRMW =
+ MIRBuilder.buildAtomicRMW(G_ATOMICRMW_ADD, RetLLT, Mem, VNeg, *MMO);
+
+ MIRBuilder.buildCopy(Ret, NewRMW);
+ MI.eraseFromParent();
+ return Legalized;
+ }
}
}
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index d6ae58ac890aa..ff733334f5d60 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -699,6 +699,11 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
.libcallFor(!ST.hasStdExtA(), {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}})
.clampScalar(0, sXLen, sXLen);
+ getActionDefinitionsBuilder(G_ATOMICRMW_SUB)
+ .libcallFor(!ST.hasStdExtA(), {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}})
+ .clampScalar(0, sXLen, sXLen)
+ .lower();
+
getLegacyLegalizerInfo().computeTables();
verify(*ST.getInstrInfo());
}
@@ -737,6 +742,7 @@ bool RISCVLegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
return true;
}
case Intrinsic::riscv_masked_atomicrmw_add:
+ case Intrinsic::riscv_masked_atomicrmw_sub:
return true;
}
}
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/atomicrmw-add-sub.ll b/llvm/test/CodeGen/RISCV/GlobalISel/atomicrmw-add-sub.ll
new file mode 100644
index 0000000000000..21b2bbfc59241
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/atomicrmw-add-sub.ll
@@ -0,0 +1,930 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+a,+zabha -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32IA-ZABHA
+; RUN: llc -mtriple=riscv32 -mattr=+a -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32IA
+; RUN: llc -mtriple=riscv32 -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32I
+; RUN: llc -mtriple=riscv64 -mattr=+a,+zabha -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64IA-ZABHA
+; RUN: llc -mtriple=riscv64 -mattr=+a -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64IA
+; RUN: llc -mtriple=riscv64 -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64I
+
+define i8 @atomicrmw_add_i8(ptr %ptr, i8 %rhs) nounwind {
+; RV32IA-ZABHA-LABEL: atomicrmw_add_i8:
+; RV32IA-ZABHA: # %bb.0:
+; RV32IA-ZABHA-NEXT: amoadd.b.aqrl a0, a1, (a0)
+; RV32IA-ZABHA-NEXT: ret
+;
+; RV32IA-LABEL: atomicrmw_add_i8:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: li a2, 255
+; RV32IA-NEXT: andi a3, a0, -4
+; RV32IA-NEXT: andi a0, a0, 3
+; RV32IA-NEXT: zext.b a1, a1
+; RV32IA-NEXT: slli a0, a0, 3
+; RV32IA-NEXT: sll a2, a2, a0
+; RV32IA-NEXT: sll a1, a1, a0
+; RV32IA-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT: lr.w.aqrl a4, (a3)
+; RV32IA-NEXT: add a5, a4, a1
+; RV32IA-NEXT: xor a5, a4, a5
+; RV32IA-NEXT: and a5, a5, a2
+; RV32IA-NEXT: xor a5, a4, a5
+; RV32IA-NEXT: sc.w.rl a5, a5, (a3)
+; RV32IA-NEXT: bnez a5, .LBB0_1
+; RV32IA-NEXT: # %bb.2:
+; RV32IA-NEXT: srl a0, a4, a0
+; RV32IA-NEXT: ret
+;
+; RV32I-LABEL: atomicrmw_add_i8:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a2, 5
+; RV32I-NEXT: call __atomic_fetch_add_1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64IA-ZABHA-LABEL: atomicrmw_add_i8:
+; RV64IA-ZABHA: # %bb.0:
+; RV64IA-ZABHA-NEXT: amoadd.b.aqrl a0, a1, (a0)
+; RV64IA-ZABHA-NEXT: ret
+;
+; RV64IA-LABEL: atomicrmw_add_i8:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: li a2, 255
+; RV64IA-NEXT: andi a3, a0, -4
+; RV64IA-NEXT: andi a0, a0, 3
+; RV64IA-NEXT: zext.b a1, a1
+; RV64IA-NEXT: slli a0, a0, 3
+; RV64IA-NEXT: sllw a2, a2, a0
+; RV64IA-NEXT: sllw a1, a1, a0
+; RV64IA-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-NEXT: lr.w.aqrl a4, (a3)
+; RV64IA-NEXT: add a5, a4, a1
+; RV64IA-NEXT: xor a5, a4, a5
+; RV64IA-NEXT: and a5, a5, a2
+; RV64IA-NEXT: xor a5, a4, a5
+; RV64IA-NEXT: sc.w.rl a5, a5, (a3)
+; RV64IA-NEXT: bnez a5, .LBB0_1
+; RV64IA-NEXT: # %bb.2:
+; RV64IA-NEXT: srlw a0, a4, a0
+; RV64IA-NEXT: ret
+;
+; RV64I-LABEL: atomicrmw_add_i8:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a2, 5
+; RV64I-NEXT: call __atomic_fetch_add_1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %res = atomicrmw add ptr %ptr, i8 %rhs seq_cst
+ ret i8 %res
+}
+
+define i16 @atomicrmw_add_i16(ptr %ptr, i16 %rhs) nounwind {
+; RV32IA-ZABHA-LABEL: atomicrmw_add_i16:
+; RV32IA-ZABHA: # %bb.0:
+; RV32IA-ZABHA-NEXT: amoadd.h.aqrl a0, a1, (a0)
+; RV32IA-ZABHA-NEXT: ret
+;
+; RV32IA-LABEL: atomicrmw_add_i16:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: lui a2, 16
+; RV32IA-NEXT: andi a3, a0, -4
+; RV32IA-NEXT: andi a0, a0, 3
+; RV32IA-NEXT: addi a2, a2, -1
+; RV32IA-NEXT: slli a0, a0, 3
+; RV32IA-NEXT: sll a4, a2, a0
+; RV32IA-NEXT: and a1, a1, a2
+; RV32IA-NEXT: sll a1, a1, a0
+; RV32IA-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT: lr.w.aqrl a2, (a3)
+; RV32IA-NEXT: add a5, a2, a1
+; RV32IA-NEXT: xor a5, a2, a5
+; RV32IA-NEXT: and a5, a5, a4
+; RV32IA-NEXT: xor a5, a2, a5
+; RV32IA-NEXT: sc.w.rl a5, a5, (a3)
+; RV32IA-NEXT: bnez a5, .LBB1_1
+; RV32IA-NEXT: # %bb.2:
+; RV32IA-NEXT: srl a0, a2, a0
+; RV32IA-NEXT: ret
+;
+; RV32I-LABEL: atomicrmw_add_i16:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a2, 5
+; RV32I-NEXT: call __atomic_fetch_add_2
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64IA-ZABHA-LABEL: atomicrmw_add_i16:
+; RV64IA-ZABHA: # %bb.0:
+; RV64IA-ZABHA-NEXT: amoadd.h.aqrl a0, a1, (a0)
+; RV64IA-ZABHA-NEXT: ret
+;
+; RV64IA-LABEL: atomicrmw_add_i16:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: lui a2, 16
+; RV64IA-NEXT: andi a3, a0, -4
+; RV64IA-NEXT: andi a0, a0, 3
+; RV64IA-NEXT: addi a2, a2, -1
+; RV64IA-NEXT: slli a0, a0, 3
+; RV64IA-NEXT: sllw a4, a2, a0
+; RV64IA-NEXT: and a1, a1, a2
+; RV64IA-NEXT: sllw a1, a1, a0
+; RV64IA-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-NEXT: lr.w.aqrl a2, (a3)
+; RV64IA-NEXT: add a5, a2, a1
+; RV64IA-NEXT: xor a5, a2, a5
+; RV64IA-NEXT: and a5, a5, a4
+; RV64IA-NEXT: xor a5, a2, a5
+; RV64IA-NEXT: sc.w.rl a5, a5, (a3)
+; RV64IA-NEXT: bnez a5, .LBB1_1
+; RV64IA-NEXT: # %bb.2:
+; RV64IA-NEXT: srlw a0, a2, a0
+; RV64IA-NEXT: ret
+;
+; RV64I-LABEL: atomicrmw_add_i16:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a2, 5
+; RV64I-NEXT: call __atomic_fetch_add_2
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %res = atomicrmw add ptr %ptr, i16 %rhs seq_cst
+ ret i16 %res
+}
+
+define i32 @atomicrmw_add_i32(ptr %ptr, i32 %rhs) nounwind {
+; RV32IA-ZABHA-LABEL: atomicrmw_add_i32:
+; RV32IA-ZABHA: # %bb.0:
+; RV32IA-ZABHA-NEXT: amoadd.w.aqrl a0, a1, (a0)
+; RV32IA-ZABHA-NEXT: ret
+;
+; RV32IA-LABEL: atomicrmw_add_i32:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: amoadd.w.aqrl a0, a1, (a0)
+; RV32IA-NEXT: ret
+;
+; RV32I-LABEL: atomicrmw_add_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a2, 5
+; RV32I-NEXT: call __atomic_fetch_add_4
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64IA-ZABHA-LABEL: atomicrmw_add_i32:
+; RV64IA-ZABHA: # %bb.0:
+; RV64IA-ZABHA-NEXT: amoadd.w.aqrl a0, a1, (a0)
+; RV64IA-ZABHA-NEXT: ret
+;
+; RV64IA-LABEL: atomicrmw_add_i32:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: amoadd.w.aqrl a0, a1, (a0)
+; RV64IA-NEXT: ret
+;
+; RV64I-LABEL: atomicrmw_add_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a2, 5
+; RV64I-NEXT: call __atomic_fetch_add_4
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %res = atomicrmw add ptr %ptr, i32 %rhs seq_cst
+ ret i32 %res
+}
+
+define i64 @atomicrmw_add_i64(ptr %ptr, i64 %rhs) nounwind {
+; RV32IA-ZABHA-LABEL: atomicrmw_add_i64:
+; RV32IA-ZABHA: # %bb.0:
+; RV32IA-ZABHA-NEXT: addi sp, sp, -16
+; RV32IA-ZABHA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-ZABHA-NEXT: li a3, 5
+; RV32IA-ZABHA-NEXT: call __atomic_fetch_add_8
+; RV32IA-ZABHA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-ZABHA-NEXT: addi sp, sp, 16
+; RV32IA-ZABHA-NEXT: ret
+;
+; RV32IA-LABEL: atomicrmw_add_i64:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -16
+; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: li a3, 5
+; RV32IA-NEXT: call __atomic_fetch_add_8
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: ret
+;
+; RV32I-LABEL: atomicrmw_add_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a3, 5
+; RV32I-NEXT: call __atomic_fetch_add_8
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64IA-ZABHA-LABEL: atomicrmw_add_i64:
+; RV64IA-ZABHA: # %bb.0:
+; RV64IA-ZABHA-NEXT: amoadd.d.aqrl a0, a1, (a0)
+; RV64IA-ZABHA-NEXT: ret
+;
+; RV64IA-LABEL: atomicrmw_add_i64:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: amoadd.d.aqrl a0, a1, (a0)
+; RV64IA-NEXT: ret
+;
+; RV64I-LABEL: atomicrmw_add_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a2, 5
+; RV64I-NEXT: call __atomic_fetch_add_8
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %res = atomicrmw add ptr %ptr, i64 %rhs seq_cst
+ ret i64 %res
+}
+
+define i8 @atomicrmw_sub_i8(ptr %ptr, i8 %rhs) nounwind {
+; RV32IA-ZABHA-LABEL: atomicrmw_sub_i8:
+; RV32IA-ZABHA: # %bb.0:
+; RV32IA-ZABHA-NEXT: neg a1, a1
+; RV32IA-ZABHA-NEXT: amoadd.b.aqrl a0, a1, (a0)
+; RV32IA-ZABHA-NEXT: ret
+;
+; RV32IA-LABEL: atomicrmw_sub_i8:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: li a2, 255
+; RV32IA-NEXT: andi a3, a0, -4
+; RV32IA-NEXT: andi a0, a0, 3
+; RV32IA-NEXT: zext.b a1, a1
+; RV32IA-NEXT: slli a0, a0, 3
+; RV32IA-NEXT: sll a2, a2, a0
+; RV32IA-NEXT: sll a1, a1, a0
+; RV32IA-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT: lr.w.aqrl a4, (a3)
+; RV32IA-NEXT: sub a5, a4, a1
+; RV32IA-NEXT: xor a5, a4, a5
+; RV32IA-NEXT: and a5, a5, a2
+; RV32IA-NEXT: xor a5, a4, a5
+; RV32IA-NEXT: sc.w.rl a5, a5, (a3)
+; RV32IA-NEXT: bnez a5, .LBB4_1
+; RV32IA-NEXT: # %bb.2:
+; RV32IA-NEXT: srl a0, a4, a0
+; RV32IA-NEXT: ret
+;
+; RV32I-LABEL: atomicrmw_sub_i8:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a2, 5
+; RV32I-NEXT: call __atomic_fetch_sub_1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64IA-ZABHA-LABEL: atomicrmw_sub_i8:
+; RV64IA-ZABHA: # %bb.0:
+; RV64IA-ZABHA-NEXT: neg a1, a1
+; RV64IA-ZABHA-NEXT: amoadd.b.aqrl a0, a1, (a0)
+; RV64IA-ZABHA-NEXT: ret
+;
+; RV64IA-LABEL: atomicrmw_sub_i8:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: li a2, 255
+; RV64IA-NEXT: andi a3, a0, -4
+; RV64IA-NEXT: andi a0, a0, 3
+; RV64IA-NEXT: zext.b a1, a1
+; RV64IA-NEXT: slli a0, a0, 3
+; RV64IA-NEXT: sllw a2, a2, a0
+; RV64IA-NEXT: sllw a1, a1, a0
+; RV64IA-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-NEXT: lr.w.aqrl a4, (a3)
+; RV64IA-NEXT: sub a5, a4, a1
+; RV64IA-NEXT: xor a5, a4, a5
+; RV64IA-NEXT: and a5, a5, a2
+; RV64IA-NEXT: xor a5, a4, a5
+; RV64IA-NEXT: sc.w.rl a5, a5, (a3)
+; RV64IA-NEXT: bnez a5, .LBB4_1
+; RV64IA-NEXT: # %bb.2:
+; RV64IA-NEXT: srlw a0, a4, a0
+; RV64IA-NEXT: ret
+;
+; RV64I-LABEL: atomicrmw_sub_i8:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a2, 5
+; RV64I-NEXT: call __atomic_fetch_sub_1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %res = atomicrmw sub ptr %ptr, i8 %rhs seq_cst
+ ret i8 %res
+}
+
+define i16 @atomicrmw_sub_i16(ptr %ptr, i16 %rhs) nounwind {
+; RV32IA-ZABHA-LABEL: atomicrmw_sub_i16:
+; RV32IA-ZABHA: # %bb.0:
+; RV32IA-ZABHA-NEXT: neg a1, a1
+; RV32IA-ZABHA-NEXT: amoadd.h.aqrl a0, a1, (a0)
+; RV32IA-ZABHA-NEXT: ret
+;
+; RV32IA-LABEL: atomicrmw_sub_i16:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: lui a2, 16
+; RV32IA-NEXT: andi a3, a0, -4
+; RV32IA-NEXT: andi a0, a0, 3
+; RV32IA-NEXT: addi a2, a2, -1
+; RV32IA-NEXT: slli a0, a0, 3
+; RV32IA-NEXT: sll a4, a2, a0
+; RV32IA-NEXT: and a1, a1, a2
+; RV32IA-NEXT: sll a1, a1, a0
+; RV32IA-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT: lr.w.aqrl a2, (a3)
+; RV32IA-NEXT: sub a5, a2, a1
+; RV32IA-NEXT: xor a5, a2, a5
+; RV32IA-NEXT: and a5, a5, a4
+; RV32IA-NEXT: xor a5, a2, a5
+; RV32IA-NEXT: sc.w.rl a5, a5, (a3)
+; RV32IA-NEXT: bnez a5, .LBB5_1
+; RV32IA-NEXT: # %bb.2:
+; RV32IA-NEXT: srl a0, a2, a0
+; RV32IA-NEXT: ret
+;
+; RV32I-LABEL: atomicrmw_sub_i16:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a2, 5
+; RV32I-NEXT: call __atomic_fetch_sub_2
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64IA-ZABHA-LABEL: atomicrmw_sub_i16:
+; RV64IA-ZABHA: # %bb.0:
+; RV64IA-ZABHA-NEXT: neg a1, a1
+; RV64IA-ZABHA-NEXT: amoadd.h.aqrl a0, a1, (a0)
+; RV64IA-ZABHA-NEXT: ret
+;
+; RV64IA-LABEL: atomicrmw_sub_i16:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: lui a2, 16
+; RV64IA-NEXT: andi a3, a0, -4
+; RV64IA-NEXT: andi a0, a0, 3
+; RV64IA-NEXT: addi a2, a2, -1
+; RV64IA-NEXT: slli a0, a0, 3
+; RV64IA-NEXT: sllw a4, a2, a0
+; RV64IA-NEXT: and a1, a1, a2
+; RV64IA-NEXT: sllw a1, a1, a0
+; RV64IA-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-NEXT: lr.w.aqrl a2, (a3)
+; RV64IA-NEXT: sub a5, a2, a1
+; RV64IA-NEXT: xor a5, a2, a5
+; RV64IA-NEXT: and a5, a5, a4
+; RV64IA-NEXT: xor a5, a2, a5
+; RV64IA-NEXT: sc.w.rl a5, a5, (a3)
+; RV64IA-NEXT: bnez a5, .LBB5_1
+; RV64IA-NEXT: # %bb.2:
+; RV64IA-NEXT: srlw a0, a2, a0
+; RV64IA-NEXT: ret
+;
+; RV64I-LABEL: atomicrmw_sub_i16:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a2, 5
+; RV64I-NEXT: call __atomic_fetch_sub_2
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %res = atomicrmw sub ptr %ptr, i16 %rhs seq_cst
+ ret i16 %res
+}
+
+define i32 @atomicrmw_sub_i32(ptr %ptr, i32 %rhs) nounwind {
+; RV32IA-ZABHA-LABEL: atomicrmw_sub_i32:
+; RV32IA-ZABHA: # %bb.0:
+; RV32IA-ZABHA-NEXT: neg a1, a1
+; RV32IA-ZABHA-NEXT: amoadd.w.aqrl a0, a1, (a0)
+; RV32IA-ZABHA-NEXT: ret
+;
+; RV32IA-LABEL: atomicrmw_sub_i32:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: neg a1, a1
+; RV32IA-NEXT: amoadd.w.aqrl a0, a1, (a0)
+; RV32IA-NEXT: ret
+;
+; RV32I-LABEL: atomicrmw_sub_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a2, 5
+; RV32I-NEXT: call __atomic_fetch_sub_4
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64IA-ZABHA-LABEL: atomicrmw_sub_i32:
+; RV64IA-ZABHA: # %bb.0:
+; RV64IA-ZABHA-NEXT: neg a1, a1
+; RV64IA-ZABHA-NEXT: amoadd.w.aqrl a0, a1, (a0)
+; RV64IA-ZABHA-NEXT: ret
+;
+; RV64IA-LABEL: atomicrmw_sub_i32:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: neg a1, a1
+; RV64IA-NEXT: amoadd.w.aqrl a0, a1, (a0)
+; RV64IA-NEXT: ret
+;
+; RV64I-LABEL: atomicrmw_sub_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a2, 5
+; RV64I-NEXT: call __atomic_fetch_sub_4
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %res = atomicrmw sub ptr %ptr, i32 %rhs seq_cst
+ ret i32 %res
+}
+
+define i64 @atomicrmw_sub_i64(ptr %ptr, i64 %rhs) nounwind {
+; RV32IA-ZABHA-LABEL: atomicrmw_sub_i64:
+; RV32IA-ZABHA: # %bb.0:
+; RV32IA-ZABHA-NEXT: addi sp, sp, -16
+; RV32IA-ZABHA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-ZABHA-NEXT: li a3, 5
+; RV32IA-ZABHA-NEXT: call __atomic_fetch_sub_8
+; RV32IA-ZABHA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-ZABHA-NEXT: addi sp, sp, 16
+; RV32IA-ZABHA-NEXT: ret
+;
+; RV32IA-LABEL: atomicrmw_sub_i64:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -16
+; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: li a3, 5
+; RV32IA-NEXT: call __atomic_fetch_sub_8
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: ret
+;
+; RV32I-LABEL: atomicrmw_sub_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a3, 5
+; RV32I-NEXT: call __atomic_fetch_sub_8
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64IA-ZABHA-LABEL: atomicrmw_sub_i64:
+; RV64IA-ZABHA: # %bb.0:
+; RV64IA-ZABHA-NEXT: neg a1, a1
+; RV64IA-ZABHA-NEXT: amoadd.d.aqrl a0, a1, (a0)
+; RV64IA-ZABHA-NEXT: ret
+;
+; RV64IA-LABEL: atomicrmw_sub_i64:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: neg a1, a1
+; RV64IA-NEXT: amoadd.d.aqrl a0, a1, (a0)
+; RV64IA-NEXT: ret
+;
+; RV64I-LABEL: atomicrmw_sub_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a2, 5
+; RV64I-NEXT: call __atomic_fetch_sub_8
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %res = atomicrmw sub ptr %ptr, i64 %rhs seq_cst
+ ret i64 %res
+}
+
+define i16 @atomicrmw_sub_i16_constant(ptr %a) nounwind {
+; RV32IA-ZABHA-LABEL: atomicrmw_sub_i16_constant:
+; RV32IA-...
[truncated]
|
f26899d
to
52ecdd4
Compare
|
```cpp
auto VNeg = MIRBuilder.buildNeg(ValLLT, Val);
auto NewRMW =
    MIRBuilder.buildAtomicRMW(G_ATOMICRMW_ADD, RetLLT, Mem, VNeg, *MMO);
```
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we use the `Ret` register for this and avoid creating a copy?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In the latest commit I’ve updated the code to directly reuse the `Ret` register instead of creating a copy. However, I can’t be fully sure that this is completely safe.
RISCV does not provide a native atomic subtract instruction, so this patch lowers G_ATOMICRMW_SUB by negating the RHS value and performing an atomic add. The legalization rules in RISCVLegalizerInfo are updated accordingly, with libcall fallbacks when StdExtA is not available, and intrinsic legalization is extended to support riscv_masked_atomicrmw_sub.
52ecdd4
to
e353db9
Compare
RISCV does not provide a native atomic subtract instruction, so this patch lowers `G_ATOMICRMW_SUB` by negating the RHS value and performing an atomic add. The legalization rules in `RISCVLegalizerInfo` are updated accordingly, with libcall fallbacks when `StdExtA` is not available, and intrinsic legalization is extended to support `riscv_masked_atomicrmw_sub`.

For example, lowering
.For example, lowering
%1 = atomicrmw sub ptr %a, i32 1 seq_cst
on riscv32a produces:
On riscv64a, where the RHS type is narrower than XLEN, it currently produces:
There is still a constant-folding or InstCombiner gap. For instance, lowering
generates:
This sequence could be optimized further to eliminate the redundant neg. Addressing this may require improvements in the Combiner or Peephole Optimizer in future work.