[Sparc] limit MaxAtomicSizeInBitsSupported to 32 for 32-bit Sparc. #81655
Conversation
When in 32-bit mode, the backend doesn't currently implement 64-bit atomics, even though the hardware is capable when a V9 CPU is specified. Thus, limit the width to 32 bits for now, leaving behind a TODO. This fixes a regression triggered by PR llvm#73176.
@llvm/pr-subscribers-backend-sparc Author: James Y Knight (jyknight). Changes: as described above. Full diff: https://github.com/llvm/llvm-project/pull/81655.diff. 3 Files Affected:
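As a rough sketch of the user-visible effect (illustrative IR only; the function name load64 and the exact expanded form are assumptions, not taken from this PR): once the supported atomic width is capped at 32 bits, AtomicExpandPass rewrites any 64-bit atomic on 32-bit SPARC into a libatomic call before instruction selection, instead of asking the backend to select it inline.

; Assumed input, compiled with: llc -march=sparc -mcpu=v9
define i64 @load64(ptr %p) {
  %v = load atomic i64, ptr %p seq_cst, align 8
  ret i64 %v
}

; Roughly what AtomicExpandPass hands to the backend instead
; (5 is the LLVM encoding of seq_cst ordering):
define i64 @load64.expanded(ptr %p) {
  %v = call i64 @__atomic_load_8(ptr %p, i32 5)
  ret i64 %v
}
declare i64 @__atomic_load_8(ptr, i32)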
diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index bdefb0841a124b..13184a1eb0b101 100644
--- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -1764,9 +1764,14 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
// Atomics are supported on SparcV9. 32-bit atomics are also
// supported by some Leon SparcV8 variants. Otherwise, atomics
// are unsupported.
- if (Subtarget->isV9())
- setMaxAtomicSizeInBitsSupported(64);
- else if (Subtarget->hasLeonCasa())
+ if (Subtarget->isV9()) {
+ // TODO: we _ought_ to be able to support 64-bit atomics on 32-bit sparcv9,
+ // but it hasn't been implemented in the backend yet.
+ if (Subtarget->is64Bit())
+ setMaxAtomicSizeInBitsSupported(64);
+ else
+ setMaxAtomicSizeInBitsSupported(32);
+ } else if (Subtarget->hasLeonCasa())
setMaxAtomicSizeInBitsSupported(32);
else
setMaxAtomicSizeInBitsSupported(0);
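For readers unfamiliar with the hook being changed here: setMaxAtomicSizeInBitsSupported tells AtomicExpandPass the widest atomic operation the backend will lower inline; anything wider is converted to a __atomic_* libcall. A hedged sketch of that conversion for one RMW case exercised by the tests below (the input IR is assumed, but the libcall name matches the new SPARC32 check lines):

; Assumed IR: a 64-bit atomic subtraction on 32-bit SPARC.
define i64 @fetch_sub(ptr %p, i64 %v) {
  %old = atomicrmw sub ptr %p, i64 %v seq_cst
  ret i64 %old
}
; After this patch the i64 width exceeds the 32-bit cap, so the
; operation becomes a runtime call (5 == seq_cst):
;   %old = call i64 @__atomic_fetch_sub_8(ptr %p, i64 %v, i32 5)
declare i64 @__atomic_fetch_sub_8(ptr, i64, i32)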
diff --git a/llvm/test/CodeGen/SPARC/64atomics.ll b/llvm/test/CodeGen/SPARC/64atomics.ll
index 2c00f955f497b1..feb37fdae52b03 100644
--- a/llvm/test/CodeGen/SPARC/64atomics.ll
+++ b/llvm/test/CodeGen/SPARC/64atomics.ll
@@ -1,12 +1,14 @@
-; RUN: llc < %s -march=sparcv9 -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=sparc -mcpu=v9 -verify-machineinstrs | FileCheck %s --check-prefixes=SPARC,SPARC32
+; RUN: llc < %s -march=sparcv9 -verify-machineinstrs | FileCheck %s --check-prefixes=SPARC,SPARC64
-; CHECK-LABEL: test_atomic_i64
-; CHECK: ldx [%o0]
-; CHECK: membar
-; CHECK: ldx [%o1]
-; CHECK: membar
-; CHECK: membar
-; CHECK: stx {{.+}}, [%o2]
+; SPARC-LABEL: test_atomic_i64
+; SPARC32: __atomic_load_8
+; SPARC64: ldx [%o0]
+; SPARC64: membar
+; SPARC64: ldx [%o1]
+; SPARC64: membar
+; SPARC64: membar
+; SPARC64: stx {{.+}}, [%o2]
define i64 @test_atomic_i64(ptr %ptr1, ptr %ptr2, ptr %ptr3) {
entry:
%0 = load atomic i64, ptr %ptr1 acquire, align 8
@@ -16,9 +18,10 @@ entry:
ret i64 %2
}
-; CHECK-LABEL: test_cmpxchg_i64
-; CHECK: mov 123, [[R:%[gilo][0-7]]]
-; CHECK: casx [%o1], %o0, [[R]]
+; SPARC-LABEL: test_cmpxchg_i64
+; SPARC32: __atomic_compare_exchange_8
+; SPARC64: mov 123, [[R:%[gilo][0-7]]]
+; SPARC64: casx [%o1], %o0, [[R]]
define i64 @test_cmpxchg_i64(i64 %a, ptr %ptr) {
entry:
@@ -27,8 +30,9 @@ entry:
ret i64 %b
}
-; CHECK-LABEL: test_swap_i64
-; CHECK: casx [%o1],
+; SPARC-LABEL: test_swap_i64
+; SPARC32: __atomic_exchange_8
+; SPARC64: casx [%o1],
define i64 @test_swap_i64(i64 %a, ptr %ptr) {
entry:
@@ -36,23 +40,25 @@ entry:
ret i64 %b
}
-; CHECK-LABEL: test_load_sub_64
-; CHECK: membar
-; CHECK: sub
-; CHECK: casx [%o0]
-; CHECK: membar
+; SPARC-LABEL: test_load_sub_64
+; SPARC32: __atomic_fetch_sub_8
+; SPARC64: membar
+; SPARC64: sub
+; SPARC64: casx [%o0]
+; SPARC64: membar
define zeroext i64 @test_load_sub_64(ptr %p, i64 zeroext %v) {
entry:
%0 = atomicrmw sub ptr %p, i64 %v seq_cst
ret i64 %0
}
-; CHECK-LABEL: test_load_max_64
-; CHECK: membar
-; CHECK: cmp
-; CHECK: movg %xcc
-; CHECK: casx [%o0]
-; CHECK: membar
+; SPARC-LABEL: test_load_max_64
+; SPARC32: __atomic_compare_exchange_8
+; SPARC64: membar
+; SPARC64: cmp
+; SPARC64: movg %xcc
+; SPARC64: casx [%o0]
+; SPARC64: membar
define zeroext i64 @test_load_max_64(ptr %p, i64 zeroext %v) {
entry:
%0 = atomicrmw max ptr %p, i64 %v seq_cst
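The compare-exchange path is worth spelling out, since the assembly changes in the next file follow from its calling convention. A sketch, assuming the usual shape of LLVM's atomic-expansion libcall (the i1 result reports success, and the expected value is passed indirectly so the runtime can write back the value it actually observed):

; Sketch: a 64-bit cmpxchg on SPARC32 becomes (5 == seq_cst):
;   %ok = call zeroext i1 @__atomic_compare_exchange_8(
;             ptr %p, ptr %expected, i64 %desired, i32 5, i32 5)
; %expected is a stack slot; on failure the runtime stores the loaded
; value there, which is why the assembly below writes it with std
; before the call and reloads it with ldd afterwards.
declare zeroext i1 @__atomic_compare_exchange_8(ptr, ptr, i64, i32, i32)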
diff --git a/llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll
index 9b49035c460407..0f9feeb17716af 100644
--- a/llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll
+++ b/llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll
@@ -117,43 +117,41 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
; CHECK-LABEL: atomicrmw_uinc_wrap_i64:
; CHECK: .cfi_startproc
; CHECK-NEXT: ! %bb.0:
-; CHECK-NEXT: save %sp, -96, %sp
+; CHECK-NEXT: save %sp, -104, %sp
; CHECK-NEXT: .cfi_def_cfa_register %fp
; CHECK-NEXT: .cfi_window_save
; CHECK-NEXT: .cfi_register %o7, %i7
-; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
-; CHECK-NEXT: ldd [%i0], %i4
+; CHECK-NEXT: ldd [%i0], %g2
+; CHECK-NEXT: add %fp, -8, %i3
+; CHECK-NEXT: mov 5, %i4
; CHECK-NEXT: .LBB3_1: ! %atomicrmw.start
; CHECK-NEXT: ! =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: mov %g0, %i3
-; CHECK-NEXT: mov %g0, %g2
-; CHECK-NEXT: addcc %i5, 1, %o4
-; CHECK-NEXT: addxcc %i4, 0, %o3
-; CHECK-NEXT: cmp %i4, %i1
-; CHECK-NEXT: movcc %icc, 1, %i3
-; CHECK-NEXT: cmp %i5, %i2
-; CHECK-NEXT: movcc %icc, 1, %g2
-; CHECK-NEXT: cmp %i4, %i1
-; CHECK-NEXT: move %icc, %g2, %i3
-; CHECK-NEXT: cmp %i3, 0
+; CHECK-NEXT: mov %g0, %i5
+; CHECK-NEXT: mov %g0, %g4
+; CHECK-NEXT: addcc %g3, 1, %o3
+; CHECK-NEXT: addxcc %g2, 0, %o2
+; CHECK-NEXT: cmp %g2, %i1
+; CHECK-NEXT: movcc %icc, 1, %i5
+; CHECK-NEXT: cmp %g3, %i2
+; CHECK-NEXT: movcc %icc, 1, %g4
+; CHECK-NEXT: cmp %g2, %i1
+; CHECK-NEXT: move %icc, %g4, %i5
+; CHECK-NEXT: cmp %i5, 0
+; CHECK-NEXT: movne %icc, 0, %o2
; CHECK-NEXT: movne %icc, 0, %o3
-; CHECK-NEXT: movne %icc, 0, %o4
+; CHECK-NEXT: std %g2, [%fp+-8]
; CHECK-NEXT: mov %i0, %o0
-; CHECK-NEXT: mov %i4, %o1
-; CHECK-NEXT: call __sync_val_compare_and_swap_8
-; CHECK-NEXT: mov %i5, %o2
-; CHECK-NEXT: xor %o0, %i4, %i3
-; CHECK-NEXT: xor %o1, %i5, %i4
-; CHECK-NEXT: or %i4, %i3, %i3
-; CHECK-NEXT: mov %o1, %i5
-; CHECK-NEXT: cmp %i3, 0
-; CHECK-NEXT: bne %icc, .LBB3_1
-; CHECK-NEXT: mov %o0, %i4
+; CHECK-NEXT: mov %i3, %o1
+; CHECK-NEXT: mov %i4, %o4
+; CHECK-NEXT: call __atomic_compare_exchange_8
+; CHECK-NEXT: mov %i4, %o5
+; CHECK-NEXT: cmp %o0, 0
+; CHECK-NEXT: be %icc, .LBB3_1
+; CHECK-NEXT: ldd [%fp+-8], %g2
; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end
-; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
-; CHECK-NEXT: mov %i4, %i0
+; CHECK-NEXT: mov %g2, %i0
; CHECK-NEXT: ret
-; CHECK-NEXT: restore %g0, %i5, %o1
+; CHECK-NEXT: restore %g0, %g3, %o1
%result = atomicrmw uinc_wrap ptr %ptr, i64 %val seq_cst
ret i64 %result
}
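To make the new loop above easier to follow: atomicrmw uinc_wrap has no single hardware instruction, so AtomicExpandPass first builds a compare-exchange loop, and on SPARC32 the inner 64-bit cmpxchg is then itself lowered to the __atomic_compare_exchange_8 call seen in the assembly. A sketch of the intermediate IR, reconstructed from the block labels above (an assumption, not output from this PR):

; uinc_wrap: new = (old u>= val) ? 0 : old + 1
define i64 @uinc_wrap_sketch(ptr %ptr, i64 %val) {
entry:
  %init = load i64, ptr %ptr, align 8
  br label %atomicrmw.start

atomicrmw.start:
  %old = phi i64 [ %init, %entry ], [ %loaded, %atomicrmw.start ]
  %inc = add i64 %old, 1
  %uge = icmp uge i64 %old, %val
  %new = select i1 %uge, i64 0, i64 %inc
  %pair = cmpxchg ptr %ptr, i64 %old, i64 %new seq_cst seq_cst
  %loaded = extractvalue { i64, i1 } %pair, 0
  %ok = extractvalue { i64, i1 } %pair, 1
  br i1 %ok, label %atomicrmw.end, label %atomicrmw.start

atomicrmw.end:
  ret i64 %loaded
}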
@@ -280,48 +278,46 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) {
; CHECK-LABEL: atomicrmw_udec_wrap_i64:
; CHECK: .cfi_startproc
; CHECK-NEXT: ! %bb.0:
-; CHECK-NEXT: save %sp, -96, %sp
+; CHECK-NEXT: save %sp, -104, %sp
; CHECK-NEXT: .cfi_def_cfa_register %fp
; CHECK-NEXT: .cfi_window_save
; CHECK-NEXT: .cfi_register %o7, %i7
-; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
-; CHECK-NEXT: ldd [%i0], %i4
+; CHECK-NEXT: ldd [%i0], %g2
+; CHECK-NEXT: add %fp, -8, %i3
+; CHECK-NEXT: mov 5, %i4
; CHECK-NEXT: .LBB7_1: ! %atomicrmw.start
; CHECK-NEXT: ! =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: mov %g0, %i3
-; CHECK-NEXT: mov %g0, %g2
-; CHECK-NEXT: mov %g0, %g3
-; CHECK-NEXT: addcc %i5, -1, %o4
-; CHECK-NEXT: addxcc %i4, -1, %o3
-; CHECK-NEXT: or %i5, %i4, %g4
-; CHECK-NEXT: cmp %g4, 0
-; CHECK-NEXT: move %icc, 1, %i3
-; CHECK-NEXT: cmp %i4, %i1
-; CHECK-NEXT: movgu %icc, 1, %g2
-; CHECK-NEXT: cmp %i5, %i2
-; CHECK-NEXT: movgu %icc, 1, %g3
-; CHECK-NEXT: cmp %i4, %i1
-; CHECK-NEXT: move %icc, %g3, %g2
-; CHECK-NEXT: or %i3, %g2, %i3
-; CHECK-NEXT: cmp %i3, 0
-; CHECK-NEXT: movne %icc, %i1, %o3
-; CHECK-NEXT: movne %icc, %i2, %o4
+; CHECK-NEXT: mov %g0, %i5
+; CHECK-NEXT: mov %g0, %g4
+; CHECK-NEXT: mov %g0, %l0
+; CHECK-NEXT: addcc %g3, -1, %o3
+; CHECK-NEXT: addxcc %g2, -1, %o2
+; CHECK-NEXT: or %g3, %g2, %l1
+; CHECK-NEXT: cmp %l1, 0
+; CHECK-NEXT: move %icc, 1, %i5
+; CHECK-NEXT: cmp %g2, %i1
+; CHECK-NEXT: movgu %icc, 1, %g4
+; CHECK-NEXT: cmp %g3, %i2
+; CHECK-NEXT: movgu %icc, 1, %l0
+; CHECK-NEXT: cmp %g2, %i1
+; CHECK-NEXT: move %icc, %l0, %g4
+; CHECK-NEXT: or %i5, %g4, %i5
+; CHECK-NEXT: cmp %i5, 0
+; CHECK-NEXT: movne %icc, %i1, %o2
+; CHECK-NEXT: movne %icc, %i2, %o3
+; CHECK-NEXT: std %g2, [%fp+-8]
; CHECK-NEXT: mov %i0, %o0
-; CHECK-NEXT: mov %i4, %o1
-; CHECK-NEXT: call __sync_val_compare_and_swap_8
-; CHECK-NEXT: mov %i5, %o2
-; CHECK-NEXT: xor %o0, %i4, %i3
-; CHECK-NEXT: xor %o1, %i5, %i4
-; CHECK-NEXT: or %i4, %i3, %i3
-; CHECK-NEXT: mov %o1, %i5
-; CHECK-NEXT: cmp %i3, 0
-; CHECK-NEXT: bne %icc, .LBB7_1
-; CHECK-NEXT: mov %o0, %i4
+; CHECK-NEXT: mov %i3, %o1
+; CHECK-NEXT: mov %i4, %o4
+; CHECK-NEXT: call __atomic_compare_exchange_8
+; CHECK-NEXT: mov %i4, %o5
+; CHECK-NEXT: cmp %o0, 0
+; CHECK-NEXT: be %icc, .LBB7_1
+; CHECK-NEXT: ldd [%fp+-8], %g2
; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end
-; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
-; CHECK-NEXT: mov %i4, %i0
+; CHECK-NEXT: mov %g2, %i0
; CHECK-NEXT: ret
-; CHECK-NEXT: restore %g0, %i5, %o1
+; CHECK-NEXT: restore %g0, %g3, %o1
%result = atomicrmw udec_wrap ptr %ptr, i64 %val seq_cst
ret i64 %result
}
LGTM
[Sparc] limit MaxAtomicSizeInBitsSupported to 32 for 32-bit Sparc. (llvm#81655) When in 32-bit mode, the backend doesn't currently implement 64-bit atomics, even though the hardware is capable if you have specified a V9 CPU. Thus, limit the width to 32-bit, for now, leaving behind a TODO. This fixes a regression triggered by PR llvm#73176. (cherry picked from commit c1a99b2)
Thanks for the quick fix! I'd already started a 2-stage build with the patch last night before going to bed; as expected, the link failure was gone and there were no regressions.