Skip to content

Commit 775c60c

Browse files
author
Kane Wang
committed
[RISCV][GlobalISel] Legalize and select G_ATOMICRMW_ADD instruction
This patch adds legalization and instruction selection support for the G_ATOMICRMW_ADD opcode in the RISCV GlobalISel backend.
1 parent 3b4775d commit 775c60c

File tree

7 files changed, with 505 additions and 2 deletions.

llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -692,6 +692,11 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
692692
.customIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
693693
typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST)));
694694

695+
getActionDefinitionsBuilder(G_ATOMICRMW_ADD)
696+
.legalFor(ST.hasStdExtA() && ST.hasStdExtZabha(), {{sXLen, p0}})
697+
.libcallFor(!ST.hasStdExtA(), {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}})
698+
.clampScalar(0, sXLen, sXLen);
699+
695700
getLegacyLegalizerInfo().computeTables();
696701
verify(*ST.getInstrInfo());
697702
}
Lines changed: 182 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,182 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=riscv32 -mattr=+a,+zabha -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32IA-ZABHA
3+
; RUN: llc -mtriple=riscv32 -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
4+
; RUN: llc -mtriple=riscv64 -mattr=+a,+zabha -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64IA-ZABHA
5+
; RUN: llc -mtriple=riscv64 -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64
6+
7+
define i8 @atomicrmw_add_i8(ptr %ptr, i8 %rhs) {
8+
; RV32IA-ZABHA-LABEL: atomicrmw_add_i8:
9+
; RV32IA-ZABHA: # %bb.0:
10+
; RV32IA-ZABHA-NEXT: amoadd.b.aqrl a0, a1, (a0)
11+
; RV32IA-ZABHA-NEXT: ret
12+
;
13+
; RV32-LABEL: atomicrmw_add_i8:
14+
; RV32: # %bb.0:
15+
; RV32-NEXT: addi sp, sp, -16
16+
; RV32-NEXT: .cfi_def_cfa_offset 16
17+
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
18+
; RV32-NEXT: .cfi_offset ra, -4
19+
; RV32-NEXT: li a2, 5
20+
; RV32-NEXT: call __atomic_fetch_add_1
21+
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
22+
; RV32-NEXT: .cfi_restore ra
23+
; RV32-NEXT: addi sp, sp, 16
24+
; RV32-NEXT: .cfi_def_cfa_offset 0
25+
; RV32-NEXT: ret
26+
;
27+
; RV64IA-ZABHA-LABEL: atomicrmw_add_i8:
28+
; RV64IA-ZABHA: # %bb.0:
29+
; RV64IA-ZABHA-NEXT: amoadd.b.aqrl a0, a1, (a0)
30+
; RV64IA-ZABHA-NEXT: ret
31+
;
32+
; RV64-LABEL: atomicrmw_add_i8:
33+
; RV64: # %bb.0:
34+
; RV64-NEXT: addi sp, sp, -16
35+
; RV64-NEXT: .cfi_def_cfa_offset 16
36+
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
37+
; RV64-NEXT: .cfi_offset ra, -8
38+
; RV64-NEXT: li a2, 5
39+
; RV64-NEXT: call __atomic_fetch_add_1
40+
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
41+
; RV64-NEXT: .cfi_restore ra
42+
; RV64-NEXT: addi sp, sp, 16
43+
; RV64-NEXT: .cfi_def_cfa_offset 0
44+
; RV64-NEXT: ret
45+
%res = atomicrmw add ptr %ptr, i8 %rhs seq_cst
46+
ret i8 %res
47+
}
48+
49+
define i16 @atomicrmw_add_i16(ptr %ptr, i16 %rhs) {
50+
; RV32IA-ZABHA-LABEL: atomicrmw_add_i16:
51+
; RV32IA-ZABHA: # %bb.0:
52+
; RV32IA-ZABHA-NEXT: amoadd.h.aqrl a0, a1, (a0)
53+
; RV32IA-ZABHA-NEXT: ret
54+
;
55+
; RV32-LABEL: atomicrmw_add_i16:
56+
; RV32: # %bb.0:
57+
; RV32-NEXT: addi sp, sp, -16
58+
; RV32-NEXT: .cfi_def_cfa_offset 16
59+
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
60+
; RV32-NEXT: .cfi_offset ra, -4
61+
; RV32-NEXT: li a2, 5
62+
; RV32-NEXT: call __atomic_fetch_add_2
63+
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
64+
; RV32-NEXT: .cfi_restore ra
65+
; RV32-NEXT: addi sp, sp, 16
66+
; RV32-NEXT: .cfi_def_cfa_offset 0
67+
; RV32-NEXT: ret
68+
;
69+
; RV64IA-ZABHA-LABEL: atomicrmw_add_i16:
70+
; RV64IA-ZABHA: # %bb.0:
71+
; RV64IA-ZABHA-NEXT: amoadd.h.aqrl a0, a1, (a0)
72+
; RV64IA-ZABHA-NEXT: ret
73+
;
74+
; RV64-LABEL: atomicrmw_add_i16:
75+
; RV64: # %bb.0:
76+
; RV64-NEXT: addi sp, sp, -16
77+
; RV64-NEXT: .cfi_def_cfa_offset 16
78+
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
79+
; RV64-NEXT: .cfi_offset ra, -8
80+
; RV64-NEXT: li a2, 5
81+
; RV64-NEXT: call __atomic_fetch_add_2
82+
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
83+
; RV64-NEXT: .cfi_restore ra
84+
; RV64-NEXT: addi sp, sp, 16
85+
; RV64-NEXT: .cfi_def_cfa_offset 0
86+
; RV64-NEXT: ret
87+
%res = atomicrmw add ptr %ptr, i16 %rhs seq_cst
88+
ret i16 %res
89+
}
90+
91+
define i32 @atomicrmw_add_i32(ptr %ptr, i32 %rhs) {
92+
; RV32IA-ZABHA-LABEL: atomicrmw_add_i32:
93+
; RV32IA-ZABHA: # %bb.0:
94+
; RV32IA-ZABHA-NEXT: amoadd.w.aqrl a0, a1, (a0)
95+
; RV32IA-ZABHA-NEXT: ret
96+
;
97+
; RV32-LABEL: atomicrmw_add_i32:
98+
; RV32: # %bb.0:
99+
; RV32-NEXT: addi sp, sp, -16
100+
; RV32-NEXT: .cfi_def_cfa_offset 16
101+
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
102+
; RV32-NEXT: .cfi_offset ra, -4
103+
; RV32-NEXT: li a2, 5
104+
; RV32-NEXT: call __atomic_fetch_add_4
105+
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
106+
; RV32-NEXT: .cfi_restore ra
107+
; RV32-NEXT: addi sp, sp, 16
108+
; RV32-NEXT: .cfi_def_cfa_offset 0
109+
; RV32-NEXT: ret
110+
;
111+
; RV64IA-ZABHA-LABEL: atomicrmw_add_i32:
112+
; RV64IA-ZABHA: # %bb.0:
113+
; RV64IA-ZABHA-NEXT: amoadd.w.aqrl a0, a1, (a0)
114+
; RV64IA-ZABHA-NEXT: ret
115+
;
116+
; RV64-LABEL: atomicrmw_add_i32:
117+
; RV64: # %bb.0:
118+
; RV64-NEXT: addi sp, sp, -16
119+
; RV64-NEXT: .cfi_def_cfa_offset 16
120+
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
121+
; RV64-NEXT: .cfi_offset ra, -8
122+
; RV64-NEXT: li a2, 5
123+
; RV64-NEXT: call __atomic_fetch_add_4
124+
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
125+
; RV64-NEXT: .cfi_restore ra
126+
; RV64-NEXT: addi sp, sp, 16
127+
; RV64-NEXT: .cfi_def_cfa_offset 0
128+
; RV64-NEXT: ret
129+
%res = atomicrmw add ptr %ptr, i32 %rhs seq_cst
130+
ret i32 %res
131+
}
132+
133+
define i64 @atomicrmw_add_i64(ptr %ptr, i64 %rhs) {
134+
; RV32IA-ZABHA-LABEL: atomicrmw_add_i64:
135+
; RV32IA-ZABHA: # %bb.0:
136+
; RV32IA-ZABHA-NEXT: addi sp, sp, -16
137+
; RV32IA-ZABHA-NEXT: .cfi_def_cfa_offset 16
138+
; RV32IA-ZABHA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
139+
; RV32IA-ZABHA-NEXT: .cfi_offset ra, -4
140+
; RV32IA-ZABHA-NEXT: li a3, 5
141+
; RV32IA-ZABHA-NEXT: call __atomic_fetch_add_8
142+
; RV32IA-ZABHA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
143+
; RV32IA-ZABHA-NEXT: .cfi_restore ra
144+
; RV32IA-ZABHA-NEXT: addi sp, sp, 16
145+
; RV32IA-ZABHA-NEXT: .cfi_def_cfa_offset 0
146+
; RV32IA-ZABHA-NEXT: ret
147+
;
148+
; RV32-LABEL: atomicrmw_add_i64:
149+
; RV32: # %bb.0:
150+
; RV32-NEXT: addi sp, sp, -16
151+
; RV32-NEXT: .cfi_def_cfa_offset 16
152+
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
153+
; RV32-NEXT: .cfi_offset ra, -4
154+
; RV32-NEXT: li a3, 5
155+
; RV32-NEXT: call __atomic_fetch_add_8
156+
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
157+
; RV32-NEXT: .cfi_restore ra
158+
; RV32-NEXT: addi sp, sp, 16
159+
; RV32-NEXT: .cfi_def_cfa_offset 0
160+
; RV32-NEXT: ret
161+
;
162+
; RV64IA-ZABHA-LABEL: atomicrmw_add_i64:
163+
; RV64IA-ZABHA: # %bb.0:
164+
; RV64IA-ZABHA-NEXT: amoadd.d.aqrl a0, a1, (a0)
165+
; RV64IA-ZABHA-NEXT: ret
166+
;
167+
; RV64-LABEL: atomicrmw_add_i64:
168+
; RV64: # %bb.0:
169+
; RV64-NEXT: addi sp, sp, -16
170+
; RV64-NEXT: .cfi_def_cfa_offset 16
171+
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
172+
; RV64-NEXT: .cfi_offset ra, -8
173+
; RV64-NEXT: li a2, 5
174+
; RV64-NEXT: call __atomic_fetch_add_8
175+
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
176+
; RV64-NEXT: .cfi_restore ra
177+
; RV64-NEXT: addi sp, sp, 16
178+
; RV64-NEXT: .cfi_def_cfa_offset 0
179+
; RV64-NEXT: ret
180+
%res = atomicrmw add ptr %ptr, i64 %rhs seq_cst
181+
ret i64 %res
182+
}
Lines changed: 72 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,72 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=riscv32 -mattr=+a,+zabha -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
3+
4+
---
5+
name: atomicrmw_add_i8_monotonic
6+
legalized: true
7+
regBankSelected: true
8+
tracksRegLiveness: true
9+
body: |
10+
bb.0.entry:
11+
liveins: $x10
12+
13+
; CHECK-LABEL: name: atomicrmw_add_i8_monotonic
14+
; CHECK: liveins: $x10
15+
; CHECK-NEXT: {{ $}}
16+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
17+
; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
18+
; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY]], [[ADDI]] :: (load store monotonic (s8))
19+
; CHECK-NEXT: $x10 = COPY [[AMOADD_B]]
20+
; CHECK-NEXT: PseudoRET implicit $x10
21+
%0:gprb(p0) = COPY $x10
22+
%1:gprb(s32) = G_CONSTANT i32 1
23+
%2:gprb(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store monotonic (s8))
24+
$x10 = COPY %2(s32)
25+
PseudoRET implicit $x10
26+
...
27+
---
28+
name: atomicrmw_add_i16_monotonic
29+
legalized: true
30+
regBankSelected: true
31+
tracksRegLiveness: true
32+
body: |
33+
bb.0.entry:
34+
liveins: $x10
35+
36+
; CHECK-LABEL: name: atomicrmw_add_i16_monotonic
37+
; CHECK: liveins: $x10
38+
; CHECK-NEXT: {{ $}}
39+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
40+
; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
41+
; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[COPY]], [[ADDI]] :: (load store monotonic (s16))
42+
; CHECK-NEXT: $x10 = COPY [[AMOADD_H]]
43+
; CHECK-NEXT: PseudoRET implicit $x10
44+
%0:gprb(p0) = COPY $x10
45+
%1:gprb(s32) = G_CONSTANT i32 1
46+
%2:gprb(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store monotonic (s16))
47+
$x10 = COPY %2(s32)
48+
PseudoRET implicit $x10
49+
...
50+
---
51+
name: atomicrmw_add_i32_monotonic
52+
legalized: true
53+
regBankSelected: true
54+
tracksRegLiveness: true
55+
body: |
56+
bb.0.entry:
57+
liveins: $x10
58+
59+
; CHECK-LABEL: name: atomicrmw_add_i32_monotonic
60+
; CHECK: liveins: $x10
61+
; CHECK-NEXT: {{ $}}
62+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
63+
; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
64+
; CHECK-NEXT: [[AMOADD_W:%[0-9]+]]:gpr = AMOADD_W [[COPY]], [[ADDI]] :: (load store monotonic (s32))
65+
; CHECK-NEXT: $x10 = COPY [[AMOADD_W]]
66+
; CHECK-NEXT: PseudoRET implicit $x10
67+
%0:gprb(p0) = COPY $x10
68+
%1:gprb(s32) = G_CONSTANT i32 1
69+
%2:gprb(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store monotonic (s32))
70+
$x10 = COPY %2(s32)
71+
PseudoRET implicit $x10
72+
...
Lines changed: 95 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,95 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=riscv64 -mattr=+a,+zabha -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
3+
4+
---
5+
name: atomicrmw_add_i8_monotonic
6+
legalized: true
7+
regBankSelected: true
8+
tracksRegLiveness: true
9+
body: |
10+
bb.0.entry:
11+
liveins: $x10
12+
13+
; CHECK-LABEL: name: atomicrmw_add_i8_monotonic
14+
; CHECK: liveins: $x10
15+
; CHECK-NEXT: {{ $}}
16+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
17+
; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
18+
; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY]], [[ADDI]] :: (load store monotonic (s8))
19+
; CHECK-NEXT: $x10 = COPY [[AMOADD_B]]
20+
; CHECK-NEXT: PseudoRET implicit $x10
21+
%0:gprb(p0) = COPY $x10
22+
%1:gprb(s64) = G_CONSTANT i64 1
23+
%2:gprb(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store monotonic (s8))
24+
$x10 = COPY %2(s64)
25+
PseudoRET implicit $x10
26+
...
27+
---
28+
name: atomicrmw_add_i16_monotonic
29+
legalized: true
30+
regBankSelected: true
31+
tracksRegLiveness: true
32+
body: |
33+
bb.0.entry:
34+
liveins: $x10
35+
36+
; CHECK-LABEL: name: atomicrmw_add_i16_monotonic
37+
; CHECK: liveins: $x10
38+
; CHECK-NEXT: {{ $}}
39+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
40+
; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
41+
; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[COPY]], [[ADDI]] :: (load store monotonic (s16))
42+
; CHECK-NEXT: $x10 = COPY [[AMOADD_H]]
43+
; CHECK-NEXT: PseudoRET implicit $x10
44+
%0:gprb(p0) = COPY $x10
45+
%1:gprb(s64) = G_CONSTANT i64 1
46+
%2:gprb(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store monotonic (s16))
47+
$x10 = COPY %2(s64)
48+
PseudoRET implicit $x10
49+
...
50+
---
51+
name: atomicrmw_add_i32_monotonic
52+
legalized: true
53+
regBankSelected: true
54+
tracksRegLiveness: true
55+
body: |
56+
bb.0.entry:
57+
liveins: $x10
58+
59+
; CHECK-LABEL: name: atomicrmw_add_i32_monotonic
60+
; CHECK: liveins: $x10
61+
; CHECK-NEXT: {{ $}}
62+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
63+
; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
64+
; CHECK-NEXT: [[AMOADD_W:%[0-9]+]]:gpr = AMOADD_W [[COPY]], [[ADDI]] :: (load store monotonic (s32))
65+
; CHECK-NEXT: $x10 = COPY [[AMOADD_W]]
66+
; CHECK-NEXT: PseudoRET implicit $x10
67+
%0:gprb(p0) = COPY $x10
68+
%1:gprb(s64) = G_CONSTANT i64 1
69+
%2:gprb(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store monotonic (s32))
70+
$x10 = COPY %2(s64)
71+
PseudoRET implicit $x10
72+
...
73+
---
74+
name: atomicrmw_add_i64_monotonic
75+
legalized: true
76+
regBankSelected: true
77+
tracksRegLiveness: true
78+
body: |
79+
bb.0.entry:
80+
liveins: $x10
81+
82+
; CHECK-LABEL: name: atomicrmw_add_i64_monotonic
83+
; CHECK: liveins: $x10
84+
; CHECK-NEXT: {{ $}}
85+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
86+
; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
87+
; CHECK-NEXT: [[AMOADD_D:%[0-9]+]]:gpr = AMOADD_D [[COPY]], [[ADDI]] :: (load store monotonic (s64))
88+
; CHECK-NEXT: $x10 = COPY [[AMOADD_D]]
89+
; CHECK-NEXT: PseudoRET implicit $x10
90+
%0:gprb(p0) = COPY $x10
91+
%1:gprb(s64) = G_CONSTANT i64 1
92+
%2:gprb(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store monotonic (s64))
93+
$x10 = COPY %2(s64)
94+
PseudoRET implicit $x10
95+
...

llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -222,8 +222,8 @@
222222
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
223223
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
224224
# DEBUG-NEXT: G_ATOMICRMW_ADD (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
225-
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
226-
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
225+
# DEBUG-NEXT: .. the first uncovered type index: 2, OK
226+
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
227227
# DEBUG-NEXT: G_ATOMICRMW_SUB (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
228228
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
229229
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined

0 commit comments

Comments
 (0)