Skip to content

Commit 422dfea

Browse files
kaz7 authored and Simon Moll committed
[VE] enable unaligned load/store isel
Summary: Enable unaligned load/store isel for iN and fp32/64 and tests. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D73448
1 parent 879c825 commit 422dfea

File tree

10 files changed

+1250
-0
lines changed

10 files changed

+1250
-0
lines changed

llvm/lib/Target/VE/VEISelLowering.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,27 @@ bool VETargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
206206
return VT == MVT::f32 || VT == MVT::f64;
207207
}
208208

209+
/// Report whether VE supports unaligned memory accesses.
210+
///
211+
/// The generic contract of this hook: return true if the target allows
212+
/// unaligned accesses of the specified type in the given address space, and
213+
/// if so report through \p Fast (when non-null) whether such an access is
214+
/// "fast". This is used, for example, when an array copy/move/set is turned
215+
/// into a sequence of stores, so that the replacement cannot cause an
216+
/// alignment error (trap). The VE implementation ignores \p VT, \p AddrSpace,
217+
/// \p Align and the flags: it always returns true and sets \p *Fast to true.
218+
bool VETargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
219+
unsigned AddrSpace,
220+
unsigned Align,
221+
MachineMemOperand::Flags,
222+
bool *Fast) const {
223+
if (Fast) {
224+
// Fast is optional; when supplied, every access is reported as fast on VE.
225+
*Fast = true;
226+
}
227+
return true;
228+
}
229+
209230
VETargetLowering::VETargetLowering(const TargetMachine &TM,
210231
const VESubtarget &STI)
211232
: TargetLowering(TM), Subtarget(&STI) {

llvm/lib/Target/VE/VEISelLowering.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,11 @@ class VETargetLowering : public TargetLowering {
7777

7878
bool isFPImmLegal(const APFloat &Imm, EVT VT,
7979
bool ForCodeSize) const override;
80+
/// Returns true if the target allows unaligned memory accesses of the
81+
/// specified type; if \p Fast is non-null it receives whether they are fast.
82+
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
83+
MachineMemOperand::Flags Flags,
84+
bool *Fast) const override;
8085
};
8186
} // namespace llvm
8287

llvm/test/CodeGen/VE/load-align1.ll

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s
2+
3+
@vi8 = common dso_local local_unnamed_addr global i8 0, align 1
4+
@vi16 = common dso_local local_unnamed_addr global i16 0, align 1
5+
@vi32 = common dso_local local_unnamed_addr global i32 0, align 1
6+
@vi64 = common dso_local local_unnamed_addr global i64 0, align 1
7+
@vf32 = common dso_local local_unnamed_addr global float 0.000000e+00, align 1
8+
@vf64 = common dso_local local_unnamed_addr global double 0.000000e+00, align 1
9+
10+
; Function Attrs: norecurse nounwind readonly
11+
define double @loadf64stk() {
12+
; CHECK-LABEL: loadf64stk:
13+
; CHECK: .LBB{{[0-9]+}}_2:
14+
; CHECK-NEXT: ld %s0, 184(,%s11)
15+
; CHECK-NEXT: or %s11, 0, %s9
16+
%addr = alloca double, align 1
17+
%1 = load double, double* %addr, align 1
18+
ret double %1
19+
}
20+
21+
; Function Attrs: norecurse nounwind readonly
22+
define float @loadf32stk() {
23+
; CHECK-LABEL: loadf32stk:
24+
; CHECK: .LBB{{[0-9]+}}_2:
25+
; CHECK-NEXT: ldu %s0, 188(,%s11)
26+
; CHECK-NEXT: or %s11, 0, %s9
27+
%addr = alloca float, align 1
28+
%1 = load float, float* %addr, align 1
29+
ret float %1
30+
}
31+
32+
; Function Attrs: norecurse nounwind readonly
33+
define i64 @loadi64stk() {
34+
; CHECK-LABEL: loadi64stk:
35+
; CHECK: .LBB{{[0-9]+}}_2:
36+
; CHECK-NEXT: ld %s0, 184(,%s11)
37+
; CHECK-NEXT: or %s11, 0, %s9
38+
%addr = alloca i64, align 1
39+
%1 = load i64, i64* %addr, align 1
40+
ret i64 %1
41+
}
42+
43+
; Function Attrs: norecurse nounwind readonly
44+
define i32 @loadi32stk() {
45+
; CHECK-LABEL: loadi32stk:
46+
; CHECK: .LBB{{[0-9]+}}_2:
47+
; CHECK-NEXT: ldl.sx %s0, 188(,%s11)
48+
; CHECK-NEXT: or %s11, 0, %s9
49+
%addr = alloca i32, align 1
50+
%1 = load i32, i32* %addr, align 1
51+
ret i32 %1
52+
}
53+
54+
; Function Attrs: norecurse nounwind readonly
55+
define i16 @loadi16stk() {
56+
; CHECK-LABEL: loadi16stk:
57+
; CHECK: .LBB{{[0-9]+}}_2:
58+
; CHECK-NEXT: ld2b.zx %s0, 190(,%s11)
59+
; CHECK-NEXT: or %s11, 0, %s9
60+
%addr = alloca i16, align 1
61+
%1 = load i16, i16* %addr, align 1
62+
ret i16 %1
63+
}
64+
65+
; Function Attrs: norecurse nounwind readonly
66+
define i8 @loadi8stk() {
67+
; CHECK-LABEL: loadi8stk:
68+
; CHECK: .LBB{{[0-9]+}}_2:
69+
; CHECK-NEXT: ld1b.zx %s0, 191(,%s11)
70+
; CHECK-NEXT: or %s11, 0, %s9
71+
%addr = alloca i8, align 1
72+
%1 = load i8, i8* %addr, align 1
73+
ret i8 %1
74+
}
75+
76+
; Function Attrs: norecurse nounwind readonly
77+
define double @loadf64com() {
78+
; CHECK-LABEL: loadf64com:
79+
; CHECK: .LBB{{[0-9]+}}_2:
80+
; CHECK-NEXT: lea %s0, vf64@lo
81+
; CHECK-NEXT: and %s0, %s0, (32)0
82+
; CHECK-NEXT: lea.sl %s0, vf64@hi(%s0)
83+
; CHECK-NEXT: ld %s0, (,%s0)
84+
; CHECK-NEXT: or %s11, 0, %s9
85+
%1 = load double, double* @vf64, align 1
86+
ret double %1
87+
}
88+
89+
; Function Attrs: norecurse nounwind readonly
90+
define float @loadf32com() {
91+
; CHECK-LABEL: loadf32com:
92+
; CHECK: .LBB{{[0-9]+}}_2:
93+
; CHECK-NEXT: lea %s0, vf32@lo
94+
; CHECK-NEXT: and %s0, %s0, (32)0
95+
; CHECK-NEXT: lea.sl %s0, vf32@hi(%s0)
96+
; CHECK-NEXT: ldu %s0, (,%s0)
97+
; CHECK-NEXT: or %s11, 0, %s9
98+
%1 = load float, float* @vf32, align 1
99+
ret float %1
100+
}
101+
102+
; Function Attrs: norecurse nounwind readonly
103+
define i64 @loadi64com() {
104+
; CHECK-LABEL: loadi64com:
105+
; CHECK: .LBB{{[0-9]+}}_2:
106+
; CHECK-NEXT: lea %s0, vi64@lo
107+
; CHECK-NEXT: and %s0, %s0, (32)0
108+
; CHECK-NEXT: lea.sl %s0, vi64@hi(%s0)
109+
; CHECK-NEXT: ld %s0, (,%s0)
110+
; CHECK-NEXT: or %s11, 0, %s9
111+
%1 = load i64, i64* @vi64, align 1
112+
ret i64 %1
113+
}
114+
115+
; Function Attrs: norecurse nounwind readonly
116+
define i32 @loadi32com() {
117+
; CHECK-LABEL: loadi32com:
118+
; CHECK: .LBB{{[0-9]+}}_2:
119+
; CHECK-NEXT: lea %s0, vi32@lo
120+
; CHECK-NEXT: and %s0, %s0, (32)0
121+
; CHECK-NEXT: lea.sl %s0, vi32@hi(%s0)
122+
; CHECK-NEXT: ldl.sx %s0, (,%s0)
123+
; CHECK-NEXT: or %s11, 0, %s9
124+
%1 = load i32, i32* @vi32, align 1
125+
ret i32 %1
126+
}
127+
128+
; Function Attrs: norecurse nounwind readonly
129+
define i16 @loadi16com() {
130+
; CHECK-LABEL: loadi16com:
131+
; CHECK: .LBB{{[0-9]+}}_2:
132+
; CHECK-NEXT: lea %s0, vi16@lo
133+
; CHECK-NEXT: and %s0, %s0, (32)0
134+
; CHECK-NEXT: lea.sl %s0, vi16@hi(%s0)
135+
; CHECK-NEXT: ld2b.zx %s0, (,%s0)
136+
; CHECK-NEXT: or %s11, 0, %s9
137+
%1 = load i16, i16* @vi16, align 1
138+
ret i16 %1
139+
}
140+
141+
; Function Attrs: norecurse nounwind readonly
142+
define i8 @loadi8com() {
143+
; CHECK-LABEL: loadi8com:
144+
; CHECK: .LBB{{[0-9]+}}_2:
145+
; CHECK-NEXT: lea %s0, vi8@lo
146+
; CHECK-NEXT: and %s0, %s0, (32)0
147+
; CHECK-NEXT: lea.sl %s0, vi8@hi(%s0)
148+
; CHECK-NEXT: ld1b.zx %s0, (,%s0)
149+
; CHECK-NEXT: or %s11, 0, %s9
150+
%1 = load i8, i8* @vi8, align 1
151+
ret i8 %1
152+
}
153+

llvm/test/CodeGen/VE/load-align2.ll

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s
2+
3+
@vi8 = common dso_local local_unnamed_addr global i8 0, align 2
4+
@vi16 = common dso_local local_unnamed_addr global i16 0, align 2
5+
@vi32 = common dso_local local_unnamed_addr global i32 0, align 2
6+
@vi64 = common dso_local local_unnamed_addr global i64 0, align 2
7+
@vf32 = common dso_local local_unnamed_addr global float 0.000000e+00, align 2
8+
@vf64 = common dso_local local_unnamed_addr global double 0.000000e+00, align 2
9+
10+
; Function Attrs: norecurse nounwind readonly
11+
define double @loadf64stk() {
12+
; CHECK-LABEL: loadf64stk:
13+
; CHECK: .LBB{{[0-9]+}}_2:
14+
; CHECK-NEXT: ld %s0, 184(,%s11)
15+
; CHECK-NEXT: or %s11, 0, %s9
16+
%addr = alloca double, align 2
17+
%1 = load double, double* %addr, align 2
18+
ret double %1
19+
}
20+
21+
; Function Attrs: norecurse nounwind readonly
22+
define float @loadf32stk() {
23+
; CHECK-LABEL: loadf32stk:
24+
; CHECK: .LBB{{[0-9]+}}_2:
25+
; CHECK-NEXT: ldu %s0, 188(,%s11)
26+
; CHECK-NEXT: or %s11, 0, %s9
27+
%addr = alloca float, align 2
28+
%1 = load float, float* %addr, align 2
29+
ret float %1
30+
}
31+
32+
; Function Attrs: norecurse nounwind readonly
33+
define i64 @loadi64stk() {
34+
; CHECK-LABEL: loadi64stk:
35+
; CHECK: .LBB{{[0-9]+}}_2:
36+
; CHECK-NEXT: ld %s0, 184(,%s11)
37+
; CHECK-NEXT: or %s11, 0, %s9
38+
%addr = alloca i64, align 2
39+
%1 = load i64, i64* %addr, align 2
40+
ret i64 %1
41+
}
42+
43+
; Function Attrs: norecurse nounwind readonly
44+
define i32 @loadi32stk() {
45+
; CHECK-LABEL: loadi32stk:
46+
; CHECK: .LBB{{[0-9]+}}_2:
47+
; CHECK-NEXT: ldl.sx %s0, 188(,%s11)
48+
; CHECK-NEXT: or %s11, 0, %s9
49+
%addr = alloca i32, align 2
50+
%1 = load i32, i32* %addr, align 2
51+
ret i32 %1
52+
}
53+
54+
; Function Attrs: norecurse nounwind readonly
55+
define i16 @loadi16stk() {
56+
; CHECK-LABEL: loadi16stk:
57+
; CHECK: .LBB{{[0-9]+}}_2:
58+
; CHECK-NEXT: ld2b.zx %s0, 190(,%s11)
59+
; CHECK-NEXT: or %s11, 0, %s9
60+
%addr = alloca i16, align 2
61+
%1 = load i16, i16* %addr, align 2
62+
ret i16 %1
63+
}
64+
65+
; Function Attrs: norecurse nounwind readonly
66+
define i8 @loadi8stk() {
67+
; CHECK-LABEL: loadi8stk:
68+
; CHECK: .LBB{{[0-9]+}}_2:
69+
; CHECK-NEXT: ld1b.zx %s0, 190(,%s11)
70+
; CHECK-NEXT: or %s11, 0, %s9
71+
%addr = alloca i8, align 2
72+
%1 = load i8, i8* %addr, align 2
73+
ret i8 %1
74+
}
75+
76+
; Function Attrs: norecurse nounwind readonly
77+
define double @loadf64com() {
78+
; CHECK-LABEL: loadf64com:
79+
; CHECK: .LBB{{[0-9]+}}_2:
80+
; CHECK-NEXT: lea %s0, vf64@lo
81+
; CHECK-NEXT: and %s0, %s0, (32)0
82+
; CHECK-NEXT: lea.sl %s0, vf64@hi(%s0)
83+
; CHECK-NEXT: ld %s0, (,%s0)
84+
; CHECK-NEXT: or %s11, 0, %s9
85+
%1 = load double, double* @vf64, align 2
86+
ret double %1
87+
}
88+
89+
; Function Attrs: norecurse nounwind readonly
90+
define float @loadf32com() {
91+
; CHECK-LABEL: loadf32com:
92+
; CHECK: .LBB{{[0-9]+}}_2:
93+
; CHECK-NEXT: lea %s0, vf32@lo
94+
; CHECK-NEXT: and %s0, %s0, (32)0
95+
; CHECK-NEXT: lea.sl %s0, vf32@hi(%s0)
96+
; CHECK-NEXT: ldu %s0, (,%s0)
97+
; CHECK-NEXT: or %s11, 0, %s9
98+
%1 = load float, float* @vf32, align 2
99+
ret float %1
100+
}
101+
102+
; Function Attrs: norecurse nounwind readonly
103+
define i64 @loadi64com() {
104+
; CHECK-LABEL: loadi64com:
105+
; CHECK: .LBB{{[0-9]+}}_2:
106+
; CHECK-NEXT: lea %s0, vi64@lo
107+
; CHECK-NEXT: and %s0, %s0, (32)0
108+
; CHECK-NEXT: lea.sl %s0, vi64@hi(%s0)
109+
; CHECK-NEXT: ld %s0, (,%s0)
110+
; CHECK-NEXT: or %s11, 0, %s9
111+
%1 = load i64, i64* @vi64, align 2
112+
ret i64 %1
113+
}
114+
115+
; Function Attrs: norecurse nounwind readonly
116+
define i32 @loadi32com() {
117+
; CHECK-LABEL: loadi32com:
118+
; CHECK: .LBB{{[0-9]+}}_2:
119+
; CHECK-NEXT: lea %s0, vi32@lo
120+
; CHECK-NEXT: and %s0, %s0, (32)0
121+
; CHECK-NEXT: lea.sl %s0, vi32@hi(%s0)
122+
; CHECK-NEXT: ldl.sx %s0, (,%s0)
123+
; CHECK-NEXT: or %s11, 0, %s9
124+
%1 = load i32, i32* @vi32, align 2
125+
ret i32 %1
126+
}
127+
128+
; Function Attrs: norecurse nounwind readonly
129+
define i16 @loadi16com() {
130+
; CHECK-LABEL: loadi16com:
131+
; CHECK: .LBB{{[0-9]+}}_2:
132+
; CHECK-NEXT: lea %s0, vi16@lo
133+
; CHECK-NEXT: and %s0, %s0, (32)0
134+
; CHECK-NEXT: lea.sl %s0, vi16@hi(%s0)
135+
; CHECK-NEXT: ld2b.zx %s0, (,%s0)
136+
; CHECK-NEXT: or %s11, 0, %s9
137+
%1 = load i16, i16* @vi16, align 2
138+
ret i16 %1
139+
}
140+
141+
; Function Attrs: norecurse nounwind readonly
142+
define i8 @loadi8com() {
143+
; CHECK-LABEL: loadi8com:
144+
; CHECK: .LBB{{[0-9]+}}_2:
145+
; CHECK-NEXT: lea %s0, vi8@lo
146+
; CHECK-NEXT: and %s0, %s0, (32)0
147+
; CHECK-NEXT: lea.sl %s0, vi8@hi(%s0)
148+
; CHECK-NEXT: ld1b.zx %s0, (,%s0)
149+
; CHECK-NEXT: or %s11, 0, %s9
150+
%1 = load i8, i8* @vi8, align 2
151+
ret i8 %1
152+
}
153+

0 commit comments

Comments
 (0)