#include "TargetInfo.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsBPF.h"
#include "llvm/TargetParser/AArch64TargetParser.h"

using namespace clang;
using namespace CodeGen;
using namespace llvm;
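// Many MSVC builtins exist on x64, ARM and AArch64; to avoid duplicating the
// lowering, the AArch64-specific builtin IDs are first translated to the
// target-independent CodeGenFunction::MSVCIntrin enum. Builtins that need no
// translation map to std::nullopt.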
static std::optional<CodeGenFunction::MSVCIntrin>
translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
  using MSVCIntrin = CodeGenFunction::MSVCIntrin;
  switch (BuiltinID) {
  default:
    return std::nullopt;
  case clang::AArch64::BI_BitScanForward:
  case clang::AArch64::BI_BitScanForward64:
    return MSVCIntrin::_BitScanForward;
  case clang::AArch64::BI_BitScanReverse:
  case clang::AArch64::BI_BitScanReverse64:
    return MSVCIntrin::_BitScanReverse;
  case clang::AArch64::BI_InterlockedAnd64:
    return MSVCIntrin::_InterlockedAnd;
  case clang::AArch64::BI_InterlockedExchange64:
    return MSVCIntrin::_InterlockedExchange;
  case clang::AArch64::BI_InterlockedExchangeAdd64:
    return MSVCIntrin::_InterlockedExchangeAdd;
  case clang::AArch64::BI_InterlockedExchangeSub64:
    return MSVCIntrin::_InterlockedExchangeSub;
  case clang::AArch64::BI_InterlockedOr64:
    return MSVCIntrin::_InterlockedOr;
  case clang::AArch64::BI_InterlockedXor64:
    return MSVCIntrin::_InterlockedXor;
  case clang::AArch64::BI_InterlockedDecrement64:
    return MSVCIntrin::_InterlockedDecrement;
  case clang::AArch64::BI_InterlockedIncrement64:
    return MSVCIntrin::_InterlockedIncrement;
  case clang::AArch64::BI_InterlockedExchangeAdd8_acq:
  case clang::AArch64::BI_InterlockedExchangeAdd16_acq:
  case clang::AArch64::BI_InterlockedExchangeAdd_acq:
  case clang::AArch64::BI_InterlockedExchangeAdd64_acq:
    return MSVCIntrin::_InterlockedExchangeAdd_acq;
  case clang::AArch64::BI_InterlockedExchangeAdd8_rel:
  case clang::AArch64::BI_InterlockedExchangeAdd16_rel:
  case clang::AArch64::BI_InterlockedExchangeAdd_rel:
  case clang::AArch64::BI_InterlockedExchangeAdd64_rel:
    return MSVCIntrin::_InterlockedExchangeAdd_rel;
  case clang::AArch64::BI_InterlockedExchangeAdd8_nf:
  case clang::AArch64::BI_InterlockedExchangeAdd16_nf:
  case clang::AArch64::BI_InterlockedExchangeAdd_nf:
  case clang::AArch64::BI_InterlockedExchangeAdd64_nf:
    return MSVCIntrin::_InterlockedExchangeAdd_nf;
  case clang::AArch64::BI_InterlockedExchange8_acq:
  case clang::AArch64::BI_InterlockedExchange16_acq:
  case clang::AArch64::BI_InterlockedExchange_acq:
  case clang::AArch64::BI_InterlockedExchange64_acq:
  case clang::AArch64::BI_InterlockedExchangePointer_acq:
    return MSVCIntrin::_InterlockedExchange_acq;
  case clang::AArch64::BI_InterlockedExchange8_rel:
  case clang::AArch64::BI_InterlockedExchange16_rel:
  case clang::AArch64::BI_InterlockedExchange_rel:
  case clang::AArch64::BI_InterlockedExchange64_rel:
  case clang::AArch64::BI_InterlockedExchangePointer_rel:
    return MSVCIntrin::_InterlockedExchange_rel;
  case clang::AArch64::BI_InterlockedExchange8_nf:
  case clang::AArch64::BI_InterlockedExchange16_nf:
  case clang::AArch64::BI_InterlockedExchange_nf:
  case clang::AArch64::BI_InterlockedExchange64_nf:
  case clang::AArch64::BI_InterlockedExchangePointer_nf:
    return MSVCIntrin::_InterlockedExchange_nf;
  case clang::AArch64::BI_InterlockedCompareExchange8_acq:
  case clang::AArch64::BI_InterlockedCompareExchange16_acq:
  case clang::AArch64::BI_InterlockedCompareExchange_acq:
  case clang::AArch64::BI_InterlockedCompareExchange64_acq:
  case clang::AArch64::BI_InterlockedCompareExchangePointer_acq:
    return MSVCIntrin::_InterlockedCompareExchange_acq;
  case clang::AArch64::BI_InterlockedCompareExchange8_rel:
  case clang::AArch64::BI_InterlockedCompareExchange16_rel:
  case clang::AArch64::BI_InterlockedCompareExchange_rel:
  case clang::AArch64::BI_InterlockedCompareExchange64_rel:
  case clang::AArch64::BI_InterlockedCompareExchangePointer_rel:
    return MSVCIntrin::_InterlockedCompareExchange_rel;
  case clang::AArch64::BI_InterlockedCompareExchange8_nf:
  case clang::AArch64::BI_InterlockedCompareExchange16_nf:
  case clang::AArch64::BI_InterlockedCompareExchange_nf:
  case clang::AArch64::BI_InterlockedCompareExchange64_nf:
    return MSVCIntrin::_InterlockedCompareExchange_nf;
  case clang::AArch64::BI_InterlockedCompareExchange128:
    return MSVCIntrin::_InterlockedCompareExchange128;
  case clang::AArch64::BI_InterlockedCompareExchange128_acq:
    return MSVCIntrin::_InterlockedCompareExchange128_acq;
  case clang::AArch64::BI_InterlockedCompareExchange128_nf:
    return MSVCIntrin::_InterlockedCompareExchange128_nf;
  case clang::AArch64::BI_InterlockedCompareExchange128_rel:
    return MSVCIntrin::_InterlockedCompareExchange128_rel;
  case clang::AArch64::BI_InterlockedOr8_acq:
  case clang::AArch64::BI_InterlockedOr16_acq:
  case clang::AArch64::BI_InterlockedOr_acq:
  case clang::AArch64::BI_InterlockedOr64_acq:
    return MSVCIntrin::_InterlockedOr_acq;
  case clang::AArch64::BI_InterlockedOr8_rel:
  case clang::AArch64::BI_InterlockedOr16_rel:
  case clang::AArch64::BI_InterlockedOr_rel:
  case clang::AArch64::BI_InterlockedOr64_rel:
    return MSVCIntrin::_InterlockedOr_rel;
  case clang::AArch64::BI_InterlockedOr8_nf:
  case clang::AArch64::BI_InterlockedOr16_nf:
  case clang::AArch64::BI_InterlockedOr_nf:
  case clang::AArch64::BI_InterlockedOr64_nf:
    return MSVCIntrin::_InterlockedOr_nf;
  case clang::AArch64::BI_InterlockedXor8_acq:
  case clang::AArch64::BI_InterlockedXor16_acq:
  case clang::AArch64::BI_InterlockedXor_acq:
  case clang::AArch64::BI_InterlockedXor64_acq:
    return MSVCIntrin::_InterlockedXor_acq;
  case clang::AArch64::BI_InterlockedXor8_rel:
  case clang::AArch64::BI_InterlockedXor16_rel:
  case clang::AArch64::BI_InterlockedXor_rel:
  case clang::AArch64::BI_InterlockedXor64_rel:
    return MSVCIntrin::_InterlockedXor_rel;
  case clang::AArch64::BI_InterlockedXor8_nf:
  case clang::AArch64::BI_InterlockedXor16_nf:
  case clang::AArch64::BI_InterlockedXor_nf:
  case clang::AArch64::BI_InterlockedXor64_nf:
    return MSVCIntrin::_InterlockedXor_nf;
  case clang::AArch64::BI_InterlockedAnd8_acq:
  case clang::AArch64::BI_InterlockedAnd16_acq:
  case clang::AArch64::BI_InterlockedAnd_acq:
  case clang::AArch64::BI_InterlockedAnd64_acq:
    return MSVCIntrin::_InterlockedAnd_acq;
  case clang::AArch64::BI_InterlockedAnd8_rel:
  case clang::AArch64::BI_InterlockedAnd16_rel:
  case clang::AArch64::BI_InterlockedAnd_rel:
  case clang::AArch64::BI_InterlockedAnd64_rel:
    return MSVCIntrin::_InterlockedAnd_rel;
  case clang::AArch64::BI_InterlockedAnd8_nf:
  case clang::AArch64::BI_InterlockedAnd16_nf:
  case clang::AArch64::BI_InterlockedAnd_nf:
  case clang::AArch64::BI_InterlockedAnd64_nf:
    return MSVCIntrin::_InterlockedAnd_nf;
  case clang::AArch64::BI_InterlockedIncrement16_acq:
  case clang::AArch64::BI_InterlockedIncrement_acq:
  case clang::AArch64::BI_InterlockedIncrement64_acq:
    return MSVCIntrin::_InterlockedIncrement_acq;
  case clang::AArch64::BI_InterlockedIncrement16_rel:
  case clang::AArch64::BI_InterlockedIncrement_rel:
  case clang::AArch64::BI_InterlockedIncrement64_rel:
    return MSVCIntrin::_InterlockedIncrement_rel;
  case clang::AArch64::BI_InterlockedIncrement16_nf:
  case clang::AArch64::BI_InterlockedIncrement_nf:
  case clang::AArch64::BI_InterlockedIncrement64_nf:
    return MSVCIntrin::_InterlockedIncrement_nf;
  case clang::AArch64::BI_InterlockedDecrement16_acq:
  case clang::AArch64::BI_InterlockedDecrement_acq:
  case clang::AArch64::BI_InterlockedDecrement64_acq:
    return MSVCIntrin::_InterlockedDecrement_acq;
  case clang::AArch64::BI_InterlockedDecrement16_rel:
  case clang::AArch64::BI_InterlockedDecrement_rel:
  case clang::AArch64::BI_InterlockedDecrement64_rel:
    return MSVCIntrin::_InterlockedDecrement_rel;
  case clang::AArch64::BI_InterlockedDecrement16_nf:
  case clang::AArch64::BI_InterlockedDecrement_nf:
  case clang::AArch64::BI_InterlockedDecrement64_nf:
    return MSVCIntrin::_InterlockedDecrement_nf;
  }
  llvm_unreachable("must return from switch");
}
static std::optional<CodeGenFunction::MSVCIntrin>
translateArmToMsvcIntrin(unsigned BuiltinID) {
  using MSVCIntrin = CodeGenFunction::MSVCIntrin;
  switch (BuiltinID) {
  default:
    return std::nullopt;
  case clang::ARM::BI_BitScanForward:
  case clang::ARM::BI_BitScanForward64:
    return MSVCIntrin::_BitScanForward;
  case clang::ARM::BI_BitScanReverse:
  case clang::ARM::BI_BitScanReverse64:
    return MSVCIntrin::_BitScanReverse;
  case clang::ARM::BI_InterlockedAnd64:
    return MSVCIntrin::_InterlockedAnd;
  case clang::ARM::BI_InterlockedExchange64:
    return MSVCIntrin::_InterlockedExchange;
  case clang::ARM::BI_InterlockedExchangeAdd64:
    return MSVCIntrin::_InterlockedExchangeAdd;
  case clang::ARM::BI_InterlockedExchangeSub64:
    return MSVCIntrin::_InterlockedExchangeSub;
  case clang::ARM::BI_InterlockedOr64:
    return MSVCIntrin::_InterlockedOr;
  case clang::ARM::BI_InterlockedXor64:
    return MSVCIntrin::_InterlockedXor;
  case clang::ARM::BI_InterlockedDecrement64:
    return MSVCIntrin::_InterlockedDecrement;
  case clang::ARM::BI_InterlockedIncrement64:
    return MSVCIntrin::_InterlockedIncrement;
  case clang::ARM::BI_InterlockedExchangeAdd8_acq:
  case clang::ARM::BI_InterlockedExchangeAdd16_acq:
  case clang::ARM::BI_InterlockedExchangeAdd_acq:
  case clang::ARM::BI_InterlockedExchangeAdd64_acq:
    return MSVCIntrin::_InterlockedExchangeAdd_acq;
  case clang::ARM::BI_InterlockedExchangeAdd8_rel:
  case clang::ARM::BI_InterlockedExchangeAdd16_rel:
  case clang::ARM::BI_InterlockedExchangeAdd_rel:
  case clang::ARM::BI_InterlockedExchangeAdd64_rel:
    return MSVCIntrin::_InterlockedExchangeAdd_rel;
  case clang::ARM::BI_InterlockedExchangeAdd8_nf:
  case clang::ARM::BI_InterlockedExchangeAdd16_nf:
  case clang::ARM::BI_InterlockedExchangeAdd_nf:
  case clang::ARM::BI_InterlockedExchangeAdd64_nf:
    return MSVCIntrin::_InterlockedExchangeAdd_nf;
  case clang::ARM::BI_InterlockedExchange8_acq:
  case clang::ARM::BI_InterlockedExchange16_acq:
  case clang::ARM::BI_InterlockedExchange_acq:
  case clang::ARM::BI_InterlockedExchange64_acq:
  case clang::ARM::BI_InterlockedExchangePointer_acq:
    return MSVCIntrin::_InterlockedExchange_acq;
  case clang::ARM::BI_InterlockedExchange8_rel:
  case clang::ARM::BI_InterlockedExchange16_rel:
  case clang::ARM::BI_InterlockedExchange_rel:
  case clang::ARM::BI_InterlockedExchange64_rel:
  case clang::ARM::BI_InterlockedExchangePointer_rel:
    return MSVCIntrin::_InterlockedExchange_rel;
  case clang::ARM::BI_InterlockedExchange8_nf:
  case clang::ARM::BI_InterlockedExchange16_nf:
  case clang::ARM::BI_InterlockedExchange_nf:
  case clang::ARM::BI_InterlockedExchange64_nf:
  case clang::ARM::BI_InterlockedExchangePointer_nf:
    return MSVCIntrin::_InterlockedExchange_nf;
  case clang::ARM::BI_InterlockedCompareExchange8_acq:
  case clang::ARM::BI_InterlockedCompareExchange16_acq:
  case clang::ARM::BI_InterlockedCompareExchange_acq:
  case clang::ARM::BI_InterlockedCompareExchange64_acq:
  case clang::ARM::BI_InterlockedCompareExchangePointer_acq:
    return MSVCIntrin::_InterlockedCompareExchange_acq;
  case clang::ARM::BI_InterlockedCompareExchange8_rel:
  case clang::ARM::BI_InterlockedCompareExchange16_rel:
  case clang::ARM::BI_InterlockedCompareExchange_rel:
  case clang::ARM::BI_InterlockedCompareExchange64_rel:
  case clang::ARM::BI_InterlockedCompareExchangePointer_rel:
    return MSVCIntrin::_InterlockedCompareExchange_rel;
  case clang::ARM::BI_InterlockedCompareExchange8_nf:
  case clang::ARM::BI_InterlockedCompareExchange16_nf:
  case clang::ARM::BI_InterlockedCompareExchange_nf:
  case clang::ARM::BI_InterlockedCompareExchange64_nf:
    return MSVCIntrin::_InterlockedCompareExchange_nf;
  case clang::ARM::BI_InterlockedOr8_acq:
  case clang::ARM::BI_InterlockedOr16_acq:
  case clang::ARM::BI_InterlockedOr_acq:
  case clang::ARM::BI_InterlockedOr64_acq:
    return MSVCIntrin::_InterlockedOr_acq;
  case clang::ARM::BI_InterlockedOr8_rel:
  case clang::ARM::BI_InterlockedOr16_rel:
  case clang::ARM::BI_InterlockedOr_rel:
  case clang::ARM::BI_InterlockedOr64_rel:
    return MSVCIntrin::_InterlockedOr_rel;
  case clang::ARM::BI_InterlockedOr8_nf:
  case clang::ARM::BI_InterlockedOr16_nf:
  case clang::ARM::BI_InterlockedOr_nf:
  case clang::ARM::BI_InterlockedOr64_nf:
    return MSVCIntrin::_InterlockedOr_nf;
  case clang::ARM::BI_InterlockedXor8_acq:
  case clang::ARM::BI_InterlockedXor16_acq:
  case clang::ARM::BI_InterlockedXor_acq:
  case clang::ARM::BI_InterlockedXor64_acq:
    return MSVCIntrin::_InterlockedXor_acq;
  case clang::ARM::BI_InterlockedXor8_rel:
  case clang::ARM::BI_InterlockedXor16_rel:
  case clang::ARM::BI_InterlockedXor_rel:
  case clang::ARM::BI_InterlockedXor64_rel:
    return MSVCIntrin::_InterlockedXor_rel;
  case clang::ARM::BI_InterlockedXor8_nf:
  case clang::ARM::BI_InterlockedXor16_nf:
  case clang::ARM::BI_InterlockedXor_nf:
  case clang::ARM::BI_InterlockedXor64_nf:
    return MSVCIntrin::_InterlockedXor_nf;
  case clang::ARM::BI_InterlockedAnd8_acq:
  case clang::ARM::BI_InterlockedAnd16_acq:
  case clang::ARM::BI_InterlockedAnd_acq:
  case clang::ARM::BI_InterlockedAnd64_acq:
    return MSVCIntrin::_InterlockedAnd_acq;
  case clang::ARM::BI_InterlockedAnd8_rel:
  case clang::ARM::BI_InterlockedAnd16_rel:
  case clang::ARM::BI_InterlockedAnd_rel:
  case clang::ARM::BI_InterlockedAnd64_rel:
    return MSVCIntrin::_InterlockedAnd_rel;
  case clang::ARM::BI_InterlockedAnd8_nf:
  case clang::ARM::BI_InterlockedAnd16_nf:
  case clang::ARM::BI_InterlockedAnd_nf:
  case clang::ARM::BI_InterlockedAnd64_nf:
    return MSVCIntrin::_InterlockedAnd_nf;
  case clang::ARM::BI_InterlockedIncrement16_acq:
  case clang::ARM::BI_InterlockedIncrement_acq:
  case clang::ARM::BI_InterlockedIncrement64_acq:
    return MSVCIntrin::_InterlockedIncrement_acq;
  case clang::ARM::BI_InterlockedIncrement16_rel:
  case clang::ARM::BI_InterlockedIncrement_rel:
  case clang::ARM::BI_InterlockedIncrement64_rel:
    return MSVCIntrin::_InterlockedIncrement_rel;
  case clang::ARM::BI_InterlockedIncrement16_nf:
  case clang::ARM::BI_InterlockedIncrement_nf:
  case clang::ARM::BI_InterlockedIncrement64_nf:
    return MSVCIntrin::_InterlockedIncrement_nf;
  case clang::ARM::BI_InterlockedDecrement16_acq:
  case clang::ARM::BI_InterlockedDecrement_acq:
  case clang::ARM::BI_InterlockedDecrement64_acq:
    return MSVCIntrin::_InterlockedDecrement_acq;
  case clang::ARM::BI_InterlockedDecrement16_rel:
  case clang::ARM::BI_InterlockedDecrement_rel:
  case clang::ARM::BI_InterlockedDecrement64_rel:
    return MSVCIntrin::_InterlockedDecrement_rel;
  case clang::ARM::BI_InterlockedDecrement16_nf:
  case clang::ARM::BI_InterlockedDecrement_nf:
  case clang::ARM::BI_InterlockedDecrement64_nf:
    return MSVCIntrin::_InterlockedDecrement_nf;
  }
  llvm_unreachable("must return from switch");
}
static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
                                                unsigned IntrinsicID,
                                                unsigned ConstrainedIntrinsicID,
                                                llvm::Type *Ty,
                                                ArrayRef<Value *> Args) {
  Function *F;
  if (CGF.Builder.getIsFPConstrained())
    F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty);
  else
    F = CGF.CGM.getIntrinsic(IntrinsicID, Ty);

  if (CGF.Builder.getIsFPConstrained())
    return CGF.Builder.CreateConstrainedFPCall(F, Args);
  return CGF.Builder.CreateCall(F, Args);
}
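// Translate a NeonTypeFlags value into the corresponding LLVM fixed vector
// type. The quad bit doubles the lane count (128-bit instead of 64-bit
// vectors) and V1Ty forces a single-lane vector; e.g. Int8 with the quad bit
// set yields <16 x i8> (8 << 1 lanes), without it <8 x i8>.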
static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,
                                          NeonTypeFlags TypeFlags,
                                          bool HasFastHalfType = true,
                                          bool V1Ty = false,
                                          bool AllowBFloatArgsAndRet = true) {
  int IsQuad = TypeFlags.isQuad();
  switch (TypeFlags.getEltType()) {
  case NeonTypeFlags::Int8:
  case NeonTypeFlags::Poly8:
  case NeonTypeFlags::MFloat8:
    return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
  case NeonTypeFlags::Int16:
  case NeonTypeFlags::Poly16:
    return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
  case NeonTypeFlags::BFloat16:
    if (AllowBFloatArgsAndRet)
      return llvm::FixedVectorType::get(CGF->BFloatTy, V1Ty ? 1 : (4 << IsQuad));
    else
      return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
  case NeonTypeFlags::Float16:
    if (HasFastHalfType)
      return llvm::FixedVectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
    else
      return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
  case NeonTypeFlags::Int32:
    return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
  case NeonTypeFlags::Int64:
  case NeonTypeFlags::Poly64:
    return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
  case NeonTypeFlags::Poly128:
    // poly128 is represented as v16i8 so existing patterns can match it.
    return llvm::FixedVectorType::get(CGF->Int8Ty, 16);
  case NeonTypeFlags::Float32:
    return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
  case NeonTypeFlags::Float64:
    return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
  }
  llvm_unreachable("Unknown vector element type!");
}
static llvm::FixedVectorType *GetFloatNeonType(CodeGenFunction *CGF,
                                               NeonTypeFlags IntTypeFlags) {
  int IsQuad = IntTypeFlags.isQuad();
  switch (IntTypeFlags.getEltType()) {
  case NeonTypeFlags::Int16:
    return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad));
  case NeonTypeFlags::Int32:
    return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad));
  case NeonTypeFlags::Int64:
    return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad));
  default:
    llvm_unreachable("Type can't be converted to floating-point!");
  }
}
Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C,
                                      const ElementCount &Count) {
  Value *SV = llvm::ConstantVector::getSplat(Count, C);
  return Builder.CreateShuffleVector(V, V, SV, "lane");
}

Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
  ElementCount EC = cast<llvm::VectorType>(V->getType())->getElementCount();
  return EmitNeonSplat(V, C, EC);
}
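// Emit a call to a NEON intrinsic, bitcasting each operand to the parameter
// type the intrinsic declaration expects. Operand index `shift` (when
// nonzero) is instead materialized as an immediate shift vector, and
// constrained-FP intrinsics skip their metadata operands and use a
// constrained call.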
Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value *> &Ops,
                                     const char *name, unsigned shift,
                                     bool rightshift) {
  unsigned j = 0;
  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
       ai != ae; ++ai, ++j) {
    if (F->isConstrainedFPIntrinsic())
      if (ai->getType()->isMetadataTy())
        continue;
    if (shift > 0 && shift == j)
      Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
    else
      Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
  }

  if (F->isConstrainedFPIntrinsic())
    return Builder.CreateConstrainedFPCall(F, Ops, name);
  return Builder.CreateCall(F, Ops, name);
}
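// FP8 NEON helpers. When ExtendLaneArg is set, the 64-bit lane operand is
// widened into a full 128-bit register (v16i8) by inserting it at offset 0 of
// a poison vector, since the underlying intrinsics take the wider type.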
llvm::Value *CodeGenFunction::EmitFP8NeonFDOTCall(
    unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy,
    SmallVectorImpl<llvm::Value *> &Ops, const CallExpr *E, const char *name) {
  const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
                             RetTy->getPrimitiveSizeInBits();
  llvm::Type *Tys[] = {llvm::FixedVectorType::get(RetTy, ElemCount),
                       Ops[1]->getType()};
  if (ExtendLaneArg) {
    auto *VT = llvm::FixedVectorType::get(Int8Ty, 16);
    Ops[2] = Builder.CreateInsertVector(VT, PoisonValue::get(VT), Ops[2],
                                        uint64_t(0));
  }
  return EmitFP8NeonCall(IID, Tys, Ops, E, name);
}

llvm::Value *CodeGenFunction::EmitFP8NeonFMLACall(
    unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy,
    SmallVectorImpl<llvm::Value *> &Ops, const CallExpr *E, const char *name) {
  if (ExtendLaneArg) {
    auto *VT = llvm::FixedVectorType::get(Int8Ty, 16);
    Ops[2] = Builder.CreateInsertVector(VT, PoisonValue::get(VT), Ops[2],
                                        uint64_t(0));
  }
  const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
                             RetTy->getPrimitiveSizeInBits();
  return EmitFP8NeonCall(IID, {llvm::FixedVectorType::get(RetTy, ElemCount)},
                         Ops, E, name);
}
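// Build the immediate vector for a NEON shift: a splat of the constant shift
// amount, negated for right shifts (which LLVM models as negative shifts).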
Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
                                            bool neg) {
  int SV = cast<ConstantInt>(V)->getSExtValue();
  return ConstantInt::get(Ty, neg ? -SV : SV);
}
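// FP8 conversions: when Extract is set, the intrinsic converts only the low
// 64 bits (v8i8) of the mfloat8x16_t operand, so the lower half is extracted
// before the call.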
llvm::Value *CodeGenFunction::EmitFP8NeonCvtCall(
    unsigned IID, llvm::Type *Ty0, llvm::Type *Ty1, bool Extract,
    SmallVectorImpl<llvm::Value *> &Ops, const CallExpr *E, const char *name) {
  llvm::Type *Tys[] = {Ty0, Ty1};
  if (Extract) {
    Tys[1] = llvm::FixedVectorType::get(Int8Ty, 8);
    Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], uint64_t(0));
  }
  return EmitFP8NeonCall(IID, Tys, Ops, E, name);
}
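// Emit a right shift by immediate. lshr/ashr are undefined when the shift
// amount equals the element width, but NEON defines that case, so it is
// rewritten below rather than emitted directly.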
Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
                                          llvm::Type *Ty, bool usgn,
                                          const char *name) {
  llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);

  int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
  int EltSize = VTy->getScalarSizeInBits();

  Vec = Builder.CreateBitCast(Vec, Ty);

  if (ShiftAmt == EltSize) {
    if (usgn) {
      // Right-shifting an unsigned value by its size yields 0.
      return llvm::ConstantAggregateZero::get(VTy);
    }
    // Right-shifting a signed value by its size gives the same result as a
    // shift by size-1, which is the largest shift lshr/ashr allow.
    --ShiftAmt;
    Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
  }

  if (usgn)
    return Builder.CreateLShr(Vec, Shift, name);
  return Builder.CreateAShr(Vec, Shift, name);
}
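// Table-driven NEON lowering: each entry pairs a __builtin_neon_* builtin ID
// with the LLVM intrinsic(s) it maps to, plus type-modifier flags. The maps
// below must stay sorted by BuiltinID so they can be binary-searched.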
struct ARMVectorIntrinsicInfo {
  const char *NameHint;
  unsigned BuiltinID;
  unsigned LLVMIntrinsic;
  unsigned AltLLVMIntrinsic;
  uint64_t TypeModifier;

  bool operator<(unsigned RHSBuiltinID) const {
    return BuiltinID < RHSBuiltinID;
  }
  bool operator<(const ARMVectorIntrinsicInfo &TE) const {
    return BuiltinID < TE.BuiltinID;
  }
};
#define NEONMAP0(NameBase) \
  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }

#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, \
    Intrinsic::LLVMIntrinsic, 0, TypeModifier }

#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, \
    Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
    TypeModifier }
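// Illustrative expansion, using an entry from the map below:
//   NEONMAP1(vabsq_v, arm_neon_vabs, 0)
// becomes
//   { "vabsq_v", NEON::BI__builtin_neon_vabsq_v, Intrinsic::arm_neon_vabs, 0, 0 }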
static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap[] = {
  NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
  NEONMAP1(vabsq_v, arm_neon_vabs, 0),
  NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
  NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
  NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
  NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
  NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
  NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
  NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
  NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
  NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
  NEONMAP1(vcage_v, arm_neon_vacge, 0),
  NEONMAP1(vcageq_v, arm_neon_vacge, 0),
  NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
  NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
  NEONMAP1(vcale_v, arm_neon_vacge, 0),
  NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
  NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
  NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
  NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
  NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
  NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
  NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
  NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
  NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
  NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vdot_s32, arm_neon_sdot, 0),
  NEONMAP1(vdot_u32, arm_neon_udot, 0),
  NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
  NEONMAP1(vdotq_u32, arm_neon_udot, 0),
  NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
  NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
  NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
  NEONMAP1(vld1q_v, arm_neon_vld1, 0),
  NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
  NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
  NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
  NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
  NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
  NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
  NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
  NEONMAP1(vld2q_v, arm_neon_vld2, 0),
  NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
  NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
  NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
  NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
  NEONMAP1(vld3q_v, arm_neon_vld3, 0),
  NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
  NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
  NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
  NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
  NEONMAP1(vld4q_v, arm_neon_vld4, 0),
  NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
  NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
  NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
  NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
  NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
  NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
  NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
  NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
  NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
  NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
  NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
  NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
  NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
  NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
  NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
  NEONMAP1(vst1q_v, arm_neon_vst1, 0),
  NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
  NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
  NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
  NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2q_v, arm_neon_vst2, 0),
  NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3q_v, arm_neon_vst3, 0),
  NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4q_v, arm_neon_vst4, 0),
  NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
  NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
  NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
};
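// The AArch64 counterpart of the map above: same layout, but pointing at
// aarch64_neon_* intrinsics (and aarch64_crypto_* for the crypto builtins).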
static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
  NEONMAP1(vabs_v, aarch64_neon_abs, 0),
  NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
  NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0),
  NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0),
  NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0),
  NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0),
  NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0),
  NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0),
  NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0),
  NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0),
  NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0),
  NEONMAP1(vcage_v, aarch64_neon_facge, 0),
  NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
  NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcale_v, aarch64_neon_facge, 0),
  NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
  NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
  NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
  NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
  NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
  NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vdot_s32, aarch64_neon_sdot, 0),
  NEONMAP1(vdot_u32, aarch64_neon_udot, 0),
  NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0),
  NEONMAP1(vdotq_u32, aarch64_neon_udot, 0),
  NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0),
  NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0),
  NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0),
  NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0),
  NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0),
  NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0),
  NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0),
  NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0),
  NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
  NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
  NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
  NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
  NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
  NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
  NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0),
  NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0),
  NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
  NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
  NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0),
  NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
  NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0),
  NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
  NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
  NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
  NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0),
  NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
  NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
  NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
  NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0),
  NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0),
  NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0),
  NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0),
  NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0),
  NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0),
  NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0),
  NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0),
  NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0),
  NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0),
  NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0),
  NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0),
  NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0),
  NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0),
  NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0),
  NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0),
  NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0),
  NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0),
  NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0),
  NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0),
  NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
  NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
  NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
  NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
  NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
  NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
  NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0),
  NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0),
  NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0),
  NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0),
};
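// Scalar (SISD) builtins that need the same table treatment; these are
// lowered through EmitCommonNeonSISDBuiltinExpr below.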
static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
  NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
  NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
  NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
  NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
  NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
  NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
  NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
};
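// Builtins in this table are remapped to an equivalent builtin before lookup,
// so the bf16/f16/atomic-lane variants reuse the generic lowering of their
// base (_v or _s64) form.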
static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
  { NEON::BI__builtin_neon_splat_lane_bf16, NEON::BI__builtin_neon_splat_lane_v, },
  { NEON::BI__builtin_neon_splat_laneq_bf16, NEON::BI__builtin_neon_splat_laneq_v, },
  { NEON::BI__builtin_neon_splatq_lane_bf16, NEON::BI__builtin_neon_splatq_lane_v, },
  { NEON::BI__builtin_neon_splatq_laneq_bf16, NEON::BI__builtin_neon_splatq_laneq_v, },
  { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, },
  { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, },
  { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, },
  { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, },
  { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, },
  { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, },
  { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, },
  { NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, },
  { NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, },
  { NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, },
  { NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, },
  { NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, },
  { NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, },
  { NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, },
  { NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, },
  { NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, },
  { NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, },
  { NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, },
  { NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, },
  { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, },
  { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, },
  { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, },
  { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, },
  { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, },
  { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, },
  { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, },
  { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, },
  { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, },
  { NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v },
  { NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v },
  { NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v },
  { NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v },
  { NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v },
  { NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v },
  { NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v },
  { NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v },
  { NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v },
  { NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v },
  { NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v },
  { NEON::BI__builtin_neon_vld1q_lane_bf16, NEON::BI__builtin_neon_vld1q_lane_v },
  { NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v },
  { NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v },
  { NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v },
  { NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v },
  { NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v },
  { NEON::BI__builtin_neon_vld2q_lane_bf16, NEON::BI__builtin_neon_vld2q_lane_v },
  { NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v },
  { NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v },
  { NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v },
  { NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v },
  { NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v },
  { NEON::BI__builtin_neon_vld3q_lane_bf16, NEON::BI__builtin_neon_vld3q_lane_v },
  { NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v },
  { NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v },
  { NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v },
  { NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v },
  { NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v },
  { NEON::BI__builtin_neon_vld4q_lane_bf16, NEON::BI__builtin_neon_vld4q_lane_v },
  { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, },
  { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, },
  { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, },
  { NEON::BI__builtin_neon_vmaxq_f16, NEON::BI__builtin_neon_vmaxq_v, },
  { NEON::BI__builtin_neon_vmin_f16, NEON::BI__builtin_neon_vmin_v, },
  { NEON::BI__builtin_neon_vminnm_f16, NEON::BI__builtin_neon_vminnm_v, },
  { NEON::BI__builtin_neon_vminnmq_f16, NEON::BI__builtin_neon_vminnmq_v, },
  { NEON::BI__builtin_neon_vminq_f16, NEON::BI__builtin_neon_vminq_v, },
  { NEON::BI__builtin_neon_vmulx_f16, NEON::BI__builtin_neon_vmulx_v, },
  { NEON::BI__builtin_neon_vmulxq_f16, NEON::BI__builtin_neon_vmulxq_v, },
  { NEON::BI__builtin_neon_vpadd_f16, NEON::BI__builtin_neon_vpadd_v, },
  { NEON::BI__builtin_neon_vpaddq_f16, NEON::BI__builtin_neon_vpaddq_v, },
  { NEON::BI__builtin_neon_vpmax_f16, NEON::BI__builtin_neon_vpmax_v, },
  { NEON::BI__builtin_neon_vpmaxnm_f16, NEON::BI__builtin_neon_vpmaxnm_v, },
  { NEON::BI__builtin_neon_vpmaxnmq_f16, NEON::BI__builtin_neon_vpmaxnmq_v, },
  { NEON::BI__builtin_neon_vpmaxq_f16, NEON::BI__builtin_neon_vpmaxq_v, },
  { NEON::BI__builtin_neon_vpmin_f16, NEON::BI__builtin_neon_vpmin_v, },
  { NEON::BI__builtin_neon_vpminnm_f16, NEON::BI__builtin_neon_vpminnm_v, },
  { NEON::BI__builtin_neon_vpminnmq_f16, NEON::BI__builtin_neon_vpminnmq_v, },
  { NEON::BI__builtin_neon_vpminq_f16, NEON::BI__builtin_neon_vpminq_v, },
  { NEON::BI__builtin_neon_vrecpe_f16, NEON::BI__builtin_neon_vrecpe_v, },
  { NEON::BI__builtin_neon_vrecpeq_f16, NEON::BI__builtin_neon_vrecpeq_v, },
  { NEON::BI__builtin_neon_vrecps_f16, NEON::BI__builtin_neon_vrecps_v, },
  { NEON::BI__builtin_neon_vrecpsq_f16, NEON::BI__builtin_neon_vrecpsq_v, },
  { NEON::BI__builtin_neon_vrnd_f16, NEON::BI__builtin_neon_vrnd_v, },
  { NEON::BI__builtin_neon_vrnda_f16, NEON::BI__builtin_neon_vrnda_v, },
  { NEON::BI__builtin_neon_vrndaq_f16, NEON::BI__builtin_neon_vrndaq_v, },
  { NEON::BI__builtin_neon_vrndi_f16, NEON::BI__builtin_neon_vrndi_v, },
  { NEON::BI__builtin_neon_vrndiq_f16, NEON::BI__builtin_neon_vrndiq_v, },
  { NEON::BI__builtin_neon_vrndm_f16, NEON::BI__builtin_neon_vrndm_v, },
  { NEON::BI__builtin_neon_vrndmq_f16, NEON::BI__builtin_neon_vrndmq_v, },
  { NEON::BI__builtin_neon_vrndn_f16, NEON::BI__builtin_neon_vrndn_v, },
  { NEON::BI__builtin_neon_vrndnq_f16, NEON::BI__builtin_neon_vrndnq_v, },
  { NEON::BI__builtin_neon_vrndp_f16, NEON::BI__builtin_neon_vrndp_v, },
  { NEON::BI__builtin_neon_vrndpq_f16, NEON::BI__builtin_neon_vrndpq_v, },
  { NEON::BI__builtin_neon_vrndq_f16, NEON::BI__builtin_neon_vrndq_v, },
  { NEON::BI__builtin_neon_vrndx_f16, NEON::BI__builtin_neon_vrndx_v, },
  { NEON::BI__builtin_neon_vrndxq_f16, NEON::BI__builtin_neon_vrndxq_v, },
  { NEON::BI__builtin_neon_vrsqrte_f16, NEON::BI__builtin_neon_vrsqrte_v, },
  { NEON::BI__builtin_neon_vrsqrteq_f16, NEON::BI__builtin_neon_vrsqrteq_v, },
  { NEON::BI__builtin_neon_vrsqrts_f16, NEON::BI__builtin_neon_vrsqrts_v, },
  { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, },
  { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, },
  { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, },
  { NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v },
  { NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v },
  { NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v },
  { NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v },
  { NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v },
  { NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v },
  { NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v },
  { NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v },
  { NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v },
  { NEON::BI__builtin_neon_vst1q_lane_bf16, NEON::BI__builtin_neon_vst1q_lane_v },
  { NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v },
  { NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v },
  { NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v },
  { NEON::BI__builtin_neon_vst2q_lane_bf16, NEON::BI__builtin_neon_vst2q_lane_v },
  { NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v },
  { NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v },
  { NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v },
  { NEON::BI__builtin_neon_vst3q_lane_bf16, NEON::BI__builtin_neon_vst3q_lane_v },
  { NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v },
  { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v },
  { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v },
  { NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v },
  { NEON::BI__builtin_neon_vldap1_lane_u64, NEON::BI__builtin_neon_vldap1_lane_s64 },
  { NEON::BI__builtin_neon_vldap1_lane_f64, NEON::BI__builtin_neon_vldap1_lane_s64 },
  { NEON::BI__builtin_neon_vldap1_lane_p64, NEON::BI__builtin_neon_vldap1_lane_s64 },
  { NEON::BI__builtin_neon_vldap1q_lane_u64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
  { NEON::BI__builtin_neon_vldap1q_lane_f64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
  { NEON::BI__builtin_neon_vldap1q_lane_p64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
  { NEON::BI__builtin_neon_vstl1_lane_u64, NEON::BI__builtin_neon_vstl1_lane_s64 },
  { NEON::BI__builtin_neon_vstl1_lane_f64, NEON::BI__builtin_neon_vstl1_lane_s64 },
  { NEON::BI__builtin_neon_vstl1_lane_p64, NEON::BI__builtin_neon_vstl1_lane_s64 },
  { NEON::BI__builtin_neon_vstl1q_lane_u64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
  { NEON::BI__builtin_neon_vstl1q_lane_f64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
  { NEON::BI__builtin_neon_vstl1q_lane_p64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
};
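// SVE and SME reuse the same ARMVectorIntrinsicInfo scheme, but their map
// entries are generated from TableGen (.inc) files rather than written out.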
#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
  { \
    #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
        TypeModifier \
  }

#define SVEMAP2(NameBase, TypeModifier) \
  { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }

static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
#define GET_SVE_LLVM_INTRINSIC_MAP
#include "clang/Basic/arm_sve_builtin_cg.inc"
#include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
#undef GET_SVE_LLVM_INTRINSIC_MAP
};

#define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
  { \
    #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
        TypeModifier \
  }

#define SMEMAP2(NameBase, TypeModifier) \
  { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier }

static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = {
#define GET_SME_LLVM_INTRINSIC_MAP
#include "clang/Basic/arm_sme_builtin_cg.inc"
#undef GET_SME_LLVM_INTRINSIC_MAP
};
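// Binary-search one of the sorted maps above for a builtin ID. On the first
// call per map, debug builds assert sortedness via MapProvenSorted.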
static const ARMVectorIntrinsicInfo *
findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,
                            unsigned BuiltinID, bool &MapProvenSorted) {
#ifndef NDEBUG
  if (!MapProvenSorted) {
    assert(llvm::is_sorted(IntrinsicMap));
    MapProvenSorted = true;
  }
#endif

  const ARMVectorIntrinsicInfo *Builtin =
      llvm::lower_bound(IntrinsicMap, BuiltinID);

  if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
    return Builtin;

  return nullptr;
}
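// Build the (possibly overloaded) intrinsic declaration for a table entry:
// the TypeModifier flags say whether to add the call's return type and/or
// one or two copies of the argument type, vectorized to 64 or 128 bits.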
Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
                                                   unsigned Modifier,
                                                   llvm::Type *ArgType,
                                                   const CallExpr *E) {
  int VectorSize = 0;
  if (Modifier & Use64BitVectors)
    VectorSize = 64;
  else if (Modifier & Use128BitVectors)
    VectorSize = 128;

  // Return type.
  SmallVector<llvm::Type *, 3> Tys;
  if (Modifier & AddRetType) {
    llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
    if (Modifier & VectorizeRetType)
      Ty = llvm::FixedVectorType::get(
          Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
    Tys.push_back(Ty);
  }

  // Arguments.
  if (Modifier & VectorizeArgTypes) {
    int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
    ArgType = llvm::FixedVectorType::get(ArgType, Elts);
  }

  if (Modifier & (Add1ArgType | Add2ArgTypes))
    Tys.push_back(ArgType);

  if (Modifier & Add2ArgTypes)
    Tys.push_back(ArgType);

  return CGM.getIntrinsic(IntrinsicID, Tys);
}
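// Shared lowering for the scalar (SISD) table: scalar operands are passed to
// the vector intrinsics as single-lane vectors, and lane 0 of the result is
// extracted again if the intrinsic returns a wider value than the builtin.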
static Value *EmitCommonNeonSISDBuiltinExpr(
    CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo,
    SmallVectorImpl<Value *> &Ops, const CallExpr *E) {
  unsigned BuiltinID = SISDInfo.BuiltinID;
  unsigned int Int = SISDInfo.LLVMIntrinsic;
  unsigned Modifier = SISDInfo.TypeModifier;
  const char *s = SISDInfo.NameHint;

  switch (BuiltinID) {
  default: break;
  case NEON::BI__builtin_neon_vcled_s64:
  case NEON::BI__builtin_neon_vcled_u64:
  case NEON::BI__builtin_neon_vcles_f32:
  case NEON::BI__builtin_neon_vcled_f64:
  case NEON::BI__builtin_neon_vcltd_s64:
  case NEON::BI__builtin_neon_vcltd_u64:
  case NEON::BI__builtin_neon_vclts_f32:
  case NEON::BI__builtin_neon_vcltd_f64:
  case NEON::BI__builtin_neon_vcales_f32:
  case NEON::BI__builtin_neon_vcaled_f64:
  case NEON::BI__builtin_neon_vcalts_f32:
  case NEON::BI__builtin_neon_vcaltd_f64:
    // Only one direction of comparisons actually exists: cmle is a cmge with
    // swapped operands, so swap the operands here to match the intrinsic.
    std::swap(Ops[0], Ops[1]);
    break;
  }

  assert(Int && "Generic code assumes a valid intrinsic");

  // Determine the type(s) of this overloaded AArch64 intrinsic.
  const Expr *Arg = E->getArg(0);
  llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
  Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);

  int j = 0;
  ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
       ai != ae; ++ai, ++j) {
    llvm::Type *ArgTy = ai->getType();
    if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
        ArgTy->getPrimitiveSizeInBits())
      continue;

    assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
    // The constant argument to an _n_ intrinsic always has Int32Ty, so
    // truncate it before inserting it into the single-lane vector.
    Ops[j] = CGF.Builder.CreateTruncOrBitCast(
        Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType());
    Ops[j] =
        CGF.Builder.CreateInsertElement(PoisonValue::get(ArgTy), Ops[j], C0);
  }

  Value *Result = CGF.EmitNeonCall(F, Ops, s);
  llvm::Type *ResultType = CGF.ConvertType(E->getType());
  if (ResultType->getPrimitiveSizeInBits().getFixedValue() <
      Result->getType()->getPrimitiveSizeInBits().getFixedValue())
    return CGF.Builder.CreateExtractElement(Result, C0);

  return CGF.Builder.CreateBitCast(Result, ResultType, s);
}
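// Generic table-driven lowering shared by ARM and AArch64. The overloaded
// vector type is recovered from the builtin's trailing type-code argument,
// then either a custom case below or the table's intrinsic does the emission.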
Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
    unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
    const char *NameHint, unsigned Modifier, const CallExpr *E,
    SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
    llvm::Triple::ArchType Arch) {
  // Get the last argument, which specifies the vector type.
  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
  std::optional<llvm::APSInt> NeonTypeConst =
      Arg->getIntegerConstantExpr(getContext());
  if (!NeonTypeConst)
    return nullptr;

  // Determine the type of this overloaded NEON intrinsic.
  NeonTypeFlags Type(NeonTypeConst->getZExtValue());
  const bool Usgn = Type.isUnsigned();
  const bool Quad = Type.isQuad();
  const bool Floating = Type.isFloatingPoint();
  const bool HasFastHalfType = getTarget().hasFastHalfType();
  const bool AllowBFloatArgsAndRet =
      getTargetHooks().getABIInfo().allowBFloatArgsAndRet();

  llvm::FixedVectorType *VTy =
      GetNeonType(this, Type, HasFastHalfType, false, AllowBFloatArgsAndRet);
  llvm::Type *Ty = VTy;
  if (!Ty)
    return nullptr;

  auto getAlignmentValue32 = [&](Address addr) -> Value * {
    return Builder.getInt32(addr.getAlignment().getQuantity());
  };

  unsigned Int = LLVMIntrinsic;
  if ((Modifier & UnsignedAlts) && !Usgn)
    Int = AltLLVMIntrinsic;

  switch (BuiltinID) {
  default: break;
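  // The cases below need IR beyond a single intrinsic call; everything else
  // falls through to the generic table-driven emission after the switch.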
  case NEON::BI__builtin_neon_splat_lane_v:
  case NEON::BI__builtin_neon_splat_laneq_v:
  case NEON::BI__builtin_neon_splatq_lane_v:
  case NEON::BI__builtin_neon_splatq_laneq_v: {
    auto NumElements = VTy->getElementCount();
    if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
      NumElements = NumElements * 2;
    if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
      NumElements = NumElements.divideCoefficientBy(2);

    Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
    return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
  }
  case NEON::BI__builtin_neon_vpadd_v:
  case NEON::BI__builtin_neon_vpaddq_v:
    // We don't allow fp/int overloading of intrinsics.
    if (VTy->getElementType()->isFloatingPointTy() &&
        Int == Intrinsic::aarch64_neon_addp)
      Int = Intrinsic::aarch64_neon_faddp;
    break;
  case NEON::BI__builtin_neon_vabs_v:
  case NEON::BI__builtin_neon_vabsq_v:
    if (VTy->getElementType()->isFloatingPointTy())
      return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
  case NEON::BI__builtin_neon_vadd_v:
  case NEON::BI__builtin_neon_vaddq_v: {
    // poly8/poly16 addition is XOR: polynomial coefficients are in GF(2), so
    // sums have no carries between lanes' bits.
    llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8);
    Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
    Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
    return Builder.CreateBitCast(Ops[0], Ty);
  }
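  // vaddhn: add, keep the high half of each sum lane (shift right by half the
  // element width), then truncate to the narrow element type.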
  case NEON::BI__builtin_neon_vaddhn_v: {
    llvm::FixedVectorType *SrcTy =
        llvm::FixedVectorType::getExtendedElementVectorType(VTy);

    // %sum = add <4 x i32> %lhs, %rhs
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
    Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");

    // %high = lshr <4 x i32> %sum, <i32 16, ...>
    Constant *ShiftAmt =
        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");

    // %res = trunc <4 x i32> %high to <4 x i16>
    return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
  }
  case NEON::BI__builtin_neon_vcale_v:
  case NEON::BI__builtin_neon_vcaleq_v:
  case NEON::BI__builtin_neon_vcalt_v:
  case NEON::BI__builtin_neon_vcaltq_v:
    std::swap(Ops[0], Ops[1]);
    [[fallthrough]];
  case NEON::BI__builtin_neon_vcage_v:
  case NEON::BI__builtin_neon_vcageq_v:
  case NEON::BI__builtin_neon_vcagt_v:
  case NEON::BI__builtin_neon_vcagtq_v: {
    llvm::Type *Ty;
    switch (VTy->getScalarSizeInBits()) {
    default: llvm_unreachable("unexpected type");
    case 32:
      Ty = FloatTy;
      break;
    case 64:
      Ty = DoubleTy;
      break;
    case 16:
      Ty = HalfTy;
      break;
    }
    auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
    llvm::Type *Tys[] = { VTy, VecFlt };
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    return EmitNeonCall(F, Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vceqz_v:
  case NEON::BI__builtin_neon_vceqzq_v:
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], Ty, Floating ? ICmpInst::FCMP_OEQ : ICmpInst::ICMP_EQ, "vceqz");
  case NEON::BI__builtin_neon_vcgez_v:
  case NEON::BI__builtin_neon_vcgezq_v:
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], Ty, Floating ? ICmpInst::FCMP_OGE : ICmpInst::ICMP_SGE,
        "vcgez");
  case NEON::BI__builtin_neon_vclez_v:
  case NEON::BI__builtin_neon_vclezq_v:
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], Ty, Floating ? ICmpInst::FCMP_OLE : ICmpInst::ICMP_SLE,
        "vclez");
  case NEON::BI__builtin_neon_vcgtz_v:
  case NEON::BI__builtin_neon_vcgtzq_v:
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], Ty, Floating ? ICmpInst::FCMP_OGT : ICmpInst::ICMP_SGT,
        "vcgtz");
  case NEON::BI__builtin_neon_vcltz_v:
  case NEON::BI__builtin_neon_vcltzq_v:
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], Ty, Floating ? ICmpInst::FCMP_OLT : ICmpInst::ICMP_SLT,
        "vcltz");
  case NEON::BI__builtin_neon_vclz_v:
  case NEON::BI__builtin_neon_vclzq_v:
    // The generic ctlz intrinsic takes a second argument saying whether zero
    // input is undefined; for NEON it is not.
    Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
    break;
  case NEON::BI__builtin_neon_vcvt_f32_v:
  case NEON::BI__builtin_neon_vcvtq_f32_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
                     HasFastHalfType);
    return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
  case NEON::BI__builtin_neon_vcvt_f16_s16:
  case NEON::BI__builtin_neon_vcvt_f16_u16:
  case NEON::BI__builtin_neon_vcvtq_f16_s16:
  case NEON::BI__builtin_neon_vcvtq_f16_u16:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
                     HasFastHalfType);
    return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
  case NEON::BI__builtin_neon_vcvt_n_f16_s16:
  case NEON::BI__builtin_neon_vcvt_n_f16_u16:
  case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
  case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
    llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
    Function *F = CGM.getIntrinsic(Int, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_n_f32_v:
  case NEON::BI__builtin_neon_vcvt_n_f64_v:
  case NEON::BI__builtin_neon_vcvtq_n_f32_v:
  case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
    llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
    Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
    Function *F = CGM.getIntrinsic(Int, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_n_s16_f16:
  case NEON::BI__builtin_neon_vcvt_n_s32_v:
  case NEON::BI__builtin_neon_vcvt_n_u16_f16:
  case NEON::BI__builtin_neon_vcvt_n_u32_v:
  case NEON::BI__builtin_neon_vcvt_n_s64_v:
  case NEON::BI__builtin_neon_vcvt_n_u64_v:
  case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
  case NEON::BI__builtin_neon_vcvtq_n_s32_v:
  case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
  case NEON::BI__builtin_neon_vcvtq_n_u32_v:
  case NEON::BI__builtin_neon_vcvtq_n_s64_v:
  case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    Function *F = CGM.getIntrinsic(Int, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_s32_v:
  case NEON::BI__builtin_neon_vcvt_u32_v:
  case NEON::BI__builtin_neon_vcvt_s64_v:
  case NEON::BI__builtin_neon_vcvt_u64_v:
  case NEON::BI__builtin_neon_vcvt_s16_f16:
  case NEON::BI__builtin_neon_vcvt_u16_f16:
  case NEON::BI__builtin_neon_vcvtq_s32_v:
  case NEON::BI__builtin_neon_vcvtq_u32_v:
  case NEON::BI__builtin_neon_vcvtq_s64_v:
  case NEON::BI__builtin_neon_vcvtq_u64_v:
  case NEON::BI__builtin_neon_vcvtq_s16_f16:
  case NEON::BI__builtin_neon_vcvtq_u16_f16: {
    Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
    return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
                : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
  }
  case NEON::BI__builtin_neon_vcvta_s16_f16:
  case NEON::BI__builtin_neon_vcvta_s32_v:
  case NEON::BI__builtin_neon_vcvta_s64_v:
  case NEON::BI__builtin_neon_vcvta_u16_f16:
  case NEON::BI__builtin_neon_vcvta_u32_v:
  case NEON::BI__builtin_neon_vcvta_u64_v:
  case NEON::BI__builtin_neon_vcvtaq_s16_f16:
  case NEON::BI__builtin_neon_vcvtaq_s32_v:
  case NEON::BI__builtin_neon_vcvtaq_s64_v:
  case NEON::BI__builtin_neon_vcvtaq_u16_f16:
  case NEON::BI__builtin_neon_vcvtaq_u32_v:
  case NEON::BI__builtin_neon_vcvtaq_u64_v:
  case NEON::BI__builtin_neon_vcvtn_s16_f16:
  case NEON::BI__builtin_neon_vcvtn_s32_v:
  case NEON::BI__builtin_neon_vcvtn_s64_v:
  case NEON::BI__builtin_neon_vcvtn_u16_f16:
  case NEON::BI__builtin_neon_vcvtn_u32_v:
  case NEON::BI__builtin_neon_vcvtn_u64_v:
  case NEON::BI__builtin_neon_vcvtnq_s16_f16:
  case NEON::BI__builtin_neon_vcvtnq_s32_v:
  case NEON::BI__builtin_neon_vcvtnq_s64_v:
  case NEON::BI__builtin_neon_vcvtnq_u16_f16:
  case NEON::BI__builtin_neon_vcvtnq_u32_v:
  case NEON::BI__builtin_neon_vcvtnq_u64_v:
  case NEON::BI__builtin_neon_vcvtp_s16_f16:
  case NEON::BI__builtin_neon_vcvtp_s32_v:
  case NEON::BI__builtin_neon_vcvtp_s64_v:
  case NEON::BI__builtin_neon_vcvtp_u16_f16:
  case NEON::BI__builtin_neon_vcvtp_u32_v:
  case NEON::BI__builtin_neon_vcvtp_u64_v:
  case NEON::BI__builtin_neon_vcvtpq_s16_f16:
  case NEON::BI__builtin_neon_vcvtpq_s32_v:
  case NEON::BI__builtin_neon_vcvtpq_s64_v:
  case NEON::BI__builtin_neon_vcvtpq_u16_f16:
  case NEON::BI__builtin_neon_vcvtpq_u32_v:
  case NEON::BI__builtin_neon_vcvtpq_u64_v:
  case NEON::BI__builtin_neon_vcvtm_s16_f16:
  case NEON::BI__builtin_neon_vcvtm_s32_v:
  case NEON::BI__builtin_neon_vcvtm_s64_v:
  case NEON::BI__builtin_neon_vcvtm_u16_f16:
  case NEON::BI__builtin_neon_vcvtm_u32_v:
  case NEON::BI__builtin_neon_vcvtm_u64_v:
  case NEON::BI__builtin_neon_vcvtmq_s16_f16:
  case NEON::BI__builtin_neon_vcvtmq_s32_v:
  case NEON::BI__builtin_neon_vcvtmq_s64_v:
  case NEON::BI__builtin_neon_vcvtmq_u16_f16:
  case NEON::BI__builtin_neon_vcvtmq_u32_v:
  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
  }
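  // vcvtx: narrowing double-to-single conversion with round-to-odd, which
  // avoids double rounding when the result is rounded again later.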
  case NEON::BI__builtin_neon_vcvtx_f32_v: {
    llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vext_v:
  case NEON::BI__builtin_neon_vextq_v: {
    int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
    SmallVector<int, 16> Indices;
    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
      Indices.push_back(i+CV);

    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
  }
  case NEON::BI__builtin_neon_vfma_v:
  case NEON::BI__builtin_neon_vfmaq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);

    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
    return emitCallMaybeConstrainedFPBuiltin(
        *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
        {Ops[1], Ops[2], Ops[0]});
  }
case NEON::BI__builtin_neon_vld1_v:
case NEON::BI__builtin_neon_vld1q_v: {
  Ops.push_back(getAlignmentValue32(PtrOp0));

case NEON::BI__builtin_neon_vld1_x2_v:
case NEON::BI__builtin_neon_vld1q_x2_v:
case NEON::BI__builtin_neon_vld1_x3_v:
case NEON::BI__builtin_neon_vld1q_x3_v:
case NEON::BI__builtin_neon_vld1_x4_v:
case NEON::BI__builtin_neon_vld1q_x4_v: {
  Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");

case NEON::BI__builtin_neon_vld2_v:
case NEON::BI__builtin_neon_vld2q_v:
case NEON::BI__builtin_neon_vld3_v:
case NEON::BI__builtin_neon_vld3q_v:
case NEON::BI__builtin_neon_vld4_v:
case NEON::BI__builtin_neon_vld4q_v:
case NEON::BI__builtin_neon_vld2_dup_v:
case NEON::BI__builtin_neon_vld2q_dup_v:
case NEON::BI__builtin_neon_vld3_dup_v:
case NEON::BI__builtin_neon_vld3q_dup_v:
case NEON::BI__builtin_neon_vld4_dup_v:
case NEON::BI__builtin_neon_vld4q_dup_v: {
  Value *Align = getAlignmentValue32(PtrOp1);
  Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
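// vld1_dup loads a single element and broadcasts it: the scalar load is
// inserted into lane 0 of a poison vector, and the code elided here then
// splats lane 0 to every lane with a zero-mask shufflevector.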
case NEON::BI__builtin_neon_vld1_dup_v:
case NEON::BI__builtin_neon_vld1q_dup_v: {
  Value *V = PoisonValue::get(Ty);
  llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
  Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
case NEON::BI__builtin_neon_vld2_lane_v:
case NEON::BI__builtin_neon_vld2q_lane_v:
case NEON::BI__builtin_neon_vld3_lane_v:
case NEON::BI__builtin_neon_vld3q_lane_v:
case NEON::BI__builtin_neon_vld4_lane_v:
case NEON::BI__builtin_neon_vld4q_lane_v: {
  for (unsigned I = 2; I < Ops.size() - 1; ++I)
    Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
  Ops.push_back(getAlignmentValue32(PtrOp1));
case NEON::BI__builtin_neon_vmovl_v: {
  llvm::FixedVectorType *DTy =
      llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
  Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
  if (Usgn)
    return Builder.CreateZExt(Ops[0], Ty, "vmovl");
  return Builder.CreateSExt(Ops[0], Ty, "vmovl");
}
case NEON::BI__builtin_neon_vmovn_v: {
  llvm::FixedVectorType *QTy =
      llvm::FixedVectorType::getExtendedElementVectorType(VTy);
  Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
  return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
}
case NEON::BI__builtin_neon_vmull_v:
  Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
  Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
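// vpadal/vpaddl take a source with twice as many elements of half the
// width, so the intrinsic is overloaded on both the wide result type and
// a synthesized narrow vector type.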
case NEON::BI__builtin_neon_vpadal_v:
case NEON::BI__builtin_neon_vpadalq_v: {
  unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
  llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
  auto *NarrowTy =
      llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
  llvm::Type *Tys[2] = { Ty, NarrowTy };

case NEON::BI__builtin_neon_vpaddl_v:
case NEON::BI__builtin_neon_vpaddlq_v: {
  unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
  llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
  auto *NarrowTy =
      llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
  llvm::Type *Tys[2] = { Ty, NarrowTy };
case NEON::BI__builtin_neon_vqdmlal_v:
case NEON::BI__builtin_neon_vqdmlsl_v: {

case NEON::BI__builtin_neon_vqdmulhq_lane_v:
case NEON::BI__builtin_neon_vqdmulh_lane_v:
case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
  auto *RTy = cast<llvm::FixedVectorType>(Ty);
  if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
      BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
    RTy = llvm::FixedVectorType::get(RTy->getElementType(),
                                     RTy->getNumElements() * 2);
  llvm::Type *Tys[2] = {

case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
case NEON::BI__builtin_neon_vqdmulh_laneq_v:
case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
  llvm::Type *Tys[2] = {
case NEON::BI__builtin_neon_vqshl_n_v:
case NEON::BI__builtin_neon_vqshlq_n_v:

case NEON::BI__builtin_neon_vqshlu_n_v:
case NEON::BI__builtin_neon_vqshluq_n_v:

case NEON::BI__builtin_neon_vrecpe_v:
case NEON::BI__builtin_neon_vrecpeq_v:
case NEON::BI__builtin_neon_vrsqrte_v:
case NEON::BI__builtin_neon_vrsqrteq_v:
  Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;

case NEON::BI__builtin_neon_vrndi_v:
case NEON::BI__builtin_neon_vrndiq_v:
  Int = Builder.getIsFPConstrained()
            ? Intrinsic::experimental_constrained_nearbyint
            : Intrinsic::nearbyint;

case NEON::BI__builtin_neon_vrshr_n_v:
case NEON::BI__builtin_neon_vrshrq_n_v:

case NEON::BI__builtin_neon_vsha512hq_u64:
case NEON::BI__builtin_neon_vsha512h2q_u64:
case NEON::BI__builtin_neon_vsha512su0q_u64:
case NEON::BI__builtin_neon_vsha512su1q_u64: {

case NEON::BI__builtin_neon_vshl_n_v:
case NEON::BI__builtin_neon_vshlq_n_v:
  return Builder.CreateShl(Builder.CreateBitCast(Ops[0], Ty), Ops[1],
                           "vshl_n");
case NEON::BI__builtin_neon_vshll_n_v: {
  llvm::FixedVectorType *SrcTy =
      llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
  Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
  if (Usgn)
    Ops[0] = Builder.CreateZExt(Ops[0], VTy);
  else
    Ops[0] = Builder.CreateSExt(Ops[0], VTy);
  return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
}
case NEON::BI__builtin_neon_vshrn_n_v: {
  llvm::FixedVectorType *SrcTy =
      llvm::FixedVectorType::getExtendedElementVectorType(VTy);
  Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
  if (Usgn)
    Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
  else
    Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
  return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
}
case NEON::BI__builtin_neon_vshr_n_v:
case NEON::BI__builtin_neon_vshrq_n_v:

case NEON::BI__builtin_neon_vst1_v:
case NEON::BI__builtin_neon_vst1q_v:
case NEON::BI__builtin_neon_vst2_v:
case NEON::BI__builtin_neon_vst2q_v:
case NEON::BI__builtin_neon_vst3_v:
case NEON::BI__builtin_neon_vst3q_v:
case NEON::BI__builtin_neon_vst4_v:
case NEON::BI__builtin_neon_vst4q_v:
case NEON::BI__builtin_neon_vst2_lane_v:
case NEON::BI__builtin_neon_vst2q_lane_v:
case NEON::BI__builtin_neon_vst3_lane_v:
case NEON::BI__builtin_neon_vst3q_lane_v:
case NEON::BI__builtin_neon_vst4_lane_v:
case NEON::BI__builtin_neon_vst4q_lane_v: {
  Ops.push_back(getAlignmentValue32(PtrOp0));

case NEON::BI__builtin_neon_vsm3partw1q_u32:
case NEON::BI__builtin_neon_vsm3partw2q_u32:
case NEON::BI__builtin_neon_vsm3ss1q_u32:
case NEON::BI__builtin_neon_vsm4ekeyq_u32:
case NEON::BI__builtin_neon_vsm4eq_u32: {

case NEON::BI__builtin_neon_vsm3tt1aq_u32:
case NEON::BI__builtin_neon_vsm3tt1bq_u32:
case NEON::BI__builtin_neon_vsm3tt2aq_u32:
case NEON::BI__builtin_neon_vsm3tt2bq_u32: {

case NEON::BI__builtin_neon_vst1_x2_v:
case NEON::BI__builtin_neon_vst1q_x2_v:
case NEON::BI__builtin_neon_vst1_x3_v:
case NEON::BI__builtin_neon_vst1q_x3_v:
case NEON::BI__builtin_neon_vst1_x4_v:
case NEON::BI__builtin_neon_vst1q_x4_v: {
  if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
      Arch == llvm::Triple::aarch64_32) {
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
case NEON::BI__builtin_neon_vsubhn_v: {
  llvm::FixedVectorType *SrcTy =
      llvm::FixedVectorType::getExtendedElementVectorType(VTy);

  Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
  Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
  Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");

  Constant *ShiftAmt =
      ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
  Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");

  return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
}
case NEON::BI__builtin_neon_vtrn_v:
case NEON::BI__builtin_neon_vtrnq_v: {
  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
  Value *SV = nullptr;

  for (unsigned vi = 0; vi != 2; ++vi) {
    for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
      Indices.push_back(i + vi);
      Indices.push_back(i + e + vi);
    }
    SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
case NEON::BI__builtin_neon_vtst_v:
case NEON::BI__builtin_neon_vtstq_v: {
  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
  Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
                              ConstantAggregateZero::get(Ty));
  return Builder.CreateSExt(Ops[0], Ty, "vtst");
}
case NEON::BI__builtin_neon_vuzp_v:
case NEON::BI__builtin_neon_vuzpq_v: {
  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
  Value *SV = nullptr;

  for (unsigned vi = 0; vi != 2; ++vi) {
    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
      Indices.push_back(2 * i + vi);

    SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
case NEON::BI__builtin_neon_vxarq_u64: {

case NEON::BI__builtin_neon_vzip_v:
case NEON::BI__builtin_neon_vzipq_v: {
  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
  Value *SV = nullptr;

  for (unsigned vi = 0; vi != 2; ++vi) {
    for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
      Indices.push_back((i + vi * e) >> 1);
      Indices.push_back(((i + vi * e) >> 1) + e);
    }
    SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
case NEON::BI__builtin_neon_vdot_s32:
case NEON::BI__builtin_neon_vdot_u32:
case NEON::BI__builtin_neon_vdotq_s32:
case NEON::BI__builtin_neon_vdotq_u32: {
  auto *InputTy =
      llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
  llvm::Type *Tys[2] = { Ty, InputTy };

case NEON::BI__builtin_neon_vfmlal_low_f16:
case NEON::BI__builtin_neon_vfmlalq_low_f16: {
  auto *InputTy =
      llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
  llvm::Type *Tys[2] = { Ty, InputTy };

case NEON::BI__builtin_neon_vfmlsl_low_f16:
case NEON::BI__builtin_neon_vfmlslq_low_f16: {
  auto *InputTy =
      llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
  llvm::Type *Tys[2] = { Ty, InputTy };

case NEON::BI__builtin_neon_vfmlal_high_f16:
case NEON::BI__builtin_neon_vfmlalq_high_f16: {
  auto *InputTy =
      llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
  llvm::Type *Tys[2] = { Ty, InputTy };

case NEON::BI__builtin_neon_vfmlsl_high_f16:
case NEON::BI__builtin_neon_vfmlslq_high_f16: {
  auto *InputTy =
      llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
  llvm::Type *Tys[2] = { Ty, InputTy };

case NEON::BI__builtin_neon_vmmlaq_s32:
case NEON::BI__builtin_neon_vmmlaq_u32: {
  auto *InputTy =
      llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
  llvm::Type *Tys[2] = { Ty, InputTy };

case NEON::BI__builtin_neon_vusmmlaq_s32: {
  auto *InputTy =
      llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
  llvm::Type *Tys[2] = { Ty, InputTy };

case NEON::BI__builtin_neon_vusdot_s32:
case NEON::BI__builtin_neon_vusdotq_s32: {
  auto *InputTy =
      llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
  llvm::Type *Tys[2] = { Ty, InputTy };

case NEON::BI__builtin_neon_vbfdot_f32:
case NEON::BI__builtin_neon_vbfdotq_f32: {
  llvm::Type *InputTy =
      llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
  llvm::Type *Tys[2] = { Ty, InputTy };

case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
  llvm::Type *Tys[1] = { Ty };

assert(Int && "Expected valid intrinsic number");
Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
    Value *Op, llvm::Type *Ty, const CmpInst::Predicate Pred,
    const Twine &Name) {

  if (isa<FixedVectorType>(Ty)) {
    Op = Builder.CreateBitCast(Op, Ty);
  }

  if (CmpInst::isFPPredicate(Pred)) {
    if (Pred == CmpInst::FCMP_OEQ)
      Op = Builder.CreateFCmp(Pred, Op, Constant::getNullValue(Op->getType()));
    else
      Op = Builder.CreateFCmpS(Pred, Op, Constant::getNullValue(Op->getType()));
  } else {
    Op = Builder.CreateICmp(Pred, Op, Constant::getNullValue(Op->getType()));
  }

  llvm::Type *ResTy = Ty;
  if (auto *VTy = dyn_cast<FixedVectorType>(Ty))
    ResTy = FixedVectorType::get(
        IntegerType::get(getLLVMContext(), VTy->getScalarSizeInBits()),
        VTy->getNumElements());

  return Builder.CreateSExt(Op, ResTy, Name);
static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
                                 Value *ExtOp, Value *IndexOp,
                                 llvm::Type *ResTy, unsigned IntID,
                                 const char *Name) {
  SmallVector<Value *, 2> TblOps;
  if (ExtOp)
    TblOps.push_back(ExtOp);

  // Build a vector containing sequential numbers like (0, 1, 2, ..., 15).
  SmallVector<int, 16> Indices;
  auto *TblTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
  for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
    Indices.push_back(2 * i);
    Indices.push_back(2 * i + 1);
  }

  int PairPos = 0, End = Ops.size() - 1;
  while (PairPos < End) {
    TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
                                                     Ops[PairPos + 1], Indices,
                                                     Name));
    PairPos += 2;
  }

  // An odd number of 64-bit lookup tables: pad the last one with zeroes.
  if (PairPos == End) {
    Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
    TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
                                                     ZeroTbl, Indices, Name));
  }

  TblOps.push_back(IndexOp);
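// GetValueForARMHint maps the ARM hint builtins (nop/yield/wfe/wfi/sev/
// sevl) onto the llvm.arm.hint intrinsic with the architectural immediate
// for each hint; a null return means the builtin is not a hint.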
Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
  switch (BuiltinID) {
  case clang::ARM::BI__builtin_arm_nop:

  case clang::ARM::BI__builtin_arm_yield:
  case clang::ARM::BI__yield:

  case clang::ARM::BI__builtin_arm_wfe:
  case clang::ARM::BI__wfe:

  case clang::ARM::BI__builtin_arm_wfi:
  case clang::ARM::BI__wfi:

  case clang::ARM::BI__builtin_arm_sev:
  case clang::ARM::BI__sev:

  case clang::ARM::BI__builtin_arm_sevl:
  case clang::ARM::BI__sevl:
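// EmitSpecialRegisterBuiltin reads or writes a named system register by
// calling llvm.read_register / llvm.read_volatile_register /
// llvm.write_register with the register name packed into metadata.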
static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
                                         const CallExpr *E,
                                         llvm::Type *RegisterType,
                                         llvm::Type *ValueType,
                                         SpecialRegisterAccessKind AccessKind,
                                         StringRef SysReg = "") {
  assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64) ||
          RegisterType->isIntegerTy(128)) &&
         "Unsupported size for register.");

  CodeGen::CGBuilderTy &Builder = CGF.Builder;
  CodeGen::CodeGenModule &CGM = CGF.CGM;
  LLVMContext &Context = CGM.getLLVMContext();

  if (SysReg.empty()) {
    const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
    SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
  }

  llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
  llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
  llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);

  llvm::Type *Types[] = { RegisterType };

  bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
  assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64)) &&
         "Can't fit 64-bit value in 32-bit register");

  if (AccessKind != Write) {
    llvm::Function *F = CGM.getIntrinsic(
        AccessKind == VolatileRead ? Intrinsic::read_volatile_register
                                   : Intrinsic::read_register,
        Types);
    llvm::Value *Call = Builder.CreateCall(F, Metadata);

    if (MixedTypes)
      // Read into a 64-bit register, then truncate the result to 32 bits.
      return Builder.CreateTrunc(Call, ValueType);

    if (ValueType->isPointerTy())
      return Builder.CreateIntToPtr(Call, ValueType);
  }

  llvm::Function *F = CGM.getIntrinsic(Intrinsic::write_register, Types);
  llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
  if (MixedTypes) {
    // Extend the 32-bit write value to 64 bits to pass to write_register.
    ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
    return Builder.CreateCall(F, { Metadata, ArgValue });
  }

  if (ValueType->isPointerTy()) {
    ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
    return Builder.CreateCall(F, { Metadata, ArgValue });
  }

  return Builder.CreateCall(F, { Metadata, ArgValue });
  switch (BuiltinID) {
  case NEON::BI__builtin_neon_vget_lane_i8:
  case NEON::BI__builtin_neon_vget_lane_i16:
  case NEON::BI__builtin_neon_vget_lane_bf16:
  case NEON::BI__builtin_neon_vget_lane_i32:
  case NEON::BI__builtin_neon_vget_lane_i64:
  case NEON::BI__builtin_neon_vget_lane_mf8:
  case NEON::BI__builtin_neon_vget_lane_f32:
  case NEON::BI__builtin_neon_vgetq_lane_i8:
  case NEON::BI__builtin_neon_vgetq_lane_i16:
  case NEON::BI__builtin_neon_vgetq_lane_bf16:
  case NEON::BI__builtin_neon_vgetq_lane_i32:
  case NEON::BI__builtin_neon_vgetq_lane_i64:
  case NEON::BI__builtin_neon_vgetq_lane_mf8:
  case NEON::BI__builtin_neon_vgetq_lane_f32:
  case NEON::BI__builtin_neon_vduph_lane_bf16:
  case NEON::BI__builtin_neon_vduph_laneq_bf16:
  case NEON::BI__builtin_neon_vset_lane_i8:
  case NEON::BI__builtin_neon_vset_lane_mf8:
  case NEON::BI__builtin_neon_vset_lane_i16:
  case NEON::BI__builtin_neon_vset_lane_bf16:
  case NEON::BI__builtin_neon_vset_lane_i32:
  case NEON::BI__builtin_neon_vset_lane_i64:
  case NEON::BI__builtin_neon_vset_lane_f32:
  case NEON::BI__builtin_neon_vsetq_lane_i8:
  case NEON::BI__builtin_neon_vsetq_lane_mf8:
  case NEON::BI__builtin_neon_vsetq_lane_i16:
  case NEON::BI__builtin_neon_vsetq_lane_bf16:
  case NEON::BI__builtin_neon_vsetq_lane_i32:
  case NEON::BI__builtin_neon_vsetq_lane_i64:
  case NEON::BI__builtin_neon_vsetq_lane_f32:
  case NEON::BI__builtin_neon_vsha1h_u32:
  case NEON::BI__builtin_neon_vsha1cq_u32:
  case NEON::BI__builtin_neon_vsha1pq_u32:
  case NEON::BI__builtin_neon_vsha1mq_u32:
  case NEON::BI__builtin_neon_vcvth_bf16_f32:
  case clang::ARM::BI_MoveToCoprocessor:
  case clang::ARM::BI_MoveToCoprocessor2:
Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E,
                                           ReturnValueSlot ReturnValue,
                                           llvm::Triple::ArchType Arch) {
  if (auto Hint = GetValueForARMHint(BuiltinID))
    return Hint;

  if (BuiltinID == clang::ARM::BI__emit) {
    bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
    llvm::FunctionType *FTy =
        llvm::FunctionType::get(VoidTy, /*Variadic=*/false);

    Expr::EvalResult Result;
    if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
      llvm_unreachable("Sema will ensure that the parameter is constant");

    llvm::APSInt Value = Result.Val.getInt();
    uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();

    llvm::InlineAsm *Emit =
        IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
                                 /*hasSideEffects=*/true)
                : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
                                 /*hasSideEffects=*/true);

    return Builder.CreateCall(Emit);
  if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) {

  if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) {

  if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) {
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
  }

  if (BuiltinID == clang::ARM::BI__builtin_arm_clz ||
      BuiltinID == clang::ARM::BI__builtin_arm_clz64) {
    if (BuiltinID == clang::ARM::BI__builtin_arm_clz64)

  if (BuiltinID == clang::ARM::BI__builtin_arm_cls) {

  if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) {

  if (BuiltinID == clang::ARM::BI__clear_cache) {
    assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
    for (unsigned i = 0; i < 2; i++)

    llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
    StringRef Name = FD->getName();

  if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr ||
      BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) {
    switch (BuiltinID) {
    default: llvm_unreachable("unexpected builtin");
    case clang::ARM::BI__builtin_arm_mcrr:

    case clang::ARM::BI__builtin_arm_mcrr2:

    return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
  if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc ||
      BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) {
    switch (BuiltinID) {
    default: llvm_unreachable("unexpected builtin");
    case clang::ARM::BI__builtin_arm_mrrc:

    case clang::ARM::BI__builtin_arm_mrrc2:

    Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});

    Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
    RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", /*HasNUW=*/true);
    RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
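// The ldrexd family returns a 64-bit value in two 32-bit registers; the
// code below recombines them in IR by shifting the high half left 32 bits
// (with no unsigned wrap) and or-ing in the low half.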
  if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd ||
      ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
        BuiltinID == clang::ARM::BI__builtin_arm_ldaex) &&
       getContext().getTypeSize(E->getType()) == 64) ||
      BuiltinID == clang::ARM::BI__ldrexd) {
    switch (BuiltinID) {
    default: llvm_unreachable("unexpected builtin");
    case clang::ARM::BI__builtin_arm_ldaex:

    case clang::ARM::BI__builtin_arm_ldrexd:
    case clang::ARM::BI__builtin_arm_ldrex:
    case clang::ARM::BI__ldrexd:

    Val = Builder.CreateShl(Val0, ShiftCst, "shl", /*HasNUW=*/true);
    Val = Builder.CreateOr(Val, Val1);

  if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
      BuiltinID == clang::ARM::BI__builtin_arm_ldaex) {
    Function *F = CGM.getIntrinsic(
        BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex
                                                       : Intrinsic::arm_ldrex,
        UnqualPtrTy);
    CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldrex");

    if (RealResTy->isPointerTy())
      return Builder.CreateIntToPtr(Val, RealResTy);

    llvm::Type *IntResTy = llvm::IntegerType::get(
        getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
    return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
                                 RealResTy);
  if (BuiltinID == clang::ARM::BI__builtin_arm_strexd ||
      ((BuiltinID == clang::ARM::BI__builtin_arm_stlex ||
        BuiltinID == clang::ARM::BI__builtin_arm_strex) &&
    Function *F = CGM.getIntrinsic(
        BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd
                                                       : Intrinsic::arm_strexd);

    return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
  }

  if (BuiltinID == clang::ARM::BI__builtin_arm_strex ||
      BuiltinID == clang::ARM::BI__builtin_arm_stlex) {
    llvm::Type *StoreTy =

    if (StoreVal->getType()->isPointerTy())

    llvm::Type *IntTy = llvm::IntegerType::get(

    Function *F = CGM.getIntrinsic(
        BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex
                                                       : Intrinsic::arm_strex,
        StoreAddr->getType());

    CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
    CI->addParamAttr(
        1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
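// CRC32: each __builtin_arm_crc32* maps directly onto the matching
// llvm.arm.crc32* intrinsic, except the 64-bit d/cd forms, which are
// legalized as two chained 32-bit crc32w/crc32cw calls on the low and
// high halves of the data argument.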
  if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) {

  Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
  switch (BuiltinID) {
  case clang::ARM::BI__builtin_arm_crc32b:
    CRCIntrinsicID = Intrinsic::arm_crc32b; break;
  case clang::ARM::BI__builtin_arm_crc32cb:
    CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
  case clang::ARM::BI__builtin_arm_crc32h:
    CRCIntrinsicID = Intrinsic::arm_crc32h; break;
  case clang::ARM::BI__builtin_arm_crc32ch:
    CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
  case clang::ARM::BI__builtin_arm_crc32w:
  case clang::ARM::BI__builtin_arm_crc32d:
    CRCIntrinsicID = Intrinsic::arm_crc32w; break;
  case clang::ARM::BI__builtin_arm_crc32cw:
  case clang::ARM::BI__builtin_arm_crc32cd:
    CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
  }

  if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
    if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d ||
        BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {
      return Builder.CreateCall(F, {Res, Arg1b});

    return Builder.CreateCall(F, {Arg0, Arg1});
  if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
      BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
      BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
      BuiltinID == clang::ARM::BI__builtin_arm_wsr ||
      BuiltinID == clang::ARM::BI__builtin_arm_wsr64 ||
      BuiltinID == clang::ARM::BI__builtin_arm_wsrp) {
    if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
        BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
        BuiltinID == clang::ARM::BI__builtin_arm_rsrp)

    bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
                            BuiltinID == clang::ARM::BI__builtin_arm_wsrp;

    bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
                   BuiltinID == clang::ARM::BI__builtin_arm_wsr64;

    llvm::Type *ValueType;

    if (IsPointerBuiltin) {
    } else if (Is64Bit) {

  if (BuiltinID == ARM::BI__builtin_sponentry) {

  // Some builtins are equivalent to NEON intrinsics: remap them.
  auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
    return P.first == BuiltinID;
  });
  if (It != end(NEONEquivalentIntrinsicMap))
    BuiltinID = It->second;
  // Find out if any arguments are required to be integer constant
  // expressions.
  unsigned ICEArguments = 0;

  auto getAlignmentValue32 = [&](Address addr) -> Value * {
    return Builder.getInt32(addr.getAlignment().getQuantity());
  };

  unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
  for (unsigned i = 0, e = NumArgs; i != e; i++) {
    switch (BuiltinID) {
    case NEON::BI__builtin_neon_vld1_v:
    case NEON::BI__builtin_neon_vld1q_v:
    case NEON::BI__builtin_neon_vld1q_lane_v:
    case NEON::BI__builtin_neon_vld1_lane_v:
    case NEON::BI__builtin_neon_vld1_dup_v:
    case NEON::BI__builtin_neon_vld1q_dup_v:
    case NEON::BI__builtin_neon_vst1_v:
    case NEON::BI__builtin_neon_vst1q_v:
    case NEON::BI__builtin_neon_vst1q_lane_v:
    case NEON::BI__builtin_neon_vst1_lane_v:
    case NEON::BI__builtin_neon_vst2_v:
    case NEON::BI__builtin_neon_vst2q_v:
    case NEON::BI__builtin_neon_vst2_lane_v:
    case NEON::BI__builtin_neon_vst2q_lane_v:
    case NEON::BI__builtin_neon_vst3_v:
    case NEON::BI__builtin_neon_vst3q_v:
    case NEON::BI__builtin_neon_vst3_lane_v:
    case NEON::BI__builtin_neon_vst3q_lane_v:
    case NEON::BI__builtin_neon_vst4_v:
    case NEON::BI__builtin_neon_vst4q_v:
    case NEON::BI__builtin_neon_vst4_lane_v:
    case NEON::BI__builtin_neon_vst4q_lane_v:
    switch (BuiltinID) {
    case NEON::BI__builtin_neon_vld2_v:
    case NEON::BI__builtin_neon_vld2q_v:
    case NEON::BI__builtin_neon_vld3_v:
    case NEON::BI__builtin_neon_vld3q_v:
    case NEON::BI__builtin_neon_vld4_v:
    case NEON::BI__builtin_neon_vld4q_v:
    case NEON::BI__builtin_neon_vld2_lane_v:
    case NEON::BI__builtin_neon_vld2q_lane_v:
    case NEON::BI__builtin_neon_vld3_lane_v:
    case NEON::BI__builtin_neon_vld3q_lane_v:
    case NEON::BI__builtin_neon_vld4_lane_v:
    case NEON::BI__builtin_neon_vld4q_lane_v:
    case NEON::BI__builtin_neon_vld2_dup_v:
    case NEON::BI__builtin_neon_vld2q_dup_v:
    case NEON::BI__builtin_neon_vld3_dup_v:
    case NEON::BI__builtin_neon_vld3q_dup_v:
    case NEON::BI__builtin_neon_vld4_dup_v:
    case NEON::BI__builtin_neon_vld4q_dup_v:
  switch (BuiltinID) {
  case NEON::BI__builtin_neon_vget_lane_i8:
  case NEON::BI__builtin_neon_vget_lane_i16:
  case NEON::BI__builtin_neon_vget_lane_i32:
  case NEON::BI__builtin_neon_vget_lane_i64:
  case NEON::BI__builtin_neon_vget_lane_bf16:
  case NEON::BI__builtin_neon_vget_lane_f32:
  case NEON::BI__builtin_neon_vgetq_lane_i8:
  case NEON::BI__builtin_neon_vgetq_lane_i16:
  case NEON::BI__builtin_neon_vgetq_lane_i32:
  case NEON::BI__builtin_neon_vgetq_lane_i64:
  case NEON::BI__builtin_neon_vgetq_lane_bf16:
  case NEON::BI__builtin_neon_vgetq_lane_f32:
  case NEON::BI__builtin_neon_vduph_lane_bf16:
  case NEON::BI__builtin_neon_vduph_laneq_bf16:
    return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");

  case NEON::BI__builtin_neon_vrndns_f32: {
    llvm::Type *Tys[] = {Arg->getType()};
    return Builder.CreateCall(F, {Arg}, "vrndn"); }

  case NEON::BI__builtin_neon_vset_lane_i8:
  case NEON::BI__builtin_neon_vset_lane_i16:
  case NEON::BI__builtin_neon_vset_lane_i32:
  case NEON::BI__builtin_neon_vset_lane_i64:
  case NEON::BI__builtin_neon_vset_lane_bf16:
  case NEON::BI__builtin_neon_vset_lane_f32:
  case NEON::BI__builtin_neon_vsetq_lane_i8:
  case NEON::BI__builtin_neon_vsetq_lane_i16:
  case NEON::BI__builtin_neon_vsetq_lane_i32:
  case NEON::BI__builtin_neon_vsetq_lane_i64:
  case NEON::BI__builtin_neon_vsetq_lane_bf16:
  case NEON::BI__builtin_neon_vsetq_lane_f32:
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");

  case NEON::BI__builtin_neon_vsha1h_u32:

  case NEON::BI__builtin_neon_vsha1cq_u32:

  case NEON::BI__builtin_neon_vsha1pq_u32:

  case NEON::BI__builtin_neon_vsha1mq_u32:

  case NEON::BI__builtin_neon_vcvth_bf16_f32: {

  case clang::ARM::BI_MoveToCoprocessor:
  case clang::ARM::BI_MoveToCoprocessor2: {
    Function *F = CGM.getIntrinsic(BuiltinID == clang::ARM::BI_MoveToCoprocessor
                                       ? Intrinsic::arm_mcr
                                       : Intrinsic::arm_mcr2);
    return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
                                  Ops[3], Ops[4], Ops[5]});
  assert(HasExtraArg);
  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
  std::optional<llvm::APSInt> Result =
      Arg->getIntegerConstantExpr(getContext());

  if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f ||
      BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) {
    if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f)

    // Determine whether this is an unsigned conversion or not.
    bool usgn = Result->getZExtValue() == 1;
    unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;

    return Builder.CreateCall(F, Ops, "vcvtr");
  }

  // Determine the type of this overloaded NEON intrinsic.
  bool usgn = Type.isUnsigned();
  bool rightShift = false;

  llvm::FixedVectorType *VTy =

  llvm::Type *Ty = VTy;
  switch (BuiltinID) {
  default:
    return nullptr;
  case NEON::BI__builtin_neon_vld1q_lane_v:
    // Handle 64-bit integer elements as a special case: use shuffles of
    // one-element vectors to avoid poor code for i64 in the backend.
    if (VTy->getElementType()->isIntegerTy(64)) {
      // Extract the other lane.
      Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
      int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
      Value *SV =
          llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1 - Lane));
      Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
      // Load the value as a one-element vector.
      Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1);
      Value *Align = getAlignmentValue32(PtrOp0);
      // Combine them.
      int Indices[] = {1 - Lane, Lane};
      return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane");
    }
    [[fallthrough]];
  case NEON::BI__builtin_neon_vld1_lane_v: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
  }
  case NEON::BI__builtin_neon_vqrshrn_n_v:
    Int =
        usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;

  case NEON::BI__builtin_neon_vqrshrun_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
                        Ops, "vqrshrun_n", 1, true);
  case NEON::BI__builtin_neon_vqshrn_n_v:
    Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;

  case NEON::BI__builtin_neon_vqshrun_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
                        Ops, "vqshrun_n", 1, true);
  case NEON::BI__builtin_neon_vrecpe_v:
  case NEON::BI__builtin_neon_vrecpeq_v:

  case NEON::BI__builtin_neon_vrshrn_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
                        Ops, "vrshrn_n", 1, true);
  case NEON::BI__builtin_neon_vrsra_n_v:
  case NEON::BI__builtin_neon_vrsraq_n_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
    return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
  case NEON::BI__builtin_neon_vsri_n_v:
  case NEON::BI__builtin_neon_vsriq_n_v:

  case NEON::BI__builtin_neon_vsli_n_v:
  case NEON::BI__builtin_neon_vsliq_n_v:

  case NEON::BI__builtin_neon_vsra_n_v:
  case NEON::BI__builtin_neon_vsraq_n_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateAdd(Ops[0], Ops[1]);
  case NEON::BI__builtin_neon_vst1q_lane_v:
    // Handle 64-bit integer elements with a one-element-vector shuffle, as
    // for vld1q_lane above.
    if (VTy->getElementType()->isIntegerTy(64)) {
      Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
      Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
      Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
      Ops[2] = getAlignmentValue32(PtrOp0);
      llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};

  case NEON::BI__builtin_neon_vst1_lane_v: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);

  case NEON::BI__builtin_neon_vtbl1_v:

  case NEON::BI__builtin_neon_vtbl2_v:

  case NEON::BI__builtin_neon_vtbl3_v:

  case NEON::BI__builtin_neon_vtbl4_v:

  case NEON::BI__builtin_neon_vtbx1_v:

  case NEON::BI__builtin_neon_vtbx2_v:

  case NEON::BI__builtin_neon_vtbx3_v:

  case NEON::BI__builtin_neon_vtbx4_v:
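// The template and static helpers below are shared by the Tablegen-driven
// MVE builtin codegen: they implement the small IR idioms (sign/zero
// extension, immediate shifts, splats, reinterprets, lane zips/unzips and
// lane reversals) that the generated code composes into full builtins.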
template<typename Integer>

static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
                                     llvm::Type *T, bool Unsigned) {
  return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T);
}

static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
                                    uint32_t Shift, bool Unsigned) {
  unsigned LaneBits = cast<llvm::VectorType>(V->getType())
                          ->getElementType()
                          ->getPrimitiveSizeInBits();
  if (Shift == LaneBits) {
    // A shift by the full lane size is undefined behavior in IR, but legal
    // in MVE: an unsigned full-width shift always produces zero, and a
    // signed one produces the sign bit, i.e. a shift by LaneBits - 1.
    if (Unsigned)
      return llvm::Constant::getNullValue(V->getType());
    Shift = LaneBits - 1;
  }
  return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift);
}

static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
  // MVE vectors are always 128 bits wide, so the element count can be
  // inferred from the scalar type.
  unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits();
  return Builder.CreateVectorSplat(Elements, V);
}

static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,
                                            CodeGenFunction *CGF,
                                            llvm::Value *V,
                                            llvm::Type *DestType) {

  if (CGF->getTarget().isBigEndian() &&
      V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {
    return Builder.CreateCall(
        CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vreinterpretq,
                              {DestType, V->getType()}),
        V);
  }
  return Builder.CreateBitCast(V, DestType);
}

static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) {
  // Extract every other element (evens or odds, as requested).
  SmallVector<int, 16> Indices;
  unsigned InputElements =
      cast<llvm::FixedVectorType>(V->getType())->getNumElements();
  for (unsigned i = 0; i < InputElements; i += 2)
    Indices.push_back(i + Odd);
  return Builder.CreateShuffleVector(V, Indices);
}

static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
                              llvm::Value *V1) {
  // Interleave two vectors element by element.
  assert(V0->getType() == V1->getType() && "Can't zip different vector types");
  SmallVector<int, 16> Indices;
  unsigned InputElements =
      cast<llvm::FixedVectorType>(V0->getType())->getNumElements();
  for (unsigned i = 0; i < InputElements; i++) {
    Indices.push_back(i);
    Indices.push_back(i + InputElements);
  }
  return Builder.CreateShuffleVector(V0, V1, Indices);
}

template<unsigned HighBit, unsigned OtherBits>
static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
  // Splat a constant whose top bit is HighBit and whose remaining bits all
  // equal OtherBits (e.g. UINT_MAX or INT_MIN).
  llvm::Type *T = cast<llvm::VectorType>(VT)->getElementType();
  unsigned LaneBits = T->getPrimitiveSizeInBits();
  uint32_t Value = HighBit << (LaneBits - 1);
  if (OtherBits)
    Value |= (1UL << (LaneBits - 1)) - 1;
  llvm::Value *Lane = llvm::ConstantInt::get(T, Value);
  return ARMMVEVectorSplat(Builder, Lane);
}

static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
                                               llvm::Value *V,
                                               unsigned ReverseWidth) {
  // Reverse the elements within every ReverseWidth-bit group of lanes.
  SmallVector<int, 16> Indices;
  unsigned LaneSize = V->getType()->getScalarSizeInBits();
  unsigned Elements = 128 / LaneSize;
  unsigned Mask = ReverseWidth / LaneSize - 1;
  for (unsigned i = 0; i < Elements; i++)
    Indices.push_back(i ^ Mask);
  return Builder.CreateShuffleVector(V, Indices);
}
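// EmitARMMVEBuiltinExpr defers almost everything to the switch generated
// from arm_mve_builtin_cg.inc; only vld2q/vld4q and vst2q/vst4q need the
// handwritten VLD24/VST24 paths below, because their struct-of-array
// return/argument types must be unpacked around the IR intrinsic call.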
Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
                                              const CallExpr *E,
                                              ReturnValueSlot ReturnValue,
                                              llvm::Triple::ArchType Arch) {
  enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
  Intrinsic::ID IRIntr;
  unsigned NumVectors;

  switch (BuiltinID) {
#include "clang/Basic/arm_mve_builtin_cg.inc"
  }

  switch (CustomCodeGenType) {

  case CustomCodeGen::VLD24: {

    assert(MvecLType->isStructTy() &&
           "Return type for vld[24]q should be a struct");
    assert(MvecLType->getStructNumElements() == 1 &&
           "Return-type struct for vld[24]q should have one element");
    auto MvecLTypeInner = MvecLType->getStructElementType(0);
    assert(MvecLTypeInner->isArrayTy() &&
           "Return-type struct for vld[24]q should contain an array");
    assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
           "Array member of return-type struct vld[24]q has wrong length");
    auto VecLType = MvecLTypeInner->getArrayElementType();

    Tys.push_back(VecLType);

    auto Addr = E->getArg(0);

    Value *MvecOut = PoisonValue::get(MvecLType);
    for (unsigned i = 0; i < NumVectors; ++i) {
      Value *Vec = Builder.CreateExtractValue(LoadResult, i);
      MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i});
    }
  case CustomCodeGen::VST24: {

    auto Addr = E->getArg(0);

    auto MvecCType = E->getArg(1)->getType();

    assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct");
    assert(MvecLType->getStructNumElements() == 1 &&
           "Data-type struct for vst2q should have one element");
    auto MvecLTypeInner = MvecLType->getStructElementType(0);
    assert(MvecLTypeInner->isArrayTy() &&
           "Data-type struct for vst2q should contain an array");
    assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
           "Array member of data-type struct for vst2q has wrong length");
    auto VecLType = MvecLTypeInner->getArrayElementType();

    Tys.push_back(VecLType);

    for (unsigned i = 0; i < NumVectors; i++)
      Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i}));

    Value *ToReturn = nullptr;
    for (unsigned i = 0; i < NumVectors; i++) {
      Ops.push_back(llvm::ConstantInt::get(Int32Ty, i));
      ToReturn = Builder.CreateCall(F, Ops);
    }

  llvm_unreachable("unknown custom codegen type.");
Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID,
                                              const CallExpr *E,
                                              ReturnValueSlot ReturnValue,
                                              llvm::Triple::ArchType Arch) {
  switch (BuiltinID) {
  default:
    return nullptr;
#include "clang/Basic/arm_cde_builtin_cg.inc"
  }
}
static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF,
                                        unsigned BuiltinID, const CallExpr *E,
                                        SmallVectorImpl<Value *> &Ops,
                                        llvm::Triple::ArchType Arch) {
  unsigned int Int = 0;
  const char *s = nullptr;

  switch (BuiltinID) {
  default:
    return nullptr;
  case NEON::BI__builtin_neon_vtbl1_v:
  case NEON::BI__builtin_neon_vqtbl1_v:
  case NEON::BI__builtin_neon_vqtbl1q_v:
  case NEON::BI__builtin_neon_vtbl2_v:
  case NEON::BI__builtin_neon_vqtbl2_v:
  case NEON::BI__builtin_neon_vqtbl2q_v:
  case NEON::BI__builtin_neon_vtbl3_v:
  case NEON::BI__builtin_neon_vqtbl3_v:
  case NEON::BI__builtin_neon_vqtbl3q_v:
  case NEON::BI__builtin_neon_vtbl4_v:
  case NEON::BI__builtin_neon_vqtbl4_v:
  case NEON::BI__builtin_neon_vqtbl4q_v:

  case NEON::BI__builtin_neon_vtbx1_v:
  case NEON::BI__builtin_neon_vqtbx1_v:
  case NEON::BI__builtin_neon_vqtbx1q_v:
  case NEON::BI__builtin_neon_vtbx2_v:
  case NEON::BI__builtin_neon_vqtbx2_v:
  case NEON::BI__builtin_neon_vqtbx2q_v:
  case NEON::BI__builtin_neon_vtbx3_v:
  case NEON::BI__builtin_neon_vqtbx3_v:
  case NEON::BI__builtin_neon_vqtbx3q_v:
  case NEON::BI__builtin_neon_vtbx4_v:
  case NEON::BI__builtin_neon_vqtbx4_v:
  case NEON::BI__builtin_neon_vqtbx4q_v:
    break;
  }

  assert(E->getNumArgs() >= 3);

  // Get the last argument, which specifies the vector type.
  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
  std::optional<llvm::APSInt> Result =
      Arg->getIntegerConstantExpr(CGF.getContext());
  switch (BuiltinID) {
  case NEON::BI__builtin_neon_vtbl1_v: {
    return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 1), nullptr, Ops[1],
                              Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
  }
  case NEON::BI__builtin_neon_vtbl2_v: {
    return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 2), nullptr, Ops[2],
                              Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
  }
  case NEON::BI__builtin_neon_vtbl3_v: {
    return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 3), nullptr, Ops[3],
                              Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
  }
  case NEON::BI__builtin_neon_vtbl4_v: {
    return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 4), nullptr, Ops[4],
                              Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
  }
  case NEON::BI__builtin_neon_vtbx1_v: {
    Value *TblRes =
        packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 1), nullptr, Ops[2], Ty,
                           Intrinsic::aarch64_neon_tbl1, "vtbl1");

    llvm::Constant *EightV = ConstantInt::get(Ty, 8);
    Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
    CmpRes = Builder.CreateSExt(CmpRes, Ty);

    Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
    Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
    return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
  }
  case NEON::BI__builtin_neon_vtbx2_v: {
    return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 2), Ops[0], Ops[3],
                              Ty, Intrinsic::aarch64_neon_tbx1, "vtbx1");
  }
  case NEON::BI__builtin_neon_vtbx3_v: {
    Value *TblRes =
        packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 3), nullptr, Ops[4], Ty,
                           Intrinsic::aarch64_neon_tbl2, "vtbl2");

    llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
    Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
                                       TwentyFourV);
    CmpRes = Builder.CreateSExt(CmpRes, Ty);

    Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
    Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
    return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
  }
  case NEON::BI__builtin_neon_vtbx4_v: {
    return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 4), Ops[0], Ops[5],
                              Ty, Intrinsic::aarch64_neon_tbx2, "vtbx2");
  }
  case NEON::BI__builtin_neon_vqtbl1_v:
  case NEON::BI__builtin_neon_vqtbl1q_v:
    Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
  case NEON::BI__builtin_neon_vqtbl2_v:
  case NEON::BI__builtin_neon_vqtbl2q_v:
    Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
  case NEON::BI__builtin_neon_vqtbl3_v:
  case NEON::BI__builtin_neon_vqtbl3q_v:
    Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
  case NEON::BI__builtin_neon_vqtbl4_v:
  case NEON::BI__builtin_neon_vqtbl4q_v:
    Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
  case NEON::BI__builtin_neon_vqtbx1_v:
  case NEON::BI__builtin_neon_vqtbx1q_v:
    Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
  case NEON::BI__builtin_neon_vqtbx2_v:
  case NEON::BI__builtin_neon_vqtbx2q_v:
    Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
  case NEON::BI__builtin_neon_vqtbx3_v:
  case NEON::BI__builtin_neon_vqtbx3q_v:
    Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
  case NEON::BI__builtin_neon_vqtbx4_v:
  case NEON::BI__builtin_neon_vqtbx4q_v:
    Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
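// vectorWrapScalar16 wraps a 16-bit scalar into lane 0 of a <4 x i16>
// vector so the vector form of an instruction can be used; the result is
// extracted from lane 0 again afterwards.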
Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
  auto *VTy = llvm::FixedVectorType::get(Int16Ty, 4);
  Value *V = PoisonValue::get(VTy);
  llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
  Op = Builder.CreateInsertElement(V, Op, CI);
  return Op;
}
llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags) {
  switch (TypeFlags.getMemEltType()) {
  case SVETypeFlags::MemEltTyDefault:
    return getEltType(TypeFlags);
  case SVETypeFlags::MemEltTyInt8:
    return Builder.getInt8Ty();
  case SVETypeFlags::MemEltTyInt16:
    return Builder.getInt16Ty();
  case SVETypeFlags::MemEltTyInt32:
    return Builder.getInt32Ty();
  case SVETypeFlags::MemEltTyInt64:
    return Builder.getInt64Ty();
  }
  llvm_unreachable("Unknown MemEltType");
}

llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) {
  switch (TypeFlags.getEltType()) {
  default:
    llvm_unreachable("Invalid SVETypeFlag!");
  case SVETypeFlags::EltTyMFloat8:
  case SVETypeFlags::EltTyInt8:
    return Builder.getInt8Ty();
  case SVETypeFlags::EltTyInt16:
    return Builder.getInt16Ty();
  case SVETypeFlags::EltTyInt32:
    return Builder.getInt32Ty();
  case SVETypeFlags::EltTyInt64:
    return Builder.getInt64Ty();
  case SVETypeFlags::EltTyInt128:
    return Builder.getInt128Ty();
  case SVETypeFlags::EltTyFloat16:
    return Builder.getHalfTy();
  case SVETypeFlags::EltTyFloat32:
    return Builder.getFloatTy();
  case SVETypeFlags::EltTyFloat64:
    return Builder.getDoubleTy();
  case SVETypeFlags::EltTyBFloat16:
    return Builder.getBFloatTy();
  case SVETypeFlags::EltTyBool8:
  case SVETypeFlags::EltTyBool16:
  case SVETypeFlags::EltTyBool32:
  case SVETypeFlags::EltTyBool64:
llvm::ScalableVectorType *
CodeGenFunction::getSVEPredType(const SVETypeFlags &TypeFlags) {
  switch (TypeFlags.getEltType()) {
  default: llvm_unreachable("Unhandled SVETypeFlag!");

  case SVETypeFlags::EltTyInt8:
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
  case SVETypeFlags::EltTyInt16:
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
  case SVETypeFlags::EltTyInt32:
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
  case SVETypeFlags::EltTyInt64:
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);

  case SVETypeFlags::EltTyBFloat16:
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
  case SVETypeFlags::EltTyFloat16:
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
  case SVETypeFlags::EltTyFloat32:
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
  case SVETypeFlags::EltTyFloat64:
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);

  case SVETypeFlags::EltTyBool8:
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
  case SVETypeFlags::EltTyBool16:
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
  case SVETypeFlags::EltTyBool32:
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
  case SVETypeFlags::EltTyBool64:
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
llvm::ScalableVectorType *
CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
  switch (TypeFlags.getEltType()) {
  default:
    llvm_unreachable("Invalid SVETypeFlag!");

  case SVETypeFlags::EltTyInt8:
    return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
  case SVETypeFlags::EltTyInt16:
    return llvm::ScalableVectorType::get(Builder.getInt16Ty(), 8);
  case SVETypeFlags::EltTyInt32:
    return llvm::ScalableVectorType::get(Builder.getInt32Ty(), 4);
  case SVETypeFlags::EltTyInt64:
    return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2);

  case SVETypeFlags::EltTyMFloat8:
    return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
  case SVETypeFlags::EltTyFloat16:
    return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8);
  case SVETypeFlags::EltTyBFloat16:
    return llvm::ScalableVectorType::get(Builder.getBFloatTy(), 8);
  case SVETypeFlags::EltTyFloat32:
    return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4);
  case SVETypeFlags::EltTyFloat64:
    return llvm::ScalableVectorType::get(Builder.getDoubleTy(), 2);

  case SVETypeFlags::EltTyBool8:
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
  case SVETypeFlags::EltTyBool16:
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
  case SVETypeFlags::EltTyBool32:
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
  case SVETypeFlags::EltTyBool64:
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);

  return llvm::ScalableVectorType::get(EltTy, NumElts);
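// In LLVM IR every SVE predicate register is ultimately an
// <vscale x 16 x i1> svbool_t; EmitSVEPredicateCast converts between that
// register form and the <vscale x N x i1> type an intrinsic expects, via
// the aarch64.sve.convert.{to,from}.svbool intrinsics.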
Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,
                                             llvm::ScalableVectorType *VTy) {
  if (isa<TargetExtType>(Pred->getType()) &&
      cast<TargetExtType>(Pred->getType())->getName() == "aarch64.svcount")
    return Pred;

  auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy);

  unsigned IntID;
  llvm::Type *IntrinsicTy;
  switch (VTy->getMinNumElements()) {
  default:
    llvm_unreachable("unsupported element count!");
  case 1:
  case 2:
  case 4:
  case 8:
    IntID = Intrinsic::aarch64_sve_convert_from_svbool;
    IntrinsicTy = RTy;
    break;
  case 16:
    IntID = Intrinsic::aarch64_sve_convert_to_svbool;
    IntrinsicTy = Pred->getType();
    break;
  }

  assert(C->getType() == RTy && "Unexpected return type!");

Value *CodeGenFunction::EmitSVEPredicateTupleCast(Value *PredTuple,
                                                  llvm::StructType *Ty) {
  if (PredTuple->getType() == Ty)
    return PredTuple;

  Value *Ret = llvm::PoisonValue::get(Ty);
  for (unsigned I = 0; I < Ty->getNumElements(); ++I) {
    Value *Pred = Builder.CreateExtractValue(PredTuple, I);
    Pred = EmitSVEPredicateCast(
        Pred, cast<llvm::ScalableVectorType>(Ty->getTypeAtIndex(I)));
    Ret = Builder.CreateInsertValue(Ret, Pred, I);
  }
  auto *OverloadedTy =

  if (Ops[1]->getType()->isVectorTy())

  Ops[0] = EmitSVEPredicateCast(
      Ops[0], cast<llvm::ScalableVectorType>(F->getArg(0)->getType()));

  if (Ops.size() == 2) {
    assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
    Ops.push_back(ConstantInt::get(Int64Ty, 0));
  }

  if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
    unsigned BytesPerElt =
        OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
    Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
  }
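// Scatter stores mirror the gather path: the data operand is moved to the
// front, truncated to the memory element type if needed, the predicate is
// cast to match the intrinsic, and vector offsets are scaled from element
// indices to byte offsets unless the builtin is already byte-indexed.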
  auto *OverloadedTy =

  Ops.insert(Ops.begin(), Ops.pop_back_val());

  if (Ops[2]->getType()->isVectorTy())

  if (Ops.size() == 3) {
    assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
    Ops.push_back(ConstantInt::get(Int64Ty, 0));
  }

  Ops[0] = Builder.CreateTrunc(Ops[0], OverloadedTy);

  Ops[1] = EmitSVEPredicateCast(
      Ops[1], cast<llvm::ScalableVectorType>(F->getArg(1)->getType()));

  if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
    unsigned BytesPerElt =
        OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
    Ops[3] = Builder.CreateShl(Ops[3], Log2_32(BytesPerElt));
  }

  return Builder.CreateCall(F, Ops);
  auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Ops[1]->getType());
  if (!OverloadedTy)
    OverloadedTy = cast<llvm::ScalableVectorType>(Ops[2]->getType());

  if (Ops[1]->getType()->isVectorTy()) {
    if (Ops.size() == 3) {
      Ops.push_back(ConstantInt::get(Int64Ty, 0));

      std::swap(Ops[2], Ops[3]);

  unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
  if (BytesPerElt > 1)
    Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));

  return Builder.CreateCall(F, Ops);
  llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);

  Value *BasePtr = Ops[1];

  return Builder.CreateCall(F, {Predicate, BasePtr});

  llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);

  unsigned N;
  switch (IntID) {
  case Intrinsic::aarch64_sve_st2:
  case Intrinsic::aarch64_sve_st1_pn_x2:
  case Intrinsic::aarch64_sve_stnt1_pn_x2:
  case Intrinsic::aarch64_sve_st2q:
    N = 2;
    break;
  case Intrinsic::aarch64_sve_st3:
  case Intrinsic::aarch64_sve_st3q:
    N = 3;
    break;
  case Intrinsic::aarch64_sve_st4:
  case Intrinsic::aarch64_sve_st1_pn_x4:
  case Intrinsic::aarch64_sve_stnt1_pn_x4:
  case Intrinsic::aarch64_sve_st4q:
    N = 4;
    break;
  default:
    llvm_unreachable("unknown intrinsic!");
  }

  Value *BasePtr = Ops[1];

  if (Ops.size() > (2 + N))

  SmallVector<Value *, 5> Operands;
  for (unsigned I = Ops.size() - N; I < Ops.size(); ++I)
    Operands.push_back(Ops[I]);
  Operands.append({Predicate, BasePtr});

  return Builder.CreateCall(F, Operands);
                                     unsigned BuiltinID) {

  llvm::ScalableVectorType *Ty = getSVEType(TypeFlags);

  llvm::Type *OverloadedTy = getSVEType(TypeFlags);

                                            unsigned BuiltinID) {
  auto *MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);

  Value *BasePtr = Ops[1];

  Value *PrfOp = Ops.back();

  return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp});
Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
                                          llvm::Type *ReturnTy,
                                          SmallVectorImpl<Value *> &Ops,
                                          unsigned IntrinsicID,
                                          bool IsZExtReturn) {
  if (MemEltTy->isVectorTy()) {
    assert(MemEltTy == FixedVectorType::get(Int8Ty, 1) &&
           "Only <1 x i8> expected");
    MemEltTy = cast<llvm::VectorType>(MemEltTy)->getElementType();
  }

  auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy);
  llvm::ScalableVectorType *MemoryTy = nullptr;
  llvm::ScalableVectorType *PredTy = nullptr;
  bool IsQuadLoad = false;
  switch (IntrinsicID) {
  case Intrinsic::aarch64_sve_ld1uwq:
  case Intrinsic::aarch64_sve_ld1udq:
    MemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
    PredTy = llvm::ScalableVectorType::get(
        IntegerType::get(getLLVMContext(), 1), 1);
    IsQuadLoad = true;
    break;
  default:
    MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
    break;
  }

  Value *BasePtr = Ops[1];

  auto *Load =
      cast<llvm::Instruction>(Builder.CreateCall(F, {Predicate, BasePtr}));

  return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)
                      : Builder.CreateSExt(Load, VectorTy);
                                           unsigned IntrinsicID) {
  if (MemEltTy->isVectorTy()) {
    assert(MemEltTy == FixedVectorType::get(Int8Ty, 1) &&
           "Only <1 x i8> expected");
    MemEltTy = cast<llvm::VectorType>(MemEltTy)->getElementType();
  }

  auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType());
  auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);

  auto PredTy = MemoryTy;
  auto AddrMemoryTy = MemoryTy;
  bool IsQuadStore = false;

  switch (IntrinsicID) {
  case Intrinsic::aarch64_sve_st1wq:
  case Intrinsic::aarch64_sve_st1dq:
    AddrMemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
    PredTy =
        llvm::ScalableVectorType::get(IntegerType::get(getLLVMContext(), 1), 1);
    IsQuadStore = true;
    break;

  Value *BasePtr = Ops[1];

  if (Ops.size() == 4)

  Value *Val =
      IsQuadStore ? Ops.back() : Builder.CreateTrunc(Ops.back(), MemoryTy);

  auto *Store =
      cast<llvm::Instruction>(Builder.CreateCall(F, {Val, Predicate, BasePtr}));
  NewOps.push_back(Ops[2]);

  llvm::Value *BasePtr = Ops[3];
  llvm::Value *RealSlice = Ops[1];

  if (Ops.size() == 5) {
    llvm::Value *StreamingVectorLengthCall =
        Builder.CreateCall(StreamingVectorLength);
    llvm::Value *Mulvl =
        Builder.CreateMul(StreamingVectorLengthCall, Ops[4], "mulvl");

    RealSlice = Builder.CreateAdd(RealSlice, Ops[4]);
  }
  NewOps.push_back(BasePtr);
  NewOps.push_back(Ops[0]);
  NewOps.push_back(RealSlice);
  return Builder.CreateCall(F, NewOps);

  return Builder.CreateCall(F, Ops);

  if (Ops.size() == 0)
    Ops.push_back(llvm::ConstantInt::get(Int32Ty, 255));
  return Builder.CreateCall(F, Ops);

  if (Ops.size() == 2)
    Ops.push_back(Builder.getInt32(0));
  return Builder.CreateCall(F, Ops);
Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
  return Builder.CreateVectorSplat(
      cast<llvm::VectorType>(Ty)->getElementCount(), Scalar);
}

Value *CodeGenFunction::EmitSVEDupX(Value *Scalar) {
  if (auto *Ty = Scalar->getType(); Ty->isVectorTy()) {
    auto *VecTy = cast<llvm::VectorType>(Ty);
    ElementCount EC = VecTy->getElementCount();
    assert(EC.isScalar() && VecTy->getElementType() == Int8Ty &&
           "Only <1 x i8> expected");
    Scalar = Builder.CreateExtractElement(Scalar, uint64_t(0));
  }

Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) {
  if (auto *StructTy = dyn_cast<StructType>(Ty)) {
    Value *Tuple = llvm::PoisonValue::get(Ty);

    for (unsigned I = 0; I < StructTy->getNumElements(); ++I) {
      Value *In = Builder.CreateExtractValue(Val, I);
      Value *Out = Builder.CreateBitCast(In, StructTy->getTypeAtIndex(I));
      Tuple = Builder.CreateInsertValue(Tuple, Out, I);
    }

    return Tuple;
  }

  return Builder.CreateBitCast(Val, Ty);
}

static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty,
                                      SmallVectorImpl<Value *> &Ops) {
  auto *SplatZero = Constant::getNullValue(Ty);
  Ops.insert(Ops.begin(), SplatZero);
}

static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty,
                                       SmallVectorImpl<Value *> &Ops) {
  auto *SplatUndef = UndefValue::get(Ty);
  Ops.insert(Ops.begin(), SplatUndef);
}

SmallVector<llvm::Type *, 2>
CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,
                                     llvm::Type *ResultType,
                                     ArrayRef<Value *> Ops) {
  llvm::Type *DefaultType = getSVEType(TypeFlags);

    return {DefaultType, Ops[1]->getType()};

    return {Ops[0]->getType(), Ops.back()->getType()};

  if (TypeFlags.isReductionQV() && !ResultType->isScalableTy() &&
      ResultType->isVectorTy())
    return {ResultType, Ops[1]->getType()};

  return {DefaultType};
}

Value *CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags,
                                             ArrayRef<Value *> Ops) {
  assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) &&
         "Expects TypleFlags.isTupleSet() or TypeFlags.isTupleGet()");
  unsigned Idx = cast<ConstantInt>(Ops[1])->getZExtValue();

  if (TypeFlags.isTupleSet())
    return Builder.CreateInsertValue(Ops[0], Ops[2], Idx);
  return Builder.CreateExtractValue(Ops[0], Idx);
}

Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
                                           llvm::Type *Ty,
                                           ArrayRef<Value *> Ops) {
  assert(TypeFlags.isTupleCreate() && "Expects TypleFlag isTupleCreate");

  Value *Tuple = llvm::PoisonValue::get(Ty);
  for (unsigned Idx = 0; Idx < Ops.size(); Idx++)
    Tuple = Builder.CreateInsertValue(Tuple, Ops[Idx], Idx);
  unsigned ICEArguments = 0;

  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
    bool IsICE = ICEArguments & (1 << i);

      std::optional<llvm::APSInt> Result =
          E->getArg(i)->getIntegerConstantExpr(getContext());
      assert(Result && "Expected argument to be a constant");

    if (isa<StructType>(Arg->getType()) && !IsTupleGetOrSet) {
      for (unsigned I = 0; I < Arg->getType()->getStructNumElements(); ++I)
        Ops.push_back(Builder.CreateExtractValue(Arg, I));

  if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
      BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) {

  if (TypeFlags.isUndef())
    return UndefValue::get(Ty);
  else if (Builtin->LLVMIntrinsic != 0) {
        Ops.pop_back_val());
    if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)

    if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)

      Ops.push_back(Builder.getInt32(/*SV_ALL*/ 31));

      Ops.insert(&Ops[1], Builder.getInt32(/*SV_ALL*/ 31));

    for (Value *&Op : Ops)
      if (auto PredTy = dyn_cast<llvm::VectorType>(Op->getType()))
        if (PredTy->getElementType()->isIntegerTy(1))

      std::swap(Ops[1], Ops[2]);

      std::swap(Ops[1], Ops[2]);

      std::swap(Ops[1], Ops[2]);

      std::swap(Ops[1], Ops[3]);

    if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
      llvm::Type *OpndTy = Ops[1]->getType();
      auto *SplatZero = Constant::getNullValue(OpndTy);
      Ops[1] = Builder.CreateSelect(Ops[0], Ops[1], SplatZero);
    }

    if (Call->getType() == Ty)

    if (auto PredTy = dyn_cast<llvm::ScalableVectorType>(Ty))

    if (auto PredTupleTy = dyn_cast<llvm::StructType>(Ty))

    llvm_unreachable("unsupported element count!");
  switch (BuiltinID) {

  case SVE::BI__builtin_sve_svreinterpret_b: {
    llvm::Function *CastFromSVCountF =
        CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
    return Builder.CreateCall(CastFromSVCountF, Ops[0]);
  }
  case SVE::BI__builtin_sve_svreinterpret_c: {
    llvm::Function *CastToSVCountF =
        CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
    return Builder.CreateCall(CastToSVCountF, Ops[0]);
  }

  case SVE::BI__builtin_sve_svpsel_lane_b8:
  case SVE::BI__builtin_sve_svpsel_lane_b16:
  case SVE::BI__builtin_sve_svpsel_lane_b32:
  case SVE::BI__builtin_sve_svpsel_lane_b64:
  case SVE::BI__builtin_sve_svpsel_lane_c8:
  case SVE::BI__builtin_sve_svpsel_lane_c16:
  case SVE::BI__builtin_sve_svpsel_lane_c32:
  case SVE::BI__builtin_sve_svpsel_lane_c64: {
    bool IsSVCount = isa<TargetExtType>(Ops[0]->getType());
    assert(((!IsSVCount || cast<TargetExtType>(Ops[0]->getType())->getName() ==
                               "aarch64.svcount")) &&
           "Unexpected TargetExtType");
    llvm::Function *CastFromSVCountF =
        CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
    llvm::Function *CastToSVCountF =
        CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);

    llvm::Value *Ops0 =
        IsSVCount ? Builder.CreateCall(CastFromSVCountF, Ops[0]) : Ops[0];
    llvm::Value *PSel = Builder.CreateCall(F, {Ops0, Ops1, Ops[2]});
    return IsSVCount ? Builder.CreateCall(CastToSVCountF, PSel) : PSel;
  }
  case SVE::BI__builtin_sve_svmov_b_z: {
    // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
    llvm::Type *OverloadedTy = getSVEType(TypeFlags);
    return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]});
  }

  case SVE::BI__builtin_sve_svnot_b_z: {
    // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg)
    llvm::Type *OverloadedTy = getSVEType(TypeFlags);
    return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]});
  }

  case SVE::BI__builtin_sve_svmovlb_u16:
  case SVE::BI__builtin_sve_svmovlb_u32:
  case SVE::BI__builtin_sve_svmovlb_u64:
    return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb);

  case SVE::BI__builtin_sve_svmovlb_s16:
  case SVE::BI__builtin_sve_svmovlb_s32:
  case SVE::BI__builtin_sve_svmovlb_s64:
    return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb);

  case SVE::BI__builtin_sve_svmovlt_u16:
  case SVE::BI__builtin_sve_svmovlt_u32:
  case SVE::BI__builtin_sve_svmovlt_u64:
    return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt);

  case SVE::BI__builtin_sve_svmovlt_s16:
  case SVE::BI__builtin_sve_svmovlt_s32:
  case SVE::BI__builtin_sve_svmovlt_s64:
    return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt);

  case SVE::BI__builtin_sve_svpmullt_u16:
  case SVE::BI__builtin_sve_svpmullt_u64:
  case SVE::BI__builtin_sve_svpmullt_n_u16:
  case SVE::BI__builtin_sve_svpmullt_n_u64:
    return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair);

  case SVE::BI__builtin_sve_svpmullb_u16:
  case SVE::BI__builtin_sve_svpmullb_u64:
  case SVE::BI__builtin_sve_svpmullb_n_u16:
  case SVE::BI__builtin_sve_svpmullb_n_u64:
    return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair);

  case SVE::BI__builtin_sve_svdup_n_b8:
  case SVE::BI__builtin_sve_svdup_n_b16:
  case SVE::BI__builtin_sve_svdup_n_b32:
  case SVE::BI__builtin_sve_svdup_n_b64: {
    Value *CmpNE =
        Builder.CreateICmpNE(Ops[0], Constant::getNullValue(Ops[0]->getType()));
    llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags);

  case SVE::BI__builtin_sve_svdupq_n_b8:
  case SVE::BI__builtin_sve_svdupq_n_b16:
  case SVE::BI__builtin_sve_svdupq_n_b32:
  case SVE::BI__builtin_sve_svdupq_n_b64:
  case SVE::BI__builtin_sve_svdupq_n_u8:
  case SVE::BI__builtin_sve_svdupq_n_s8:
  case SVE::BI__builtin_sve_svdupq_n_u64:
  case SVE::BI__builtin_sve_svdupq_n_f64:
  case SVE::BI__builtin_sve_svdupq_n_s64:
  case SVE::BI__builtin_sve_svdupq_n_u16:
  case SVE::BI__builtin_sve_svdupq_n_f16:
  case SVE::BI__builtin_sve_svdupq_n_bf16:
  case SVE::BI__builtin_sve_svdupq_n_s16:
  case SVE::BI__builtin_sve_svdupq_n_u32:
  case SVE::BI__builtin_sve_svdupq_n_f32:
  case SVE::BI__builtin_sve_svdupq_n_s32: {
    unsigned NumOpnds = Ops.size();

    bool IsBoolTy =
        cast<llvm::VectorType>(Ty)->getElementType()->isIntegerTy(1);

    llvm::Type *EltTy = Ops[0]->getType();

    SmallVector<Value *, 16> VecOps;
    for (unsigned I = 0; I < NumOpnds; ++I)
      VecOps.push_back(Builder.CreateZExt(Ops[I], EltTy));

        OverloadedTy, PoisonValue::get(OverloadedTy), Vec, uint64_t(0));

                       : Intrinsic::aarch64_sve_cmpne_wide,

  case SVE::BI__builtin_sve_svpfalse_b:
    return ConstantInt::getFalse(Ty);

  case SVE::BI__builtin_sve_svpfalse_c: {
    auto SVBoolTy = ScalableVectorType::get(Builder.getInt1Ty(), 16);
    return Builder.CreateCall(CastToSVCountF, ConstantInt::getFalse(SVBoolTy));
  }

  case SVE::BI__builtin_sve_svlen_bf16:
  case SVE::BI__builtin_sve_svlen_f16:
  case SVE::BI__builtin_sve_svlen_f32:
  case SVE::BI__builtin_sve_svlen_f64:
  case SVE::BI__builtin_sve_svlen_s8:
  case SVE::BI__builtin_sve_svlen_s16:
  case SVE::BI__builtin_sve_svlen_s32:
  case SVE::BI__builtin_sve_svlen_s64:
  case SVE::BI__builtin_sve_svlen_u8:
  case SVE::BI__builtin_sve_svlen_u16:
  case SVE::BI__builtin_sve_svlen_u32:
  case SVE::BI__builtin_sve_svlen_u64: {

  case SVE::BI__builtin_sve_svtbl2_u8:
  case SVE::BI__builtin_sve_svtbl2_s8:
  case SVE::BI__builtin_sve_svtbl2_u16:
  case SVE::BI__builtin_sve_svtbl2_s16:
  case SVE::BI__builtin_sve_svtbl2_u32:
  case SVE::BI__builtin_sve_svtbl2_s32:
  case SVE::BI__builtin_sve_svtbl2_u64:
  case SVE::BI__builtin_sve_svtbl2_s64:
  case SVE::BI__builtin_sve_svtbl2_f16:
  case SVE::BI__builtin_sve_svtbl2_bf16:
  case SVE::BI__builtin_sve_svtbl2_f32:
  case SVE::BI__builtin_sve_svtbl2_f64: {
    return Builder.CreateCall(F, Ops);
  }

  case SVE::BI__builtin_sve_svset_neonq_s8:
  case SVE::BI__builtin_sve_svset_neonq_s16:
  case SVE::BI__builtin_sve_svset_neonq_s32:
  case SVE::BI__builtin_sve_svset_neonq_s64:
  case SVE::BI__builtin_sve_svset_neonq_u8:
  case SVE::BI__builtin_sve_svset_neonq_u16:
  case SVE::BI__builtin_sve_svset_neonq_u32:
  case SVE::BI__builtin_sve_svset_neonq_u64:
  case SVE::BI__builtin_sve_svset_neonq_f16:
  case SVE::BI__builtin_sve_svset_neonq_f32:
  case SVE::BI__builtin_sve_svset_neonq_f64:
  case SVE::BI__builtin_sve_svset_neonq_bf16: {
    return Builder.CreateInsertVector(Ty, Ops[0], Ops[1], uint64_t(0));
  }

  case SVE::BI__builtin_sve_svget_neonq_s8:
  case SVE::BI__builtin_sve_svget_neonq_s16:
  case SVE::BI__builtin_sve_svget_neonq_s32:
  case SVE::BI__builtin_sve_svget_neonq_s64:
  case SVE::BI__builtin_sve_svget_neonq_u8:
  case SVE::BI__builtin_sve_svget_neonq_u16:
  case SVE::BI__builtin_sve_svget_neonq_u32:
  case SVE::BI__builtin_sve_svget_neonq_u64:
  case SVE::BI__builtin_sve_svget_neonq_f16:
  case SVE::BI__builtin_sve_svget_neonq_f32:
  case SVE::BI__builtin_sve_svget_neonq_f64:
  case SVE::BI__builtin_sve_svget_neonq_bf16: {
    return Builder.CreateExtractVector(Ty, Ops[0], uint64_t(0));
  }

  case SVE::BI__builtin_sve_svdup_neonq_s8:
  case SVE::BI__builtin_sve_svdup_neonq_s16:
  case SVE::BI__builtin_sve_svdup_neonq_s32:
  case SVE::BI__builtin_sve_svdup_neonq_s64:
  case SVE::BI__builtin_sve_svdup_neonq_u8:
  case SVE::BI__builtin_sve_svdup_neonq_u16:
  case SVE::BI__builtin_sve_svdup_neonq_u32:
  case SVE::BI__builtin_sve_svdup_neonq_u64:
  case SVE::BI__builtin_sve_svdup_neonq_f16:
4854 case SVE::BI__builtin_sve_svdup_neonq_f32:
4855 case SVE::BI__builtin_sve_svdup_neonq_f64:
4856 case SVE::BI__builtin_sve_svdup_neonq_bf16: {
4857 Value *Insert =
Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
4859 return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},
4860 {Insert,
Builder.getInt64(0)});
4871 switch (BuiltinID) {
4874 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x1:
4877 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x2:
4878 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x2:
4881 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x4:
4882 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x4:
4888 for (
unsigned I = 0; I < MultiVec; ++I)
4889 std::swap(Ops[I + 1], Ops[I + 1 + MultiVec]);
4905 else if (BuiltinID == SME::BI__builtin_sme_svzero_mask_za ||
4906 BuiltinID == SME::BI__builtin_sme_svzero_za)
4908 else if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za ||
4909 BuiltinID == SME::BI__builtin_sme_svstr_vnum_za ||
4910 BuiltinID == SME::BI__builtin_sme_svldr_za ||
4911 BuiltinID == SME::BI__builtin_sme_svstr_za)
4917 Ops.pop_back_val());
4922 if (
Builtin->LLVMIntrinsic == 0)
4926 for (
Value *&Op : Ops)
4927 if (
auto PredTy = dyn_cast<llvm::VectorType>(Op->getType()))
4928 if (PredTy->getElementType()->isIntegerTy(1))
4936 return Builder.CreateCall(F, Ops);
4943 llvm::Metadata *Ops[] = {llvm::MDString::get(Context,
"x18")};
4944 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
4945 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
4948 llvm::Value *X18 = CGF.
Builder.CreateCall(F, Metadata);
4954 llvm::Triple::ArchType
Arch) {
4963 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
4964 return EmitAArch64CpuSupports(
E);
4966 unsigned HintID =
static_cast<unsigned>(-1);
4967 switch (BuiltinID) {
4969 case clang::AArch64::BI__builtin_arm_nop:
4972 case clang::AArch64::BI__builtin_arm_yield:
4973 case clang::AArch64::BI__yield:
4976 case clang::AArch64::BI__builtin_arm_wfe:
4977 case clang::AArch64::BI__wfe:
4980 case clang::AArch64::BI__builtin_arm_wfi:
4981 case clang::AArch64::BI__wfi:
4984 case clang::AArch64::BI__builtin_arm_sev:
4985 case clang::AArch64::BI__sev:
4988 case clang::AArch64::BI__builtin_arm_sevl:
4989 case clang::AArch64::BI__sevl:
4994 if (HintID !=
static_cast<unsigned>(-1)) {
4996 return Builder.CreateCall(F, llvm::ConstantInt::get(
Int32Ty, HintID));
4999 if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) {
5005 if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
5010 "__arm_sme_state"));
5012 "aarch64_pstate_sm_compatible");
5013 CI->setAttributes(Attrs);
5016 AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
5023 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
5025 "rbit of unusual size!");
5028 CGM.
getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg,
"rbit");
5030 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
5032 "rbit of unusual size!");
5035 CGM.
getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg,
"rbit");
5038 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||
5039 BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {
5043 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)
5048 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
5053 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
5059 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
5060 BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
5062 llvm::Type *Ty = Arg->getType();
5067 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
5068 BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
5070 llvm::Type *Ty = Arg->getType();
5075 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
5076 BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
5078 llvm::Type *Ty = Arg->getType();
5083 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
5084 BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
5086 llvm::Type *Ty = Arg->getType();
5091 if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
5093 "__jcvt of unusual size!");
5099 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
5100 BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
5101 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
5102 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
5106 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
5110 llvm::Value *Val =
Builder.CreateCall(F, MemAddr);
5112 for (
size_t i = 0; i < 8; i++) {
5113 llvm::Value *ValOffsetPtr =
5124 Args.push_back(MemAddr);
5125 for (
size_t i = 0; i < 8; i++) {
5126 llvm::Value *ValOffsetPtr =
5133 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
5134 ? Intrinsic::aarch64_st64b
5135 : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
5136 ? Intrinsic::aarch64_st64bv
5137 : Intrinsic::aarch64_st64bv0);
5139 return Builder.CreateCall(F, Args);
5143 if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
5144 BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
5146 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
5147 ? Intrinsic::aarch64_rndr
5148 : Intrinsic::aarch64_rndrrs);
5150 llvm::Value *Val =
Builder.CreateCall(F);
5151 Value *RandomValue =
Builder.CreateExtractValue(Val, 0);
5160 if (BuiltinID == clang::AArch64::BI__clear_cache) {
5161 assert(
E->getNumArgs() == 2 &&
"__clear_cache takes 2 arguments");
5164 for (
unsigned i = 0; i < 2; i++)
5167 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
5168 StringRef Name = FD->
getName();
5172 if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
5173 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
5177 ? Intrinsic::aarch64_ldaxp
5178 : Intrinsic::aarch64_ldxp);
5185 llvm::Type *Int128Ty = llvm::IntegerType::get(
getLLVMContext(), 128);
5186 Val0 =
Builder.CreateZExt(Val0, Int128Ty);
5187 Val1 =
Builder.CreateZExt(Val1, Int128Ty);
5189 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
5190 Val =
Builder.CreateShl(Val0, ShiftCst,
"shl",
true );
5191 Val =
Builder.CreateOr(Val, Val1);
5193 }
else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
5194 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
5204 ? Intrinsic::aarch64_ldaxr
5205 : Intrinsic::aarch64_ldxr,
5207 CallInst *Val =
Builder.CreateCall(F, LoadAddr,
"ldxr");
5211 if (RealResTy->isPointerTy())
5212 return Builder.CreateIntToPtr(Val, RealResTy);
5214 llvm::Type *IntResTy = llvm::IntegerType::get(
5216 return Builder.CreateBitCast(
Builder.CreateTruncOrBitCast(Val, IntResTy),
5220 if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
5221 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
5225 ? Intrinsic::aarch64_stlxp
5226 : Intrinsic::aarch64_stxp);
5238 return Builder.CreateCall(F, {Arg0, Arg1, StPtr},
"stxp");
5241 if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
5242 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
5247 llvm::Type *StoreTy =
5250 if (StoreVal->
getType()->isPointerTy())
5253 llvm::Type *
IntTy = llvm::IntegerType::get(
5262 ? Intrinsic::aarch64_stlxr
5263 : Intrinsic::aarch64_stxr,
5265 CallInst *CI =
Builder.CreateCall(F, {StoreVal, StoreAddr},
"stxr");
5267 1, Attribute::get(
getLLVMContext(), Attribute::ElementType, StoreTy));
5271 if (BuiltinID == clang::AArch64::BI__getReg) {
5274 llvm_unreachable(
"Sema will ensure that the parameter is constant");
5280 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
5281 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
5282 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
5286 return Builder.CreateCall(F, Metadata);
5289 if (BuiltinID == clang::AArch64::BI__break) {
5292 llvm_unreachable(
"Sema will ensure that the parameter is constant");
5298 if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
5303 if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
5304 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
5305 llvm::SyncScope::SingleThread);
5308 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
5309 switch (BuiltinID) {
5310 case clang::AArch64::BI__builtin_arm_crc32b:
5311 CRCIntrinsicID = Intrinsic::aarch64_crc32b;
break;
5312 case clang::AArch64::BI__builtin_arm_crc32cb:
5313 CRCIntrinsicID = Intrinsic::aarch64_crc32cb;
break;
5314 case clang::AArch64::BI__builtin_arm_crc32h:
5315 CRCIntrinsicID = Intrinsic::aarch64_crc32h;
break;
5316 case clang::AArch64::BI__builtin_arm_crc32ch:
5317 CRCIntrinsicID = Intrinsic::aarch64_crc32ch;
break;
5318 case clang::AArch64::BI__builtin_arm_crc32w:
5319 CRCIntrinsicID = Intrinsic::aarch64_crc32w;
break;
5320 case clang::AArch64::BI__builtin_arm_crc32cw:
5321 CRCIntrinsicID = Intrinsic::aarch64_crc32cw;
break;
5322 case clang::AArch64::BI__builtin_arm_crc32d:
5323 CRCIntrinsicID = Intrinsic::aarch64_crc32x;
break;
5324 case clang::AArch64::BI__builtin_arm_crc32cd:
5325 CRCIntrinsicID = Intrinsic::aarch64_crc32cx;
break;
5328 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
5333 llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
5334 Arg1 =
Builder.CreateZExtOrBitCast(Arg1, DataTy);
5336 return Builder.CreateCall(F, {Arg0, Arg1});
5340 if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
5347 CGM.
getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
5351 Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
5352 switch (BuiltinID) {
5353 case clang::AArch64::BI__builtin_arm_irg:
5354 MTEIntrinsicID = Intrinsic::aarch64_irg;
break;
5355 case clang::AArch64::BI__builtin_arm_addg:
5356 MTEIntrinsicID = Intrinsic::aarch64_addg;
break;
5357 case clang::AArch64::BI__builtin_arm_gmi:
5358 MTEIntrinsicID = Intrinsic::aarch64_gmi;
break;
5359 case clang::AArch64::BI__builtin_arm_ldg:
5360 MTEIntrinsicID = Intrinsic::aarch64_ldg;
break;
5361 case clang::AArch64::BI__builtin_arm_stg:
5362 MTEIntrinsicID = Intrinsic::aarch64_stg;
break;
5363 case clang::AArch64::BI__builtin_arm_subp:
5364 MTEIntrinsicID = Intrinsic::aarch64_subp;
break;
5367 if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
5368 if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
5376 if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
5382 {Pointer, TagOffset});
5384 if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
5395 if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
5398 {TagAddress, TagAddress});
5403 if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
5406 {TagAddress, TagAddress});
5408 if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
5416 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
5417 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
5418 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
5419 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
5420 BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
5421 BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
5422 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
5423 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {
5426 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
5427 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
5428 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
5429 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
5432 bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
5433 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;
5435 bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
5436 BuiltinID == clang::AArch64::BI__builtin_arm_wsr;
5438 bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
5439 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;
5441 llvm::Type *ValueType;
5445 }
else if (Is128Bit) {
5446 llvm::Type *Int128Ty =
5448 ValueType = Int128Ty;
5450 }
else if (IsPointerBuiltin) {
5460 if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
5461 BuiltinID == clang::AArch64::BI_WriteStatusReg ||
5462 BuiltinID == clang::AArch64::BI__sys) {
5468 std::string SysRegStr;
5469 unsigned SysRegOp0 = (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
5470 BuiltinID == clang::AArch64::BI_WriteStatusReg)
5471 ? ((1 << 1) | ((SysReg >> 14) & 1))
5473 llvm::raw_string_ostream(SysRegStr)
5474 << SysRegOp0 <<
":" << ((SysReg >> 11) & 7) <<
":"
5475 << ((SysReg >> 7) & 15) <<
":" << ((SysReg >> 3) & 15) <<
":"
5478 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
5479 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
5480 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
5485 if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
5486 llvm::Function *F =
CGM.
getIntrinsic(Intrinsic::read_register, Types);
5488 return Builder.CreateCall(F, Metadata);
5491 llvm::Function *F =
CGM.
getIntrinsic(Intrinsic::write_register, Types);
5493 llvm::Value *
Result =
Builder.CreateCall(F, {Metadata, ArgValue});
5494 if (BuiltinID == clang::AArch64::BI__sys) {
5502 if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
5508 if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
5513 if (BuiltinID == clang::AArch64::BI__mulh ||
5514 BuiltinID == clang::AArch64::BI__umulh) {
5516 llvm::Type *Int128Ty = llvm::IntegerType::get(
getLLVMContext(), 128);
5518 bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
5524 Value *MulResult, *HigherBits;
5526 MulResult =
Builder.CreateNSWMul(LHS, RHS);
5527 HigherBits =
Builder.CreateAShr(MulResult, 64);
5529 MulResult =
Builder.CreateNUWMul(LHS, RHS);
5530 HigherBits =
Builder.CreateLShr(MulResult, 64);
5532 HigherBits =
Builder.CreateIntCast(HigherBits, ResType, IsSigned);
5537 if (BuiltinID == AArch64::BI__writex18byte ||
5538 BuiltinID == AArch64::BI__writex18word ||
5539 BuiltinID == AArch64::BI__writex18dword ||
5540 BuiltinID == AArch64::BI__writex18qword) {
5556 if (BuiltinID == AArch64::BI__readx18byte ||
5557 BuiltinID == AArch64::BI__readx18word ||
5558 BuiltinID == AArch64::BI__readx18dword ||
5559 BuiltinID == AArch64::BI__readx18qword) {
5574 if (BuiltinID == AArch64::BI__addx18byte ||
5575 BuiltinID == AArch64::BI__addx18word ||
5576 BuiltinID == AArch64::BI__addx18dword ||
5577 BuiltinID == AArch64::BI__addx18qword ||
5578 BuiltinID == AArch64::BI__incx18byte ||
5579 BuiltinID == AArch64::BI__incx18word ||
5580 BuiltinID == AArch64::BI__incx18dword ||
5581 BuiltinID == AArch64::BI__incx18qword) {
5584 switch (BuiltinID) {
5585 case AArch64::BI__incx18byte:
5589 case AArch64::BI__incx18word:
5593 case AArch64::BI__incx18dword:
5597 case AArch64::BI__incx18qword:
5603 isIncrement =
false;
5628 if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
5629 BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
5630 BuiltinID == AArch64::BI_CopyInt32FromFloat ||
5631 BuiltinID == AArch64::BI_CopyInt64FromDouble) {
5634 return Builder.CreateBitCast(Arg, RetTy);
5637 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
5638 BuiltinID == AArch64::BI_CountLeadingOnes64 ||
5639 BuiltinID == AArch64::BI_CountLeadingZeros ||
5640 BuiltinID == AArch64::BI_CountLeadingZeros64) {
5642 llvm::Type *ArgType = Arg->
getType();
5644 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
5645 BuiltinID == AArch64::BI_CountLeadingOnes64)
5646 Arg =
Builder.CreateXor(Arg, Constant::getAllOnesValue(ArgType));
5651 if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||
5652 BuiltinID == AArch64::BI_CountLeadingZeros64)
5657 if (BuiltinID == AArch64::BI_CountLeadingSigns ||
5658 BuiltinID == AArch64::BI_CountLeadingSigns64) {
5661 Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)
5666 if (BuiltinID == AArch64::BI_CountLeadingSigns64)
5671 if (BuiltinID == AArch64::BI_CountOneBits ||
5672 BuiltinID == AArch64::BI_CountOneBits64) {
5674 llvm::Type *ArgType = ArgValue->
getType();
5678 if (BuiltinID == AArch64::BI_CountOneBits64)
5683 if (BuiltinID == AArch64::BI__prefetch) {
5692 if (BuiltinID == AArch64::BI__hlt) {
5698 return ConstantInt::get(
Builder.getInt32Ty(), 0);
5701 if (BuiltinID == NEON::BI__builtin_neon_vcvth_bf16_f32)
5709 if (std::optional<MSVCIntrin> MsvcIntId =
5715 return P.first == BuiltinID;
5718 BuiltinID = It->second;
5722 unsigned ICEArguments = 0;
5729 for (
unsigned i = 0, e =
E->getNumArgs() - 1; i != e; i++) {
5731 switch (BuiltinID) {
5732 case NEON::BI__builtin_neon_vld1_v:
5733 case NEON::BI__builtin_neon_vld1q_v:
5734 case NEON::BI__builtin_neon_vld1_dup_v:
5735 case NEON::BI__builtin_neon_vld1q_dup_v:
5736 case NEON::BI__builtin_neon_vld1_lane_v:
5737 case NEON::BI__builtin_neon_vld1q_lane_v:
5738 case NEON::BI__builtin_neon_vst1_v:
5739 case NEON::BI__builtin_neon_vst1q_v:
5740 case NEON::BI__builtin_neon_vst1_lane_v:
5741 case NEON::BI__builtin_neon_vst1q_lane_v:
5742 case NEON::BI__builtin_neon_vldap1_lane_s64:
5743 case NEON::BI__builtin_neon_vldap1q_lane_s64:
5744 case NEON::BI__builtin_neon_vstl1_lane_s64:
5745 case NEON::BI__builtin_neon_vstl1q_lane_s64:
5763 assert(
Result &&
"SISD intrinsic should have been handled");
5767 const Expr *Arg =
E->getArg(
E->getNumArgs()-1);
5769 if (std::optional<llvm::APSInt>
Result =
5774 bool usgn =
Type.isUnsigned();
5775 bool quad =
Type.isQuad();
5778 switch (BuiltinID) {
5780 case NEON::BI__builtin_neon_vabsh_f16:
5783 case NEON::BI__builtin_neon_vaddq_p128: {
5786 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
5787 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
5788 Ops[0] =
Builder.CreateXor(Ops[0], Ops[1]);
5789 llvm::Type *Int128Ty = llvm::Type::getIntNTy(
getLLVMContext(), 128);
5790 return Builder.CreateBitCast(Ops[0], Int128Ty);
5792 case NEON::BI__builtin_neon_vldrq_p128: {
5793 llvm::Type *Int128Ty = llvm::Type::getIntNTy(
getLLVMContext(), 128);
5798 case NEON::BI__builtin_neon_vstrq_p128: {
5799 Value *Ptr = Ops[0];
5802 case NEON::BI__builtin_neon_vcvts_f32_u32:
5803 case NEON::BI__builtin_neon_vcvtd_f64_u64:
5806 case NEON::BI__builtin_neon_vcvts_f32_s32:
5807 case NEON::BI__builtin_neon_vcvtd_f64_s64: {
5809 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5812 Ops[0] =
Builder.CreateBitCast(Ops[0], InTy);
5814 return Builder.CreateUIToFP(Ops[0], FTy);
5815 return Builder.CreateSIToFP(Ops[0], FTy);
5817 case NEON::BI__builtin_neon_vcvth_f16_u16:
5818 case NEON::BI__builtin_neon_vcvth_f16_u32:
5819 case NEON::BI__builtin_neon_vcvth_f16_u64:
5822 case NEON::BI__builtin_neon_vcvth_f16_s16:
5823 case NEON::BI__builtin_neon_vcvth_f16_s32:
5824 case NEON::BI__builtin_neon_vcvth_f16_s64: {
5826 llvm::Type *FTy =
HalfTy;
5828 if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
5830 else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
5834 Ops[0] =
Builder.CreateBitCast(Ops[0], InTy);
5836 return Builder.CreateUIToFP(Ops[0], FTy);
5837 return Builder.CreateSIToFP(Ops[0], FTy);
5839 case NEON::BI__builtin_neon_vcvtah_u16_f16:
5840 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
5841 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
5842 case NEON::BI__builtin_neon_vcvtph_u16_f16:
5843 case NEON::BI__builtin_neon_vcvth_u16_f16:
5844 case NEON::BI__builtin_neon_vcvtah_s16_f16:
5845 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
5846 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
5847 case NEON::BI__builtin_neon_vcvtph_s16_f16:
5848 case NEON::BI__builtin_neon_vcvth_s16_f16: {
5851 llvm::Type* FTy =
HalfTy;
5852 llvm::Type *Tys[2] = {InTy, FTy};
5854 switch (BuiltinID) {
5855 default: llvm_unreachable(
"missing builtin ID in switch!");
5856 case NEON::BI__builtin_neon_vcvtah_u16_f16:
5857 Int = Intrinsic::aarch64_neon_fcvtau;
break;
5858 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
5859 Int = Intrinsic::aarch64_neon_fcvtmu;
break;
5860 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
5861 Int = Intrinsic::aarch64_neon_fcvtnu;
break;
5862 case NEON::BI__builtin_neon_vcvtph_u16_f16:
5863 Int = Intrinsic::aarch64_neon_fcvtpu;
break;
5864 case NEON::BI__builtin_neon_vcvth_u16_f16:
5865 Int = Intrinsic::aarch64_neon_fcvtzu;
break;
5866 case NEON::BI__builtin_neon_vcvtah_s16_f16:
5867 Int = Intrinsic::aarch64_neon_fcvtas;
break;
5868 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
5869 Int = Intrinsic::aarch64_neon_fcvtms;
break;
5870 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
5871 Int = Intrinsic::aarch64_neon_fcvtns;
break;
5872 case NEON::BI__builtin_neon_vcvtph_s16_f16:
5873 Int = Intrinsic::aarch64_neon_fcvtps;
break;
5874 case NEON::BI__builtin_neon_vcvth_s16_f16:
5875 Int = Intrinsic::aarch64_neon_fcvtzs;
break;
5879 case NEON::BI__builtin_neon_vcaleh_f16:
5880 case NEON::BI__builtin_neon_vcalth_f16:
5881 case NEON::BI__builtin_neon_vcageh_f16:
5882 case NEON::BI__builtin_neon_vcagth_f16: {
5885 llvm::Type* FTy =
HalfTy;
5886 llvm::Type *Tys[2] = {InTy, FTy};
5888 switch (BuiltinID) {
5889 default: llvm_unreachable(
"missing builtin ID in switch!");
5890 case NEON::BI__builtin_neon_vcageh_f16:
5891 Int = Intrinsic::aarch64_neon_facge;
break;
5892 case NEON::BI__builtin_neon_vcagth_f16:
5893 Int = Intrinsic::aarch64_neon_facgt;
break;
5894 case NEON::BI__builtin_neon_vcaleh_f16:
5895 Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]);
break;
5896 case NEON::BI__builtin_neon_vcalth_f16:
5897 Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]);
break;
5902 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
5903 case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
5906 llvm::Type* FTy =
HalfTy;
5907 llvm::Type *Tys[2] = {InTy, FTy};
5909 switch (BuiltinID) {
5910 default: llvm_unreachable(
"missing builtin ID in switch!");
5911 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
5912 Int = Intrinsic::aarch64_neon_vcvtfp2fxs;
break;
5913 case NEON::BI__builtin_neon_vcvth_n_u16_f16:
5914 Int = Intrinsic::aarch64_neon_vcvtfp2fxu;
break;
5919 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
5920 case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
5922 llvm::Type* FTy =
HalfTy;
5924 llvm::Type *Tys[2] = {FTy, InTy};
5926 switch (BuiltinID) {
5927 default: llvm_unreachable(
"missing builtin ID in switch!");
5928 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
5929 Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
5930 Ops[0] =
Builder.CreateSExt(Ops[0], InTy,
"sext");
5932 case NEON::BI__builtin_neon_vcvth_n_f16_u16:
5933 Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
5934 Ops[0] =
Builder.CreateZExt(Ops[0], InTy);
5939 case NEON::BI__builtin_neon_vpaddd_s64: {
5940 auto *Ty = llvm::FixedVectorType::get(
Int64Ty, 2);
5943 Vec =
Builder.CreateBitCast(Vec, Ty,
"v2i64");
5944 llvm::Value *Idx0 = llvm::ConstantInt::get(
SizeTy, 0);
5945 llvm::Value *Idx1 = llvm::ConstantInt::get(
SizeTy, 1);
5946 Value *Op0 =
Builder.CreateExtractElement(Vec, Idx0,
"lane0");
5947 Value *Op1 =
Builder.CreateExtractElement(Vec, Idx1,
"lane1");
5949 return Builder.CreateAdd(Op0, Op1,
"vpaddd");
5951 case NEON::BI__builtin_neon_vpaddd_f64: {
5952 auto *Ty = llvm::FixedVectorType::get(
DoubleTy, 2);
5955 Vec =
Builder.CreateBitCast(Vec, Ty,
"v2f64");
5956 llvm::Value *Idx0 = llvm::ConstantInt::get(
SizeTy, 0);
5957 llvm::Value *Idx1 = llvm::ConstantInt::get(
SizeTy, 1);
5958 Value *Op0 =
Builder.CreateExtractElement(Vec, Idx0,
"lane0");
5959 Value *Op1 =
Builder.CreateExtractElement(Vec, Idx1,
"lane1");
5961 return Builder.CreateFAdd(Op0, Op1,
"vpaddd");
5963 case NEON::BI__builtin_neon_vpadds_f32: {
5964 auto *Ty = llvm::FixedVectorType::get(
FloatTy, 2);
5967 Vec =
Builder.CreateBitCast(Vec, Ty,
"v2f32");
5968 llvm::Value *Idx0 = llvm::ConstantInt::get(
SizeTy, 0);
5969 llvm::Value *Idx1 = llvm::ConstantInt::get(
SizeTy, 1);
5970 Value *Op0 =
Builder.CreateExtractElement(Vec, Idx0,
"lane0");
5971 Value *Op1 =
Builder.CreateExtractElement(Vec, Idx1,
"lane1");
5973 return Builder.CreateFAdd(Op0, Op1,
"vpaddd");
5975 case NEON::BI__builtin_neon_vceqzd_s64:
5979 ICmpInst::ICMP_EQ,
"vceqz");
5980 case NEON::BI__builtin_neon_vceqzd_f64:
5981 case NEON::BI__builtin_neon_vceqzs_f32:
5982 case NEON::BI__builtin_neon_vceqzh_f16:
5986 ICmpInst::FCMP_OEQ,
"vceqz");
5987 case NEON::BI__builtin_neon_vcgezd_s64:
5991 ICmpInst::ICMP_SGE,
"vcgez");
5992 case NEON::BI__builtin_neon_vcgezd_f64:
5993 case NEON::BI__builtin_neon_vcgezs_f32:
5994 case NEON::BI__builtin_neon_vcgezh_f16:
5998 ICmpInst::FCMP_OGE,
"vcgez");
5999 case NEON::BI__builtin_neon_vclezd_s64:
6003 ICmpInst::ICMP_SLE,
"vclez");
6004 case NEON::BI__builtin_neon_vclezd_f64:
6005 case NEON::BI__builtin_neon_vclezs_f32:
6006 case NEON::BI__builtin_neon_vclezh_f16:
6010 ICmpInst::FCMP_OLE,
"vclez");
6011 case NEON::BI__builtin_neon_vcgtzd_s64:
6015 ICmpInst::ICMP_SGT,
"vcgtz");
6016 case NEON::BI__builtin_neon_vcgtzd_f64:
6017 case NEON::BI__builtin_neon_vcgtzs_f32:
6018 case NEON::BI__builtin_neon_vcgtzh_f16:
6022 ICmpInst::FCMP_OGT,
"vcgtz");
6023 case NEON::BI__builtin_neon_vcltzd_s64:
6027 ICmpInst::ICMP_SLT,
"vcltz");
6029 case NEON::BI__builtin_neon_vcltzd_f64:
6030 case NEON::BI__builtin_neon_vcltzs_f32:
6031 case NEON::BI__builtin_neon_vcltzh_f16:
6035 ICmpInst::FCMP_OLT,
"vcltz");
6037 case NEON::BI__builtin_neon_vceqzd_u64: {
6041 Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(
Int64Ty));
6044 case NEON::BI__builtin_neon_vceqd_f64:
6045 case NEON::BI__builtin_neon_vcled_f64:
6046 case NEON::BI__builtin_neon_vcltd_f64:
6047 case NEON::BI__builtin_neon_vcged_f64:
6048 case NEON::BI__builtin_neon_vcgtd_f64: {
6049 llvm::CmpInst::Predicate
P;
6050 switch (BuiltinID) {
6051 default: llvm_unreachable(
"missing builtin ID in switch!");
6052 case NEON::BI__builtin_neon_vceqd_f64:
P = llvm::FCmpInst::FCMP_OEQ;
break;
6053 case NEON::BI__builtin_neon_vcled_f64:
P = llvm::FCmpInst::FCMP_OLE;
break;
6054 case NEON::BI__builtin_neon_vcltd_f64:
P = llvm::FCmpInst::FCMP_OLT;
break;
6055 case NEON::BI__builtin_neon_vcged_f64:
P = llvm::FCmpInst::FCMP_OGE;
break;
6056 case NEON::BI__builtin_neon_vcgtd_f64:
P = llvm::FCmpInst::FCMP_OGT;
break;
6061 if (
P == llvm::FCmpInst::FCMP_OEQ)
6062 Ops[0] =
Builder.CreateFCmp(
P, Ops[0], Ops[1]);
6064 Ops[0] =
Builder.CreateFCmpS(
P, Ops[0], Ops[1]);
6067 case NEON::BI__builtin_neon_vceqs_f32:
6068 case NEON::BI__builtin_neon_vcles_f32:
6069 case NEON::BI__builtin_neon_vclts_f32:
6070 case NEON::BI__builtin_neon_vcges_f32:
6071 case NEON::BI__builtin_neon_vcgts_f32: {
6072 llvm::CmpInst::Predicate
P;
6073 switch (BuiltinID) {
6074 default: llvm_unreachable(
"missing builtin ID in switch!");
6075 case NEON::BI__builtin_neon_vceqs_f32:
P = llvm::FCmpInst::FCMP_OEQ;
break;
6076 case NEON::BI__builtin_neon_vcles_f32:
P = llvm::FCmpInst::FCMP_OLE;
break;
6077 case NEON::BI__builtin_neon_vclts_f32:
P = llvm::FCmpInst::FCMP_OLT;
break;
6078 case NEON::BI__builtin_neon_vcges_f32:
P = llvm::FCmpInst::FCMP_OGE;
break;
6079 case NEON::BI__builtin_neon_vcgts_f32:
P = llvm::FCmpInst::FCMP_OGT;
break;
6084 if (
P == llvm::FCmpInst::FCMP_OEQ)
6085 Ops[0] =
Builder.CreateFCmp(
P, Ops[0], Ops[1]);
6087 Ops[0] =
Builder.CreateFCmpS(
P, Ops[0], Ops[1]);
6090 case NEON::BI__builtin_neon_vceqh_f16:
6091 case NEON::BI__builtin_neon_vcleh_f16:
6092 case NEON::BI__builtin_neon_vclth_f16:
6093 case NEON::BI__builtin_neon_vcgeh_f16:
6094 case NEON::BI__builtin_neon_vcgth_f16: {
6095 llvm::CmpInst::Predicate
P;
6096 switch (BuiltinID) {
6097 default: llvm_unreachable(
"missing builtin ID in switch!");
6098 case NEON::BI__builtin_neon_vceqh_f16:
P = llvm::FCmpInst::FCMP_OEQ;
break;
6099 case NEON::BI__builtin_neon_vcleh_f16:
P = llvm::FCmpInst::FCMP_OLE;
break;
6100 case NEON::BI__builtin_neon_vclth_f16:
P = llvm::FCmpInst::FCMP_OLT;
break;
6101 case NEON::BI__builtin_neon_vcgeh_f16:
P = llvm::FCmpInst::FCMP_OGE;
break;
6102 case NEON::BI__builtin_neon_vcgth_f16:
P = llvm::FCmpInst::FCMP_OGT;
break;
6107 if (
P == llvm::FCmpInst::FCMP_OEQ)
6108 Ops[0] =
Builder.CreateFCmp(
P, Ops[0], Ops[1]);
6110 Ops[0] =
Builder.CreateFCmpS(
P, Ops[0], Ops[1]);
6113 case NEON::BI__builtin_neon_vceqd_s64:
6114 case NEON::BI__builtin_neon_vceqd_u64:
6115 case NEON::BI__builtin_neon_vcgtd_s64:
6116 case NEON::BI__builtin_neon_vcgtd_u64:
6117 case NEON::BI__builtin_neon_vcltd_s64:
6118 case NEON::BI__builtin_neon_vcltd_u64:
6119 case NEON::BI__builtin_neon_vcged_u64:
6120 case NEON::BI__builtin_neon_vcged_s64:
6121 case NEON::BI__builtin_neon_vcled_u64:
6122 case NEON::BI__builtin_neon_vcled_s64: {
6123 llvm::CmpInst::Predicate
P;
6124 switch (BuiltinID) {
6125 default: llvm_unreachable(
"missing builtin ID in switch!");
6126 case NEON::BI__builtin_neon_vceqd_s64:
6127 case NEON::BI__builtin_neon_vceqd_u64:
P = llvm::ICmpInst::ICMP_EQ;
break;
6128 case NEON::BI__builtin_neon_vcgtd_s64:
P = llvm::ICmpInst::ICMP_SGT;
break;
6129 case NEON::BI__builtin_neon_vcgtd_u64:
P = llvm::ICmpInst::ICMP_UGT;
break;
6130 case NEON::BI__builtin_neon_vcltd_s64:
P = llvm::ICmpInst::ICMP_SLT;
break;
6131 case NEON::BI__builtin_neon_vcltd_u64:
P = llvm::ICmpInst::ICMP_ULT;
break;
6132 case NEON::BI__builtin_neon_vcged_u64:
P = llvm::ICmpInst::ICMP_UGE;
break;
6133 case NEON::BI__builtin_neon_vcged_s64:
P = llvm::ICmpInst::ICMP_SGE;
break;
6134 case NEON::BI__builtin_neon_vcled_u64:
P = llvm::ICmpInst::ICMP_ULE;
break;
6135 case NEON::BI__builtin_neon_vcled_s64:
P = llvm::ICmpInst::ICMP_SLE;
break;
6140 Ops[0] =
Builder.CreateICmp(
P, Ops[0], Ops[1]);
6143 case NEON::BI__builtin_neon_vtstd_s64:
6144 case NEON::BI__builtin_neon_vtstd_u64: {
6148 Ops[0] =
Builder.CreateAnd(Ops[0], Ops[1]);
6149 Ops[0] =
Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
6150 llvm::Constant::getNullValue(
Int64Ty));
6153 case NEON::BI__builtin_neon_vset_lane_i8:
6154 case NEON::BI__builtin_neon_vset_lane_i16:
6155 case NEON::BI__builtin_neon_vset_lane_i32:
6156 case NEON::BI__builtin_neon_vset_lane_i64:
6157 case NEON::BI__builtin_neon_vset_lane_bf16:
6158 case NEON::BI__builtin_neon_vset_lane_f32:
6159 case NEON::BI__builtin_neon_vsetq_lane_i8:
6160 case NEON::BI__builtin_neon_vsetq_lane_i16:
6161 case NEON::BI__builtin_neon_vsetq_lane_i32:
6162 case NEON::BI__builtin_neon_vsetq_lane_i64:
6163 case NEON::BI__builtin_neon_vsetq_lane_bf16:
6164 case NEON::BI__builtin_neon_vsetq_lane_f32:
6166 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2],
"vset_lane");
6167 case NEON::BI__builtin_neon_vset_lane_f64:
6170 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(
DoubleTy, 1));
6172 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2],
"vset_lane");
6173 case NEON::BI__builtin_neon_vset_lane_mf8:
6174 case NEON::BI__builtin_neon_vsetq_lane_mf8:
6179 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2],
"vset_lane");
6180 case NEON::BI__builtin_neon_vsetq_lane_f64:
6183 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(
DoubleTy, 2));
6185 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2],
"vset_lane");
6187 case NEON::BI__builtin_neon_vget_lane_i8:
6188 case NEON::BI__builtin_neon_vdupb_lane_i8:
6190 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
Int8Ty, 8));
6193 case NEON::BI__builtin_neon_vgetq_lane_i8:
6194 case NEON::BI__builtin_neon_vdupb_laneq_i8:
6196 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
Int8Ty, 16));
6199 case NEON::BI__builtin_neon_vget_lane_mf8:
6200 case NEON::BI__builtin_neon_vdupb_lane_mf8:
6201 case NEON::BI__builtin_neon_vgetq_lane_mf8:
6202 case NEON::BI__builtin_neon_vdupb_laneq_mf8:
6205 case NEON::BI__builtin_neon_vget_lane_i16:
6206 case NEON::BI__builtin_neon_vduph_lane_i16:
6208 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
Int16Ty, 4));
6211 case NEON::BI__builtin_neon_vgetq_lane_i16:
6212 case NEON::BI__builtin_neon_vduph_laneq_i16:
6214 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
Int16Ty, 8));
6217 case NEON::BI__builtin_neon_vget_lane_i32:
6218 case NEON::BI__builtin_neon_vdups_lane_i32:
6220 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
Int32Ty, 2));
6223 case NEON::BI__builtin_neon_vdups_lane_f32:
6225 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
FloatTy, 2));
6228 case NEON::BI__builtin_neon_vgetq_lane_i32:
6229 case NEON::BI__builtin_neon_vdups_laneq_i32:
6231 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
Int32Ty, 4));
6234 case NEON::BI__builtin_neon_vget_lane_i64:
6235 case NEON::BI__builtin_neon_vdupd_lane_i64:
6237 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
Int64Ty, 1));
6240 case NEON::BI__builtin_neon_vdupd_lane_f64:
6242 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
DoubleTy, 1));
6245 case NEON::BI__builtin_neon_vgetq_lane_i64:
6246 case NEON::BI__builtin_neon_vdupd_laneq_i64:
6248 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
Int64Ty, 2));
6251 case NEON::BI__builtin_neon_vget_lane_f32:
6253 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
FloatTy, 2));
6256 case NEON::BI__builtin_neon_vget_lane_f64:
6258 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
DoubleTy, 1));
6261 case NEON::BI__builtin_neon_vgetq_lane_f32:
6262 case NEON::BI__builtin_neon_vdups_laneq_f32:
6264 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
FloatTy, 4));
6267 case NEON::BI__builtin_neon_vgetq_lane_f64:
6268 case NEON::BI__builtin_neon_vdupd_laneq_f64:
6270 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
DoubleTy, 2));
6273 case NEON::BI__builtin_neon_vaddh_f16:
6275 return Builder.CreateFAdd(Ops[0], Ops[1],
"vaddh");
6276 case NEON::BI__builtin_neon_vsubh_f16:
6278 return Builder.CreateFSub(Ops[0], Ops[1],
"vsubh");
6279 case NEON::BI__builtin_neon_vmulh_f16:
6281 return Builder.CreateFMul(Ops[0], Ops[1],
"vmulh");
6282 case NEON::BI__builtin_neon_vdivh_f16:
6284 return Builder.CreateFDiv(Ops[0], Ops[1],
"vdivh");
6285 case NEON::BI__builtin_neon_vfmah_f16:
6288 *
this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
HalfTy,
6290 case NEON::BI__builtin_neon_vfmsh_f16: {
6295 *
this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
HalfTy,
6298 case NEON::BI__builtin_neon_vaddd_s64:
6299 case NEON::BI__builtin_neon_vaddd_u64:
6301 case NEON::BI__builtin_neon_vsubd_s64:
6302 case NEON::BI__builtin_neon_vsubd_u64:
6304 case NEON::BI__builtin_neon_vqdmlalh_s16:
6305 case NEON::BI__builtin_neon_vqdmlslh_s16: {
6309 auto *VTy = llvm::FixedVectorType::get(
Int32Ty, 4);
6311 ProductOps,
"vqdmlXl");
6312 Constant *CI = ConstantInt::get(
SizeTy, 0);
6313 Ops[1] =
Builder.CreateExtractElement(Ops[1], CI,
"lane0");
6315 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
6316 ? Intrinsic::aarch64_neon_sqadd
6317 : Intrinsic::aarch64_neon_sqsub;
6320 case NEON::BI__builtin_neon_vqshlud_n_s64: {
6326 case NEON::BI__builtin_neon_vqshld_n_u64:
6327 case NEON::BI__builtin_neon_vqshld_n_s64: {
6328 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
6329 ? Intrinsic::aarch64_neon_uqshl
6330 : Intrinsic::aarch64_neon_sqshl;
6335 case NEON::BI__builtin_neon_vrshrd_n_u64:
6336 case NEON::BI__builtin_neon_vrshrd_n_s64: {
6337 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
6338 ? Intrinsic::aarch64_neon_urshl
6339 : Intrinsic::aarch64_neon_srshl;
6341 int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
6342 Ops[1] = ConstantInt::get(
Int64Ty, -SV);
6345 case NEON::BI__builtin_neon_vrsrad_n_u64:
6346 case NEON::BI__builtin_neon_vrsrad_n_s64: {
6347 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
6348 ? Intrinsic::aarch64_neon_urshl
6349 : Intrinsic::aarch64_neon_srshl;
6353 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
6356 case NEON::BI__builtin_neon_vshld_n_s64:
6357 case NEON::BI__builtin_neon_vshld_n_u64: {
6358 llvm::ConstantInt *Amt = cast<ConstantInt>(
EmitScalarExpr(
E->getArg(1)));
6360 Ops[0], ConstantInt::get(
Int64Ty, Amt->getZExtValue()),
"shld_n");
6362 case NEON::BI__builtin_neon_vshrd_n_s64: {
6363 llvm::ConstantInt *Amt = cast<ConstantInt>(
EmitScalarExpr(
E->getArg(1)));
6365 Ops[0], ConstantInt::get(
Int64Ty, std::min(
static_cast<uint64_t
>(63),
6366 Amt->getZExtValue())),
6369 case NEON::BI__builtin_neon_vshrd_n_u64: {
6370 llvm::ConstantInt *Amt = cast<ConstantInt>(
EmitScalarExpr(
E->getArg(1)));
6371 uint64_t ShiftAmt = Amt->getZExtValue();
6374 return ConstantInt::get(
Int64Ty, 0);
6375 return Builder.CreateLShr(Ops[0], ConstantInt::get(
Int64Ty, ShiftAmt),
6378 case NEON::BI__builtin_neon_vsrad_n_s64: {
6379 llvm::ConstantInt *Amt = cast<ConstantInt>(
EmitScalarExpr(
E->getArg(2)));
6381 Ops[1], ConstantInt::get(
Int64Ty, std::min(
static_cast<uint64_t
>(63),
6382 Amt->getZExtValue())),
6384 return Builder.CreateAdd(Ops[0], Ops[1]);
6386 case NEON::BI__builtin_neon_vsrad_n_u64: {
6387 llvm::ConstantInt *Amt = cast<ConstantInt>(
EmitScalarExpr(
E->getArg(2)));
6388 uint64_t ShiftAmt = Amt->getZExtValue();
6393 Ops[1] =
Builder.CreateLShr(Ops[1], ConstantInt::get(
Int64Ty, ShiftAmt),
6395 return Builder.CreateAdd(Ops[0], Ops[1]);
6397 case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
6398 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
6399 case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
6400 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
6406 auto *VTy = llvm::FixedVectorType::get(
Int32Ty, 4);
6408 ProductOps,
"vqdmlXl");
6409 Constant *CI = ConstantInt::get(
SizeTy, 0);
6410 Ops[1] =
Builder.CreateExtractElement(Ops[1], CI,
"lane0");
6413 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
6414 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
6415 ? Intrinsic::aarch64_neon_sqadd
6416 : Intrinsic::aarch64_neon_sqsub;
6419 case NEON::BI__builtin_neon_vqdmlals_s32:
6420 case NEON::BI__builtin_neon_vqdmlsls_s32: {
6422 ProductOps.push_back(Ops[1]);
6426 ProductOps,
"vqdmlXl");
6428 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
6429 ? Intrinsic::aarch64_neon_sqadd
6430 : Intrinsic::aarch64_neon_sqsub;
6433 case NEON::BI__builtin_neon_vqdmlals_lane_s32:
6434 case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
6435 case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
6436 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
6440 ProductOps.push_back(Ops[1]);
6441 ProductOps.push_back(Ops[2]);
6444 ProductOps,
"vqdmlXl");
6447 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
6448 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
6449 ? Intrinsic::aarch64_neon_sqadd
6450 : Intrinsic::aarch64_neon_sqsub;
6453 case NEON::BI__builtin_neon_vget_lane_bf16:
6454 case NEON::BI__builtin_neon_vduph_lane_bf16:
6455 case NEON::BI__builtin_neon_vduph_lane_f16: {
6459 case NEON::BI__builtin_neon_vgetq_lane_bf16:
6460 case NEON::BI__builtin_neon_vduph_laneq_bf16:
6461 case NEON::BI__builtin_neon_vduph_laneq_f16: {
6465 case NEON::BI__builtin_neon_vcvt_bf16_f32: {
6466 llvm::Type *V4F32 = FixedVectorType::get(
Builder.getFloatTy(), 4);
6467 llvm::Type *V4BF16 = FixedVectorType::get(
Builder.getBFloatTy(), 4);
6468 return Builder.CreateFPTrunc(
Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
6470 case NEON::BI__builtin_neon_vcvtq_low_bf16_f32: {
6472 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
6473 llvm::Type *V4F32 = FixedVectorType::get(
Builder.getFloatTy(), 4);
6474 llvm::Type *V4BF16 = FixedVectorType::get(
Builder.getBFloatTy(), 4);
6475 llvm::Value *Trunc =
6476 Builder.CreateFPTrunc(
Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
6477 return Builder.CreateShuffleVector(
6478 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
6480 case NEON::BI__builtin_neon_vcvtq_high_bf16_f32: {
6482 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
6484 std::iota(LoMask.begin(), LoMask.end(), 0);
6485 llvm::Type *V4F32 = FixedVectorType::get(
Builder.getFloatTy(), 4);
6486 llvm::Type *V4BF16 = FixedVectorType::get(
Builder.getBFloatTy(), 4);
6487 llvm::Type *V8BF16 = FixedVectorType::get(
Builder.getBFloatTy(), 8);
6488 llvm::Value *Inactive =
Builder.CreateShuffleVector(
6489 Builder.CreateBitCast(Ops[0], V8BF16), LoMask);
6490 llvm::Value *Trunc =
6491 Builder.CreateFPTrunc(
Builder.CreateBitCast(Ops[1], V4F32), V4BF16);
6492 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
6495 case clang::AArch64::BI_InterlockedAdd:
6496 case clang::AArch64::BI_InterlockedAdd_acq:
6497 case clang::AArch64::BI_InterlockedAdd_rel:
6498 case clang::AArch64::BI_InterlockedAdd_nf:
6499 case clang::AArch64::BI_InterlockedAdd64:
6500 case clang::AArch64::BI_InterlockedAdd64_acq:
6501 case clang::AArch64::BI_InterlockedAdd64_rel:
6502 case clang::AArch64::BI_InterlockedAdd64_nf: {
6505 llvm::AtomicOrdering Ordering;
6506 switch (BuiltinID) {
6507 case clang::AArch64::BI_InterlockedAdd:
6508 case clang::AArch64::BI_InterlockedAdd64:
6509 Ordering = llvm::AtomicOrdering::SequentiallyConsistent;
6511 case clang::AArch64::BI_InterlockedAdd_acq:
6512 case clang::AArch64::BI_InterlockedAdd64_acq:
6513 Ordering = llvm::AtomicOrdering::Acquire;
6515 case clang::AArch64::BI_InterlockedAdd_rel:
6516 case clang::AArch64::BI_InterlockedAdd64_rel:
6517 Ordering = llvm::AtomicOrdering::Release;
6519 case clang::AArch64::BI_InterlockedAdd_nf:
6520 case clang::AArch64::BI_InterlockedAdd64_nf:
6521 Ordering = llvm::AtomicOrdering::Monotonic;
6524 llvm_unreachable(
"missing builtin ID in switch!");
6526 AtomicRMWInst *RMWI =
6528 return Builder.CreateAdd(RMWI, Val);
6533 llvm::Type *Ty = VTy;
6552 bool ExtractLow =
false;
6553 bool ExtendLaneArg =
false;
6554 switch (BuiltinID) {
6555 default:
return nullptr;
6556 case NEON::BI__builtin_neon_vbsl_v:
6557 case NEON::BI__builtin_neon_vbslq_v: {
6558 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
6559 Ops[0] =
Builder.CreateBitCast(Ops[0], BitTy,
"vbsl");
6560 Ops[1] =
Builder.CreateBitCast(Ops[1], BitTy,
"vbsl");
6561 Ops[2] =
Builder.CreateBitCast(Ops[2], BitTy,
"vbsl");
6563 Ops[1] =
Builder.CreateAnd(Ops[0], Ops[1],
"vbsl");
6564 Ops[2] =
Builder.CreateAnd(
Builder.CreateNot(Ops[0]), Ops[2],
"vbsl");
6565 Ops[0] =
Builder.CreateOr(Ops[1], Ops[2],
"vbsl");
6566 return Builder.CreateBitCast(Ops[0], Ty);
6568 case NEON::BI__builtin_neon_vfma_lane_v:
6569 case NEON::BI__builtin_neon_vfmaq_lane_v: {
6572 Value *Addend = Ops[0];
6573 Value *Multiplicand = Ops[1];
6574 Value *LaneSource = Ops[2];
6575 Ops[0] = Multiplicand;
6576 Ops[1] = LaneSource;
6580 auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
6581 ? llvm::FixedVectorType::get(VTy->getElementType(),
6582 VTy->getNumElements() / 2)
6584 llvm::Constant *cst = cast<Constant>(Ops[3]);
6585 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
6586 Ops[1] =
Builder.CreateBitCast(Ops[1], SourceTy);
6587 Ops[1] =
Builder.CreateShuffleVector(Ops[1], Ops[1], SV,
"lane");
6590 Int =
Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
6594 case NEON::BI__builtin_neon_vfma_laneq_v: {
6595 auto *VTy = cast<llvm::FixedVectorType>(Ty);
6597 if (VTy && VTy->getElementType() ==
DoubleTy) {
6600 llvm::FixedVectorType *VTy =
6602 Ops[2] =
Builder.CreateBitCast(Ops[2], VTy);
6603 Ops[2] =
Builder.CreateExtractElement(Ops[2], Ops[3],
"extract");
6606 *
this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
6607 DoubleTy, {Ops[1], Ops[2], Ops[0]});
6610 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
6611 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
6613 auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
6614 VTy->getNumElements() * 2);
6615 Ops[2] =
Builder.CreateBitCast(Ops[2], STy);
6616 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
6617 cast<ConstantInt>(Ops[3]));
6618 Ops[2] =
Builder.CreateShuffleVector(Ops[2], Ops[2], SV,
"lane");
6621 *
this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6622 {Ops[2], Ops[1], Ops[0]});
6624 case NEON::BI__builtin_neon_vfmaq_laneq_v: {
6625 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
6626 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
6628 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
6631 *
this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6632 {Ops[2], Ops[1], Ops[0]});
6634 case NEON::BI__builtin_neon_vfmah_lane_f16:
6635 case NEON::BI__builtin_neon_vfmas_lane_f32:
6636 case NEON::BI__builtin_neon_vfmah_laneq_f16:
6637 case NEON::BI__builtin_neon_vfmas_laneq_f32:
6638 case NEON::BI__builtin_neon_vfmad_lane_f64:
6639 case NEON::BI__builtin_neon_vfmad_laneq_f64: {
6642 Ops[2] =
Builder.CreateExtractElement(Ops[2], Ops[3],
"extract");
6644 *
this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6645 {Ops[1], Ops[2], Ops[0]});
6647 case NEON::BI__builtin_neon_vmull_v:
6649 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
6650 if (
Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
6652 case NEON::BI__builtin_neon_vmax_v:
6653 case NEON::BI__builtin_neon_vmaxq_v:
6655 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
6656 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
6658 case NEON::BI__builtin_neon_vmaxh_f16: {
6660 Int = Intrinsic::aarch64_neon_fmax;
6663 case NEON::BI__builtin_neon_vmin_v:
6664 case NEON::BI__builtin_neon_vminq_v:
6666 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
6667 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
6669 case NEON::BI__builtin_neon_vminh_f16: {
6671 Int = Intrinsic::aarch64_neon_fmin;
6674 case NEON::BI__builtin_neon_vabd_v:
6675 case NEON::BI__builtin_neon_vabdq_v:
6677 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
6678 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
6680 case NEON::BI__builtin_neon_vpadal_v:
6681 case NEON::BI__builtin_neon_vpadalq_v: {
6682 unsigned ArgElts = VTy->getNumElements();
6683 llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
6684 unsigned BitWidth = EltTy->getBitWidth();
6685 auto *ArgTy = llvm::FixedVectorType::get(
6686 llvm::IntegerType::get(
getLLVMContext(), BitWidth / 2), 2 * ArgElts);
6687 llvm::Type* Tys[2] = { VTy, ArgTy };
6688 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
6690 TmpOps.push_back(Ops[1]);
6693 llvm::Value *addend =
Builder.CreateBitCast(Ops[0], tmp->getType());
6694 return Builder.CreateAdd(tmp, addend);
6696 case NEON::BI__builtin_neon_vpmin_v:
6697 case NEON::BI__builtin_neon_vpminq_v:
6699 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
6700 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
6702 case NEON::BI__builtin_neon_vpmax_v:
6703 case NEON::BI__builtin_neon_vpmaxq_v:
6705 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
6706 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
6708 case NEON::BI__builtin_neon_vminnm_v:
6709 case NEON::BI__builtin_neon_vminnmq_v:
6710 Int = Intrinsic::aarch64_neon_fminnm;
6712 case NEON::BI__builtin_neon_vminnmh_f16:
6714 Int = Intrinsic::aarch64_neon_fminnm;
6716 case NEON::BI__builtin_neon_vmaxnm_v:
6717 case NEON::BI__builtin_neon_vmaxnmq_v:
6718 Int = Intrinsic::aarch64_neon_fmaxnm;
6720 case NEON::BI__builtin_neon_vmaxnmh_f16:
6722 Int = Intrinsic::aarch64_neon_fmaxnm;
6724 case NEON::BI__builtin_neon_vrecpss_f32: {
6729 case NEON::BI__builtin_neon_vrecpsd_f64:
6733 case NEON::BI__builtin_neon_vrecpsh_f16:
6737 case NEON::BI__builtin_neon_vqshrun_n_v:
6738 Int = Intrinsic::aarch64_neon_sqshrun;
6740 case NEON::BI__builtin_neon_vqrshrun_n_v:
6741 Int = Intrinsic::aarch64_neon_sqrshrun;
6743 case NEON::BI__builtin_neon_vqshrn_n_v:
6744 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
6746 case NEON::BI__builtin_neon_vrshrn_n_v:
6747 Int = Intrinsic::aarch64_neon_rshrn;
6749 case NEON::BI__builtin_neon_vqrshrn_n_v:
6750 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
6752 case NEON::BI__builtin_neon_vrndah_f16: {
6754 Int =
Builder.getIsFPConstrained()
6755 ? Intrinsic::experimental_constrained_round
6759 case NEON::BI__builtin_neon_vrnda_v:
6760 case NEON::BI__builtin_neon_vrndaq_v: {
6761 Int =
Builder.getIsFPConstrained()
6762 ? Intrinsic::experimental_constrained_round
6766 case NEON::BI__builtin_neon_vrndih_f16: {
6768 Int =
Builder.getIsFPConstrained()
6769 ? Intrinsic::experimental_constrained_nearbyint
6770 : Intrinsic::nearbyint;
6773 case NEON::BI__builtin_neon_vrndmh_f16: {
6775 Int =
Builder.getIsFPConstrained()
6776 ? Intrinsic::experimental_constrained_floor
6780 case NEON::BI__builtin_neon_vrndm_v:
6781 case NEON::BI__builtin_neon_vrndmq_v: {
6782 Int =
Builder.getIsFPConstrained()
6783 ? Intrinsic::experimental_constrained_floor
6787 case NEON::BI__builtin_neon_vrndnh_f16: {
6789 Int =
Builder.getIsFPConstrained()
6790 ? Intrinsic::experimental_constrained_roundeven
6791 : Intrinsic::roundeven;
6794 case NEON::BI__builtin_neon_vrndn_v:
6795 case NEON::BI__builtin_neon_vrndnq_v: {
6796 Int =
Builder.getIsFPConstrained()
6797 ? Intrinsic::experimental_constrained_roundeven
6798 : Intrinsic::roundeven;
6801 case NEON::BI__builtin_neon_vrndns_f32: {
6803 Int =
Builder.getIsFPConstrained()
6804 ? Intrinsic::experimental_constrained_roundeven
6805 : Intrinsic::roundeven;
6808 case NEON::BI__builtin_neon_vrndph_f16: {
6810 Int =
Builder.getIsFPConstrained()
6811 ? Intrinsic::experimental_constrained_ceil
6815 case NEON::BI__builtin_neon_vrndp_v:
6816 case NEON::BI__builtin_neon_vrndpq_v: {
6817 Int =
Builder.getIsFPConstrained()
6818 ? Intrinsic::experimental_constrained_ceil
6822 case NEON::BI__builtin_neon_vrndxh_f16: {
6824 Int =
Builder.getIsFPConstrained()
6825 ? Intrinsic::experimental_constrained_rint
6829 case NEON::BI__builtin_neon_vrndx_v:
6830 case NEON::BI__builtin_neon_vrndxq_v: {
6831 Int =
Builder.getIsFPConstrained()
6832 ? Intrinsic::experimental_constrained_rint
6836 case NEON::BI__builtin_neon_vrndh_f16: {
6838 Int =
Builder.getIsFPConstrained()
6839 ? Intrinsic::experimental_constrained_trunc
6843 case NEON::BI__builtin_neon_vrnd32x_f32:
6844 case NEON::BI__builtin_neon_vrnd32xq_f32:
6845 case NEON::BI__builtin_neon_vrnd32x_f64:
6846 case NEON::BI__builtin_neon_vrnd32xq_f64: {
6848 Int = Intrinsic::aarch64_neon_frint32x;
6851 case NEON::BI__builtin_neon_vrnd32z_f32:
6852 case NEON::BI__builtin_neon_vrnd32zq_f32:
6853 case NEON::BI__builtin_neon_vrnd32z_f64:
6854 case NEON::BI__builtin_neon_vrnd32zq_f64: {
6856 Int = Intrinsic::aarch64_neon_frint32z;
6859 case NEON::BI__builtin_neon_vrnd64x_f32:
6860 case NEON::BI__builtin_neon_vrnd64xq_f32:
6861 case NEON::BI__builtin_neon_vrnd64x_f64:
6862 case NEON::BI__builtin_neon_vrnd64xq_f64: {
6864 Int = Intrinsic::aarch64_neon_frint64x;
6867 case NEON::BI__builtin_neon_vrnd64z_f32:
6868 case NEON::BI__builtin_neon_vrnd64zq_f32:
6869 case NEON::BI__builtin_neon_vrnd64z_f64:
6870 case NEON::BI__builtin_neon_vrnd64zq_f64: {
6872 Int = Intrinsic::aarch64_neon_frint64z;
6875 case NEON::BI__builtin_neon_vrnd_v:
6876 case NEON::BI__builtin_neon_vrndq_v: {
6877 Int =
Builder.getIsFPConstrained()
6878 ? Intrinsic::experimental_constrained_trunc
6882 case NEON::BI__builtin_neon_vcvt_f64_v:
6883 case NEON::BI__builtin_neon_vcvtq_f64_v:
6884 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
6886 return usgn ?
Builder.CreateUIToFP(Ops[0], Ty,
"vcvt")
6887 :
Builder.CreateSIToFP(Ops[0], Ty,
"vcvt");
6888 case NEON::BI__builtin_neon_vcvt_f64_f32: {
6890 "unexpected vcvt_f64_f32 builtin");
6894 return Builder.CreateFPExt(Ops[0], Ty,
"vcvt");
6896 case NEON::BI__builtin_neon_vcvt_f32_f64: {
6898 "unexpected vcvt_f32_f64 builtin");
6902 return Builder.CreateFPTrunc(Ops[0], Ty,
"vcvt");
6904 case NEON::BI__builtin_neon_vcvt_s32_v:
6905 case NEON::BI__builtin_neon_vcvt_u32_v:
6906 case NEON::BI__builtin_neon_vcvt_s64_v:
6907 case NEON::BI__builtin_neon_vcvt_u64_v:
6908 case NEON::BI__builtin_neon_vcvt_s16_f16:
6909 case NEON::BI__builtin_neon_vcvt_u16_f16:
6910 case NEON::BI__builtin_neon_vcvtq_s32_v:
6911 case NEON::BI__builtin_neon_vcvtq_u32_v:
6912 case NEON::BI__builtin_neon_vcvtq_s64_v:
6913 case NEON::BI__builtin_neon_vcvtq_u64_v:
6914 case NEON::BI__builtin_neon_vcvtq_s16_f16:
6915 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
6917 usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
  case NEON::BI__builtin_neon_vcvta_s16_f16:
  case NEON::BI__builtin_neon_vcvta_u16_f16:
  case NEON::BI__builtin_neon_vcvta_s32_v:
  case NEON::BI__builtin_neon_vcvtaq_s16_f16:
  case NEON::BI__builtin_neon_vcvtaq_s32_v:
  case NEON::BI__builtin_neon_vcvta_u32_v:
  case NEON::BI__builtin_neon_vcvtaq_u16_f16:
  case NEON::BI__builtin_neon_vcvtaq_u32_v:
  case NEON::BI__builtin_neon_vcvta_s64_v:
  case NEON::BI__builtin_neon_vcvtaq_s64_v:
  case NEON::BI__builtin_neon_vcvta_u64_v:
  case NEON::BI__builtin_neon_vcvtaq_u64_v: {
    Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
  }
  case NEON::BI__builtin_neon_vcvtm_s16_f16:
  case NEON::BI__builtin_neon_vcvtm_s32_v:
  case NEON::BI__builtin_neon_vcvtmq_s16_f16:
  case NEON::BI__builtin_neon_vcvtmq_s32_v:
  case NEON::BI__builtin_neon_vcvtm_u16_f16:
  case NEON::BI__builtin_neon_vcvtm_u32_v:
  case NEON::BI__builtin_neon_vcvtmq_u16_f16:
  case NEON::BI__builtin_neon_vcvtmq_u32_v:
  case NEON::BI__builtin_neon_vcvtm_s64_v:
  case NEON::BI__builtin_neon_vcvtmq_s64_v:
  case NEON::BI__builtin_neon_vcvtm_u64_v:
  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
    Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
  }
  case NEON::BI__builtin_neon_vcvtn_s16_f16:
  case NEON::BI__builtin_neon_vcvtn_s32_v:
  case NEON::BI__builtin_neon_vcvtnq_s16_f16:
  case NEON::BI__builtin_neon_vcvtnq_s32_v:
  case NEON::BI__builtin_neon_vcvtn_u16_f16:
  case NEON::BI__builtin_neon_vcvtn_u32_v:
  case NEON::BI__builtin_neon_vcvtnq_u16_f16:
  case NEON::BI__builtin_neon_vcvtnq_u32_v:
  case NEON::BI__builtin_neon_vcvtn_s64_v:
  case NEON::BI__builtin_neon_vcvtnq_s64_v:
  case NEON::BI__builtin_neon_vcvtn_u64_v:
  case NEON::BI__builtin_neon_vcvtnq_u64_v: {
    Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
  }
  case NEON::BI__builtin_neon_vcvtp_s16_f16:
  case NEON::BI__builtin_neon_vcvtp_s32_v:
  case NEON::BI__builtin_neon_vcvtpq_s16_f16:
  case NEON::BI__builtin_neon_vcvtpq_s32_v:
  case NEON::BI__builtin_neon_vcvtp_u16_f16:
  case NEON::BI__builtin_neon_vcvtp_u32_v:
  case NEON::BI__builtin_neon_vcvtpq_u16_f16:
  case NEON::BI__builtin_neon_vcvtpq_u32_v:
  case NEON::BI__builtin_neon_vcvtp_s64_v:
  case NEON::BI__builtin_neon_vcvtpq_s64_v:
  case NEON::BI__builtin_neon_vcvtp_u64_v:
  case NEON::BI__builtin_neon_vcvtpq_u64_v: {
    Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
  }
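  // FMULX multiplies like FMUL except that 0.0 * +/-infinity yields +/-2.0
  // instead of a NaN, matching the scalar FMULX instruction.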
  case NEON::BI__builtin_neon_vmulx_v:
  case NEON::BI__builtin_neon_vmulxq_v: {
    Int = Intrinsic::aarch64_neon_fmulx;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
  }
  case NEON::BI__builtin_neon_vmulxh_lane_f16:
  case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
    // Map to the scalar fmulx after extracting the lane.
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
    Ops.pop_back();
    Int = Intrinsic::aarch64_neon_fmulx;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
  }
  case NEON::BI__builtin_neon_vmul_lane_v:
  case NEON::BI__builtin_neon_vmul_laneq_v: {
    // v1f64 vmul_lane is mapped to a scalar fmul on the extracted lane.
    bool Quad = false;
    if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
      Quad = true;
    Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
    llvm::FixedVectorType *VTy =
        GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
    Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
    Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
    return Builder.CreateBitCast(Result, Ty);
  }
  case NEON::BI__builtin_neon_vnegd_s64:
    return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
  case NEON::BI__builtin_neon_vnegh_f16:
    return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");
  case NEON::BI__builtin_neon_vpmaxnm_v:
  case NEON::BI__builtin_neon_vpmaxnmq_v: {
    Int = Intrinsic::aarch64_neon_fmaxnmp;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
  }
  case NEON::BI__builtin_neon_vpminnm_v:
  case NEON::BI__builtin_neon_vpminnmq_v: {
    Int = Intrinsic::aarch64_neon_fminnmp;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
  }
  case NEON::BI__builtin_neon_vsqrth_f16: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_sqrt
              : Intrinsic::sqrt;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
  }
  case NEON::BI__builtin_neon_vsqrt_v:
  case NEON::BI__builtin_neon_vsqrtq_v: {
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_sqrt
              : Intrinsic::sqrt;
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
  }
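  // vrbit reverses the bit order within each 8-bit element, e.g. the byte
  // 0b00000011 becomes 0b11000000.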
  case NEON::BI__builtin_neon_vrbit_v:
  case NEON::BI__builtin_neon_vrbitq_v: {
    Int = Intrinsic::bitreverse;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
  }
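  // Horizontal reductions. The aarch64.neon.*addv/*maxv/*minv intrinsics
  // return an i32 (or the FP element type for the f16 variants), so the
  // integer cases truncate the result back to the element width, e.g.
  // vaddv_u8 reduces a v8i8 down to a single i8.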
  case NEON::BI__builtin_neon_vaddv_u8:
    usgn = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vaddv_s8: {
    Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vaddv_u16:
    usgn = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vaddv_s16: {
    Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vaddvq_u8:
    usgn = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vaddvq_s8: {
    Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 16);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vaddvq_u16:
    usgn = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vaddvq_s16: {
    Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vmaxv_u8: {
    Int = Intrinsic::aarch64_neon_umaxv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vmaxv_u16: {
    Int = Intrinsic::aarch64_neon_umaxv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vmaxvq_u8: {
    Int = Intrinsic::aarch64_neon_umaxv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 16);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vmaxvq_u16: {
    Int = Intrinsic::aarch64_neon_umaxv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vmaxv_s8: {
    Int = Intrinsic::aarch64_neon_smaxv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vmaxv_s16: {
    Int = Intrinsic::aarch64_neon_smaxv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vmaxvq_s8: {
    Int = Intrinsic::aarch64_neon_smaxv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 16);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vmaxvq_s16: {
    Int = Intrinsic::aarch64_neon_smaxv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vmaxv_f16: {
    Int = Intrinsic::aarch64_neon_fmaxv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
  }
  case NEON::BI__builtin_neon_vmaxvq_f16: {
    Int = Intrinsic::aarch64_neon_fmaxv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
  }
  case NEON::BI__builtin_neon_vminv_u8: {
    Int = Intrinsic::aarch64_neon_uminv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vminv_u16: {
    Int = Intrinsic::aarch64_neon_uminv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vminvq_u8: {
    Int = Intrinsic::aarch64_neon_uminv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 16);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vminvq_u16: {
    Int = Intrinsic::aarch64_neon_uminv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vminv_s8: {
    Int = Intrinsic::aarch64_neon_sminv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vminv_s16: {
    Int = Intrinsic::aarch64_neon_sminv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vminvq_s8: {
    Int = Intrinsic::aarch64_neon_sminv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 16);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vminvq_s16: {
    Int = Intrinsic::aarch64_neon_sminv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vminv_f16: {
    Int = Intrinsic::aarch64_neon_fminv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
  }
  case NEON::BI__builtin_neon_vminvq_f16: {
    Int = Intrinsic::aarch64_neon_fminv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
  }
  case NEON::BI__builtin_neon_vmaxnmv_f16: {
    Int = Intrinsic::aarch64_neon_fmaxnmv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
  }
  case NEON::BI__builtin_neon_vmaxnmvq_f16: {
    Int = Intrinsic::aarch64_neon_fmaxnmv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
  }
  case NEON::BI__builtin_neon_vminnmv_f16: {
    Int = Intrinsic::aarch64_neon_fminnmv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
  }
  case NEON::BI__builtin_neon_vminnmvq_f16: {
    Int = Intrinsic::aarch64_neon_fminnmv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
  }
  case NEON::BI__builtin_neon_vmul_n_f64: {
    Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
    Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
    return Builder.CreateFMul(Ops[0], RHS);
  }
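  // Widening "add long across vector": uaddlv/saddlv sum every lane into a
  // wider scalar, e.g. vaddlv_u8 adds eight u8 lanes into a u16 result.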
  case NEON::BI__builtin_neon_vaddlv_u8: {
    Int = Intrinsic::aarch64_neon_uaddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vaddlv_u16: {
    Int = Intrinsic::aarch64_neon_uaddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  }
  case NEON::BI__builtin_neon_vaddlvq_u8: {
    Int = Intrinsic::aarch64_neon_uaddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 16);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vaddlvq_u16: {
    Int = Intrinsic::aarch64_neon_uaddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  }
  case NEON::BI__builtin_neon_vaddlv_s8: {
    Int = Intrinsic::aarch64_neon_saddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vaddlv_s16: {
    Int = Intrinsic::aarch64_neon_saddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  }
  case NEON::BI__builtin_neon_vaddlvq_s8: {
    Int = Intrinsic::aarch64_neon_saddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 16);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vaddlvq_s16: {
    Int = Intrinsic::aarch64_neon_saddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  }
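  // SRI/SLI shift each element and insert it into the destination, leaving
  // the bits vacated by the shift unchanged; useful for packing bitfields,
  // e.g. merging two 4-bit values within a byte.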
  case NEON::BI__builtin_neon_vsri_n_v:
  case NEON::BI__builtin_neon_vsriq_n_v: {
    Int = Intrinsic::aarch64_neon_vsri;
    llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
    return EmitNeonCall(Intrin, Ops, "vsri_n");
  }
  case NEON::BI__builtin_neon_vsli_n_v:
  case NEON::BI__builtin_neon_vsliq_n_v: {
    Int = Intrinsic::aarch64_neon_vsli;
    llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
    return EmitNeonCall(Intrin, Ops, "vsli_n");
  }
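  // Shift-right-and-accumulate: vsra is a plain shift followed by an add,
  // while vrsra first applies a rounding shift (urshl/srshl with a negated
  // shift amount) before accumulating.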
  case NEON::BI__builtin_neon_vsra_n_v:
  case NEON::BI__builtin_neon_vsraq_n_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
    return Builder.CreateAdd(Ops[0], Ops[1]);
  case NEON::BI__builtin_neon_vrsra_n_v:
  case NEON::BI__builtin_neon_vrsraq_n_v: {
    Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
    SmallVector<llvm::Value *, 2> TmpOps;
    TmpOps.push_back(Ops[1]);
    TmpOps.push_back(Ops[2]);
    Function *F = CGM.getIntrinsic(Int, Ty);
    llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
    Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
    return Builder.CreateAdd(Ops[0], tmp);
  }
  case NEON::BI__builtin_neon_vld1_v:
  case NEON::BI__builtin_neon_vld1q_v: {
    return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment());
  }
  case NEON::BI__builtin_neon_vst1_v:
  case NEON::BI__builtin_neon_vst1q_v:
    Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
    return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
  case NEON::BI__builtin_neon_vld1_lane_v:
  case NEON::BI__builtin_neon_vld1q_lane_v: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
                                       PtrOp0.getAlignment());
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
  }
  case NEON::BI__builtin_neon_vldap1_lane_s64:
  case NEON::BI__builtin_neon_vldap1q_lane_s64: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    llvm::LoadInst *LI = Builder.CreateAlignedLoad(
        VTy->getElementType(), Ops[0], PtrOp0.getAlignment());
    LI->setAtomic(llvm::AtomicOrdering::Acquire);
    Ops[0] = LI;
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vldap1_lane");
  }
  case NEON::BI__builtin_neon_vld1_dup_v:
  case NEON::BI__builtin_neon_vld1q_dup_v: {
    Value *V = PoisonValue::get(Ty);
    Ty = VTy->getElementType();
    Ops[0] = Builder.CreateAlignedLoad(Ty, Ops[0], PtrOp0.getAlignment());
    llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
    Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
    return EmitNeonSplat(Ops[0], CI);
  }
  case NEON::BI__builtin_neon_vst1_lane_v:
  case NEON::BI__builtin_neon_vst1q_lane_v:
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
    return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
  case NEON::BI__builtin_neon_vstl1_lane_s64:
  case NEON::BI__builtin_neon_vstl1q_lane_s64: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
    llvm::StoreInst *SI =
        Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
    SI->setAtomic(llvm::AtomicOrdering::Release);
    return SI;
  }
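  // The vld2/vld3/vld4 families call the aarch64.neon.ldN intrinsics, which
  // return a struct of N vectors; these builtins return their result
  // indirectly, so the struct is stored back through the pointer in Ops[0].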
  case NEON::BI__builtin_neon_vld2_v:
  case NEON::BI__builtin_neon_vld2q_v: {
    llvm::Type *Tys[2] = { VTy, UnqualPtrTy };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld3_v:
  case NEON::BI__builtin_neon_vld3q_v: {
    llvm::Type *Tys[2] = { VTy, UnqualPtrTy };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld4_v:
  case NEON::BI__builtin_neon_vld4q_v: {
    llvm::Type *Tys[2] = { VTy, UnqualPtrTy };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld2_dup_v:
  case NEON::BI__builtin_neon_vld2q_dup_v: {
    llvm::Type *Tys[2] = { VTy, UnqualPtrTy };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld3_dup_v:
  case NEON::BI__builtin_neon_vld3q_dup_v: {
    llvm::Type *Tys[2] = { VTy, UnqualPtrTy };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld4_dup_v:
  case NEON::BI__builtin_neon_vld4q_dup_v: {
    llvm::Type *Tys[2] = { VTy, UnqualPtrTy };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld2_lane_v:
  case NEON::BI__builtin_neon_vld2q_lane_v: {
    llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
    std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
    Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld2_lane");
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld3_lane_v:
  case NEON::BI__builtin_neon_vld3q_lane_v: {
    llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
    std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
    Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
    Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld3_lane");
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld4_lane_v:
  case NEON::BI__builtin_neon_vld4q_lane_v: {
    llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
    std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
    Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
    Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
    Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld4_lane");
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vst2_v:
  case NEON::BI__builtin_neon_vst2q_v: {
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
    llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst2_lane_v:
  case NEON::BI__builtin_neon_vst2q_lane_v: {
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
    Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
    llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst3_v:
  case NEON::BI__builtin_neon_vst3q_v: {
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
    llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst3_lane_v:
  case NEON::BI__builtin_neon_vst3q_lane_v: {
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
    Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
    llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst4_v:
  case NEON::BI__builtin_neon_vst4q_v: {
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
    llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst4_lane_v:
  case NEON::BI__builtin_neon_vst4q_lane_v: {
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
    Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
    llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
                        Ops, "");
  }
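  // vtrn/vuzp/vzip are open-coded as shufflevectors. For a v4i32 input the
  // two vtrn results use masks {0,4,2,6} and {1,5,3,7}, vuzp uses
  // {0,2,4,6}/{1,3,5,7}, and vzip uses {0,4,1,5}/{2,6,3,7}.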
  case NEON::BI__builtin_neon_vtrn_v:
  case NEON::BI__builtin_neon_vtrnq_v: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;
    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<int, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(i + vi);
        Indices.push_back(i + e + vi);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vuzp_v:
  case NEON::BI__builtin_neon_vuzpq_v: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;
    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<int, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
        Indices.push_back(2 * i + vi);
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vzip_v:
  case NEON::BI__builtin_neon_vzipq_v: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;
    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<int, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back((i + vi * e) >> 1);
        Indices.push_back(((i + vi * e) >> 1) + e);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vqtbl1q_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty), Ops, "vtbl1");
  case NEON::BI__builtin_neon_vqtbl2q_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty), Ops, "vtbl2");
  case NEON::BI__builtin_neon_vqtbl3q_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty), Ops, "vtbl3");
  case NEON::BI__builtin_neon_vqtbl4q_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty), Ops, "vtbl4");
  case NEON::BI__builtin_neon_vqtbx1q_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty), Ops, "vtbx1");
  case NEON::BI__builtin_neon_vqtbx2q_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty), Ops, "vtbx2");
  case NEON::BI__builtin_neon_vqtbx3q_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty), Ops, "vtbx3");
  case NEON::BI__builtin_neon_vqtbx4q_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty), Ops, "vtbx4");
  case NEON::BI__builtin_neon_vsqadd_v:
  case NEON::BI__builtin_neon_vsqaddq_v: {
    Int = Intrinsic::aarch64_neon_usqadd;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
  }
  case NEON::BI__builtin_neon_vuqadd_v:
  case NEON::BI__builtin_neon_vuqaddq_v: {
    Int = Intrinsic::aarch64_neon_suqadd;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
  }
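  // FEAT_LUT table lookups: vluti2 selects table elements with 2-bit
  // indices and vluti4 with 4-bit indices taken from the given lane.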
  case NEON::BI__builtin_neon_vluti2_laneq_mf8:
  case NEON::BI__builtin_neon_vluti2_laneq_bf16:
  case NEON::BI__builtin_neon_vluti2_laneq_f16:
  case NEON::BI__builtin_neon_vluti2_laneq_p16:
  case NEON::BI__builtin_neon_vluti2_laneq_p8:
  case NEON::BI__builtin_neon_vluti2_laneq_s16:
  case NEON::BI__builtin_neon_vluti2_laneq_s8:
  case NEON::BI__builtin_neon_vluti2_laneq_u16:
  case NEON::BI__builtin_neon_vluti2_laneq_u8: {
    Int = Intrinsic::aarch64_neon_vluti2_laneq;
  case NEON::BI__builtin_neon_vluti2q_laneq_mf8:
  case NEON::BI__builtin_neon_vluti2q_laneq_bf16:
  case NEON::BI__builtin_neon_vluti2q_laneq_f16:
  case NEON::BI__builtin_neon_vluti2q_laneq_p16:
  case NEON::BI__builtin_neon_vluti2q_laneq_p8:
  case NEON::BI__builtin_neon_vluti2q_laneq_s16:
  case NEON::BI__builtin_neon_vluti2q_laneq_s8:
  case NEON::BI__builtin_neon_vluti2q_laneq_u16:
  case NEON::BI__builtin_neon_vluti2q_laneq_u8: {
    Int = Intrinsic::aarch64_neon_vluti2_laneq;
  case NEON::BI__builtin_neon_vluti2_lane_mf8:
  case NEON::BI__builtin_neon_vluti2_lane_bf16:
  case NEON::BI__builtin_neon_vluti2_lane_f16:
  case NEON::BI__builtin_neon_vluti2_lane_p16:
  case NEON::BI__builtin_neon_vluti2_lane_p8:
  case NEON::BI__builtin_neon_vluti2_lane_s16:
  case NEON::BI__builtin_neon_vluti2_lane_s8:
  case NEON::BI__builtin_neon_vluti2_lane_u16:
  case NEON::BI__builtin_neon_vluti2_lane_u8: {
    Int = Intrinsic::aarch64_neon_vluti2_lane;
  case NEON::BI__builtin_neon_vluti2q_lane_mf8:
  case NEON::BI__builtin_neon_vluti2q_lane_bf16:
  case NEON::BI__builtin_neon_vluti2q_lane_f16:
  case NEON::BI__builtin_neon_vluti2q_lane_p16:
  case NEON::BI__builtin_neon_vluti2q_lane_p8:
  case NEON::BI__builtin_neon_vluti2q_lane_s16:
  case NEON::BI__builtin_neon_vluti2q_lane_s8:
  case NEON::BI__builtin_neon_vluti2q_lane_u16:
  case NEON::BI__builtin_neon_vluti2q_lane_u8: {
    Int = Intrinsic::aarch64_neon_vluti2_lane;
  case NEON::BI__builtin_neon_vluti4q_lane_mf8:
  case NEON::BI__builtin_neon_vluti4q_lane_p8:
  case NEON::BI__builtin_neon_vluti4q_lane_s8:
  case NEON::BI__builtin_neon_vluti4q_lane_u8: {
    Int = Intrinsic::aarch64_neon_vluti4q_lane;
  case NEON::BI__builtin_neon_vluti4q_laneq_mf8:
  case NEON::BI__builtin_neon_vluti4q_laneq_p8:
  case NEON::BI__builtin_neon_vluti4q_laneq_s8:
  case NEON::BI__builtin_neon_vluti4q_laneq_u8: {
    Int = Intrinsic::aarch64_neon_vluti4q_laneq;
  case NEON::BI__builtin_neon_vluti4q_lane_bf16_x2:
  case NEON::BI__builtin_neon_vluti4q_lane_f16_x2:
  case NEON::BI__builtin_neon_vluti4q_lane_p16_x2:
  case NEON::BI__builtin_neon_vluti4q_lane_s16_x2:
  case NEON::BI__builtin_neon_vluti4q_lane_u16_x2: {
    Int = Intrinsic::aarch64_neon_vluti4q_lane_x2;
  case NEON::BI__builtin_neon_vluti4q_laneq_bf16_x2:
  case NEON::BI__builtin_neon_vluti4q_laneq_f16_x2:
  case NEON::BI__builtin_neon_vluti4q_laneq_p16_x2:
  case NEON::BI__builtin_neon_vluti4q_laneq_s16_x2:
  case NEON::BI__builtin_neon_vluti4q_laneq_u16_x2: {
    Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
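  // FP8 (mf8) conversions. The *_fpm builtins carry a trailing fpm_t
  // operand that EmitFP8NeonCvtCall routes into the FPMR register, which
  // selects the FP8 format and scaling for the conversion.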
  case NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm:
    ExtractLow = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vcvt1_bf16_mf8_fpm:
  case NEON::BI__builtin_neon_vcvt1_high_bf16_mf8_fpm:
    return EmitFP8NeonCvtCall(/*IID elided*/,
                              llvm::FixedVectorType::get(BFloatTy, 8),
                              Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
  case NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm:
    ExtractLow = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vcvt2_bf16_mf8_fpm:
  case NEON::BI__builtin_neon_vcvt2_high_bf16_mf8_fpm:
    return EmitFP8NeonCvtCall(/*IID elided*/,
                              llvm::FixedVectorType::get(BFloatTy, 8),
                              Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
  case NEON::BI__builtin_neon_vcvt1_low_f16_mf8_fpm:
    ExtractLow = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vcvt1_f16_mf8_fpm:
  case NEON::BI__builtin_neon_vcvt1_high_f16_mf8_fpm:
    return EmitFP8NeonCvtCall(/*IID elided*/,
                              llvm::FixedVectorType::get(HalfTy, 8),
                              Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
  case NEON::BI__builtin_neon_vcvt2_low_f16_mf8_fpm:
    ExtractLow = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vcvt2_f16_mf8_fpm:
  case NEON::BI__builtin_neon_vcvt2_high_f16_mf8_fpm:
    return EmitFP8NeonCvtCall(/*IID elided*/,
                              llvm::FixedVectorType::get(HalfTy, 8),
                              Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
  case NEON::BI__builtin_neon_vcvt_mf8_f32_fpm:
    return EmitFP8NeonCvtCall(/*IID elided*/,
                              llvm::FixedVectorType::get(Int8Ty, 8),
                              Ops[0]->getType(), false, Ops, E, "vfcvtn");
  case NEON::BI__builtin_neon_vcvt_mf8_f16_fpm:
    return EmitFP8NeonCvtCall(/*IID elided*/,
                              llvm::FixedVectorType::get(Int8Ty, 8),
                              llvm::FixedVectorType::get(HalfTy, 4), false,
                              Ops, E, "vfcvtn");
  case NEON::BI__builtin_neon_vcvtq_mf8_f16_fpm:
    return EmitFP8NeonCvtCall(/*IID elided*/,
                              llvm::FixedVectorType::get(Int8Ty, 16),
                              llvm::FixedVectorType::get(HalfTy, 8), false,
                              Ops, E, "vfcvtn");
  case NEON::BI__builtin_neon_vcvt_high_mf8_f32_fpm: {
    llvm::Type *Ty = llvm::FixedVectorType::get(Int8Ty, 16);
    Ops[0] = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
                                        Builder.getInt64(0));
    return EmitFP8NeonCvtCall(/*IID elided*/, Ty, Ops[1]->getType(), false,
                              Ops, E, "vfcvtn2");
  }
  case NEON::BI__builtin_neon_vdot_f16_mf8_fpm:
  case NEON::BI__builtin_neon_vdotq_f16_mf8_fpm:
    return EmitFP8NeonFDOTCall(/*IID elided*/, false, HalfTy, Ops, E, "fdot2");
  case NEON::BI__builtin_neon_vdot_lane_f16_mf8_fpm:
  case NEON::BI__builtin_neon_vdotq_lane_f16_mf8_fpm:
    ExtendLaneArg = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vdot_laneq_f16_mf8_fpm:
  case NEON::BI__builtin_neon_vdotq_laneq_f16_mf8_fpm:
    return EmitFP8NeonFDOTCall(/*IID elided*/, ExtendLaneArg, HalfTy, Ops, E,
                               "fdot2_lane");
  case NEON::BI__builtin_neon_vdot_f32_mf8_fpm:
  case NEON::BI__builtin_neon_vdotq_f32_mf8_fpm:
    return EmitFP8NeonFDOTCall(/*IID elided*/, false, FloatTy, Ops, E, "fdot4");
  case NEON::BI__builtin_neon_vdot_lane_f32_mf8_fpm:
  case NEON::BI__builtin_neon_vdotq_lane_f32_mf8_fpm:
    ExtendLaneArg = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vdot_laneq_f32_mf8_fpm:
  case NEON::BI__builtin_neon_vdotq_laneq_f32_mf8_fpm:
    return EmitFP8NeonFDOTCall(/*IID elided*/, ExtendLaneArg, FloatTy, Ops, E,
                               "fdot4_lane");
  case NEON::BI__builtin_neon_vmlalbq_f16_mf8_fpm:
    return EmitFP8NeonCall(/*IID elided*/,
                           {llvm::FixedVectorType::get(HalfTy, 8)}, Ops, E,
                           "vmlal");
  case NEON::BI__builtin_neon_vmlaltq_f16_mf8_fpm:
    return EmitFP8NeonCall(/*IID elided*/,
                           {llvm::FixedVectorType::get(HalfTy, 8)}, Ops, E,
                           "vmlal");
  case NEON::BI__builtin_neon_vmlallbbq_f32_mf8_fpm:
    return EmitFP8NeonCall(/*IID elided*/,
                           {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
                           "vmlall");
  case NEON::BI__builtin_neon_vmlallbtq_f32_mf8_fpm:
    return EmitFP8NeonCall(/*IID elided*/,
                           {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
                           "vmlall");
  case NEON::BI__builtin_neon_vmlalltbq_f32_mf8_fpm:
    return EmitFP8NeonCall(/*IID elided*/,
                           {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
                           "vmlall");
  case NEON::BI__builtin_neon_vmlallttq_f32_mf8_fpm:
    return EmitFP8NeonCall(/*IID elided*/,
                           {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
                           "vmlall");
  case NEON::BI__builtin_neon_vmlalbq_lane_f16_mf8_fpm:
    ExtendLaneArg = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vmlalbq_laneq_f16_mf8_fpm:
    return EmitFP8NeonFMLACall(/*IID elided*/, ExtendLaneArg, HalfTy, Ops, E,
                               "vmlal_lane");
  case NEON::BI__builtin_neon_vmlaltq_lane_f16_mf8_fpm:
    ExtendLaneArg = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vmlaltq_laneq_f16_mf8_fpm:
    return EmitFP8NeonFMLACall(/*IID elided*/, ExtendLaneArg, HalfTy, Ops, E,
                               "vmlal_lane");
  case NEON::BI__builtin_neon_vmlallbbq_lane_f32_mf8_fpm:
    ExtendLaneArg = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vmlallbbq_laneq_f32_mf8_fpm:
    return EmitFP8NeonFMLACall(/*IID elided*/, ExtendLaneArg, FloatTy, Ops, E,
                               "vmlall_lane");
  case NEON::BI__builtin_neon_vmlallbtq_lane_f32_mf8_fpm:
    ExtendLaneArg = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vmlallbtq_laneq_f32_mf8_fpm:
    return EmitFP8NeonFMLACall(/*IID elided*/, ExtendLaneArg, FloatTy, Ops, E,
                               "vmlall_lane");
  case NEON::BI__builtin_neon_vmlalltbq_lane_f32_mf8_fpm:
    ExtendLaneArg = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vmlalltbq_laneq_f32_mf8_fpm:
    return EmitFP8NeonFMLACall(/*IID elided*/, ExtendLaneArg, FloatTy, Ops, E,
                               "vmlall_lane");
  case NEON::BI__builtin_neon_vmlallttq_lane_f32_mf8_fpm:
    ExtendLaneArg = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vmlallttq_laneq_f32_mf8_fpm:
    return EmitFP8NeonFMLACall(/*IID elided*/, ExtendLaneArg, FloatTy, Ops, E,
                               "vmlall_lane");
  case NEON::BI__builtin_neon_vamin_f16:
  case NEON::BI__builtin_neon_vaminq_f16:
  case NEON::BI__builtin_neon_vamin_f32:
  case NEON::BI__builtin_neon_vaminq_f32:
  case NEON::BI__builtin_neon_vaminq_f64: {
    Int = Intrinsic::aarch64_neon_famin;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famin");
  }
  case NEON::BI__builtin_neon_vamax_f16:
  case NEON::BI__builtin_neon_vamaxq_f16:
  case NEON::BI__builtin_neon_vamax_f32:
  case NEON::BI__builtin_neon_vamaxq_f32:
  case NEON::BI__builtin_neon_vamaxq_f64: {
    Int = Intrinsic::aarch64_neon_famax;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famax");
  }
  case NEON::BI__builtin_neon_vscale_f16:
  case NEON::BI__builtin_neon_vscaleq_f16:
  case NEON::BI__builtin_neon_vscale_f32:
  case NEON::BI__builtin_neon_vscaleq_f32:
  case NEON::BI__builtin_neon_vscaleq_f64: {
    Int = Intrinsic::aarch64_neon_fp8_fscale;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fscale");
  }
  }
}
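// The BPF builtins below support CO-RE (compile once, run everywhere): they
// emit llvm.bpf.* intrinsics plus preserve_access_index metadata that the
// BPF backend later turns into BTF relocation records.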
Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E) {
  assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
          BuiltinID == BPF::BI__builtin_btf_type_id ||
          BuiltinID == BPF::BI__builtin_preserve_type_info ||
          BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
         "unexpected BPF builtin");

  // A sequence number injected into the emitted intrinsics to keep CSE from
  // merging calls whose only difference is their debuginfo metadata.
  static uint32_t BuiltinSeqNum;

  switch (BuiltinID) {
  default:
    llvm_unreachable("Unexpected BPF builtin");
  case BPF::BI__builtin_preserve_field_info: {
    const Expr *Arg = E->getArg(0);
    bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;

    if (!getDebugInfo()) {
      CGM.Error(E->getExprLoc(),
                "using __builtin_preserve_field_info() without -g");
      return IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
                        : EmitLValue(Arg).emitRawPointer(*this);
    }

    // Enable underlying preserve_*_access_index() generation.
    bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
    IsInPreservedAIRegion = true;
    Value *FieldAddr = IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
                                  : EmitLValue(Arg).emitRawPointer(*this);
    IsInPreservedAIRegion = OldIsInPreservedAIRegion;

    ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
    Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue());

    llvm::Function *FnGetFieldInfo = Intrinsic::getOrInsertDeclaration(
        &CGM.getModule(), Intrinsic::bpf_preserve_field_info,
        {FieldAddr->getType()});
    return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});
  }
  case BPF::BI__builtin_btf_type_id:
  case BPF::BI__builtin_preserve_type_info: {
    const Expr *Arg0 = E->getArg(0);
    llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
        Arg0->getType(), Arg0->getExprLoc());

    ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
    Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
    Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);

    llvm::Function *FnDecl;
    if (BuiltinID == BPF::BI__builtin_btf_type_id)
      FnDecl = Intrinsic::getOrInsertDeclaration(
          &CGM.getModule(), Intrinsic::bpf_btf_type_id, {});
    else
      FnDecl = Intrinsic::getOrInsertDeclaration(
          &CGM.getModule(), Intrinsic::bpf_preserve_type_info, {});
    CallInst *Fn = Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue});
    Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
    return Fn;
  }
  case BPF::BI__builtin_preserve_enum_value: {
    const Expr *Arg0 = E->getArg(0);
    llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
        Arg0->getType(), Arg0->getExprLoc());

    // Find the enumerator referenced by the (casted) argument.
    const auto *UO = cast<UnaryOperator>(Arg0->IgnoreParens());
    const auto *CE = cast<CStyleCastExpr>(UO->getSubExpr());
    const auto *DR = cast<DeclRefExpr>(CE->getSubExpr());
    const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl());

    auto InitVal = Enumerator->getInitVal();
    std::string InitValStr;
    if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX))
      InitValStr = std::to_string(InitVal.getSExtValue());
    else
      InitValStr = std::to_string(InitVal.getZExtValue());
    std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr;
    Value *EnumStrVal = Builder.CreateGlobalString(EnumStr);

    ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
    Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
    Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);

    llvm::Function *IntrinsicFn = Intrinsic::getOrInsertDeclaration(
        &CGM.getModule(), Intrinsic::bpf_preserve_enum_value, {});
    CallInst *Fn =
        Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue});
    Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
    return Fn;
  }
  }
}
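// BuildVector folds an all-constant operand list into a single
// ConstantVector; otherwise it chains insertelement instructions, e.g. four
// i32 constants become one <4 x i32> constant.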
Value *CodeGenFunction::BuildVector(ArrayRef<llvm::Value *> Ops) {
  assert((Ops.size() & (Ops.size() - 1)) == 0 &&
         "Not a power-of-two sized vector!");
  bool AllConstants = true;
  for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
    AllConstants &= isa<Constant>(Ops[i]);

  // If this is a constant vector, create a ConstantVector.
  if (AllConstants) {
    SmallVector<llvm::Constant *, 16> CstOps;
    for (llvm::Value *Op : Ops)
      CstOps.push_back(cast<Constant>(Op));
    return llvm::ConstantVector::get(CstOps);
  }

  // Otherwise, insertelement the values to build the vector.
  Value *Result = llvm::PoisonValue::get(
      llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size()));
  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
    Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt64(i));

  return Result;
}
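// Function-multiversioning support: EmitAArch64CpuInit calls the runtime
// hook that populates __aarch64_cpu_features, and the EmitAArch64CpuSupports
// overloads test feature bits against that structure.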
Value *CodeGenFunction::EmitAArch64CpuInit() {
  llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
  llvm::FunctionCallee Func =
      CGM.CreateRuntimeFunction(FTy, "__init_cpu_features_resolver");
  cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
  cast<llvm::GlobalValue>(Func.getCallee())
      ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
  return Builder.CreateCall(Func);
}
Value *CodeGenFunction::EmitAArch64CpuSupports(const CallExpr *E) {
  const Expr *ArgExpr = E->getArg(0)->IgnoreParenCasts();
  StringRef ArgStr = cast<StringLiteral>(ArgExpr)->getString();
  llvm::SmallVector<StringRef, 8> Features;
  ArgStr.split(Features, "+");
  for (auto &Feature : Features) {
    Feature = Feature.trim();
    if (!llvm::AArch64::parseFMVExtension(Feature))
      return Builder.getFalse();
  }
  return EmitAArch64CpuSupports(Features);
}
llvm::Value *
CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) {
  llvm::APInt FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs);
  Value *Result = Builder.getTrue();
  if (FeaturesMask != 0) {
    // Test the feature bits against the structure exported by the runtime:
    //   struct { unsigned long long features; } __aarch64_cpu_features;
    llvm::Type *STy = llvm::StructType::get(Int64Ty);
    llvm::Constant *AArch64CPUFeatures =
        CGM.CreateRuntimeVariable(STy, "__aarch64_cpu_features");
    cast<llvm::GlobalValue>(AArch64CPUFeatures)->setDSOLocal(true);
    llvm::Value *CpuFeatures = Builder.CreateGEP(
        STy, AArch64CPUFeatures,
        {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 0)});