ARM.cpp
1//===---------- ARM.cpp - Emit LLVM Code for builtins ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This contains code to emit Builtin calls as LLVM code.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ABIInfo.h"
14#include "CGBuiltin.h"
15#include "CGDebugInfo.h"
16#include "TargetInfo.h"
17#include "clang/Basic/TargetBuiltins.h"
18#include "llvm/IR/InlineAsm.h"
19#include "llvm/IR/IntrinsicsAArch64.h"
20#include "llvm/IR/IntrinsicsARM.h"
21#include "llvm/IR/IntrinsicsBPF.h"
22#include "llvm/TargetParser/AArch64TargetParser.h"
23
24#include <numeric>
25
26using namespace clang;
27using namespace CodeGen;
28using namespace llvm;
29
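// Map an AArch64 MSVC-compatible builtin onto the target-independent
// CodeGenFunction::MSVCIntrin enum so it can reuse the common MSVC
// intrinsic emission path; returns std::nullopt if there is no mapping.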
30static std::optional<CodeGenFunction::MSVCIntrin>
31translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
32 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
33 switch (BuiltinID) {
34 default:
35 return std::nullopt;
36 case clang::AArch64::BI_BitScanForward:
37 case clang::AArch64::BI_BitScanForward64:
38 return MSVCIntrin::_BitScanForward;
39 case clang::AArch64::BI_BitScanReverse:
40 case clang::AArch64::BI_BitScanReverse64:
41 return MSVCIntrin::_BitScanReverse;
42 case clang::AArch64::BI_InterlockedAnd64:
43 return MSVCIntrin::_InterlockedAnd;
44 case clang::AArch64::BI_InterlockedExchange64:
45 return MSVCIntrin::_InterlockedExchange;
46 case clang::AArch64::BI_InterlockedExchangeAdd64:
47 return MSVCIntrin::_InterlockedExchangeAdd;
48 case clang::AArch64::BI_InterlockedExchangeSub64:
49 return MSVCIntrin::_InterlockedExchangeSub;
50 case clang::AArch64::BI_InterlockedOr64:
51 return MSVCIntrin::_InterlockedOr;
52 case clang::AArch64::BI_InterlockedXor64:
53 return MSVCIntrin::_InterlockedXor;
54 case clang::AArch64::BI_InterlockedDecrement64:
55 return MSVCIntrin::_InterlockedDecrement;
56 case clang::AArch64::BI_InterlockedIncrement64:
57 return MSVCIntrin::_InterlockedIncrement;
58 case clang::AArch64::BI_InterlockedExchangeAdd8_acq:
59 case clang::AArch64::BI_InterlockedExchangeAdd16_acq:
60 case clang::AArch64::BI_InterlockedExchangeAdd_acq:
61 case clang::AArch64::BI_InterlockedExchangeAdd64_acq:
62 return MSVCIntrin::_InterlockedExchangeAdd_acq;
63 case clang::AArch64::BI_InterlockedExchangeAdd8_rel:
64 case clang::AArch64::BI_InterlockedExchangeAdd16_rel:
65 case clang::AArch64::BI_InterlockedExchangeAdd_rel:
66 case clang::AArch64::BI_InterlockedExchangeAdd64_rel:
67 return MSVCIntrin::_InterlockedExchangeAdd_rel;
68 case clang::AArch64::BI_InterlockedExchangeAdd8_nf:
69 case clang::AArch64::BI_InterlockedExchangeAdd16_nf:
70 case clang::AArch64::BI_InterlockedExchangeAdd_nf:
71 case clang::AArch64::BI_InterlockedExchangeAdd64_nf:
72 return MSVCIntrin::_InterlockedExchangeAdd_nf;
73 case clang::AArch64::BI_InterlockedExchange8_acq:
74 case clang::AArch64::BI_InterlockedExchange16_acq:
75 case clang::AArch64::BI_InterlockedExchange_acq:
76 case clang::AArch64::BI_InterlockedExchange64_acq:
77 case clang::AArch64::BI_InterlockedExchangePointer_acq:
78 return MSVCIntrin::_InterlockedExchange_acq;
79 case clang::AArch64::BI_InterlockedExchange8_rel:
80 case clang::AArch64::BI_InterlockedExchange16_rel:
81 case clang::AArch64::BI_InterlockedExchange_rel:
82 case clang::AArch64::BI_InterlockedExchange64_rel:
83 case clang::AArch64::BI_InterlockedExchangePointer_rel:
84 return MSVCIntrin::_InterlockedExchange_rel;
85 case clang::AArch64::BI_InterlockedExchange8_nf:
86 case clang::AArch64::BI_InterlockedExchange16_nf:
87 case clang::AArch64::BI_InterlockedExchange_nf:
88 case clang::AArch64::BI_InterlockedExchange64_nf:
89 case clang::AArch64::BI_InterlockedExchangePointer_nf:
90 return MSVCIntrin::_InterlockedExchange_nf;
91 case clang::AArch64::BI_InterlockedCompareExchange8_acq:
92 case clang::AArch64::BI_InterlockedCompareExchange16_acq:
93 case clang::AArch64::BI_InterlockedCompareExchange_acq:
94 case clang::AArch64::BI_InterlockedCompareExchange64_acq:
95 case clang::AArch64::BI_InterlockedCompareExchangePointer_acq:
96 return MSVCIntrin::_InterlockedCompareExchange_acq;
97 case clang::AArch64::BI_InterlockedCompareExchange8_rel:
98 case clang::AArch64::BI_InterlockedCompareExchange16_rel:
99 case clang::AArch64::BI_InterlockedCompareExchange_rel:
100 case clang::AArch64::BI_InterlockedCompareExchange64_rel:
101 case clang::AArch64::BI_InterlockedCompareExchangePointer_rel:
102 return MSVCIntrin::_InterlockedCompareExchange_rel;
103 case clang::AArch64::BI_InterlockedCompareExchange8_nf:
104 case clang::AArch64::BI_InterlockedCompareExchange16_nf:
105 case clang::AArch64::BI_InterlockedCompareExchange_nf:
106 case clang::AArch64::BI_InterlockedCompareExchange64_nf:
107 return MSVCIntrin::_InterlockedCompareExchange_nf;
108 case clang::AArch64::BI_InterlockedCompareExchange128:
109 return MSVCIntrin::_InterlockedCompareExchange128;
110 case clang::AArch64::BI_InterlockedCompareExchange128_acq:
111 return MSVCIntrin::_InterlockedCompareExchange128_acq;
112 case clang::AArch64::BI_InterlockedCompareExchange128_nf:
113 return MSVCIntrin::_InterlockedCompareExchange128_nf;
114 case clang::AArch64::BI_InterlockedCompareExchange128_rel:
115 return MSVCIntrin::_InterlockedCompareExchange128_rel;
116 case clang::AArch64::BI_InterlockedOr8_acq:
117 case clang::AArch64::BI_InterlockedOr16_acq:
118 case clang::AArch64::BI_InterlockedOr_acq:
119 case clang::AArch64::BI_InterlockedOr64_acq:
120 return MSVCIntrin::_InterlockedOr_acq;
121 case clang::AArch64::BI_InterlockedOr8_rel:
122 case clang::AArch64::BI_InterlockedOr16_rel:
123 case clang::AArch64::BI_InterlockedOr_rel:
124 case clang::AArch64::BI_InterlockedOr64_rel:
125 return MSVCIntrin::_InterlockedOr_rel;
126 case clang::AArch64::BI_InterlockedOr8_nf:
127 case clang::AArch64::BI_InterlockedOr16_nf:
128 case clang::AArch64::BI_InterlockedOr_nf:
129 case clang::AArch64::BI_InterlockedOr64_nf:
130 return MSVCIntrin::_InterlockedOr_nf;
131 case clang::AArch64::BI_InterlockedXor8_acq:
132 case clang::AArch64::BI_InterlockedXor16_acq:
133 case clang::AArch64::BI_InterlockedXor_acq:
134 case clang::AArch64::BI_InterlockedXor64_acq:
135 return MSVCIntrin::_InterlockedXor_acq;
136 case clang::AArch64::BI_InterlockedXor8_rel:
137 case clang::AArch64::BI_InterlockedXor16_rel:
138 case clang::AArch64::BI_InterlockedXor_rel:
139 case clang::AArch64::BI_InterlockedXor64_rel:
140 return MSVCIntrin::_InterlockedXor_rel;
141 case clang::AArch64::BI_InterlockedXor8_nf:
142 case clang::AArch64::BI_InterlockedXor16_nf:
143 case clang::AArch64::BI_InterlockedXor_nf:
144 case clang::AArch64::BI_InterlockedXor64_nf:
145 return MSVCIntrin::_InterlockedXor_nf;
146 case clang::AArch64::BI_InterlockedAnd8_acq:
147 case clang::AArch64::BI_InterlockedAnd16_acq:
148 case clang::AArch64::BI_InterlockedAnd_acq:
149 case clang::AArch64::BI_InterlockedAnd64_acq:
150 return MSVCIntrin::_InterlockedAnd_acq;
151 case clang::AArch64::BI_InterlockedAnd8_rel:
152 case clang::AArch64::BI_InterlockedAnd16_rel:
153 case clang::AArch64::BI_InterlockedAnd_rel:
154 case clang::AArch64::BI_InterlockedAnd64_rel:
155 return MSVCIntrin::_InterlockedAnd_rel;
156 case clang::AArch64::BI_InterlockedAnd8_nf:
157 case clang::AArch64::BI_InterlockedAnd16_nf:
158 case clang::AArch64::BI_InterlockedAnd_nf:
159 case clang::AArch64::BI_InterlockedAnd64_nf:
160 return MSVCIntrin::_InterlockedAnd_nf;
161 case clang::AArch64::BI_InterlockedIncrement16_acq:
162 case clang::AArch64::BI_InterlockedIncrement_acq:
163 case clang::AArch64::BI_InterlockedIncrement64_acq:
164 return MSVCIntrin::_InterlockedIncrement_acq;
165 case clang::AArch64::BI_InterlockedIncrement16_rel:
166 case clang::AArch64::BI_InterlockedIncrement_rel:
167 case clang::AArch64::BI_InterlockedIncrement64_rel:
168 return MSVCIntrin::_InterlockedIncrement_rel;
169 case clang::AArch64::BI_InterlockedIncrement16_nf:
170 case clang::AArch64::BI_InterlockedIncrement_nf:
171 case clang::AArch64::BI_InterlockedIncrement64_nf:
172 return MSVCIntrin::_InterlockedIncrement_nf;
173 case clang::AArch64::BI_InterlockedDecrement16_acq:
174 case clang::AArch64::BI_InterlockedDecrement_acq:
175 case clang::AArch64::BI_InterlockedDecrement64_acq:
176 return MSVCIntrin::_InterlockedDecrement_acq;
177 case clang::AArch64::BI_InterlockedDecrement16_rel:
178 case clang::AArch64::BI_InterlockedDecrement_rel:
179 case clang::AArch64::BI_InterlockedDecrement64_rel:
180 return MSVCIntrin::_InterlockedDecrement_rel;
181 case clang::AArch64::BI_InterlockedDecrement16_nf:
182 case clang::AArch64::BI_InterlockedDecrement_nf:
183 case clang::AArch64::BI_InterlockedDecrement64_nf:
184 return MSVCIntrin::_InterlockedDecrement_nf;
185 }
186 llvm_unreachable("must return from switch");
187}
188
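// Same mapping as above, but for the 32-bit ARM MSVC-compatible builtins.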
189static std::optional<CodeGenFunction::MSVCIntrin>
190translateArmToMsvcIntrin(unsigned BuiltinID) {
191 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
192 switch (BuiltinID) {
193 default:
194 return std::nullopt;
195 case clang::ARM::BI_BitScanForward:
196 case clang::ARM::BI_BitScanForward64:
197 return MSVCIntrin::_BitScanForward;
198 case clang::ARM::BI_BitScanReverse:
199 case clang::ARM::BI_BitScanReverse64:
200 return MSVCIntrin::_BitScanReverse;
201 case clang::ARM::BI_InterlockedAnd64:
202 return MSVCIntrin::_InterlockedAnd;
203 case clang::ARM::BI_InterlockedExchange64:
204 return MSVCIntrin::_InterlockedExchange;
205 case clang::ARM::BI_InterlockedExchangeAdd64:
206 return MSVCIntrin::_InterlockedExchangeAdd;
207 case clang::ARM::BI_InterlockedExchangeSub64:
208 return MSVCIntrin::_InterlockedExchangeSub;
209 case clang::ARM::BI_InterlockedOr64:
210 return MSVCIntrin::_InterlockedOr;
211 case clang::ARM::BI_InterlockedXor64:
212 return MSVCIntrin::_InterlockedXor;
213 case clang::ARM::BI_InterlockedDecrement64:
214 return MSVCIntrin::_InterlockedDecrement;
215 case clang::ARM::BI_InterlockedIncrement64:
216 return MSVCIntrin::_InterlockedIncrement;
217 case clang::ARM::BI_InterlockedExchangeAdd8_acq:
218 case clang::ARM::BI_InterlockedExchangeAdd16_acq:
219 case clang::ARM::BI_InterlockedExchangeAdd_acq:
220 case clang::ARM::BI_InterlockedExchangeAdd64_acq:
221 return MSVCIntrin::_InterlockedExchangeAdd_acq;
222 case clang::ARM::BI_InterlockedExchangeAdd8_rel:
223 case clang::ARM::BI_InterlockedExchangeAdd16_rel:
224 case clang::ARM::BI_InterlockedExchangeAdd_rel:
225 case clang::ARM::BI_InterlockedExchangeAdd64_rel:
226 return MSVCIntrin::_InterlockedExchangeAdd_rel;
227 case clang::ARM::BI_InterlockedExchangeAdd8_nf:
228 case clang::ARM::BI_InterlockedExchangeAdd16_nf:
229 case clang::ARM::BI_InterlockedExchangeAdd_nf:
230 case clang::ARM::BI_InterlockedExchangeAdd64_nf:
231 return MSVCIntrin::_InterlockedExchangeAdd_nf;
232 case clang::ARM::BI_InterlockedExchange8_acq:
233 case clang::ARM::BI_InterlockedExchange16_acq:
234 case clang::ARM::BI_InterlockedExchange_acq:
235 case clang::ARM::BI_InterlockedExchange64_acq:
236 case clang::ARM::BI_InterlockedExchangePointer_acq:
237 return MSVCIntrin::_InterlockedExchange_acq;
238 case clang::ARM::BI_InterlockedExchange8_rel:
239 case clang::ARM::BI_InterlockedExchange16_rel:
240 case clang::ARM::BI_InterlockedExchange_rel:
241 case clang::ARM::BI_InterlockedExchange64_rel:
242 case clang::ARM::BI_InterlockedExchangePointer_rel:
243 return MSVCIntrin::_InterlockedExchange_rel;
244 case clang::ARM::BI_InterlockedExchange8_nf:
245 case clang::ARM::BI_InterlockedExchange16_nf:
246 case clang::ARM::BI_InterlockedExchange_nf:
247 case clang::ARM::BI_InterlockedExchange64_nf:
248 case clang::ARM::BI_InterlockedExchangePointer_nf:
249 return MSVCIntrin::_InterlockedExchange_nf;
250 case clang::ARM::BI_InterlockedCompareExchange8_acq:
251 case clang::ARM::BI_InterlockedCompareExchange16_acq:
252 case clang::ARM::BI_InterlockedCompareExchange_acq:
253 case clang::ARM::BI_InterlockedCompareExchange64_acq:
254 case clang::ARM::BI_InterlockedCompareExchangePointer_acq:
255 return MSVCIntrin::_InterlockedCompareExchange_acq;
256 case clang::ARM::BI_InterlockedCompareExchange8_rel:
257 case clang::ARM::BI_InterlockedCompareExchange16_rel:
258 case clang::ARM::BI_InterlockedCompareExchange_rel:
259 case clang::ARM::BI_InterlockedCompareExchange64_rel:
260 case clang::ARM::BI_InterlockedCompareExchangePointer_rel:
261 return MSVCIntrin::_InterlockedCompareExchange_rel;
262 case clang::ARM::BI_InterlockedCompareExchange8_nf:
263 case clang::ARM::BI_InterlockedCompareExchange16_nf:
264 case clang::ARM::BI_InterlockedCompareExchange_nf:
265 case clang::ARM::BI_InterlockedCompareExchange64_nf:
266 return MSVCIntrin::_InterlockedCompareExchange_nf;
267 case clang::ARM::BI_InterlockedOr8_acq:
268 case clang::ARM::BI_InterlockedOr16_acq:
269 case clang::ARM::BI_InterlockedOr_acq:
270 case clang::ARM::BI_InterlockedOr64_acq:
271 return MSVCIntrin::_InterlockedOr_acq;
272 case clang::ARM::BI_InterlockedOr8_rel:
273 case clang::ARM::BI_InterlockedOr16_rel:
274 case clang::ARM::BI_InterlockedOr_rel:
275 case clang::ARM::BI_InterlockedOr64_rel:
276 return MSVCIntrin::_InterlockedOr_rel;
277 case clang::ARM::BI_InterlockedOr8_nf:
278 case clang::ARM::BI_InterlockedOr16_nf:
279 case clang::ARM::BI_InterlockedOr_nf:
280 case clang::ARM::BI_InterlockedOr64_nf:
281 return MSVCIntrin::_InterlockedOr_nf;
282 case clang::ARM::BI_InterlockedXor8_acq:
283 case clang::ARM::BI_InterlockedXor16_acq:
284 case clang::ARM::BI_InterlockedXor_acq:
285 case clang::ARM::BI_InterlockedXor64_acq:
286 return MSVCIntrin::_InterlockedXor_acq;
287 case clang::ARM::BI_InterlockedXor8_rel:
288 case clang::ARM::BI_InterlockedXor16_rel:
289 case clang::ARM::BI_InterlockedXor_rel:
290 case clang::ARM::BI_InterlockedXor64_rel:
291 return MSVCIntrin::_InterlockedXor_rel;
292 case clang::ARM::BI_InterlockedXor8_nf:
293 case clang::ARM::BI_InterlockedXor16_nf:
294 case clang::ARM::BI_InterlockedXor_nf:
295 case clang::ARM::BI_InterlockedXor64_nf:
296 return MSVCIntrin::_InterlockedXor_nf;
297 case clang::ARM::BI_InterlockedAnd8_acq:
298 case clang::ARM::BI_InterlockedAnd16_acq:
299 case clang::ARM::BI_InterlockedAnd_acq:
300 case clang::ARM::BI_InterlockedAnd64_acq:
301 return MSVCIntrin::_InterlockedAnd_acq;
302 case clang::ARM::BI_InterlockedAnd8_rel:
303 case clang::ARM::BI_InterlockedAnd16_rel:
304 case clang::ARM::BI_InterlockedAnd_rel:
305 case clang::ARM::BI_InterlockedAnd64_rel:
306 return MSVCIntrin::_InterlockedAnd_rel;
307 case clang::ARM::BI_InterlockedAnd8_nf:
308 case clang::ARM::BI_InterlockedAnd16_nf:
309 case clang::ARM::BI_InterlockedAnd_nf:
310 case clang::ARM::BI_InterlockedAnd64_nf:
311 return MSVCIntrin::_InterlockedAnd_nf;
312 case clang::ARM::BI_InterlockedIncrement16_acq:
313 case clang::ARM::BI_InterlockedIncrement_acq:
314 case clang::ARM::BI_InterlockedIncrement64_acq:
315 return MSVCIntrin::_InterlockedIncrement_acq;
316 case clang::ARM::BI_InterlockedIncrement16_rel:
317 case clang::ARM::BI_InterlockedIncrement_rel:
318 case clang::ARM::BI_InterlockedIncrement64_rel:
319 return MSVCIntrin::_InterlockedIncrement_rel;
320 case clang::ARM::BI_InterlockedIncrement16_nf:
321 case clang::ARM::BI_InterlockedIncrement_nf:
322 case clang::ARM::BI_InterlockedIncrement64_nf:
323 return MSVCIntrin::_InterlockedIncrement_nf;
324 case clang::ARM::BI_InterlockedDecrement16_acq:
325 case clang::ARM::BI_InterlockedDecrement_acq:
326 case clang::ARM::BI_InterlockedDecrement64_acq:
327 return MSVCIntrin::_InterlockedDecrement_acq;
328 case clang::ARM::BI_InterlockedDecrement16_rel:
329 case clang::ARM::BI_InterlockedDecrement_rel:
330 case clang::ARM::BI_InterlockedDecrement64_rel:
331 return MSVCIntrin::_InterlockedDecrement_rel;
332 case clang::ARM::BI_InterlockedDecrement16_nf:
333 case clang::ARM::BI_InterlockedDecrement_nf:
334 case clang::ARM::BI_InterlockedDecrement64_nf:
335 return MSVCIntrin::_InterlockedDecrement_nf;
336 }
337 llvm_unreachable("must return from switch");
338}
339
340// Emit an intrinsic where all operands are of the same type as the result.
341// Depending on mode, this may be a constrained floating-point intrinsic.
342static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
343 unsigned IntrinsicID,
344 unsigned ConstrainedIntrinsicID,
345 llvm::Type *Ty,
346 ArrayRef<Value *> Args) {
347 Function *F;
348 if (CGF.Builder.getIsFPConstrained())
349 F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty);
350 else
351 F = CGF.CGM.getIntrinsic(IntrinsicID, Ty);
352
353 if (CGF.Builder.getIsFPConstrained())
354 return CGF.Builder.CreateConstrainedFPCall(F, Args);
355 else
356 return CGF.Builder.CreateCall(F, Args);
357}
358
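// Return the LLVM fixed vector type described by a NeonTypeFlags value,
// taking the quad/64-bit distinction and half/bfloat availability into
// account.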
359static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,
360 NeonTypeFlags TypeFlags,
361 bool HasFastHalfType = true,
362 bool V1Ty = false,
363 bool AllowBFloatArgsAndRet = true) {
364 int IsQuad = TypeFlags.isQuad();
365 switch (TypeFlags.getEltType()) {
366 case NeonTypeFlags::Int8:
367 case NeonTypeFlags::Poly8:
368 case NeonTypeFlags::MFloat8:
369 return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
370 case NeonTypeFlags::Int16:
371 case NeonTypeFlags::Poly16:
372 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
373 case NeonTypeFlags::BFloat16:
374 if (AllowBFloatArgsAndRet)
375 return llvm::FixedVectorType::get(CGF->BFloatTy, V1Ty ? 1 : (4 << IsQuad));
376 else
377 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
378 case NeonTypeFlags::Float16:
379 if (HasFastHalfType)
380 return llvm::FixedVectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
381 else
382 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
383 case NeonTypeFlags::Int32:
384 return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
385 case NeonTypeFlags::Int64:
386 case NeonTypeFlags::Poly64:
387 return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
388 case NeonTypeFlags::Poly128:
389 // FIXME: i128 and f128 are not fully supported in Clang and LLVM;
390 // much of the i128/f128 API is missing, so we represent poly128 as
391 // v16i8 and rely on pattern matching.
392 return llvm::FixedVectorType::get(CGF->Int8Ty, 16);
393 case NeonTypeFlags::Float32:
394 return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
395 case NeonTypeFlags::Float64:
396 return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
397 }
398 llvm_unreachable("Unknown vector element type!");
399}
400
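// Return the floating-point vector type whose lane width and count match
// the given integer NEON type flags.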
401static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
402 NeonTypeFlags IntTypeFlags) {
403 int IsQuad = IntTypeFlags.isQuad();
404 switch (IntTypeFlags.getEltType()) {
405 case NeonTypeFlags::Int16:
406 return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad));
407 case NeonTypeFlags::Int32:
408 return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad));
409 case NeonTypeFlags::Int64:
410 return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad));
411 default:
412 llvm_unreachable("Type can't be converted to floating-point!");
413 }
414}
415
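// Broadcast the lane selected by the constant C across Count lanes using a
// shufflevector splat mask.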
416Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C,
417 const ElementCount &Count) {
418 Value *SV = llvm::ConstantVector::getSplat(Count, C);
419 return Builder.CreateShuffleVector(V, V, SV, "lane");
420}
421
422Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
423 ElementCount EC = cast<llvm::VectorType>(V->getType())->getElementCount();
424 return EmitNeonSplat(V, C, EC);
425}
426
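// Bitcast each operand to the parameter type expected by F (turning the
// shift operand into a splat shift vector when requested) and emit the
// call, using the constrained-FP builder when F is a constrained intrinsic.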
427Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
428 const char *name,
429 unsigned shift, bool rightshift) {
430 unsigned j = 0;
431 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
432 ai != ae; ++ai, ++j) {
433 if (F->isConstrainedFPIntrinsic())
434 if (ai->getType()->isMetadataTy())
435 continue;
436 if (shift > 0 && shift == j)
437 Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
438 else
439 Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
440 }
441
442 if (F->isConstrainedFPIntrinsic())
443 return Builder.CreateConstrainedFPCall(F, Ops, name);
444 else
445 return Builder.CreateCall(F, Ops, name);
446}
447
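// The last argument of the builtin carries the FPM value: write it to the
// FPMR register, then emit the FP8 NEON intrinsic over the remaining
// operands.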
448Value *CodeGenFunction::EmitFP8NeonCall(unsigned IID,
449 ArrayRef<llvm::Type *> Tys,
450 SmallVectorImpl<llvm::Value *> &Ops,
451 const CallExpr *E, const char *name) {
452 llvm::Value *FPM =
453 EmitScalarOrConstFoldImmArg(/* ICEArguments */ 0, E->getNumArgs() - 1, E);
454 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr), FPM);
455 return EmitNeonCall(CGM.getIntrinsic(IID, Tys), Ops, name);
456}
457
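// FP8 dot-product helper: derive the result lane count from the ratio of
// the input width to the return element width, widen the lane operand to a
// 16-byte vector when ExtendLaneArg is set, and forward to EmitFP8NeonCall.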
458llvm::Value *CodeGenFunction::EmitFP8NeonFDOTCall(
459 unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy,
460 SmallVectorImpl<llvm::Value *> &Ops, const CallExpr *E, const char *name) {
461
462 const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
463 RetTy->getPrimitiveSizeInBits();
464 llvm::Type *Tys[] = {llvm::FixedVectorType::get(RetTy, ElemCount),
465 Ops[1]->getType()};
466 if (ExtendLaneArg) {
467 auto *VT = llvm::FixedVectorType::get(Int8Ty, 16);
468 Ops[2] = Builder.CreateInsertVector(VT, PoisonValue::get(VT), Ops[2],
469 uint64_t(0));
470 }
471 return EmitFP8NeonCall(IID, Tys, Ops, E, name);
472}
473
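// FP8 multiply-accumulate helper: widens the lane operand when requested
// and emits the intrinsic with the result vector type derived from RetTy.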
474llvm::Value *CodeGenFunction::EmitFP8NeonFMLACall(
475 unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy,
476 SmallVectorImpl<llvm::Value *> &Ops, const CallExpr *E, const char *name) {
477
478 if (ExtendLaneArg) {
479 auto *VT = llvm::FixedVectorType::get(Int8Ty, 16);
480 Ops[2] = Builder.CreateInsertVector(VT, PoisonValue::get(VT), Ops[2],
481 uint64_t(0));
482 }
483 const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
484 RetTy->getPrimitiveSizeInBits();
485 return EmitFP8NeonCall(IID, {llvm::FixedVectorType::get(RetTy, ElemCount)},
486 Ops, E, name);
487}
488
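// Build the splat vector constant used as a NEON shift amount, negating it
// for right shifts.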
489Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
490 bool neg) {
491 int SV = cast<ConstantInt>(V)->getSExtValue();
492 return ConstantInt::get(Ty, neg ? -SV : SV);
493}
494
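// FP8 conversion helper: when Extract is set, only the low 64 bits of the
// first operand are converted, so narrow it before emitting the intrinsic.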
495Value *CodeGenFunction::EmitFP8NeonCvtCall(unsigned IID, llvm::Type *Ty0,
496 llvm::Type *Ty1, bool Extract,
497 SmallVectorImpl<llvm::Value *> &Ops,
498 const CallExpr *E,
499 const char *name) {
500 llvm::Type *Tys[] = {Ty0, Ty1};
501 if (Extract) {
502 // Op[0] is mfloat8x16_t, but the intrinsic converts only the lower part of
503 // the vector.
504 Tys[1] = llvm::FixedVectorType::get(Int8Ty, 8);
505 Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], uint64_t(0));
506 }
507 return EmitFP8NeonCall(IID, Tys, Ops, E, name);
508}
509
510// Right-shift a vector by a constant.
511Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
512 llvm::Type *Ty, bool usgn,
513 const char *name) {
514 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
515
516 int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
517 int EltSize = VTy->getScalarSizeInBits();
518
519 Vec = Builder.CreateBitCast(Vec, Ty);
520
521 // lshr/ashr are undefined when the shift amount is equal to the vector
522 // element size.
523 if (ShiftAmt == EltSize) {
524 if (usgn) {
525 // Right-shifting an unsigned value by its size yields 0.
526 return llvm::ConstantAggregateZero::get(VTy);
527 } else {
528 // Right-shifting a signed value by its size is equivalent
529 // to a shift of size-1.
530 --ShiftAmt;
531 Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
532 }
533 }
534
535 Shift = EmitNeonShiftVector(Shift, Ty, false);
536 if (usgn)
537 return Builder.CreateLShr(Vec, Shift, name);
538 else
539 return Builder.CreateAShr(Vec, Shift, name);
540}
541
542enum {
543 AddRetType = (1 << 0),
544 Add1ArgType = (1 << 1),
545 Add2ArgTypes = (1 << 2),
546
547 VectorizeRetType = (1 << 3),
548 VectorizeArgTypes = (1 << 4),
549
550 InventFloatType = (1 << 5),
551 UnsignedAlts = (1 << 6),
552
553 Use64BitVectors = (1 << 7),
554 Use128BitVectors = (1 << 8),
555
556 Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
557 VectorRet = AddRetType | VectorizeRetType,
558 VectorRetGetArgs01 =
559 AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
560 FpCmpzModifiers =
561 AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
562};
563
564namespace {
565struct ARMVectorIntrinsicInfo {
566 const char *NameHint;
567 unsigned BuiltinID;
568 unsigned LLVMIntrinsic;
569 unsigned AltLLVMIntrinsic;
570 unsigned TypeModifier;
571
572 bool operator<(unsigned RHSBuiltinID) const {
573 return BuiltinID < RHSBuiltinID;
574 }
575 bool operator<(const ARMVectorIntrinsicInfo &TE) const {
576 return BuiltinID < TE.BuiltinID;
577 }
578};
579} // end anonymous namespace
580
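// Table-entry helpers: NEONMAP0 marks a builtin that is lowered by custom
// code (no direct LLVM intrinsic), NEONMAP1 maps it to a single intrinsic,
// and NEONMAP2 additionally records an alternate intrinsic (typically the
// signed/unsigned counterpart).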
581#define NEONMAP0(NameBase) \
582 { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
583
584#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
585 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
586 Intrinsic::LLVMIntrinsic, 0, TypeModifier }
587
588#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
589 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
590 Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
591 TypeModifier }
592
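// Mapping from NEON builtins to 32-bit ARM LLVM intrinsics, kept sorted by
// BuiltinID so it can be searched with the comparators defined above.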
593static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
594 NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
595 NEONMAP0(splat_lane_v),
596 NEONMAP0(splat_laneq_v),
597 NEONMAP0(splatq_lane_v),
598 NEONMAP0(splatq_laneq_v),
599 NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
600 NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
601 NEONMAP1(vabs_v, arm_neon_vabs, 0),
602 NEONMAP1(vabsq_v, arm_neon_vabs, 0),
603 NEONMAP0(vadd_v),
604 NEONMAP0(vaddhn_v),
605 NEONMAP0(vaddq_v),
606 NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
607 NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
608 NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
609 NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
610 NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
611 NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
612 NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
613 NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
614 NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
615 NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
616 NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
617 NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
618 NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
619 NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
620 NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
621 NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
622 NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
623 NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType),
624 NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
625 NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
626 NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType),
627 NEONMAP1(vcage_v, arm_neon_vacge, 0),
628 NEONMAP1(vcageq_v, arm_neon_vacge, 0),
629 NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
630 NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
631 NEONMAP1(vcale_v, arm_neon_vacge, 0),
632 NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
633 NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
634 NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
635 NEONMAP0(vceqz_v),
636 NEONMAP0(vceqzq_v),
637 NEONMAP0(vcgez_v),
638 NEONMAP0(vcgezq_v),
639 NEONMAP0(vcgtz_v),
640 NEONMAP0(vcgtzq_v),
641 NEONMAP0(vclez_v),
642 NEONMAP0(vclezq_v),
643 NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
644 NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
645 NEONMAP0(vcltz_v),
646 NEONMAP0(vcltzq_v),
647 NEONMAP1(vclz_v, ctlz, Add1ArgType),
648 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
649 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
650 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
651 NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
652 NEONMAP0(vcvt_f16_s16),
653 NEONMAP0(vcvt_f16_u16),
654 NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
655 NEONMAP0(vcvt_f32_v),
656 NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
657 NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
658 NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
659 NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
660 NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
661 NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
662 NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
663 NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
664 NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
665 NEONMAP0(vcvt_s16_f16),
666 NEONMAP0(vcvt_s32_v),
667 NEONMAP0(vcvt_s64_v),
668 NEONMAP0(vcvt_u16_f16),
669 NEONMAP0(vcvt_u32_v),
670 NEONMAP0(vcvt_u64_v),
671 NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
672 NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
673 NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
674 NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
675 NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
676 NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
677 NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
678 NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
679 NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
680 NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
681 NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
682 NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
683 NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
684 NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
685 NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
686 NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
687 NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
688 NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
689 NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
690 NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
691 NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
692 NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
693 NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
694 NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
695 NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
696 NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
697 NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
698 NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
699 NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
700 NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
701 NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
702 NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
703 NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
704 NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
705 NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
706 NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
707 NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
708 NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
709 NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
710 NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
711 NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
712 NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
713 NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
714 NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
715 NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
716 NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
717 NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
718 NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
719 NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
720 NEONMAP0(vcvtq_f16_s16),
721 NEONMAP0(vcvtq_f16_u16),
722 NEONMAP0(vcvtq_f32_v),
723 NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
724 NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
725 NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
726 NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
727 NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
728 NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
729 NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
730 NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
731 NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
732 NEONMAP0(vcvtq_s16_f16),
733 NEONMAP0(vcvtq_s32_v),
734 NEONMAP0(vcvtq_s64_v),
735 NEONMAP0(vcvtq_u16_f16),
736 NEONMAP0(vcvtq_u32_v),
737 NEONMAP0(vcvtq_u64_v),
738 NEONMAP1(vdot_s32, arm_neon_sdot, 0),
739 NEONMAP1(vdot_u32, arm_neon_udot, 0),
740 NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
741 NEONMAP1(vdotq_u32, arm_neon_udot, 0),
742 NEONMAP0(vext_v),
743 NEONMAP0(vextq_v),
744 NEONMAP0(vfma_v),
745 NEONMAP0(vfmaq_v),
746 NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
747 NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
748 NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
749 NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
750 NEONMAP0(vld1_dup_v),
751 NEONMAP1(vld1_v, arm_neon_vld1, 0),
752 NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
753 NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
754 NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
755 NEONMAP0(vld1q_dup_v),
756 NEONMAP1(vld1q_v, arm_neon_vld1, 0),
757 NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
758 NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
759 NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
760 NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
761 NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
762 NEONMAP1(vld2_v, arm_neon_vld2, 0),
763 NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
764 NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
765 NEONMAP1(vld2q_v, arm_neon_vld2, 0),
766 NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
767 NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
768 NEONMAP1(vld3_v, arm_neon_vld3, 0),
769 NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
770 NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
771 NEONMAP1(vld3q_v, arm_neon_vld3, 0),
772 NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
773 NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
774 NEONMAP1(vld4_v, arm_neon_vld4, 0),
775 NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
776 NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
777 NEONMAP1(vld4q_v, arm_neon_vld4, 0),
778 NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
779 NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
780 NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
781 NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
782 NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
783 NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
784 NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
785 NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
786 NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
787 NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
788 NEONMAP0(vmovl_v),
789 NEONMAP0(vmovn_v),
790 NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
791 NEONMAP0(vmull_v),
792 NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
793 NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
794 NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
795 NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
796 NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
797 NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
798 NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
799 NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
800 NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
801 NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
802 NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
803 NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
804 NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
805 NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
806 NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
807 NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
808 NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
809 NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
810 NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
811 NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
812 NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
813 NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
814 NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType),
815 NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType),
816 NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType),
817 NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType),
818 NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType),
819 NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType),
820 NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType),
821 NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType),
822 NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
823 NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
824 NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
825 NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
826 NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
827 NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
828 NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
829 NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
830 NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
831 NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
832 NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
833 NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
834 NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
835 NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
836 NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
837 NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
838 NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
839 NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
840 NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
841 NEONMAP1(vrnd_v, trunc, Add1ArgType),
842 NEONMAP1(vrnda_v, round, Add1ArgType),
843 NEONMAP1(vrndaq_v, round, Add1ArgType),
844 NEONMAP0(vrndi_v),
845 NEONMAP0(vrndiq_v),
846 NEONMAP1(vrndm_v, floor, Add1ArgType),
847 NEONMAP1(vrndmq_v, floor, Add1ArgType),
848 NEONMAP1(vrndn_v, roundeven, Add1ArgType),
849 NEONMAP1(vrndnq_v, roundeven, Add1ArgType),
850 NEONMAP1(vrndp_v, ceil, Add1ArgType),
851 NEONMAP1(vrndpq_v, ceil, Add1ArgType),
852 NEONMAP1(vrndq_v, trunc, Add1ArgType),
853 NEONMAP1(vrndx_v, rint, Add1ArgType),
854 NEONMAP1(vrndxq_v, rint, Add1ArgType),
855 NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
856 NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
857 NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
858 NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
859 NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
860 NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
861 NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
862 NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
863 NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
864 NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
865 NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
866 NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
867 NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
868 NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
869 NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
870 NEONMAP0(vshl_n_v),
871 NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
872 NEONMAP0(vshll_n_v),
873 NEONMAP0(vshlq_n_v),
874 NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
875 NEONMAP0(vshr_n_v),
876 NEONMAP0(vshrn_n_v),
877 NEONMAP0(vshrq_n_v),
878 NEONMAP1(vst1_v, arm_neon_vst1, 0),
879 NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
880 NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
881 NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
882 NEONMAP1(vst1q_v, arm_neon_vst1, 0),
883 NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
884 NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
885 NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
886 NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
887 NEONMAP1(vst2_v, arm_neon_vst2, 0),
888 NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
889 NEONMAP1(vst2q_v, arm_neon_vst2, 0),
890 NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
891 NEONMAP1(vst3_v, arm_neon_vst3, 0),
892 NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
893 NEONMAP1(vst3q_v, arm_neon_vst3, 0),
894 NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
895 NEONMAP1(vst4_v, arm_neon_vst4, 0),
896 NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
897 NEONMAP1(vst4q_v, arm_neon_vst4, 0),
898 NEONMAP0(vsubhn_v),
899 NEONMAP0(vtrn_v),
900 NEONMAP0(vtrnq_v),
901 NEONMAP0(vtst_v),
902 NEONMAP0(vtstq_v),
903 NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
904 NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
905 NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
906 NEONMAP0(vuzp_v),
907 NEONMAP0(vuzpq_v),
908 NEONMAP0(vzip_v),
909 NEONMAP0(vzipq_v)
910};
911
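// Equivalent mapping for the AArch64 advanced SIMD builtins.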
912static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
913 NEONMAP0(splat_lane_v),
914 NEONMAP0(splat_laneq_v),
915 NEONMAP0(splatq_lane_v),
916 NEONMAP0(splatq_laneq_v),
917 NEONMAP1(vabs_v, aarch64_neon_abs, 0),
918 NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
919 NEONMAP0(vadd_v),
920 NEONMAP0(vaddhn_v),
921 NEONMAP0(vaddq_p128),
922 NEONMAP0(vaddq_v),
923 NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0),
924 NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0),
925 NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0),
926 NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0),
927 NEONMAP2(vbcaxq_s16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
928 NEONMAP2(vbcaxq_s32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
929 NEONMAP2(vbcaxq_s64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
930 NEONMAP2(vbcaxq_s8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
931 NEONMAP2(vbcaxq_u16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
932 NEONMAP2(vbcaxq_u32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
933 NEONMAP2(vbcaxq_u64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
934 NEONMAP2(vbcaxq_u8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
935 NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0),
936 NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0),
937 NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0),
938 NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0),
939 NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0),
940 NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
941 NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
942 NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
943 NEONMAP1(vcadd_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
944 NEONMAP1(vcaddq_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
945 NEONMAP1(vcaddq_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
946 NEONMAP1(vcaddq_rot270_f64, aarch64_neon_vcadd_rot270, Add1ArgType),
947 NEONMAP1(vcaddq_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
948 NEONMAP1(vcaddq_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
949 NEONMAP1(vcaddq_rot90_f64, aarch64_neon_vcadd_rot90, Add1ArgType),
950 NEONMAP1(vcage_v, aarch64_neon_facge, 0),
951 NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
952 NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
953 NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
954 NEONMAP1(vcale_v, aarch64_neon_facge, 0),
955 NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
956 NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
957 NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
958 NEONMAP0(vceqz_v),
959 NEONMAP0(vceqzq_v),
960 NEONMAP0(vcgez_v),
961 NEONMAP0(vcgezq_v),
962 NEONMAP0(vcgtz_v),
963 NEONMAP0(vcgtzq_v),
964 NEONMAP0(vclez_v),
965 NEONMAP0(vclezq_v),
966 NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
967 NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
968 NEONMAP0(vcltz_v),
969 NEONMAP0(vcltzq_v),
970 NEONMAP1(vclz_v, ctlz, Add1ArgType),
971 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
972 NEONMAP1(vcmla_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
973 NEONMAP1(vcmla_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
974 NEONMAP1(vcmla_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
975 NEONMAP1(vcmla_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
976 NEONMAP1(vcmla_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
977 NEONMAP1(vcmla_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
978 NEONMAP1(vcmla_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
979 NEONMAP1(vcmla_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
980 NEONMAP1(vcmlaq_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
981 NEONMAP1(vcmlaq_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
982 NEONMAP1(vcmlaq_f64, aarch64_neon_vcmla_rot0, Add1ArgType),
983 NEONMAP1(vcmlaq_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
984 NEONMAP1(vcmlaq_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
985 NEONMAP1(vcmlaq_rot180_f64, aarch64_neon_vcmla_rot180, Add1ArgType),
986 NEONMAP1(vcmlaq_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
987 NEONMAP1(vcmlaq_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
988 NEONMAP1(vcmlaq_rot270_f64, aarch64_neon_vcmla_rot270, Add1ArgType),
989 NEONMAP1(vcmlaq_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
990 NEONMAP1(vcmlaq_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
991 NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType),
992 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
993 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
994 NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
995 NEONMAP0(vcvt_f16_s16),
996 NEONMAP0(vcvt_f16_u16),
997 NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
998 NEONMAP0(vcvt_f32_v),
999 NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
1000 NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
1001 NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
1002 NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
1003 NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
1004 NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
1005 NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
1006 NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
1007 NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
1008 NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
1009 NEONMAP0(vcvtq_f16_s16),
1010 NEONMAP0(vcvtq_f16_u16),
1011 NEONMAP0(vcvtq_f32_v),
1012 NEONMAP0(vcvtq_high_bf16_f32),
1013 NEONMAP0(vcvtq_low_bf16_f32),
1014 NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
1015 NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
1016 NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
1017 NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
1018 NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
1019 NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
1020 NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
1021 NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
1022 NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
1023 NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
1024 NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
1025 NEONMAP1(vdot_s32, aarch64_neon_sdot, 0),
1026 NEONMAP1(vdot_u32, aarch64_neon_udot, 0),
1027 NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0),
1028 NEONMAP1(vdotq_u32, aarch64_neon_udot, 0),
1029 NEONMAP2(veor3q_s16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1030 NEONMAP2(veor3q_s32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1031 NEONMAP2(veor3q_s64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1032 NEONMAP2(veor3q_s8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1033 NEONMAP2(veor3q_u16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1034 NEONMAP2(veor3q_u32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1035 NEONMAP2(veor3q_u64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1036 NEONMAP2(veor3q_u8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1037 NEONMAP0(vext_v),
1038 NEONMAP0(vextq_v),
1039 NEONMAP0(vfma_v),
1040 NEONMAP0(vfmaq_v),
1041 NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0),
1042 NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0),
1043 NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0),
1044 NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0),
1045 NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0),
1046 NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0),
1047 NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0),
1048 NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0),
1049 NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
1050 NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
1051 NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
1052 NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
1053 NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
1054 NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
1055 NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
1056 NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
1057 NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
1058 NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
1059 NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0),
1060 NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0),
1061 NEONMAP0(vmovl_v),
1062 NEONMAP0(vmovn_v),
1063 NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
1064 NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
1065 NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
1066 NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
1067 NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
1068 NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
1069 NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
1070 NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
1071 NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
1072 NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
1073 NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
1074 NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
1075 NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0),
1076 NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
1077 NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
1078 NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0),
1079 NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
1080 NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
1081 NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
1082 NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
1083 NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
1084 NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
1085 NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
1086 NEONMAP1(vqrdmlah_s16, aarch64_neon_sqrdmlah, Add1ArgType),
1087 NEONMAP1(vqrdmlah_s32, aarch64_neon_sqrdmlah, Add1ArgType),
1088 NEONMAP1(vqrdmlahq_s16, aarch64_neon_sqrdmlah, Add1ArgType),
1089 NEONMAP1(vqrdmlahq_s32, aarch64_neon_sqrdmlah, Add1ArgType),
1090 NEONMAP1(vqrdmlsh_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
1091 NEONMAP1(vqrdmlsh_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
1092 NEONMAP1(vqrdmlshq_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
1093 NEONMAP1(vqrdmlshq_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
1094 NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
1095 NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
1096 NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
1097 NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0),
1098 NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
1099 NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
1100 NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
1101 NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
1102 NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
1103 NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
1104 NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
1105 NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
1106 NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
1107 NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
1108 NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
1109 NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
1110 NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
1111 NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0),
1112 NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
1113 NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
1114 NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
1115 NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
1116 NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
1117 NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
1118 NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType),
1119 NEONMAP1(vrnd32x_f64, aarch64_neon_frint32x, Add1ArgType),
1120 NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType),
1121 NEONMAP1(vrnd32xq_f64, aarch64_neon_frint32x, Add1ArgType),
1122 NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType),
1123 NEONMAP1(vrnd32z_f64, aarch64_neon_frint32z, Add1ArgType),
1124 NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType),
1125 NEONMAP1(vrnd32zq_f64, aarch64_neon_frint32z, Add1ArgType),
1126 NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType),
1127 NEONMAP1(vrnd64x_f64, aarch64_neon_frint64x, Add1ArgType),
1128 NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType),
1129 NEONMAP1(vrnd64xq_f64, aarch64_neon_frint64x, Add1ArgType),
1130 NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType),
1131 NEONMAP1(vrnd64z_f64, aarch64_neon_frint64z, Add1ArgType),
1132 NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType),
1133 NEONMAP1(vrnd64zq_f64, aarch64_neon_frint64z, Add1ArgType),
1134 NEONMAP0(vrndi_v),
1135 NEONMAP0(vrndiq_v),
1136 NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
1137 NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
1138 NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
1139 NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
1140 NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
1141 NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
1142 NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
1143 NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
1144 NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
1145 NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0),
1146 NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0),
1147 NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0),
1148 NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0),
1149 NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0),
1150 NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0),
1151 NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0),
1152 NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0),
1153 NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0),
1154 NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0),
1155 NEONMAP0(vshl_n_v),
1156 NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
1157 NEONMAP0(vshll_n_v),
1158 NEONMAP0(vshlq_n_v),
1159 NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
1160 NEONMAP0(vshr_n_v),
1161 NEONMAP0(vshrn_n_v),
1162 NEONMAP0(vshrq_n_v),
1163 NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0),
1164 NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0),
1165 NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0),
1166 NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0),
1167 NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0),
1168 NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0),
1169 NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0),
1170 NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0),
1171 NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0),
1172 NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
1173 NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
1174 NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
1175 NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
1176 NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
1177 NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
1178 NEONMAP0(vsubhn_v),
1179 NEONMAP0(vtst_v),
1180 NEONMAP0(vtstq_v),
1181 NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0),
1182 NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0),
1183 NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0),
1184 NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0),
1185};
1186
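// Mapping for the AArch64 scalar (SISD) builtins.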
1187static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
1188 NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
1189 NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
1190 NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
1191 NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
1192 NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
1193 NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
1194 NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
1195 NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
1196 NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
1197 NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
1198 NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
1199 NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
1200 NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
1201 NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
1202 NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
1203 NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
1204 NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
1205 NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
1206 NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
1207 NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
1208 NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
1209 NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
1210 NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
1211 NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
1212 NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
1213 NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
1214 NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
1215 NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
1216 NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
1217 NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
1218 NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
1219 NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
1220 NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
1221 NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
1222 NEONMAP0(vcvth_bf16_f32),
1223 NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
1224 NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
1225 NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
1226 NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
1227 NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
1228 NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
1229 NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
1230 NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
1231 NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
1232 NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
1233 NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
1234 NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
1235 NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
1236 NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
1237 NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
1238 NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
1239 NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
1240 NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
1241 NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
1242 NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
1243 NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
1244 NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
1245 NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
1246 NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
1247 NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
1248 NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
1249 NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
1250 NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
1251 NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
1252 NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
1253 NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
1254 NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
1255 NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
1256 NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
1257 NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
1258 NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
1259 NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
1260 NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
1261 NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
1262 NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
1263 NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
1264 NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
1265 NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
1266 NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
1267 NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
1268 NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
1269 NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
1270 NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
1271 NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
1272 NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
1273 NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
1274 NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
1275 NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
1276 NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
1277 NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
1278 NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
1279 NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
1280 NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
1281 NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
1282 NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
1283 NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
1284 NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
1285 NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
1286 NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
1287 NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
1288 NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
1289 NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
1290 NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
1291 NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
1292 NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
1293 NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
1294 NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
1295 NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
1296 NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
1297 NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
1298 NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
1299 NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
1300 NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
1301 NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
1302 NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
1303 NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
1304 NEONMAP1(vqrdmlahh_s16, aarch64_neon_sqrdmlah, Vectorize1ArgType | Use64BitVectors),
1305 NEONMAP1(vqrdmlahs_s32, aarch64_neon_sqrdmlah, Add1ArgType),
1306 NEONMAP1(vqrdmlshh_s16, aarch64_neon_sqrdmlsh, Vectorize1ArgType | Use64BitVectors),
1307 NEONMAP1(vqrdmlshs_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
1308 NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
1309 NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
1310 NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
1311 NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
1312 NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
1313 NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
1314 NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
1315 NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
1316 NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
1317 NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
1318 NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
1319 NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
1320 NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
1321 NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
1322 NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
1323 NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
1324 NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
1325 NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
1326 NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
1327 NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
1328 NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
1329 NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
1330 NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
1331 NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
1332 NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
1333 NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
1334 NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
1335 NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
1336 NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
1337 NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
1338 NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
1339 NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
1340 NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
1341 NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
1342 NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
1343 NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
1344 NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
1345 NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
1346 NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
1347 NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
1348 NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
1349 NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
1350 NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
1351 NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
1352 NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
1353 NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
1354 NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
1355 NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
1356 NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
1357 NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
1358 NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
1359 NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
1360 NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
1361 NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
1362 NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
1363 NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
1364 NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
1365 NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
1366 NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
1367 NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
1368 NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
1369 NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
1370 NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
1371 NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
1372 NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
1373 NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
1374 NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
1375 NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
1376 NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
1377 NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
1378 NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
1379 NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
1380 NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
1381 NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
1382 NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
1383 NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
1384 NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
1385 NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
1386 NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
1387 NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
1388 NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
1389 // FP16 scalar intrinsics go here.
1390 NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
1391 NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
1392 NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
1393 NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
1394 NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
1395 NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
1396 NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
1397 NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
1398 NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
1399 NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
1400 NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
1401 NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
1402 NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
1403 NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
1404 NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
1405 NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
1406 NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
1407 NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
1408 NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
1409 NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
1410 NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
1411 NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
1412 NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
1413 NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
1414 NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
1415 NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
1416 NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
1417 NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
1418 NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
1419 NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
1420 NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
1421 NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
1422 NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
1423 NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
1424};
1425
1426// Some intrinsics are equivalent for codegen.
1427static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
1428 { NEON::BI__builtin_neon_splat_lane_bf16, NEON::BI__builtin_neon_splat_lane_v, },
1429 { NEON::BI__builtin_neon_splat_laneq_bf16, NEON::BI__builtin_neon_splat_laneq_v, },
1430 { NEON::BI__builtin_neon_splatq_lane_bf16, NEON::BI__builtin_neon_splatq_lane_v, },
1431 { NEON::BI__builtin_neon_splatq_laneq_bf16, NEON::BI__builtin_neon_splatq_laneq_v, },
1432 { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, },
1433 { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, },
1434 { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, },
1435 { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, },
1436 { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, },
1437 { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, },
1438 { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, },
1439 { NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, },
1440 { NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, },
1441 { NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, },
1442 { NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, },
1443 { NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, },
1444 { NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, },
1445 { NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, },
1446 { NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, },
1447 { NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, },
1448 { NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, },
1449 { NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, },
1450 { NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, },
1451 { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, },
1452 { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, },
1453 { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, },
1454 { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, },
1455 { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, },
1456 { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, },
1457 { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, },
1458 { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, },
1459 { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, },
1460 { NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v },
1461 { NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v },
1462 { NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v },
1463 { NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v },
1464 { NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v },
1465 { NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v },
1466 { NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v },
1467 { NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v },
1468 { NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v },
1469 { NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v },
1470 { NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v },
1471 { NEON::BI__builtin_neon_vld1q_lane_bf16, NEON::BI__builtin_neon_vld1q_lane_v },
1472 { NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v },
1473 { NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v },
1474 { NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v },
1475 { NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v },
1476 { NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v },
1477 { NEON::BI__builtin_neon_vld2q_lane_bf16, NEON::BI__builtin_neon_vld2q_lane_v },
1478 { NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v },
1479 { NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v },
1480 { NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v },
1481 { NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v },
1482 { NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v },
1483 { NEON::BI__builtin_neon_vld3q_lane_bf16, NEON::BI__builtin_neon_vld3q_lane_v },
1484 { NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v },
1485 { NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v },
1486 { NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v },
1487 { NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v },
1488 { NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v },
1489 { NEON::BI__builtin_neon_vld4q_lane_bf16, NEON::BI__builtin_neon_vld4q_lane_v },
1490 { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, },
1491 { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, },
1492 { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, },
1493 { NEON::BI__builtin_neon_vmaxq_f16, NEON::BI__builtin_neon_vmaxq_v, },
1494 { NEON::BI__builtin_neon_vmin_f16, NEON::BI__builtin_neon_vmin_v, },
1495 { NEON::BI__builtin_neon_vminnm_f16, NEON::BI__builtin_neon_vminnm_v, },
1496 { NEON::BI__builtin_neon_vminnmq_f16, NEON::BI__builtin_neon_vminnmq_v, },
1497 { NEON::BI__builtin_neon_vminq_f16, NEON::BI__builtin_neon_vminq_v, },
1498 { NEON::BI__builtin_neon_vmulx_f16, NEON::BI__builtin_neon_vmulx_v, },
1499 { NEON::BI__builtin_neon_vmulxq_f16, NEON::BI__builtin_neon_vmulxq_v, },
1500 { NEON::BI__builtin_neon_vpadd_f16, NEON::BI__builtin_neon_vpadd_v, },
1501 { NEON::BI__builtin_neon_vpaddq_f16, NEON::BI__builtin_neon_vpaddq_v, },
1502 { NEON::BI__builtin_neon_vpmax_f16, NEON::BI__builtin_neon_vpmax_v, },
1503 { NEON::BI__builtin_neon_vpmaxnm_f16, NEON::BI__builtin_neon_vpmaxnm_v, },
1504 { NEON::BI__builtin_neon_vpmaxnmq_f16, NEON::BI__builtin_neon_vpmaxnmq_v, },
1505 { NEON::BI__builtin_neon_vpmaxq_f16, NEON::BI__builtin_neon_vpmaxq_v, },
1506 { NEON::BI__builtin_neon_vpmin_f16, NEON::BI__builtin_neon_vpmin_v, },
1507 { NEON::BI__builtin_neon_vpminnm_f16, NEON::BI__builtin_neon_vpminnm_v, },
1508 { NEON::BI__builtin_neon_vpminnmq_f16, NEON::BI__builtin_neon_vpminnmq_v, },
1509 { NEON::BI__builtin_neon_vpminq_f16, NEON::BI__builtin_neon_vpminq_v, },
1510 { NEON::BI__builtin_neon_vrecpe_f16, NEON::BI__builtin_neon_vrecpe_v, },
1511 { NEON::BI__builtin_neon_vrecpeq_f16, NEON::BI__builtin_neon_vrecpeq_v, },
1512 { NEON::BI__builtin_neon_vrecps_f16, NEON::BI__builtin_neon_vrecps_v, },
1513 { NEON::BI__builtin_neon_vrecpsq_f16, NEON::BI__builtin_neon_vrecpsq_v, },
1514 { NEON::BI__builtin_neon_vrnd_f16, NEON::BI__builtin_neon_vrnd_v, },
1515 { NEON::BI__builtin_neon_vrnda_f16, NEON::BI__builtin_neon_vrnda_v, },
1516 { NEON::BI__builtin_neon_vrndaq_f16, NEON::BI__builtin_neon_vrndaq_v, },
1517 { NEON::BI__builtin_neon_vrndi_f16, NEON::BI__builtin_neon_vrndi_v, },
1518 { NEON::BI__builtin_neon_vrndiq_f16, NEON::BI__builtin_neon_vrndiq_v, },
1519 { NEON::BI__builtin_neon_vrndm_f16, NEON::BI__builtin_neon_vrndm_v, },
1520 { NEON::BI__builtin_neon_vrndmq_f16, NEON::BI__builtin_neon_vrndmq_v, },
1521 { NEON::BI__builtin_neon_vrndn_f16, NEON::BI__builtin_neon_vrndn_v, },
1522 { NEON::BI__builtin_neon_vrndnq_f16, NEON::BI__builtin_neon_vrndnq_v, },
1523 { NEON::BI__builtin_neon_vrndp_f16, NEON::BI__builtin_neon_vrndp_v, },
1524 { NEON::BI__builtin_neon_vrndpq_f16, NEON::BI__builtin_neon_vrndpq_v, },
1525 { NEON::BI__builtin_neon_vrndq_f16, NEON::BI__builtin_neon_vrndq_v, },
1526 { NEON::BI__builtin_neon_vrndx_f16, NEON::BI__builtin_neon_vrndx_v, },
1527 { NEON::BI__builtin_neon_vrndxq_f16, NEON::BI__builtin_neon_vrndxq_v, },
1528 { NEON::BI__builtin_neon_vrsqrte_f16, NEON::BI__builtin_neon_vrsqrte_v, },
1529 { NEON::BI__builtin_neon_vrsqrteq_f16, NEON::BI__builtin_neon_vrsqrteq_v, },
1530 { NEON::BI__builtin_neon_vrsqrts_f16, NEON::BI__builtin_neon_vrsqrts_v, },
1531 { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, },
1532 { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, },
1533 { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, },
1534 { NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v },
1535 { NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v },
1536 { NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v },
1537 { NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v },
1538 { NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v },
1539 { NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v },
1540 { NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v },
1541 { NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v },
1542 { NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v },
1543 { NEON::BI__builtin_neon_vst1q_lane_bf16, NEON::BI__builtin_neon_vst1q_lane_v },
1544 { NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v },
1545 { NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v },
1546 { NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v },
1547 { NEON::BI__builtin_neon_vst2q_lane_bf16, NEON::BI__builtin_neon_vst2q_lane_v },
1548 { NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v },
1549 { NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v },
1550 { NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v },
1551 { NEON::BI__builtin_neon_vst3q_lane_bf16, NEON::BI__builtin_neon_vst3q_lane_v },
1552 { NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v },
1553 { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v },
1554 { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v },
1555 { NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v },
1556 // The mangling rules give us one ID for each type for vldap1(q)_lane and
1557 // vstl1(q)_lane, but codegen is equivalent for all of them, so we choose an
1558 // arbitrary one to handle as the canonical variation (see the note below).
1559 { NEON::BI__builtin_neon_vldap1_lane_u64, NEON::BI__builtin_neon_vldap1_lane_s64 },
1560 { NEON::BI__builtin_neon_vldap1_lane_f64, NEON::BI__builtin_neon_vldap1_lane_s64 },
1561 { NEON::BI__builtin_neon_vldap1_lane_p64, NEON::BI__builtin_neon_vldap1_lane_s64 },
1562 { NEON::BI__builtin_neon_vldap1q_lane_u64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
1563 { NEON::BI__builtin_neon_vldap1q_lane_f64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
1564 { NEON::BI__builtin_neon_vldap1q_lane_p64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
1565 { NEON::BI__builtin_neon_vstl1_lane_u64, NEON::BI__builtin_neon_vstl1_lane_s64 },
1566 { NEON::BI__builtin_neon_vstl1_lane_f64, NEON::BI__builtin_neon_vstl1_lane_s64 },
1567 { NEON::BI__builtin_neon_vstl1_lane_p64, NEON::BI__builtin_neon_vstl1_lane_s64 },
1568 { NEON::BI__builtin_neon_vstl1q_lane_u64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
1569 { NEON::BI__builtin_neon_vstl1q_lane_f64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
1570 { NEON::BI__builtin_neon_vstl1q_lane_p64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
1571};
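// Illustration of the remapping described above (a reading sketch, not extra
// table entries): the u64/f64/p64 flavours of vldap1_lane all map to
// NEON::BI__builtin_neon_vldap1_lane_s64, so only the s64 variant needs to be
// handled explicitly by the emitters below; the bf16/f16 pairs earlier in the
// table work the same way, forwarding to the generic _v builtin ID.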
1572
1573#undef NEONMAP0
1574#undef NEONMAP1
1575#undef NEONMAP2
1576
1577#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
1578 { \
1579 #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
1580 TypeModifier \
1581 }
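// As an example of what SVEMAP1 produces (hypothetical names, purely for
// illustration): SVEMAP1(svfoo, aarch64_sve_foo, SomeTypeModifier) expands to
//   { "svfoo", SVE::BI__builtin_sve_svfoo, Intrinsic::aarch64_sve_foo, 0,
//     SomeTypeModifier }
// i.e. a name string, the clang builtin ID, the LLVM intrinsic, and a modifier.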
1582
1583#define SVEMAP2(NameBase, TypeModifier) \
1584 { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
1585static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
1586#define GET_SVE_LLVM_INTRINSIC_MAP
1587#include "clang/Basic/arm_sve_builtin_cg.inc"
1588#include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
1589#undef GET_SVE_LLVM_INTRINSIC_MAP
1590};
1591
1592#undef SVEMAP1
1593#undef SVEMAP2
1594
1595#define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
1596 { \
1597 #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
1598 TypeModifier \
1599 }
1600
1601#define SMEMAP2(NameBase, TypeModifier) \
1602 { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier }
1603static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = {
1604#define GET_SME_LLVM_INTRINSIC_MAP
1605#include "clang/Basic/arm_sme_builtin_cg.inc"
1606#undef GET_SME_LLVM_INTRINSIC_MAP
1607};
1608
1609#undef SMEMAP1
1610#undef SMEMAP2
1611
1612static bool NEONSIMDIntrinsicsProvenSorted = false;
1613
1614static bool AArch64SIMDIntrinsicsProvenSorted = false;
1615static bool AArch64SISDIntrinsicsProvenSorted = false;
1616static bool AArch64SVEIntrinsicsProvenSorted = false;
1617static bool AArch64SMEIntrinsicsProvenSorted = false;
1618
1619static const ARMVectorIntrinsicInfo *
1620findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,
1621 unsigned BuiltinID, bool &MapProvenSorted) {
1622
1623#ifndef NDEBUG
1624 if (!MapProvenSorted) {
1625 assert(llvm::is_sorted(IntrinsicMap));
1626 MapProvenSorted = true;
1627 }
1628#endif
1629
1630 const ARMVectorIntrinsicInfo *Builtin =
1631 llvm::lower_bound(IntrinsicMap, BuiltinID);
1632
1633 if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
1634 return Builtin;
1635
1636 return nullptr;
1637}
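// A minimal usage sketch (assuming the matching *ProvenSorted flag declared
// above is passed alongside its map):
//   const ARMVectorIntrinsicInfo *Info = findARMVectorIntrinsicInMap(
//       AArch64SIMDIntrinsicMap, BuiltinID, AArch64SIMDIntrinsicsProvenSorted);
//   if (Info)
//     ; // Info->LLVMIntrinsic / Info->TypeModifier describe the lowering.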
1638
1639Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
1640 unsigned Modifier,
1641 llvm::Type *ArgType,
1642 const CallExpr *E) {
1643 int VectorSize = 0;
1644 if (Modifier & Use64BitVectors)
1645 VectorSize = 64;
1646 else if (Modifier & Use128BitVectors)
1647 VectorSize = 128;
1648
1649 // Return type.
1650 SmallVector<llvm::Type *, 3> Tys;
1651 if (Modifier & AddRetType) {
1652 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
1653 if (Modifier & VectorizeRetType)
1654 Ty = llvm::FixedVectorType::get(
1655 Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
1656
1657 Tys.push_back(Ty);
1658 }
1659
1660 // Arguments.
1661 if (Modifier & VectorizeArgTypes) {
1662 int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
1663 ArgType = llvm::FixedVectorType::get(ArgType, Elts);
1664 }
1665
1666 if (Modifier & (Add1ArgType | Add2ArgTypes))
1667 Tys.push_back(ArgType);
1668
1669 if (Modifier & Add2ArgTypes)
1670 Tys.push_back(ArgType);
1671
1672 if (Modifier & InventFloatType)
1673 Tys.push_back(FloatTy);
1674
1675 return CGM.getIntrinsic(IntrinsicID, Tys);
1676}
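// For instance (a sketch, not tied to a particular builtin): with
// Modifier = AddRetType | Add1ArgType and neither Use64BitVectors nor
// Use128BitVectors set, Tys ends up as { return type, ArgType } and the
// returned declaration is the IntrinsicID overload on those two types.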
1677
1678static Value *EmitCommonNeonSISDBuiltinExpr(
1679 CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo,
1680 SmallVectorImpl<Value *> &Ops, const CallExpr *E) {
1681 unsigned BuiltinID = SISDInfo.BuiltinID;
1682 unsigned int Int = SISDInfo.LLVMIntrinsic;
1683 unsigned Modifier = SISDInfo.TypeModifier;
1684 const char *s = SISDInfo.NameHint;
1685
1686 switch (BuiltinID) {
1687 case NEON::BI__builtin_neon_vcled_s64:
1688 case NEON::BI__builtin_neon_vcled_u64:
1689 case NEON::BI__builtin_neon_vcles_f32:
1690 case NEON::BI__builtin_neon_vcled_f64:
1691 case NEON::BI__builtin_neon_vcltd_s64:
1692 case NEON::BI__builtin_neon_vcltd_u64:
1693 case NEON::BI__builtin_neon_vclts_f32:
1694 case NEON::BI__builtin_neon_vcltd_f64:
1695 case NEON::BI__builtin_neon_vcales_f32:
1696 case NEON::BI__builtin_neon_vcaled_f64:
1697 case NEON::BI__builtin_neon_vcalts_f32:
1698 case NEON::BI__builtin_neon_vcaltd_f64:
1699 // Only one direction of comparisons actually exists: cmle is a cmge with
1700 // swapped operands. The table gives us the right intrinsic, but we still
1701 // need to do the swap here.
1702 std::swap(Ops[0], Ops[1]);
1703 break;
1704 }
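  // For example, vcaled_f64(a, b) is mapped above to aarch64_neon_facge, so
  // after the swap it is emitted as facge(b, a): |a| <= |b| computed as
  // |b| >= |a|.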
1705
1706 assert(Int && "Generic code assumes a valid intrinsic");
1707
1708 // Determine the type(s) of this overloaded AArch64 intrinsic.
1709 const Expr *Arg = E->getArg(0);
1710 llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
1711 Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
1712
1713 int j = 0;
1714 ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
1715 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
1716 ai != ae; ++ai, ++j) {
1717 llvm::Type *ArgTy = ai->getType();
1718 if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
1719 ArgTy->getPrimitiveSizeInBits())
1720 continue;
1721
1722 assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
1723 // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
1724 // it before inserting.
1725 Ops[j] = CGF.Builder.CreateTruncOrBitCast(
1726 Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType());
1727 Ops[j] =
1728 CGF.Builder.CreateInsertElement(PoisonValue::get(ArgTy), Ops[j], C0);
1729 }
1730
1731 Value *Result = CGF.EmitNeonCall(F, Ops, s);
1732 llvm::Type *ResultType = CGF.ConvertType(E->getType());
1733 if (ResultType->getPrimitiveSizeInBits().getFixedValue() <
1734 Result->getType()->getPrimitiveSizeInBits().getFixedValue())
1735 return CGF.Builder.CreateExtractElement(Result, C0);
1736
1737 return CGF.Builder.CreateBitCast(Result, ResultType, s);
1738}
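// Sketch of the overall effect (illustrative types): a scalar builtin such as
// vqaddb_s8, mapped with Vectorize1ArgType | Use64BitVectors, has its i8
// operands inserted into lane 0 of <8 x i8> poison vectors by the loop above,
// the 64-bit-vector intrinsic is called, and the scalar result is read back
// out of lane 0 by the final extractelement.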
1739
1740Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
1741 unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
1742 const char *NameHint, unsigned Modifier, const CallExpr *E,
1743 SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
1744 llvm::Triple::ArchType Arch) {
1745 // Get the last argument, which specifies the vector type.
1746 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
1747 std::optional<llvm::APSInt> NeonTypeConst =
1748 Arg->getIntegerConstantExpr(getContext());
1749 if (!NeonTypeConst)
1750 return nullptr;
1751
1752 // Determine the type of this overloaded NEON intrinsic.
1753 NeonTypeFlags Type(NeonTypeConst->getZExtValue());
1754 const bool Usgn = Type.isUnsigned();
1755 const bool Quad = Type.isQuad();
1756 const bool Floating = Type.isFloatingPoint();
1757 const bool HasFastHalfType = getTarget().hasFastHalfType();
1758 const bool AllowBFloatArgsAndRet =
1759 getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
1760
1761 llvm::FixedVectorType *VTy =
1762 GetNeonType(this, Type, HasFastHalfType, false, AllowBFloatArgsAndRet);
1763 llvm::Type *Ty = VTy;
1764 if (!Ty)
1765 return nullptr;
1766
1767 auto getAlignmentValue32 = [&](Address addr) -> Value* {
1768 return Builder.getInt32(addr.getAlignment().getQuantity());
1769 };
1770
1771 unsigned Int = LLVMIntrinsic;
1772 if ((Modifier & UnsignedAlts) && !Usgn)
1773 Int = AltLLVMIntrinsic;
1774
1775 switch (BuiltinID) {
1776 default: break;
1777 case NEON::BI__builtin_neon_splat_lane_v:
1778 case NEON::BI__builtin_neon_splat_laneq_v:
1779 case NEON::BI__builtin_neon_splatq_lane_v:
1780 case NEON::BI__builtin_neon_splatq_laneq_v: {
1781 auto NumElements = VTy->getElementCount();
1782 if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
1783 NumElements = NumElements * 2;
1784 if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
1785 NumElements = NumElements.divideCoefficientBy(2);
1786
1787 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
1788 return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
1789 }
1790 case NEON::BI__builtin_neon_vpadd_v:
1791 case NEON::BI__builtin_neon_vpaddq_v:
1792 // We don't allow fp/int overloading of intrinsics.
1793 if (VTy->getElementType()->isFloatingPointTy() &&
1794 Int == Intrinsic::aarch64_neon_addp)
1795 Int = Intrinsic::aarch64_neon_faddp;
1796 break;
1797 case NEON::BI__builtin_neon_vabs_v:
1798 case NEON::BI__builtin_neon_vabsq_v:
1799 if (VTy->getElementType()->isFloatingPointTy())
1800 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
1801 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
1802 case NEON::BI__builtin_neon_vadd_v:
1803 case NEON::BI__builtin_neon_vaddq_v: {
1804 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8);
1805 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
1806 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
1807 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
1808 return Builder.CreateBitCast(Ops[0], Ty);
1809 }
1810 case NEON::BI__builtin_neon_vaddhn_v: {
1811 llvm::FixedVectorType *SrcTy =
1812 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
1813
1814 // %sum = add <4 x i32> %lhs, %rhs
1815 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
1816 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
1817 Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
1818
1819 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
1820 Constant *ShiftAmt =
1821 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
1822 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
1823
1824 // %res = trunc <4 x i32> %high to <4 x i16>
1825 return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
1826 }
1827 case NEON::BI__builtin_neon_vcale_v:
1828 case NEON::BI__builtin_neon_vcaleq_v:
1829 case NEON::BI__builtin_neon_vcalt_v:
1830 case NEON::BI__builtin_neon_vcaltq_v:
1831 std::swap(Ops[0], Ops[1]);
1832 [[fallthrough]];
1833 case NEON::BI__builtin_neon_vcage_v:
1834 case NEON::BI__builtin_neon_vcageq_v:
1835 case NEON::BI__builtin_neon_vcagt_v:
1836 case NEON::BI__builtin_neon_vcagtq_v: {
1837 llvm::Type *Ty;
1838 switch (VTy->getScalarSizeInBits()) {
1839 default: llvm_unreachable("unexpected type");
1840 case 32:
1841 Ty = FloatTy;
1842 break;
1843 case 64:
1844 Ty = DoubleTy;
1845 break;
1846 case 16:
1847 Ty = HalfTy;
1848 break;
1849 }
1850 auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
1851 llvm::Type *Tys[] = { VTy, VecFlt };
1852 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
1853 return EmitNeonCall(F, Ops, NameHint);
1854 }
1855 case NEON::BI__builtin_neon_vceqz_v:
1856 case NEON::BI__builtin_neon_vceqzq_v:
1857 return EmitAArch64CompareBuiltinExpr(
1858 Ops[0], Ty, Floating ? ICmpInst::FCMP_OEQ : ICmpInst::ICMP_EQ, "vceqz");
1859 case NEON::BI__builtin_neon_vcgez_v:
1860 case NEON::BI__builtin_neon_vcgezq_v:
1861 return EmitAArch64CompareBuiltinExpr(
1862 Ops[0], Ty, Floating ? ICmpInst::FCMP_OGE : ICmpInst::ICMP_SGE,
1863 "vcgez");
1864 case NEON::BI__builtin_neon_vclez_v:
1865 case NEON::BI__builtin_neon_vclezq_v:
1866 return EmitAArch64CompareBuiltinExpr(
1867 Ops[0], Ty, Floating ? ICmpInst::FCMP_OLE : ICmpInst::ICMP_SLE,
1868 "vclez");
1869 case NEON::BI__builtin_neon_vcgtz_v:
1870 case NEON::BI__builtin_neon_vcgtzq_v:
1871 return EmitAArch64CompareBuiltinExpr(
1872 Ops[0], Ty, Floating ? ICmpInst::FCMP_OGT : ICmpInst::ICMP_SGT,
1873 "vcgtz");
1874 case NEON::BI__builtin_neon_vcltz_v:
1875 case NEON::BI__builtin_neon_vcltzq_v:
1876 return EmitAArch64CompareBuiltinExpr(
1877 Ops[0], Ty, Floating ? ICmpInst::FCMP_OLT : ICmpInst::ICMP_SLT,
1878 "vcltz");
1879 case NEON::BI__builtin_neon_vclz_v:
1880 case NEON::BI__builtin_neon_vclzq_v:
1881 // We generate a target-independent intrinsic, which needs a second argument
1882 // saying whether or not clz of zero is undefined; on ARM it isn't.
1883 Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
1884 break;
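  // For vclz, the emitted call therefore ends up as llvm.ctlz(x, i1 false) on
  // ARM, since clz of zero is well defined there (an illustrative sketch of
  // the result; the exact overload depends on Ty).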
1885 case NEON::BI__builtin_neon_vcvt_f32_v:
1886 case NEON::BI__builtin_neon_vcvtq_f32_v:
1887 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1888 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
1889 HasFastHalfType);
1890 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
1891 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
1892 case NEON::BI__builtin_neon_vcvt_f16_s16:
1893 case NEON::BI__builtin_neon_vcvt_f16_u16:
1894 case NEON::BI__builtin_neon_vcvtq_f16_s16:
1895 case NEON::BI__builtin_neon_vcvtq_f16_u16:
1896 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1897 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
1898 HasFastHalfType);
1899 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
1900 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
1901 case NEON::BI__builtin_neon_vcvt_n_f16_s16:
1902 case NEON::BI__builtin_neon_vcvt_n_f16_u16:
1903 case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
1904 case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
1905 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
1906 Function *F = CGM.getIntrinsic(Int, Tys);
1907 return EmitNeonCall(F, Ops, "vcvt_n");
1908 }
1909 case NEON::BI__builtin_neon_vcvt_n_f32_v:
1910 case NEON::BI__builtin_neon_vcvt_n_f64_v:
1911 case NEON::BI__builtin_neon_vcvtq_n_f32_v:
1912 case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
1913 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
1914 Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
1915 Function *F = CGM.getIntrinsic(Int, Tys);
1916 return EmitNeonCall(F, Ops, "vcvt_n");
1917 }
1918 case NEON::BI__builtin_neon_vcvt_n_s16_f16:
1919 case NEON::BI__builtin_neon_vcvt_n_s32_v:
1920 case NEON::BI__builtin_neon_vcvt_n_u16_f16:
1921 case NEON::BI__builtin_neon_vcvt_n_u32_v:
1922 case NEON::BI__builtin_neon_vcvt_n_s64_v:
1923 case NEON::BI__builtin_neon_vcvt_n_u64_v:
1924 case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
1925 case NEON::BI__builtin_neon_vcvtq_n_s32_v:
1926 case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
1927 case NEON::BI__builtin_neon_vcvtq_n_u32_v:
1928 case NEON::BI__builtin_neon_vcvtq_n_s64_v:
1929 case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
1930 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
1931 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
1932 return EmitNeonCall(F, Ops, "vcvt_n");
1933 }
1934 case NEON::BI__builtin_neon_vcvt_s32_v:
1935 case NEON::BI__builtin_neon_vcvt_u32_v:
1936 case NEON::BI__builtin_neon_vcvt_s64_v:
1937 case NEON::BI__builtin_neon_vcvt_u64_v:
1938 case NEON::BI__builtin_neon_vcvt_s16_f16:
1939 case NEON::BI__builtin_neon_vcvt_u16_f16:
1940 case NEON::BI__builtin_neon_vcvtq_s32_v:
1941 case NEON::BI__builtin_neon_vcvtq_u32_v:
1942 case NEON::BI__builtin_neon_vcvtq_s64_v:
1943 case NEON::BI__builtin_neon_vcvtq_u64_v:
1944 case NEON::BI__builtin_neon_vcvtq_s16_f16:
1945 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
1946 Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
1947 return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
1948 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
1949 }
1950 case NEON::BI__builtin_neon_vcvta_s16_f16:
1951 case NEON::BI__builtin_neon_vcvta_s32_v:
1952 case NEON::BI__builtin_neon_vcvta_s64_v:
1953 case NEON::BI__builtin_neon_vcvta_u16_f16:
1954 case NEON::BI__builtin_neon_vcvta_u32_v:
1955 case NEON::BI__builtin_neon_vcvta_u64_v:
1956 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
1957 case NEON::BI__builtin_neon_vcvtaq_s32_v:
1958 case NEON::BI__builtin_neon_vcvtaq_s64_v:
1959 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
1960 case NEON::BI__builtin_neon_vcvtaq_u32_v:
1961 case NEON::BI__builtin_neon_vcvtaq_u64_v:
1962 case NEON::BI__builtin_neon_vcvtn_s16_f16:
1963 case NEON::BI__builtin_neon_vcvtn_s32_v:
1964 case NEON::BI__builtin_neon_vcvtn_s64_v:
1965 case NEON::BI__builtin_neon_vcvtn_u16_f16:
1966 case NEON::BI__builtin_neon_vcvtn_u32_v:
1967 case NEON::BI__builtin_neon_vcvtn_u64_v:
1968 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
1969 case NEON::BI__builtin_neon_vcvtnq_s32_v:
1970 case NEON::BI__builtin_neon_vcvtnq_s64_v:
1971 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
1972 case NEON::BI__builtin_neon_vcvtnq_u32_v:
1973 case NEON::BI__builtin_neon_vcvtnq_u64_v:
1974 case NEON::BI__builtin_neon_vcvtp_s16_f16:
1975 case NEON::BI__builtin_neon_vcvtp_s32_v:
1976 case NEON::BI__builtin_neon_vcvtp_s64_v:
1977 case NEON::BI__builtin_neon_vcvtp_u16_f16:
1978 case NEON::BI__builtin_neon_vcvtp_u32_v:
1979 case NEON::BI__builtin_neon_vcvtp_u64_v:
1980 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
1981 case NEON::BI__builtin_neon_vcvtpq_s32_v:
1982 case NEON::BI__builtin_neon_vcvtpq_s64_v:
1983 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
1984 case NEON::BI__builtin_neon_vcvtpq_u32_v:
1985 case NEON::BI__builtin_neon_vcvtpq_u64_v:
1986 case NEON::BI__builtin_neon_vcvtm_s16_f16:
1987 case NEON::BI__builtin_neon_vcvtm_s32_v:
1988 case NEON::BI__builtin_neon_vcvtm_s64_v:
1989 case NEON::BI__builtin_neon_vcvtm_u16_f16:
1990 case NEON::BI__builtin_neon_vcvtm_u32_v:
1991 case NEON::BI__builtin_neon_vcvtm_u64_v:
1992 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
1993 case NEON::BI__builtin_neon_vcvtmq_s32_v:
1994 case NEON::BI__builtin_neon_vcvtmq_s64_v:
1995 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
1996 case NEON::BI__builtin_neon_vcvtmq_u32_v:
1997 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
1998 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
1999 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
2000 }
2001 case NEON::BI__builtin_neon_vcvtx_f32_v: {
2002 llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
2003 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
2004
2005 }
2006 case NEON::BI__builtin_neon_vext_v:
2007 case NEON::BI__builtin_neon_vextq_v: {
2008 int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
2009 SmallVector<int, 16> Indices;
2010 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
2011 Indices.push_back(i+CV);
2012
2013 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2014 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2015 return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
2016 }
2017 case NEON::BI__builtin_neon_vfma_v:
2018 case NEON::BI__builtin_neon_vfmaq_v: {
2019 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2020 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2021 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2022
2023 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
2024 return emitCallMaybeConstrainedFPBuiltin(
2025 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
2026 {Ops[1], Ops[2], Ops[0]});
2027 }
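  // In the vfma lowering above, vfma(a, b, c) (accumulator first) is emitted
  // as fma(b, c, a), matching llvm.fma's (multiplicand, multiplier, addend)
  // operand order.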
2028 case NEON::BI__builtin_neon_vld1_v:
2029 case NEON::BI__builtin_neon_vld1q_v: {
2030 llvm::Type *Tys[] = {Ty, Int8PtrTy};
2031 Ops.push_back(getAlignmentValue32(PtrOp0));
2032 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
2033 }
2034 case NEON::BI__builtin_neon_vld1_x2_v:
2035 case NEON::BI__builtin_neon_vld1q_x2_v:
2036 case NEON::BI__builtin_neon_vld1_x3_v:
2037 case NEON::BI__builtin_neon_vld1q_x3_v:
2038 case NEON::BI__builtin_neon_vld1_x4_v:
2039 case NEON::BI__builtin_neon_vld1q_x4_v: {
2040 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
2041 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
2042 Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
2043 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
2044 }
2045 case NEON::BI__builtin_neon_vld2_v:
2046 case NEON::BI__builtin_neon_vld2q_v:
2047 case NEON::BI__builtin_neon_vld3_v:
2048 case NEON::BI__builtin_neon_vld3q_v:
2049 case NEON::BI__builtin_neon_vld4_v:
2050 case NEON::BI__builtin_neon_vld4q_v:
2051 case NEON::BI__builtin_neon_vld2_dup_v:
2052 case NEON::BI__builtin_neon_vld2q_dup_v:
2053 case NEON::BI__builtin_neon_vld3_dup_v:
2054 case NEON::BI__builtin_neon_vld3q_dup_v:
2055 case NEON::BI__builtin_neon_vld4_dup_v:
2056 case NEON::BI__builtin_neon_vld4q_dup_v: {
2057 llvm::Type *Tys[] = {Ty, Int8PtrTy};
2058 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
2059 Value *Align = getAlignmentValue32(PtrOp1);
2060 Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
2061 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
2062 }
2063 case NEON::BI__builtin_neon_vld1_dup_v:
2064 case NEON::BI__builtin_neon_vld1q_dup_v: {
2065 Value *V = PoisonValue::get(Ty);
2066 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
2067 LoadInst *Ld = Builder.CreateLoad(PtrOp0);
2068 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
2069 Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
2070 return EmitNeonSplat(Ops[0], CI);
2071 }
2072 case NEON::BI__builtin_neon_vld2_lane_v:
2073 case NEON::BI__builtin_neon_vld2q_lane_v:
2074 case NEON::BI__builtin_neon_vld3_lane_v:
2075 case NEON::BI__builtin_neon_vld3q_lane_v:
2076 case NEON::BI__builtin_neon_vld4_lane_v:
2077 case NEON::BI__builtin_neon_vld4q_lane_v: {
2078 llvm::Type *Tys[] = {Ty, Int8PtrTy};
2079 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
2080 for (unsigned I = 2; I < Ops.size() - 1; ++I)
2081 Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
2082 Ops.push_back(getAlignmentValue32(PtrOp1));
2083 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), NameHint);
2084 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
2085 }
2086 case NEON::BI__builtin_neon_vmovl_v: {
2087 llvm::FixedVectorType *DTy =
2088 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
2089 Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
2090 if (Usgn)
2091 return Builder.CreateZExt(Ops[0], Ty, "vmovl");
2092 return Builder.CreateSExt(Ops[0], Ty, "vmovl");
2093 }
2094 case NEON::BI__builtin_neon_vmovn_v: {
2095 llvm::FixedVectorType *QTy =
2096 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
2097 Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
2098 return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
2099 }
2100 case NEON::BI__builtin_neon_vmull_v:
2101 // FIXME: the integer vmull operations could be emitted in terms of pure
2102 // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
2103 // hoisting the exts outside loops. Until global ISel comes along that can
2104 // see through such movement, this leads to bad CodeGen. So we need an
2105 // intrinsic for now.
2106 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
2107 Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
2108 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
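  // The pure-IR form the FIXME above refers to would look roughly like
  // (sketch only, for one possible type):
  //   %1 = sext <8 x i8> %a to <8 x i16>
  //   %2 = sext <8 x i8> %b to <8 x i16>
  //   %3 = mul <8 x i16> %1, %2
  // but the vmull intrinsic keeps the extends fused to the multiply so they
  // cannot be hoisted away.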
2109 case NEON::BI__builtin_neon_vpadal_v:
2110 case NEON::BI__builtin_neon_vpadalq_v: {
2111 // The source operand type has twice as many elements of half the size.
2112 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
2113 llvm::Type *EltTy =
2114 llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
2115 auto *NarrowTy =
2116 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
2117 llvm::Type *Tys[2] = { Ty, NarrowTy };
2118 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
2119 }
2120 case NEON::BI__builtin_neon_vpaddl_v:
2121 case NEON::BI__builtin_neon_vpaddlq_v: {
2122 // The source operand type has twice as many elements of half the size.
2123 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
2124 llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
2125 auto *NarrowTy =
2126 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
2127 llvm::Type *Tys[2] = { Ty, NarrowTy };
2128 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
2129 }
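  // Illustrative types for the vpadal/vpaddl cases above: for a <4 x i32>
  // result the source is <8 x i16>, so NarrowTy is <8 x i16> and
  // Tys = { <4 x i32>, <8 x i16> }.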
2130 case NEON::BI__builtin_neon_vqdmlal_v:
2131 case NEON::BI__builtin_neon_vqdmlsl_v: {
2132 SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
2133 Ops[1] =
2134 EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
2135 Ops.resize(2);
2136 return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
2137 }
2138 case NEON::BI__builtin_neon_vqdmulhq_lane_v:
2139 case NEON::BI__builtin_neon_vqdmulh_lane_v:
2140 case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
2141 case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
2142 auto *RTy = cast<llvm::FixedVectorType>(Ty);
2143 if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
2144 BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
2145 RTy = llvm::FixedVectorType::get(RTy->getElementType(),
2146 RTy->getNumElements() * 2);
2147 llvm::Type *Tys[2] = {
2148 RTy, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
2149 /*isQuad*/ false))};
2150 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
2151 }
2152 case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
2153 case NEON::BI__builtin_neon_vqdmulh_laneq_v:
2154 case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
2155 case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
2156 llvm::Type *Tys[2] = {
2157 Ty, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
2158 /*isQuad*/ true))};
2159 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
2160 }
2161 case NEON::BI__builtin_neon_vqshl_n_v:
2162 case NEON::BI__builtin_neon_vqshlq_n_v:
2163 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
2164 1, false);
2165 case NEON::BI__builtin_neon_vqshlu_n_v:
2166 case NEON::BI__builtin_neon_vqshluq_n_v:
2167 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
2168 1, false);
2169 case NEON::BI__builtin_neon_vrecpe_v:
2170 case NEON::BI__builtin_neon_vrecpeq_v:
2171 case NEON::BI__builtin_neon_vrsqrte_v:
2172 case NEON::BI__builtin_neon_vrsqrteq_v:
2173 Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
2174 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
2175 case NEON::BI__builtin_neon_vrndi_v:
2176 case NEON::BI__builtin_neon_vrndiq_v:
2177 Int = Builder.getIsFPConstrained()
2178 ? Intrinsic::experimental_constrained_nearbyint
2179 : Intrinsic::nearbyint;
2180 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
2181 case NEON::BI__builtin_neon_vrshr_n_v:
2182 case NEON::BI__builtin_neon_vrshrq_n_v:
2183 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
2184 1, true);
2185 case NEON::BI__builtin_neon_vsha512hq_u64:
2186 case NEON::BI__builtin_neon_vsha512h2q_u64:
2187 case NEON::BI__builtin_neon_vsha512su0q_u64:
2188 case NEON::BI__builtin_neon_vsha512su1q_u64: {
2189 Function *F = CGM.getIntrinsic(Int);
2190 return EmitNeonCall(F, Ops, "");
2191 }
2192 case NEON::BI__builtin_neon_vshl_n_v:
2193 case NEON::BI__builtin_neon_vshlq_n_v:
2194 Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
2195 return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
2196 "vshl_n");
2197 case NEON::BI__builtin_neon_vshll_n_v: {
2198 llvm::FixedVectorType *SrcTy =
2199 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
2200 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
2201 if (Usgn)
2202 Ops[0] = Builder.CreateZExt(Ops[0], VTy);
2203 else
2204 Ops[0] = Builder.CreateSExt(Ops[0], VTy);
2205 Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
2206 return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
2207 }
2208 case NEON::BI__builtin_neon_vshrn_n_v: {
2209 llvm::FixedVectorType *SrcTy =
2210 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
2211 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
2212 Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
2213 if (Usgn)
2214 Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
2215 else
2216 Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
2217 return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
2218 }
2219 case NEON::BI__builtin_neon_vshr_n_v:
2220 case NEON::BI__builtin_neon_vshrq_n_v:
2221 return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
2222 case NEON::BI__builtin_neon_vst1_v:
2223 case NEON::BI__builtin_neon_vst1q_v:
2224 case NEON::BI__builtin_neon_vst2_v:
2225 case NEON::BI__builtin_neon_vst2q_v:
2226 case NEON::BI__builtin_neon_vst3_v:
2227 case NEON::BI__builtin_neon_vst3q_v:
2228 case NEON::BI__builtin_neon_vst4_v:
2229 case NEON::BI__builtin_neon_vst4q_v:
2230 case NEON::BI__builtin_neon_vst2_lane_v:
2231 case NEON::BI__builtin_neon_vst2q_lane_v:
2232 case NEON::BI__builtin_neon_vst3_lane_v:
2233 case NEON::BI__builtin_neon_vst3q_lane_v:
2234 case NEON::BI__builtin_neon_vst4_lane_v:
2235 case NEON::BI__builtin_neon_vst4q_lane_v: {
2236 llvm::Type *Tys[] = {Int8PtrTy, Ty};
2237 Ops.push_back(getAlignmentValue32(PtrOp0));
2238 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
2239 }
2240 case NEON::BI__builtin_neon_vsm3partw1q_u32:
2241 case NEON::BI__builtin_neon_vsm3partw2q_u32:
2242 case NEON::BI__builtin_neon_vsm3ss1q_u32:
2243 case NEON::BI__builtin_neon_vsm4ekeyq_u32:
2244 case NEON::BI__builtin_neon_vsm4eq_u32: {
2245 Function *F = CGM.getIntrinsic(Int);
2246 return EmitNeonCall(F, Ops, "");
2247 }
2248 case NEON::BI__builtin_neon_vsm3tt1aq_u32:
2249 case NEON::BI__builtin_neon_vsm3tt1bq_u32:
2250 case NEON::BI__builtin_neon_vsm3tt2aq_u32:
2251 case NEON::BI__builtin_neon_vsm3tt2bq_u32: {
2252 Function *F = CGM.getIntrinsic(Int);
2253 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
2254 return EmitNeonCall(F, Ops, "");
2255 }
2256 case NEON::BI__builtin_neon_vst1_x2_v:
2257 case NEON::BI__builtin_neon_vst1q_x2_v:
2258 case NEON::BI__builtin_neon_vst1_x3_v:
2259 case NEON::BI__builtin_neon_vst1q_x3_v:
2260 case NEON::BI__builtin_neon_vst1_x4_v:
2261 case NEON::BI__builtin_neon_vst1q_x4_v: {
2262 // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
2263 // in AArch64 it comes last. We may want to standardize on one or the other.
2264 if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
2265 Arch == llvm::Triple::aarch64_32) {
2266 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
2267 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
2268 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
2269 }
2270 llvm::Type *Tys[2] = {UnqualPtrTy, VTy};
2271 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
2272 }
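  // The std::rotate above moves the pointer operand from the front of Ops
  // (the AArch32 convention noted in the TODO) to the back, which is where
  // the AArch64 st1xN intrinsics expect it.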
2273 case NEON::BI__builtin_neon_vsubhn_v: {
2274 llvm::FixedVectorType *SrcTy =
2275 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
2276
2277 // %sum = add <4 x i32> %lhs, %rhs
2278 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
2279 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
2280 Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
2281
2282 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
2283 Constant *ShiftAmt =
2284 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
2285 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
2286
2287 // %res = trunc <4 x i32> %high to <4 x i16>
2288 return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
2289 }
2290 case NEON::BI__builtin_neon_vtrn_v:
2291 case NEON::BI__builtin_neon_vtrnq_v: {
2292 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2293 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2294 Value *SV = nullptr;
2295
2296 for (unsigned vi = 0; vi != 2; ++vi) {
2297 SmallVector<int, 16> Indices;
2298 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
2299 Indices.push_back(i+vi);
2300 Indices.push_back(i+e+vi);
2301 }
2302 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
2303 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
2304 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
2305 }
2306 return SV;
2307 }
2308 case NEON::BI__builtin_neon_vtst_v:
2309 case NEON::BI__builtin_neon_vtstq_v: {
2310 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2311 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2312 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
2313 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
2314 ConstantAggregateZero::get(Ty));
2315 return Builder.CreateSExt(Ops[0], Ty, "vtst");
2316 }
2317 case NEON::BI__builtin_neon_vuzp_v:
2318 case NEON::BI__builtin_neon_vuzpq_v: {
2319 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2320 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2321 Value *SV = nullptr;
2322
2323 for (unsigned vi = 0; vi != 2; ++vi) {
2324 SmallVector<int, 16> Indices;
2325 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
2326 Indices.push_back(2*i+vi);
2327
2328 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
2329 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
2330 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
2331 }
2332 return SV;
2333 }
2334 case NEON::BI__builtin_neon_vxarq_u64: {
2335 Function *F = CGM.getIntrinsic(Int);
2336 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
2337 return EmitNeonCall(F, Ops, "");
2338 }
2339 case NEON::BI__builtin_neon_vzip_v:
2340 case NEON::BI__builtin_neon_vzipq_v: {
2341 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2342 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2343 Value *SV = nullptr;
2344
2345 for (unsigned vi = 0; vi != 2; ++vi) {
2346 SmallVector<int, 16> Indices;
2347 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
2348 Indices.push_back((i + vi*e) >> 1);
2349 Indices.push_back(((i + vi*e) >> 1)+e);
2350 }
2351 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
2352 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
2353 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
2354 }
2355 return SV;
2356 }
2357 case NEON::BI__builtin_neon_vdot_s32:
2358 case NEON::BI__builtin_neon_vdot_u32:
2359 case NEON::BI__builtin_neon_vdotq_s32:
2360 case NEON::BI__builtin_neon_vdotq_u32: {
2361 auto *InputTy =
2362 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
2363 llvm::Type *Tys[2] = { Ty, InputTy };
2364 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
2365 }
2366 case NEON::BI__builtin_neon_vfmlal_low_f16:
2367 case NEON::BI__builtin_neon_vfmlalq_low_f16: {
2368 auto *InputTy =
2369 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
2370 llvm::Type *Tys[2] = { Ty, InputTy };
2371 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
2372 }
2373 case NEON::BI__builtin_neon_vfmlsl_low_f16:
2374 case NEON::BI__builtin_neon_vfmlslq_low_f16: {
2375 auto *InputTy =
2376 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
2377 llvm::Type *Tys[2] = { Ty, InputTy };
2378 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
2379 }
2380 case NEON::BI__builtin_neon_vfmlal_high_f16:
2381 case NEON::BI__builtin_neon_vfmlalq_high_f16: {
2382 auto *InputTy =
2383 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
2384 llvm::Type *Tys[2] = { Ty, InputTy };
2385 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
2386 }
2387 case NEON::BI__builtin_neon_vfmlsl_high_f16:
2388 case NEON::BI__builtin_neon_vfmlslq_high_f16: {
2389 auto *InputTy =
2390 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
2391 llvm::Type *Tys[2] = { Ty, InputTy };
2392 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
2393 }
2394 case NEON::BI__builtin_neon_vmmlaq_s32:
2395 case NEON::BI__builtin_neon_vmmlaq_u32: {
2396 auto *InputTy =
2397 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
2398 llvm::Type *Tys[2] = { Ty, InputTy };
2399 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vmmla");
2400 }
2401 case NEON::BI__builtin_neon_vusmmlaq_s32: {
2402 auto *InputTy =
2403 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
2404 llvm::Type *Tys[2] = { Ty, InputTy };
2405 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusmmla");
2406 }
2407 case NEON::BI__builtin_neon_vusdot_s32:
2408 case NEON::BI__builtin_neon_vusdotq_s32: {
2409 auto *InputTy =
2410 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
2411 llvm::Type *Tys[2] = { Ty, InputTy };
2412 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusdot");
2413 }
2414 case NEON::BI__builtin_neon_vbfdot_f32:
2415 case NEON::BI__builtin_neon_vbfdotq_f32: {
2416 llvm::Type *InputTy =
2417 llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
2418 llvm::Type *Tys[2] = { Ty, InputTy };
2419 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfdot");
2420 }
2421 case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
2422 llvm::Type *Tys[1] = { Ty };
2423 Function *F = CGM.getIntrinsic(Int, Tys);
2424 return EmitNeonCall(F, Ops, "vcvtfp2bf");
2425 }
2426
2427 }
2428
2429 assert(Int && "Expected valid intrinsic number");
2430
2431 // Determine the type(s) of this overloaded AArch64 intrinsic.
2432 Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
2433
2434 Value *Result = EmitNeonCall(F, Ops, NameHint);
2435 llvm::Type *ResultType = ConvertType(E->getType());
2436 // Cast the AArch64 intrinsic's one-element vector result to the
2437 // scalar type expected by the builtin.
2438 return Builder.CreateBitCast(Result, ResultType, NameHint);
2439}
2440
2441Value *
2442 CodeGenFunction::EmitAArch64CompareBuiltinExpr(Value *Op, llvm::Type *Ty,
2443 const CmpInst::Predicate Pred,
2444 const Twine &Name) {
2445
2446 if (isa<FixedVectorType>(Ty)) {
2447 // Vector types are cast to i8 vectors. Recover original type.
2448 Op = Builder.CreateBitCast(Op, Ty);
2449 }
2450
2451 if (CmpInst::isFPPredicate(Pred)) {
2452 if (Pred == CmpInst::FCMP_OEQ)
2453 Op = Builder.CreateFCmp(Pred, Op, Constant::getNullValue(Op->getType()));
2454 else
2455 Op = Builder.CreateFCmpS(Pred, Op, Constant::getNullValue(Op->getType()));
2456 } else {
2457 Op = Builder.CreateICmp(Pred, Op, Constant::getNullValue(Op->getType()));
2458 }
2459
2460 llvm::Type *ResTy = Ty;
2461 if (auto *VTy = dyn_cast<FixedVectorType>(Ty))
2462 ResTy = FixedVectorType::get(
2463 IntegerType::get(getLLVMContext(), VTy->getScalarSizeInBits()),
2464 VTy->getNumElements());
2465
2466 return Builder.CreateSExt(Op, ResTy, Name);
2467}
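// As an illustration (not emitted verbatim): for a <2 x i32> operand and an
// ICMP_SGT predicate, the comparison helper above produces roughly
//   %cmp = icmp sgt <2 x i32> %op, zeroinitializer
//   %res = sext <2 x i1> %cmp to <2 x i32>
// i.e. an all-ones/all-zeros mask in the original element width.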
2468
2469 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
2470 Value *ExtOp, Value *IndexOp,
2471 llvm::Type *ResTy, unsigned IntID,
2472 const char *Name) {
2473 SmallVector<Value *, 2> TblOps;
2474 if (ExtOp)
2475 TblOps.push_back(ExtOp);
2476
2477 // Build a vector containing sequential numbers like (0, 1, 2, ..., 15)
2478 SmallVector<int, 16> Indices;
2479 auto *TblTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
2480 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
2481 Indices.push_back(2*i);
2482 Indices.push_back(2*i+1);
2483 }
2484
2485 int PairPos = 0, End = Ops.size() - 1;
2486 while (PairPos < End) {
2487 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
2488 Ops[PairPos+1], Indices,
2489 Name));
2490 PairPos += 2;
2491 }
2492
2493 // If there's an odd number of 64-bit lookup tables, fill the high 64 bits
2494 // of the last 128-bit lookup table with zero.
2495 if (PairPos == End) {
2496 Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
2497 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
2498 ZeroTbl, Indices, Name));
2499 }
2500
2501 Function *TblF;
2502 TblOps.push_back(IndexOp);
2503 TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
2504
2505 return CGF.EmitNeonCall(TblF, TblOps, Name);
2506}
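// Illustrative example: for vtbl3 the three 64-bit table operands become two
// 128-bit tables; the first two are concatenated by the sequential
// shufflevector above, the odd third table is padded with a zero vector, and
// the index operand is appended before calling the chosen tbl/tbx intrinsic.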
2507
2508Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
2509 unsigned Value;
2510 switch (BuiltinID) {
2511 default:
2512 return nullptr;
2513 case clang::ARM::BI__builtin_arm_nop:
2514 Value = 0;
2515 break;
2516 case clang::ARM::BI__builtin_arm_yield:
2517 case clang::ARM::BI__yield:
2518 Value = 1;
2519 break;
2520 case clang::ARM::BI__builtin_arm_wfe:
2521 case clang::ARM::BI__wfe:
2522 Value = 2;
2523 break;
2524 case clang::ARM::BI__builtin_arm_wfi:
2525 case clang::ARM::BI__wfi:
2526 Value = 3;
2527 break;
2528 case clang::ARM::BI__builtin_arm_sev:
2529 case clang::ARM::BI__sev:
2530 Value = 4;
2531 break;
2532 case clang::ARM::BI__builtin_arm_sevl:
2533 case clang::ARM::BI__sevl:
2534 Value = 5;
2535 break;
2536 }
2537
2538 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
2539 llvm::ConstantInt::get(Int32Ty, Value));
2540}
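// For example, __builtin_arm_wfi() (or __wfi()) is lowered by the helper
// above to roughly:
//   call void @llvm.arm.hint(i32 3)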
2541
2542 enum SpecialRegisterAccessKind {
2543 NormalRead,
2544 VolatileRead,
2545 Write,
2546 };
2547
2548 // Generates the IR for the read/write special register builtin.
2549 // ValueType is the type of the value that is to be written or read;
2550 // RegisterType is the type of the register being written to or read from.
2551 static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
2552 const CallExpr *E,
2553 llvm::Type *RegisterType,
2554 llvm::Type *ValueType,
2555 SpecialRegisterAccessKind AccessKind,
2556 StringRef SysReg = "") {
2557 // Write and read register intrinsics only support 32-, 64- and 128-bit operations.
2558 assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64) ||
2559 RegisterType->isIntegerTy(128)) &&
2560 "Unsupported size for register.");
2561
2562 CodeGen::CGBuilderTy &Builder = CGF.Builder;
2563 CodeGen::CodeGenModule &CGM = CGF.CGM;
2564 LLVMContext &Context = CGM.getLLVMContext();
2565
2566 if (SysReg.empty()) {
2567 const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
2568 SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
2569 }
2570
2571 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
2572 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
2573 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
2574
2575 llvm::Type *Types[] = { RegisterType };
2576
2577 bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
2578 assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
2579 && "Can't fit 64-bit value in 32-bit register");
2580
2581 if (AccessKind != Write) {
2582 assert(AccessKind == NormalRead || AccessKind == VolatileRead);
2583 llvm::Function *F = CGM.getIntrinsic(
2584 AccessKind == VolatileRead ? Intrinsic::read_volatile_register
2585 : Intrinsic::read_register,
2586 Types);
2587 llvm::Value *Call = Builder.CreateCall(F, Metadata);
2588
2589 if (MixedTypes)
2590 // Read into 64 bit register and then truncate result to 32 bit.
2591 return Builder.CreateTrunc(Call, ValueType);
2592
2593 if (ValueType->isPointerTy())
2594 // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
2595 return Builder.CreateIntToPtr(Call, ValueType);
2596
2597 return Call;
2598 }
2599
2600 llvm::Function *F = CGM.getIntrinsic(Intrinsic::write_register, Types);
2601 llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
2602 if (MixedTypes) {
2603 // Extend 32 bit write value to 64 bit to pass to write.
2604 ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
2605 return Builder.CreateCall(F, { Metadata, ArgValue });
2606 }
2607
2608 if (ValueType->isPointerTy()) {
2609 // Have VoidPtrTy ArgValue but want to return an i32/i64.
2610 ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
2611 return Builder.CreateCall(F, { Metadata, ArgValue });
2612 }
2613
2614 return Builder.CreateCall(F, { Metadata, ArgValue });
2615}
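// Illustrative example (register name chosen arbitrarily): a 32-bit volatile
// read such as __builtin_arm_rsr("cpsr") is lowered to roughly
//   %0 = call i32 @llvm.read_volatile_register.i32(metadata !0)
// with !0 = !{!"cpsr"}, while the corresponding __builtin_arm_wsr form calls
// @llvm.write_register.i32 with the value as the second argument.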
2616
2617/// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
2618/// argument that specifies the vector type.
2619static bool HasExtraNeonArgument(unsigned BuiltinID) {
2620 switch (BuiltinID) {
2621 default: break;
2622 case NEON::BI__builtin_neon_vget_lane_i8:
2623 case NEON::BI__builtin_neon_vget_lane_i16:
2624 case NEON::BI__builtin_neon_vget_lane_bf16:
2625 case NEON::BI__builtin_neon_vget_lane_i32:
2626 case NEON::BI__builtin_neon_vget_lane_i64:
2627 case NEON::BI__builtin_neon_vget_lane_mf8:
2628 case NEON::BI__builtin_neon_vget_lane_f32:
2629 case NEON::BI__builtin_neon_vgetq_lane_i8:
2630 case NEON::BI__builtin_neon_vgetq_lane_i16:
2631 case NEON::BI__builtin_neon_vgetq_lane_bf16:
2632 case NEON::BI__builtin_neon_vgetq_lane_i32:
2633 case NEON::BI__builtin_neon_vgetq_lane_i64:
2634 case NEON::BI__builtin_neon_vgetq_lane_mf8:
2635 case NEON::BI__builtin_neon_vgetq_lane_f32:
2636 case NEON::BI__builtin_neon_vduph_lane_bf16:
2637 case NEON::BI__builtin_neon_vduph_laneq_bf16:
2638 case NEON::BI__builtin_neon_vset_lane_i8:
2639 case NEON::BI__builtin_neon_vset_lane_mf8:
2640 case NEON::BI__builtin_neon_vset_lane_i16:
2641 case NEON::BI__builtin_neon_vset_lane_bf16:
2642 case NEON::BI__builtin_neon_vset_lane_i32:
2643 case NEON::BI__builtin_neon_vset_lane_i64:
2644 case NEON::BI__builtin_neon_vset_lane_f32:
2645 case NEON::BI__builtin_neon_vsetq_lane_i8:
2646 case NEON::BI__builtin_neon_vsetq_lane_mf8:
2647 case NEON::BI__builtin_neon_vsetq_lane_i16:
2648 case NEON::BI__builtin_neon_vsetq_lane_bf16:
2649 case NEON::BI__builtin_neon_vsetq_lane_i32:
2650 case NEON::BI__builtin_neon_vsetq_lane_i64:
2651 case NEON::BI__builtin_neon_vsetq_lane_f32:
2652 case NEON::BI__builtin_neon_vsha1h_u32:
2653 case NEON::BI__builtin_neon_vsha1cq_u32:
2654 case NEON::BI__builtin_neon_vsha1pq_u32:
2655 case NEON::BI__builtin_neon_vsha1mq_u32:
2656 case NEON::BI__builtin_neon_vcvth_bf16_f32:
2657 case clang::ARM::BI_MoveToCoprocessor:
2658 case clang::ARM::BI_MoveToCoprocessor2:
2659 return false;
2660 }
2661 return true;
2662}
2663
2664 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
2665 const CallExpr *E,
2666 ReturnValueSlot ReturnValue,
2667 llvm::Triple::ArchType Arch) {
2668 if (auto Hint = GetValueForARMHint(BuiltinID))
2669 return Hint;
2670
2671 if (BuiltinID == clang::ARM::BI__emit) {
2672 bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
2673 llvm::FunctionType *FTy =
2674 llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
2675
2676 Expr::EvalResult Result;
2677 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
2678 llvm_unreachable("Sema will ensure that the parameter is constant");
2679
2680 llvm::APSInt Value = Result.Val.getInt();
2681 uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
2682
2683 llvm::InlineAsm *Emit =
2684 IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
2685 /*hasSideEffects=*/true)
2686 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
2687 /*hasSideEffects=*/true);
2688
2689 return Builder.CreateCall(Emit);
2690 }
2691
2692 if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) {
2693 Value *Option = EmitScalarExpr(E->getArg(0));
2694 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
2695 }
2696
2697 if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) {
2698 Value *Address = EmitScalarExpr(E->getArg(0));
2699 Value *RW = EmitScalarExpr(E->getArg(1));
2700 Value *IsData = EmitScalarExpr(E->getArg(2));
2701
2702 // Locality is not supported on ARM target
2703 Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
2704
2705 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
2706 return Builder.CreateCall(F, {Address, RW, Locality, IsData});
2707 }
2708
2709 if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) {
2710 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2711 return Builder.CreateCall(
2712 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
2713 }
2714
2715 if (BuiltinID == clang::ARM::BI__builtin_arm_clz ||
2716 BuiltinID == clang::ARM::BI__builtin_arm_clz64) {
2717 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2718 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
2719 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
2720 if (BuiltinID == clang::ARM::BI__builtin_arm_clz64)
2721 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
2722 return Res;
2723 }
2724
2725
2726 if (BuiltinID == clang::ARM::BI__builtin_arm_cls) {
2727 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2728 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls");
2729 }
2730 if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) {
2731 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
2732 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg,
2733 "cls");
2734 }
2735
2736 if (BuiltinID == clang::ARM::BI__clear_cache) {
2737 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
2738 const FunctionDecl *FD = E->getDirectCallee();
2739 Value *Ops[2];
2740 for (unsigned i = 0; i < 2; i++)
2741 Ops[i] = EmitScalarExpr(E->getArg(i));
2742 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
2743 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
2744 StringRef Name = FD->getName();
2745 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
2746 }
2747
2748 if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr ||
2749 BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) {
2750 Function *F;
2751
2752 switch (BuiltinID) {
2753 default: llvm_unreachable("unexpected builtin");
2754 case clang::ARM::BI__builtin_arm_mcrr:
2755 F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
2756 break;
2757 case clang::ARM::BI__builtin_arm_mcrr2:
2758 F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
2759 break;
2760 }
2761
2762 // The MCRR{2} instruction has 5 operands, but
2763 // the intrinsic has only 4, because Rt and Rt2
2764 // are represented as a single unsigned 64-bit
2765 // integer in the intrinsic definition; internally,
2766 // that value is split into two separate
2767 // 32-bit integers.
2768
2769 Value *Coproc = EmitScalarExpr(E->getArg(0));
2770 Value *Opc1 = EmitScalarExpr(E->getArg(1));
2771 Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
2772 Value *CRm = EmitScalarExpr(E->getArg(3));
2773
2774 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
2775 Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
2776 Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
2777 Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
2778
2779 return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
2780 }
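// Illustratively, for a 64-bit value %v the split above yields roughly
//   %rt  = trunc i64 %v to i32        ; low half
//   %hi  = lshr i64 %v, 32
//   %rt2 = trunc i64 %hi to i32       ; high half
// which become the two GPR operands of the MCRR{2} intrinsic.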
2781
2782 if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc ||
2783 BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) {
2784 Function *F;
2785
2786 switch (BuiltinID) {
2787 default: llvm_unreachable("unexpected builtin");
2788 case clang::ARM::BI__builtin_arm_mrrc:
2789 F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
2790 break;
2791 case clang::ARM::BI__builtin_arm_mrrc2:
2792 F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
2793 break;
2794 }
2795
2796 Value *Coproc = EmitScalarExpr(E->getArg(0));
2797 Value *Opc1 = EmitScalarExpr(E->getArg(1));
2798 Value *CRm = EmitScalarExpr(E->getArg(2));
2799 Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
2800
2801 // Returns an unsigned 64-bit integer, represented
2802 // as two 32-bit integers.
2803
2804 Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
2805 Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
2806 Rt = Builder.CreateZExt(Rt, Int64Ty);
2807 Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
2808
2809 Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
2810 RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
2811 RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
2812
2813 return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
2814 }
2815
2816 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd ||
2817 ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
2818 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) &&
2819 getContext().getTypeSize(E->getType()) == 64) ||
2820 BuiltinID == clang::ARM::BI__ldrexd) {
2821 Function *F;
2822
2823 switch (BuiltinID) {
2824 default: llvm_unreachable("unexpected builtin");
2825 case clang::ARM::BI__builtin_arm_ldaex:
2826 F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
2827 break;
2828 case clang::ARM::BI__builtin_arm_ldrexd:
2829 case clang::ARM::BI__builtin_arm_ldrex:
2830 case clang::ARM::BI__ldrexd:
2831 F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
2832 break;
2833 }
2834
2835 Value *LdPtr = EmitScalarExpr(E->getArg(0));
2836 Value *Val = Builder.CreateCall(F, LdPtr, "ldrexd");
2837
2838 Value *Val0 = Builder.CreateExtractValue(Val, 1);
2839 Value *Val1 = Builder.CreateExtractValue(Val, 0);
2840 Val0 = Builder.CreateZExt(Val0, Int64Ty);
2841 Val1 = Builder.CreateZExt(Val1, Int64Ty);
2842
2843 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
2844 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
2845 Val = Builder.CreateOr(Val, Val1);
2846 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
2847 }
2848
2849 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
2850 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) {
2851 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
2852
2853 QualType Ty = E->getType();
2854 llvm::Type *RealResTy = ConvertType(Ty);
2855 llvm::Type *IntTy =
2856 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
2857
2858 Function *F = CGM.getIntrinsic(
2859 BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex
2860 : Intrinsic::arm_ldrex,
2861 UnqualPtrTy);
2862 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
2863 Val->addParamAttr(
2864 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
2865
2866 if (RealResTy->isPointerTy())
2867 return Builder.CreateIntToPtr(Val, RealResTy);
2868 else {
2869 llvm::Type *IntResTy = llvm::IntegerType::get(
2870 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
2871 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
2872 RealResTy);
2873 }
2874 }
2875
2876 if (BuiltinID == clang::ARM::BI__builtin_arm_strexd ||
2877 ((BuiltinID == clang::ARM::BI__builtin_arm_stlex ||
2878 BuiltinID == clang::ARM::BI__builtin_arm_strex) &&
2879 getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
2880 Function *F = CGM.getIntrinsic(
2881 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd
2882 : Intrinsic::arm_strexd);
2883 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
2884
2885 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
2886 Value *Val = EmitScalarExpr(E->getArg(0));
2887 Builder.CreateStore(Val, Tmp);
2888
2889 Address LdPtr = Tmp.withElementType(STy);
2890 Val = Builder.CreateLoad(LdPtr);
2891
2892 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
2893 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
2894 Value *StPtr = EmitScalarExpr(E->getArg(1));
2895 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
2896 }
2897
2898 if (BuiltinID == clang::ARM::BI__builtin_arm_strex ||
2899 BuiltinID == clang::ARM::BI__builtin_arm_stlex) {
2900 Value *StoreVal = EmitScalarExpr(E->getArg(0));
2901 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
2902
2903 QualType Ty = E->getArg(0)->getType();
2904 llvm::Type *StoreTy =
2905 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
2906
2907 if (StoreVal->getType()->isPointerTy())
2908 StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
2909 else {
2910 llvm::Type *IntTy = llvm::IntegerType::get(
2911 getLLVMContext(),
2912 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
2913 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
2914 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
2915 }
2916
2917 Function *F = CGM.getIntrinsic(
2918 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex
2919 : Intrinsic::arm_strex,
2920 StoreAddr->getType());
2921
2922 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
2923 CI->addParamAttr(
2924 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
2925 return CI;
2926 }
2927
2928 if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) {
2929 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
2930 return Builder.CreateCall(F);
2931 }
2932
2933 // CRC32
2934 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
2935 switch (BuiltinID) {
2936 case clang::ARM::BI__builtin_arm_crc32b:
2937 CRCIntrinsicID = Intrinsic::arm_crc32b; break;
2938 case clang::ARM::BI__builtin_arm_crc32cb:
2939 CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
2940 case clang::ARM::BI__builtin_arm_crc32h:
2941 CRCIntrinsicID = Intrinsic::arm_crc32h; break;
2942 case clang::ARM::BI__builtin_arm_crc32ch:
2943 CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
2944 case clang::ARM::BI__builtin_arm_crc32w:
2945 case clang::ARM::BI__builtin_arm_crc32d:
2946 CRCIntrinsicID = Intrinsic::arm_crc32w; break;
2947 case clang::ARM::BI__builtin_arm_crc32cw:
2948 case clang::ARM::BI__builtin_arm_crc32cd:
2949 CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
2950 }
2951
2952 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
2953 Value *Arg0 = EmitScalarExpr(E->getArg(0));
2954 Value *Arg1 = EmitScalarExpr(E->getArg(1));
2955
2956 // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
2957 // intrinsics, hence we need different codegen for these cases.
2958 if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d ||
2959 BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {
2960 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
2961 Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
2962 Value *Arg1b = Builder.CreateLShr(Arg1, C1);
2963 Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
2964
2965 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
2966 Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
2967 return Builder.CreateCall(F, {Res, Arg1b});
2968 } else {
2969 Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
2970
2971 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
2972 return Builder.CreateCall(F, {Arg0, Arg1});
2973 }
2974 }
2975
2976 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
2977 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
2978 BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
2979 BuiltinID == clang::ARM::BI__builtin_arm_wsr ||
2980 BuiltinID == clang::ARM::BI__builtin_arm_wsr64 ||
2981 BuiltinID == clang::ARM::BI__builtin_arm_wsrp) {
2982
2983 SpecialRegisterAccessKind AccessKind = Write;
2984 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
2985 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
2986 BuiltinID == clang::ARM::BI__builtin_arm_rsrp)
2987 AccessKind = VolatileRead;
2988
2989 bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
2990 BuiltinID == clang::ARM::BI__builtin_arm_wsrp;
2991
2992 bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
2993 BuiltinID == clang::ARM::BI__builtin_arm_wsr64;
2994
2995 llvm::Type *ValueType;
2996 llvm::Type *RegisterType;
2997 if (IsPointerBuiltin) {
2998 ValueType = VoidPtrTy;
2999 RegisterType = Int32Ty;
3000 } else if (Is64Bit) {
3001 ValueType = RegisterType = Int64Ty;
3002 } else {
3003 ValueType = RegisterType = Int32Ty;
3004 }
3005
3006 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
3007 AccessKind);
3008 }
3009
3010 if (BuiltinID == ARM::BI__builtin_sponentry) {
3011 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
3012 return Builder.CreateCall(F);
3013 }
3014
3015 // Handle MSVC intrinsics before argument evaluation to prevent double
3016 // evaluation.
3017 if (std::optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID))
3018 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
3019
3020 // Deal with MVE builtins
3021 if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
3022 return Result;
3023 // Handle CDE builtins
3024 if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
3025 return Result;
3026
3027 // Some intrinsics are equivalent; if they are, use the base intrinsic ID.
3028 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
3029 return P.first == BuiltinID;
3030 });
3031 if (It != end(NEONEquivalentIntrinsicMap))
3032 BuiltinID = It->second;
3033
3034 // Find out if any arguments are required to be integer constant
3035 // expressions.
3036 unsigned ICEArguments = 0;
3037 ASTContext::GetBuiltinTypeError Error;
3038 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
3039 assert(Error == ASTContext::GE_None && "Should not codegen an error");
3040
3041 auto getAlignmentValue32 = [&](Address addr) -> Value* {
3042 return Builder.getInt32(addr.getAlignment().getQuantity());
3043 };
3044
3045 Address PtrOp0 = Address::invalid();
3046 Address PtrOp1 = Address::invalid();
3047 llvm::SmallVector<Value*, 4> Ops;
3048 bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
3049 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
3050 for (unsigned i = 0, e = NumArgs; i != e; i++) {
3051 if (i == 0) {
3052 switch (BuiltinID) {
3053 case NEON::BI__builtin_neon_vld1_v:
3054 case NEON::BI__builtin_neon_vld1q_v:
3055 case NEON::BI__builtin_neon_vld1q_lane_v:
3056 case NEON::BI__builtin_neon_vld1_lane_v:
3057 case NEON::BI__builtin_neon_vld1_dup_v:
3058 case NEON::BI__builtin_neon_vld1q_dup_v:
3059 case NEON::BI__builtin_neon_vst1_v:
3060 case NEON::BI__builtin_neon_vst1q_v:
3061 case NEON::BI__builtin_neon_vst1q_lane_v:
3062 case NEON::BI__builtin_neon_vst1_lane_v:
3063 case NEON::BI__builtin_neon_vst2_v:
3064 case NEON::BI__builtin_neon_vst2q_v:
3065 case NEON::BI__builtin_neon_vst2_lane_v:
3066 case NEON::BI__builtin_neon_vst2q_lane_v:
3067 case NEON::BI__builtin_neon_vst3_v:
3068 case NEON::BI__builtin_neon_vst3q_v:
3069 case NEON::BI__builtin_neon_vst3_lane_v:
3070 case NEON::BI__builtin_neon_vst3q_lane_v:
3071 case NEON::BI__builtin_neon_vst4_v:
3072 case NEON::BI__builtin_neon_vst4q_v:
3073 case NEON::BI__builtin_neon_vst4_lane_v:
3074 case NEON::BI__builtin_neon_vst4q_lane_v:
3075 // Get the alignment for the argument in addition to the value;
3076 // we'll use it later.
3077 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
3078 Ops.push_back(PtrOp0.emitRawPointer(*this));
3079 continue;
3080 }
3081 }
3082 if (i == 1) {
3083 switch (BuiltinID) {
3084 case NEON::BI__builtin_neon_vld2_v:
3085 case NEON::BI__builtin_neon_vld2q_v:
3086 case NEON::BI__builtin_neon_vld3_v:
3087 case NEON::BI__builtin_neon_vld3q_v:
3088 case NEON::BI__builtin_neon_vld4_v:
3089 case NEON::BI__builtin_neon_vld4q_v:
3090 case NEON::BI__builtin_neon_vld2_lane_v:
3091 case NEON::BI__builtin_neon_vld2q_lane_v:
3092 case NEON::BI__builtin_neon_vld3_lane_v:
3093 case NEON::BI__builtin_neon_vld3q_lane_v:
3094 case NEON::BI__builtin_neon_vld4_lane_v:
3095 case NEON::BI__builtin_neon_vld4q_lane_v:
3096 case NEON::BI__builtin_neon_vld2_dup_v:
3097 case NEON::BI__builtin_neon_vld2q_dup_v:
3098 case NEON::BI__builtin_neon_vld3_dup_v:
3099 case NEON::BI__builtin_neon_vld3q_dup_v:
3100 case NEON::BI__builtin_neon_vld4_dup_v:
3101 case NEON::BI__builtin_neon_vld4q_dup_v:
3102 // Get the alignment for the argument in addition to the value;
3103 // we'll use it later.
3104 PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
3105 Ops.push_back(PtrOp1.emitRawPointer(*this));
3106 continue;
3107 }
3108 }
3109
3110 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
3111 }
3112
3113 switch (BuiltinID) {
3114 default: break;
3115
3116 case NEON::BI__builtin_neon_vget_lane_i8:
3117 case NEON::BI__builtin_neon_vget_lane_i16:
3118 case NEON::BI__builtin_neon_vget_lane_i32:
3119 case NEON::BI__builtin_neon_vget_lane_i64:
3120 case NEON::BI__builtin_neon_vget_lane_bf16:
3121 case NEON::BI__builtin_neon_vget_lane_f32:
3122 case NEON::BI__builtin_neon_vgetq_lane_i8:
3123 case NEON::BI__builtin_neon_vgetq_lane_i16:
3124 case NEON::BI__builtin_neon_vgetq_lane_i32:
3125 case NEON::BI__builtin_neon_vgetq_lane_i64:
3126 case NEON::BI__builtin_neon_vgetq_lane_bf16:
3127 case NEON::BI__builtin_neon_vgetq_lane_f32:
3128 case NEON::BI__builtin_neon_vduph_lane_bf16:
3129 case NEON::BI__builtin_neon_vduph_laneq_bf16:
3130 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
3131
3132 case NEON::BI__builtin_neon_vrndns_f32: {
3133 Value *Arg = EmitScalarExpr(E->getArg(0));
3134 llvm::Type *Tys[] = {Arg->getType()};
3135 Function *F = CGM.getIntrinsic(Intrinsic::roundeven, Tys);
3136 return Builder.CreateCall(F, {Arg}, "vrndn"); }
3137
3138 case NEON::BI__builtin_neon_vset_lane_i8:
3139 case NEON::BI__builtin_neon_vset_lane_i16:
3140 case NEON::BI__builtin_neon_vset_lane_i32:
3141 case NEON::BI__builtin_neon_vset_lane_i64:
3142 case NEON::BI__builtin_neon_vset_lane_bf16:
3143 case NEON::BI__builtin_neon_vset_lane_f32:
3144 case NEON::BI__builtin_neon_vsetq_lane_i8:
3145 case NEON::BI__builtin_neon_vsetq_lane_i16:
3146 case NEON::BI__builtin_neon_vsetq_lane_i32:
3147 case NEON::BI__builtin_neon_vsetq_lane_i64:
3148 case NEON::BI__builtin_neon_vsetq_lane_bf16:
3149 case NEON::BI__builtin_neon_vsetq_lane_f32:
3150 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
3151
3152 case NEON::BI__builtin_neon_vsha1h_u32:
3153 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
3154 "vsha1h");
3155 case NEON::BI__builtin_neon_vsha1cq_u32:
3156 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
3157 "vsha1h");
3158 case NEON::BI__builtin_neon_vsha1pq_u32:
3159 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
3160 "vsha1h");
3161 case NEON::BI__builtin_neon_vsha1mq_u32:
3162 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
3163 "vsha1h");
3164
3165 case NEON::BI__builtin_neon_vcvth_bf16_f32: {
3166 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops,
3167 "vcvtbfp2bf");
3168 }
3169
3170 // The ARM _MoveToCoprocessor builtins put the input register value as
3171 // the first argument, but the LLVM intrinsic expects it as the third one.
3172 case clang::ARM::BI_MoveToCoprocessor:
3173 case clang::ARM::BI_MoveToCoprocessor2: {
3174 Function *F = CGM.getIntrinsic(BuiltinID == clang::ARM::BI_MoveToCoprocessor
3175 ? Intrinsic::arm_mcr
3176 : Intrinsic::arm_mcr2);
3177 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
3178 Ops[3], Ops[4], Ops[5]});
3179 }
3180 }
3181
3182 // Get the last argument, which specifies the vector type.
3183 assert(HasExtraArg);
3184 const Expr *Arg = E->getArg(E->getNumArgs()-1);
3185 std::optional<llvm::APSInt> Result =
3186 Arg->getIntegerConstantExpr(getContext());
3187 if (!Result)
3188 return nullptr;
3189
3190 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f ||
3191 BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) {
3192 // Determine the overloaded type of this builtin.
3193 llvm::Type *Ty;
3194 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f)
3195 Ty = FloatTy;
3196 else
3197 Ty = DoubleTy;
3198
3199 // Determine whether this is an unsigned conversion or not.
3200 bool usgn = Result->getZExtValue() == 1;
3201 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
3202
3203 // Call the appropriate intrinsic.
3204 Function *F = CGM.getIntrinsic(Int, Ty);
3205 return Builder.CreateCall(F, Ops, "vcvtr");
3206 }
3207
3208 // Determine the type of this overloaded NEON intrinsic.
3209 NeonTypeFlags Type = Result->getZExtValue();
3210 bool usgn = Type.isUnsigned();
3211 bool rightShift = false;
3212
3213 llvm::FixedVectorType *VTy =
3214 GetNeonType(this, Type, getTarget().hasFastHalfType(), false,
3215 getTarget().hasBFloat16Type());
3216 llvm::Type *Ty = VTy;
3217 if (!Ty)
3218 return nullptr;
3219
3220 // Many NEON builtins have identical semantics and uses in ARM and
3221 // AArch64. Emit these in a single function.
3222 auto IntrinsicMap = ArrayRef(ARMSIMDIntrinsicMap);
3223 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
3224 IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
3225 if (Builtin)
3226 return EmitCommonNeonBuiltinExpr(
3227 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
3228 Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
3229
3230 unsigned Int;
3231 switch (BuiltinID) {
3232 default: return nullptr;
3233 case NEON::BI__builtin_neon_vld1q_lane_v:
3234 // Handle 64-bit integer elements as a special case. Use shuffles of
3235 // one-element vectors to avoid poor code for i64 in the backend.
3236 if (VTy->getElementType()->isIntegerTy(64)) {
3237 // Extract the other lane.
3238 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3239 int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
3240 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
3241 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
3242 // Load the value as a one-element vector.
3243 Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1);
3244 llvm::Type *Tys[] = {Ty, Int8PtrTy};
3245 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
3246 Value *Align = getAlignmentValue32(PtrOp0);
3247 Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
3248 // Combine them.
3249 int Indices[] = {1 - Lane, Lane};
3250 return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane");
3251 }
3252 [[fallthrough]];
3253 case NEON::BI__builtin_neon_vld1_lane_v: {
3254 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3255 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
3256 Value *Ld = Builder.CreateLoad(PtrOp0);
3257 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
3258 }
3259 case NEON::BI__builtin_neon_vqrshrn_n_v:
3260 Int =
3261 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
3262 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
3263 1, true);
3264 case NEON::BI__builtin_neon_vqrshrun_n_v:
3265 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
3266 Ops, "vqrshrun_n", 1, true);
3267 case NEON::BI__builtin_neon_vqshrn_n_v:
3268 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
3269 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
3270 1, true);
3271 case NEON::BI__builtin_neon_vqshrun_n_v:
3272 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
3273 Ops, "vqshrun_n", 1, true);
3274 case NEON::BI__builtin_neon_vrecpe_v:
3275 case NEON::BI__builtin_neon_vrecpeq_v:
3276 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
3277 Ops, "vrecpe");
3278 case NEON::BI__builtin_neon_vrshrn_n_v:
3279 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
3280 Ops, "vrshrn_n", 1, true);
3281 case NEON::BI__builtin_neon_vrsra_n_v:
3282 case NEON::BI__builtin_neon_vrsraq_n_v:
3283 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3284 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3285 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
3286 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
3287 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
3288 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
3289 case NEON::BI__builtin_neon_vsri_n_v:
3290 case NEON::BI__builtin_neon_vsriq_n_v:
3291 rightShift = true;
3292 [[fallthrough]];
3293 case NEON::BI__builtin_neon_vsli_n_v:
3294 case NEON::BI__builtin_neon_vsliq_n_v:
3295 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
3296 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
3297 Ops, "vsli_n");
3298 case NEON::BI__builtin_neon_vsra_n_v:
3299 case NEON::BI__builtin_neon_vsraq_n_v:
3300 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3301 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
3302 return Builder.CreateAdd(Ops[0], Ops[1]);
3303 case NEON::BI__builtin_neon_vst1q_lane_v:
3304 // Handle 64-bit integer elements as a special case. Use a shuffle to get
3305 // a one-element vector and avoid poor code for i64 in the backend.
3306 if (VTy->getElementType()->isIntegerTy(64)) {
3307 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3308 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
3309 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
3310 Ops[2] = getAlignmentValue32(PtrOp0);
3311 llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
3312 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
3313 Tys), Ops);
3314 }
3315 [[fallthrough]];
3316 case NEON::BI__builtin_neon_vst1_lane_v: {
3317 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3318 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
3319 return Builder.CreateStore(Ops[1],
3320 PtrOp0.withElementType(Ops[1]->getType()));
3321 }
3322 case NEON::BI__builtin_neon_vtbl1_v:
3323 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
3324 Ops, "vtbl1");
3325 case NEON::BI__builtin_neon_vtbl2_v:
3326 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
3327 Ops, "vtbl2");
3328 case NEON::BI__builtin_neon_vtbl3_v:
3329 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
3330 Ops, "vtbl3");
3331 case NEON::BI__builtin_neon_vtbl4_v:
3332 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
3333 Ops, "vtbl4");
3334 case NEON::BI__builtin_neon_vtbx1_v:
3335 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
3336 Ops, "vtbx1");
3337 case NEON::BI__builtin_neon_vtbx2_v:
3338 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
3339 Ops, "vtbx2");
3340 case NEON::BI__builtin_neon_vtbx3_v:
3341 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
3342 Ops, "vtbx3");
3343 case NEON::BI__builtin_neon_vtbx4_v:
3344 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
3345 Ops, "vtbx4");
3346 }
3347}
3348
3349template<typename Integer>
3350 static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) {
3351 return E->getIntegerConstantExpr(Context)->getExtValue();
3352}
3353
3354static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
3355 llvm::Type *T, bool Unsigned) {
3356 // Helper function called by Tablegen-constructed ARM MVE builtin codegen,
3357 // which finds it convenient to specify signed/unsigned as a boolean flag.
3358 return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T);
3359}
3360
3361static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
3362 uint32_t Shift, bool Unsigned) {
3363 // MVE helper function for integer shift right. This must handle signed vs
3364 // unsigned, and also deal specially with the case where the shift count is
3365 // equal to the lane size. In LLVM IR, an LShr with that parameter would be
3366 // undefined behavior, but in MVE it's legal, so we must convert it to code
3367 // that is not undefined in IR.
3368 unsigned LaneBits = cast<llvm::VectorType>(V->getType())
3369 ->getElementType()
3370 ->getPrimitiveSizeInBits();
3371 if (Shift == LaneBits) {
3372 // An unsigned shift of the full lane size always generates zero, so we can
3373 // simply emit a zero vector. A signed shift of the full lane size does the
3374 // same thing as shifting by one bit fewer.
3375 if (Unsigned)
3376 return llvm::Constant::getNullValue(V->getType());
3377 else
3378 --Shift;
3379 }
3380 return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift);
3381}
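// For example, with 32-bit lanes and Shift == 32: the unsigned case folds to
// a zero vector, and the signed case is emitted as an ashr by 31, which
// matches the result of an MVE shift by the full lane size.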
3382
3383static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
3384 // MVE-specific helper function for a vector splat, which infers the element
3385 // count of the output vector by knowing that MVE vectors are all 128 bits
3386 // wide.
3387 unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits();
3388 return Builder.CreateVectorSplat(Elements, V);
3389}
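// E.g. an i16 scalar is splatted to <8 x i16>, an i32 scalar to <4 x i32>.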
3390
3391static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,
3392 CodeGenFunction *CGF,
3393 llvm::Value *V,
3394 llvm::Type *DestType) {
3395 // Convert one MVE vector type into another by reinterpreting its in-register
3396 // format.
3397 //
3398 // Little-endian, this is identical to a bitcast (which reinterprets the
3399 // memory format). But big-endian, they're not necessarily the same, because
3400 // the register and memory formats map to each other differently depending on
3401 // the lane size.
3402 //
3403 // We generate a bitcast whenever we can (if we're little-endian, or if the
3404 // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic
3405 // that performs the different kind of reinterpretation.
3406 if (CGF->getTarget().isBigEndian() &&
3407 V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {
3408 return Builder.CreateCall(
3409 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vreinterpretq,
3410 {DestType, V->getType()}),
3411 V);
3412 } else {
3413 return Builder.CreateBitCast(V, DestType);
3414 }
3415}
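// For example, on a big-endian target a reinterpretation from <8 x i16> to
// <4 x i32> goes through the arm.mve.vreinterpretq intrinsic, while on a
// little-endian target (or when the lane sizes match) it is a plain bitcast.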
3416
3417static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) {
3418 // Make a shufflevector that extracts every other element of a vector (evens
3419 // or odds, as desired).
3420 SmallVector<int, 16> Indices;
3421 unsigned InputElements =
3422 cast<llvm::FixedVectorType>(V->getType())->getNumElements();
3423 for (unsigned i = 0; i < InputElements; i += 2)
3424 Indices.push_back(i + Odd);
3425 return Builder.CreateShuffleVector(V, Indices);
3426}
3427
3428static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
3429 llvm::Value *V1) {
3430 // Make a shufflevector that interleaves two vectors element by element.
3431 assert(V0->getType() == V1->getType() && "Can't zip different vector types");
3432 SmallVector<int, 16> Indices;
3433 unsigned InputElements =
3434 cast<llvm::FixedVectorType>(V0->getType())->getNumElements();
3435 for (unsigned i = 0; i < InputElements; i++) {
3436 Indices.push_back(i);
3437 Indices.push_back(i + InputElements);
3438 }
3439 return Builder.CreateShuffleVector(V0, V1, Indices);
3440}
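// E.g. zipping two <4 x i32> vectors uses the shuffle mask
// <0, 4, 1, 5, 2, 6, 3, 7>, interleaving V0 and V1 element by element.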
3441
3442template<unsigned HighBit, unsigned OtherBits>
3443static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
3444 // MVE-specific helper function to make a vector splat of a constant such as
3445 // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
3446 llvm::Type *T = cast<llvm::VectorType>(VT)->getElementType();
3447 unsigned LaneBits = T->getPrimitiveSizeInBits();
3448 uint32_t Value = HighBit << (LaneBits - 1);
3449 if (OtherBits)
3450 Value |= (1UL << (LaneBits - 1)) - 1;
3451 llvm::Value *Lane = llvm::ConstantInt::get(T, Value);
3452 return ARMMVEVectorSplat(Builder, Lane);
3453}
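// For example, with 16-bit lanes: HighBit=1, OtherBits=0 splats 0x8000 (the
// INT16_MIN pattern); HighBit=0, OtherBits=1 splats 0x7fff (INT16_MAX); and
// HighBit=1, OtherBits=1 splats 0xffff (UINT16_MAX).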
3454
3455static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
3456 llvm::Value *V,
3457 unsigned ReverseWidth) {
3458 // MVE-specific helper function which reverses the elements of a
3459 // vector within every (ReverseWidth)-bit collection of lanes.
3460 SmallVector<int, 16> Indices;
3461 unsigned LaneSize = V->getType()->getScalarSizeInBits();
3462 unsigned Elements = 128 / LaneSize;
3463 unsigned Mask = ReverseWidth / LaneSize - 1;
3464 for (unsigned i = 0; i < Elements; i++)
3465 Indices.push_back(i ^ Mask);
3466 return Builder.CreateShuffleVector(V, Indices);
3467}
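// For example, with 8-bit lanes and ReverseWidth == 32, Mask is 3 and the
// shuffle mask is <3, 2, 1, 0, 7, 6, 5, 4, ...>, reversing the bytes within
// every 32-bit group (a VREV32.8-style operation).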
3468
3469 Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
3470 const CallExpr *E,
3471 ReturnValueSlot ReturnValue,
3472 llvm::Triple::ArchType Arch) {
3473 enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
3474 Intrinsic::ID IRIntr;
3475 unsigned NumVectors;
3476
3477 // Code autogenerated by Tablegen will handle all the simple builtins.
3478 switch (BuiltinID) {
3479 #include "clang/Basic/arm_mve_builtin_cg.inc"
3480
3481 // If we didn't match an MVE builtin id at all, go back to the
3482 // main EmitARMBuiltinExpr.
3483 default:
3484 return nullptr;
3485 }
3486
3487 // Anything that breaks from that switch is an MVE builtin that
3488 // needs handwritten code to generate.
3489
3490 switch (CustomCodeGenType) {
3491
3492 case CustomCodeGen::VLD24: {
3493 llvm::SmallVector<Value *, 4> Ops;
3494 llvm::SmallVector<llvm::Type *, 4> Tys;
3495
3496 auto MvecCType = E->getType();
3497 auto MvecLType = ConvertType(MvecCType);
3498 assert(MvecLType->isStructTy() &&
3499 "Return type for vld[24]q should be a struct");
3500 assert(MvecLType->getStructNumElements() == 1 &&
3501 "Return-type struct for vld[24]q should have one element");
3502 auto MvecLTypeInner = MvecLType->getStructElementType(0);
3503 assert(MvecLTypeInner->isArrayTy() &&
3504 "Return-type struct for vld[24]q should contain an array");
3505 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
3506 "Array member of return-type struct vld[24]q has wrong length");
3507 auto VecLType = MvecLTypeInner->getArrayElementType();
3508
3509 Tys.push_back(VecLType);
3510
3511 auto Addr = E->getArg(0);
3512 Ops.push_back(EmitScalarExpr(Addr));
3513 Tys.push_back(ConvertType(Addr->getType()));
3514
3515 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
3516 Value *LoadResult = Builder.CreateCall(F, Ops);
3517 Value *MvecOut = PoisonValue::get(MvecLType);
3518 for (unsigned i = 0; i < NumVectors; ++i) {
3519 Value *Vec = Builder.CreateExtractValue(LoadResult, i);
3520 MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i});
3521 }
3522
3523 if (ReturnValue.isNull())
3524 return MvecOut;
3525 else
3526 return Builder.CreateStore(MvecOut, ReturnValue.getAddress());
3527 }
3528
3529 case CustomCodeGen::VST24: {
3530 llvm::SmallVector<Value *, 4> Ops;
3531 llvm::SmallVector<llvm::Type *, 4> Tys;
3532
3533 auto Addr = E->getArg(0);
3534 Ops.push_back(EmitScalarExpr(Addr));
3535 Tys.push_back(ConvertType(Addr->getType()));
3536
3537 auto MvecCType = E->getArg(1)->getType();
3538 auto MvecLType = ConvertType(MvecCType);
3539 assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct");
3540 assert(MvecLType->getStructNumElements() == 1 &&
3541 "Data-type struct for vst2q should have one element");
3542 auto MvecLTypeInner = MvecLType->getStructElementType(0);
3543 assert(MvecLTypeInner->isArrayTy() &&
3544 "Data-type struct for vst2q should contain an array");
3545 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
3546 "Array member of return-type struct vld[24]q has wrong length");
3547 auto VecLType = MvecLTypeInner->getArrayElementType();
3548
3549 Tys.push_back(VecLType);
3550
3551 AggValueSlot MvecSlot = CreateAggTemp(MvecCType);
3552 EmitAggExpr(E->getArg(1), MvecSlot);
3553 auto Mvec = Builder.CreateLoad(MvecSlot.getAddress());
3554 for (unsigned i = 0; i < NumVectors; i++)
3555 Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i}));
3556
3557 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
3558 Value *ToReturn = nullptr;
3559 for (unsigned i = 0; i < NumVectors; i++) {
3560 Ops.push_back(llvm::ConstantInt::get(Int32Ty, i));
3561 ToReturn = Builder.CreateCall(F, Ops);
3562 Ops.pop_back();
3563 }
3564 return ToReturn;
3565 }
3566 }
3567 llvm_unreachable("unknown custom codegen type.");
3568}
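// Note on the VLD24 path above: the intrinsic call returns the NumVectors
// result vectors as separate struct members, which are reassembled with
// insertvalue at indices {0, i} into the single-element struct-of-array type
// that the vld2q/vld4q ACLE intrinsics return.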
3569
3570 Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID,
3571 const CallExpr *E,
3572 ReturnValueSlot ReturnValue,
3573 llvm::Triple::ArchType Arch) {
3574 switch (BuiltinID) {
3575 default:
3576 return nullptr;
3577#include "clang/Basic/arm_cde_builtin_cg.inc"
3578 }
3579}
3580
3581static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
3582 const CallExpr *E,
3583 SmallVectorImpl<Value *> &Ops,
3584 llvm::Triple::ArchType Arch) {
3585 unsigned int Int = 0;
3586 const char *s = nullptr;
3587
3588 switch (BuiltinID) {
3589 default:
3590 return nullptr;
3591 case NEON::BI__builtin_neon_vtbl1_v:
3592 case NEON::BI__builtin_neon_vqtbl1_v:
3593 case NEON::BI__builtin_neon_vqtbl1q_v:
3594 case NEON::BI__builtin_neon_vtbl2_v:
3595 case NEON::BI__builtin_neon_vqtbl2_v:
3596 case NEON::BI__builtin_neon_vqtbl2q_v:
3597 case NEON::BI__builtin_neon_vtbl3_v:
3598 case NEON::BI__builtin_neon_vqtbl3_v:
3599 case NEON::BI__builtin_neon_vqtbl3q_v:
3600 case NEON::BI__builtin_neon_vtbl4_v:
3601 case NEON::BI__builtin_neon_vqtbl4_v:
3602 case NEON::BI__builtin_neon_vqtbl4q_v:
3603 break;
3604 case NEON::BI__builtin_neon_vtbx1_v:
3605 case NEON::BI__builtin_neon_vqtbx1_v:
3606 case NEON::BI__builtin_neon_vqtbx1q_v:
3607 case NEON::BI__builtin_neon_vtbx2_v:
3608 case NEON::BI__builtin_neon_vqtbx2_v:
3609 case NEON::BI__builtin_neon_vqtbx2q_v:
3610 case NEON::BI__builtin_neon_vtbx3_v:
3611 case NEON::BI__builtin_neon_vqtbx3_v:
3612 case NEON::BI__builtin_neon_vqtbx3q_v:
3613 case NEON::BI__builtin_neon_vtbx4_v:
3614 case NEON::BI__builtin_neon_vqtbx4_v:
3615 case NEON::BI__builtin_neon_vqtbx4q_v:
3616 break;
3617 }
3618
3619 assert(E->getNumArgs() >= 3);
3620
3621 // Get the last argument, which specifies the vector type.
3622 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
3623 std::optional<llvm::APSInt> Result =
3624 Arg->getIntegerConstantExpr(CGF.getContext());
3625 if (!Result)
3626 return nullptr;
3627
3628 // Determine the type of this overloaded NEON intrinsic.
3629 NeonTypeFlags Type = Result->getZExtValue();
3630 llvm::FixedVectorType *Ty = GetNeonType(&CGF, Type);
3631 if (!Ty)
3632 return nullptr;
3633
3634 CodeGen::CGBuilderTy &Builder = CGF.Builder;
3635
3636 // AArch64 scalar builtins are not overloaded; they lack the extra argument
3637 // that specifies the vector type, so each case must be handled separately.
3638 switch (BuiltinID) {
3639 case NEON::BI__builtin_neon_vtbl1_v: {
3640 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 1), nullptr, Ops[1],
3641 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
3642 }
3643 case NEON::BI__builtin_neon_vtbl2_v: {
3644 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 2), nullptr, Ops[2],
3645 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
3646 }
3647 case NEON::BI__builtin_neon_vtbl3_v: {
3648 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 3), nullptr, Ops[3],
3649 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
3650 }
3651 case NEON::BI__builtin_neon_vtbl4_v: {
3652 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 4), nullptr, Ops[4],
3653 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
3654 }
3655 case NEON::BI__builtin_neon_vtbx1_v: {
3656 Value *TblRes =
3657 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 1), nullptr, Ops[2], Ty,
3658 Intrinsic::aarch64_neon_tbl1, "vtbl1");
3659
3660 llvm::Constant *EightV = ConstantInt::get(Ty, 8);
3661 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
3662 CmpRes = Builder.CreateSExt(CmpRes, Ty);
3663
3664 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
3665 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
3666 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
3667 }
3668 case NEON::BI__builtin_neon_vtbx2_v: {
3669 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 2), Ops[0], Ops[3],
3670 Ty, Intrinsic::aarch64_neon_tbx1, "vtbx1");
3671 }
3672 case NEON::BI__builtin_neon_vtbx3_v: {
3673 Value *TblRes =
3674 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 3), nullptr, Ops[4], Ty,
3675 Intrinsic::aarch64_neon_tbl2, "vtbl2");
3676
3677 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
3678 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
3679 TwentyFourV);
3680 CmpRes = Builder.CreateSExt(CmpRes, Ty);
3681
3682 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
3683 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
3684 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
3685 }
3686 case NEON::BI__builtin_neon_vtbx4_v: {
3687 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 4), Ops[0], Ops[5],
3688 Ty, Intrinsic::aarch64_neon_tbx2, "vtbx2");
3689 }
3690 case NEON::BI__builtin_neon_vqtbl1_v:
3691 case NEON::BI__builtin_neon_vqtbl1q_v:
3692 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
3693 case NEON::BI__builtin_neon_vqtbl2_v:
3694 case NEON::BI__builtin_neon_vqtbl2q_v: {
3695 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
3696 case NEON::BI__builtin_neon_vqtbl3_v:
3697 case NEON::BI__builtin_neon_vqtbl3q_v:
3698 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
3699 case NEON::BI__builtin_neon_vqtbl4_v:
3700 case NEON::BI__builtin_neon_vqtbl4q_v:
3701 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
3702 case NEON::BI__builtin_neon_vqtbx1_v:
3703 case NEON::BI__builtin_neon_vqtbx1q_v:
3704 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
3705 case NEON::BI__builtin_neon_vqtbx2_v:
3706 case NEON::BI__builtin_neon_vqtbx2q_v:
3707 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
3708 case NEON::BI__builtin_neon_vqtbx3_v:
3709 case NEON::BI__builtin_neon_vqtbx3q_v:
3710 Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
3711 case NEON::BI__builtin_neon_vqtbx4_v:
3712 case NEON::BI__builtin_neon_vqtbx4q_v:
3713 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
3714 }
3715 }
3716
3717 if (!Int)
3718 return nullptr;
3719
3720 Function *F = CGF.CGM.getIntrinsic(Int, Ty);
3721 return CGF.EmitNeonCall(F, Ops, s);
3722}
3723
3724 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
3725 auto *VTy = llvm::FixedVectorType::get(Int16Ty, 4);
3726 Op = Builder.CreateBitCast(Op, Int16Ty);
3727 Value *V = PoisonValue::get(VTy);
3728 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
3729 Op = Builder.CreateInsertElement(V, Op, CI);
3730 return Op;
3731}
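// E.g. an i16 scalar %s becomes roughly
//   insertelement <4 x i16> poison, i16 %s, i64 0
// so that scalar operations can reuse intrinsics that only take vector types.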
3732
3733/// SVEBuiltinMemEltTy - Returns the memory element type for this memory
3734/// access builtin. Only required if it can't be inferred from the base pointer
3735/// operand.
3736 llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags) {
3737 switch (TypeFlags.getMemEltType()) {
3738 case SVETypeFlags::MemEltTyDefault:
3739 return getEltType(TypeFlags);
3740 case SVETypeFlags::MemEltTyInt8:
3741 return Builder.getInt8Ty();
3742 case SVETypeFlags::MemEltTyInt16:
3743 return Builder.getInt16Ty();
3744 case SVETypeFlags::MemEltTyInt32:
3745 return Builder.getInt32Ty();
3746 case SVETypeFlags::MemEltTyInt64:
3747 return Builder.getInt64Ty();
3748 }
3749 llvm_unreachable("Unknown MemEltType");
3750}
3751
3752llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) {
3753 switch (TypeFlags.getEltType()) {
3754 default:
3755 llvm_unreachable("Invalid SVETypeFlag!");
3756
3757 case SVETypeFlags::EltTyMFloat8:
3758 case SVETypeFlags::EltTyInt8:
3759 return Builder.getInt8Ty();
3760 case SVETypeFlags::EltTyInt16:
3761 return Builder.getInt16Ty();
3762 case SVETypeFlags::EltTyInt32:
3763 return Builder.getInt32Ty();
3764 case SVETypeFlags::EltTyInt64:
3765 return Builder.getInt64Ty();
3766 case SVETypeFlags::EltTyInt128:
3767 return Builder.getInt128Ty();
3768
3769 case SVETypeFlags::EltTyFloat16:
3770 return Builder.getHalfTy();
3771 case SVETypeFlags::EltTyFloat32:
3772 return Builder.getFloatTy();
3773 case SVETypeFlags::EltTyFloat64:
3774 return Builder.getDoubleTy();
3775
3776 case SVETypeFlags::EltTyBFloat16:
3777 return Builder.getBFloatTy();
3778
3779 case SVETypeFlags::EltTyBool8:
3780 case SVETypeFlags::EltTyBool16:
3781 case SVETypeFlags::EltTyBool32:
3782 case SVETypeFlags::EltTyBool64:
3783 return Builder.getInt1Ty();
3784 }
3785}
3786
3787// Return the llvm predicate vector type corresponding to the specified element
3788// TypeFlags.
3789llvm::ScalableVectorType *
3790 CodeGenFunction::getSVEPredType(const SVETypeFlags &TypeFlags) {
3791 switch (TypeFlags.getEltType()) {
3792 default: llvm_unreachable("Unhandled SVETypeFlag!");
3793
3794 case SVETypeFlags::EltTyInt8:
3795 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
3796 case SVETypeFlags::EltTyInt16:
3797 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3798 case SVETypeFlags::EltTyInt32:
3799 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3800 case SVETypeFlags::EltTyInt64:
3801 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3802
3803 case SVETypeFlags::EltTyBFloat16:
3804 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3805 case SVETypeFlags::EltTyFloat16:
3806 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3807 case SVETypeFlags::EltTyFloat32:
3808 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3809 case SVETypeFlags::EltTyFloat64:
3810 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3811
3812 case SVETypeFlags::EltTyBool8:
3813 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
3814 case SVETypeFlags::EltTyBool16:
3815 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3816 case SVETypeFlags::EltTyBool32:
3817 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3818 case SVETypeFlags::EltTyBool64:
3819 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3820 }
3821}
3822
3823// Return the llvm vector type corresponding to the specified element TypeFlags.
3824llvm::ScalableVectorType *
3825 CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
3826 switch (TypeFlags.getEltType()) {
3827 default:
3828 llvm_unreachable("Invalid SVETypeFlag!");
3829
3830 case SVETypeFlags::EltTyInt8:
3831 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
3832 case SVETypeFlags::EltTyInt16:
3833 return llvm::ScalableVectorType::get(Builder.getInt16Ty(), 8);
3834 case SVETypeFlags::EltTyInt32:
3835 return llvm::ScalableVectorType::get(Builder.getInt32Ty(), 4);
3836 case SVETypeFlags::EltTyInt64:
3837 return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2);
3838
3839 case SVETypeFlags::EltTyMFloat8:
3840 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
3841 case SVETypeFlags::EltTyFloat16:
3842 return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8);
3843 case SVETypeFlags::EltTyBFloat16:
3844 return llvm::ScalableVectorType::get(Builder.getBFloatTy(), 8);
3845 case SVETypeFlags::EltTyFloat32:
3846 return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4);
3847 case SVETypeFlags::EltTyFloat64:
3848 return llvm::ScalableVectorType::get(Builder.getDoubleTy(), 2);
3849
3850 case SVETypeFlags::EltTyBool8:
3851 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
3852 case SVETypeFlags::EltTyBool16:
3853 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
3854 case SVETypeFlags::EltTyBool32:
3855 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
3856 case SVETypeFlags::EltTyBool64:
3857 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
3858 }
3859}
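// Worked example of the two mappings above (illustration only): for
// EltTyFloat64 getSVEType returns <vscale x 2 x double> while getSVEPredType
// returns <vscale x 2 x i1>, i.e. both agree on the per-128-bit lane count.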
3860
3861llvm::Value *
3862 CodeGenFunction::EmitSVEAllTruePred(const SVETypeFlags &TypeFlags) {
3863 Function *Ptrue =
3864 CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags));
3865 return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)});
3866}
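// For example, with EltTyFloat32 flags this emits roughly
//   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
// where 31 encodes the SV_ALL pattern (illustrative sketch).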
3867
3868constexpr unsigned SVEBitsPerBlock = 128;
3869
3870static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) {
3871 unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();
3872 return llvm::ScalableVectorType::get(EltTy, NumElts);
3873}
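// E.g. a 32-bit element type gives 128 / 32 = 4 lanes, i.e.
// <vscale x 4 x i32>; an 8-bit element type gives <vscale x 16 x i8>
// (illustrative note).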
3874
3875// Reinterpret the input predicate so that it can be used to correctly isolate
3876// the elements of the specified datatype.
3877 Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,
3878 llvm::ScalableVectorType *VTy) {
3879
3880 if (isa<TargetExtType>(Pred->getType()) &&
3881 cast<TargetExtType>(Pred->getType())->getName() == "aarch64.svcount")
3882 return Pred;
3883
3884 auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy);
3885 if (Pred->getType() == RTy)
3886 return Pred;
3887
3888 unsigned IntID;
3889 llvm::Type *IntrinsicTy;
3890 switch (VTy->getMinNumElements()) {
3891 default:
3892 llvm_unreachable("unsupported element count!");
3893 case 1:
3894 case 2:
3895 case 4:
3896 case 8:
3897 IntID = Intrinsic::aarch64_sve_convert_from_svbool;
3898 IntrinsicTy = RTy;
3899 break;
3900 case 16:
3901 IntID = Intrinsic::aarch64_sve_convert_to_svbool;
3902 IntrinsicTy = Pred->getType();
3903 break;
3904 }
3905
3906 Function *F = CGM.getIntrinsic(IntID, IntrinsicTy);
3907 Value *C = Builder.CreateCall(F, Pred);
3908 assert(C->getType() == RTy && "Unexpected return type!");
3909 return C;
3910}
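// Illustrative sketch: casting an svbool_t value (<vscale x 16 x i1>) for use
// with 64-bit data takes the convert_from_svbool path and emits roughly
//   %pg.d = call <vscale x 2 x i1>
//       @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
// while casting a narrower predicate back to svbool_t uses convert_to_svbool.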
3911
3912 Value *CodeGenFunction::EmitSVEPredicateTupleCast(Value *PredTuple,
3913 llvm::StructType *Ty) {
3914 if (PredTuple->getType() == Ty)
3915 return PredTuple;
3916
3917 Value *Ret = llvm::PoisonValue::get(Ty);
3918 for (unsigned I = 0; I < Ty->getNumElements(); ++I) {
3919 Value *Pred = Builder.CreateExtractValue(PredTuple, I);
3920 Pred = EmitSVEPredicateCast(
3921 Pred, cast<llvm::ScalableVectorType>(Ty->getTypeAtIndex(I)));
3922 Ret = Builder.CreateInsertValue(Ret, Pred, I);
3923 }
3924
3925 return Ret;
3926}
3927
3928 Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
3929 SmallVectorImpl<Value *> &Ops,
3930 unsigned IntID) {
3931 auto *ResultTy = getSVEType(TypeFlags);
3932 auto *OverloadedTy =
3933 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy);
3934
3935 Function *F = nullptr;
3936 if (Ops[1]->getType()->isVectorTy())
3937 // This is the "vector base, scalar offset" case. In order to uniquely
3938 // map this built-in to an LLVM IR intrinsic, we need both the return type
3939 // and the type of the vector base.
3940 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()});
3941 else
3942 // This is the "scalar base, vector offset" case. The type of the offset
3943 // is encoded in the name of the intrinsic. We only need to specify the
3944 // return type in order to uniquely map this built-in to an LLVM IR
3945 // intrinsic.
3946 F = CGM.getIntrinsic(IntID, OverloadedTy);
3947
3948 // At the ACLE level there's only one predicate type, svbool_t, which is
3949 // mapped to <n x 16 x i1>. However, this might be incompatible with the
3950 // actual type being loaded. For example, when loading doubles (i64) the
3951 // predicate should be <n x 2 x i1> instead. At the IR level the type of
3952 // the predicate and the data being loaded must match. Cast to the type
3953 // expected by the intrinsic. The intrinsic itself should be defined in
3954 // a way that enforces relations between parameter types.
3955 Ops[0] = EmitSVEPredicateCast(
3956 Ops[0], cast<llvm::ScalableVectorType>(F->getArg(0)->getType()));
3957
3958 // Pass 0 when the offset is missing. This can only be applied when using
3959 // the "vector base" addressing mode for which ACLE allows no offset. The
3960 // corresponding LLVM IR always requires an offset.
3961 if (Ops.size() == 2) {
3962 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
3963 Ops.push_back(ConstantInt::get(Int64Ty, 0));
3964 }
3965
3966 // For "vector base, scalar index" scale the index so that it becomes a
3967 // scalar offset.
3968 if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
3969 unsigned BytesPerElt =
3970 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
3971 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
3972 }
3973
3974 Value *Call = Builder.CreateCall(F, Ops);
3975
3976 // The following sext/zext is only needed when ResultTy != OverloadedTy. In
3977 // other cases it's folded into a nop.
3978 return TypeFlags.isZExtReturn() ? Builder.CreateZExt(Call, ResultTy)
3979 : Builder.CreateSExt(Call, ResultTy);
3980}
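// Illustrative note on the index scaling above: for the "vector base, scalar
// index" form with 64-bit memory elements, a scalar index i is turned into a
// byte offset as i << 3 (i.e. i * 8) before the call, and the final sext/zext
// widens the loaded value when the memory element type is narrower than the
// result type.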
3981
3982 Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags,
3983 SmallVectorImpl<Value *> &Ops,
3984 unsigned IntID) {
3985 auto *SrcDataTy = getSVEType(TypeFlags);
3986 auto *OverloadedTy =
3987 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), SrcDataTy);
3988
3989 // In ACLE the source data is passed in the last argument, whereas in LLVM IR
3990 // it's the first argument. Move it accordingly.
3991 Ops.insert(Ops.begin(), Ops.pop_back_val());
3992
3993 Function *F = nullptr;
3994 if (Ops[2]->getType()->isVectorTy())
3995 // This is the "vector base, scalar offset" case. In order to uniquely
3996 // map this built-in to an LLVM IR intrinsic, we need both the return type
3997 // and the type of the vector base.
3998 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()});
3999 else
4000 // This is the "scalar base, vector offset" case. The type of the offset
4001 // is encoded in the name of the intrinsic. We only need to specify the
4002 // return type in order to uniquely map this built-in to an LLVM IR
4003 // intrinsic.
4004 F = CGM.getIntrinsic(IntID, OverloadedTy);
4005
4006 // Pass 0 when the offset is missing. This can only be applied when using
4007 // the "vector base" addressing mode for which ACLE allows no offset. The
4008 // corresponding LLVM IR always requires an offset.
4009 if (Ops.size() == 3) {
4010 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
4011 Ops.push_back(ConstantInt::get(Int64Ty, 0));
4012 }
4013
4014 // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
4015 // folded into a nop.
4016 Ops[0] = Builder.CreateTrunc(Ops[0], OverloadedTy);
4017
4018 // At the ACLE level there's only one predicate type, svbool_t, which is
4019 // mapped to <n x 16 x i1>. However, this might be incompatible with the
4020 // actual type being stored. For example, when storing doubles (i64) the
4021 // predicate should be <n x 2 x i1> instead. At the IR level the type of
4022 // the predicate and the data being stored must match. Cast to the type
4023 // expected by the intrinsic. The intrinsic itself should be defined in
4024 // a way that enforces relations between parameter types.
4025 Ops[1] = EmitSVEPredicateCast(
4026 Ops[1], cast<llvm::ScalableVectorType>(F->getArg(1)->getType()));
4027
4028 // For "vector base, scalar index" scale the index so that it becomes a
4029 // scalar offset.
4030 if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
4031 unsigned BytesPerElt =
4032 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
4033 Ops[3] = Builder.CreateShl(Ops[3], Log2_32(BytesPerElt));
4034 }
4035
4036 return Builder.CreateCall(F, Ops);
4037}
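// Illustrative summary: an ACLE scatter store arrives as (pg, base[, index],
// data) and is re-ordered above into the LLVM operand order (data, pg, base,
// offset), with a zero offset appended for the "vector base, no offset" form
// and the scalar index scaled to a byte offset when needed.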
4038
4039 Value *CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags,
4040 SmallVectorImpl<Value *> &Ops,
4041 unsigned IntID) {
4042 // The gather prefetches are overloaded on the vector input - this can either
4043 // be the vector of base addresses or vector of offsets.
4044 auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Ops[1]->getType());
4045 if (!OverloadedTy)
4046 OverloadedTy = cast<llvm::ScalableVectorType>(Ops[2]->getType());
4047
4048 // Cast the predicate from svbool_t to the right number of elements.
4049 Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
4050
4051 // vector + imm addressing modes
4052 if (Ops[1]->getType()->isVectorTy()) {
4053 if (Ops.size() == 3) {
4054 // Pass 0 for 'vector+imm' when the index is omitted.
4055 Ops.push_back(ConstantInt::get(Int64Ty, 0));
4056
4057 // The sv_prfop is the last operand in the builtin and IR intrinsic.
4058 std::swap(Ops[2], Ops[3]);
4059 } else {
4060 // Index needs to be passed as scaled offset.
4061 llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
4062 unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
4063 if (BytesPerElt > 1)
4064 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
4065 }
4066 }
4067
4068 Function *F = CGM.getIntrinsic(IntID, OverloadedTy);
4069 return Builder.CreateCall(F, Ops);
4070}
4071
4072 Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
4073 SmallVectorImpl<Value *> &Ops,
4074 unsigned IntID) {
4075 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
4076 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
4077 Value *BasePtr = Ops[1];
4078
4079 // Does the load have an offset?
4080 if (Ops.size() > 2)
4081 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
4082
4083 Function *F = CGM.getIntrinsic(IntID, {VTy});
4084 return Builder.CreateCall(F, {Predicate, BasePtr});
4085}
4086
4087 Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
4088 SmallVectorImpl<Value *> &Ops,
4089 unsigned IntID) {
4090 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
4091
4092 unsigned N;
4093 switch (IntID) {
4094 case Intrinsic::aarch64_sve_st2:
4095 case Intrinsic::aarch64_sve_st1_pn_x2:
4096 case Intrinsic::aarch64_sve_stnt1_pn_x2:
4097 case Intrinsic::aarch64_sve_st2q:
4098 N = 2;
4099 break;
4100 case Intrinsic::aarch64_sve_st3:
4101 case Intrinsic::aarch64_sve_st3q:
4102 N = 3;
4103 break;
4104 case Intrinsic::aarch64_sve_st4:
4105 case Intrinsic::aarch64_sve_st1_pn_x4:
4106 case Intrinsic::aarch64_sve_stnt1_pn_x4:
4107 case Intrinsic::aarch64_sve_st4q:
4108 N = 4;
4109 break;
4110 default:
4111 llvm_unreachable("unknown intrinsic!");
4112 }
4113
4114 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
4115 Value *BasePtr = Ops[1];
4116
4117 // Does the store have an offset?
4118 if (Ops.size() > (2 + N))
4119 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
4120
4121 // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
4122 // need to break up the tuple vector.
4123 SmallVector<Value *> Operands;
4124 for (unsigned I = Ops.size() - N; I < Ops.size(); ++I)
4125 Operands.push_back(Ops[I]);
4126 Operands.append({Predicate, BasePtr});
4127 Function *F = CGM.getIntrinsic(IntID, { VTy });
4128
4129 return Builder.CreateCall(F, Operands);
4130}
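// For example, an svst2 of a two-vector tuple of 32-bit elements is emitted
// roughly as
//   call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> %v0,
//       <vscale x 4 x i32> %v1, <vscale x 4 x i1> %pg, ptr %base)
// i.e. the tuple parts come first, followed by the predicate and base pointer
// (illustrative sketch).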
4131
4132// SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
4133// svpmullt_pair intrinsics, with the exception that their results are bitcast
4134// to a wider type.
4135 Value *CodeGenFunction::EmitSVEPMull(const SVETypeFlags &TypeFlags,
4136 SmallVectorImpl<Value *> &Ops,
4137 unsigned BuiltinID) {
4138 // Splat scalar operand to vector (intrinsics with _n infix)
4139 if (TypeFlags.hasSplatOperand()) {
4140 unsigned OpNo = TypeFlags.getSplatOperand();
4141 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
4142 }
4143
4144 // The pair-wise function has a narrower overloaded type.
4145 Function *F = CGM.getIntrinsic(BuiltinID, Ops[0]->getType());
4146 Value *Call = Builder.CreateCall(F, {Ops[0], Ops[1]});
4147
4148 // Now bitcast to the wider result type.
4149 llvm::ScalableVectorType *Ty = getSVEType(TypeFlags);
4150 return EmitSVEReinterpret(Call, Ty);
4151}
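// E.g. svpmullb_u16 is emitted as the pair-wise intrinsic on the narrow
// element type followed by a reinterpret to the wide result, roughly:
//   %p = call <vscale x 16 x i8> @llvm.aarch64.sve.pmullb.pair.nxv16i8(%a, %b)
//   %r = bitcast <vscale x 16 x i8> %p to <vscale x 8 x i16>
// (illustrative sketch; the cast goes through EmitSVEReinterpret).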
4152
4153 Value *CodeGenFunction::EmitSVEMovl(const SVETypeFlags &TypeFlags,
4154 ArrayRef<Value *> Ops, unsigned BuiltinID) {
4155 llvm::Type *OverloadedTy = getSVEType(TypeFlags);
4156 Function *F = CGM.getIntrinsic(BuiltinID, OverloadedTy);
4157 return Builder.CreateCall(F, {Ops[0], Builder.getInt32(0)});
4158}
4159
4160 Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
4161 SmallVectorImpl<Value *> &Ops,
4162 unsigned BuiltinID) {
4163 auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
4164 auto *VectorTy = getSVEVectorForElementType(MemEltTy);
4165 auto *MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
4166
4167 Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
4168 Value *BasePtr = Ops[1];
4169
4170 // Apply the index operand if it is not omitted.
4171 if (Ops.size() > 3)
4172 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
4173
4174 Value *PrfOp = Ops.back();
4175
4176 Function *F = CGM.getIntrinsic(BuiltinID, Predicate->getType());
4177 return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp});
4178}
4179
4180 Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
4181 llvm::Type *ReturnTy,
4182 SmallVectorImpl<Value *> &Ops,
4183 unsigned IntrinsicID,
4184 bool IsZExtReturn) {
4185 QualType LangPTy = E->getArg(1)->getType();
4186 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
4187 LangPTy->castAs<PointerType>()->getPointeeType());
4188
4189 // Mfloat8 types are stored as a vector, so extra work
4190 // to extract the scalar element type is necessary.
4191 if (MemEltTy->isVectorTy()) {
4192 assert(MemEltTy == FixedVectorType::get(Int8Ty, 1) &&
4193 "Only <1 x i8> expected");
4194 MemEltTy = cast<llvm::VectorType>(MemEltTy)->getElementType();
4195 }
4196
4197 // The vector type that is returned may be different from the
4198 // eventual type loaded from memory.
4199 auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy);
4200 llvm::ScalableVectorType *MemoryTy = nullptr;
4201 llvm::ScalableVectorType *PredTy = nullptr;
4202 bool IsQuadLoad = false;
4203 switch (IntrinsicID) {
4204 case Intrinsic::aarch64_sve_ld1uwq:
4205 case Intrinsic::aarch64_sve_ld1udq:
4206 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
4207 PredTy = llvm::ScalableVectorType::get(
4208 llvm::Type::getInt1Ty(getLLVMContext()), 1);
4209 IsQuadLoad = true;
4210 break;
4211 default:
4212 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
4213 PredTy = MemoryTy;
4214 break;
4215 }
4216
4217 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
4218 Value *BasePtr = Ops[1];
4219
4220 // Does the load have an offset?
4221 if (Ops.size() > 2)
4222 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
4223
4224 Function *F = CGM.getIntrinsic(IntrinsicID, IsQuadLoad ? VectorTy : MemoryTy);
4225 auto *Load =
4226 cast<llvm::Instruction>(Builder.CreateCall(F, {Predicate, BasePtr}));
4227 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
4228 CGM.DecorateInstructionWithTBAA(Load, TBAAInfo);
4229
4230 if (IsQuadLoad)
4231 return Load;
4232
4233 return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)
4234 : Builder.CreateSExt(Load, VectorTy);
4235}
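// Illustrative example: an extending load such as svld1sb_s16 loads a
// <vscale x 8 x i8> value under an <vscale x 8 x i1> predicate and then
// sign-extends it to <vscale x 8 x i16>; the sext/zext below folds away when
// the memory and return element types already match.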
4236
4237 Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
4238 SmallVectorImpl<Value *> &Ops,
4239 unsigned IntrinsicID) {
4240 QualType LangPTy = E->getArg(1)->getType();
4241 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
4242 LangPTy->castAs<PointerType>()->getPointeeType());
4243
4244 // Mfloat8 types are stored as a vector, so extra work
4245 // to extract the scalar element type is necessary.
4246 if (MemEltTy->isVectorTy()) {
4247 assert(MemEltTy == FixedVectorType::get(Int8Ty, 1) &&
4248 "Only <1 x i8> expected");
4249 MemEltTy = cast<llvm::VectorType>(MemEltTy)->getElementType();
4250 }
4251
4252 // The vector type that is stored may be different from the
4253 // eventual type stored to memory.
4254 auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType());
4255 auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
4256
4257 auto PredTy = MemoryTy;
4258 auto AddrMemoryTy = MemoryTy;
4259 bool IsQuadStore = false;
4260
4261 switch (IntrinsicID) {
4262 case Intrinsic::aarch64_sve_st1wq:
4263 case Intrinsic::aarch64_sve_st1dq:
4264 AddrMemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
4265 PredTy =
4266 llvm::ScalableVectorType::get(IntegerType::get(getLLVMContext(), 1), 1);
4267 IsQuadStore = true;
4268 break;
4269 default:
4270 break;
4271 }
4272 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
4273 Value *BasePtr = Ops[1];
4274
4275 // Does the store have an offset?
4276 if (Ops.size() == 4)
4277 BasePtr = Builder.CreateGEP(AddrMemoryTy, BasePtr, Ops[2]);
4278
4279 // Last value is always the data
4280 Value *Val =
4281 IsQuadStore ? Ops.back() : Builder.CreateTrunc(Ops.back(), MemoryTy);
4282
4283 Function *F =
4284 CGM.getIntrinsic(IntrinsicID, IsQuadStore ? VectorTy : MemoryTy);
4285 auto *Store =
4286 cast<llvm::Instruction>(Builder.CreateCall(F, {Val, Predicate, BasePtr}));
4287 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
4288 CGM.DecorateInstructionWithTBAA(Store, TBAAInfo);
4289 return Store;
4290}
4291
4292 Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
4293 SmallVectorImpl<Value *> &Ops,
4294 unsigned IntID) {
4295 Ops[2] = EmitSVEPredicateCast(
4297
4298 SmallVector<Value *> NewOps;
4299 NewOps.push_back(Ops[2]);
4300
4301 llvm::Value *BasePtr = Ops[3];
4302 llvm::Value *RealSlice = Ops[1];
4303 // If the intrinsic contains the vnum parameter, multiply it with the vector
4304 // size in bytes.
4305 if (Ops.size() == 5) {
4306 Function *StreamingVectorLength =
4307 CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
4308 llvm::Value *StreamingVectorLengthCall =
4309 Builder.CreateCall(StreamingVectorLength);
4310 llvm::Value *Mulvl =
4311 Builder.CreateMul(StreamingVectorLengthCall, Ops[4], "mulvl");
4312 // The type of the ptr parameter is void *, so use Int8Ty here.
4313 BasePtr = Builder.CreateGEP(Int8Ty, Ops[3], Mulvl);
4314 RealSlice = Builder.CreateZExt(RealSlice, Int64Ty);
4315 RealSlice = Builder.CreateAdd(RealSlice, Ops[4]);
4316 RealSlice = Builder.CreateTrunc(RealSlice, Int32Ty);
4317 }
4318 NewOps.push_back(BasePtr);
4319 NewOps.push_back(Ops[0]);
4320 NewOps.push_back(RealSlice);
4321 Function *F = CGM.getIntrinsic(IntID);
4322 return Builder.CreateCall(F, NewOps);
4323}
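// Illustrative sketch of the vnum handling above: when a vnum argument is
// present, the base pointer and slice are computed roughly as
//   %svl  = call i64 @llvm.aarch64.sme.cntsb()
//   %off  = mul i64 %svl, %vnum
//   %base = getelementptr i8, ptr %ptr, i64 %off
// and the slice becomes trunc(zext(slice) + vnum).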
4324
4325 Value *CodeGenFunction::EmitSMEReadWrite(const SVETypeFlags &TypeFlags,
4326 SmallVectorImpl<Value *> &Ops,
4327 unsigned IntID) {
4328 auto *VecTy = getSVEType(TypeFlags);
4329 Function *F = CGM.getIntrinsic(IntID, VecTy);
4330 if (TypeFlags.isReadZA())
4331 Ops[1] = EmitSVEPredicateCast(Ops[1], VecTy);
4332 else if (TypeFlags.isWriteZA())
4333 Ops[2] = EmitSVEPredicateCast(Ops[2], VecTy);
4334 return Builder.CreateCall(F, Ops);
4335}
4336
4337 Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags,
4338 SmallVectorImpl<Value *> &Ops,
4339 unsigned IntID) {
4340 // svzero_za() intrinsic zeros the entire za tile and has no parameters.
4341 if (Ops.size() == 0)
4342 Ops.push_back(llvm::ConstantInt::get(Int32Ty, 255));
4343 Function *F = CGM.getIntrinsic(IntID, {});
4344 return Builder.CreateCall(F, Ops);
4345}
4346
4347 Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
4348 SmallVectorImpl<Value *> &Ops,
4349 unsigned IntID) {
4350 if (Ops.size() == 2)
4351 Ops.push_back(Builder.getInt32(0));
4352 else
4353 Ops[2] = Builder.CreateIntCast(Ops[2], Int32Ty, true);
4354 Function *F = CGM.getIntrinsic(IntID, {});
4355 return Builder.CreateCall(F, Ops);
4356}
4357
4358// Limit the usage of scalable llvm IR generated by the ACLE by using the
4359// sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat.
4360Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
4361 return Builder.CreateVectorSplat(
4362 cast<llvm::VectorType>(Ty)->getElementCount(), Scalar);
4363}
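// E.g. EmitSVEDupX(%x, <vscale x 4 x i32>) produces a splat of %x across all
// lanes; for scalable types IRBuilder::CreateVectorSplat typically expands to
// an insertelement followed by a zeroinitializer-mask shufflevector
// (illustrative note).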
4364
4365 Value *CodeGenFunction::EmitSVEDupX(Value *Scalar) {
4366 if (auto *Ty = Scalar->getType(); Ty->isVectorTy()) {
4367#ifndef NDEBUG
4368 auto *VecTy = cast<llvm::VectorType>(Ty);
4369 ElementCount EC = VecTy->getElementCount();
4370 assert(EC.isScalar() && VecTy->getElementType() == Int8Ty &&
4371 "Only <1 x i8> expected");
4372#endif
4373 Scalar = Builder.CreateExtractElement(Scalar, uint64_t(0));
4374 }
4375 return EmitSVEDupX(Scalar, getSVEVectorForElementType(Scalar->getType()));
4376}
4377
4378 Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) {
4379 // FIXME: For big endian this needs an additional REV, or needs a separate
4380 // intrinsic that is code-generated as a no-op, because the LLVM bitcast
4381 // instruction is defined as 'bitwise' equivalent from memory point of
4382 // view (when storing/reloading), whereas the svreinterpret builtin
4383 // implements bitwise equivalent cast from register point of view.
4384 // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
4385
4386 if (auto *StructTy = dyn_cast<StructType>(Ty)) {
4387 Value *Tuple = llvm::PoisonValue::get(Ty);
4388
4389 for (unsigned I = 0; I < StructTy->getNumElements(); ++I) {
4390 Value *In = Builder.CreateExtractValue(Val, I);
4391 Value *Out = Builder.CreateBitCast(In, StructTy->getTypeAtIndex(I));
4392 Tuple = Builder.CreateInsertValue(Tuple, Out, I);
4393 }
4394
4395 return Tuple;
4396 }
4397
4398 return Builder.CreateBitCast(Val, Ty);
4399}
4400
4401static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty,
4402 SmallVectorImpl<Value *> &Ops) {
4403 auto *SplatZero = Constant::getNullValue(Ty);
4404 Ops.insert(Ops.begin(), SplatZero);
4405}
4406
4407static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty,
4408 SmallVectorImpl<Value *> &Ops) {
4409 auto *SplatUndef = UndefValue::get(Ty);
4410 Ops.insert(Ops.begin(), SplatUndef);
4411}
4412
4413 SmallVector<llvm::Type *, 2>
4414 CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,
4415 llvm::Type *ResultType,
4416 ArrayRef<Value *> Ops) {
4417 if (TypeFlags.isOverloadNone())
4418 return {};
4419
4420 llvm::Type *DefaultType = getSVEType(TypeFlags);
4421
4422 if (TypeFlags.isOverloadWhileOrMultiVecCvt())
4423 return {DefaultType, Ops[1]->getType()};
4424
4425 if (TypeFlags.isOverloadWhileRW())
4426 return {getSVEPredType(TypeFlags), Ops[0]->getType()};
4427
4428 if (TypeFlags.isOverloadCvt())
4429 return {Ops[0]->getType(), Ops.back()->getType()};
4430
4431 if (TypeFlags.isReductionQV() && !ResultType->isScalableTy() &&
4432 ResultType->isVectorTy())
4433 return {ResultType, Ops[1]->getType()};
4434
4435 assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
4436 return {DefaultType};
4437}
4438
4439 Value *CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags,
4440 ArrayRef<Value *> Ops) {
4441 assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) &&
4442 "Expects TypleFlags.isTupleSet() or TypeFlags.isTupleGet()");
4443 unsigned Idx = cast<ConstantInt>(Ops[1])->getZExtValue();
4444
4445 if (TypeFlags.isTupleSet())
4446 return Builder.CreateInsertValue(Ops[0], Ops[2], Idx);
4447 return Builder.CreateExtractValue(Ops[0], Idx);
4448}
4449
4450 Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
4451 llvm::Type *Ty,
4452 ArrayRef<Value *> Ops) {
4453 assert(TypeFlags.isTupleCreate() && "Expects TypeFlags.isTupleCreate()");
4454
4455 Value *Tuple = llvm::PoisonValue::get(Ty);
4456 for (unsigned Idx = 0; Idx < Ops.size(); Idx++)
4457 Tuple = Builder.CreateInsertValue(Tuple, Ops[Idx], Idx);
4458
4459 return Tuple;
4460}
4461
4462 void CodeGenFunction::GetAArch64SVEProcessedOperands(
4463 unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops,
4464 SVETypeFlags TypeFlags) {
4465 // Find out if any arguments are required to be integer constant expressions.
4466 unsigned ICEArguments = 0;
4467 ASTContext::GetBuiltinTypeError Error;
4468 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
4469 assert(Error == ASTContext::GE_None && "Should not codegen an error");
4470
4471 // Tuple set/get only requires one insert/extract vector, which is
4472 // created by EmitSVETupleSetOrGet.
4473 bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet();
4474
4475 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
4476 bool IsICE = ICEArguments & (1 << i);
4477 Value *Arg = EmitScalarExpr(E->getArg(i));
4478
4479 if (IsICE) {
4480 // If this is required to be a constant, constant fold it so that we know
4481 // that the generated intrinsic gets a ConstantInt.
4482 std::optional<llvm::APSInt> Result =
4483 E->getArg(i)->getIntegerConstantExpr(getContext());
4484 assert(Result && "Expected argument to be a constant");
4485
4486 // Immediates for SVE llvm intrinsics are always 32bit. We can safely
4487 // truncate because the immediate has been range checked and no valid
4488 // immediate requires more than a handful of bits.
4489 *Result = Result->extOrTrunc(32);
4490 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
4491 continue;
4492 }
4493
4494 if (isa<StructType>(Arg->getType()) && !IsTupleGetOrSet) {
4495 for (unsigned I = 0; I < Arg->getType()->getStructNumElements(); ++I)
4496 Ops.push_back(Builder.CreateExtractValue(Arg, I));
4497
4498 continue;
4499 }
4500
4501 Ops.push_back(Arg);
4502 }
4503}
4504
4505 Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
4506 const CallExpr *E) {
4507 llvm::Type *Ty = ConvertType(E->getType());
4508 if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
4509 BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) {
4510 Value *Val = EmitScalarExpr(E->getArg(0));
4511 return EmitSVEReinterpret(Val, Ty);
4512 }
4513
4514 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID,
4515 AArch64SVEIntrinsicsProvenSorted);
4516
4517 llvm::SmallVector<Value *, 4> Ops;
4518 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4519 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
4520
4521 if (TypeFlags.isLoad())
4522 return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic,
4523 TypeFlags.isZExtReturn());
4524 else if (TypeFlags.isStore())
4525 return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic);
4526 else if (TypeFlags.isGatherLoad())
4527 return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4528 else if (TypeFlags.isScatterStore())
4529 return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4530 else if (TypeFlags.isPrefetch())
4531 return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4532 else if (TypeFlags.isGatherPrefetch())
4533 return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4534 else if (TypeFlags.isStructLoad())
4535 return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4536 else if (TypeFlags.isStructStore())
4537 return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4538 else if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
4539 return EmitSVETupleSetOrGet(TypeFlags, Ops);
4540 else if (TypeFlags.isTupleCreate())
4541 return EmitSVETupleCreate(TypeFlags, Ty, Ops);
4542 else if (TypeFlags.isUndef())
4543 return UndefValue::get(Ty);
4544 else if (Builtin->LLVMIntrinsic != 0) {
4545 // Emit set FPMR for intrinsics that require it
4546 if (TypeFlags.setsFPMR())
4547 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
4548 Ops.pop_back_val());
4549 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
4550 InsertExplicitZeroOperand(Builder, Ty, Ops);
4551
4552 if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
4553 InsertExplicitUndefOperand(Builder, Ty, Ops);
4554
4555 // Some ACLE builtins leave out the argument to specify the predicate
4556 // pattern, which is expected to be expanded to an SV_ALL pattern.
4557 if (TypeFlags.isAppendSVALL())
4558 Ops.push_back(Builder.getInt32(/*SV_ALL*/ 31));
4559 if (TypeFlags.isInsertOp1SVALL())
4560 Ops.insert(&Ops[1], Builder.getInt32(/*SV_ALL*/ 31));
4561
4562 // Predicates must match the main datatype.
4563 for (Value *&Op : Ops)
4564 if (auto PredTy = dyn_cast<llvm::VectorType>(Op->getType()))
4565 if (PredTy->getElementType()->isIntegerTy(1))
4566 Op = EmitSVEPredicateCast(Op, getSVEType(TypeFlags));
4567
4568 // Splat scalar operand to vector (intrinsics with _n infix)
4569 if (TypeFlags.hasSplatOperand()) {
4570 unsigned OpNo = TypeFlags.getSplatOperand();
4571 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
4572 }
4573
4574 if (TypeFlags.isReverseCompare())
4575 std::swap(Ops[1], Ops[2]);
4576 else if (TypeFlags.isReverseUSDOT())
4577 std::swap(Ops[1], Ops[2]);
4578 else if (TypeFlags.isReverseMergeAnyBinOp() &&
4579 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
4580 std::swap(Ops[1], Ops[2]);
4581 else if (TypeFlags.isReverseMergeAnyAccOp() &&
4582 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
4583 std::swap(Ops[1], Ops[3]);
4584
4585 // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
4586 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
4587 llvm::Type *OpndTy = Ops[1]->getType();
4588 auto *SplatZero = Constant::getNullValue(OpndTy);
4589 Ops[1] = Builder.CreateSelect(Ops[0], Ops[1], SplatZero);
4590 }
4591
4592 Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic,
4593 getSVEOverloadTypes(TypeFlags, Ty, Ops));
4594 Value *Call = Builder.CreateCall(F, Ops);
4595
4596 if (Call->getType() == Ty)
4597 return Call;
4598
4599 // Predicate results must be converted to svbool_t.
4600 if (auto PredTy = dyn_cast<llvm::ScalableVectorType>(Ty))
4601 return EmitSVEPredicateCast(Call, PredTy);
4602 if (auto PredTupleTy = dyn_cast<llvm::StructType>(Ty))
4603 return EmitSVEPredicateTupleCast(Call, PredTupleTy);
4604
4605 llvm_unreachable("unsupported element count!");
4606 }
4607
4608 switch (BuiltinID) {
4609 default:
4610 return nullptr;
4611
4612 case SVE::BI__builtin_sve_svreinterpret_b: {
4613 auto SVCountTy =
4614 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
4615 Function *CastFromSVCountF =
4616 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
4617 return Builder.CreateCall(CastFromSVCountF, Ops[0]);
4618 }
4619 case SVE::BI__builtin_sve_svreinterpret_c: {
4620 auto SVCountTy =
4621 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
4622 Function *CastToSVCountF =
4623 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
4624 return Builder.CreateCall(CastToSVCountF, Ops[0]);
4625 }
4626
4627 case SVE::BI__builtin_sve_svpsel_lane_b8:
4628 case SVE::BI__builtin_sve_svpsel_lane_b16:
4629 case SVE::BI__builtin_sve_svpsel_lane_b32:
4630 case SVE::BI__builtin_sve_svpsel_lane_b64:
4631 case SVE::BI__builtin_sve_svpsel_lane_c8:
4632 case SVE::BI__builtin_sve_svpsel_lane_c16:
4633 case SVE::BI__builtin_sve_svpsel_lane_c32:
4634 case SVE::BI__builtin_sve_svpsel_lane_c64: {
4635 bool IsSVCount = isa<TargetExtType>(Ops[0]->getType());
4636 assert(((!IsSVCount || cast<TargetExtType>(Ops[0]->getType())->getName() ==
4637 "aarch64.svcount")) &&
4638 "Unexpected TargetExtType");
4639 auto SVCountTy =
4640 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
4641 Function *CastFromSVCountF =
4642 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
4643 Function *CastToSVCountF =
4644 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
4645
4646 auto OverloadedTy = getSVEType(SVETypeFlags(Builtin->TypeModifier));
4647 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_psel, OverloadedTy);
4648 llvm::Value *Ops0 =
4649 IsSVCount ? Builder.CreateCall(CastFromSVCountF, Ops[0]) : Ops[0];
4650 llvm::Value *Ops1 = EmitSVEPredicateCast(Ops[1], OverloadedTy);
4651 llvm::Value *PSel = Builder.CreateCall(F, {Ops0, Ops1, Ops[2]});
4652 return IsSVCount ? Builder.CreateCall(CastToSVCountF, PSel) : PSel;
4653 }
4654 case SVE::BI__builtin_sve_svmov_b_z: {
4655 // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
4656 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4657 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
4658 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_and_z, OverloadedTy);
4659 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]});
4660 }
4661
4662 case SVE::BI__builtin_sve_svnot_b_z: {
4663 // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg)
4664 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4665 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
4666 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_eor_z, OverloadedTy);
4667 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]});
4668 }
4669
4670 case SVE::BI__builtin_sve_svmovlb_u16:
4671 case SVE::BI__builtin_sve_svmovlb_u32:
4672 case SVE::BI__builtin_sve_svmovlb_u64:
4673 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb);
4674
4675 case SVE::BI__builtin_sve_svmovlb_s16:
4676 case SVE::BI__builtin_sve_svmovlb_s32:
4677 case SVE::BI__builtin_sve_svmovlb_s64:
4678 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb);
4679
4680 case SVE::BI__builtin_sve_svmovlt_u16:
4681 case SVE::BI__builtin_sve_svmovlt_u32:
4682 case SVE::BI__builtin_sve_svmovlt_u64:
4683 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt);
4684
4685 case SVE::BI__builtin_sve_svmovlt_s16:
4686 case SVE::BI__builtin_sve_svmovlt_s32:
4687 case SVE::BI__builtin_sve_svmovlt_s64:
4688 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt);
4689
4690 case SVE::BI__builtin_sve_svpmullt_u16:
4691 case SVE::BI__builtin_sve_svpmullt_u64:
4692 case SVE::BI__builtin_sve_svpmullt_n_u16:
4693 case SVE::BI__builtin_sve_svpmullt_n_u64:
4694 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair);
4695
4696 case SVE::BI__builtin_sve_svpmullb_u16:
4697 case SVE::BI__builtin_sve_svpmullb_u64:
4698 case SVE::BI__builtin_sve_svpmullb_n_u16:
4699 case SVE::BI__builtin_sve_svpmullb_n_u64:
4700 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair);
4701
4702 case SVE::BI__builtin_sve_svdup_n_b8:
4703 case SVE::BI__builtin_sve_svdup_n_b16:
4704 case SVE::BI__builtin_sve_svdup_n_b32:
4705 case SVE::BI__builtin_sve_svdup_n_b64: {
4706 Value *CmpNE =
4707 Builder.CreateICmpNE(Ops[0], Constant::getNullValue(Ops[0]->getType()));
4708 llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags);
4709 Value *Dup = EmitSVEDupX(CmpNE, OverloadedTy);
4710 return EmitSVEPredicateCast(Dup, cast<llvm::ScalableVectorType>(Ty));
4711 }
4712
4713 case SVE::BI__builtin_sve_svdupq_n_b8:
4714 case SVE::BI__builtin_sve_svdupq_n_b16:
4715 case SVE::BI__builtin_sve_svdupq_n_b32:
4716 case SVE::BI__builtin_sve_svdupq_n_b64:
4717 case SVE::BI__builtin_sve_svdupq_n_u8:
4718 case SVE::BI__builtin_sve_svdupq_n_s8:
4719 case SVE::BI__builtin_sve_svdupq_n_u64:
4720 case SVE::BI__builtin_sve_svdupq_n_f64:
4721 case SVE::BI__builtin_sve_svdupq_n_s64:
4722 case SVE::BI__builtin_sve_svdupq_n_u16:
4723 case SVE::BI__builtin_sve_svdupq_n_f16:
4724 case SVE::BI__builtin_sve_svdupq_n_bf16:
4725 case SVE::BI__builtin_sve_svdupq_n_s16:
4726 case SVE::BI__builtin_sve_svdupq_n_u32:
4727 case SVE::BI__builtin_sve_svdupq_n_f32:
4728 case SVE::BI__builtin_sve_svdupq_n_s32: {
4729 // These builtins are implemented by building a fixed-length vector from the
4730 // scalar operands and broadcasting its low 128 bits with dupq_lane.
4731 unsigned NumOpnds = Ops.size();
4732
4733 bool IsBoolTy =
4734 cast<llvm::VectorType>(Ty)->getElementType()->isIntegerTy(1);
4735
4736 // For svdupq_n_b* the element type is an integer of width 128/numelts,
4737 // so that the compare can use the width that is natural for the expected
4738 // number of predicate lanes.
4739 llvm::Type *EltTy = Ops[0]->getType();
4740 if (IsBoolTy)
4741 EltTy = IntegerType::get(getLLVMContext(), SVEBitsPerBlock / NumOpnds);
4742
4744 for (unsigned I = 0; I < NumOpnds; ++I)
4745 VecOps.push_back(Builder.CreateZExt(Ops[I], EltTy));
4746 Value *Vec = BuildVector(VecOps);
4747
4748 llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
4749 Value *InsertSubVec = Builder.CreateInsertVector(
4750 OverloadedTy, PoisonValue::get(OverloadedTy), Vec, uint64_t(0));
4751
4752 Function *F =
4753 CGM.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane, OverloadedTy);
4754 Value *DupQLane =
4755 Builder.CreateCall(F, {InsertSubVec, Builder.getInt64(0)});
4756
4757 if (!IsBoolTy)
4758 return DupQLane;
4759
4760 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4761 Value *Pred = EmitSVEAllTruePred(TypeFlags);
4762
4763 // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
4764 F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
4765 : Intrinsic::aarch64_sve_cmpne_wide,
4766 OverloadedTy);
4767 Value *Call = Builder.CreateCall(
4768 F, {Pred, DupQLane, EmitSVEDupX(Builder.getInt64(0))});
4769 return EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
4770 }
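// Illustrative walk-through of the case above: svdupq_n_s32(a, b, c, d) builds
// a <4 x i32> from the four scalars, inserts it into the low 128 bits of a
// poison <vscale x 4 x i32>, and broadcasts it with
// @llvm.aarch64.sve.dupq.lane (index 0); the _b* predicate forms additionally
// compare the broadcast against zero under an all-true predicate.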
4771
4772 case SVE::BI__builtin_sve_svpfalse_b:
4773 return ConstantInt::getFalse(Ty);
4774
4775 case SVE::BI__builtin_sve_svpfalse_c: {
4776 auto SVBoolTy = ScalableVectorType::get(Builder.getInt1Ty(), 16);
4777 Function *CastToSVCountF =
4778 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, Ty);
4779 return Builder.CreateCall(CastToSVCountF, ConstantInt::getFalse(SVBoolTy));
4780 }
4781
4782 case SVE::BI__builtin_sve_svlen_bf16:
4783 case SVE::BI__builtin_sve_svlen_f16:
4784 case SVE::BI__builtin_sve_svlen_f32:
4785 case SVE::BI__builtin_sve_svlen_f64:
4786 case SVE::BI__builtin_sve_svlen_s8:
4787 case SVE::BI__builtin_sve_svlen_s16:
4788 case SVE::BI__builtin_sve_svlen_s32:
4789 case SVE::BI__builtin_sve_svlen_s64:
4790 case SVE::BI__builtin_sve_svlen_u8:
4791 case SVE::BI__builtin_sve_svlen_u16:
4792 case SVE::BI__builtin_sve_svlen_u32:
4793 case SVE::BI__builtin_sve_svlen_u64: {
4794 SVETypeFlags TF(Builtin->TypeModifier);
4795 return Builder.CreateElementCount(Ty, getSVEType(TF)->getElementCount());
4796 }
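// E.g. svlen_f32 lowers to an element-count computation equivalent to
// vscale * 4, produced by Builder.CreateElementCount (illustrative note).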
4797
4798 case SVE::BI__builtin_sve_svtbl2_u8:
4799 case SVE::BI__builtin_sve_svtbl2_s8:
4800 case SVE::BI__builtin_sve_svtbl2_u16:
4801 case SVE::BI__builtin_sve_svtbl2_s16:
4802 case SVE::BI__builtin_sve_svtbl2_u32:
4803 case SVE::BI__builtin_sve_svtbl2_s32:
4804 case SVE::BI__builtin_sve_svtbl2_u64:
4805 case SVE::BI__builtin_sve_svtbl2_s64:
4806 case SVE::BI__builtin_sve_svtbl2_f16:
4807 case SVE::BI__builtin_sve_svtbl2_bf16:
4808 case SVE::BI__builtin_sve_svtbl2_f32:
4809 case SVE::BI__builtin_sve_svtbl2_f64: {
4810 SVETypeFlags TF(Builtin->TypeModifier);
4811 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, getSVEType(TF));
4812 return Builder.CreateCall(F, Ops);
4813 }
4814
4815 case SVE::BI__builtin_sve_svset_neonq_s8:
4816 case SVE::BI__builtin_sve_svset_neonq_s16:
4817 case SVE::BI__builtin_sve_svset_neonq_s32:
4818 case SVE::BI__builtin_sve_svset_neonq_s64:
4819 case SVE::BI__builtin_sve_svset_neonq_u8:
4820 case SVE::BI__builtin_sve_svset_neonq_u16:
4821 case SVE::BI__builtin_sve_svset_neonq_u32:
4822 case SVE::BI__builtin_sve_svset_neonq_u64:
4823 case SVE::BI__builtin_sve_svset_neonq_f16:
4824 case SVE::BI__builtin_sve_svset_neonq_f32:
4825 case SVE::BI__builtin_sve_svset_neonq_f64:
4826 case SVE::BI__builtin_sve_svset_neonq_bf16: {
4827 return Builder.CreateInsertVector(Ty, Ops[0], Ops[1], uint64_t(0));
4828 }
4829
4830 case SVE::BI__builtin_sve_svget_neonq_s8:
4831 case SVE::BI__builtin_sve_svget_neonq_s16:
4832 case SVE::BI__builtin_sve_svget_neonq_s32:
4833 case SVE::BI__builtin_sve_svget_neonq_s64:
4834 case SVE::BI__builtin_sve_svget_neonq_u8:
4835 case SVE::BI__builtin_sve_svget_neonq_u16:
4836 case SVE::BI__builtin_sve_svget_neonq_u32:
4837 case SVE::BI__builtin_sve_svget_neonq_u64:
4838 case SVE::BI__builtin_sve_svget_neonq_f16:
4839 case SVE::BI__builtin_sve_svget_neonq_f32:
4840 case SVE::BI__builtin_sve_svget_neonq_f64:
4841 case SVE::BI__builtin_sve_svget_neonq_bf16: {
4842 return Builder.CreateExtractVector(Ty, Ops[0], uint64_t(0));
4843 }
4844
4845 case SVE::BI__builtin_sve_svdup_neonq_s8:
4846 case SVE::BI__builtin_sve_svdup_neonq_s16:
4847 case SVE::BI__builtin_sve_svdup_neonq_s32:
4848 case SVE::BI__builtin_sve_svdup_neonq_s64:
4849 case SVE::BI__builtin_sve_svdup_neonq_u8:
4850 case SVE::BI__builtin_sve_svdup_neonq_u16:
4851 case SVE::BI__builtin_sve_svdup_neonq_u32:
4852 case SVE::BI__builtin_sve_svdup_neonq_u64:
4853 case SVE::BI__builtin_sve_svdup_neonq_f16:
4854 case SVE::BI__builtin_sve_svdup_neonq_f32:
4855 case SVE::BI__builtin_sve_svdup_neonq_f64:
4856 case SVE::BI__builtin_sve_svdup_neonq_bf16: {
4857 Value *Insert = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
4858 uint64_t(0));
4859 return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},
4860 {Insert, Builder.getInt64(0)});
4861 }
4862 }
4863
4864 /// Should not happen
4865 return nullptr;
4866}
4867
4868static void swapCommutativeSMEOperands(unsigned BuiltinID,
4869 SmallVectorImpl<Value *> &Ops) {
4870 unsigned MultiVec;
4871 switch (BuiltinID) {
4872 default:
4873 return;
4874 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x1:
4875 MultiVec = 1;
4876 break;
4877 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x2:
4878 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x2:
4879 MultiVec = 2;
4880 break;
4881 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x4:
4882 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x4:
4883 MultiVec = 4;
4884 break;
4885 }
4886
4887 if (MultiVec > 0)
4888 for (unsigned I = 0; I < MultiVec; ++I)
4889 std::swap(Ops[I + 1], Ops[I + 1 + MultiVec]);
4890}
4891
4892 Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
4893 const CallExpr *E) {
4894 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID,
4895 AArch64SMEIntrinsicsProvenSorted);
4896
4897 llvm::SmallVector<Value *, 4> Ops;
4898 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4899 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
4900
4901 if (TypeFlags.isLoad() || TypeFlags.isStore())
4902 return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4903 else if (TypeFlags.isReadZA() || TypeFlags.isWriteZA())
4904 return EmitSMEReadWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4905 else if (BuiltinID == SME::BI__builtin_sme_svzero_mask_za ||
4906 BuiltinID == SME::BI__builtin_sme_svzero_za)
4907 return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4908 else if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za ||
4909 BuiltinID == SME::BI__builtin_sme_svstr_vnum_za ||
4910 BuiltinID == SME::BI__builtin_sme_svldr_za ||
4911 BuiltinID == SME::BI__builtin_sme_svstr_za)
4912 return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);
4913
4914 // Emit set FPMR for intrinsics that require it
4915 if (TypeFlags.setsFPMR())
4916 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
4917 Ops.pop_back_val());
4918 // Handle builtins which require their multi-vector operands to be swapped
4919 swapCommutativeSMEOperands(BuiltinID, Ops);
4920
4921 // Should not happen!
4922 if (Builtin->LLVMIntrinsic == 0)
4923 return nullptr;
4924
4925 // Predicates must match the main datatype.
4926 for (Value *&Op : Ops)
4927 if (auto PredTy = dyn_cast<llvm::VectorType>(Op->getType()))
4928 if (PredTy->getElementType()->isIntegerTy(1))
4929 Op = EmitSVEPredicateCast(Op, getSVEType(TypeFlags));
4930
4931 Function *F =
4932 TypeFlags.isOverloadNone()
4933 ? CGM.getIntrinsic(Builtin->LLVMIntrinsic)
4934 : CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)});
4935
4936 return Builder.CreateCall(F, Ops);
4937}
4938
4939/// Helper for the read/write/add/inc X18 builtins: read the X18 register and
4940/// return it as an i8 pointer.
4941 static llvm::Value *readX18AsPtr(CodeGenFunction &CGF) {
4942 LLVMContext &Context = CGF.CGM.getLLVMContext();
4943 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
4944 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
4945 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
4946 llvm::Function *F =
4947 CGF.CGM.getIntrinsic(Intrinsic::read_register, {CGF.Int64Ty});
4948 llvm::Value *X18 = CGF.Builder.CreateCall(F, Metadata);
4949 return CGF.Builder.CreateIntToPtr(X18, CGF.Int8PtrTy);
4950}
4951
4952 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
4953 const CallExpr *E,
4954 llvm::Triple::ArchType Arch) {
4955 if (BuiltinID >= clang::AArch64::FirstSVEBuiltin &&
4956 BuiltinID <= clang::AArch64::LastSVEBuiltin)
4957 return EmitAArch64SVEBuiltinExpr(BuiltinID, E);
4958
4959 if (BuiltinID >= clang::AArch64::FirstSMEBuiltin &&
4960 BuiltinID <= clang::AArch64::LastSMEBuiltin)
4961 return EmitAArch64SMEBuiltinExpr(BuiltinID, E);
4962
4963 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
4964 return EmitAArch64CpuSupports(E);
4965
4966 unsigned HintID = static_cast<unsigned>(-1);
4967 switch (BuiltinID) {
4968 default: break;
4969 case clang::AArch64::BI__builtin_arm_nop:
4970 HintID = 0;
4971 break;
4972 case clang::AArch64::BI__builtin_arm_yield:
4973 case clang::AArch64::BI__yield:
4974 HintID = 1;
4975 break;
4976 case clang::AArch64::BI__builtin_arm_wfe:
4977 case clang::AArch64::BI__wfe:
4978 HintID = 2;
4979 break;
4980 case clang::AArch64::BI__builtin_arm_wfi:
4981 case clang::AArch64::BI__wfi:
4982 HintID = 3;
4983 break;
4984 case clang::AArch64::BI__builtin_arm_sev:
4985 case clang::AArch64::BI__sev:
4986 HintID = 4;
4987 break;
4988 case clang::AArch64::BI__builtin_arm_sevl:
4989 case clang::AArch64::BI__sevl:
4990 HintID = 5;
4991 break;
4992 }
4993
4994 if (HintID != static_cast<unsigned>(-1)) {
4995 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
4996 return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
4997 }
4998
4999 if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) {
5000 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
5001 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5002 return Builder.CreateCall(F, Builder.CreateZExt(Arg, CGM.Int32Ty));
5003 }
5004
5005 if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
5006 // Create call to __arm_sme_state and store the results to the two pointers.
5007 CallInst *CI = EmitRuntimeCall(CGM.CreateRuntimeFunction(
5008 llvm::FunctionType::get(StructType::get(CGM.Int64Ty, CGM.Int64Ty), {},
5009 false),
5010 "__arm_sme_state"));
5011 auto Attrs = AttributeList().addFnAttribute(getLLVMContext(),
5012 "aarch64_pstate_sm_compatible");
5013 CI->setAttributes(Attrs);
5014 CI->setCallingConv(
5015 llvm::CallingConv::
5016 AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
5017 Builder.CreateStore(Builder.CreateExtractValue(CI, 0),
5018 EmitPointerWithAlignment(E->getArg(0)));
5019 return Builder.CreateStore(Builder.CreateExtractValue(CI, 1),
5020 EmitPointerWithAlignment(E->getArg(1)));
5021 }
5022
5023 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
5024 assert((getContext().getTypeSize(E->getType()) == 32) &&
5025 "rbit of unusual size!");
5026 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5027 return Builder.CreateCall(
5028 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5029 }
5030 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
5031 assert((getContext().getTypeSize(E->getType()) == 64) &&
5032 "rbit of unusual size!");
5033 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5034 return Builder.CreateCall(
5035 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5036 }
5037
5038 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||
5039 BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {
5040 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5041 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
5042 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
5043 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)
5044 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
5045 return Res;
5046 }
5047
5048 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
5049 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5050 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
5051 "cls");
5052 }
5053 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
5054 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5055 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
5056 "cls");
5057 }
5058
5059 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
5060 BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
5061 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5062 llvm::Type *Ty = Arg->getType();
5063 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
5064 Arg, "frint32z");
5065 }
5066
5067 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
5068 BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
5069 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5070 llvm::Type *Ty = Arg->getType();
5071 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
5072 Arg, "frint64z");
5073 }
5074
5075 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
5076 BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
5077 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5078 llvm::Type *Ty = Arg->getType();
5079 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
5080 Arg, "frint32x");
5081 }
5082
5083 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
5084 BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
5085 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5086 llvm::Type *Ty = Arg->getType();
5087 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
5088 Arg, "frint64x");
5089 }
5090
5091 if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
5092 assert((getContext().getTypeSize(E->getType()) == 32) &&
5093 "__jcvt of unusual size!");
5094 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5095 return Builder.CreateCall(
5096 CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
5097 }
5098
5099 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
5100 BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
5101 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
5102 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
5103 llvm::Value *MemAddr = EmitScalarExpr(E->getArg(0));
5104 llvm::Value *ValPtr = EmitScalarExpr(E->getArg(1));
5105
5106 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
5107 // Load from the address via an LLVM intrinsic, receiving a
5108 // tuple of 8 i64 words, and store each one to ValPtr.
5109 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
5110 llvm::Value *Val = Builder.CreateCall(F, MemAddr);
5111 llvm::Value *ToRet;
5112 for (size_t i = 0; i < 8; i++) {
5113 llvm::Value *ValOffsetPtr =
5114 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
5115 Address Addr =
5116 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
5117 ToRet = Builder.CreateStore(Builder.CreateExtractValue(Val, i), Addr);
5118 }
5119 return ToRet;
5120 } else {
5121 // Load 8 i64 words from ValPtr, and store them to the address
5122 // via an LLVM intrinsic.
5123 llvm::SmallVector<llvm::Value *> Args;
5124 Args.push_back(MemAddr);
5125 for (size_t i = 0; i < 8; i++) {
5126 llvm::Value *ValOffsetPtr =
5127 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
5128 Address Addr =
5129 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
5130 Args.push_back(Builder.CreateLoad(Addr));
5131 }
5132
5133 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
5134 ? Intrinsic::aarch64_st64b
5135 : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
5136 ? Intrinsic::aarch64_st64bv
5137 : Intrinsic::aarch64_st64bv0);
5138 Function *F = CGM.getIntrinsic(Intr);
5139 return Builder.CreateCall(F, Args);
5140 }
5141 }
5142
5143 if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
5144 BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
5145
5146 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
5147 ? Intrinsic::aarch64_rndr
5148 : Intrinsic::aarch64_rndrrs);
5149 Function *F = CGM.getIntrinsic(Intr);
5150 llvm::Value *Val = Builder.CreateCall(F);
5151 Value *RandomValue = Builder.CreateExtractValue(Val, 0);
5152 Value *Status = Builder.CreateExtractValue(Val, 1);
5153
5154 Address MemAddress = EmitPointerWithAlignment(E->getArg(0));
5155 Builder.CreateStore(RandomValue, MemAddress);
5156 Status = Builder.CreateZExt(Status, Int32Ty);
5157 return Status;
5158 }
5159
5160 if (BuiltinID == clang::AArch64::BI__clear_cache) {
5161 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
5162 const FunctionDecl *FD = E->getDirectCallee();
5163 Value *Ops[2];
5164 for (unsigned i = 0; i < 2; i++)
5165 Ops[i] = EmitScalarExpr(E->getArg(i));
5166 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
5167 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
5168 StringRef Name = FD->getName();
5169 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
5170 }
5171
5172 if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
5173 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
5174 getContext().getTypeSize(E->getType()) == 128) {
5175 Function *F =
5176 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
5177 ? Intrinsic::aarch64_ldaxp
5178 : Intrinsic::aarch64_ldxp);
5179
5180 Value *LdPtr = EmitScalarExpr(E->getArg(0));
5181 Value *Val = Builder.CreateCall(F, LdPtr, "ldxp");
5182
5183 Value *Val0 = Builder.CreateExtractValue(Val, 1);
5184 Value *Val1 = Builder.CreateExtractValue(Val, 0);
5185 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
5186 Val0 = Builder.CreateZExt(Val0, Int128Ty);
5187 Val1 = Builder.CreateZExt(Val1, Int128Ty);
5188
5189 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
5190 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
5191 Val = Builder.CreateOr(Val, Val1);
5192 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
5193 } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
5194 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
5195 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
5196
5197 QualType Ty = E->getType();
5198 llvm::Type *RealResTy = ConvertType(Ty);
5199 llvm::Type *IntTy =
5200 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
5201
5202 Function *F =
5203 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
5204 ? Intrinsic::aarch64_ldaxr
5205 : Intrinsic::aarch64_ldxr,
5206 UnqualPtrTy);
5207 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
5208 Val->addParamAttr(
5209 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
5210
5211 if (RealResTy->isPointerTy())
5212 return Builder.CreateIntToPtr(Val, RealResTy);
5213
5214 llvm::Type *IntResTy = llvm::IntegerType::get(
5215 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
5216 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
5217 RealResTy);
5218 }
5219
5220 if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
5221 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
5222 getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
5223 Function *F =
5224 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
5225 ? Intrinsic::aarch64_stlxp
5226 : Intrinsic::aarch64_stxp);
5227 llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
5228
5229 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
5230 EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
5231
5232 Tmp = Tmp.withElementType(STy);
5233 llvm::Value *Val = Builder.CreateLoad(Tmp);
5234
5235 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
5236 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
5237 Value *StPtr = EmitScalarExpr(E->getArg(1));
5238 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
5239 }
5240
5241 if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
5242 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
5243 Value *StoreVal = EmitScalarExpr(E->getArg(0));
5244 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
5245
5246 QualType Ty = E->getArg(0)->getType();
5247 llvm::Type *StoreTy =
5248 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
5249
5250 if (StoreVal->getType()->isPointerTy())
5251 StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
5252 else {
5253 llvm::Type *IntTy = llvm::IntegerType::get(
5254 getLLVMContext(),
5255 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
5256 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
5257 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
5258 }
5259
5260 Function *F =
5261 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
5262 ? Intrinsic::aarch64_stlxr
5263 : Intrinsic::aarch64_stxr,
5264 StoreAddr->getType());
5265 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
5266 CI->addParamAttr(
5267 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
5268 return CI;
5269 }
5270
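// __getReg(n) reads general-purpose register xN (or sp when n == 31) through
// llvm.read_register with a named-register metadata node; e.g. __getReg(29)
// is emitted roughly as
//   call i64 @llvm.read_register.i64(metadata !"x29")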
5271 if (BuiltinID == clang::AArch64::BI__getReg) {
5272 Expr::EvalResult Result;
5273 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
5274 llvm_unreachable("Sema will ensure that the parameter is constant");
5275
5276 llvm::APSInt Value = Result.Val.getInt();
5277 LLVMContext &Context = CGM.getLLVMContext();
5278 std::string Reg = Value == 31 ? "sp" : "x" + toString(Value, 10);
5279
5280 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
5281 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
5282 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
5283
5284 llvm::Function *F =
5285 CGM.getIntrinsic(Intrinsic::read_register, {Int64Ty});
5286 return Builder.CreateCall(F, Metadata);
5287 }
5288
5289 if (BuiltinID == clang::AArch64::BI__break) {
5290 Expr::EvalResult Result;
5291 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
5292 llvm_unreachable("Sema will ensure that the parameter is constant");
5293
5294 llvm::Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
5295 return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
5296 }
5297
5298 if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
5299 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
5300 return Builder.CreateCall(F);
5301 }
5302
5303 if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
5304 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
5305 llvm::SyncScope::SingleThread);
5306
5307 // CRC32
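// For example, __builtin_arm_crc32b(acc, data) is emitted roughly as
//   call i32 @llvm.aarch64.crc32b(i32 %acc, i32 %data)
// with the narrow data operand zero-extended below to match the intrinsic's
// second parameter type.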
5308 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
5309 switch (BuiltinID) {
5310 case clang::AArch64::BI__builtin_arm_crc32b:
5311 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
5312 case clang::AArch64::BI__builtin_arm_crc32cb:
5313 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
5314 case clang::AArch64::BI__builtin_arm_crc32h:
5315 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
5316 case clang::AArch64::BI__builtin_arm_crc32ch:
5317 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
5318 case clang::AArch64::BI__builtin_arm_crc32w:
5319 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
5320 case clang::AArch64::BI__builtin_arm_crc32cw:
5321 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
5322 case clang::AArch64::BI__builtin_arm_crc32d:
5323 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
5324 case clang::AArch64::BI__builtin_arm_crc32cd:
5325 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
5326 }
5327
5328 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
5329 Value *Arg0 = EmitScalarExpr(E->getArg(0));
5330 Value *Arg1 = EmitScalarExpr(E->getArg(1));
5331 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
5332
5333 llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
5334 Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
5335
5336 return Builder.CreateCall(F, {Arg0, Arg1});
5337 }
5338
5339 // Memory Operations (MOPS)
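// For example, __builtin_arm_mops_memset_tag(dst, val, size) is emitted
// roughly as
//   call ptr @llvm.aarch64.mops.memset.tag(ptr %dst, i8 %val, i64 %size)
// after truncating the value to i8 and widening the size to i64.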
5340 if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
5341 Value *Dst = EmitScalarExpr(E->getArg(0));
5342 Value *Val = EmitScalarExpr(E->getArg(1));
5343 Value *Size = EmitScalarExpr(E->getArg(2));
5344 Val = Builder.CreateTrunc(Val, Int8Ty);
5345 Size = Builder.CreateIntCast(Size, Int64Ty, false);
5346 return Builder.CreateCall(
5347 CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
5348 }
5349
5350 // Memory Tagging Extensions (MTE) Intrinsics
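// For example, __builtin_arm_irg(p, m) zero-extends the mask to i64 and is
// emitted roughly as
//   call ptr @llvm.aarch64.irg(ptr %p, i64 %m)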
5351 Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
5352 switch (BuiltinID) {
5353 case clang::AArch64::BI__builtin_arm_irg:
5354 MTEIntrinsicID = Intrinsic::aarch64_irg; break;
5355 case clang::AArch64::BI__builtin_arm_addg:
5356 MTEIntrinsicID = Intrinsic::aarch64_addg; break;
5357 case clang::AArch64::BI__builtin_arm_gmi:
5358 MTEIntrinsicID = Intrinsic::aarch64_gmi; break;
5359 case clang::AArch64::BI__builtin_arm_ldg:
5360 MTEIntrinsicID = Intrinsic::aarch64_ldg; break;
5361 case clang::AArch64::BI__builtin_arm_stg:
5362 MTEIntrinsicID = Intrinsic::aarch64_stg; break;
5363 case clang::AArch64::BI__builtin_arm_subp:
5364 MTEIntrinsicID = Intrinsic::aarch64_subp; break;
5365 }
5366
5367 if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
5368 if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
5369 Value *Pointer = EmitScalarExpr(E->getArg(0));
5370 Value *Mask = EmitScalarExpr(E->getArg(1));
5371
5372 Mask = Builder.CreateZExt(Mask, Int64Ty);
5373 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
5374 {Pointer, Mask});
5375 }
5376 if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
5377 Value *Pointer = EmitScalarExpr(E->getArg(0));
5378 Value *TagOffset = EmitScalarExpr(E->getArg(1));
5379
5380 TagOffset = Builder.CreateZExt(TagOffset, Int64Ty);
5381 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
5382 {Pointer, TagOffset});
5383 }
5384 if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
5385 Value *Pointer = EmitScalarExpr(E->getArg(0));
5386 Value *ExcludedMask = EmitScalarExpr(E->getArg(1));
5387
5388 ExcludedMask = Builder.CreateZExt(ExcludedMask, Int64Ty);
5389 return Builder.CreateCall(
5390 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask});
5391 }
5392 // Although it is possible to supply a different return
5393 // address (first arg) to this intrinsic, for now we set the
5394 // returned address to the same value as the input address.
5395 if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
5396 Value *TagAddress = EmitScalarExpr(E->getArg(0));
5397 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
5398 {TagAddress, TagAddress});
5399 }
5400 // Although it is possible to supply a different tag (to set)
5401 // to this intrinsic (as the first arg), for now we supply
5402 // the tag that is in the input address arg (the common use case).
5403 if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
5404 Value *TagAddress = EmitScalarExpr(E->getArg(0));
5405 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
5406 {TagAddress, TagAddress});
5407 }
5408 if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
5409 Value *PointerA = EmitScalarExpr(E->getArg(0));
5410 Value *PointerB = EmitScalarExpr(E->getArg(1));
5411 return Builder.CreateCall(
5412 CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB});
5413 }
5414 }
5415
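// The register itself is always accessed at its architectural width: i64 for
// the plain and pointer forms, i128 for the *128 variants; only the value
// type is narrowed to i32 for __builtin_arm_rsr/__builtin_arm_wsr.
// EmitSpecialRegisterBuiltin is expected to insert whatever conversion is
// needed between RegisterType and ValueType.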
5416 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
5417 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
5418 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
5419 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
5420 BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
5421 BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
5422 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
5423 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {
5424
5425 SpecialRegisterAccessKind AccessKind = Write;
5426 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
5427 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
5428 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
5429 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
5430 AccessKind = VolatileRead;
5431
5432 bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
5433 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;
5434
5435 bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
5436 BuiltinID == clang::AArch64::BI__builtin_arm_wsr;
5437
5438 bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
5439 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;
5440
5441 llvm::Type *ValueType;
5442 llvm::Type *RegisterType = Int64Ty;
5443 if (Is32Bit) {
5444 ValueType = Int32Ty;
5445 } else if (Is128Bit) {
5446 llvm::Type *Int128Ty =
5447 llvm::IntegerType::getInt128Ty(CGM.getLLVMContext());
5448 ValueType = Int128Ty;
5449 RegisterType = Int128Ty;
5450 } else if (IsPointerBuiltin) {
5451 ValueType = VoidPtrTy;
5452 } else {
5453 ValueType = Int64Ty;
5454 }
5455
5456 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
5457 AccessKind);
5458 }
5459
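// The first argument uses the MSVC ARM64_SYSREG-style encoding (op0 in bit
// 14, op1 in bits 13:11, CRn in bits 10:7, CRm in bits 6:3, op2 in bits 2:0).
// As a worked example, ARM64_SYSREG(3, 3, 13, 0, 2) (TPIDR_EL0) decodes to
// the metadata string "3:3:13:0:2"; for __sys, op0 is hard-wired to 1.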
5460 if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
5461 BuiltinID == clang::AArch64::BI_WriteStatusReg ||
5462 BuiltinID == clang::AArch64::BI__sys) {
5463 LLVMContext &Context = CGM.getLLVMContext();
5464
5465 unsigned SysReg =
5466 E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();
5467
5468 std::string SysRegStr;
5469 unsigned SysRegOp0 = (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
5470 BuiltinID == clang::AArch64::BI_WriteStatusReg)
5471 ? ((1 << 1) | ((SysReg >> 14) & 1))
5472 : 1;
5473 llvm::raw_string_ostream(SysRegStr)
5474 << SysRegOp0 << ":" << ((SysReg >> 11) & 7) << ":"
5475 << ((SysReg >> 7) & 15) << ":" << ((SysReg >> 3) & 15) << ":"
5476 << (SysReg & 7);
5477
5478 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
5479 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
5480 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
5481
5482 llvm::Type *RegisterType = Int64Ty;
5483 llvm::Type *Types[] = { RegisterType };
5484
5485 if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
5486 llvm::Function *F = CGM.getIntrinsic(Intrinsic::read_register, Types);
5487
5488 return Builder.CreateCall(F, Metadata);
5489 }
5490
5491 llvm::Function *F = CGM.getIntrinsic(Intrinsic::write_register, Types);
5492 llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
5493 llvm::Value *Result = Builder.CreateCall(F, {Metadata, ArgValue});
5494 if (BuiltinID == clang::AArch64::BI__sys) {
5495 // Return 0 for convenience, even though MSVC returns some other undefined
5496 // value.
5497 Result = ConstantInt::get(Builder.getInt32Ty(), 0);
5498 }
5499 return Result;
5500 }
5501
5502 if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
5503 llvm::Function *F =
5504 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
5505 return Builder.CreateCall(F);
5506 }
5507
5508 if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
5509 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
5510 return Builder.CreateCall(F);
5511 }
5512
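// __mulh/__umulh return the high 64 bits of the full 128-bit product, so
// e.g. __umulh(a, b) is emitted roughly as
//   trunc((zext(a) * zext(b)) >> 64)
// with the operands widened to i128 first.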
5513 if (BuiltinID == clang::AArch64::BI__mulh ||
5514 BuiltinID == clang::AArch64::BI__umulh) {
5515 llvm::Type *ResType = ConvertType(E->getType());
5516 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
5517
5518 bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
5519 Value *LHS =
5520 Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned);
5521 Value *RHS =
5522 Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned);
5523
5524 Value *MulResult, *HigherBits;
5525 if (IsSigned) {
5526 MulResult = Builder.CreateNSWMul(LHS, RHS);
5527 HigherBits = Builder.CreateAShr(MulResult, 64);
5528 } else {
5529 MulResult = Builder.CreateNUWMul(LHS, RHS);
5530 HigherBits = Builder.CreateLShr(MulResult, 64);
5531 }
5532 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
5533
5534 return HigherBits;
5535 }
5536
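// The __writex18*/__readx18* family accesses memory at [x18 + offset] with
// byte alignment; e.g. __writex18word(off, v) becomes an i16 store to
// x18 + zext(off) through a GEP on the raw x18 pointer.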
5537 if (BuiltinID == AArch64::BI__writex18byte ||
5538 BuiltinID == AArch64::BI__writex18word ||
5539 BuiltinID == AArch64::BI__writex18dword ||
5540 BuiltinID == AArch64::BI__writex18qword) {
5541 // Process the args first
5542 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
5543 Value *DataArg = EmitScalarExpr(E->getArg(1));
5544
5545 // Read x18 as i8*
5546 llvm::Value *X18 = readX18AsPtr(*this);
5547
5548 // Store val at x18 + offset
5549 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
5550 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
5551 StoreInst *Store =
5552 Builder.CreateAlignedStore(DataArg, Ptr, CharUnits::One());
5553 return Store;
5554 }
5555
5556 if (BuiltinID == AArch64::BI__readx18byte ||
5557 BuiltinID == AArch64::BI__readx18word ||
5558 BuiltinID == AArch64::BI__readx18dword ||
5559 BuiltinID == AArch64::BI__readx18qword) {
5560 // Process the args first
5561 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
5562
5563 // Read x18 as i8*
5564 llvm::Value *X18 = readX18AsPtr(*this);
5565
5566 // Load x18 + offset
5567 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
5568 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
5569 llvm::Type *IntTy = ConvertType(E->getType());
5570 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
5571 return Load;
5572 }
5573
5574 if (BuiltinID == AArch64::BI__addx18byte ||
5575 BuiltinID == AArch64::BI__addx18word ||
5576 BuiltinID == AArch64::BI__addx18dword ||
5577 BuiltinID == AArch64::BI__addx18qword ||
5578 BuiltinID == AArch64::BI__incx18byte ||
5579 BuiltinID == AArch64::BI__incx18word ||
5580 BuiltinID == AArch64::BI__incx18dword ||
5581 BuiltinID == AArch64::BI__incx18qword) {
5582 llvm::Type *IntTy;
5583 bool isIncrement;
5584 switch (BuiltinID) {
5585 case AArch64::BI__incx18byte:
5586 IntTy = Int8Ty;
5587 isIncrement = true;
5588 break;
5589 case AArch64::BI__incx18word:
5590 IntTy = Int16Ty;
5591 isIncrement = true;
5592 break;
5593 case AArch64::BI__incx18dword:
5594 IntTy = Int32Ty;
5595 isIncrement = true;
5596 break;
5597 case AArch64::BI__incx18qword:
5598 IntTy = Int64Ty;
5599 isIncrement = true;
5600 break;
5601 default:
5602 IntTy = ConvertType(E->getArg(1)->getType());
5603 isIncrement = false;
5604 break;
5605 }
5606 // Process the args first
5607 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
5608 Value *ValToAdd =
5609 isIncrement ? ConstantInt::get(IntTy, 1) : EmitScalarExpr(E->getArg(1));
5610
5611 // Read x18 as i8*
5612 llvm::Value *X18 = readX18AsPtr(*this);
5613
5614 // Load x18 + offset
5615 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
5616 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
5617 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
5618
5619 // Add values
5620 Value *AddResult = Builder.CreateAdd(Load, ValToAdd);
5621
5622 // Store val at x18 + offset
5623 StoreInst *Store =
5624 Builder.CreateAlignedStore(AddResult, Ptr, CharUnits::One());
5625 return Store;
5626 }
5627
5628 if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
5629 BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
5630 BuiltinID == AArch64::BI_CopyInt32FromFloat ||
5631 BuiltinID == AArch64::BI_CopyInt64FromDouble) {
5632 Value *Arg = EmitScalarExpr(E->getArg(0));
5633 llvm::Type *RetTy = ConvertType(E->getType());
5634 return Builder.CreateBitCast(Arg, RetTy);
5635 }
5636
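// _CountLeadingZeros* map onto llvm.ctlz with the "is zero poison" flag set
// to false; _CountLeadingOnes* invert the input first, and the 64-bit
// variants truncate the i64 result to i32 to match the MSVC return type.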
5637 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
5638 BuiltinID == AArch64::BI_CountLeadingOnes64 ||
5639 BuiltinID == AArch64::BI_CountLeadingZeros ||
5640 BuiltinID == AArch64::BI_CountLeadingZeros64) {
5641 Value *Arg = EmitScalarExpr(E->getArg(0));
5642 llvm::Type *ArgType = Arg->getType();
5643
5644 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
5645 BuiltinID == AArch64::BI_CountLeadingOnes64)
5646 Arg = Builder.CreateXor(Arg, Constant::getAllOnesValue(ArgType));
5647
5648 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
5649 Value *Result = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
5650
5651 if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||
5652 BuiltinID == AArch64::BI_CountLeadingZeros64)
5653 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5654 return Result;
5655 }
5656
5657 if (BuiltinID == AArch64::BI_CountLeadingSigns ||
5658 BuiltinID == AArch64::BI_CountLeadingSigns64) {
5659 Value *Arg = EmitScalarExpr(E->getArg(0));
5660
5661 Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)
5662 ? CGM.getIntrinsic(Intrinsic::aarch64_cls)
5663 : CGM.getIntrinsic(Intrinsic::aarch64_cls64);
5664
5665 Value *Result = Builder.CreateCall(F, Arg, "cls");
5666 if (BuiltinID == AArch64::BI_CountLeadingSigns64)
5667 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5668 return Result;
5669 }
5670
5671 if (BuiltinID == AArch64::BI_CountOneBits ||
5672 BuiltinID == AArch64::BI_CountOneBits64) {
5673 Value *ArgValue = EmitScalarExpr(E->getArg(0));
5674 llvm::Type *ArgType = ArgValue->getType();
5675 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
5676
5677 Value *Result = Builder.CreateCall(F, ArgValue);
5678 if (BuiltinID == AArch64::BI_CountOneBits64)
5679 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5680 return Result;
5681 }
5682
5683 if (BuiltinID == AArch64::BI__prefetch) {
5684 Value *Address = EmitScalarExpr(E->getArg(0));
5685 Value *RW = llvm::ConstantInt::get(Int32Ty, 0);
5686 Value *Locality = ConstantInt::get(Int32Ty, 3);
5687 Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
5688 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
5689 return Builder.CreateCall(F, {Address, RW, Locality, Data});
5690 }
5691
5692 if (BuiltinID == AArch64::BI__hlt) {
5693 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hlt);
5694 Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
5695
5696 // Return 0 for convenience, even though MSVC returns some other undefined
5697 // value.
5698 return ConstantInt::get(Builder.getInt32Ty(), 0);
5699 }
5700
5701 if (BuiltinID == NEON::BI__builtin_neon_vcvth_bf16_f32)
5702 return Builder.CreateFPTrunc(
5703 Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
5704 Builder.getFloatTy()),
5705 Builder.getBFloatTy());
5706
5707 // Handle MSVC intrinsics before argument evaluation to prevent double
5708 // evaluation.
5709 if (std::optional<MSVCIntrin> MsvcIntId =
5710 translateAarch64ToMsvcIntrin(BuiltinID))
5711 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
5712
5713 // Some intrinsics are equivalent; if they are, use the base intrinsic ID.
5714 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
5715 return P.first == BuiltinID;
5716 });
5717 if (It != end(NEONEquivalentIntrinsicMap))
5718 BuiltinID = It->second;
5719
5720 // Find out if any arguments are required to be integer constant
5721 // expressions.
5722 unsigned ICEArguments = 0;
5723 ASTContext::GetBuiltinTypeError Error;
5724 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
5725 assert(Error == ASTContext::GE_None && "Should not codegen an error");
5726
5727 llvm::SmallVector<Value*, 4> Ops;
5728 Address PtrOp0 = Address::invalid();
5729 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
5730 if (i == 0) {
5731 switch (BuiltinID) {
5732 case NEON::BI__builtin_neon_vld1_v:
5733 case NEON::BI__builtin_neon_vld1q_v:
5734 case NEON::BI__builtin_neon_vld1_dup_v:
5735 case NEON::BI__builtin_neon_vld1q_dup_v:
5736 case NEON::BI__builtin_neon_vld1_lane_v:
5737 case NEON::BI__builtin_neon_vld1q_lane_v:
5738 case NEON::BI__builtin_neon_vst1_v:
5739 case NEON::BI__builtin_neon_vst1q_v:
5740 case NEON::BI__builtin_neon_vst1_lane_v:
5741 case NEON::BI__builtin_neon_vst1q_lane_v:
5742 case NEON::BI__builtin_neon_vldap1_lane_s64:
5743 case NEON::BI__builtin_neon_vldap1q_lane_s64:
5744 case NEON::BI__builtin_neon_vstl1_lane_s64:
5745 case NEON::BI__builtin_neon_vstl1q_lane_s64:
5746 // Get the alignment for the argument in addition to the value;
5747 // we'll use it later.
5748 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
5749 Ops.push_back(PtrOp0.emitRawPointer(*this));
5750 continue;
5751 }
5752 }
5753 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
5754 }
5755
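// At this point every argument except the last has been evaluated into Ops;
// the trailing argument is either the final operand of a SISD builtin
// (pushed just below) or the constant NeonTypeFlags value that selects the
// overload of the generic "_v" builtins.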
5756 auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
5757 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
5758 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
5759
5760 if (Builtin) {
5761 Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
5762 Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
5763 assert(Result && "SISD intrinsic should have been handled");
5764 return Result;
5765 }
5766
5767 const Expr *Arg = E->getArg(E->getNumArgs()-1);
5768 NeonTypeFlags Type(0);
5769 if (std::optional<llvm::APSInt> Result =
5770 Arg->getIntegerConstantExpr(getContext()))
5771 // Determine the type of this overloaded NEON intrinsic.
5772 Type = NeonTypeFlags(Result->getZExtValue());
5773
5774 bool usgn = Type.isUnsigned();
5775 bool quad = Type.isQuad();
5776
5777 // Handle non-overloaded intrinsics first.
5778 switch (BuiltinID) {
5779 default: break;
5780 case NEON::BI__builtin_neon_vabsh_f16:
5781 Ops.push_back(EmitScalarExpr(E->getArg(0)));
5782 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
5783 case NEON::BI__builtin_neon_vaddq_p128: {
5784 llvm::Type *Ty = GetNeonType(this, NeonTypeFlags::Poly128);
5785 Ops.push_back(EmitScalarExpr(E->getArg(1)));
5786 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5787 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5788 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
5789 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
5790 return Builder.CreateBitCast(Ops[0], Int128Ty);
5791 }
5792 case NEON::BI__builtin_neon_vldrq_p128: {
5793 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
5794 Value *Ptr = EmitScalarExpr(E->getArg(0));
5795 return Builder.CreateAlignedLoad(Int128Ty, Ptr,
5796 CharUnits::fromQuantity(16));
5797 }
5798 case NEON::BI__builtin_neon_vstrq_p128: {
5799 Value *Ptr = Ops[0];
5800 return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
5801 }
5802 case NEON::BI__builtin_neon_vcvts_f32_u32:
5803 case NEON::BI__builtin_neon_vcvtd_f64_u64:
5804 usgn = true;
5805 [[fallthrough]];
5806 case NEON::BI__builtin_neon_vcvts_f32_s32:
5807 case NEON::BI__builtin_neon_vcvtd_f64_s64: {
5808 Ops.push_back(EmitScalarExpr(E->getArg(0)));
5809 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5810 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5811 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5812 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
5813 if (usgn)
5814 return Builder.CreateUIToFP(Ops[0], FTy);
5815 return Builder.CreateSIToFP(Ops[0], FTy);
5816 }
5817 case NEON::BI__builtin_neon_vcvth_f16_u16:
5818 case NEON::BI__builtin_neon_vcvth_f16_u32:
5819 case NEON::BI__builtin_neon_vcvth_f16_u64:
5820 usgn = true;
5821 [[fallthrough]];
5822 case NEON::BI__builtin_neon_vcvth_f16_s16:
5823 case NEON::BI__builtin_neon_vcvth_f16_s32:
5824 case NEON::BI__builtin_neon_vcvth_f16_s64: {
5825 Ops.push_back(EmitScalarExpr(E->getArg(0)));
5826 llvm::Type *FTy = HalfTy;
5827 llvm::Type *InTy;
5828 if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
5829 InTy = Int64Ty;
5830 else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
5831 InTy = Int32Ty;
5832 else
5833 InTy = Int16Ty;
5834 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
5835 if (usgn)
5836 return Builder.CreateUIToFP(Ops[0], FTy);
5837 return Builder.CreateSIToFP(Ops[0], FTy);
5838 }
5839 case NEON::BI__builtin_neon_vcvtah_u16_f16:
5840 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
5841 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
5842 case NEON::BI__builtin_neon_vcvtph_u16_f16:
5843 case NEON::BI__builtin_neon_vcvth_u16_f16:
5844 case NEON::BI__builtin_neon_vcvtah_s16_f16:
5845 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
5846 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
5847 case NEON::BI__builtin_neon_vcvtph_s16_f16:
5848 case NEON::BI__builtin_neon_vcvth_s16_f16: {
5849 unsigned Int;
5850 llvm::Type *InTy = Int16Ty;
5851 llvm::Type* FTy = HalfTy;
5852 llvm::Type *Tys[2] = {InTy, FTy};
5853 Ops.push_back(EmitScalarExpr(E->getArg(0)));
5854 switch (BuiltinID) {
5855 default: llvm_unreachable("missing builtin ID in switch!");
5856 case NEON::BI__builtin_neon_vcvtah_u16_f16:
5857 Int = Intrinsic::aarch64_neon_fcvtau; break;
5858 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
5859 Int = Intrinsic::aarch64_neon_fcvtmu; break;
5860 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
5861 Int = Intrinsic::aarch64_neon_fcvtnu; break;
5862 case NEON::BI__builtin_neon_vcvtph_u16_f16:
5863 Int = Intrinsic::aarch64_neon_fcvtpu; break;
5864 case NEON::BI__builtin_neon_vcvth_u16_f16:
5865 Int = Intrinsic::aarch64_neon_fcvtzu; break;
5866 case NEON::BI__builtin_neon_vcvtah_s16_f16:
5867 Int = Intrinsic::aarch64_neon_fcvtas; break;
5868 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
5869 Int = Intrinsic::aarch64_neon_fcvtms; break;
5870 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
5871 Int = Intrinsic::aarch64_neon_fcvtns; break;
5872 case NEON::BI__builtin_neon_vcvtph_s16_f16:
5873 Int = Intrinsic::aarch64_neon_fcvtps; break;
5874 case NEON::BI__builtin_neon_vcvth_s16_f16:
5875 Int = Intrinsic::aarch64_neon_fcvtzs; break;
5876 }
5877 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
5878 }
5879 case NEON::BI__builtin_neon_vcaleh_f16:
5880 case NEON::BI__builtin_neon_vcalth_f16:
5881 case NEON::BI__builtin_neon_vcageh_f16:
5882 case NEON::BI__builtin_neon_vcagth_f16: {
5883 unsigned Int;
5884 llvm::Type* InTy = Int32Ty;
5885 llvm::Type* FTy = HalfTy;
5886 llvm::Type *Tys[2] = {InTy, FTy};
5887 Ops.push_back(EmitScalarExpr(E->getArg(1)));
5888 switch (BuiltinID) {
5889 default: llvm_unreachable("missing builtin ID in switch!");
5890 case NEON::BI__builtin_neon_vcageh_f16:
5891 Int = Intrinsic::aarch64_neon_facge; break;
5892 case NEON::BI__builtin_neon_vcagth_f16:
5893 Int = Intrinsic::aarch64_neon_facgt; break;
5894 case NEON::BI__builtin_neon_vcaleh_f16:
5895 Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
5896 case NEON::BI__builtin_neon_vcalth_f16:
5897 Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
5898 }
5899 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
5900 return Builder.CreateTrunc(Ops[0], Int16Ty);
5901 }
5902 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
5903 case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
5904 unsigned Int;
5905 llvm::Type* InTy = Int32Ty;
5906 llvm::Type* FTy = HalfTy;
5907 llvm::Type *Tys[2] = {InTy, FTy};
5908 Ops.push_back(EmitScalarExpr(E->getArg(1)));
5909 switch (BuiltinID) {
5910 default: llvm_unreachable("missing builtin ID in switch!");
5911 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
5912 Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
5913 case NEON::BI__builtin_neon_vcvth_n_u16_f16:
5914 Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
5915 }
5916 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
5917 return Builder.CreateTrunc(Ops[0], Int16Ty);
5918 }
5919 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
5920 case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
5921 unsigned Int;
5922 llvm::Type* FTy = HalfTy;
5923 llvm::Type* InTy = Int32Ty;
5924 llvm::Type *Tys[2] = {FTy, InTy};
5925 Ops.push_back(EmitScalarExpr(E->getArg(1)));
5926 switch (BuiltinID) {
5927 default: llvm_unreachable("missing builtin ID in switch!");
5928 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
5929 Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
5930 Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
5931 break;
5932 case NEON::BI__builtin_neon_vcvth_n_f16_u16:
5933 Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
5934 Ops[0] = Builder.CreateZExt(Ops[0], InTy);
5935 break;
5936 }
5937 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
5938 }
5939 case NEON::BI__builtin_neon_vpaddd_s64: {
5940 auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2);
5941 Value *Vec = EmitScalarExpr(E->getArg(0));
5942 // The vector is v2i64, so make sure it's bitcast to that.
5943 Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
5944 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5945 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5946 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5947 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5948 // Pairwise addition of a v2i64 into a scalar i64.
5949 return Builder.CreateAdd(Op0, Op1, "vpaddd");
5950 }
5951 case NEON::BI__builtin_neon_vpaddd_f64: {
5952 auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2);
5953 Value *Vec = EmitScalarExpr(E->getArg(0));
5954 // The vector is v2f64, so make sure it's bitcast to that.
5955 Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
5956 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5957 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5958 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5959 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5960 // Pairwise addition of a v2f64 into a scalar f64.
5961 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5962 }
5963 case NEON::BI__builtin_neon_vpadds_f32: {
5964 auto *Ty = llvm::FixedVectorType::get(FloatTy, 2);
5965 Value *Vec = EmitScalarExpr(E->getArg(0));
5966 // The vector is v2f32, so make sure it's bitcast to that.
5967 Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
5968 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5969 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5970 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5971 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5972 // Pairwise addition of a v2f32 into a scalar f32.
5973 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5974 }
5975 case NEON::BI__builtin_neon_vceqzd_s64:
5976 Ops.push_back(EmitScalarExpr(E->getArg(0)));
5977 return EmitAArch64CompareBuiltinExpr(
5978 Ops[0], ConvertType(E->getCallReturnType(getContext())),
5979 ICmpInst::ICMP_EQ, "vceqz");
5980 case NEON::BI__builtin_neon_vceqzd_f64:
5981 case NEON::BI__builtin_neon_vceqzs_f32:
5982 case NEON::BI__builtin_neon_vceqzh_f16:
5983 Ops.push_back(EmitScalarExpr(E->getArg(0)));
5984 return EmitAArch64CompareBuiltinExpr(
5985 Ops[0], ConvertType(E->getCallReturnType(getContext())),
5986 ICmpInst::FCMP_OEQ, "vceqz");
5987 case NEON::BI__builtin_neon_vcgezd_s64:
5988 Ops.push_back(EmitScalarExpr(E->getArg(0)));
5989 return EmitAArch64CompareBuiltinExpr(
5990 Ops[0], ConvertType(E->getCallReturnType(getContext())),
5991 ICmpInst::ICMP_SGE, "vcgez");
5992 case NEON::BI__builtin_neon_vcgezd_f64:
5993 case NEON::BI__builtin_neon_vcgezs_f32:
5994 case NEON::BI__builtin_neon_vcgezh_f16:
5995 Ops.push_back(EmitScalarExpr(E->getArg(0)));
5996 return EmitAArch64CompareBuiltinExpr(
5997 Ops[0], ConvertType(E->getCallReturnType(getContext())),
5998 ICmpInst::FCMP_OGE, "vcgez");
5999 case NEON::BI__builtin_neon_vclezd_s64:
6000 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6001 return EmitAArch64CompareBuiltinExpr(
6002 Ops[0], ConvertType(E->getCallReturnType(getContext())),
6003 ICmpInst::ICMP_SLE, "vclez");
6004 case NEON::BI__builtin_neon_vclezd_f64:
6005 case NEON::BI__builtin_neon_vclezs_f32:
6006 case NEON::BI__builtin_neon_vclezh_f16:
6007 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6008 return EmitAArch64CompareBuiltinExpr(
6009 Ops[0], ConvertType(E->getCallReturnType(getContext())),
6010 ICmpInst::FCMP_OLE, "vclez");
6011 case NEON::BI__builtin_neon_vcgtzd_s64:
6012 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6013 return EmitAArch64CompareBuiltinExpr(
6014 Ops[0], ConvertType(E->getCallReturnType(getContext())),
6015 ICmpInst::ICMP_SGT, "vcgtz");
6016 case NEON::BI__builtin_neon_vcgtzd_f64:
6017 case NEON::BI__builtin_neon_vcgtzs_f32:
6018 case NEON::BI__builtin_neon_vcgtzh_f16:
6019 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6020 return EmitAArch64CompareBuiltinExpr(
6021 Ops[0], ConvertType(E->getCallReturnType(getContext())),
6022 ICmpInst::FCMP_OGT, "vcgtz");
6023 case NEON::BI__builtin_neon_vcltzd_s64:
6024 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6025 return EmitAArch64CompareBuiltinExpr(
6026 Ops[0], ConvertType(E->getCallReturnType(getContext())),
6027 ICmpInst::ICMP_SLT, "vcltz");
6028
6029 case NEON::BI__builtin_neon_vcltzd_f64:
6030 case NEON::BI__builtin_neon_vcltzs_f32:
6031 case NEON::BI__builtin_neon_vcltzh_f16:
6032 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6033 return EmitAArch64CompareBuiltinExpr(
6034 Ops[0], ConvertType(E->getCallReturnType(getContext())),
6035 ICmpInst::FCMP_OLT, "vcltz");
6036
6037 case NEON::BI__builtin_neon_vceqzd_u64: {
6038 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6039 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
6040 Ops[0] =
6041 Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
6042 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
6043 }
6044 case NEON::BI__builtin_neon_vceqd_f64:
6045 case NEON::BI__builtin_neon_vcled_f64:
6046 case NEON::BI__builtin_neon_vcltd_f64:
6047 case NEON::BI__builtin_neon_vcged_f64:
6048 case NEON::BI__builtin_neon_vcgtd_f64: {
6049 llvm::CmpInst::Predicate P;
6050 switch (BuiltinID) {
6051 default: llvm_unreachable("missing builtin ID in switch!");
6052 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
6053 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
6054 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
6055 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
6056 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
6057 }
6058 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6059 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6060 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
6061 if (P == llvm::FCmpInst::FCMP_OEQ)
6062 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
6063 else
6064 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
6065 return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
6066 }
6067 case NEON::BI__builtin_neon_vceqs_f32:
6068 case NEON::BI__builtin_neon_vcles_f32:
6069 case NEON::BI__builtin_neon_vclts_f32:
6070 case NEON::BI__builtin_neon_vcges_f32:
6071 case NEON::BI__builtin_neon_vcgts_f32: {
6072 llvm::CmpInst::Predicate P;
6073 switch (BuiltinID) {
6074 default: llvm_unreachable("missing builtin ID in switch!");
6075 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
6076 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
6077 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
6078 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
6079 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
6080 }
6081 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6082 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
6083 Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
6084 if (P == llvm::FCmpInst::FCMP_OEQ)
6085 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
6086 else
6087 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
6088 return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
6089 }
6090 case NEON::BI__builtin_neon_vceqh_f16:
6091 case NEON::BI__builtin_neon_vcleh_f16:
6092 case NEON::BI__builtin_neon_vclth_f16:
6093 case NEON::BI__builtin_neon_vcgeh_f16:
6094 case NEON::BI__builtin_neon_vcgth_f16: {
6095 llvm::CmpInst::Predicate P;
6096 switch (BuiltinID) {
6097 default: llvm_unreachable("missing builtin ID in switch!");
6098 case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
6099 case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
6100 case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
6101 case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
6102 case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
6103 }
6104 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6105 Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
6106 Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
6107 if (P == llvm::FCmpInst::FCMP_OEQ)
6108 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
6109 else
6110 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
6111 return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
6112 }
6113 case NEON::BI__builtin_neon_vceqd_s64:
6114 case NEON::BI__builtin_neon_vceqd_u64:
6115 case NEON::BI__builtin_neon_vcgtd_s64:
6116 case NEON::BI__builtin_neon_vcgtd_u64:
6117 case NEON::BI__builtin_neon_vcltd_s64:
6118 case NEON::BI__builtin_neon_vcltd_u64:
6119 case NEON::BI__builtin_neon_vcged_u64:
6120 case NEON::BI__builtin_neon_vcged_s64:
6121 case NEON::BI__builtin_neon_vcled_u64:
6122 case NEON::BI__builtin_neon_vcled_s64: {
6123 llvm::CmpInst::Predicate P;
6124 switch (BuiltinID) {
6125 default: llvm_unreachable("missing builtin ID in switch!");
6126 case NEON::BI__builtin_neon_vceqd_s64:
6127 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
6128 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
6129 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
6130 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
6131 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
6132 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
6133 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
6134 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
6135 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
6136 }
6137 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6138 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
6139 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
6140 Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
6141 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
6142 }
6143 case NEON::BI__builtin_neon_vtstd_s64:
6144 case NEON::BI__builtin_neon_vtstd_u64: {
6145 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6146 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
6147 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
6148 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
6149 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
6150 llvm::Constant::getNullValue(Int64Ty));
6151 return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
6152 }
6153 case NEON::BI__builtin_neon_vset_lane_i8:
6154 case NEON::BI__builtin_neon_vset_lane_i16:
6155 case NEON::BI__builtin_neon_vset_lane_i32:
6156 case NEON::BI__builtin_neon_vset_lane_i64:
6157 case NEON::BI__builtin_neon_vset_lane_bf16:
6158 case NEON::BI__builtin_neon_vset_lane_f32:
6159 case NEON::BI__builtin_neon_vsetq_lane_i8:
6160 case NEON::BI__builtin_neon_vsetq_lane_i16:
6161 case NEON::BI__builtin_neon_vsetq_lane_i32:
6162 case NEON::BI__builtin_neon_vsetq_lane_i64:
6163 case NEON::BI__builtin_neon_vsetq_lane_bf16:
6164 case NEON::BI__builtin_neon_vsetq_lane_f32:
6165 Ops.push_back(EmitScalarExpr(E->getArg(2)));
6166 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
6167 case NEON::BI__builtin_neon_vset_lane_f64:
6168 // The vector type needs a cast for the v1f64 variant.
6169 Ops[1] =
6170 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1));
6171 Ops.push_back(EmitScalarExpr(E->getArg(2)));
6172 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
6173 case NEON::BI__builtin_neon_vset_lane_mf8:
6174 case NEON::BI__builtin_neon_vsetq_lane_mf8:
6175 Ops.push_back(EmitScalarExpr(E->getArg(2)));
6176 // The input vector type needs a cast to scalar type.
6177 Ops[0] =
6178 Builder.CreateBitCast(Ops[0], llvm::Type::getInt8Ty(getLLVMContext()));
6179 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
6180 case NEON::BI__builtin_neon_vsetq_lane_f64:
6181 // The vector type needs a cast for the v2f64 variant.
6182 Ops[1] =
6183 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2));
6184 Ops.push_back(EmitScalarExpr(E->getArg(2)));
6185 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
6186
6187 case NEON::BI__builtin_neon_vget_lane_i8:
6188 case NEON::BI__builtin_neon_vdupb_lane_i8:
6189 Ops[0] =
6190 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 8));
6191 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6192 "vget_lane");
6193 case NEON::BI__builtin_neon_vgetq_lane_i8:
6194 case NEON::BI__builtin_neon_vdupb_laneq_i8:
6195 Ops[0] =
6196 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 16));
6197 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6198 "vgetq_lane");
6199 case NEON::BI__builtin_neon_vget_lane_mf8:
6200 case NEON::BI__builtin_neon_vdupb_lane_mf8:
6201 case NEON::BI__builtin_neon_vgetq_lane_mf8:
6202 case NEON::BI__builtin_neon_vdupb_laneq_mf8:
6203 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6204 "vget_lane");
6205 case NEON::BI__builtin_neon_vget_lane_i16:
6206 case NEON::BI__builtin_neon_vduph_lane_i16:
6207 Ops[0] =
6208 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 4));
6209 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6210 "vget_lane");
6211 case NEON::BI__builtin_neon_vgetq_lane_i16:
6212 case NEON::BI__builtin_neon_vduph_laneq_i16:
6213 Ops[0] =
6214 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 8));
6215 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6216 "vgetq_lane");
6217 case NEON::BI__builtin_neon_vget_lane_i32:
6218 case NEON::BI__builtin_neon_vdups_lane_i32:
6219 Ops[0] =
6220 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 2));
6221 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6222 "vget_lane");
6223 case NEON::BI__builtin_neon_vdups_lane_f32:
6224 Ops[0] =
6225 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
6226 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6227 "vdups_lane");
6228 case NEON::BI__builtin_neon_vgetq_lane_i32:
6229 case NEON::BI__builtin_neon_vdups_laneq_i32:
6230 Ops[0] =
6231 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
6232 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6233 "vgetq_lane");
6234 case NEON::BI__builtin_neon_vget_lane_i64:
6235 case NEON::BI__builtin_neon_vdupd_lane_i64:
6236 Ops[0] =
6237 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 1));
6238 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6239 "vget_lane");
6240 case NEON::BI__builtin_neon_vdupd_lane_f64:
6241 Ops[0] =
6242 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
6243 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6244 "vdupd_lane");
6245 case NEON::BI__builtin_neon_vgetq_lane_i64:
6246 case NEON::BI__builtin_neon_vdupd_laneq_i64:
6247 Ops[0] =
6248 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
6249 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6250 "vgetq_lane");
6251 case NEON::BI__builtin_neon_vget_lane_f32:
6252 Ops[0] =
6253 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
6254 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6255 "vget_lane");
6256 case NEON::BI__builtin_neon_vget_lane_f64:
6257 Ops[0] =
6258 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
6259 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6260 "vget_lane");
6261 case NEON::BI__builtin_neon_vgetq_lane_f32:
6262 case NEON::BI__builtin_neon_vdups_laneq_f32:
6263 Ops[0] =
6264 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 4));
6265 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6266 "vgetq_lane");
6267 case NEON::BI__builtin_neon_vgetq_lane_f64:
6268 case NEON::BI__builtin_neon_vdupd_laneq_f64:
6269 Ops[0] =
6270 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 2));
6271 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6272 "vgetq_lane");
6273 case NEON::BI__builtin_neon_vaddh_f16:
6274 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6275 return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
6276 case NEON::BI__builtin_neon_vsubh_f16:
6277 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6278 return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
6279 case NEON::BI__builtin_neon_vmulh_f16:
6280 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6281 return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
6282 case NEON::BI__builtin_neon_vdivh_f16:
6283 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6284 return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
6285 case NEON::BI__builtin_neon_vfmah_f16:
6286 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
6287 return emitCallMaybeConstrainedFPBuiltin(
6288 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
6289 {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
6290 case NEON::BI__builtin_neon_vfmsh_f16: {
6291 Value* Neg = Builder.CreateFNeg(EmitScalarExpr(E->getArg(1)), "vsubh");
6292
6293 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
6294 return emitCallMaybeConstrainedFPBuiltin(
6295 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
6296 {Neg, EmitScalarExpr(E->getArg(2)), Ops[0]});
6297 }
6298 case NEON::BI__builtin_neon_vaddd_s64:
6299 case NEON::BI__builtin_neon_vaddd_u64:
6300 return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
6301 case NEON::BI__builtin_neon_vsubd_s64:
6302 case NEON::BI__builtin_neon_vsubd_u64:
6303 return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
6304 case NEON::BI__builtin_neon_vqdmlalh_s16:
6305 case NEON::BI__builtin_neon_vqdmlslh_s16: {
6306 SmallVector<Value *, 2> ProductOps;
6307 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
6308 ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
6309 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
6310 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
6311 ProductOps, "vqdmlXl");
6312 Constant *CI = ConstantInt::get(SizeTy, 0);
6313 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
6314
6315 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
6316 ? Intrinsic::aarch64_neon_sqadd
6317 : Intrinsic::aarch64_neon_sqsub;
6318 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
6319 }
6320 case NEON::BI__builtin_neon_vqshlud_n_s64: {
6321 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6322 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
6323 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
6324 Ops, "vqshlu_n");
6325 }
6326 case NEON::BI__builtin_neon_vqshld_n_u64:
6327 case NEON::BI__builtin_neon_vqshld_n_s64: {
6328 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
6329 ? Intrinsic::aarch64_neon_uqshl
6330 : Intrinsic::aarch64_neon_sqshl;
6331 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6332 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
6333 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
6334 }
6335 case NEON::BI__builtin_neon_vrshrd_n_u64:
6336 case NEON::BI__builtin_neon_vrshrd_n_s64: {
6337 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
6338 ? Intrinsic::aarch64_neon_urshl
6339 : Intrinsic::aarch64_neon_srshl;
6340 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6341 int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
6342 Ops[1] = ConstantInt::get(Int64Ty, -SV);
6343 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
6344 }
6345 case NEON::BI__builtin_neon_vrsrad_n_u64:
6346 case NEON::BI__builtin_neon_vrsrad_n_s64: {
6347 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
6348 ? Intrinsic::aarch64_neon_urshl
6349 : Intrinsic::aarch64_neon_srshl;
6350 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
6351 Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
6352 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
6353 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
6354 return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
6355 }
6356 case NEON::BI__builtin_neon_vshld_n_s64:
6357 case NEON::BI__builtin_neon_vshld_n_u64: {
6358 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
6359 return Builder.CreateShl(
6360 Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
6361 }
6362 case NEON::BI__builtin_neon_vshrd_n_s64: {
6363 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
6364 return Builder.CreateAShr(
6365 Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
6366 Amt->getZExtValue())),
6367 "shrd_n");
6368 }
6369 case NEON::BI__builtin_neon_vshrd_n_u64: {
6370 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
6371 uint64_t ShiftAmt = Amt->getZExtValue();
6372 // Right-shifting an unsigned value by its size yields 0.
6373 if (ShiftAmt == 64)
6374 return ConstantInt::get(Int64Ty, 0);
6375 return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
6376 "shrd_n");
6377 }
6378 case NEON::BI__builtin_neon_vsrad_n_s64: {
6379 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
6380 Ops[1] = Builder.CreateAShr(
6381 Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
6382 Amt->getZExtValue())),
6383 "shrd_n");
6384 return Builder.CreateAdd(Ops[0], Ops[1]);
6385 }
6386 case NEON::BI__builtin_neon_vsrad_n_u64: {
6387 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
6388 uint64_t ShiftAmt = Amt->getZExtValue();
6389 // Right-shifting an unsigned value by its size yields 0.
6390 // As Op + 0 = Op, return Ops[0] directly.
6391 if (ShiftAmt == 64)
6392 return Ops[0];
6393 Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
6394 "shrd_n");
6395 return Builder.CreateAdd(Ops[0], Ops[1]);
6396 }
6397 case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
6398 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
6399 case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
6400 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
6401 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
6402 "lane");
6403 SmallVector<Value *, 2> ProductOps;
6404 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
6405 ProductOps.push_back(vectorWrapScalar16(Ops[2]));
6406 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
6407 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
6408 ProductOps, "vqdmlXl");
6409 Constant *CI = ConstantInt::get(SizeTy, 0);
6410 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
6411 Ops.pop_back();
6412
6413 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
6414 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
6415 ? Intrinsic::aarch64_neon_sqadd
6416 : Intrinsic::aarch64_neon_sqsub;
6417 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
6418 }
6419 case NEON::BI__builtin_neon_vqdmlals_s32:
6420 case NEON::BI__builtin_neon_vqdmlsls_s32: {
6421 SmallVector<Value *, 2> ProductOps;
6422 ProductOps.push_back(Ops[1]);
6423 ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
6424 Ops[1] =
6425 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
6426 ProductOps, "vqdmlXl");
6427
6428 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
6429 ? Intrinsic::aarch64_neon_sqadd
6430 : Intrinsic::aarch64_neon_sqsub;
6431 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
6432 }
6433 case NEON::BI__builtin_neon_vqdmlals_lane_s32:
6434 case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
6435 case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
6436 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
6437 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
6438 "lane");
6439 SmallVector<Value *, 2> ProductOps;
6440 ProductOps.push_back(Ops[1]);
6441 ProductOps.push_back(Ops[2]);
6442 Ops[1] =
6443 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
6444 ProductOps, "vqdmlXl");
6445 Ops.pop_back();
6446
6447 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
6448 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
6449 ? Intrinsic::aarch64_neon_sqadd
6450 : Intrinsic::aarch64_neon_sqsub;
6451 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
6452 }
6453 case NEON::BI__builtin_neon_vget_lane_bf16:
6454 case NEON::BI__builtin_neon_vduph_lane_bf16:
6455 case NEON::BI__builtin_neon_vduph_lane_f16: {
6456 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6457 "vget_lane");
6458 }
6459 case NEON::BI__builtin_neon_vgetq_lane_bf16:
6460 case NEON::BI__builtin_neon_vduph_laneq_bf16:
6461 case NEON::BI__builtin_neon_vduph_laneq_f16: {
6462 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6463 "vgetq_lane");
6464 }
6465 case NEON::BI__builtin_neon_vcvt_bf16_f32: {
6466 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
6467 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
6468 return Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
6469 }
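// The low/high bf16 conversions below both build a v8bf16 result: the "low"
// form places the four truncated lanes in the low half and zeros in the high
// half, while the "high" form keeps the existing low lanes of the first
// operand and shuffles the newly truncated lanes into the high half.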
6470 case NEON::BI__builtin_neon_vcvtq_low_bf16_f32: {
6471 SmallVector<int, 16> ConcatMask(8);
6472 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
6473 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
6474 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
6475 llvm::Value *Trunc =
6476 Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
6477 return Builder.CreateShuffleVector(
6478 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
6479 }
6480 case NEON::BI__builtin_neon_vcvtq_high_bf16_f32: {
6481 SmallVector<int, 16> ConcatMask(8);
6482 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
6483 SmallVector<int, 16> LoMask(4);
6484 std::iota(LoMask.begin(), LoMask.end(), 0);
6485 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
6486 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
6487 llvm::Type *V8BF16 = FixedVectorType::get(Builder.getBFloatTy(), 8);
6488 llvm::Value *Inactive = Builder.CreateShuffleVector(
6489 Builder.CreateBitCast(Ops[0], V8BF16), LoMask);
6490 llvm::Value *Trunc =
6491 Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[1], V4F32), V4BF16);
6492 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
6493 }
6494
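// _InterlockedAdd* return the updated value, whereas atomicrmw add yields the
// value that was previously in memory, hence the extra add after the RMW
// below.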
6495 case clang::AArch64::BI_InterlockedAdd:
6496 case clang::AArch64::BI_InterlockedAdd_acq:
6497 case clang::AArch64::BI_InterlockedAdd_rel:
6498 case clang::AArch64::BI_InterlockedAdd_nf:
6499 case clang::AArch64::BI_InterlockedAdd64:
6500 case clang::AArch64::BI_InterlockedAdd64_acq:
6501 case clang::AArch64::BI_InterlockedAdd64_rel:
6502 case clang::AArch64::BI_InterlockedAdd64_nf: {
6503 Address DestAddr = CheckAtomicAlignment(*this, E);
6504 Value *Val = EmitScalarExpr(E->getArg(1));
6505 llvm::AtomicOrdering Ordering;
6506 switch (BuiltinID) {
6507 case clang::AArch64::BI_InterlockedAdd:
6508 case clang::AArch64::BI_InterlockedAdd64:
6509 Ordering = llvm::AtomicOrdering::SequentiallyConsistent;
6510 break;
6511 case clang::AArch64::BI_InterlockedAdd_acq:
6512 case clang::AArch64::BI_InterlockedAdd64_acq:
6513 Ordering = llvm::AtomicOrdering::Acquire;
6514 break;
6515 case clang::AArch64::BI_InterlockedAdd_rel:
6516 case clang::AArch64::BI_InterlockedAdd64_rel:
6517 Ordering = llvm::AtomicOrdering::Release;
6518 break;
6519 case clang::AArch64::BI_InterlockedAdd_nf:
6520 case clang::AArch64::BI_InterlockedAdd64_nf:
6521 Ordering = llvm::AtomicOrdering::Monotonic;
6522 break;
6523 default:
6524 llvm_unreachable("missing builtin ID in switch!");
6525 }
6526 AtomicRMWInst *RMWI =
6527 Builder.CreateAtomicRMW(AtomicRMWInst::Add, DestAddr, Val, Ordering);
6528 return Builder.CreateAdd(RMWI, Val);
6529 }
6530 }
6531
6532 llvm::FixedVectorType *VTy = GetNeonType(this, Type);
6533 llvm::Type *Ty = VTy;
6534 if (!Ty)
6535 return nullptr;
6536
6537 // Not all intrinsics handled by the common case work for AArch64 yet, so only
6538 // defer to common code if it's been added to our special map.
6539 Builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
6540 AArch64SIMDIntrinsicsProvenSorted);
6541
6542 if (Builtin)
6543 return EmitCommonNeonBuiltinExpr(
6544 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
6545 Builtin->NameHint, Builtin->TypeModifier, E, Ops,
6546 /*never use addresses*/ Address::invalid(), Address::invalid(), Arch);
6547
6548 if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
6549 return V;
6550
6551 unsigned Int;
6552 bool ExtractLow = false;
6553 bool ExtendLaneArg = false;
6554 switch (BuiltinID) {
6555 default: return nullptr;
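// vbsl is a bitwise select: on the integer view of the vectors it computes
// (mask & a) | (~mask & b), with Ops[0] acting as the mask.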
6556 case NEON::BI__builtin_neon_vbsl_v:
6557 case NEON::BI__builtin_neon_vbslq_v: {
6558 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
6559 Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
6560 Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
6561 Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
6562
6563 Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
6564 Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
6565 Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
6566 return Builder.CreateBitCast(Ops[0], Ty);
6567 }
6568 case NEON::BI__builtin_neon_vfma_lane_v:
6569 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
6570 // The ARM builtins (and instructions) have the addend as the first
6571 // operand, but the 'fma' intrinsics have it last. Swap it around here.
6572 Value *Addend = Ops[0];
6573 Value *Multiplicand = Ops[1];
6574 Value *LaneSource = Ops[2];
6575 Ops[0] = Multiplicand;
6576 Ops[1] = LaneSource;
6577 Ops[2] = Addend;
6578
6579 // Now adjust things to handle the lane access.
6580 auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
6581 ? llvm::FixedVectorType::get(VTy->getElementType(),
6582 VTy->getNumElements() / 2)
6583 : VTy;
6584 llvm::Constant *cst = cast<Constant>(Ops[3]);
6585 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
6586 Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
6587 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
6588
6589 Ops.pop_back();
6590 Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
6591 : Intrinsic::fma;
6592 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
6593 }
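// Illustrative sketch, not upstream code: vfma_lane_f32(Acc, X, V, Lane)
// computes Acc + X * V[Lane]. llvm.fma takes the addend last, so after the
// operand swap above the emitted call is effectively
//
//   fma(X, splat(V[Lane]), Acc)
//
// with the splat produced by the shufflevector on Ops[1].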
6594 case NEON::BI__builtin_neon_vfma_laneq_v: {
6595 auto *VTy = cast<llvm::FixedVectorType>(Ty);
6596 // v1f64 fma should be mapped to Neon scalar f64 fma
6597 if (VTy && VTy->getElementType() == DoubleTy) {
6598 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6599 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
6600 llvm::FixedVectorType *VTy =
6601 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true));
6602 Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
6603 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6604 Value *Result;
6605 Result = emitCallMaybeConstrainedFPBuiltin(
6606 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
6607 DoubleTy, {Ops[1], Ops[2], Ops[0]});
6608 return Builder.CreateBitCast(Result, Ty);
6609 }
6610 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6611 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6612
6613 auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
6614 VTy->getNumElements() * 2);
6615 Ops[2] = Builder.CreateBitCast(Ops[2], STy);
6616 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
6617 cast<ConstantInt>(Ops[3]));
6618 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
6619
6620 return emitCallMaybeConstrainedFPBuiltin(
6621 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6622 {Ops[2], Ops[1], Ops[0]});
6623 }
6624 case NEON::BI__builtin_neon_vfmaq_laneq_v: {
6625 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6626 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6627
6628 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6629 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
6630 return emitCallMaybeConstrainedFPBuiltin(
6631 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6632 {Ops[2], Ops[1], Ops[0]});
6633 }
6634 case NEON::BI__builtin_neon_vfmah_lane_f16:
6635 case NEON::BI__builtin_neon_vfmas_lane_f32:
6636 case NEON::BI__builtin_neon_vfmah_laneq_f16:
6637 case NEON::BI__builtin_neon_vfmas_laneq_f32:
6638 case NEON::BI__builtin_neon_vfmad_lane_f64:
6639 case NEON::BI__builtin_neon_vfmad_laneq_f64: {
6640 Ops.push_back(EmitScalarExpr(E->getArg(3)));
6641 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
6642 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6643 return emitCallMaybeConstrainedFPBuiltin(
6644 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6645 {Ops[1], Ops[2], Ops[0]});
6646 }
6647 case NEON::BI__builtin_neon_vmull_v:
6648 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6649 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
6650 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
6651 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
6652 case NEON::BI__builtin_neon_vmax_v:
6653 case NEON::BI__builtin_neon_vmaxq_v:
6654 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6655 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
6656 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
6657 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
6658 case NEON::BI__builtin_neon_vmaxh_f16: {
6659 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6660 Int = Intrinsic::aarch64_neon_fmax;
6661 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
6662 }
6663 case NEON::BI__builtin_neon_vmin_v:
6664 case NEON::BI__builtin_neon_vminq_v:
6665 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6666 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
6667 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
6668 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
6669 case NEON::BI__builtin_neon_vminh_f16: {
6670 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6671 Int = Intrinsic::aarch64_neon_fmin;
6672 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
6673 }
6674 case NEON::BI__builtin_neon_vabd_v:
6675 case NEON::BI__builtin_neon_vabdq_v:
6676 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6677 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
6678 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
6679 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
6680 case NEON::BI__builtin_neon_vpadal_v:
6681 case NEON::BI__builtin_neon_vpadalq_v: {
6682 unsigned ArgElts = VTy->getNumElements();
6683 llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
6684 unsigned BitWidth = EltTy->getBitWidth();
6685 auto *ArgTy = llvm::FixedVectorType::get(
6686 llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts);
6687 llvm::Type* Tys[2] = { VTy, ArgTy };
6688 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
6689 SmallVector<llvm::Value*, 1> TmpOps;
6690 TmpOps.push_back(Ops[1]);
6691 Function *F = CGM.getIntrinsic(Int, Tys);
6692 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
6693 llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
6694 return Builder.CreateAdd(tmp, addend);
6695 }
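// Illustrative sketch, not upstream code: vpadal adds horizontal pairs of the
// narrow source into the wide accumulator, which is why it is lowered as a
// pairwise-widening add followed by an ordinary vector add:
//
//   int16x4_t PairwiseAccumulateSketch(int16x4_t Acc, int8x8_t V) {
//     int16x4_t Pairs = vpaddl_s8(V); // @llvm.aarch64.neon.saddlp
//     return vadd_s16(Acc, Pairs);    // the CreateAdd above
//   }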
6696 case NEON::BI__builtin_neon_vpmin_v:
6697 case NEON::BI__builtin_neon_vpminq_v:
6698 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6699 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
6700 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
6701 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
6702 case NEON::BI__builtin_neon_vpmax_v:
6703 case NEON::BI__builtin_neon_vpmaxq_v:
6704 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6705 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
6706 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
6707 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
6708 case NEON::BI__builtin_neon_vminnm_v:
6709 case NEON::BI__builtin_neon_vminnmq_v:
6710 Int = Intrinsic::aarch64_neon_fminnm;
6711 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
6712 case NEON::BI__builtin_neon_vminnmh_f16:
6713 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6714 Int = Intrinsic::aarch64_neon_fminnm;
6715 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
6716 case NEON::BI__builtin_neon_vmaxnm_v:
6717 case NEON::BI__builtin_neon_vmaxnmq_v:
6718 Int = Intrinsic::aarch64_neon_fmaxnm;
6719 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
6720 case NEON::BI__builtin_neon_vmaxnmh_f16:
6721 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6722 Int = Intrinsic::aarch64_neon_fmaxnm;
6723 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
6724 case NEON::BI__builtin_neon_vrecpss_f32: {
6725 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6726 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
6727 Ops, "vrecps");
6728 }
6729 case NEON::BI__builtin_neon_vrecpsd_f64:
6730 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6731 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
6732 Ops, "vrecps");
6733 case NEON::BI__builtin_neon_vrecpsh_f16:
6734 Ops.push_back(EmitScalarExpr(E->getArg(1)));
6735 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
6736 Ops, "vrecps");
6737 case NEON::BI__builtin_neon_vqshrun_n_v:
6738 Int = Intrinsic::aarch64_neon_sqshrun;
6739 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
6740 case NEON::BI__builtin_neon_vqrshrun_n_v:
6741 Int = Intrinsic::aarch64_neon_sqrshrun;
6742 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
6743 case NEON::BI__builtin_neon_vqshrn_n_v:
6744 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
6745 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
6746 case NEON::BI__builtin_neon_vrshrn_n_v:
6747 Int = Intrinsic::aarch64_neon_rshrn;
6748 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
6749 case NEON::BI__builtin_neon_vqrshrn_n_v:
6750 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
6751 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
6752 case NEON::BI__builtin_neon_vrndah_f16: {
6753 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6754 Int = Builder.getIsFPConstrained()
6755 ? Intrinsic::experimental_constrained_round
6756 : Intrinsic::round;
6757 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
6758 }
6759 case NEON::BI__builtin_neon_vrnda_v:
6760 case NEON::BI__builtin_neon_vrndaq_v: {
6761 Int = Builder.getIsFPConstrained()
6762 ? Intrinsic::experimental_constrained_round
6763 : Intrinsic::round;
6764 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
6765 }
6766 case NEON::BI__builtin_neon_vrndih_f16: {
6767 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6768 Int = Builder.getIsFPConstrained()
6769 ? Intrinsic::experimental_constrained_nearbyint
6770 : Intrinsic::nearbyint;
6771 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
6772 }
6773 case NEON::BI__builtin_neon_vrndmh_f16: {
6774 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6775 Int = Builder.getIsFPConstrained()
6776 ? Intrinsic::experimental_constrained_floor
6777 : Intrinsic::floor;
6778 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
6779 }
6780 case NEON::BI__builtin_neon_vrndm_v:
6781 case NEON::BI__builtin_neon_vrndmq_v: {
6782 Int = Builder.getIsFPConstrained()
6783 ? Intrinsic::experimental_constrained_floor
6784 : Intrinsic::floor;
6785 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
6786 }
6787 case NEON::BI__builtin_neon_vrndnh_f16: {
6788 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6789 Int = Builder.getIsFPConstrained()
6790 ? Intrinsic::experimental_constrained_roundeven
6791 : Intrinsic::roundeven;
6792 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
6793 }
6794 case NEON::BI__builtin_neon_vrndn_v:
6795 case NEON::BI__builtin_neon_vrndnq_v: {
6796 Int = Builder.getIsFPConstrained()
6797 ? Intrinsic::experimental_constrained_roundeven
6798 : Intrinsic::roundeven;
6799 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
6800 }
6801 case NEON::BI__builtin_neon_vrndns_f32: {
6802 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6803 Int = Builder.getIsFPConstrained()
6804 ? Intrinsic::experimental_constrained_roundeven
6805 : Intrinsic::roundeven;
6806 return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
6807 }
6808 case NEON::BI__builtin_neon_vrndph_f16: {
6809 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6810 Int = Builder.getIsFPConstrained()
6811 ? Intrinsic::experimental_constrained_ceil
6812 : Intrinsic::ceil;
6813 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
6814 }
6815 case NEON::BI__builtin_neon_vrndp_v:
6816 case NEON::BI__builtin_neon_vrndpq_v: {
6817 Int = Builder.getIsFPConstrained()
6818 ? Intrinsic::experimental_constrained_ceil
6819 : Intrinsic::ceil;
6820 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
6821 }
6822 case NEON::BI__builtin_neon_vrndxh_f16: {
6823 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6824 Int = Builder.getIsFPConstrained()
6825 ? Intrinsic::experimental_constrained_rint
6826 : Intrinsic::rint;
6827 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
6828 }
6829 case NEON::BI__builtin_neon_vrndx_v:
6830 case NEON::BI__builtin_neon_vrndxq_v: {
6831 Int = Builder.getIsFPConstrained()
6832 ? Intrinsic::experimental_constrained_rint
6833 : Intrinsic::rint;
6834 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
6835 }
6836 case NEON::BI__builtin_neon_vrndh_f16: {
6837 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6838 Int = Builder.getIsFPConstrained()
6839 ? Intrinsic::experimental_constrained_trunc
6840 : Intrinsic::trunc;
6841 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
6842 }
6843 case NEON::BI__builtin_neon_vrnd32x_f32:
6844 case NEON::BI__builtin_neon_vrnd32xq_f32:
6845 case NEON::BI__builtin_neon_vrnd32x_f64:
6846 case NEON::BI__builtin_neon_vrnd32xq_f64: {
6847 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6848 Int = Intrinsic::aarch64_neon_frint32x;
6849 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");
6850 }
6851 case NEON::BI__builtin_neon_vrnd32z_f32:
6852 case NEON::BI__builtin_neon_vrnd32zq_f32:
6853 case NEON::BI__builtin_neon_vrnd32z_f64:
6854 case NEON::BI__builtin_neon_vrnd32zq_f64: {
6855 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6856 Int = Intrinsic::aarch64_neon_frint32z;
6857 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");
6858 }
6859 case NEON::BI__builtin_neon_vrnd64x_f32:
6860 case NEON::BI__builtin_neon_vrnd64xq_f32:
6861 case NEON::BI__builtin_neon_vrnd64x_f64:
6862 case NEON::BI__builtin_neon_vrnd64xq_f64: {
6863 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6864 Int = Intrinsic::aarch64_neon_frint64x;
6865 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");
6866 }
6867 case NEON::BI__builtin_neon_vrnd64z_f32:
6868 case NEON::BI__builtin_neon_vrnd64zq_f32:
6869 case NEON::BI__builtin_neon_vrnd64z_f64:
6870 case NEON::BI__builtin_neon_vrnd64zq_f64: {
6871 Ops.push_back(EmitScalarExpr(E->getArg(0)));
6872 Int = Intrinsic::aarch64_neon_frint64z;
6873 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");
6874 }
6875 case NEON::BI__builtin_neon_vrnd_v:
6876 case NEON::BI__builtin_neon_vrndq_v: {
6877 Int = Builder.getIsFPConstrained()
6878 ? Intrinsic::experimental_constrained_trunc
6879 : Intrinsic::trunc;
6880 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
6881 }
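// Illustrative summary, not upstream code: each vrnd* flavour above maps to a
// standard LLVM rounding intrinsic, switching to the matching
// llvm.experimental.constrained.* form under strict floating point:
//
//   vrndn* -> llvm.roundeven  (ties to even)
//   vrnda* -> llvm.round      (ties away from zero)
//   vrndm* -> llvm.floor      (towards -infinity)
//   vrndp* -> llvm.ceil       (towards +infinity)
//   vrnd*  -> llvm.trunc      (towards zero)
//   vrndx* -> llvm.rint       (current mode, may raise inexact)
//   vrndi* -> llvm.nearbyint  (current mode, no inexact)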
6882 case NEON::BI__builtin_neon_vcvt_f64_v:
6883 case NEON::BI__builtin_neon_vcvtq_f64_v:
6884 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6885 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
6886 return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
6887 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
6888 case NEON::BI__builtin_neon_vcvt_f64_f32: {
6889 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
6890 "unexpected vcvt_f64_f32 builtin");
6891 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
6892 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
6893
6894 return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
6895 }
6896 case NEON::BI__builtin_neon_vcvt_f32_f64: {
6897 assert(Type.getEltType() == NeonTypeFlags::Float32 &&
6898 "unexpected vcvt_f32_f64 builtin");
6899 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
6900 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
6901
6902 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
6903 }
6904 case NEON::BI__builtin_neon_vcvt_s32_v:
6905 case NEON::BI__builtin_neon_vcvt_u32_v:
6906 case NEON::BI__builtin_neon_vcvt_s64_v:
6907 case NEON::BI__builtin_neon_vcvt_u64_v:
6908 case NEON::BI__builtin_neon_vcvt_s16_f16:
6909 case NEON::BI__builtin_neon_vcvt_u16_f16:
6910 case NEON::BI__builtin_neon_vcvtq_s32_v:
6911 case NEON::BI__builtin_neon_vcvtq_u32_v:
6912 case NEON::BI__builtin_neon_vcvtq_s64_v:
6913 case NEON::BI__builtin_neon_vcvtq_u64_v:
6914 case NEON::BI__builtin_neon_vcvtq_s16_f16:
6915 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
6916 Int =
6917 usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
6918 llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)};
6919 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz");
6920 }
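// Illustrative sketch, not upstream code: the float->int conversions use the
// target intrinsic rather than plain fptosi/fptoui so that out-of-range
// inputs saturate as the NEON instruction does, e.g. vcvtq_s32_f32(V) becomes
//
//   %r = call <4 x i32> @llvm.aarch64.neon.fcvtzs.v4i32.v4f32(<4 x float> %V)
//
// where the two overloaded types are the integer result and the FP source.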
6921 case NEON::BI__builtin_neon_vcvta_s16_f16:
6922 case NEON::BI__builtin_neon_vcvta_u16_f16:
6923 case NEON::BI__builtin_neon_vcvta_s32_v:
6924 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
6925 case NEON::BI__builtin_neon_vcvtaq_s32_v:
6926 case NEON::BI__builtin_neon_vcvta_u32_v:
6927 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
6928 case NEON::BI__builtin_neon_vcvtaq_u32_v:
6929 case NEON::BI__builtin_neon_vcvta_s64_v:
6930 case NEON::BI__builtin_neon_vcvtaq_s64_v:
6931 case NEON::BI__builtin_neon_vcvta_u64_v:
6932 case NEON::BI__builtin_neon_vcvtaq_u64_v: {
6933 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
6934 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6935 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
6936 }
6937 case NEON::BI__builtin_neon_vcvtm_s16_f16:
6938 case NEON::BI__builtin_neon_vcvtm_s32_v:
6939 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
6940 case NEON::BI__builtin_neon_vcvtmq_s32_v:
6941 case NEON::BI__builtin_neon_vcvtm_u16_f16:
6942 case NEON::BI__builtin_neon_vcvtm_u32_v:
6943 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
6944 case NEON::BI__builtin_neon_vcvtmq_u32_v:
6945 case NEON::BI__builtin_neon_vcvtm_s64_v:
6946 case NEON::BI__builtin_neon_vcvtmq_s64_v:
6947 case NEON::BI__builtin_neon_vcvtm_u64_v:
6948 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
6949 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
6950 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6951 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
6952 }
6953 case NEON::BI__builtin_neon_vcvtn_s16_f16:
6954 case NEON::BI__builtin_neon_vcvtn_s32_v:
6955 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
6956 case NEON::BI__builtin_neon_vcvtnq_s32_v:
6957 case NEON::BI__builtin_neon_vcvtn_u16_f16:
6958 case NEON::BI__builtin_neon_vcvtn_u32_v:
6959 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
6960 case NEON::BI__builtin_neon_vcvtnq_u32_v:
6961 case NEON::BI__builtin_neon_vcvtn_s64_v:
6962 case NEON::BI__builtin_neon_vcvtnq_s64_v:
6963 case NEON::BI__builtin_neon_vcvtn_u64_v:
6964 case NEON::BI__builtin_neon_vcvtnq_u64_v: {
6965 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
6966 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6967 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
6968 }
6969 case NEON::BI__builtin_neon_vcvtp_s16_f16:
6970 case NEON::BI__builtin_neon_vcvtp_s32_v:
6971 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
6972 case NEON::BI__builtin_neon_vcvtpq_s32_v:
6973 case NEON::BI__builtin_neon_vcvtp_u16_f16:
6974 case NEON::BI__builtin_neon_vcvtp_u32_v:
6975 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
6976 case NEON::BI__builtin_neon_vcvtpq_u32_v:
6977 case NEON::BI__builtin_neon_vcvtp_s64_v:
6978 case NEON::BI__builtin_neon_vcvtpq_s64_v:
6979 case NEON::BI__builtin_neon_vcvtp_u64_v:
6980 case NEON::BI__builtin_neon_vcvtpq_u64_v: {
6981 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
6982 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6983 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
6984 }
6985 case NEON::BI__builtin_neon_vmulx_v:
6986 case NEON::BI__builtin_neon_vmulxq_v: {
6987 Int = Intrinsic::aarch64_neon_fmulx;
6988 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
6989 }
6990 case NEON::BI__builtin_neon_vmulxh_lane_f16:
6991 case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
6992 // vmulx_lane should be mapped to Neon scalar mulx after
6993 // extracting the scalar element
6994 Ops.push_back(EmitScalarExpr(E->getArg(2)));
6995 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
6996 Ops.pop_back();
6997 Int = Intrinsic::aarch64_neon_fmulx;
6998 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
6999 }
7000 case NEON::BI__builtin_neon_vmul_lane_v:
7001 case NEON::BI__builtin_neon_vmul_laneq_v: {
7002 // v1f64 vmul_lane should be mapped to Neon scalar mul lane
7003 bool Quad = false;
7004 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
7005 Quad = true;
7006 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
7007 llvm::FixedVectorType *VTy =
7008 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
7009 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
7010 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
7011 Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
7012 return Builder.CreateBitCast(Result, Ty);
7013 }
7014 case NEON::BI__builtin_neon_vnegd_s64:
7015 return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
7016 case NEON::BI__builtin_neon_vnegh_f16:
7017 return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");
7018 case NEON::BI__builtin_neon_vpmaxnm_v:
7019 case NEON::BI__builtin_neon_vpmaxnmq_v: {
7020 Int = Intrinsic::aarch64_neon_fmaxnmp;
7021 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
7022 }
7023 case NEON::BI__builtin_neon_vpminnm_v:
7024 case NEON::BI__builtin_neon_vpminnmq_v: {
7025 Int = Intrinsic::aarch64_neon_fminnmp;
7026 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
7027 }
7028 case NEON::BI__builtin_neon_vsqrth_f16: {
7029 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7030 Int = Builder.getIsFPConstrained()
7031 ? Intrinsic::experimental_constrained_sqrt
7032 : Intrinsic::sqrt;
7033 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
7034 }
7035 case NEON::BI__builtin_neon_vsqrt_v:
7036 case NEON::BI__builtin_neon_vsqrtq_v: {
7037 Int = Builder.getIsFPConstrained()
7038 ? Intrinsic::experimental_constrained_sqrt
7039 : Intrinsic::sqrt;
7040 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7041 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
7042 }
7043 case NEON::BI__builtin_neon_vrbit_v:
7044 case NEON::BI__builtin_neon_vrbitq_v: {
7045 Int = Intrinsic::bitreverse;
7046 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
7047 }
7048 case NEON::BI__builtin_neon_vaddv_u8:
7049 // FIXME: These are handled by the AArch64 scalar code.
7050 usgn = true;
7051 [[fallthrough]];
7052 case NEON::BI__builtin_neon_vaddv_s8: {
7053 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
7054 Ty = Int32Ty;
7055 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
7056 llvm::Type *Tys[2] = { Ty, VTy };
7057 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7058 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
7059 return Builder.CreateTrunc(Ops[0], Int8Ty);
7060 }
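// Illustrative sketch, not upstream code: the across-vector reductions all
// follow one pattern: call the i32-returning aarch64.neon.[us]addv intrinsic
// on the source vector, then truncate to the element width. For vaddv_u8(V):
//
//   %sum = call i32 @llvm.aarch64.neon.uaddv.i32.v8i8(<8 x i8> %V)
//   %r   = trunc i32 %sum to i8
//
// The vmaxv/vminv/vaddlv cases below differ only in the intrinsic selected
// and in the width of the final truncation.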
7061 case NEON::BI__builtin_neon_vaddv_u16:
7062 usgn = true;
7063 [[fallthrough]];
7064 case NEON::BI__builtin_neon_vaddv_s16: {
7065 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
7066 Ty = Int32Ty;
7067 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
7068 llvm::Type *Tys[2] = { Ty, VTy };
7069 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7070 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
7071 return Builder.CreateTrunc(Ops[0], Int16Ty);
7072 }
7073 case NEON::BI__builtin_neon_vaddvq_u8:
7074 usgn = true;
7075 [[fallthrough]];
7076 case NEON::BI__builtin_neon_vaddvq_s8: {
7077 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
7078 Ty = Int32Ty;
7079 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
7080 llvm::Type *Tys[2] = { Ty, VTy };
7081 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7082 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
7083 return Builder.CreateTrunc(Ops[0], Int8Ty);
7084 }
7085 case NEON::BI__builtin_neon_vaddvq_u16:
7086 usgn = true;
7087 [[fallthrough]];
7088 case NEON::BI__builtin_neon_vaddvq_s16: {
7089 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
7090 Ty = Int32Ty;
7091 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
7092 llvm::Type *Tys[2] = { Ty, VTy };
7093 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7094 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
7095 return Builder.CreateTrunc(Ops[0], Int16Ty);
7096 }
7097 case NEON::BI__builtin_neon_vmaxv_u8: {
7098 Int = Intrinsic::aarch64_neon_umaxv;
7099 Ty = Int32Ty;
7100 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
7101 llvm::Type *Tys[2] = { Ty, VTy };
7102 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7103 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7104 return Builder.CreateTrunc(Ops[0], Int8Ty);
7105 }
7106 case NEON::BI__builtin_neon_vmaxv_u16: {
7107 Int = Intrinsic::aarch64_neon_umaxv;
7108 Ty = Int32Ty;
7109 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
7110 llvm::Type *Tys[2] = { Ty, VTy };
7111 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7112 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7113 return Builder.CreateTrunc(Ops[0], Int16Ty);
7114 }
7115 case NEON::BI__builtin_neon_vmaxvq_u8: {
7116 Int = Intrinsic::aarch64_neon_umaxv;
7117 Ty = Int32Ty;
7118 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
7119 llvm::Type *Tys[2] = { Ty, VTy };
7120 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7121 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7122 return Builder.CreateTrunc(Ops[0], Int8Ty);
7123 }
7124 case NEON::BI__builtin_neon_vmaxvq_u16: {
7125 Int = Intrinsic::aarch64_neon_umaxv;
7126 Ty = Int32Ty;
7127 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
7128 llvm::Type *Tys[2] = { Ty, VTy };
7129 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7130 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7131 return Builder.CreateTrunc(Ops[0], Int16Ty);
7132 }
7133 case NEON::BI__builtin_neon_vmaxv_s8: {
7134 Int = Intrinsic::aarch64_neon_smaxv;
7135 Ty = Int32Ty;
7136 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
7137 llvm::Type *Tys[2] = { Ty, VTy };
7138 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7139 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7140 return Builder.CreateTrunc(Ops[0], Int8Ty);
7141 }
7142 case NEON::BI__builtin_neon_vmaxv_s16: {
7143 Int = Intrinsic::aarch64_neon_smaxv;
7144 Ty = Int32Ty;
7145 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
7146 llvm::Type *Tys[2] = { Ty, VTy };
7147 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7148 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7149 return Builder.CreateTrunc(Ops[0], Int16Ty);
7150 }
7151 case NEON::BI__builtin_neon_vmaxvq_s8: {
7152 Int = Intrinsic::aarch64_neon_smaxv;
7153 Ty = Int32Ty;
7154 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
7155 llvm::Type *Tys[2] = { Ty, VTy };
7156 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7157 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7158 return Builder.CreateTrunc(Ops[0], Int8Ty);
7159 }
7160 case NEON::BI__builtin_neon_vmaxvq_s16: {
7161 Int = Intrinsic::aarch64_neon_smaxv;
7162 Ty = Int32Ty;
7163 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
7164 llvm::Type *Tys[2] = { Ty, VTy };
7165 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7166 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7167 return Builder.CreateTrunc(Ops[0], Int16Ty);
7168 }
7169 case NEON::BI__builtin_neon_vmaxv_f16: {
7170 Int = Intrinsic::aarch64_neon_fmaxv;
7171 Ty = HalfTy;
7172 VTy = llvm::FixedVectorType::get(HalfTy, 4);
7173 llvm::Type *Tys[2] = { Ty, VTy };
7174 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7175 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7176 return Builder.CreateTrunc(Ops[0], HalfTy);
7177 }
7178 case NEON::BI__builtin_neon_vmaxvq_f16: {
7179 Int = Intrinsic::aarch64_neon_fmaxv;
7180 Ty = HalfTy;
7181 VTy = llvm::FixedVectorType::get(HalfTy, 8);
7182 llvm::Type *Tys[2] = { Ty, VTy };
7183 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7184 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7185 return Builder.CreateTrunc(Ops[0], HalfTy);
7186 }
7187 case NEON::BI__builtin_neon_vminv_u8: {
7188 Int = Intrinsic::aarch64_neon_uminv;
7189 Ty = Int32Ty;
7190 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
7191 llvm::Type *Tys[2] = { Ty, VTy };
7192 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7193 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7194 return Builder.CreateTrunc(Ops[0], Int8Ty);
7195 }
7196 case NEON::BI__builtin_neon_vminv_u16: {
7197 Int = Intrinsic::aarch64_neon_uminv;
7198 Ty = Int32Ty;
7199 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
7200 llvm::Type *Tys[2] = { Ty, VTy };
7201 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7202 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7203 return Builder.CreateTrunc(Ops[0], Int16Ty);
7204 }
7205 case NEON::BI__builtin_neon_vminvq_u8: {
7206 Int = Intrinsic::aarch64_neon_uminv;
7207 Ty = Int32Ty;
7208 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
7209 llvm::Type *Tys[2] = { Ty, VTy };
7210 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7211 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7212 return Builder.CreateTrunc(Ops[0], Int8Ty);
7213 }
7214 case NEON::BI__builtin_neon_vminvq_u16: {
7215 Int = Intrinsic::aarch64_neon_uminv;
7216 Ty = Int32Ty;
7217 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
7218 llvm::Type *Tys[2] = { Ty, VTy };
7219 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7220 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7221 return Builder.CreateTrunc(Ops[0], Int16Ty);
7222 }
7223 case NEON::BI__builtin_neon_vminv_s8: {
7224 Int = Intrinsic::aarch64_neon_sminv;
7225 Ty = Int32Ty;
7226 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
7227 llvm::Type *Tys[2] = { Ty, VTy };
7228 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7229 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7230 return Builder.CreateTrunc(Ops[0], Int8Ty);
7231 }
7232 case NEON::BI__builtin_neon_vminv_s16: {
7233 Int = Intrinsic::aarch64_neon_sminv;
7234 Ty = Int32Ty;
7235 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
7236 llvm::Type *Tys[2] = { Ty, VTy };
7237 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7238 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7239 return Builder.CreateTrunc(Ops[0], Int16Ty);
7240 }
7241 case NEON::BI__builtin_neon_vminvq_s8: {
7242 Int = Intrinsic::aarch64_neon_sminv;
7243 Ty = Int32Ty;
7244 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
7245 llvm::Type *Tys[2] = { Ty, VTy };
7246 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7247 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7248 return Builder.CreateTrunc(Ops[0], Int8Ty);
7249 }
7250 case NEON::BI__builtin_neon_vminvq_s16: {
7251 Int = Intrinsic::aarch64_neon_sminv;
7252 Ty = Int32Ty;
7253 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
7254 llvm::Type *Tys[2] = { Ty, VTy };
7255 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7256 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7257 return Builder.CreateTrunc(Ops[0], Int16Ty);
7258 }
7259 case NEON::BI__builtin_neon_vminv_f16: {
7260 Int = Intrinsic::aarch64_neon_fminv;
7261 Ty = HalfTy;
7262 VTy = llvm::FixedVectorType::get(HalfTy, 4);
7263 llvm::Type *Tys[2] = { Ty, VTy };
7264 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7265 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7266 return Builder.CreateTrunc(Ops[0], HalfTy);
7267 }
7268 case NEON::BI__builtin_neon_vminvq_f16: {
7269 Int = Intrinsic::aarch64_neon_fminv;
7270 Ty = HalfTy;
7271 VTy = llvm::FixedVectorType::get(HalfTy, 8);
7272 llvm::Type *Tys[2] = { Ty, VTy };
7273 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7274 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7275 return Builder.CreateTrunc(Ops[0], HalfTy);
7276 }
7277 case NEON::BI__builtin_neon_vmaxnmv_f16: {
7278 Int = Intrinsic::aarch64_neon_fmaxnmv;
7279 Ty = HalfTy;
7280 VTy = llvm::FixedVectorType::get(HalfTy, 4);
7281 llvm::Type *Tys[2] = { Ty, VTy };
7282 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7283 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
7284 return Builder.CreateTrunc(Ops[0], HalfTy);
7285 }
7286 case NEON::BI__builtin_neon_vmaxnmvq_f16: {
7287 Int = Intrinsic::aarch64_neon_fmaxnmv;
7288 Ty = HalfTy;
7289 VTy = llvm::FixedVectorType::get(HalfTy, 8);
7290 llvm::Type *Tys[2] = { Ty, VTy };
7291 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7292 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
7293 return Builder.CreateTrunc(Ops[0], HalfTy);
7294 }
7295 case NEON::BI__builtin_neon_vminnmv_f16: {
7296 Int = Intrinsic::aarch64_neon_fminnmv;
7297 Ty = HalfTy;
7298 VTy = llvm::FixedVectorType::get(HalfTy, 4);
7299 llvm::Type *Tys[2] = { Ty, VTy };
7300 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7301 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
7302 return Builder.CreateTrunc(Ops[0], HalfTy);
7303 }
7304 case NEON::BI__builtin_neon_vminnmvq_f16: {
7305 Int = Intrinsic::aarch64_neon_fminnmv;
7306 Ty = HalfTy;
7307 VTy = llvm::FixedVectorType::get(HalfTy, 8);
7308 llvm::Type *Tys[2] = { Ty, VTy };
7309 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7310 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
7311 return Builder.CreateTrunc(Ops[0], HalfTy);
7312 }
7313 case NEON::BI__builtin_neon_vmul_n_f64: {
7314 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
7315 Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
7316 return Builder.CreateFMul(Ops[0], RHS);
7317 }
7318 case NEON::BI__builtin_neon_vaddlv_u8: {
7319 Int = Intrinsic::aarch64_neon_uaddlv;
7320 Ty = Int32Ty;
7321 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
7322 llvm::Type *Tys[2] = { Ty, VTy };
7323 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7324 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7325 return Builder.CreateTrunc(Ops[0], Int16Ty);
7326 }
7327 case NEON::BI__builtin_neon_vaddlv_u16: {
7328 Int = Intrinsic::aarch64_neon_uaddlv;
7329 Ty = Int32Ty;
7330 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
7331 llvm::Type *Tys[2] = { Ty, VTy };
7332 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7333 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7334 }
7335 case NEON::BI__builtin_neon_vaddlvq_u8: {
7336 Int = Intrinsic::aarch64_neon_uaddlv;
7337 Ty = Int32Ty;
7338 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
7339 llvm::Type *Tys[2] = { Ty, VTy };
7340 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7341 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7342 return Builder.CreateTrunc(Ops[0], Int16Ty);
7343 }
7344 case NEON::BI__builtin_neon_vaddlvq_u16: {
7345 Int = Intrinsic::aarch64_neon_uaddlv;
7346 Ty = Int32Ty;
7347 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
7348 llvm::Type *Tys[2] = { Ty, VTy };
7349 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7350 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7351 }
7352 case NEON::BI__builtin_neon_vaddlv_s8: {
7353 Int = Intrinsic::aarch64_neon_saddlv;
7354 Ty = Int32Ty;
7355 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
7356 llvm::Type *Tys[2] = { Ty, VTy };
7357 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7358 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7359 return Builder.CreateTrunc(Ops[0], Int16Ty);
7360 }
7361 case NEON::BI__builtin_neon_vaddlv_s16: {
7362 Int = Intrinsic::aarch64_neon_saddlv;
7363 Ty = Int32Ty;
7364 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
7365 llvm::Type *Tys[2] = { Ty, VTy };
7366 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7367 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7368 }
7369 case NEON::BI__builtin_neon_vaddlvq_s8: {
7370 Int = Intrinsic::aarch64_neon_saddlv;
7371 Ty = Int32Ty;
7372 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
7373 llvm::Type *Tys[2] = { Ty, VTy };
7374 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7375 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7376 return Builder.CreateTrunc(Ops[0], Int16Ty);
7377 }
7378 case NEON::BI__builtin_neon_vaddlvq_s16: {
7379 Int = Intrinsic::aarch64_neon_saddlv;
7380 Ty = Int32Ty;
7381 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
7382 llvm::Type *Tys[2] = { Ty, VTy };
7383 Ops.push_back(EmitScalarExpr(E->getArg(0)));
7384 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7385 }
7386 case NEON::BI__builtin_neon_vsri_n_v:
7387 case NEON::BI__builtin_neon_vsriq_n_v: {
7388 Int = Intrinsic::aarch64_neon_vsri;
7389 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
7390 return EmitNeonCall(Intrin, Ops, "vsri_n");
7391 }
7392 case NEON::BI__builtin_neon_vsli_n_v:
7393 case NEON::BI__builtin_neon_vsliq_n_v: {
7394 Int = Intrinsic::aarch64_neon_vsli;
7395 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
7396 return EmitNeonCall(Intrin, Ops, "vsli_n");
7397 }
7398 case NEON::BI__builtin_neon_vsra_n_v:
7399 case NEON::BI__builtin_neon_vsraq_n_v:
7400 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7401 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
7402 return Builder.CreateAdd(Ops[0], Ops[1]);
7403 case NEON::BI__builtin_neon_vrsra_n_v:
7404 case NEON::BI__builtin_neon_vrsraq_n_v: {
7405 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
7406 SmallVector<llvm::Value*,2> TmpOps;
7407 TmpOps.push_back(Ops[1]);
7408 TmpOps.push_back(Ops[2]);
7409 Function* F = CGM.getIntrinsic(Int, Ty);
7410 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
7411 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
7412 return Builder.CreateAdd(Ops[0], tmp);
7413 }
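// Illustrative sketch, not upstream code: vrsra_n is a rounding right shift
// accumulated into the first operand. The rounding shift itself goes through
// the [us]rshl ("rounding shift left") intrinsic with a negated shift amount
// (EmitNeonCall negates it because the final argument requests a right
// shift), so vrsraq_n_s32(Acc, V, N) is roughly
//
//   %sh = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %V,
//                                                       <4 x i32> splat(-N))
//   %r  = add <4 x i32> %Acc, %sh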
7414 case NEON::BI__builtin_neon_vld1_v:
7415 case NEON::BI__builtin_neon_vld1q_v: {
7416 return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment());
7417 }
7418 case NEON::BI__builtin_neon_vst1_v:
7419 case NEON::BI__builtin_neon_vst1q_v:
7420 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
7421 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
7422 case NEON::BI__builtin_neon_vld1_lane_v:
7423 case NEON::BI__builtin_neon_vld1q_lane_v: {
7424 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7425 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
7426 PtrOp0.getAlignment());
7427 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
7428 }
7429 case NEON::BI__builtin_neon_vldap1_lane_s64:
7430 case NEON::BI__builtin_neon_vldap1q_lane_s64: {
7431 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7432 llvm::LoadInst *LI = Builder.CreateAlignedLoad(
7433 VTy->getElementType(), Ops[0], PtrOp0.getAlignment());
7434 LI->setAtomic(llvm::AtomicOrdering::Acquire);
7435 Ops[0] = LI;
7436 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vldap1_lane");
7437 }
7438 case NEON::BI__builtin_neon_vld1_dup_v:
7439 case NEON::BI__builtin_neon_vld1q_dup_v: {
7440 Value *V = PoisonValue::get(Ty);
7441 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
7442 PtrOp0.getAlignment());
7443 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
7444 Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
7445 return EmitNeonSplat(Ops[0], CI);
7446 }
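// Illustrative sketch, not upstream code: vld1_dup loads a single element and
// broadcasts it, expressed as an insert into a poison vector followed by a
// zero-mask splat shuffle. For vld1q_dup_s32(p):
//
//   %e = load i32, ptr %p
//   %v = insertelement <4 x i32> poison, i32 %e, i32 0
//   %r = shufflevector <4 x i32> %v, <4 x i32> poison, <4 x i32> zeroinitializer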
7447 case NEON::BI__builtin_neon_vst1_lane_v:
7448 case NEON::BI__builtin_neon_vst1q_lane_v:
7449 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7450 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
7451 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
7452 case NEON::BI__builtin_neon_vstl1_lane_s64:
7453 case NEON::BI__builtin_neon_vstl1q_lane_s64: {
7454 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7455 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
7456 llvm::StoreInst *SI =
7457 Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
7458 SI->setAtomic(llvm::AtomicOrdering::Release);
7459 return SI;
7460 }
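// Illustrative sketch, not upstream code: vstl1_lane extracts the requested
// lane and emits an aligned store that is then marked atomic with release
// ordering, mirroring the acquire load used for vldap1_lane above:
//
//   %e = extractelement <2 x i64> %v, i64 %lane
//   store atomic i64 %e, ptr %p release, align 8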
7461 case NEON::BI__builtin_neon_vld2_v:
7462 case NEON::BI__builtin_neon_vld2q_v: {
7463 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
7464 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
7465 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
7466 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7467 }
7468 case NEON::BI__builtin_neon_vld3_v:
7469 case NEON::BI__builtin_neon_vld3q_v: {
7470 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
7471 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
7472 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
7473 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7474 }
7475 case NEON::BI__builtin_neon_vld4_v:
7476 case NEON::BI__builtin_neon_vld4q_v: {
7477 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
7478 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
7479 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
7480 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7481 }
7482 case NEON::BI__builtin_neon_vld2_dup_v:
7483 case NEON::BI__builtin_neon_vld2q_dup_v: {
7484 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
7485 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
7486 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
7487 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7488 }
7489 case NEON::BI__builtin_neon_vld3_dup_v:
7490 case NEON::BI__builtin_neon_vld3q_dup_v: {
7491 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
7492 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
7493 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
7494 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7495 }
7496 case NEON::BI__builtin_neon_vld4_dup_v:
7497 case NEON::BI__builtin_neon_vld4q_dup_v: {
7498 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
7499 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
7500 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
7501 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7502 }
7503 case NEON::BI__builtin_neon_vld2_lane_v:
7504 case NEON::BI__builtin_neon_vld2q_lane_v: {
7505 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
7506 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
7507 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
7508 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7509 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7510 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
7511 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld2_lane");
7512 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7513 }
7514 case NEON::BI__builtin_neon_vld3_lane_v:
7515 case NEON::BI__builtin_neon_vld3q_lane_v: {
7516 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
7517 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
7518 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
7519 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7520 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7521 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
7522 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
7523 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld3_lane");
7524 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7525 }
7526 case NEON::BI__builtin_neon_vld4_lane_v:
7527 case NEON::BI__builtin_neon_vld4q_lane_v: {
7528 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
7529 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
7530 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
7531 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7532 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7533 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
7534 Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
7535 Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
7536 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld4_lane");
7537 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7538 }
7539 case NEON::BI__builtin_neon_vst2_v:
7540 case NEON::BI__builtin_neon_vst2q_v: {
7541 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7542 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
7543 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
7544 Ops, "");
7545 }
7546 case NEON::BI__builtin_neon_vst2_lane_v:
7547 case NEON::BI__builtin_neon_vst2q_lane_v: {
7548 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7549 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
7550 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
7551 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
7552 Ops, "");
7553 }
7554 case NEON::BI__builtin_neon_vst3_v:
7555 case NEON::BI__builtin_neon_vst3q_v: {
7556 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7557 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
7558 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
7559 Ops, "");
7560 }
7561 case NEON::BI__builtin_neon_vst3_lane_v:
7562 case NEON::BI__builtin_neon_vst3q_lane_v: {
7563 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7564 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
7565 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
7566 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
7567 Ops, "");
7568 }
7569 case NEON::BI__builtin_neon_vst4_v:
7570 case NEON::BI__builtin_neon_vst4q_v: {
7571 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7572 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
7573 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
7574 Ops, "");
7575 }
7576 case NEON::BI__builtin_neon_vst4_lane_v:
7577 case NEON::BI__builtin_neon_vst4q_lane_v: {
7578 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7579 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
7580 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
7581 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
7582 Ops, "");
7583 }
7584 case NEON::BI__builtin_neon_vtrn_v:
7585 case NEON::BI__builtin_neon_vtrnq_v: {
7586 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7587 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7588 Value *SV = nullptr;
7589
7590 for (unsigned vi = 0; vi != 2; ++vi) {
7591 SmallVector<int, 16> Indices;
7592 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
7593 Indices.push_back(i+vi);
7594 Indices.push_back(i+e+vi);
7595 }
7596 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7597 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
7598 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7599 }
7600 return SV;
7601 }
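// Illustrative sketch, not upstream code: for a 4-element vtrn the two
// shuffle masks built by the loop above are
//
//   vi = 0: <0, 4, 2, 6>   (even lanes of Ops[1] interleaved with Ops[2])
//   vi = 1: <1, 5, 3, 7>   (odd lanes of Ops[1] interleaved with Ops[2])
//
// and each shuffle result is stored into the result buffer at offset vi.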
7602 case NEON::BI__builtin_neon_vuzp_v:
7603 case NEON::BI__builtin_neon_vuzpq_v: {
7604 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7605 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7606 Value *SV = nullptr;
7607
7608 for (unsigned vi = 0; vi != 2; ++vi) {
7609 SmallVector<int, 16> Indices;
7610 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
7611 Indices.push_back(2*i+vi);
7612
7613 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7614 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
7615 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7616 }
7617 return SV;
7618 }
7619 case NEON::BI__builtin_neon_vzip_v:
7620 case NEON::BI__builtin_neon_vzipq_v: {
7621 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7622 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7623 Value *SV = nullptr;
7624
7625 for (unsigned vi = 0; vi != 2; ++vi) {
7626 SmallVector<int, 16> Indices;
7627 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
7628 Indices.push_back((i + vi*e) >> 1);
7629 Indices.push_back(((i + vi*e) >> 1)+e);
7630 }
7631 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7632 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
7633 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7634 }
7635 return SV;
7636 }
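// Illustrative sketch, not upstream code: for a 4-element vzip the two masks
// computed above are
//
//   vi = 0: <0, 4, 1, 5>   (low halves of Ops[1] and Ops[2] interleaved)
//   vi = 1: <2, 6, 3, 7>   (high halves interleaved)
//
// while the vuzp case above instead de-interleaves with <0, 2, 4, 6> and
// <1, 3, 5, 7>.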
7637 case NEON::BI__builtin_neon_vqtbl1q_v: {
7638 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
7639 Ops, "vtbl1");
7640 }
7641 case NEON::BI__builtin_neon_vqtbl2q_v: {
7642 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
7643 Ops, "vtbl2");
7644 }
7645 case NEON::BI__builtin_neon_vqtbl3q_v: {
7646 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
7647 Ops, "vtbl3");
7648 }
7649 case NEON::BI__builtin_neon_vqtbl4q_v: {
7650 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
7651 Ops, "vtbl4");
7652 }
7653 case NEON::BI__builtin_neon_vqtbx1q_v: {
7654 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
7655 Ops, "vtbx1");
7656 }
7657 case NEON::BI__builtin_neon_vqtbx2q_v: {
7658 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
7659 Ops, "vtbx2");
7660 }
7661 case NEON::BI__builtin_neon_vqtbx3q_v: {
7662 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
7663 Ops, "vtbx3");
7664 }
7665 case NEON::BI__builtin_neon_vqtbx4q_v: {
7666 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
7667 Ops, "vtbx4");
7668 }
7669 case NEON::BI__builtin_neon_vsqadd_v:
7670 case NEON::BI__builtin_neon_vsqaddq_v: {
7671 Int = Intrinsic::aarch64_neon_usqadd;
7672 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
7673 }
7674 case NEON::BI__builtin_neon_vuqadd_v:
7675 case NEON::BI__builtin_neon_vuqaddq_v: {
7676 Int = Intrinsic::aarch64_neon_suqadd;
7677 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
7678 }
7679
7680 case NEON::BI__builtin_neon_vluti2_laneq_mf8:
7681 case NEON::BI__builtin_neon_vluti2_laneq_bf16:
7682 case NEON::BI__builtin_neon_vluti2_laneq_f16:
7683 case NEON::BI__builtin_neon_vluti2_laneq_p16:
7684 case NEON::BI__builtin_neon_vluti2_laneq_p8:
7685 case NEON::BI__builtin_neon_vluti2_laneq_s16:
7686 case NEON::BI__builtin_neon_vluti2_laneq_s8:
7687 case NEON::BI__builtin_neon_vluti2_laneq_u16:
7688 case NEON::BI__builtin_neon_vluti2_laneq_u8: {
7689 Int = Intrinsic::aarch64_neon_vluti2_laneq;
7690 llvm::Type *Tys[2];
7691 Tys[0] = Ty;
7692 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
7693 /*isQuad*/ false));
7694 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
7695 }
7696 case NEON::BI__builtin_neon_vluti2q_laneq_mf8:
7697 case NEON::BI__builtin_neon_vluti2q_laneq_bf16:
7698 case NEON::BI__builtin_neon_vluti2q_laneq_f16:
7699 case NEON::BI__builtin_neon_vluti2q_laneq_p16:
7700 case NEON::BI__builtin_neon_vluti2q_laneq_p8:
7701 case NEON::BI__builtin_neon_vluti2q_laneq_s16:
7702 case NEON::BI__builtin_neon_vluti2q_laneq_s8:
7703 case NEON::BI__builtin_neon_vluti2q_laneq_u16:
7704 case NEON::BI__builtin_neon_vluti2q_laneq_u8: {
7705 Int = Intrinsic::aarch64_neon_vluti2_laneq;
7706 llvm::Type *Tys[2];
7707 Tys[0] = Ty;
7708 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
7709 /*isQuad*/ true));
7710 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
7711 }
7712 case NEON::BI__builtin_neon_vluti2_lane_mf8:
7713 case NEON::BI__builtin_neon_vluti2_lane_bf16:
7714 case NEON::BI__builtin_neon_vluti2_lane_f16:
7715 case NEON::BI__builtin_neon_vluti2_lane_p16:
7716 case NEON::BI__builtin_neon_vluti2_lane_p8:
7717 case NEON::BI__builtin_neon_vluti2_lane_s16:
7718 case NEON::BI__builtin_neon_vluti2_lane_s8:
7719 case NEON::BI__builtin_neon_vluti2_lane_u16:
7720 case NEON::BI__builtin_neon_vluti2_lane_u8: {
7721 Int = Intrinsic::aarch64_neon_vluti2_lane;
7722 llvm::Type *Tys[2];
7723 Tys[0] = Ty;
7724 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
7725 /*isQuad*/ false));
7726 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
7727 }
7728 case NEON::BI__builtin_neon_vluti2q_lane_mf8:
7729 case NEON::BI__builtin_neon_vluti2q_lane_bf16:
7730 case NEON::BI__builtin_neon_vluti2q_lane_f16:
7731 case NEON::BI__builtin_neon_vluti2q_lane_p16:
7732 case NEON::BI__builtin_neon_vluti2q_lane_p8:
7733 case NEON::BI__builtin_neon_vluti2q_lane_s16:
7734 case NEON::BI__builtin_neon_vluti2q_lane_s8:
7735 case NEON::BI__builtin_neon_vluti2q_lane_u16:
7736 case NEON::BI__builtin_neon_vluti2q_lane_u8: {
7737 Int = Intrinsic::aarch64_neon_vluti2_lane;
7738 llvm::Type *Tys[2];
7739 Tys[0] = Ty;
7740 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
7741 /*isQuad*/ true));
7742 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
7743 }
7744 case NEON::BI__builtin_neon_vluti4q_lane_mf8:
7745 case NEON::BI__builtin_neon_vluti4q_lane_p8:
7746 case NEON::BI__builtin_neon_vluti4q_lane_s8:
7747 case NEON::BI__builtin_neon_vluti4q_lane_u8: {
7748 Int = Intrinsic::aarch64_neon_vluti4q_lane;
7749 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane");
7750 }
7751 case NEON::BI__builtin_neon_vluti4q_laneq_mf8:
7752 case NEON::BI__builtin_neon_vluti4q_laneq_p8:
7753 case NEON::BI__builtin_neon_vluti4q_laneq_s8:
7754 case NEON::BI__builtin_neon_vluti4q_laneq_u8: {
7755 Int = Intrinsic::aarch64_neon_vluti4q_laneq;
7756 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq");
7757 }
7758 case NEON::BI__builtin_neon_vluti4q_lane_bf16_x2:
7759 case NEON::BI__builtin_neon_vluti4q_lane_f16_x2:
7760 case NEON::BI__builtin_neon_vluti4q_lane_p16_x2:
7761 case NEON::BI__builtin_neon_vluti4q_lane_s16_x2:
7762 case NEON::BI__builtin_neon_vluti4q_lane_u16_x2: {
7763 Int = Intrinsic::aarch64_neon_vluti4q_lane_x2;
7764 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane_x2");
7765 }
7766 case NEON::BI__builtin_neon_vluti4q_laneq_bf16_x2:
7767 case NEON::BI__builtin_neon_vluti4q_laneq_f16_x2:
7768 case NEON::BI__builtin_neon_vluti4q_laneq_p16_x2:
7769 case NEON::BI__builtin_neon_vluti4q_laneq_s16_x2:
7770 case NEON::BI__builtin_neon_vluti4q_laneq_u16_x2: {
7771 Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
7772 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq_x2");
7773 }
7774 case NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm:
7775 ExtractLow = true;
7776 LLVM_FALLTHROUGH;
7777 case NEON::BI__builtin_neon_vcvt1_bf16_mf8_fpm:
7778 case NEON::BI__builtin_neon_vcvt1_high_bf16_mf8_fpm:
7779 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl1,
7780 llvm::FixedVectorType::get(BFloatTy, 8),
7781 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
7782 case NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm:
7783 ExtractLow = true;
7784 LLVM_FALLTHROUGH;
7785 case NEON::BI__builtin_neon_vcvt2_bf16_mf8_fpm:
7786 case NEON::BI__builtin_neon_vcvt2_high_bf16_mf8_fpm:
7787 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl2,
7788 llvm::FixedVectorType::get(BFloatTy, 8),
7789 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
7790 case NEON::BI__builtin_neon_vcvt1_low_f16_mf8_fpm:
7791 ExtractLow = true;
7792 LLVM_FALLTHROUGH;
7793 case NEON::BI__builtin_neon_vcvt1_f16_mf8_fpm:
7794 case NEON::BI__builtin_neon_vcvt1_high_f16_mf8_fpm:
7795 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl1,
7796 llvm::FixedVectorType::get(HalfTy, 8),
7797 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
7798 case NEON::BI__builtin_neon_vcvt2_low_f16_mf8_fpm:
7799 ExtractLow = true;
7800 LLVM_FALLTHROUGH;
7801 case NEON::BI__builtin_neon_vcvt2_f16_mf8_fpm:
7802 case NEON::BI__builtin_neon_vcvt2_high_f16_mf8_fpm:
7803 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl2,
7804 llvm::FixedVectorType::get(HalfTy, 8),
7805 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
7806 case NEON::BI__builtin_neon_vcvt_mf8_f32_fpm:
7807 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
7808 llvm::FixedVectorType::get(Int8Ty, 8),
7809 Ops[0]->getType(), false, Ops, E, "vfcvtn");
7810 case NEON::BI__builtin_neon_vcvt_mf8_f16_fpm:
7811 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
7812 llvm::FixedVectorType::get(Int8Ty, 8),
7813 llvm::FixedVectorType::get(HalfTy, 4), false, Ops,
7814 E, "vfcvtn");
7815 case NEON::BI__builtin_neon_vcvtq_mf8_f16_fpm:
7816 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
7817 llvm::FixedVectorType::get(Int8Ty, 16),
7818 llvm::FixedVectorType::get(HalfTy, 8), false, Ops,
7819 E, "vfcvtn");
7820 case NEON::BI__builtin_neon_vcvt_high_mf8_f32_fpm: {
7821 llvm::Type *Ty = llvm::FixedVectorType::get(Int8Ty, 16);
7822 Ops[0] = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
7823 uint64_t(0));
7824 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn2, Ty,
7825 Ops[1]->getType(), false, Ops, E, "vfcvtn2");
7826 }
7827
7828 case NEON::BI__builtin_neon_vdot_f16_mf8_fpm:
7829 case NEON::BI__builtin_neon_vdotq_f16_mf8_fpm:
7830 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot2, false, HalfTy,
7831 Ops, E, "fdot2");
7832 case NEON::BI__builtin_neon_vdot_lane_f16_mf8_fpm:
7833 case NEON::BI__builtin_neon_vdotq_lane_f16_mf8_fpm:
7834 ExtendLaneArg = true;
7835 LLVM_FALLTHROUGH;
7836 case NEON::BI__builtin_neon_vdot_laneq_f16_mf8_fpm:
7837 case NEON::BI__builtin_neon_vdotq_laneq_f16_mf8_fpm:
7838 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot2_lane,
7839 ExtendLaneArg, HalfTy, Ops, E, "fdot2_lane");
7840 case NEON::BI__builtin_neon_vdot_f32_mf8_fpm:
7841 case NEON::BI__builtin_neon_vdotq_f32_mf8_fpm:
7842 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot4, false,
7843 FloatTy, Ops, E, "fdot4");
7844 case NEON::BI__builtin_neon_vdot_lane_f32_mf8_fpm:
7845 case NEON::BI__builtin_neon_vdotq_lane_f32_mf8_fpm:
7846 ExtendLaneArg = true;
7847 LLVM_FALLTHROUGH;
7848 case NEON::BI__builtin_neon_vdot_laneq_f32_mf8_fpm:
7849 case NEON::BI__builtin_neon_vdotq_laneq_f32_mf8_fpm:
7850 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot4_lane,
7851 ExtendLaneArg, FloatTy, Ops, E, "fdot4_lane");
7852
7853 case NEON::BI__builtin_neon_vmlalbq_f16_mf8_fpm:
7854 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalb,
7855 {llvm::FixedVectorType::get(HalfTy, 8)}, Ops, E,
7856 "vmlal");
7857 case NEON::BI__builtin_neon_vmlaltq_f16_mf8_fpm:
7858 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalt,
7859 {llvm::FixedVectorType::get(HalfTy, 8)}, Ops, E,
7860 "vmlal");
7861 case NEON::BI__builtin_neon_vmlallbbq_f32_mf8_fpm:
7862 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlallbb,
7863 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7864 "vmlall");
7865 case NEON::BI__builtin_neon_vmlallbtq_f32_mf8_fpm:
7866 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlallbt,
7867 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7868 "vmlall");
7869 case NEON::BI__builtin_neon_vmlalltbq_f32_mf8_fpm:
7870 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalltb,
7871 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7872 "vmlall");
7873 case NEON::BI__builtin_neon_vmlallttq_f32_mf8_fpm:
7874 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalltt,
7875 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
7876 "vmlall");
7877 case NEON::BI__builtin_neon_vmlalbq_lane_f16_mf8_fpm:
7878 ExtendLaneArg = true;
7879 LLVM_FALLTHROUGH;
7880 case NEON::BI__builtin_neon_vmlalbq_laneq_f16_mf8_fpm:
7881 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalb_lane,
7882 ExtendLaneArg, HalfTy, Ops, E, "vmlal_lane");
7883 case NEON::BI__builtin_neon_vmlaltq_lane_f16_mf8_fpm:
7884 ExtendLaneArg = true;
7885 LLVM_FALLTHROUGH;
7886 case NEON::BI__builtin_neon_vmlaltq_laneq_f16_mf8_fpm:
7887 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalt_lane,
7888 ExtendLaneArg, HalfTy, Ops, E, "vmlal_lane");
7889 case NEON::BI__builtin_neon_vmlallbbq_lane_f32_mf8_fpm:
7890 ExtendLaneArg = true;
7891 LLVM_FALLTHROUGH;
7892 case NEON::BI__builtin_neon_vmlallbbq_laneq_f32_mf8_fpm:
7893 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlallbb_lane,
7894 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7895 case NEON::BI__builtin_neon_vmlallbtq_lane_f32_mf8_fpm:
7896 ExtendLaneArg = true;
7897 LLVM_FALLTHROUGH;
7898 case NEON::BI__builtin_neon_vmlallbtq_laneq_f32_mf8_fpm:
7899 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlallbt_lane,
7900 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7901 case NEON::BI__builtin_neon_vmlalltbq_lane_f32_mf8_fpm:
7902 ExtendLaneArg = true;
7903 LLVM_FALLTHROUGH;
7904 case NEON::BI__builtin_neon_vmlalltbq_laneq_f32_mf8_fpm:
7905 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalltb_lane,
7906 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7907 case NEON::BI__builtin_neon_vmlallttq_lane_f32_mf8_fpm:
7908 ExtendLaneArg = true;
7909 LLVM_FALLTHROUGH;
7910 case NEON::BI__builtin_neon_vmlallttq_laneq_f32_mf8_fpm:
7911 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalltt_lane,
7912 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
7913 case NEON::BI__builtin_neon_vamin_f16:
7914 case NEON::BI__builtin_neon_vaminq_f16:
7915 case NEON::BI__builtin_neon_vamin_f32:
7916 case NEON::BI__builtin_neon_vaminq_f32:
7917 case NEON::BI__builtin_neon_vaminq_f64: {
7918 Int = Intrinsic::aarch64_neon_famin;
7919 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famin");
7920 }
7921 case NEON::BI__builtin_neon_vamax_f16:
7922 case NEON::BI__builtin_neon_vamaxq_f16:
7923 case NEON::BI__builtin_neon_vamax_f32:
7924 case NEON::BI__builtin_neon_vamaxq_f32:
7925 case NEON::BI__builtin_neon_vamaxq_f64: {
7926 Int = Intrinsic::aarch64_neon_famax;
7927 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famax");
7928 }
7929 case NEON::BI__builtin_neon_vscale_f16:
7930 case NEON::BI__builtin_neon_vscaleq_f16:
7931 case NEON::BI__builtin_neon_vscale_f32:
7932 case NEON::BI__builtin_neon_vscaleq_f32:
7933 case NEON::BI__builtin_neon_vscaleq_f64: {
7934 Int = Intrinsic::aarch64_neon_fp8_fscale;
7935 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fscale");
7936 }
7937 }
7938}
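// Illustrative lowering for the famin/famax/fscale cases above (values are
// made up): the intrinsic is overloaded on the NEON type Ty, so
//
//   float32x4_t r = vaminq_f32(a, b);
//
// should become roughly
//
//   %r = call <4 x float> @llvm.aarch64.neon.famin.v4f32(<4 x float> %a,
//                                                        <4 x float> %b)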
7939
7940Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
7941 const CallExpr *E) {
7942 assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
7943 BuiltinID == BPF::BI__builtin_btf_type_id ||
7944 BuiltinID == BPF::BI__builtin_preserve_type_info ||
7945 BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
7946 "unexpected BPF builtin");
7947
7948 // A sequence number, injected into IR builtin functions, to
7949 // prevent CSE when the only difference between the functions
7950 // may just be the debuginfo metadata.
7951 static uint32_t BuiltinSeqNum;
7952
7953 switch (BuiltinID) {
7954 default:
7955 llvm_unreachable("Unexpected BPF builtin");
7956 case BPF::BI__builtin_preserve_field_info: {
7957 const Expr *Arg = E->getArg(0);
7958 bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;
7959
7960 if (!getDebugInfo()) {
7961 CGM.Error(E->getExprLoc(),
7962 "using __builtin_preserve_field_info() without -g");
7963 return IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
7964 : EmitLValue(Arg).emitRawPointer(*this);
7965 }
7966
7967 // Enable underlying preserve_*_access_index() generation.
7968 bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
7969 IsInPreservedAIRegion = true;
7970 Value *FieldAddr = IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
7971 : EmitLValue(Arg).emitRawPointer(*this);
7972 IsInPreservedAIRegion = OldIsInPreservedAIRegion;
7973
7974 ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
7975 Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue());
7976
7977 // Build the IR for the preserve_field_info intrinsic.
7978 llvm::Function *FnGetFieldInfo = Intrinsic::getOrInsertDeclaration(
7979 &CGM.getModule(), Intrinsic::bpf_preserve_field_info,
7980 {FieldAddr->getType()});
7981 return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});
7982 }
7983 case BPF::BI__builtin_btf_type_id:
7984 case BPF::BI__builtin_preserve_type_info: {
7985 if (!getDebugInfo()) {
7986 CGM.Error(E->getExprLoc(), "using builtin function without -g");
7987 return nullptr;
7988 }
7989
7990 const Expr *Arg0 = E->getArg(0);
7991 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
7992 Arg0->getType(), Arg0->getExprLoc());
7993
7994 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
7995 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
7996 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
7997
7998 llvm::Function *FnDecl;
7999 if (BuiltinID == BPF::BI__builtin_btf_type_id)
8000 FnDecl = Intrinsic::getOrInsertDeclaration(
8001 &CGM.getModule(), Intrinsic::bpf_btf_type_id, {});
8002 else
8003 FnDecl = Intrinsic::getOrInsertDeclaration(
8004 &CGM.getModule(), Intrinsic::bpf_preserve_type_info, {});
8005 CallInst *Fn = Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue});
8006 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
8007 return Fn;
8008 }
8009 case BPF::BI__builtin_preserve_enum_value: {
8010 if (!getDebugInfo()) {
8011 CGM.Error(E->getExprLoc(), "using builtin function without -g");
8012 return nullptr;
8013 }
8014
8015 const Expr *Arg0 = E->getArg(0);
8016 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
8017 Arg0->getType(), Arg0->getExprLoc());
8018
8019 // Find enumerator
8020 const auto *UO = cast<UnaryOperator>(Arg0->IgnoreParens());
8021 const auto *CE = cast<CStyleCastExpr>(UO->getSubExpr());
8022 const auto *DR = cast<DeclRefExpr>(CE->getSubExpr());
8023 const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl());
8024
8025 auto InitVal = Enumerator->getInitVal();
8026 std::string InitValStr;
8027 if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX))
8028 InitValStr = std::to_string(InitVal.getSExtValue());
8029 else
8030 InitValStr = std::to_string(InitVal.getZExtValue());
8031 std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr;
8032 Value *EnumStrVal = Builder.CreateGlobalString(EnumStr);
8033
8034 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
8035 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
8036 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
8037
8038 llvm::Function *IntrinsicFn = Intrinsic::getOrInsertDeclaration(
8039 &CGM.getModule(), Intrinsic::bpf_preserve_enum_value, {});
8040 CallInst *Fn =
8041 Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue});
8042 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
8043 return Fn;
8044 }
8045 }
8046}
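// Minimal usage sketch for the BPF builtins above (the struct, field and
// pointer are invented for illustration):
//
//   struct S { int a; };
//   unsigned k = __builtin_preserve_field_info(((struct S *)p)->a,
//                                              0 /* info kind */);
//
// With -g this becomes a call to the overloaded
// @llvm.bpf.preserve.field.info intrinsic, taking the field address
// (emitted under IsInPreservedAIRegion so the member access is described
// by preserve_*_access_index) and the constant info kind as an i64.  The
// other three builtins instead pass the sequence number plus a flag and
// hang the standalone debug type off the call as
// !llvm.preserve.access.index metadata.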
8047
8049Value *CodeGenFunction::BuildVector(ArrayRef<Value *> Ops) {
8050 assert((Ops.size() & (Ops.size() - 1)) == 0 &&
8051 "Not a power-of-two sized vector!");
8052 bool AllConstants = true;
8053 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
8054 AllConstants &= isa<Constant>(Ops[i]);
8055
8056 // If this is a constant vector, create a ConstantVector.
8057 if (AllConstants) {
8058 SmallVector<llvm::Constant *, 16> CstOps;
8059 for (llvm::Value *Op : Ops)
8060 CstOps.push_back(cast<Constant>(Op));
8061 return llvm::ConstantVector::get(CstOps);
8062 }
8063
8064 // Otherwise, insertelement the values to build the vector.
8065 Value *Result = llvm::PoisonValue::get(
8066 llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size()));
8067
8068 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
8069 Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt64(i));
8070
8071 return Result;
8072}
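// Illustrative example of the two paths above: if every operand is a
// Constant the result folds to a ConstantVector such as
// <4 x i32> <i32 0, i32 1, i32 2, i32 3>; otherwise the vector is built
// from poison with one insertelement per operand, roughly
//
//   %v0 = insertelement <4 x i32> poison, i32 %a, i64 0
//   %v1 = insertelement <4 x i32> %v0,    i32 %b, i64 1
//   ...
//
// (types and operand names here are made up).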
8073
8074Value *CodeGenFunction::EmitAArch64CpuInit() {
8075 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
8076 llvm::FunctionCallee Func =
8077 CGM.CreateRuntimeFunction(FTy, "__init_cpu_features_resolver");
8078 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
8079 cast<llvm::GlobalValue>(Func.getCallee())
8080 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
8081 return Builder.CreateCall(Func);
8082}
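// Illustrative result of the helper above: each caller simply emits
//
//   call void @__init_cpu_features_resolver()
//
// i.e. a dso_local reference to the runtime routine (provided by
// compiler-rt's AArch64 CPU-model support) that fills in
// __aarch64_cpu_features before any feature bits are inspected.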
8083
8084Value *CodeGenFunction::EmitAArch64CpuSupports(const CallExpr *E) {
8085 const Expr *ArgExpr = E->getArg(0)->IgnoreParenCasts();
8086 StringRef ArgStr = cast<StringLiteral>(ArgExpr)->getString();
8087 llvm::SmallVector<StringRef, 8> Features;
8088 ArgStr.split(Features, "+");
8089 for (auto &Feature : Features) {
8090 Feature = Feature.trim();
8091 if (!llvm::AArch64::parseFMVExtension(Feature))
8092 return Builder.getFalse();
8093 if (Feature != "default")
8094 Features.push_back(Feature);
8095 }
8096 return EmitAArch64CpuSupports(Features);
8097}
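// Illustrative front-end view of the routine above (the feature names are
// just examples): a call such as
//
//   if (__builtin_cpu_supports("sve2+bf16")) { ... }
//
// splits the string on '+', folds to false as soon as parseFMVExtension
// rejects a name, and otherwise hands the feature list to the mask-based
// overload below.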
8098
8099llvm::Value *
8100CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) {
8101 llvm::APInt FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs);
8102 Value *Result = Builder.getTrue();
8103 if (FeaturesMask != 0) {
8104 // Get features from structure in runtime library
8105 // struct {
8106 // unsigned long long features;
8107 // } __aarch64_cpu_features;
8108 llvm::Type *STy = llvm::StructType::get(Int64Ty);
8109 llvm::Constant *AArch64CPUFeatures =
8110 CGM.CreateRuntimeVariable(STy, "__aarch64_cpu_features");
8111 cast<llvm::GlobalValue>(AArch64CPUFeatures)->setDSOLocal(true);
8112 llvm::Value *CpuFeatures = Builder.CreateGEP(
8113 STy, AArch64CPUFeatures,
8114 {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 0)});
8115 Value *Features = Builder.CreateAlignedLoad(Int64Ty, CpuFeatures,
8116 CharUnits::fromQuantity(8));
8117 Value *Mask = Builder.getInt(FeaturesMask.trunc(64));
8118 Value *Bitset = Builder.CreateAnd(Features, Mask);
8119 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
8120 Result = Builder.CreateAnd(Result, Cmp);
8121 }
8122 return Result;
8123}
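// Illustrative shape of the IR produced above when the requested mask is
// non-zero (the constant shown is made up):
//
//   %feats = load i64, ptr @__aarch64_cpu_features, align 8
//   %bits  = and i64 %feats, 68719476736
//   %ok    = icmp eq i64 %bits, 68719476736
//
// so the check succeeds only if every requested feature bit is set.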