//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "ABIInfoImpl.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGDebugInfo.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <numeric>
#include <optional>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// such as the 'for', 'sections', and 'atomic' directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };
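  // Illustrative mapping (sketch, not part of the file itself):
  //   #pragma omp parallel -> ParallelOutlinedRegion (body outlined)
  //   #pragma omp task     -> TaskOutlinedRegion     (body outlined)
  //   #pragma omp atomic   -> InlinedRegion          (emitted inline)
  //   #pragma omp target   -> TargetRegion           (outlined kernel body)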

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
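  // Illustrative sketch (not part of the file itself): for an untied task the
  // action above turns the task body into a switch over the part id, roughly
  //   switch (*partid) {
  //   case 0: /* up to first scheduling point */  *partid = 1; return;
  //   case 1: /* up to second scheduling point */ *partid = 2; return;
  //   ...
  //   }
  // so each re-invocation of the task entry resumes after the last
  // completed part.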
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look up in a
    // list of captured variables; we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application, so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enum elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
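
// Illustrative example (not part of the file itself): these are bitmask
// values that are OR-ed into the ident_t flags field, e.g. an implicit
// barrier emitted at the end of a worksharing loop would carry
//   unsigned Flags = OMP_IDENT_KMPC | OMP_IDENT_BARRIER_IMPL_FOR;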

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**<  source[4] in Fortran, do not use for
///                                  C++  */
///    char const *psource;    /**<  String describing the source location.
///                                  The string is composed of semi-colon
///                                  separated fields which describe the source
///                                  file, the function and a pair of line
///                                  numbers that delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
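
// Illustrative example (not part of the file itself): a psource string, as
// assembled by getIdentStringFromSourceLocation() further down, has the shape
//   ";<file>;<function>;<line>;<column>;;"
// for instance ";test.c;main;13;5;;".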

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
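
// Illustrative example (not part of the file itself): schedule modifiers live
// in the high bits and are combined with the base schedule, so
// 'schedule(monotonic: dynamic, 4)' is encoded roughly as
//   (OpenMPSchedType)(OMP_sch_dynamic_chunked | OMP_sch_modifier_monotonic)
// with the chunk size 4 passed separately to the runtime.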

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignRawAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.emitRawPointer(CGF);
  llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
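
// Illustrative sketch (not part of the file itself) of the loop emitted by
// EmitOMPAggregateInit above, in C-like pseudocode:
//   dest_end = dest + n;
//   if (dest == dest_end) goto done;      // omp.arrayinit.isempty
//   do {                                  // omp.arrayinit.body
//     init(*dest); ++dest;                // and ++src for UDR initializers
//   } while (dest != dest_end);           // omp.arraycpy.done
//   done:;                                // omp.arrayinit.done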

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(E))
    return CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<ArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress().getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr =
        PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      BaseLV.getAddress().withElementType(CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<ArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress();
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.emitRawPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.emitRawPointer(CGF), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
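
// Illustrative note (not part of the file itself): for a section reduction
// such as 'reduction(+ : a[1:n])' the private buffer covers only the section,
// so the code above offsets the private pointer by (original base - shared
// section begin); the original base expression's subscripts then index the
// private copy correctly.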

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(
      CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
      CGM.getLangOpts().OpenMPOffloadMandatory,
      /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
      hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
  OMPBuilder.initialize();
  OMPBuilder.loadOffloadInfoMetadata(CGM.getLangOpts().OpenMPIsTargetDevice
                                         ? CGM.getLangOpts().OMPHostIRFile
                                         : StringRef{});
  OMPBuilder.setConfig(Config);

  // The user forces the compiler to behave as if omp requires
  // unified_shared_memory was given.
  if (CGM.getLangOpts().OpenMPForceUSM) {
    HasRequiresUnifiedSharedMemory = true;
    OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
  }
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  return OMPBuilder.createPlatformSpecificName(Parts);
}

static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress());
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress());
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
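
// Illustrative example (not part of the file itself): for a user-defined
// reduction such as
//   #pragma omp declare reduction(merge : T : omp_out = f(omp_out, omp_in)) \
//           initializer(omp_priv = T())
// this helper is called twice and produces internal functions named roughly
// '.omp_combiner.' and '.omp_initializer.', each taking two restrict T*
// parameters as described in the comment at the top of the function.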

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF)
    FunctionUDRMap[CGF->CurFn].push_back(D);
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize, we split the block at
    // IP, use the new block (=BB) as the destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)), which is then fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
      return llvm::Error::success();
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
  std::string Suffix = getName({"omp_outlined"});
  return (Name + Suffix).str();
}

std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
  return getOutlinedHelperName(CGF.CurFn->getName());
}

std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
  std::string Suffix = getName({"omp", "reduction", "reduction_func"});
  return (Name + Suffix).str();
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  assert(!Elem.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt",
                                                 CGF.Builder.GetInsertBlock());
  } else {
    Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt->getIterator());
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  if (Elem.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.ServiceInsertPt;
    Elem.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";";
  if (auto *DbgInfo = CGF.getDebugInfo())
    OS << DbgInfo->remapDIPath(PLoc.getFilename());
  else
    OS << PLoc.getFilename();
  OS << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}

llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags, bool EmitLoc) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
                       llvm::codegenoptions::NoDebugInfo) ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    std::string FileName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    if (auto *DbgInfo = CGF.getDebugInfo())
      FileName = DbgInfo->remapDIPath(PLoc.getFilename());
    else
      FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}

llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with thread id passed as
      // argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock)
          OpenMPLocThreadIDMap[CGF.CurFn].ThreadID = ThreadID;
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  if (!Elem.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.ServiceInsertPt);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.ThreadID = Call;
  return Call;
}
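
// Illustrative example (not part of the file itself): outside an outlined
// region the code above materializes and caches a call along the lines of
//   %tid = call i32 @__kmpc_global_thread_num(ptr @ident)
// at the service insertion point, so later queries in the same function
// reuse the cached value instead of emitting another runtime call.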

void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (auto I = FunctionUDRMap.find(CGF.CurFn); I != FunctionUDRMap.end()) {
    for (const auto *D : I->second)
      UDRMap.erase(D);
    FunctionUDRMap.erase(I);
  }
  if (auto I = FunctionUDMMap.find(CGF.CurFn); I != FunctionUDMMap.end()) {
    for (const auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
}

llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}

static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
convertDeviceClause(const VarDecl *VD) {
  std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;

  switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
  case OMPDeclareTargetDeclAttr::DT_Host:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
  case OMPDeclareTargetDeclAttr::DT_NoHost:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
  case OMPDeclareTargetDeclAttr::DT_Any:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
  default:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
  }
}

static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
convertCaptureClause(const VarDecl *VD) {
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!MapType)
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
  switch ((int)*MapType) { // Avoid -Wcovered-switch-default
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
  default:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
  }
}
1532
1533static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
1534 CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
1535 SourceLocation BeginLoc, llvm::StringRef ParentName = "") {
1536
1537 auto FileInfoCallBack = [&]() {
1539 PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);
1540
1541 llvm::sys::fs::UniqueID ID;
1542 if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1543 PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
1544 }
1545
1546 return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
1547 };
1548
1549 return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack, ParentName);
1550}
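// Note (editorial): the callback above first honors #line directives via the
// presumed location; if the presumed file name does not name a real file (so
// no unique file ID can be computed), it falls back to the location with line
// directives ignored, so the offload entry still gets a stable file/line pair.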
1551
1553 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
1554
1555 auto LinkageForVariable = [&VD, this]() {
1557 };
1558
1559 std::vector<llvm::GlobalVariable *> GeneratedRefs;
1560
1561 llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
1563 llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
1566 VD->isExternallyVisible(),
1568 VD->getCanonicalDecl()->getBeginLoc()),
1569 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
1570 CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
1571 LinkageForVariable);
1572
1573 if (!addr)
1574 return ConstantAddress::invalid();
1575 return ConstantAddress(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
1576}
1577
1578llvm::Constant *
1579CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1580 assert(!CGM.getLangOpts().OpenMPUseTLS ||
1581 !CGM.getContext().getTargetInfo().isTLSSupported());
1582 // Lookup the entry, lazily creating it if necessary.
1583 std::string Suffix = getName({"cache", ""});
1584 return OMPBuilder.getOrCreateInternalVariable(
1585 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
1586}
1587
1588Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1589 const VarDecl *VD,
1590 Address VDAddr,
1591 SourceLocation Loc) {
1592 if (CGM.getLangOpts().OpenMPUseTLS &&
1593 CGM.getContext().getTargetInfo().isTLSSupported())
1594 return VDAddr;
1595
1596 llvm::Type *VarTy = VDAddr.getElementType();
1597 llvm::Value *Args[] = {
1598 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1599 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy),
1600 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1601 getOrCreateThreadPrivateCache(VD)};
1602 return Address(
1603 CGF.EmitRuntimeCall(
1604 OMPBuilder.getOrCreateRuntimeFunction(
1605 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1606 Args),
1607 CGF.Int8Ty, VDAddr.getAlignment());
1608}
1609
1610void CGOpenMPRuntime::emitThreadPrivateVarInit(
1611 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1612 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1613 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1614 // library.
1615 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1616 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1617 CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1618 OMPLoc);
1619 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1620 // to register constructor/destructor for variable.
1621 llvm::Value *Args[] = {
1622 OMPLoc,
1623 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.VoidPtrTy),
1624 Ctor, CopyCtor, Dtor};
1625 CGF.EmitRuntimeCall(
1626 OMPBuilder.getOrCreateRuntimeFunction(
1627 CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1628 Args);
1629}
1630
1631llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1632 const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1633 bool PerformInit, CodeGenFunction *CGF) {
1634 if (CGM.getLangOpts().OpenMPUseTLS &&
1635 CGM.getContext().getTargetInfo().isTLSSupported())
1636 return nullptr;
1637
1638 VD = VD->getDefinition(CGM.getContext());
1639 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
1640 QualType ASTTy = VD->getType();
1641
1642 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1643 const Expr *Init = VD->getAnyInitializer();
1644 if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1645 // Generate function that re-emits the declaration's initializer into the
1646 // threadprivate copy of the variable VD
1647 CodeGenFunction CtorCGF(CGM);
1648 FunctionArgList Args;
1649 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1650 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1651 ImplicitParamKind::Other);
1652 Args.push_back(&Dst);
1653
1654 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1655 CGM.getContext().VoidPtrTy, Args);
1656 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1657 std::string Name = getName({"__kmpc_global_ctor_", ""});
1658 llvm::Function *Fn =
1659 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1660 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1661 Args, Loc, Loc);
1662 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
1663 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1664 CGM.getContext().VoidPtrTy, Dst.getLocation());
1665 Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
1666 VDAddr.getAlignment());
1667 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1668 /*IsInitializer=*/true);
1669 ArgVal = CtorCGF.EmitLoadOfScalar(
1670 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1671 CGM.getContext().VoidPtrTy, Dst.getLocation());
1672 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1673 CtorCGF.FinishFunction();
1674 Ctor = Fn;
1675 }
1676 if (VD->getType().isDestructedType()) {
1677 // Generate function that emits destructor call for the threadprivate copy
1678 // of the variable VD
1679 CodeGenFunction DtorCGF(CGM);
1680 FunctionArgList Args;
1681 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1682 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1683 ImplicitParamKind::Other);
1684 Args.push_back(&Dst);
1685
1686 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1687 CGM.getContext().VoidTy, Args);
1688 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1689 std::string Name = getName({"__kmpc_global_dtor_", ""});
1690 llvm::Function *Fn =
1691 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1692 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1693 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1694 Loc, Loc);
1695 // Create a scope with an artificial location for the body of this function.
1696 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1697 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1698 DtorCGF.GetAddrOfLocalVar(&Dst),
1699 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1700 DtorCGF.emitDestroy(
1701 Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
1702 DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1703 DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1704 DtorCGF.FinishFunction();
1705 Dtor = Fn;
1706 }
1707 // Do not emit init function if it is not required.
1708 if (!Ctor && !Dtor)
1709 return nullptr;
1710
1711 // Copying constructor for the threadprivate variable.
1712 // Must be NULL: this parameter is reserved by the runtime, which currently
1713 // requires it to always be NULL; otherwise it fires an assertion.
1714 CopyCtor = llvm::Constant::getNullValue(CGM.UnqualPtrTy);
1715 if (Ctor == nullptr) {
1716 Ctor = llvm::Constant::getNullValue(CGM.UnqualPtrTy);
1717 }
1718 if (Dtor == nullptr) {
1719 Dtor = llvm::Constant::getNullValue(CGM.UnqualPtrTy);
1720 }
1721 if (!CGF) {
1722 auto *InitFunctionTy =
1723 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1724 std::string Name = getName({"__omp_threadprivate_init_", ""});
1725 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
1726 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
1727 CodeGenFunction InitCGF(CGM);
1728 FunctionArgList ArgList;
1729 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1730 CGM.getTypes().arrangeNullaryFunction(), ArgList,
1731 Loc, Loc);
1732 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1733 InitCGF.FinishFunction();
1734 return InitFunction;
1735 }
1736 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1737 }
1738 return nullptr;
1739}
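// Illustrative sketch (editorial; assumes the default libomp host runtime):
// for 'T x; #pragma omp threadprivate(x)' with a C++ initializer, the code
// above produces, modulo the separators added by getName, glue of this shape:
// \code
//   void *__kmpc_global_ctor_(void *dst) { /* re-run x's initializer */ }
//   void __kmpc_global_dtor_(void *dst) { /* run ~T() on the copy */ }
//   void __omp_threadprivate_init_() {
//     __kmpc_global_thread_num(&loc); // force runtime initialization
//     __kmpc_threadprivate_register(&loc, &x, ctor, /*cctor=*/NULL, dtor);
//   }
// \endcode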
1740
1741void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
1742 llvm::GlobalValue *GV) {
1743 std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
1744 OMPDeclareTargetDeclAttr::getActiveAttr(FD);
1745
1746 // We only need to handle active 'indirect' declare target functions.
1747 if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
1748 return;
1749
1750 // Get a mangled name to store the new device global in.
1751 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
1752 CGM, OMPBuilder, FD->getCanonicalDecl()->getBeginLoc(), FD->getName());
1753 SmallString<128> Name;
1754 OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);
1755
1756 // We need to generate a new global to hold the address of the indirectly
1757 // called device function. Doing this allows us to keep the visibility and
1758 // linkage of the associated function unchanged while allowing the runtime to
1759 // access its value.
1760 llvm::GlobalValue *Addr = GV;
1761 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
1762 Addr = new llvm::GlobalVariable(
1763 CGM.getModule(), CGM.VoidPtrTy,
1764 /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
1765 nullptr, llvm::GlobalValue::NotThreadLocal,
1766 CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
1767 Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1768 }
1769
1770 OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
1771 Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
1772 llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
1773 llvm::GlobalValue::WeakODRLinkage);
1774}
1775
1777 QualType VarType,
1778 StringRef Name) {
1779 std::string Suffix = getName({"artificial", ""});
1780 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
1781 llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
1782 VarLVType, Twine(Name).concat(Suffix).str());
1783 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
1784 CGM.getContext().getTargetInfo().isTLSSupported()) {
1785 GAddr->setThreadLocal(/*Val=*/true);
1786 return Address(GAddr, GAddr->getValueType(),
1787 CGM.getContext().getTypeAlignInChars(VarType));
1788 }
1789 std::string CacheSuffix = getName({"cache", ""});
1790 llvm::Value *Args[] = {
1791 emitUpdateLocation(CGF, SourceLocation()),
1792 getThreadID(CGF, SourceLocation()),
1793 CGF.Builder.CreatePointerCast(GAddr, CGM.VoidPtrTy),
1794 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
1795 /*isSigned=*/false),
1796 OMPBuilder.getOrCreateInternalVariable(
1797 CGM.VoidPtrPtrTy,
1798 Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
1799 return Address(
1800 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1801 CGF.EmitRuntimeCall(
1802 OMPBuilder.getOrCreateRuntimeFunction(
1803 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1804 Args),
1805 CGF.Builder.getPtrTy(0)),
1806 VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
1807}
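// Note (editorial): two strategies are implemented above. When TLS is usable,
// the artificial variable simply becomes a thread_local global; otherwise each
// access goes through __kmpc_threadprivate_cached with a per-variable cache
// that the runtime manages.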
1808
1809void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
1810 const RegionCodeGenTy &ThenGen,
1811 const RegionCodeGenTy &ElseGen) {
1812 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1813
1814 // If the condition constant folds and can be elided, try to avoid emitting
1815 // the condition and the dead arm of the if/else.
1816 bool CondConstant;
1817 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1818 if (CondConstant)
1819 ThenGen(CGF);
1820 else
1821 ElseGen(CGF);
1822 return;
1823 }
1824
1825 // Otherwise, the condition did not fold, or we couldn't elide it. Just
1826 // emit the conditional branch.
1827 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
1828 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
1829 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
1830 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1831
1832 // Emit the 'then' code.
1833 CGF.EmitBlock(ThenBlock);
1834 ThenGen(CGF);
1835 CGF.EmitBranch(ContBlock);
1836 // Emit the 'else' code if present.
1837 // There is no need to emit a line number for an unconditional branch.
1838 (void)ApplyDebugLocation::CreateEmpty(CGF);
1839 CGF.EmitBlock(ElseBlock);
1840 ElseGen(CGF);
1841 // There is no need to emit a line number for an unconditional branch.
1842 (void)ApplyDebugLocation::CreateEmpty(CGF);
1843 CGF.EmitBranch(ContBlock);
1844 // Emit the continuation block for code after the if.
1845 CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1846}
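// Illustrative sketch (editorial, not part of the source): for a non-constant
// condition the function above emits the following control flow:
// \code
//   br i1 %cond, label %omp_if.then, label %omp_if.else
// omp_if.then:                ; ThenGen
//   br label %omp_if.end
// omp_if.else:                ; ElseGen
//   br label %omp_if.end
// omp_if.end:
// \endcode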
1847
1848void CGOpenMPRuntime::emitParallelCall(
1849 CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
1850 ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond,
1851 llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier,
1852 OpenMPSeverityClauseKind Severity, const Expr *Message) {
1853 if (!CGF.HaveInsertPoint())
1854 return;
1855 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
1856 auto &M = CGM.getModule();
1857 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
1858 this](CodeGenFunction &CGF, PrePostActionTy &) {
1859 // Build call __kmpc_fork_call(loc, n, microtask, var1, ..., varn);
1860 llvm::Value *Args[] = {
1861 RTLoc,
1862 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
1863 OutlinedFn};
1864 llvm::SmallVector<llvm::Value *, 16> RealArgs;
1865 RealArgs.append(std::begin(Args), std::end(Args));
1866 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
1867
1868 llvm::FunctionCallee RTLFn =
1869 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
1870 CGF.EmitRuntimeCall(RTLFn, RealArgs);
1871 };
1872 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
1873 this](CodeGenFunction &CGF, PrePostActionTy &) {
1874 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
1875 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
1876 // Build calls:
1877 // __kmpc_serialized_parallel(&Loc, GTid);
1878 llvm::Value *Args[] = {RTLoc, ThreadID};
1879 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1880 M, OMPRTL___kmpc_serialized_parallel),
1881 Args);
1882
1883 // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
1884 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
1885 RawAddress ZeroAddrBound =
1886 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
1887 /*Name=*/".bound.zero.addr");
1888 CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
1889 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
1890 // ThreadId for serialized parallels is 0.
1891 OutlinedFnArgs.push_back(ThreadIDAddr.emitRawPointer(CGF));
1892 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
1893 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
1894
1895 // Ensure we do not inline the function. This is trivially true for the ones
1896 // passed to __kmpc_fork_call, but the ones called in serialized regions
1897 // could be inlined. This is not perfect, but it is closer to the invariant
1898 // we want, namely, every data environment starts with a new function.
1899 // TODO: We should pass the if condition to the runtime function and do the
1900 // handling there. Much cleaner code.
1901 OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
1902 OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
1903 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
1904
1905 // __kmpc_end_serialized_parallel(&Loc, GTid);
1906 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
1907 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1908 M, OMPRTL___kmpc_end_serialized_parallel),
1909 EndArgs);
1910 };
1911 if (IfCond) {
1912 emitIfClause(CGF, IfCond, ThenGen, ElseGen);
1913 } else {
1914 RegionCodeGenTy ThenRCG(ThenGen);
1915 ThenRCG(CGF);
1916 }
1917}
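// Illustrative sketch (editorial; assumes the default libomp host runtime):
// '#pragma omp parallel if(cond)' therefore lowers approximately to:
// \code
//   if (cond) {
//     __kmpc_fork_call(&loc, nargs, microtask, captured...);
//   } else {
//     gtid = __kmpc_global_thread_num(&loc);
//     __kmpc_serialized_parallel(&loc, gtid);
//     microtask(&gtid, &zero_bound, captured...);
//     __kmpc_end_serialized_parallel(&loc, gtid);
//   }
// \endcode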
1918
1919// If we're inside an (outlined) parallel region, use the region info's
1920// thread-ID variable (it is passed as the first argument of the outlined
1921// function as "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
1922// region but in a regular serial code region, get the thread ID by calling
1923// kmp_int32 kmpc_global_thread_num(ident_t *loc), stash this thread ID in a
1924// temporary, and return the address of that temporary.
1927 if (auto *OMPRegionInfo =
1928 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1929 if (OMPRegionInfo->getThreadIDVariable())
1930 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
1931
1932 llvm::Value *ThreadID = getThreadID(CGF, Loc);
1933 QualType Int32Ty =
1934 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
1935 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
1936 CGF.EmitStoreOfScalar(ThreadID,
1937 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
1938
1939 return ThreadIDTemp;
1940}
1941
1942llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1943 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
1944 std::string Name = getName({Prefix, "var"});
1945 return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
1946}
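// Note (editorial): for '#pragma omp critical(foo)' this typically yields an
// internal lock variable named along the lines of ".gomp_critical_user_foo.var"
// (the exact separators come from getName); all critical constructs that share
// the name share the lock.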
1947
1948namespace {
1949/// Common pre(post)-action for different OpenMP constructs.
1950class CommonActionTy final : public PrePostActionTy {
1951 llvm::FunctionCallee EnterCallee;
1952 ArrayRef<llvm::Value *> EnterArgs;
1953 llvm::FunctionCallee ExitCallee;
1954 ArrayRef<llvm::Value *> ExitArgs;
1955 bool Conditional;
1956 llvm::BasicBlock *ContBlock = nullptr;
1957
1958public:
1959 CommonActionTy(llvm::FunctionCallee EnterCallee,
1960 ArrayRef<llvm::Value *> EnterArgs,
1961 llvm::FunctionCallee ExitCallee,
1962 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
1963 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
1964 ExitArgs(ExitArgs), Conditional(Conditional) {}
1965 void Enter(CodeGenFunction &CGF) override {
1966 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
1967 if (Conditional) {
1968 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
1969 auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
1970 ContBlock = CGF.createBasicBlock("omp_if.end");
1971 // Generate the branch (If-stmt)
1972 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
1973 CGF.EmitBlock(ThenBlock);
1974 }
1975 }
1976 void Done(CodeGenFunction &CGF) {
1977 // Emit the rest of blocks/branches
1978 CGF.EmitBranch(ContBlock);
1979 CGF.EmitBlock(ContBlock, true);
1980 }
1981 void Exit(CodeGenFunction &CGF) override {
1982 CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
1983 }
1984};
1985} // anonymous namespace
1986
1988 StringRef CriticalName,
1989 const RegionCodeGenTy &CriticalOpGen,
1990 SourceLocation Loc, const Expr *Hint) {
1991 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
1992 // CriticalOpGen();
1993 // __kmpc_end_critical(ident_t *, gtid, Lock);
1994 // Prepare arguments and build a call to __kmpc_critical
1995 if (!CGF.HaveInsertPoint())
1996 return;
1997 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1998 getCriticalRegionLock(CriticalName)};
1999 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2000 std::end(Args));
2001 if (Hint) {
2002 EnterArgs.push_back(CGF.Builder.CreateIntCast(
2003 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2004 }
2005 CommonActionTy Action(
2006 OMPBuilder.getOrCreateRuntimeFunction(
2007 CGM.getModule(),
2008 Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2009 EnterArgs,
2010 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2011 OMPRTL___kmpc_end_critical),
2012 Args);
2013 CriticalOpGen.setAction(Action);
2014 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2015}
2016
2018 const RegionCodeGenTy &MasterOpGen,
2020 if (!CGF.HaveInsertPoint())
2021 return;
2022 // if(__kmpc_master(ident_t *, gtid)) {
2023 // MasterOpGen();
2024 // __kmpc_end_master(ident_t *, gtid);
2025 // }
2026 // Prepare arguments and build a call to __kmpc_master
2027 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2028 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2029 CGM.getModule(), OMPRTL___kmpc_master),
2030 Args,
2031 OMPBuilder.getOrCreateRuntimeFunction(
2032 CGM.getModule(), OMPRTL___kmpc_end_master),
2033 Args,
2034 /*Conditional=*/true);
2035 MasterOpGen.setAction(Action);
2036 emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2037 Action.Done(CGF);
2038}
2039
2041 const RegionCodeGenTy &MaskedOpGen,
2042 SourceLocation Loc, const Expr *Filter) {
2043 if (!CGF.HaveInsertPoint())
2044 return;
2045 // if(__kmpc_masked(ident_t *, gtid, filter)) {
2046 // MaskedOpGen();
2047 // __kmpc_end_masked(ident_t *, gtid);
2048 // }
2049 // Prepare arguments and build a call to __kmpc_masked
2050 llvm::Value *FilterVal = Filter
2051 ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2052 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2053 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2054 FilterVal};
2055 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2056 getThreadID(CGF, Loc)};
2057 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2058 CGM.getModule(), OMPRTL___kmpc_masked),
2059 Args,
2060 OMPBuilder.getOrCreateRuntimeFunction(
2061 CGM.getModule(), OMPRTL___kmpc_end_masked),
2062 ArgsEnd,
2063 /*Conditional=*/true);
2064 MaskedOpGen.setAction(Action);
2065 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2066 Action.Done(CGF);
2067}
2068
2071 if (!CGF.HaveInsertPoint())
2072 return;
2073 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2074 OMPBuilder.createTaskyield(CGF.Builder);
2075 } else {
2076 // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2077 llvm::Value *Args[] = {
2079 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2080 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2081 CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2082 Args);
2083 }
2084
2085 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2086 Region->emitUntiedSwitch(CGF);
2087}
2088
2090 const RegionCodeGenTy &TaskgroupOpGen,
2092 if (!CGF.HaveInsertPoint())
2093 return;
2094 // __kmpc_taskgroup(ident_t *, gtid);
2095 // TaskgroupOpGen();
2096 // __kmpc_end_taskgroup(ident_t *, gtid);
2097 // Prepare arguments and build a call to __kmpc_taskgroup
2098 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2099 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2100 CGM.getModule(), OMPRTL___kmpc_taskgroup),
2101 Args,
2102 OMPBuilder.getOrCreateRuntimeFunction(
2103 CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2104 Args);
2105 TaskgroupOpGen.setAction(Action);
2106 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2107}
2108
2109/// Given an array of pointers to variables, project the address of a
2110/// given variable.
2112 unsigned Index, const VarDecl *Var) {
2113 // Pull out the pointer to the variable.
2114 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2115 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2116
2117 llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
2118 return Address(Ptr, ElemTy, CGF.getContext().getDeclAlign(Var));
2119}
2120
2122 CodeGenModule &CGM, llvm::Type *ArgsElemType,
2123 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2124 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2126 ASTContext &C = CGM.getContext();
2127 // void copy_func(void *LHSArg, void *RHSArg);
2128 FunctionArgList Args;
2129 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2130 ImplicitParamKind::Other);
2131 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2132 ImplicitParamKind::Other);
2133 Args.push_back(&LHSArg);
2134 Args.push_back(&RHSArg);
2135 const auto &CGFI =
2136 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2137 std::string Name =
2138 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2139 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2140 llvm::GlobalValue::InternalLinkage, Name,
2141 &CGM.getModule());
2143 Fn->setDoesNotRecurse();
2144 CodeGenFunction CGF(CGM);
2145 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2146 // Dest = (void*[n])(LHSArg);
2147 // Src = (void*[n])(RHSArg);
2148 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2149 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2150 CGF.Builder.getPtrTy(0)),
2151 ArgsElemType, CGF.getPointerAlign());
2152 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2153 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2154 CGF.Builder.getPtrTy(0)),
2155 ArgsElemType, CGF.getPointerAlign());
2156 // *(Type0*)Dst[0] = *(Type0*)Src[0];
2157 // *(Type1*)Dst[1] = *(Type1*)Src[1];
2158 // ...
2159 // *(Typen*)Dst[n] = *(Typen*)Src[n];
2160 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2161 const auto *DestVar =
2162 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2163 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2164
2165 const auto *SrcVar =
2166 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2167 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2168
2169 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2170 QualType Type = VD->getType();
2171 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2172 }
2173 CGF.FinishFunction();
2174 return Fn;
2175}
2176
2178 const RegionCodeGenTy &SingleOpGen,
2180 ArrayRef<const Expr *> CopyprivateVars,
2181 ArrayRef<const Expr *> SrcExprs,
2182 ArrayRef<const Expr *> DstExprs,
2183 ArrayRef<const Expr *> AssignmentOps) {
2184 if (!CGF.HaveInsertPoint())
2185 return;
2186 assert(CopyprivateVars.size() == SrcExprs.size() &&
2187 CopyprivateVars.size() == DstExprs.size() &&
2188 CopyprivateVars.size() == AssignmentOps.size());
2189 ASTContext &C = CGM.getContext();
2190 // int32 did_it = 0;
2191 // if(__kmpc_single(ident_t *, gtid)) {
2192 // SingleOpGen();
2193 // __kmpc_end_single(ident_t *, gtid);
2194 // did_it = 1;
2195 // }
2196 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2197 // <copy_func>, did_it);
2198
2199 Address DidIt = Address::invalid();
2200 if (!CopyprivateVars.empty()) {
2201 // int32 did_it = 0;
2202 QualType KmpInt32Ty =
2203 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2204 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2205 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2206 }
2207 // Prepare arguments and build a call to __kmpc_single
2208 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2209 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2210 CGM.getModule(), OMPRTL___kmpc_single),
2211 Args,
2212 OMPBuilder.getOrCreateRuntimeFunction(
2213 CGM.getModule(), OMPRTL___kmpc_end_single),
2214 Args,
2215 /*Conditional=*/true);
2216 SingleOpGen.setAction(Action);
2217 emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2218 if (DidIt.isValid()) {
2219 // did_it = 1;
2220 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2221 }
2222 Action.Done(CGF);
2223 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2224 // <copy_func>, did_it);
2225 if (DidIt.isValid()) {
2226 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2227 QualType CopyprivateArrayTy = C.getConstantArrayType(
2228 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
2229 /*IndexTypeQuals=*/0);
2230 // Create a list of all private variables for copyprivate.
2231 Address CopyprivateList =
2232 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2233 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2234 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2235 CGF.Builder.CreateStore(
2236 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2237 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2238 CGF.VoidPtrTy),
2239 Elem);
2240 }
2241 // Build function that copies private values from the single region to all
2242 // other threads in the corresponding parallel region.
2243 llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2244 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
2245 SrcExprs, DstExprs, AssignmentOps, Loc);
2246 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2247 Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2248 CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
2249 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2250 llvm::Value *Args[] = {
2251 emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2252 getThreadID(CGF, Loc), // i32 <gtid>
2253 BufSize, // size_t <buf_size>
2254 CL.emitRawPointer(CGF), // void *<copyprivate list>
2255 CpyFn, // void (*) (void *, void *) <copy_func>
2256 DidItVal // i32 did_it
2257 };
2258 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2259 CGM.getModule(), OMPRTL___kmpc_copyprivate),
2260 Args);
2261 }
2262}
2263
2265 const RegionCodeGenTy &OrderedOpGen,
2266 SourceLocation Loc, bool IsThreads) {
2267 if (!CGF.HaveInsertPoint())
2268 return;
2269 // __kmpc_ordered(ident_t *, gtid);
2270 // OrderedOpGen();
2271 // __kmpc_end_ordered(ident_t *, gtid);
2272 // Prepare arguments and build a call to __kmpc_ordered
2273 if (IsThreads) {
2274 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2275 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2276 CGM.getModule(), OMPRTL___kmpc_ordered),
2277 Args,
2278 OMPBuilder.getOrCreateRuntimeFunction(
2279 CGM.getModule(), OMPRTL___kmpc_end_ordered),
2280 Args);
2281 OrderedOpGen.setAction(Action);
2282 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2283 return;
2284 }
2285 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2286}
2287
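/// Map the directive that implies a barrier to the ident_t flags encoded in
/// the barrier's location argument: an explicit 'barrier' versus the implicit
/// barrier at the end of 'for', 'sections', or 'single'.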
2288static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2289 unsigned Flags;
2290 if (Kind == OMPD_for)
2291 Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2292 else if (Kind == OMPD_sections)
2293 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2294 else if (Kind == OMPD_single)
2295 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2296 else if (Kind == OMPD_barrier)
2297 Flags = OMP_IDENT_BARRIER_EXPL;
2298 else
2299 Flags = OMP_IDENT_BARRIER_IMPL;
2300 return Flags;
2301}
2302
2304 CodeGenFunction &CGF, const OMPLoopDirective &S,
2305 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2306 // Check if the loop directive is actually a doacross loop directive. In
2307 // this case choose a static schedule with chunk size 1.
2308 if (llvm::any_of(
2309 S.getClausesOfKind<OMPOrderedClause>(),
2310 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2311 ScheduleKind = OMPC_SCHEDULE_static;
2312 // Chunk size is 1 in this case.
2313 llvm::APInt ChunkSize(32, 1);
2314 ChunkExpr = IntegerLiteral::Create(
2315 CGF.getContext(), ChunkSize,
2316 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2317 SourceLocation());
2318 }
2319}
2320
2322 OpenMPDirectiveKind Kind, bool EmitChecks,
2323 bool ForceSimpleCall) {
2324 // Check if we should use the OMPBuilder
2325 auto *OMPRegionInfo =
2326 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2327 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2328 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
2329 cantFail(OMPBuilder.createBarrier(CGF.Builder, Kind, ForceSimpleCall,
2330 EmitChecks));
2331 CGF.Builder.restoreIP(AfterIP);
2332 return;
2333 }
2334
2335 if (!CGF.HaveInsertPoint())
2336 return;
2337 // Build call __kmpc_cancel_barrier(loc, thread_id);
2338 // Build call __kmpc_barrier(loc, thread_id);
2339 unsigned Flags = getDefaultFlagsForBarriers(Kind);
2340 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2341 // thread_id);
2342 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2343 getThreadID(CGF, Loc)};
2344 if (OMPRegionInfo) {
2345 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2346 llvm::Value *Result = CGF.EmitRuntimeCall(
2347 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2348 OMPRTL___kmpc_cancel_barrier),
2349 Args);
2350 if (EmitChecks) {
2351 // if (__kmpc_cancel_barrier()) {
2352 // exit from construct;
2353 // }
2354 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2355 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2356 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2357 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2358 CGF.EmitBlock(ExitBB);
2359 // exit from construct;
2360 CodeGenFunction::JumpDest CancelDestination =
2361 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2362 CGF.EmitBranchThroughCleanup(CancelDestination);
2363 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2364 }
2365 return;
2366 }
2367 }
2368 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2369 CGM.getModule(), OMPRTL___kmpc_barrier),
2370 Args);
2371}
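// Illustrative sketch (editorial, not part of the source): inside a
// cancellable region with EmitChecks set, the barrier above expands to
// roughly:
// \code
//   %res = call i32 @__kmpc_cancel_barrier(ptr @loc, i32 %gtid)
//   %cmp = icmp ne i32 %res, 0
//   br i1 %cmp, label %.cancel.exit, label %.cancel.continue
// .cancel.exit:     ; branch through cleanups to the cancellation destination
// .cancel.continue:
// \endcode
// Otherwise a plain call to @__kmpc_barrier is emitted.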
2372
2373void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
2374 Expr *ME, bool IsFatal) {
2375 llvm::Value *MVL = ME ? CGF.EmitScalarExpr(ME)
2376 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2377 // Build call void __kmpc_error(ident_t *loc, int severity, const char
2378 // *message)
2379 llvm::Value *Args[] = {
2380 emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true),
2381 llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
2382 CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
2383 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2384 CGM.getModule(), OMPRTL___kmpc_error),
2385 Args);
2386}
2387
2388/// Map the OpenMP loop schedule to the runtime enumeration.
2389static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2390 bool Chunked, bool Ordered) {
2391 switch (ScheduleKind) {
2392 case OMPC_SCHEDULE_static:
2393 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2394 : (Ordered ? OMP_ord_static : OMP_sch_static);
2395 case OMPC_SCHEDULE_dynamic:
2396 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2397 case OMPC_SCHEDULE_guided:
2398 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2399 case OMPC_SCHEDULE_runtime:
2400 return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2401 case OMPC_SCHEDULE_auto:
2402 return Ordered ? OMP_ord_auto : OMP_sch_auto;
2403 case OMPC_SCHEDULE_unknown:
2404 assert(!Chunked && "chunk was specified but schedule kind not known");
2405 return Ordered ? OMP_ord_static : OMP_sch_static;
2406 }
2407 llvm_unreachable("Unexpected runtime schedule");
2408}
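// Example (editorial, derived from the switch above): 'schedule(dynamic, 4)'
// maps to OMP_sch_dynamic_chunked, the same clause on an ordered loop maps to
// OMP_ord_dynamic_chunked, and a plain 'schedule(static)' maps to
// OMP_sch_static.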
2409
2410/// Map the OpenMP distribute schedule to the runtime enumeration.
2411static OpenMPSchedType
2413 // only static is allowed for dist_schedule
2414 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2415}
2416
2418 bool Chunked) const {
2419 OpenMPSchedType Schedule =
2420 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2421 return Schedule == OMP_sch_static;
2422}
2423
2425 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2426 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2427 return Schedule == OMP_dist_sch_static;
2428}
2429
2431 bool Chunked) const {
2432 OpenMPSchedType Schedule =
2433 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2434 return Schedule == OMP_sch_static_chunked;
2435}
2436
2438 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2439 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2440 return Schedule == OMP_dist_sch_static_chunked;
2441}
2442
2444 OpenMPSchedType Schedule =
2445 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2446 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2447 return Schedule != OMP_sch_static;
2448}
2449
2450static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2451 OpenMPScheduleClauseModifier M1,
2452 OpenMPScheduleClauseModifier M2) {
2453 int Modifier = 0;
2454 switch (M1) {
2455 case OMPC_SCHEDULE_MODIFIER_monotonic:
2456 Modifier = OMP_sch_modifier_monotonic;
2457 break;
2458 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2459 Modifier = OMP_sch_modifier_nonmonotonic;
2460 break;
2461 case OMPC_SCHEDULE_MODIFIER_simd:
2462 if (Schedule == OMP_sch_static_chunked)
2463 Schedule = OMP_sch_static_balanced_chunked;
2464 break;
2465 case OMPC_SCHEDULE_MODIFIER_last:
2466 case OMPC_SCHEDULE_MODIFIER_unknown:
2467 break;
2468 }
2469 switch (M2) {
2470 case OMPC_SCHEDULE_MODIFIER_monotonic:
2471 Modifier = OMP_sch_modifier_monotonic;
2472 break;
2473 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2474 Modifier = OMP_sch_modifier_nonmonotonic;
2475 break;
2476 case OMPC_SCHEDULE_MODIFIER_simd:
2477 if (Schedule == OMP_sch_static_chunked)
2478 Schedule = OMP_sch_static_balanced_chunked;
2479 break;
2480 case OMPC_SCHEDULE_MODIFIER_last:
2481 case OMPC_SCHEDULE_MODIFIER_unknown:
2482 break;
2483 }
2484 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
2485 // If the static schedule kind is specified or if the ordered clause is
2486 // specified, and if the nonmonotonic modifier is not specified, the effect is
2487 // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2488 // modifier is specified, the effect is as if the nonmonotonic modifier is
2489 // specified.
2490 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2491 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2492 Schedule == OMP_sch_static_balanced_chunked ||
2493 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2494 Schedule == OMP_dist_sch_static_chunked ||
2495 Schedule == OMP_dist_sch_static))
2496 Modifier = OMP_sch_modifier_nonmonotonic;
2497 }
2498 return Schedule | Modifier;
2499}
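// Example (editorial, derived from the code above): under OpenMP >= 5.0 a
// plain 'schedule(dynamic, 4)' becomes
// OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic, while the static
// and ordered schedules keep Modifier == 0, i.e. they behave as if the
// monotonic modifier had been specified.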
2500
2503 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2504 bool Ordered, const DispatchRTInput &DispatchValues) {
2505 if (!CGF.HaveInsertPoint())
2506 return;
2507 OpenMPSchedType Schedule = getRuntimeSchedule(
2508 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2509 assert(Ordered ||
2510 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2511 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2512 Schedule != OMP_sch_static_balanced_chunked));
2513 // Call __kmpc_dispatch_init(
2514 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2515 // kmp_int[32|64] lower, kmp_int[32|64] upper,
2516 // kmp_int[32|64] stride, kmp_int[32|64] chunk);
2517
2518 // If the Chunk was not specified in the clause - use default value 1.
2519 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2520 : CGF.Builder.getIntN(IVSize, 1);
2521 llvm::Value *Args[] = {
2522 emitUpdateLocation(CGF, Loc),
2523 getThreadID(CGF, Loc),
2524 CGF.Builder.getInt32(addMonoNonMonoModifier(
2525 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2526 DispatchValues.LB, // Lower
2527 DispatchValues.UB, // Upper
2528 CGF.Builder.getIntN(IVSize, 1), // Stride
2529 Chunk // Chunk
2530 };
2531 CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
2532 Args);
2533}
2534
2537 if (!CGF.HaveInsertPoint())
2538 return;
2539 // Call __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 tid);
2540 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2541 CGF.EmitRuntimeCall(OMPBuilder.createDispatchDeinitFunction(), Args);
2542}
2543
2545 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2546 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2548 const CGOpenMPRuntime::StaticRTInput &Values) {
2549 if (!CGF.HaveInsertPoint())
2550 return;
2551
2552 assert(!Values.Ordered);
2553 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2554 Schedule == OMP_sch_static_balanced_chunked ||
2555 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2556 Schedule == OMP_dist_sch_static ||
2557 Schedule == OMP_dist_sch_static_chunked);
2558
2559 // Call __kmpc_for_static_init(
2560 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2561 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2562 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2563 // kmp_int[32|64] incr, kmp_int[32|64] chunk);
2564 llvm::Value *Chunk = Values.Chunk;
2565 if (Chunk == nullptr) {
2566 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2567 Schedule == OMP_dist_sch_static) &&
2568 "expected static non-chunked schedule");
2569 // If the Chunk was not specified in the clause - use default value 1.
2570 Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2571 } else {
2572 assert((Schedule == OMP_sch_static_chunked ||
2573 Schedule == OMP_sch_static_balanced_chunked ||
2574 Schedule == OMP_ord_static_chunked ||
2575 Schedule == OMP_dist_sch_static_chunked) &&
2576 "expected static chunked schedule");
2577 }
2578 llvm::Value *Args[] = {
2579 UpdateLocation,
2580 ThreadId,
2581 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2582 M2)), // Schedule type
2583 Values.IL.emitRawPointer(CGF), // &isLastIter
2584 Values.LB.emitRawPointer(CGF), // &LB
2585 Values.UB.emitRawPointer(CGF), // &UB
2586 Values.ST.emitRawPointer(CGF), // &Stride
2587 CGF.Builder.getIntN(Values.IVSize, 1), // Incr
2588 Chunk // Chunk
2589 };
2590 CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2591}
2592
2595 OpenMPDirectiveKind DKind,
2596 const OpenMPScheduleTy &ScheduleKind,
2597 const StaticRTInput &Values) {
2598 OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2599 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2600 assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
2601 "Expected loop-based or sections-based directive.");
2602 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2603 isOpenMPLoopDirective(DKind)
2604 ? OMP_IDENT_WORK_LOOP
2605 : OMP_IDENT_WORK_SECTIONS);
2606 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2607 llvm::FunctionCallee StaticInitFunction =
2608 OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
2609 false);
2611 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2612 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2613}
2614
2618 const CGOpenMPRuntime::StaticRTInput &Values) {
2619 OpenMPSchedType ScheduleNum =
2620 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2621 llvm::Value *UpdatedLocation =
2622 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2623 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2624 llvm::FunctionCallee StaticInitFunction;
2625 bool isGPUDistribute =
2626 CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU();
2627 StaticInitFunction = OMPBuilder.createForStaticInitFunction(
2628 Values.IVSize, Values.IVSigned, isGPUDistribute);
2629
2630 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2631 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2632 OMPC_SCHEDULE_MODIFIER_unknown, Values);
2633}
2634
2637 OpenMPDirectiveKind DKind) {
2638 assert((DKind == OMPD_distribute || DKind == OMPD_for ||
2639 DKind == OMPD_sections) &&
2640 "Expected distribute, for, or sections directive kind");
2641 if (!CGF.HaveInsertPoint())
2642 return;
2643 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2644 llvm::Value *Args[] = {
2645 emitUpdateLocation(CGF, Loc,
2646 isOpenMPDistributeDirective(DKind) ||
2647 (DKind == OMPD_target_teams_loop)
2648 ? OMP_IDENT_WORK_DISTRIBUTE
2649 : isOpenMPLoopDirective(DKind)
2650 ? OMP_IDENT_WORK_LOOP
2651 : OMP_IDENT_WORK_SECTIONS),
2652 getThreadID(CGF, Loc)};
2654 if (isOpenMPDistributeDirective(DKind) &&
2655 CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU())
2656 CGF.EmitRuntimeCall(
2657 OMPBuilder.getOrCreateRuntimeFunction(
2658 CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2659 Args);
2660 else
2661 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2662 CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2663 Args);
2664}
2665
2668 unsigned IVSize,
2669 bool IVSigned) {
2670 if (!CGF.HaveInsertPoint())
2671 return;
2672 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2673 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2674 CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
2675 Args);
2676}
2677
2679 SourceLocation Loc, unsigned IVSize,
2680 bool IVSigned, Address IL,
2681 Address LB, Address UB,
2682 Address ST) {
2683 // Call __kmpc_dispatch_next(
2684 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2685 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2686 // kmp_int[32|64] *p_stride);
2687 llvm::Value *Args[] = {
2688 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2689 IL.emitRawPointer(CGF), // &isLastIter
2690 LB.emitRawPointer(CGF), // &Lower
2691 UB.emitRawPointer(CGF), // &Upper
2692 ST.emitRawPointer(CGF) // &Stride
2693 };
2694 llvm::Value *Call = CGF.EmitRuntimeCall(
2695 OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
2696 return CGF.EmitScalarConversion(
2697 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2698 CGF.getContext().BoolTy, Loc);
2699}
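// Illustrative sketch (editorial; assumes the default libomp host runtime and
// a 32-bit signed induction variable): together with emitForDispatchInit, a
// dynamically scheduled loop lowers roughly to:
// \code
//   __kmpc_dispatch_init_4(&loc, gtid, sched, lb, ub, /*stride=*/1, chunk);
//   while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lo, &hi, &st)) {
//     for (i = lo; i <= hi; i += st)
//       body(i);
//   }
// \endcode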
2700
2702 const Expr *Message) {
2703 if (!Message)
2704 return llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2705 return CGF.EmitScalarExpr(Message);
2706}
2707
2708llvm::Value *
2710 const OMPMessageClause *MessageClause) {
2711 return emitMessageClause(
2712 CGF, MessageClause ? MessageClause->getMessageString() : nullptr);
2713}
2714
2715llvm::Value *
2717 // OpenMP 6.0, 10.4: "If no severity clause is specified then the effect is
2718 // as if sev-level is fatal."
2719 return llvm::ConstantInt::get(CGM.Int32Ty,
2720 Severity == OMPC_SEVERITY_warning ? 1 : 2);
2721}
2722
2723llvm::Value *
2725 return emitSeverityClause(SeverityClause ? SeverityClause->getSeverityKind()
2726 : OMPC_SEVERITY_fatal);
2727}
2728
2730 CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
2731 OpenMPNumThreadsClauseModifier Modifier, OpenMPSeverityClauseKind Severity,
2732 const Expr *Message) {
2733 if (!CGF.HaveInsertPoint())
2734 return;
2735 llvm::SmallVector<llvm::Value *, 4> Args(
2736 {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2737 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)});
2738 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2739 // or __kmpc_push_num_threads_strict(&loc, global_tid, num_threads, severity,
2740 // message) if the strict modifier is used.
2741 RuntimeFunction FnID = OMPRTL___kmpc_push_num_threads;
2742 if (Modifier == OMPC_NUMTHREADS_strict) {
2743 FnID = OMPRTL___kmpc_push_num_threads_strict;
2744 Args.push_back(emitSeverityClause(Severity));
2745 Args.push_back(emitMessageClause(CGF, Message));
2746 }
2747 CGF.EmitRuntimeCall(
2748 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args);
2749}
2750
2752 ProcBindKind ProcBind,
2754 if (!CGF.HaveInsertPoint())
2755 return;
2756 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2757 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2758 llvm::Value *Args[] = {
2760 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2761 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2762 CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2763 Args);
2764}
2765
2767 SourceLocation Loc, llvm::AtomicOrdering AO) {
2768 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2769 OMPBuilder.createFlush(CGF.Builder);
2770 } else {
2771 if (!CGF.HaveInsertPoint())
2772 return;
2773 // Build call void __kmpc_flush(ident_t *loc)
2774 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2775 CGM.getModule(), OMPRTL___kmpc_flush),
2776 emitUpdateLocation(CGF, Loc));
2777 }
2778}
2779
2780namespace {
2781/// Indexes of fields for type kmp_task_t.
2782enum KmpTaskTFields {
2783 /// List of shared variables.
2784 KmpTaskTShareds,
2785 /// Task routine.
2786 KmpTaskTRoutine,
2787 /// Partition id for the untied tasks.
2788 KmpTaskTPartId,
2789 /// Function with call of destructors for private variables.
2790 Data1,
2791 /// Task priority.
2792 Data2,
2793 /// (Taskloops only) Lower bound.
2794 KmpTaskTLowerBound,
2795 /// (Taskloops only) Upper bound.
2796 KmpTaskTUpperBound,
2797 /// (Taskloops only) Stride.
2798 KmpTaskTStride,
2799 /// (Taskloops only) Is last iteration flag.
2800 KmpTaskTLastIter,
2801 /// (Taskloops only) Reduction data.
2802 KmpTaskTReductions,
2803};
2804} // anonymous namespace
2805
2806void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
2807 // If we are in simd mode or there are no entries, we don't need to do
2808 // anything.
2809 if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
2810 return;
2811
2812 llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
2813 [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
2814 const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
2815 SourceLocation Loc;
2816 if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
2817 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
2818 E = CGM.getContext().getSourceManager().fileinfo_end();
2819 I != E; ++I) {
2820 if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
2821 I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
2822 Loc = CGM.getContext().getSourceManager().translateFileLineCol(
2823 I->getFirst(), EntryInfo.Line, 1);
2824 break;
2825 }
2826 }
2827 }
2828 switch (Kind) {
2829 case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
2830 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2831 DiagnosticsEngine::Error, "Offloading entry for target region in "
2832 "%0 is incorrect: either the "
2833 "address or the ID is invalid.");
2834 CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2835 } break;
2836 case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
2837 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2838 DiagnosticsEngine::Error, "Offloading entry for declare target "
2839 "variable %0 is incorrect: the "
2840 "address is invalid.");
2841 CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2842 } break;
2843 case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
2844 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2846 "Offloading entry for declare target variable is incorrect: the "
2847 "address is invalid.");
2848 CGM.getDiags().Report(DiagID);
2849 } break;
2850 }
2851 };
2852
2853 OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
2854}
2855
2856void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
2857 if (!KmpRoutineEntryPtrTy) {
2858 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
2859 ASTContext &C = CGM.getContext();
2860 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
2861 FunctionProtoType::ExtProtoInfo EPI;
2862 KmpRoutineEntryPtrQTy = C.getPointerType(
2863 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
2864 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
2865 }
2866}
2867
2868namespace {
2869struct PrivateHelpersTy {
2870 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
2871 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
2872 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
2873 PrivateElemInit(PrivateElemInit) {}
2874 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
2875 const Expr *OriginalRef = nullptr;
2876 const VarDecl *Original = nullptr;
2877 const VarDecl *PrivateCopy = nullptr;
2878 const VarDecl *PrivateElemInit = nullptr;
2879 bool isLocalPrivate() const {
2880 return !OriginalRef && !PrivateCopy && !PrivateElemInit;
2881 }
2882};
2883typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
2884} // anonymous namespace
2885
2886static bool isAllocatableDecl(const VarDecl *VD) {
2887 const VarDecl *CVD = VD->getCanonicalDecl();
2888 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
2889 return false;
2890 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
2891 // Use the default allocation.
2892 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
2893 !AA->getAllocator());
2894}
2895
2896static RecordDecl *
2898 if (!Privates.empty()) {
2899 ASTContext &C = CGM.getContext();
2900 // Build struct .kmp_privates_t. {
2901 // /* private vars */
2902 // };
2903 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
2904 RD->startDefinition();
2905 for (const auto &Pair : Privates) {
2906 const VarDecl *VD = Pair.second.Original;
2907 QualType Type = VD->getType().getNonReferenceType();
2908 // If the private variable is a local variable with lvalue ref type,
2909 // allocate the pointer instead of the pointee type.
2910 if (Pair.second.isLocalPrivate()) {
2911 if (VD->getType()->isLValueReferenceType())
2912 Type = C.getPointerType(Type);
2913 if (isAllocatableDecl(VD))
2914 Type = C.getPointerType(Type);
2915 }
2916 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
2917 if (VD->hasAttrs()) {
2918 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
2919 E(VD->getAttrs().end());
2920 I != E; ++I)
2921 FD->addAttr(*I);
2922 }
2923 }
2924 RD->completeDefinition();
2925 return RD;
2926 }
2927 return nullptr;
2928}
2929
2930static RecordDecl *
2932 QualType KmpInt32Ty,
2933 QualType KmpRoutineEntryPointerQTy) {
2934 ASTContext &C = CGM.getContext();
2935 // Build struct kmp_task_t {
2936 // void * shareds;
2937 // kmp_routine_entry_t routine;
2938 // kmp_int32 part_id;
2939 // kmp_cmplrdata_t data1;
2940 // kmp_cmplrdata_t data2;
2941 // For taskloops additional fields:
2942 // kmp_uint64 lb;
2943 // kmp_uint64 ub;
2944 // kmp_int64 st;
2945 // kmp_int32 liter;
2946 // void * reductions;
2947 // };
2948 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union);
2949 UD->startDefinition();
2950 addFieldToRecordDecl(C, UD, KmpInt32Ty);
2951 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
2952 UD->completeDefinition();
2953 CanQualType KmpCmplrdataTy = C.getCanonicalTagType(UD);
2954 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
2955 RD->startDefinition();
2956 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2957 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
2958 addFieldToRecordDecl(C, RD, KmpInt32Ty);
2959 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
2960 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
2961 if (isOpenMPTaskLoopDirective(Kind)) {
2962 QualType KmpUInt64Ty =
2963 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
2964 QualType KmpInt64Ty =
2965 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
2966 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
2967 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
2968 addFieldToRecordDecl(C, RD, KmpInt64Ty);
2969 addFieldToRecordDecl(C, RD, KmpInt32Ty);
2970 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2971 }
2972 RD->completeDefinition();
2973 return RD;
2974}
2975
2976static RecordDecl *
2978 ArrayRef<PrivateDataTy> Privates) {
2979 ASTContext &C = CGM.getContext();
2980 // Build struct kmp_task_t_with_privates {
2981 // kmp_task_t task_data;
2982 // .kmp_privates_t. privates;
2983 // };
2984 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
2985 RD->startDefinition();
2986 addFieldToRecordDecl(C, RD, KmpTaskTQTy);
2987 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
2988 addFieldToRecordDecl(C, RD, C.getCanonicalTagType(PrivateRD));
2989 RD->completeDefinition();
2990 return RD;
2991}
2992
2993/// Emit a proxy function which accepts kmp_task_t as the second
2994/// argument.
2995/// \code
2996/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
2997/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
2998/// For taskloops:
2999/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3000/// tt->reductions, tt->shareds);
3001/// return 0;
3002/// }
3003/// \endcode
3004static llvm::Function *
3006 OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3007 QualType KmpTaskTWithPrivatesPtrQTy,
3008 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3009 QualType SharedsPtrTy, llvm::Function *TaskFunction,
3010 llvm::Value *TaskPrivatesMap) {
3011 ASTContext &C = CGM.getContext();
3012 FunctionArgList Args;
3013 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3014 ImplicitParamKind::Other);
3015 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3016 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3017 ImplicitParamKind::Other);
3018 Args.push_back(&GtidArg);
3019 Args.push_back(&TaskTypeArg);
3020 const auto &TaskEntryFnInfo =
3021 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3022 llvm::FunctionType *TaskEntryTy =
3023 CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3024 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3025 auto *TaskEntry = llvm::Function::Create(
3026 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3027 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3028 TaskEntry->setDoesNotRecurse();
3029 CodeGenFunction CGF(CGM);
3030 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3031 Loc, Loc);
3032
3033 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3034 // tt,
3035 // For taskloops:
3036 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3037 // tt->task_data.shareds);
3038 llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3039 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3040 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3041 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3042 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3043 const auto *KmpTaskTWithPrivatesQTyRD =
3044 KmpTaskTWithPrivatesQTy->castAsRecordDecl();
3045 LValue Base =
3046 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3047 const auto *KmpTaskTQTyRD = KmpTaskTQTy->castAsRecordDecl();
3048 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3049 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3050 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3051
3052 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3053 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3054 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3055 CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3056 CGF.ConvertTypeForMem(SharedsPtrTy));
3057
3058 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3059 llvm::Value *PrivatesParam;
3060 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3061 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3062 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3063 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3064 } else {
3065 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3066 }
3067
3068 llvm::Value *CommonArgs[] = {
3069 GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
3070 CGF.Builder
3071 .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(),
3072 CGF.VoidPtrTy, CGF.Int8Ty)
3073 .emitRawPointer(CGF)};
3074 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3075 std::end(CommonArgs));
3076 if (isOpenMPTaskLoopDirective(Kind)) {
3077 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3078 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3079 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3080 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3081 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3082 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3083 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3084 LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3085 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3086 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3087 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3088 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3089 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3090 LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3091 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3092 CallArgs.push_back(LBParam);
3093 CallArgs.push_back(UBParam);
3094 CallArgs.push_back(StParam);
3095 CallArgs.push_back(LIParam);
3096 CallArgs.push_back(RParam);
3097 }
3098 CallArgs.push_back(SharedsParam);
3099
3100 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3101 CallArgs);
3102 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3103 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3104 CGF.FinishFunction();
3105 return TaskEntry;
3106}
3107
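/// Emit a helper that runs the destructors of the privates embedded in the
/// task record. Schematically (the emitted name comes from
/// getName({"omp_task_destructor", ""}); the privates are illustrative):
/// \code
/// kmp_int32 .omp_task_destructor.(kmp_int32 gtid,
///                                 kmp_task_t_with_privates *tt) {
///   tt->privates.priv1.~Ty1();
///   ...
///   tt->privates.privn.~Tyn();
/// }
/// \endcode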
3108static llvm::Value *
3109emitDestructorsFunction(CodeGenModule &CGM, SourceLocation Loc,
3110 QualType KmpInt32Ty,
3111 QualType KmpTaskTWithPrivatesPtrQTy,
3112 QualType KmpTaskTWithPrivatesQTy) {
3113 ASTContext &C = CGM.getContext();
3114 FunctionArgList Args;
3115 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3116 ImplicitParamKind::Other);
3117 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3118 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3119 ImplicitParamKind::Other);
3120 Args.push_back(&GtidArg);
3121 Args.push_back(&TaskTypeArg);
3122 const auto &DestructorFnInfo =
3123 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3124 llvm::FunctionType *DestructorFnTy =
3125 CGM.getTypes().GetFunctionType(DestructorFnInfo);
3126 std::string Name =
3127 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3128 auto *DestructorFn =
3129 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3130 Name, &CGM.getModule());
3131 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3132 DestructorFnInfo);
3133 DestructorFn->setDoesNotRecurse();
3134 CodeGenFunction CGF(CGM);
3135 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3136 Args, Loc, Loc);
3137
3138 LValue Base = CGF.EmitLoadOfPointerLValue(
3139 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3140 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3141 const auto *KmpTaskTWithPrivatesQTyRD =
3142 KmpTaskTWithPrivatesQTy->castAsRecordDecl();
3143 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3144 Base = CGF.EmitLValueForField(Base, *FI);
3145 for (const auto *Field : FI->getType()->castAsRecordDecl()->fields()) {
3146 if (QualType::DestructionKind DtorKind =
3147 Field->getType().isDestructedType()) {
3148 LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3149 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
3150 }
3151 }
3152 CGF.FinishFunction();
3153 return DestructorFn;
3154}
3155
3156/// Emit a privates mapping function for correct handling of private and
3157/// firstprivate variables.
3158/// \code
3159/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3160/// **noalias priv1,..., <tyn> **noalias privn) {
3161/// *priv1 = &.privates.priv1;
3162/// ...;
3163/// *privn = &.privates.privn;
3164/// }
3165/// \endcode
3166static llvm::Value *
3167emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3168 const OMPTaskDataTy &Data, QualType PrivatesQTy,
3169 ArrayRef<PrivateDataTy> Privates) {
3170 ASTContext &C = CGM.getContext();
3171 FunctionArgList Args;
3172 ImplicitParamDecl TaskPrivatesArg(
3173 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3174 C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3175 ImplicitParamKind::Other);
3176 Args.push_back(&TaskPrivatesArg);
3177 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3178 unsigned Counter = 1;
3179 for (const Expr *E : Data.PrivateVars) {
3180 Args.push_back(ImplicitParamDecl::Create(
3181 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3182 C.getPointerType(C.getPointerType(E->getType()))
3183 .withConst()
3184 .withRestrict(),
3185 ImplicitParamKind::Other));
3186 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3187 PrivateVarsPos[VD] = Counter;
3188 ++Counter;
3189 }
3190 for (const Expr *E : Data.FirstprivateVars) {
3191 Args.push_back(ImplicitParamDecl::Create(
3192 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3193 C.getPointerType(C.getPointerType(E->getType()))
3194 .withConst()
3195 .withRestrict(),
3196 ImplicitParamKind::Other));
3197 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3198 PrivateVarsPos[VD] = Counter;
3199 ++Counter;
3200 }
3201 for (const Expr *E : Data.LastprivateVars) {
3202 Args.push_back(ImplicitParamDecl::Create(
3203 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3204 C.getPointerType(C.getPointerType(E->getType()))
3205 .withConst()
3206 .withRestrict(),
3207 ImplicitParamKind::Other));
3208 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3209 PrivateVarsPos[VD] = Counter;
3210 ++Counter;
3211 }
3212 for (const VarDecl *VD : Data.PrivateLocals) {
3213 QualType Ty = VD->getType().getNonReferenceType();
3214 if (VD->getType()->isLValueReferenceType())
3215 Ty = C.getPointerType(Ty);
3216 if (isAllocatableDecl(VD))
3217 Ty = C.getPointerType(Ty);
3218 Args.push_back(ImplicitParamDecl::Create(
3219 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3220 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3221 ImplicitParamKind::Other));
3222 PrivateVarsPos[VD] = Counter;
3223 ++Counter;
3224 }
3225 const auto &TaskPrivatesMapFnInfo =
3226 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3227 llvm::FunctionType *TaskPrivatesMapTy =
3228 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3229 std::string Name =
3230 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3231 auto *TaskPrivatesMap = llvm::Function::Create(
3232 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3233 &CGM.getModule());
3234 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3235 TaskPrivatesMapFnInfo);
3236 if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
3237 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3238 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3239 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3240 }
3241 CodeGenFunction CGF(CGM);
3242 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3243 TaskPrivatesMapFnInfo, Args, Loc, Loc);
3244
3245 // *privi = &.privates.privi;
3246 LValue Base = CGF.EmitLoadOfPointerLValue(
3247 CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3248 TaskPrivatesArg.getType()->castAs<PointerType>());
3249 const auto *PrivatesQTyRD = PrivatesQTy->castAsRecordDecl();
3250 Counter = 0;
3251 for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3252 LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3253 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3254 LValue RefLVal =
3255 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3256 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3257 RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
3258 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3259 ++Counter;
3260 }
3261 CGF.FinishFunction();
3262 return TaskPrivatesMap;
3263}
3264
3265/// Emit initialization for private variables in task-based directives.
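/// For a firstprivate copy this amounts, roughly, to
/// \code
/// new (&tt->privates.privi) Ty(shareds->privi_orig); // copy from shareds
/// \endcode
/// while plain private and lastprivate copies run their own default
/// initializers, if any.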
3266static void emitPrivatesInit(CodeGenFunction &CGF,
3267 const OMPExecutableDirective &D,
3268 Address KmpTaskSharedsPtr, LValue TDBase,
3269 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3270 QualType SharedsTy, QualType SharedsPtrTy,
3271 const OMPTaskDataTy &Data,
3272 ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3273 ASTContext &C = CGF.getContext();
3274 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3275 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3276 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
3277 ? OMPD_taskloop
3278 : OMPD_task;
3279 const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3280 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3281 LValue SrcBase;
3282 bool IsTargetTask =
3283 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
3284 isOpenMPTargetExecutionDirective(D.getDirectiveKind());
3285 // For target-based directives skip 4 firstprivate arrays BasePointersArray,
3286 // PointersArray, SizesArray, and MappersArray. The original variables for
3287 // these arrays are not captured and we get their addresses explicitly.
3288 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
3289 (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
3290 SrcBase = CGF.MakeAddrLValue(
3291 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3292 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
3293 CGF.ConvertTypeForMem(SharedsTy)),
3294 SharedsTy);
3295 }
3296 FI = FI->getType()->castAsRecordDecl()->field_begin();
3297 for (const PrivateDataTy &Pair : Privates) {
3298 // Do not initialize private locals.
3299 if (Pair.second.isLocalPrivate()) {
3300 ++FI;
3301 continue;
3302 }
3303 const VarDecl *VD = Pair.second.PrivateCopy;
3304 const Expr *Init = VD->getAnyInitializer();
3305 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3306 !CGF.isTrivialInitializer(Init)))) {
3307 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3308 if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3309 const VarDecl *OriginalVD = Pair.second.Original;
3310 // Check if the variable is the target-based BasePointersArray,
3311 // PointersArray, SizesArray, or MappersArray.
3312 LValue SharedRefLValue;
3313 QualType Type = PrivateLValue.getType();
3314 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3315 if (IsTargetTask && !SharedField) {
3316 assert(isa<ImplicitParamDecl>(OriginalVD) &&
3317 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
3318 cast<CapturedDecl>(OriginalVD->getDeclContext())
3319 ->getNumParams() == 0 &&
3320 isa<TranslationUnitDecl>(
3321 cast<CapturedDecl>(OriginalVD->getDeclContext())
3322 ->getDeclContext()) &&
3323 "Expected artificial target data variable.");
3324 SharedRefLValue =
3325 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3326 } else if (ForDup) {
3327 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3328 SharedRefLValue = CGF.MakeAddrLValue(
3329 SharedRefLValue.getAddress().withAlignment(
3330 C.getDeclAlign(OriginalVD)),
3331 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3332 SharedRefLValue.getTBAAInfo());
3333 } else if (CGF.LambdaCaptureFields.count(
3334 Pair.second.Original->getCanonicalDecl()) > 0 ||
3335 isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
3336 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3337 } else {
3338 // Processing for implicitly captured variables.
3339 InlinedOpenMPRegionRAII Region(
3340 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3341 /*HasCancel=*/false, /*NoInheritance=*/true);
3342 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3343 }
3344 if (Type->isArrayType()) {
3345 // Initialize firstprivate array.
3346 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
3347 // Perform simple memcpy.
3348 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3349 } else {
3350 // Initialize firstprivate array using element-by-element
3351 // initialization.
3352 CGF.EmitOMPAggregateAssign(
3353 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
3354 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3355 Address SrcElement) {
3356 // Clean up any temporaries needed by the initialization.
3357 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3358 InitScope.addPrivate(Elem, SrcElement);
3359 (void)InitScope.Privatize();
3360 // Emit initialization for single element.
3361 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3362 CGF, &CapturesInfo);
3363 CGF.EmitAnyExprToMem(Init, DestElement,
3364 Init->getType().getQualifiers(),
3365 /*IsInitializer=*/false);
3366 });
3367 }
3368 } else {
3369 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3370 InitScope.addPrivate(Elem, SharedRefLValue.getAddress());
3371 (void)InitScope.Privatize();
3372 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3373 CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3374 /*capturedByInit=*/false);
3375 }
3376 } else {
3377 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3378 }
3379 }
3380 ++FI;
3381 }
3382}
3383
3384/// Check if duplication function is required for taskloops.
3385static bool checkInitIsRequired(CodeGenFunction &CGF,
3386 ArrayRef<PrivateDataTy> Privates) {
3387 bool InitRequired = false;
3388 for (const PrivateDataTy &Pair : Privates) {
3389 if (Pair.second.isLocalPrivate())
3390 continue;
3391 const VarDecl *VD = Pair.second.PrivateCopy;
3392 const Expr *Init = VD->getAnyInitializer();
3393 InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3394 !CGF.isTrivialInitializer(Init));
3395 if (InitRequired)
3396 break;
3397 }
3398 return InitRequired;
3399}
3400
3401
3402/// Emit task_dup function (for initialization of
3403/// private/firstprivate/lastprivate vars and last_iter flag)
3404/// \code
3405/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3406/// lastpriv) {
3407/// // setup lastprivate flag
3408/// task_dst->last = lastpriv;
3409/// // could be constructor calls here...
3410/// }
3411/// \endcode
3412static llvm::Value *
3413emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3414 const OMPExecutableDirective &D,
3415 QualType KmpTaskTWithPrivatesPtrQTy,
3416 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3417 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3418 QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3419 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3420 ASTContext &C = CGM.getContext();
3421 FunctionArgList Args;
3422 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3423 KmpTaskTWithPrivatesPtrQTy,
3424 ImplicitParamKind::Other);
3425 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3426 KmpTaskTWithPrivatesPtrQTy,
3427 ImplicitParamKind::Other);
3428 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3429 ImplicitParamKind::Other);
3430 Args.push_back(&DstArg);
3431 Args.push_back(&SrcArg);
3432 Args.push_back(&LastprivArg);
3433 const auto &TaskDupFnInfo =
3434 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3435 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3436 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3437 auto *TaskDup = llvm::Function::Create(
3438 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3439 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3440 TaskDup->setDoesNotRecurse();
3441 CodeGenFunction CGF(CGM);
3442 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3443 Loc);
3444
3445 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3446 CGF.GetAddrOfLocalVar(&DstArg),
3447 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3448 // task_dst->liter = lastpriv;
3449 if (WithLastIter) {
3450 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3451 LValue Base = CGF.EmitLValueForField(
3452 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3453 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3454 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3455 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3456 CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3457 }
3458
3459 // Emit initial values for private copies (if any).
3460 assert(!Privates.empty());
3461 Address KmpTaskSharedsPtr = Address::invalid();
3462 if (!Data.FirstprivateVars.empty()) {
3463 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3464 CGF.GetAddrOfLocalVar(&SrcArg),
3465 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3466 LValue Base = CGF.EmitLValueForField(
3467 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3468 KmpTaskSharedsPtr = Address(
3469 CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3470 Base, *std::next(KmpTaskTQTyRD->field_begin(),
3471 KmpTaskTShareds)),
3472 Loc),
3473 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3474 }
3475 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3476 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3477 CGF.FinishFunction();
3478 return TaskDup;
3479}
3480
3481/// Checks if destructor function is required to be generated.
3482/// \return true if cleanups are required, false otherwise.
3483static bool
3484checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3485 ArrayRef<PrivateDataTy> Privates) {
3486 for (const PrivateDataTy &P : Privates) {
3487 if (P.second.isLocalPrivate())
3488 continue;
3489 QualType Ty = P.second.Original->getType().getNonReferenceType();
3490 if (Ty.isDestructedType())
3491 return true;
3492 }
3493 return false;
3494}
3495
3496namespace {
3497/// Loop generator for OpenMP iterator expression.
3498class OMPIteratorGeneratorScope final
3499 : public CodeGenFunction::OMPPrivateScope {
3500 CodeGenFunction &CGF;
3501 const OMPIteratorExpr *E = nullptr;
3502 SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
3503 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
3504 OMPIteratorGeneratorScope() = delete;
3505 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
3506
3507public:
3508 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
3509 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
3510 if (!E)
3511 return;
3512 SmallVector<llvm::Value *, 4> Uppers;
3513 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3514 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
3515 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
3516 addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
3517 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3518 addPrivate(
3519 HelperData.CounterVD,
3520 CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
3521 }
3522 Privatize();
3523
3524 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3525 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3526 LValue CLVal =
3527 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
3528 HelperData.CounterVD->getType());
3529 // Counter = 0;
3530 CGF.EmitStoreOfScalar(
3531 llvm::ConstantInt::get(CLVal.getAddress().getElementType(), 0),
3532 CLVal);
3533 CodeGenFunction::JumpDest &ContDest =
3534 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
3535 CodeGenFunction::JumpDest &ExitDest =
3536 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
3537 // N = <number-of_iterations>;
3538 llvm::Value *N = Uppers[I];
3539 // cont:
3540 // if (Counter < N) goto body; else goto exit;
3541 CGF.EmitBlock(ContDest.getBlock());
3542 auto *CVal =
3543 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
3544 llvm::Value *Cmp =
3545 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
3546 ? CGF.Builder.CreateICmpSLT(CVal, N)
3547 : CGF.Builder.CreateICmpULT(CVal, N);
3548 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
3549 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
3550 // body:
3551 CGF.EmitBlock(BodyBB);
3552 // Iteri = Begini + Counter * Stepi;
3553 CGF.EmitIgnoredExpr(HelperData.Update);
3554 }
3555 }
3556 ~OMPIteratorGeneratorScope() {
3557 if (!E)
3558 return;
3559 for (unsigned I = E->numOfIterators(); I > 0; --I) {
3560 // Counter = Counter + 1;
3561 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
3562 CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
3563 // goto cont;
3564 CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
3565 // exit:
3566 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
3567 }
3568 }
3569};
3570} // namespace
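// For a clause modifier such as iterator(i = begin:upper:step), the scope
// above brackets the enclosed emission in (schematically):
// \code
//   counter = 0;
// cont:
//   if (counter < N) goto body; else goto exit;
// body:
//   i = begin + counter * step;
//   <enclosed emission>
//   counter = counter + 1;
//   goto cont;
// exit:
// \endcode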
3571
3572static std::pair<llvm::Value *, llvm::Value *>
3573getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
3574 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
3575 llvm::Value *Addr;
3576 if (OASE) {
3577 const Expr *Base = OASE->getBase();
3578 Addr = CGF.EmitScalarExpr(Base);
3579 } else {
3580 Addr = CGF.EmitLValue(E).getPointer(CGF);
3581 }
3582 llvm::Value *SizeVal;
3583 QualType Ty = E->getType();
3584 if (OASE) {
3585 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
3586 for (const Expr *SE : OASE->getDimensions()) {
3587 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
3588 Sz = CGF.EmitScalarConversion(
3589 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
3590 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
3591 }
3592 } else if (const auto *ASE =
3593 dyn_cast<ArraySectionExpr>(E->IgnoreParenImpCasts())) {
3594 LValue UpAddrLVal = CGF.EmitArraySectionExpr(ASE, /*IsLowerBound=*/false);
3595 Address UpAddrAddress = UpAddrLVal.getAddress();
3596 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
3597 UpAddrAddress.getElementType(), UpAddrAddress.emitRawPointer(CGF),
3598 /*Idx0=*/1);
3599 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
3600 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
3601 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
3602 } else {
3603 SizeVal = CGF.getTypeSize(Ty);
3604 }
3605 return std::make_pair(Addr, SizeVal);
3606}
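// Thus, for an array-shaping expression ([n][m])ptr the size evaluates to
// sizeof(*ptr) * n * m; for an array section it is the byte distance from the
// section's lower bound to one past its upper bound; otherwise it is simply
// the size of the expression's type.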
3607
3608/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
3609static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
3610 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
3611 if (KmpTaskAffinityInfoTy.isNull()) {
3612 RecordDecl *KmpAffinityInfoRD =
3613 C.buildImplicitRecord("kmp_task_affinity_info_t");
3614 KmpAffinityInfoRD->startDefinition();
3615 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
3616 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
3617 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
3618 KmpAffinityInfoRD->completeDefinition();
3619 KmpTaskAffinityInfoTy = C.getCanonicalTagType(KmpAffinityInfoRD);
3620 }
3621}
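// The implicit record built above mirrors the runtime's
// kmp_task_affinity_info_t (see openmp/runtime/src/kmp.h); roughly:
// \code
// struct kmp_task_affinity_info_t {
//   intptr_t base_addr;
//   size_t len;
//   uint32_t flags; // bitfields in the runtime, a plain integer here
// };
// \endcode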
3622
3623CGOpenMPRuntime::TaskResultTy
3624CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
3625 const OMPExecutableDirective &D,
3626 llvm::Function *TaskFunction, QualType SharedsTy,
3627 Address Shareds, const OMPTaskDataTy &Data) {
3628 ASTContext &C = CGM.getContext();
3629 llvm::SmallVector<PrivateDataTy, 4> Privates;
3630 // Aggregate privates and sort them by the alignment.
3631 const auto *I = Data.PrivateCopies.begin();
3632 for (const Expr *E : Data.PrivateVars) {
3633 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3634 Privates.emplace_back(
3635 C.getDeclAlign(VD),
3636 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3637 /*PrivateElemInit=*/nullptr));
3638 ++I;
3639 }
3640 I = Data.FirstprivateCopies.begin();
3641 const auto *IElemInitRef = Data.FirstprivateInits.begin();
3642 for (const Expr *E : Data.FirstprivateVars) {
3643 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3644 Privates.emplace_back(
3645 C.getDeclAlign(VD),
3646 PrivateHelpersTy(
3647 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3648 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
3649 ++I;
3650 ++IElemInitRef;
3651 }
3652 I = Data.LastprivateCopies.begin();
3653 for (const Expr *E : Data.LastprivateVars) {
3654 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3655 Privates.emplace_back(
3656 C.getDeclAlign(VD),
3657 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3658 /*PrivateElemInit=*/nullptr));
3659 ++I;
3660 }
3661 for (const VarDecl *VD : Data.PrivateLocals) {
3662 if (isAllocatableDecl(VD))
3663 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
3664 else
3665 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
3666 }
3667 llvm::stable_sort(Privates,
3668 [](const PrivateDataTy &L, const PrivateDataTy &R) {
3669 return L.first > R.first;
3670 });
3671 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3672 // Build type kmp_routine_entry_t (if not built yet).
3673 emitKmpRoutineEntryT(KmpInt32Ty);
3674 // Build type kmp_task_t (if not built yet).
3675 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
3676 if (SavedKmpTaskloopTQTy.isNull()) {
3677 SavedKmpTaskloopTQTy = C.getCanonicalTagType(createKmpTaskTRecordDecl(
3678 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3679 }
3680 KmpTaskTQTy = SavedKmpTaskloopTQTy;
3681 } else {
3682 assert((D.getDirectiveKind() == OMPD_task ||
3683 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
3684 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
3685 "Expected taskloop, task or target directive");
3686 if (SavedKmpTaskTQTy.isNull()) {
3687 SavedKmpTaskTQTy = C.getCanonicalTagType(createKmpTaskTRecordDecl(
3688 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3689 }
3690 KmpTaskTQTy = SavedKmpTaskTQTy;
3691 }
3692 const auto *KmpTaskTQTyRD = KmpTaskTQTy->castAsRecordDecl();
3693 // Build particular struct kmp_task_t for the given task.
3694 const RecordDecl *KmpTaskTWithPrivatesQTyRD =
3695 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
3696 CanQualType KmpTaskTWithPrivatesQTy =
3697 C.getCanonicalTagType(KmpTaskTWithPrivatesQTyRD);
3698 QualType KmpTaskTWithPrivatesPtrQTy =
3699 C.getPointerType(KmpTaskTWithPrivatesQTy);
3700 llvm::Type *KmpTaskTWithPrivatesPtrTy = CGF.Builder.getPtrTy(0);
3701 llvm::Value *KmpTaskTWithPrivatesTySize =
3702 CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
3703 QualType SharedsPtrTy = C.getPointerType(SharedsTy);
3704
3705 // Emit initial values for private copies (if any).
3706 llvm::Value *TaskPrivatesMap = nullptr;
3707 llvm::Type *TaskPrivatesMapTy =
3708 std::next(TaskFunction->arg_begin(), 3)->getType();
3709 if (!Privates.empty()) {
3710 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3711 TaskPrivatesMap =
3712 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
3713 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3714 TaskPrivatesMap, TaskPrivatesMapTy);
3715 } else {
3716 TaskPrivatesMap = llvm::ConstantPointerNull::get(
3717 cast<llvm::PointerType>(TaskPrivatesMapTy));
3718 }
3719 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
3720 // kmp_task_t *tt);
3721 llvm::Function *TaskEntry = emitProxyTaskFunction(
3722 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3723 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
3724 TaskPrivatesMap);
3725
3726 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
3727 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
3728 // kmp_routine_entry_t *task_entry);
3729 // Task flags. Format is taken from
3730 // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
3731 // description of kmp_tasking_flags struct.
3732 enum {
3733 TiedFlag = 0x1,
3734 FinalFlag = 0x2,
3735 DestructorsFlag = 0x8,
3736 PriorityFlag = 0x20,
3737 DetachableFlag = 0x40,
3738 };
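// For example, a tied task with a priority clause and destructible privates
// gets flags = TiedFlag | DestructorsFlag | PriorityFlag
//            = 0x1 | 0x8 | 0x20 = 0x29.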
3739 unsigned Flags = Data.Tied ? TiedFlag : 0;
3740 bool NeedsCleanup = false;
3741 if (!Privates.empty()) {
3742 NeedsCleanup =
3743 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
3744 if (NeedsCleanup)
3745 Flags = Flags | DestructorsFlag;
3746 }
3747 if (Data.Priority.getInt())
3748 Flags = Flags | PriorityFlag;
3749 if (D.hasClausesOfKind<OMPDetachClause>())
3750 Flags = Flags | DetachableFlag;
3751 llvm::Value *TaskFlags =
3752 Data.Final.getPointer()
3753 ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
3754 CGF.Builder.getInt32(FinalFlag),
3755 CGF.Builder.getInt32(/*C=*/0))
3756 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
3757 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
3758 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
3759 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
3760 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
3761 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3762 TaskEntry, KmpRoutineEntryPtrTy)};
3763 llvm::Value *NewTask;
3764 if (D.hasClausesOfKind<OMPNowaitClause>()) {
3765 // Check if we have any device clause associated with the directive.
3766 const Expr *Device = nullptr;
3767 if (auto *C = D.getSingleClause<OMPDeviceClause>())
3768 Device = C->getDevice();
3769 // Emit the device ID if present; otherwise use the default value.
3770 llvm::Value *DeviceID;
3771 if (Device)
3772 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
3773 CGF.Int64Ty, /*isSigned=*/true);
3774 else
3775 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
3776 AllocArgs.push_back(DeviceID);
3777 NewTask = CGF.EmitRuntimeCall(
3778 OMPBuilder.getOrCreateRuntimeFunction(
3779 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
3780 AllocArgs);
3781 } else {
3782 NewTask =
3783 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
3784 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
3785 AllocArgs);
3786 }
3787 // Emit detach clause initialization.
3788 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
3789 // task_descriptor);
3790 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
3791 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
3792 LValue EvtLVal = CGF.EmitLValue(Evt);
3793
3794 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
3795 // int gtid, kmp_task_t *task);
3796 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
3797 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
3798 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
3799 llvm::Value *EvtVal = CGF.EmitRuntimeCall(
3800 OMPBuilder.getOrCreateRuntimeFunction(
3801 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
3802 {Loc, Tid, NewTask});
3803 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
3804 Evt->getExprLoc());
3805 CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
3806 }
3807 // Process affinity clauses.
3808 if (D.hasClausesOfKind<OMPAffinityClause>()) {
3809 // Process list of affinity data.
3810 ASTContext &C = CGM.getContext();
3811 Address AffinitiesArray = Address::invalid();
3812 // Calculate number of elements to form the array of affinity data.
3813 llvm::Value *NumOfElements = nullptr;
3814 unsigned NumAffinities = 0;
3815 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3816 if (const Expr *Modifier = C->getModifier()) {
3817 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
3818 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
3819 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
3820 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
3821 NumOfElements =
3822 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
3823 }
3824 } else {
3825 NumAffinities += C->varlist_size();
3826 }
3827 }
3828 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
3829 // Field ids in the kmp_task_affinity_info record.
3830 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
3831
3832 QualType KmpTaskAffinityInfoArrayTy;
3833 if (NumOfElements) {
3834 NumOfElements = CGF.Builder.CreateNUWAdd(
3835 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
3836 auto *OVE = new (C) OpaqueValueExpr(
3837 Loc,
3838 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
3839 VK_PRValue);
3840 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
3841 RValue::get(NumOfElements));
3842 KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
3843 KmpTaskAffinityInfoTy, OVE, ArraySizeModifier::Normal,
3844 /*IndexTypeQuals=*/0);
3845 // Properly emit variable-sized array.
3846 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
3847 ImplicitParamKind::Other);
3848 CGF.EmitVarDecl(*PD);
3849 AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
3850 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
3851 /*isSigned=*/false);
3852 } else {
3853 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
3854 KmpTaskAffinityInfoTy,
3855 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
3856 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
3857 AffinitiesArray =
3858 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
3859 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
3860 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
3861 /*isSigned=*/false);
3862 }
3863
3864 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
3865 // Fill array by elements without iterators.
3866 unsigned Pos = 0;
3867 bool HasIterator = false;
3868 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3869 if (C->getModifier()) {
3870 HasIterator = true;
3871 continue;
3872 }
3873 for (const Expr *E : C->varlist()) {
3874 llvm::Value *Addr;
3875 llvm::Value *Size;
3876 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3877 LValue Base =
3878 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
3879 KmpTaskAffinityInfoTy);
3880 // affs[i].base_addr = &<Affinities[i].second>;
3881 LValue BaseAddrLVal = CGF.EmitLValueForField(
3882 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
3883 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
3884 BaseAddrLVal);
3885 // affs[i].len = sizeof(<Affinities[i].second>);
3886 LValue LenLVal = CGF.EmitLValueForField(
3887 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
3888 CGF.EmitStoreOfScalar(Size, LenLVal);
3889 ++Pos;
3890 }
3891 }
3892 LValue PosLVal;
3893 if (HasIterator) {
3894 PosLVal = CGF.MakeAddrLValue(
3895 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
3896 C.getSizeType());
3897 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
3898 }
3899 // Process elements with iterators.
3900 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3901 const Expr *Modifier = C->getModifier();
3902 if (!Modifier)
3903 continue;
3904 OMPIteratorGeneratorScope IteratorScope(
3905 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
3906 for (const Expr *E : C->varlist()) {
3907 llvm::Value *Addr;
3908 llvm::Value *Size;
3909 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3910 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
3911 LValue Base =
3912 CGF.MakeAddrLValue(CGF.Builder.CreateGEP(CGF, AffinitiesArray, Idx),
3913 KmpTaskAffinityInfoTy);
3914 // affs[i].base_addr = &<Affinities[i].second>;
3915 LValue BaseAddrLVal = CGF.EmitLValueForField(
3916 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
3917 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
3918 BaseAddrLVal);
3919 // affs[i].len = sizeof(<Affinities[i].second>);
3920 LValue LenLVal = CGF.EmitLValueForField(
3921 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
3922 CGF.EmitStoreOfScalar(Size, LenLVal);
3923 Idx = CGF.Builder.CreateNUWAdd(
3924 Idx, llvm::ConstantInt::get(Idx->getType(), 1));
3925 CGF.EmitStoreOfScalar(Idx, PosLVal);
3926 }
3927 }
3928 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
3929 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
3930 // naffins, kmp_task_affinity_info_t *affin_list);
3931 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
3932 llvm::Value *GTid = getThreadID(CGF, Loc);
3933 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3934 AffinitiesArray.emitRawPointer(CGF), CGM.VoidPtrTy);
3935 // FIXME: Emit the function and ignore its result for now unless the
3936 // runtime function is properly implemented.
3937 (void)CGF.EmitRuntimeCall(
3938 OMPBuilder.getOrCreateRuntimeFunction(
3939 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
3940 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
3941 }
3942 llvm::Value *NewTaskNewTaskTTy =
3943 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3944 NewTask, KmpTaskTWithPrivatesPtrTy);
3945 LValue Base = CGF.MakeNaturalAlignRawAddrLValue(NewTaskNewTaskTTy,
3946 KmpTaskTWithPrivatesQTy);
3947 LValue TDBase =
3948 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
3949 // Fill the data in the resulting kmp_task_t record.
3950 // Copy shareds if there are any.
3951 Address KmpTaskSharedsPtr = Address::invalid();
3952 if (!SharedsTy->castAsRecordDecl()->field_empty()) {
3953 KmpTaskSharedsPtr = Address(
3954 CGF.EmitLoadOfScalar(
3955 CGF.EmitLValueForField(
3956 TDBase,
3957 *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
3958 Loc),
3959 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3960 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
3961 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
3962 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
3963 }
3964 // Emit initial values for private copies (if any).
3965 TaskResultTy Result;
3966 if (!Privates.empty()) {
3967 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
3968 SharedsTy, SharedsPtrTy, Data, Privates,
3969 /*ForDup=*/false);
3970 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
3971 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
3972 Result.TaskDupFn = emitTaskDupFunction(
3973 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
3974 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
3975 /*WithLastIter=*/!Data.LastprivateVars.empty());
3976 }
3977 }
3978 // Fields of union "kmp_cmplrdata_t" for destructors and priority.
3979 enum { Priority = 0, Destructors = 1 };
3980 // Provide pointer to function with destructors for privates.
3981 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
3982 const auto *KmpCmplrdataUD = (*FI)->getType()->castAsRecordDecl();
3983 assert(KmpCmplrdataUD->isUnion());
3984 if (NeedsCleanup) {
3985 llvm::Value *DestructorFn = emitDestructorsFunction(
3986 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3987 KmpTaskTWithPrivatesQTy);
3988 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
3989 LValue DestructorsLV = CGF.EmitLValueForField(
3990 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
3991 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3992 DestructorFn, KmpRoutineEntryPtrTy),
3993 DestructorsLV);
3994 }
3995 // Set priority.
3996 if (Data.Priority.getInt()) {
3997 LValue Data2LV = CGF.EmitLValueForField(
3998 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
3999 LValue PriorityLV = CGF.EmitLValueForField(
4000 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4001 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4002 }
4003 Result.NewTask = NewTask;
4004 Result.TaskEntry = TaskEntry;
4005 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4006 Result.TDBase = TDBase;
4007 Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4008 return Result;
4009}
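// Net effect at the call site, schematically (names are illustrative):
// \code
// kmp_task_t *new_task = __kmpc_omp_task_alloc(loc, gtid, flags,
//     sizeof(kmp_task_t_with_privates), sizeof(shareds), &.omp_task_entry.);
// memcpy(new_task->shareds, shareds, sizeof(shareds));
// // ...followed by initialization of privates and, if needed, the
// // destructor thunk, priority and task duplication function.
// \endcode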
4010
4011/// Translates internal dependency kind into the runtime kind.
4012static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4013 RTLDependenceKindTy DepKind;
4014 switch (K) {
4015 case OMPC_DEPEND_in:
4016 DepKind = RTLDependenceKindTy::DepIn;
4017 break;
4018 // Out and InOut dependencies must use the same code.
4019 case OMPC_DEPEND_out:
4020 case OMPC_DEPEND_inout:
4021 DepKind = RTLDependenceKindTy::DepInOut;
4022 break;
4023 case OMPC_DEPEND_mutexinoutset:
4024 DepKind = RTLDependenceKindTy::DepMutexInOutSet;
4025 break;
4026 case OMPC_DEPEND_inoutset:
4027 DepKind = RTLDependenceKindTy::DepInOutSet;
4028 break;
4029 case OMPC_DEPEND_outallmemory:
4030 DepKind = RTLDependenceKindTy::DepOmpAllMem;
4031 break;
4032 case OMPC_DEPEND_source:
4033 case OMPC_DEPEND_sink:
4034 case OMPC_DEPEND_depobj:
4035 case OMPC_DEPEND_inoutallmemory:
4036 case OMPC_DEPEND_unknown:
4037 llvm_unreachable("Unknown task dependence type");
4038 }
4039 return DepKind;
4040}
4041
4042/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4043static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4044 QualType &FlagsTy) {
4045 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4046 if (KmpDependInfoTy.isNull()) {
4047 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4048 KmpDependInfoRD->startDefinition();
4049 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4050 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4051 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4052 KmpDependInfoRD->completeDefinition();
4053 KmpDependInfoTy = C.getCanonicalTagType(KmpDependInfoRD);
4054 }
4055}
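// The implicit record corresponds to the runtime's kmp_depend_info; roughly:
// \code
// struct kmp_depend_info {
//   intptr_t base_addr;
//   size_t len;
//   kmp_uint8 flags; // bool-width integer holding RTLDependenceKindTy
// };
// \endcode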
4056
4057std::pair<llvm::Value *, LValue>
4058CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4059 SourceLocation Loc) {
4060 ASTContext &C = CGM.getContext();
4061 QualType FlagsTy;
4062 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4063 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4064 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4065 LValue Base = CGF.EmitLoadOfPointerLValue(
4066 DepobjLVal.getAddress().withElementType(
4067 CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
4068 KmpDependInfoPtrTy->castAs<PointerType>());
4069 Address DepObjAddr = CGF.Builder.CreateGEP(
4070 CGF, Base.getAddress(),
4071 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4072 LValue NumDepsBase = CGF.MakeAddrLValue(
4073 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4074 // NumDeps = deps[i].base_addr;
4075 LValue BaseAddrLVal = CGF.EmitLValueForField(
4076 NumDepsBase,
4077 *std::next(KmpDependInfoRD->field_begin(),
4078 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4079 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4080 return std::make_pair(NumDeps, Base);
4081}
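// That is, the element count of a depobj lives in a bookkeeping entry placed
// just before the user-visible array:
// \code
// ndeps = ((kmp_depend_info *)depobj)[-1].base_addr;
// \endcode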
4082
4083static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4084 llvm::PointerUnion<unsigned *, LValue *> Pos,
4085 const OMPTaskDataTy::DependData &Data,
4086 Address DependenciesArray) {
4087 CodeGenModule &CGM = CGF.CGM;
4088 ASTContext &C = CGM.getContext();
4089 QualType FlagsTy;
4090 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4091 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4092 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4093
4094 OMPIteratorGeneratorScope IteratorScope(
4095 CGF, cast_or_null<OMPIteratorExpr>(
4096 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4097 : nullptr));
4098 for (const Expr *E : Data.DepExprs) {
4099 llvm::Value *Addr;
4100 llvm::Value *Size;
4101
4102 // The expression will be a nullptr in the 'omp_all_memory' case.
4103 if (E) {
4104 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4105 Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
4106 } else {
4107 Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4108 Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
4109 }
4110 LValue Base;
4111 if (unsigned *P = dyn_cast<unsigned *>(Pos)) {
4112 Base = CGF.MakeAddrLValue(
4113 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4114 } else {
4115 assert(E && "Expected a non-null expression");
4116 LValue &PosLVal = *cast<LValue *>(Pos);
4117 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4118 Base = CGF.MakeAddrLValue(
4119 CGF.Builder.CreateGEP(CGF, DependenciesArray, Idx), KmpDependInfoTy);
4120 }
4121 // deps[i].base_addr = &<Dependencies[i].second>;
4122 LValue BaseAddrLVal = CGF.EmitLValueForField(
4123 Base,
4124 *std::next(KmpDependInfoRD->field_begin(),
4125 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4126 CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
4127 // deps[i].len = sizeof(<Dependencies[i].second>);
4128 LValue LenLVal = CGF.EmitLValueForField(
4129 Base, *std::next(KmpDependInfoRD->field_begin(),
4130 static_cast<unsigned int>(RTLDependInfoFields::Len)));
4131 CGF.EmitStoreOfScalar(Size, LenLVal);
4132 // deps[i].flags = <Dependencies[i].first>;
4133 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4134 LValue FlagsLVal = CGF.EmitLValueForField(
4135 Base,
4136 *std::next(KmpDependInfoRD->field_begin(),
4137 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4138 CGF.EmitStoreOfScalar(
4139 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4140 FlagsLVal);
4141 if (unsigned *P = dyn_cast<unsigned *>(Pos)) {
4142 ++(*P);
4143 } else {
4144 LValue &PosLVal = *cast<LValue *>(Pos);
4145 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4146 Idx = CGF.Builder.CreateNUWAdd(Idx,
4147 llvm::ConstantInt::get(Idx->getType(), 1));
4148 CGF.EmitStoreOfScalar(Idx, PosLVal);
4149 }
4150 }
4151}
4152
4153static SmallVector<llvm::Value *, 4> emitDepobjElementsSizes(
4154 CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4155 const OMPTaskDataTy::DependData &Data) {
4156 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4157 "Expected depobj dependency kind.");
4158 SmallVector<llvm::Value *, 4> Sizes;
4159 SmallVector<LValue, 4> SizeLVals;
4160 ASTContext &C = CGF.getContext();
4161 {
4162 OMPIteratorGeneratorScope IteratorScope(
4163 CGF, cast_or_null<OMPIteratorExpr>(
4164 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4165 : nullptr));
4166 for (const Expr *E : Data.DepExprs) {
4167 llvm::Value *NumDeps;
4168 LValue Base;
4169 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4170 std::tie(NumDeps, Base) =
4171 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4172 LValue NumLVal = CGF.MakeAddrLValue(
4173 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4174 C.getUIntPtrType());
4175 CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4176 NumLVal.getAddress());
4177 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4178 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4179 CGF.EmitStoreOfScalar(Add, NumLVal);
4180 SizeLVals.push_back(NumLVal);
4181 }
4182 }
4183 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4184 llvm::Value *Size =
4185 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4186 Sizes.push_back(Size);
4187 }
4188 return Sizes;
4189}
4190
4191static void emitDepobjElements(CodeGenFunction &CGF,
4192 QualType &KmpDependInfoTy,
4193 LValue PosLVal,
4194 const OMPTaskDataTy::DependData &Data,
4195 Address DependenciesArray) {
4196 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4197 "Expected depobj dependency kind.");
4198 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4199 {
4200 OMPIteratorGeneratorScope IteratorScope(
4201 CGF, cast_or_null<OMPIteratorExpr>(
4202 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4203 : nullptr));
4204 for (const Expr *E : Data.DepExprs) {
4205 llvm::Value *NumDeps;
4206 LValue Base;
4207 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4208 std::tie(NumDeps, Base) =
4209 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4210
4211 // memcpy the dependency data.
4212 llvm::Value *Size = CGF.Builder.CreateNUWMul(
4213 ElSize,
4214 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4215 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4216 Address DepAddr = CGF.Builder.CreateGEP(CGF, DependenciesArray, Pos);
4217 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(), Size);
4218
4219 // Increase pos.
4220 // pos += size;
4221 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4222 CGF.EmitStoreOfScalar(Add, PosLVal);
4223 }
4224 }
4225}
4226
4227std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4228 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4229 SourceLocation Loc) {
4230 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4231 return D.DepExprs.empty();
4232 }))
4233 return std::make_pair(nullptr, Address::invalid());
4234 // Process list of dependencies.
4235 ASTContext &C = CGM.getContext();
4236 Address DependenciesArray = Address::invalid();
4237 llvm::Value *NumOfElements = nullptr;
4238 unsigned NumDependencies = std::accumulate(
4239 Dependencies.begin(), Dependencies.end(), 0,
4240 [](unsigned V, const OMPTaskDataTy::DependData &D) {
4241 return D.DepKind == OMPC_DEPEND_depobj
4242 ? V
4243 : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4244 });
4245 QualType FlagsTy;
4246 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4247 bool HasDepobjDeps = false;
4248 bool HasRegularWithIterators = false;
4249 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4250 llvm::Value *NumOfRegularWithIterators =
4251 llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4252 // Calculate number of depobj dependencies and regular deps with the
4253 // iterators.
4254 for (const OMPTaskDataTy::DependData &D : Dependencies) {
4255 if (D.DepKind == OMPC_DEPEND_depobj) {
4256 SmallVector<llvm::Value *, 4> Sizes =
4257 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4258 for (llvm::Value *Size : Sizes) {
4259 NumOfDepobjElements =
4260 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4261 }
4262 HasDepobjDeps = true;
4263 continue;
4264 }
4265 // Include number of iterations, if any.
4266
4267 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4268 llvm::Value *ClauseIteratorSpace =
4269 llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4270 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4271 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4272 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4273 ClauseIteratorSpace = CGF.Builder.CreateNUWMul(Sz, ClauseIteratorSpace);
4274 }
4275 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4276 ClauseIteratorSpace,
4277 llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4278 NumOfRegularWithIterators =
4279 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4280 HasRegularWithIterators = true;
4281 continue;
4282 }
4283 }
4284
4285 QualType KmpDependInfoArrayTy;
4286 if (HasDepobjDeps || HasRegularWithIterators) {
4287 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4288 /*isSigned=*/false);
4289 if (HasDepobjDeps) {
4290 NumOfElements =
4291 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4292 }
4293 if (HasRegularWithIterators) {
4294 NumOfElements =
4295 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4296 }
4297 auto *OVE = new (C) OpaqueValueExpr(
4298 Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4299 VK_PRValue);
4300 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4301 RValue::get(NumOfElements));
4302 KmpDependInfoArrayTy =
4303 C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal,
4304 /*IndexTypeQuals=*/0);
4305 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4306 // Properly emit variable-sized array.
4307 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4308 ImplicitParamKind::Other);
4309 CGF.EmitVarDecl(*PD);
4310 DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4311 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4312 /*isSigned=*/false);
4313 } else {
4314 KmpDependInfoArrayTy = C.getConstantArrayType(
4315 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4316 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4317 DependenciesArray =
4318 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4319 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4320 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4321 /*isSigned=*/false);
4322 }
4323 unsigned Pos = 0;
4324 for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
4325 if (Dep.DepKind == OMPC_DEPEND_depobj || Dep.IteratorExpr)
4326 continue;
4327 emitDependData(CGF, KmpDependInfoTy, &Pos, Dep, DependenciesArray);
4328 }
4329 // Copy regular dependencies with iterators.
4330 LValue PosLVal = CGF.MakeAddrLValue(
4331 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4332 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4333 for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
4334 if (Dep.DepKind == OMPC_DEPEND_depobj || !Dep.IteratorExpr)
4335 continue;
4336 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dep, DependenciesArray);
4337 }
4338 // Copy final depobj arrays without iterators.
4339 if (HasDepobjDeps) {
4340 for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
4341 if (Dep.DepKind != OMPC_DEPEND_depobj)
4342 continue;
4343 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dep, DependenciesArray);
4344 }
4345 }
4346 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4347 DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
4348 return std::make_pair(NumOfElements, DependenciesArray);
4349}
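// The resulting array is laid out as: regular dependencies without iterators
// first, then regular dependencies expanded per iteration, and finally the
// contents of any depobj arrays copied verbatim.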
4350
4351Address CGOpenMPRuntime::emitDepobjDependClause(
4352 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4353 SourceLocation Loc) {
4354 if (Dependencies.DepExprs.empty())
4355 return Address::invalid();
4356 // Process list of dependencies.
4357 ASTContext &C = CGM.getContext();
4358 Address DependenciesArray = Address::invalid();
4359 unsigned NumDependencies = Dependencies.DepExprs.size();
4360 QualType FlagsTy;
4361 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4362 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4363
4364 llvm::Value *Size;
4365 // Define type kmp_depend_info[<Dependencies.size()>];
4366 // For depobj reserve one extra element to store the number of elements.
4367 // It is required to handle depobj(x) update(in) construct.
4368 // kmp_depend_info[<Dependencies.size()>] deps;
4369 llvm::Value *NumDepsVal;
4370 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4371 if (const auto *IE =
4372 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4373 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4374 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4375 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4376 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4377 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4378 }
4379 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4380 NumDepsVal);
4381 CharUnits SizeInBytes =
4382 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4383 llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4384 Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4385 NumDepsVal =
4386 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4387 } else {
4388 QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4389 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4390 nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4391 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4392 Size = CGM.getSize(Sz.alignTo(Align));
4393 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4394 }
4395 // Need to allocate on the dynamic memory.
4396 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4397 // Use default allocator.
4398 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4399 llvm::Value *Args[] = {ThreadID, Size, Allocator};
4400
4401 llvm::Value *Addr =
4402 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4403 CGM.getModule(), OMPRTL___kmpc_alloc),
4404 Args, ".dep.arr.addr");
4405 llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
4406 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4407 Addr, CGF.Builder.getPtrTy(0));
4408 DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
4409 // Write number of elements in the first element of array for depobj.
4410 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4411 // deps[i].base_addr = NumDependencies;
4412 LValue BaseAddrLVal = CGF.EmitLValueForField(
4413 Base,
4414 *std::next(KmpDependInfoRD->field_begin(),
4415 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4416 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4417 llvm::PointerUnion<unsigned *, LValue *> Pos;
4418 unsigned Idx = 1;
4419 LValue PosLVal;
4420 if (Dependencies.IteratorExpr) {
4421 PosLVal = CGF.MakeAddrLValue(
4422 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4423 C.getSizeType());
4424 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4425 /*IsInit=*/true);
4426 Pos = &PosLVal;
4427 } else {
4428 Pos = &Idx;
4429 }
4430 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4431 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4432 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
4433 CGF.Int8Ty);
4434 return DependenciesArray;
4435}
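// The depobj storage allocated above looks, schematically, like:
// \code
// deps[0].base_addr = <ndeps>; // bookkeeping slot read by getDepobjElements
// deps[1..ndeps];              // the actual kmp_depend_info entries
// return &deps[1];             // the depobj handle points past the count
// \endcode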
4436
4437void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4438 SourceLocation Loc) {
4439 ASTContext &C = CGM.getContext();
4440 QualType FlagsTy;
4441 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4442 LValue Base = CGF.EmitLoadOfPointerLValue(DepobjLVal.getAddress(),
4443 C.VoidPtrTy.castAs<PointerType>());
4444 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4445 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4446 Base.getAddress(), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
4447 CGF.ConvertTypeForMem(KmpDependInfoTy));
4448 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4449 Addr.getElementType(), Addr.emitRawPointer(CGF),
4450 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4451 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4452 CGF.VoidPtrTy);
4453 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4454 // Use default allocator.
4455 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4456 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4457
4458 // _kmpc_free(gtid, addr, nullptr);
4459 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4460 CGM.getModule(), OMPRTL___kmpc_free),
4461 Args);
4462}
4463
4464void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
4465 OpenMPDependClauseKind NewDepKind,
4466 SourceLocation Loc) {
4467 ASTContext &C = CGM.getContext();
4468 QualType FlagsTy;
4469 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4470 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4471 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4472 llvm::Value *NumDeps;
4473 LValue Base;
4474 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
4475
4476 Address Begin = Base.getAddress();
4477 // Cast from pointer to array type to pointer to single element.
4478 llvm::Value *End = CGF.Builder.CreateGEP(Begin.getElementType(),
4479 Begin.emitRawPointer(CGF), NumDeps);
4480 // The basic structure here is a while-do loop.
4481 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
4482 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
4483 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4484 CGF.EmitBlock(BodyBB);
4485 llvm::PHINode *ElementPHI =
4486 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
4487 ElementPHI->addIncoming(Begin.emitRawPointer(CGF), EntryBB);
4488 Begin = Begin.withPointer(ElementPHI, KnownNonNull);
4489 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
4490 Base.getTBAAInfo());
4491 // deps[i].flags = NewDepKind;
4492 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
4493 LValue FlagsLVal = CGF.EmitLValueForField(
4494 Base, *std::next(KmpDependInfoRD->field_begin(),
4495 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4496 CGF.EmitStoreOfScalar(
4497 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4498 FlagsLVal);
4499
4500 // Shift the address forward by one element.
4501 llvm::Value *ElementNext =
4502 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext")
4503 .emitRawPointer(CGF);
4504 ElementPHI->addIncoming(ElementNext, CGF.Builder.GetInsertBlock());
4505 llvm::Value *IsEmpty =
4506 CGF.Builder.CreateICmpEQ(ElementNext, End, "omp.isempty");
4507 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4508 // Done.
4509 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4510}
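// The loop above is equivalent to:
// \code
// for (kmp_depend_info *p = deps; p != deps + ndeps; ++p)
//   p->flags = <translated NewDepKind>;
// \endcode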
4511
4512void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
4513 const OMPExecutableDirective &D,
4514 llvm::Function *TaskFunction,
4515 QualType SharedsTy, Address Shareds,
4516 const Expr *IfCond,
4517 const OMPTaskDataTy &Data) {
4518 if (!CGF.HaveInsertPoint())
4519 return;
4520
4521 TaskResultTy Result =
4522 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4523 llvm::Value *NewTask = Result.NewTask;
4524 llvm::Function *TaskEntry = Result.TaskEntry;
4525 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
4526 LValue TDBase = Result.TDBase;
4527 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
4528 // Process list of dependences.
4529 Address DependenciesArray = Address::invalid();
4530 llvm::Value *NumOfElements;
4531 std::tie(NumOfElements, DependenciesArray) =
4532 emitDependClause(CGF, Data.Dependences, Loc);
4533
4534 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4535 // libcall.
4536 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
4537 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
4538 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
4539 // list is not empty
4540 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4541 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4542 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
4543 llvm::Value *DepTaskArgs[7];
4544 if (!Data.Dependences.empty()) {
4545 DepTaskArgs[0] = UpLoc;
4546 DepTaskArgs[1] = ThreadID;
4547 DepTaskArgs[2] = NewTask;
4548 DepTaskArgs[3] = NumOfElements;
4549 DepTaskArgs[4] = DependenciesArray.emitRawPointer(CGF);
4550 DepTaskArgs[5] = CGF.Builder.getInt32(0);
4551 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4552 }
4553 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
4554 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
4555 if (!Data.Tied) {
4556 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4557 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
4558 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
4559 }
4560 if (!Data.Dependences.empty()) {
4561 CGF.EmitRuntimeCall(
4562 OMPBuilder.getOrCreateRuntimeFunction(
4563 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
4564 DepTaskArgs);
4565 } else {
4566 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4567 CGM.getModule(), OMPRTL___kmpc_omp_task),
4568 TaskArgs);
4569 }
4570 // Check if the parent region is untied and build a return for the untied task.
4571 if (auto *Region =
4572 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4573 Region->emitUntiedSwitch(CGF);
4574 };
4575
4576 llvm::Value *DepWaitTaskArgs[7];
4577 if (!Data.Dependences.empty()) {
4578 DepWaitTaskArgs[0] = UpLoc;
4579 DepWaitTaskArgs[1] = ThreadID;
4580 DepWaitTaskArgs[2] = NumOfElements;
4581 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
4582 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4583 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4584 DepWaitTaskArgs[6] =
4585 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
4586 }
4587 auto &M = CGM.getModule();
4588 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
4589 TaskEntry, &Data, &DepWaitTaskArgs,
4590 Loc](CodeGenFunction &CGF, PrePostActionTy &) {
4591 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4592 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
4593 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
4594 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
4595 // is specified.
4596 if (!Data.Dependences.empty())
4597 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4598 M, OMPRTL___kmpc_omp_taskwait_deps_51),
4599 DepWaitTaskArgs);
4600 // Call proxy_task_entry(gtid, new_task);
4601 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
4602 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
4603 Action.Enter(CGF);
4604 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
4605 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
4606 OutlinedFnArgs);
4607 };
4608
4609 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
4610 // kmp_task_t *new_task);
4611 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
4612 // kmp_task_t *new_task);
4613 RegionCodeGenTy RCG(CodeGen);
4614 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
4615 M, OMPRTL___kmpc_omp_task_begin_if0),
4616 TaskArgs,
4617 OMPBuilder.getOrCreateRuntimeFunction(
4618 M, OMPRTL___kmpc_omp_task_complete_if0),
4619 TaskArgs);
4620 RCG.setAction(Action);
4621 RCG(CGF);
4622 };
4623
4624 if (IfCond) {
4625 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
4626 } else {
4627 RegionCodeGenTy ThenRCG(ThenCodeGen);
4628 ThenRCG(CGF);
4629 }
4630}
4631
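/// Emit a __kmpc_taskloop() (or __kmpc_taskloop_5() when a scheduling
/// modifier is present) call for a taskloop directive, storing the lower
/// bound, upper bound, stride and reduction data into the kmp_task_t object
/// first.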
4632 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
4633 const OMPLoopDirective &D,
4634 llvm::Function *TaskFunction,
4635 QualType SharedsTy, Address Shareds,
4636 const Expr *IfCond,
4637 const OMPTaskDataTy &Data) {
4638 if (!CGF.HaveInsertPoint())
4639 return;
4640 TaskResultTy Result =
4641 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4642 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4643 // libcall.
4644 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
4645 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
4646 // sched, kmp_uint64 grainsize, void *task_dup);
4647 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4648 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4649 llvm::Value *IfVal;
4650 if (IfCond) {
4651 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
4652 /*isSigned=*/true);
4653 } else {
4654 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
4655 }
4656
4657 LValue LBLVal = CGF.EmitLValueForField(
4658 Result.TDBase,
4659 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
4660 const auto *LBVar =
4661 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
4662 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
4663 /*IsInitializer=*/true);
4664 LValue UBLVal = CGF.EmitLValueForField(
4665 Result.TDBase,
4666 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
4667 const auto *UBVar =
4668 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
4669 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
4670 /*IsInitializer=*/true);
4671 LValue StLVal = CGF.EmitLValueForField(
4672 Result.TDBase,
4673 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
4674 const auto *StVar =
4675 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
4676 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
4677 /*IsInitializer=*/true);
4678 // Store reductions address.
4679 LValue RedLVal = CGF.EmitLValueForField(
4680 Result.TDBase,
4681 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
4682 if (Data.Reductions) {
4683 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
4684 } else {
4685 CGF.EmitNullInitialization(RedLVal.getAddress(),
4686 CGF.getContext().VoidPtrTy);
4687 }
4688 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
4689 llvm::SmallVector<llvm::Value *, 12> TaskArgs{
4690 UpLoc,
4691 ThreadID,
4692 Result.NewTask,
4693 IfVal,
4694 LBLVal.getPointer(CGF),
4695 UBLVal.getPointer(CGF),
4696 CGF.EmitLoadOfScalar(StLVal, Loc),
4697 llvm::ConstantInt::getSigned(
4698 CGF.IntTy, 1), // Always 1 because the taskgroup is emitted by the compiler.
4699 llvm::ConstantInt::getSigned(
4700 CGF.IntTy, Data.Schedule.getPointer()
4701 ? Data.Schedule.getInt() ? NumTasks : Grainsize
4702 : NoSchedule),
4703 Data.Schedule.getPointer()
4704 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
4705 /*isSigned=*/false)
4706 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0)};
4707 if (Data.HasModifier)
4708 TaskArgs.push_back(llvm::ConstantInt::get(CGF.Int32Ty, 1));
4709
4710 TaskArgs.push_back(Result.TaskDupFn
4711 ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4712 Result.TaskDupFn, CGF.VoidPtrTy)
4713 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy));
4714 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4715 CGM.getModule(), Data.HasModifier
4716 ? OMPRTL___kmpc_taskloop_5
4717 : OMPRTL___kmpc_taskloop),
4718 TaskArgs);
4719}
4720
4721/// Emit reduction operation for each element of array (required for
4722/// array sections) LHS op = RHS.
4723/// \param Type Type of array.
4724/// \param LHSVar Variable on the left side of the reduction operation
4725/// (references element of array in original variable).
4726/// \param RHSVar Variable on the right side of the reduction operation
4727/// (references element of array in original variable).
4728/// \param RedOpGen Generator of reduction operation with use of LHSVar and
4729/// RHSVar.
4730 static void EmitOMPAggregateReduction(
4731 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4732 const VarDecl *RHSVar,
4733 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4734 const Expr *, const Expr *)> &RedOpGen,
4735 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4736 const Expr *UpExpr = nullptr) {
4737 // Perform element-by-element initialization.
4738 QualType ElementTy;
4739 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
4740 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
4741
4742 // Drill down to the base element type on both arrays.
4743 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
4744 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
4745
4746 llvm::Value *RHSBegin = RHSAddr.emitRawPointer(CGF);
4747 llvm::Value *LHSBegin = LHSAddr.emitRawPointer(CGF);
4748 // Compute a pointer one past the last element of the LHS array.
4749 llvm::Value *LHSEnd =
4750 CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
4751 // The basic structure here is a while-do loop.
4752 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
4753 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
4754 llvm::Value *IsEmpty =
4755 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
4756 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4757
4758 // Enter the loop body, making that address the current address.
4759 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4760 CGF.EmitBlock(BodyBB);
4761
4762 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4763
4764 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4765 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
4766 RHSElementPHI->addIncoming(RHSBegin, EntryBB);
4767 Address RHSElementCurrent(
4768 RHSElementPHI, RHSAddr.getElementType(),
4769 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4770
4771 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4772 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
4773 LHSElementPHI->addIncoming(LHSBegin, EntryBB);
4774 Address LHSElementCurrent(
4775 LHSElementPHI, LHSAddr.getElementType(),
4776 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4777
4778 // Emit copy.
4779 CodeGenFunction::OMPPrivateScope Scope(CGF);
4780 Scope.addPrivate(LHSVar, LHSElementCurrent);
4781 Scope.addPrivate(RHSVar, RHSElementCurrent);
4782 Scope.Privatize();
4783 RedOpGen(CGF, XExpr, EExpr, UpExpr);
4784 Scope.ForceCleanup();
4785
4786 // Shift the address forward by one element.
4787 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4788 LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
4789 "omp.arraycpy.dest.element");
4790 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4791 RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
4792 "omp.arraycpy.src.element");
4793 // Check whether we've reached the end.
4794 llvm::Value *Done =
4795 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
4796 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
4797 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
4798 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
4799
4800 // Done.
4801 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4802}
4803
4804/// Emit reduction combiner. If the combiner is a simple expression emit it as
4805/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
4806/// UDR combiner function.
4807 static void emitReductionCombiner(CodeGenFunction &CGF,
4808 const Expr *ReductionOp) {
4809 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
4810 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4811 if (const auto *DRE =
4812 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
4813 if (const auto *DRD =
4814 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
4815 std::pair<llvm::Function *, llvm::Function *> Reduction =
4816 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
4817 RValue Func = RValue::get(Reduction.first);
4818 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
4819 CGF.EmitIgnoredExpr(ReductionOp);
4820 return;
4821 }
4822 CGF.EmitIgnoredExpr(ReductionOp);
4823}
4824
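/// Emits the reduce_func() helper that is passed to __kmpc_reduce{_nowait}():
/// it receives two void*[n] argument arrays and combines them element by
/// element using the per-item reduction operations.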
4825 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
4826 StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
4827 ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
4828 ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
4829 ASTContext &C = CGM.getContext();
4830
4831 // void reduction_func(void *LHSArg, void *RHSArg);
4832 FunctionArgList Args;
4833 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4834 ImplicitParamKind::Other);
4835 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4836 ImplicitParamKind::Other);
4837 Args.push_back(&LHSArg);
4838 Args.push_back(&RHSArg);
4839 const auto &CGFI =
4840 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4841 std::string Name = getReductionFuncName(ReducerName);
4842 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
4843 llvm::GlobalValue::InternalLinkage, Name,
4844 &CGM.getModule());
4845 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
4846 Fn->setDoesNotRecurse();
4847 CodeGenFunction CGF(CGM);
4848 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
4849
4850 // Dst = (void*[n])(LHSArg);
4851 // Src = (void*[n])(RHSArg);
4852 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4853 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
4854 CGF.Builder.getPtrTy(0)),
4855 ArgsElemType, CGF.getPointerAlign());
4856 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4857 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
4858 CGF.Builder.getPtrTy(0)),
4859 ArgsElemType, CGF.getPointerAlign());
4860
4861 // ...
4862 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
4863 // ...
4864 CodeGenFunction::OMPPrivateScope Scope(CGF);
4865 const auto *IPriv = Privates.begin();
4866 unsigned Idx = 0;
4867 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
4868 const auto *RHSVar =
4869 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
4870 Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
4871 const auto *LHSVar =
4872 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
4873 Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
4874 QualType PrivTy = (*IPriv)->getType();
4875 if (PrivTy->isVariablyModifiedType()) {
4876 // Get array size and emit VLA type.
4877 ++Idx;
4878 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
4879 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
4880 const VariableArrayType *VLA =
4881 CGF.getContext().getAsVariableArrayType(PrivTy);
4882 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
4883 CodeGenFunction::OpaqueValueMapping OpaqueMap(
4884 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
4885 CGF.EmitVariablyModifiedType(PrivTy);
4886 }
4887 }
4888 Scope.Privatize();
4889 IPriv = Privates.begin();
4890 const auto *ILHS = LHSExprs.begin();
4891 const auto *IRHS = RHSExprs.begin();
4892 for (const Expr *E : ReductionOps) {
4893 if ((*IPriv)->getType()->isArrayType()) {
4894 // Emit reduction for array section.
4895 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
4896 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
4897 EmitOMPAggregateReduction(
4898 CGF, (*IPriv)->getType(), LHSVar, RHSVar,
4899 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4900 emitReductionCombiner(CGF, E);
4901 });
4902 } else {
4903 // Emit reduction for array subscript or single variable.
4904 emitReductionCombiner(CGF, E);
4905 }
4906 ++IPriv;
4907 ++ILHS;
4908 ++IRHS;
4909 }
4910 Scope.ForceCleanup();
4911 CGF.FinishFunction();
4912 return Fn;
4913}
4914
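/// Emits the combiner for a single reduction item, dispatching to the
/// aggregate helper above for array types.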
4915 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
4916 const Expr *ReductionOp,
4917 const Expr *PrivateRef,
4918 const DeclRefExpr *LHS,
4919 const DeclRefExpr *RHS) {
4920 if (PrivateRef->getType()->isArrayType()) {
4921 // Emit reduction for array section.
4922 const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
4923 const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
4924 EmitOMPAggregateReduction(
4925 CGF, PrivateRef->getType(), LHSVar, RHSVar,
4926 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4927 emitReductionCombiner(CGF, ReductionOp);
4928 });
4929 } else {
4930 // Emit reduction for array subscript or single variable.
4931 emitReductionCombiner(CGF, ReductionOp);
4932 }
4933}
4934
4935static std::string generateUniqueName(CodeGenModule &CGM,
4936 llvm::StringRef Prefix, const Expr *Ref);
4937
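/// Emits a reduction of privatized variables through an internal shared
/// variable guarded by barriers and a critical section; the step-by-step
/// protocol is described in the comment below.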
4938 void CGOpenMPRuntime::emitPrivateReduction(
4939 CodeGenFunction &CGF, SourceLocation Loc, const Expr *Privates,
4940 const Expr *LHSExprs, const Expr *RHSExprs, const Expr *ReductionOps) {
4941
4942 // Create a shared global variable (__shared_reduction_var) to accumulate the
4943 // final result.
4944 //
4945 // Call __kmpc_barrier to synchronize threads before initialization.
4946 //
4947 // The master thread (thread_id == 0) initializes __shared_reduction_var
4948 // with the identity value or initializer.
4949 //
4950 // Call __kmpc_barrier to synchronize before combining.
4951 // For each i:
4952 // - Thread enters critical section.
4953 // - Reads its private value from LHSExprs[i].
4954 // - Updates __shared_reduction_var[i] = RedOp_i(__shared_reduction_var[i],
4955 // Privates[i]).
4956 // - Exits critical section.
4957 //
4958 // Call __kmpc_barrier after combining.
4959 //
4960 // Each thread copies __shared_reduction_var[i] back to RHSExprs[i].
4961 //
4962 // Final __kmpc_barrier to synchronize after broadcasting
4963 QualType PrivateType = Privates->getType();
4964 llvm::Type *LLVMType = CGF.ConvertTypeForMem(PrivateType);
4965
4966 const OMPDeclareReductionDecl *UDR = getReductionInit(ReductionOps);
4967 std::string ReductionVarNameStr;
4968 if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates->IgnoreParenCasts()))
4969 ReductionVarNameStr =
4970 generateUniqueName(CGM, DRE->getDecl()->getNameAsString(), Privates);
4971 else
4972 ReductionVarNameStr = "unnamed_priv_var";
4973
4974 // Create an internal shared variable
4975 std::string SharedName =
4976 CGM.getOpenMPRuntime().getName({"internal_private_", ReductionVarNameStr});
4977 llvm::GlobalVariable *SharedVar = OMPBuilder.getOrCreateInternalVariable(
4978 LLVMType, ".omp.reduction." + SharedName);
4979
4980 SharedVar->setAlignment(
4981 llvm::MaybeAlign(CGF.getContext().getTypeAlign(PrivateType) / 8));
4982
4983 Address SharedResult =
4984 CGF.MakeNaturalAlignRawAddrLValue(SharedVar, PrivateType).getAddress();
4985
4986 llvm::Value *ThreadId = getThreadID(CGF, Loc);
4987 llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
4988 llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId};
4989
4990 llvm::BasicBlock *InitBB = CGF.createBasicBlock("init");
4991 llvm::BasicBlock *InitEndBB = CGF.createBasicBlock("init.end");
4992
4993 llvm::Value *IsMaster = CGF.Builder.CreateICmpEQ(
4994 ThreadId, llvm::ConstantInt::get(ThreadId->getType(), 0));
4995 CGF.Builder.CreateCondBr(IsMaster, InitBB, InitEndBB);
4996
4997 CGF.EmitBlock(InitBB);
4998
4999 auto EmitSharedInit = [&]() {
5000 if (UDR) { // Check if it's a User-Defined Reduction
5001 if (const Expr *UDRInitExpr = UDR->getInitializer()) {
5002 std::pair<llvm::Function *, llvm::Function *> FnPair =
5003 getUserDefinedReduction(UDR);
5004 llvm::Function *InitializerFn = FnPair.second;
5005 if (InitializerFn) {
5006 if (const auto *CE =
5007 dyn_cast<CallExpr>(UDRInitExpr->IgnoreParenImpCasts())) {
5008 const auto *OutDRE = cast<DeclRefExpr>(
5009 cast<UnaryOperator>(CE->getArg(0)->IgnoreParenImpCasts())
5010 ->getSubExpr());
5011 const VarDecl *OutVD = cast<VarDecl>(OutDRE->getDecl());
5012
5013 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5014 LocalScope.addPrivate(OutVD, SharedResult);
5015
5016 (void)LocalScope.Privatize();
5017 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(
5018 CE->getCallee()->IgnoreParenImpCasts())) {
5019 CodeGenFunction::OpaqueValueMapping OpaqueMap(
5020 CGF, OVE, RValue::get(InitializerFn));
5021 CGF.EmitIgnoredExpr(CE);
5022 } else {
5023 CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
5024 PrivateType.getQualifiers(),
5025 /*IsInitializer=*/true);
5026 }
5027 } else {
5028 CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
5029 PrivateType.getQualifiers(),
5030 /*IsInitializer=*/true);
5031 }
5032 } else {
5033 CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
5034 PrivateType.getQualifiers(),
5035 /*IsInitializer=*/true);
5036 }
5037 } else {
5038 // EmitNullInitialization handles default construction for C++ classes
5039 // and zeroing for scalars, which is a reasonable default.
5040 CGF.EmitNullInitialization(SharedResult, PrivateType);
5041 }
5042 return; // UDR initialization handled
5043 }
5044 if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates)) {
5045 if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
5046 if (const Expr *InitExpr = VD->getInit()) {
5047 CGF.EmitAnyExprToMem(InitExpr, SharedResult,
5048 PrivateType.getQualifiers(), true);
5049 return;
5050 }
5051 }
5052 }
5053 CGF.EmitNullInitialization(SharedResult, PrivateType);
5054 };
5055 EmitSharedInit();
5056 CGF.Builder.CreateBr(InitEndBB);
5057 CGF.EmitBlock(InitEndBB);
5058
5059 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5060 CGM.getModule(), OMPRTL___kmpc_barrier),
5061 BarrierArgs);
5062
5063 const Expr *ReductionOp = ReductionOps;
5064 const OMPDeclareReductionDecl *CurrentUDR = getReductionInit(ReductionOp);
5065 LValue SharedLV = CGF.MakeAddrLValue(SharedResult, PrivateType);
5066 LValue LHSLV = CGF.EmitLValue(Privates);
5067
5068 auto EmitCriticalReduction = [&](auto ReductionGen) {
5069 std::string CriticalName = getName({"reduction_critical"});
5070 emitCriticalRegion(CGF, CriticalName, ReductionGen, Loc);
5071 };
5072
5073 if (CurrentUDR) {
5074 // Handle user-defined reduction.
5075 auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
5076 Action.Enter(CGF);
5077 std::pair<llvm::Function *, llvm::Function *> FnPair =
5078 getUserDefinedReduction(CurrentUDR);
5079 if (FnPair.first) {
5080 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) {
5081 const auto *OutDRE = cast<DeclRefExpr>(
5082 cast<UnaryOperator>(CE->getArg(0)->IgnoreParenImpCasts())
5083 ->getSubExpr());
5084 const auto *InDRE = cast<DeclRefExpr>(
5085 cast<UnaryOperator>(CE->getArg(1)->IgnoreParenImpCasts())
5086 ->getSubExpr());
5087 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5088 LocalScope.addPrivate(cast<VarDecl>(OutDRE->getDecl()),
5089 SharedLV.getAddress());
5090 LocalScope.addPrivate(cast<VarDecl>(InDRE->getDecl()),
5091 LHSLV.getAddress());
5092 (void)LocalScope.Privatize();
5093 emitReductionCombiner(CGF, ReductionOp);
5094 }
5095 }
5096 };
5097 EmitCriticalReduction(ReductionGen);
5098 } else {
5099 // Handle built-in reduction operations.
5100#ifndef NDEBUG
5101 const Expr *ReductionClauseExpr = ReductionOp->IgnoreParenCasts();
5102 if (const auto *Cleanup = dyn_cast<ExprWithCleanups>(ReductionClauseExpr))
5103 ReductionClauseExpr = Cleanup->getSubExpr()->IgnoreParenCasts();
5104
5105 const Expr *AssignRHS = nullptr;
5106 if (const auto *BinOp = dyn_cast<BinaryOperator>(ReductionClauseExpr)) {
5107 if (BinOp->getOpcode() == BO_Assign)
5108 AssignRHS = BinOp->getRHS();
5109 } else if (const auto *OpCall =
5110 dyn_cast<CXXOperatorCallExpr>(ReductionClauseExpr)) {
5111 if (OpCall->getOperator() == OO_Equal)
5112 AssignRHS = OpCall->getArg(1);
5113 }
5114
5115 assert(AssignRHS &&
5116 "Private Variable Reduction : Invalid ReductionOp expression");
5117#endif
5118
5119 auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
5120 Action.Enter(CGF);
5121 const auto *OmpOutDRE =
5122 dyn_cast<DeclRefExpr>(LHSExprs->IgnoreParenImpCasts());
5123 const auto *OmpInDRE =
5124 dyn_cast<DeclRefExpr>(RHSExprs->IgnoreParenImpCasts());
5125 assert(
5126 OmpOutDRE && OmpInDRE &&
5127 "Private Variable Reduction : LHSExpr/RHSExpr must be DeclRefExprs");
5128 const VarDecl *OmpOutVD = cast<VarDecl>(OmpOutDRE->getDecl());
5129 const VarDecl *OmpInVD = cast<VarDecl>(OmpInDRE->getDecl());
5130 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5131 LocalScope.addPrivate(OmpOutVD, SharedLV.getAddress());
5132 LocalScope.addPrivate(OmpInVD, LHSLV.getAddress());
5133 (void)LocalScope.Privatize();
5134 // Emit the actual reduction operation
5135 CGF.EmitIgnoredExpr(ReductionOp);
5136 };
5137 EmitCriticalReduction(ReductionGen);
5138 }
5139
5140 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5141 CGM.getModule(), OMPRTL___kmpc_barrier),
5142 BarrierArgs);
5143
5144 // Broadcast final result
5145 bool IsAggregate = PrivateType->isAggregateType();
5146 LValue SharedLV1 = CGF.MakeAddrLValue(SharedResult, PrivateType);
5147 llvm::Value *FinalResultVal = nullptr;
5148 Address FinalResultAddr = Address::invalid();
5149
5150 if (IsAggregate)
5151 FinalResultAddr = SharedResult;
5152 else
5153 FinalResultVal = CGF.EmitLoadOfScalar(SharedLV1, Loc);
5154
5155 LValue TargetLHSLV = CGF.EmitLValue(RHSExprs);
5156 if (IsAggregate) {
5157 CGF.EmitAggregateCopy(TargetLHSLV,
5158 CGF.MakeAddrLValue(FinalResultAddr, PrivateType),
5159 PrivateType, AggValueSlot::DoesNotOverlap, false);
5160 } else {
5161 CGF.EmitStoreOfScalar(FinalResultVal, TargetLHSLV);
5162 }
5163 // Final synchronization barrier
5164 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5165 CGM.getModule(), OMPRTL___kmpc_barrier),
5166 BarrierArgs);
5167
5168 // Combiner with original list item
5169 auto OriginalListCombiner = [&](CodeGenFunction &CGF,
5170 PrePostActionTy &Action) {
5171 Action.Enter(CGF);
5172 emitSingleReductionCombiner(CGF, ReductionOps, Privates,
5173 cast<DeclRefExpr>(LHSExprs),
5174 cast<DeclRefExpr>(RHSExprs));
5175 };
5176 EmitCriticalReduction(OriginalListCombiner);
5177}
5178
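/// Emits the code sequence for the 'reduction' clause; the expected runtime
/// interaction is sketched in the comment below. Items marked as private
/// variable reductions are filtered out and handled by emitPrivateReduction().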
5179 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5180 ArrayRef<const Expr *> OrgPrivates,
5181 ArrayRef<const Expr *> OrgLHSExprs,
5182 ArrayRef<const Expr *> OrgRHSExprs,
5183 ArrayRef<const Expr *> OrgReductionOps,
5184 ReductionOptionsTy Options) {
5185 if (!CGF.HaveInsertPoint())
5186 return;
5187
5188 bool WithNowait = Options.WithNowait;
5189 bool SimpleReduction = Options.SimpleReduction;
5190
5191 // The following code should be emitted for the reduction:
5192 //
5193 // static kmp_critical_name lock = { 0 };
5194 //
5195 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5196 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5197 // ...
5198 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5199 // *(Type<n>-1*)rhs[<n>-1]);
5200 // }
5201 //
5202 // ...
5203 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5204 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5205 // RedList, reduce_func, &<lock>)) {
5206 // case 1:
5207 // ...
5208 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5209 // ...
5210 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5211 // break;
5212 // case 2:
5213 // ...
5214 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5215 // ...
5216 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5217 // break;
5218 // default:;
5219 // }
5220 //
5221 // If SimpleReduction is true, only the following code is generated:
5222 // ...
5223 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5224 // ...
5225
5226 ASTContext &C = CGM.getContext();
5227
5228 if (SimpleReduction) {
5229 CodeGenFunction::RunCleanupsScope Scope(CGF);
5230 const auto *IPriv = OrgPrivates.begin();
5231 const auto *ILHS = OrgLHSExprs.begin();
5232 const auto *IRHS = OrgRHSExprs.begin();
5233 for (const Expr *E : OrgReductionOps) {
5234 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5235 cast<DeclRefExpr>(*IRHS));
5236 ++IPriv;
5237 ++ILHS;
5238 ++IRHS;
5239 }
5240 return;
5241 }
5242
5243 // Filter out shared reduction variables based on IsPrivateVarReduction flag.
5244 // Only keep entries where the corresponding variable is not private.
5245 SmallVector<const Expr *> FilteredPrivates, FilteredLHSExprs,
5246 FilteredRHSExprs, FilteredReductionOps;
5247 for (unsigned I : llvm::seq<unsigned>(
5248 std::min(OrgReductionOps.size(), OrgLHSExprs.size()))) {
5249 if (!Options.IsPrivateVarReduction[I]) {
5250 FilteredPrivates.emplace_back(OrgPrivates[I]);
5251 FilteredLHSExprs.emplace_back(OrgLHSExprs[I]);
5252 FilteredRHSExprs.emplace_back(OrgRHSExprs[I]);
5253 FilteredReductionOps.emplace_back(OrgReductionOps[I]);
5254 }
5255 }
5256 // Wrap filtered vectors in ArrayRef for downstream shared reduction
5257 // processing.
5258 ArrayRef<const Expr *> Privates = FilteredPrivates;
5259 ArrayRef<const Expr *> LHSExprs = FilteredLHSExprs;
5260 ArrayRef<const Expr *> RHSExprs = FilteredRHSExprs;
5261 ArrayRef<const Expr *> ReductionOps = FilteredReductionOps;
5262
5263 // 1. Build a list of reduction variables.
5264 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5265 auto Size = RHSExprs.size();
5266 for (const Expr *E : Privates) {
5267 if (E->getType()->isVariablyModifiedType())
5268 // Reserve place for array size.
5269 ++Size;
5270 }
5271 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5272 QualType ReductionArrayTy = C.getConstantArrayType(
5273 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
5274 /*IndexTypeQuals=*/0);
5275 RawAddress ReductionList =
5276 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5277 const auto *IPriv = Privates.begin();
5278 unsigned Idx = 0;
5279 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5280 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5281 CGF.Builder.CreateStore(
5282 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5283 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5284 Elem);
5285 if ((*IPriv)->getType()->isVariablyModifiedType()) {
5286 // Store array size.
5287 ++Idx;
5288 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5289 llvm::Value *Size = CGF.Builder.CreateIntCast(
5290 CGF.getVLASize(
5291 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5292 .NumElts,
5293 CGF.SizeTy, /*isSigned=*/false);
5294 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5295 Elem);
5296 }
5297 }
5298
5299 // 2. Emit reduce_func().
5300 llvm::Function *ReductionFn = emitReductionFunction(
5301 CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
5302 Privates, LHSExprs, RHSExprs, ReductionOps);
5303
5304 // 3. Create static kmp_critical_name lock = { 0 };
5305 std::string Name = getName({"reduction"});
5306 llvm::Value *Lock = getCriticalRegionLock(Name);
5307
5308 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5309 // RedList, reduce_func, &<lock>);
5310 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5311 llvm::Value *ThreadId = getThreadID(CGF, Loc);
5312 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5313 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5314 ReductionList.getPointer(), CGF.VoidPtrTy);
5315 llvm::Value *Args[] = {
5316 IdentTLoc, // ident_t *<loc>
5317 ThreadId, // i32 <gtid>
5318 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5319 ReductionArrayTySize, // size_type sizeof(RedList)
5320 RL, // void *RedList
5321 ReductionFn, // void (*) (void *, void *) <reduce_func>
5322 Lock // kmp_critical_name *&<lock>
5323 };
5324 llvm::Value *Res = CGF.EmitRuntimeCall(
5325 OMPBuilder.getOrCreateRuntimeFunction(
5326 CGM.getModule(),
5327 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5328 Args);
5329
5330 // 5. Build switch(res)
5331 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5332 llvm::SwitchInst *SwInst =
5333 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5334
5335 // 6. Build case 1:
5336 // ...
5337 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5338 // ...
5339 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5340 // break;
5341 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5342 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5343 CGF.EmitBlock(Case1BB);
5344
5345 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5346 llvm::Value *EndArgs[] = {
5347 IdentTLoc, // ident_t *<loc>
5348 ThreadId, // i32 <gtid>
5349 Lock // kmp_critical_name *&<lock>
5350 };
5351 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5352 CodeGenFunction &CGF, PrePostActionTy &Action) {
5353 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5354 const auto *IPriv = Privates.begin();
5355 const auto *ILHS = LHSExprs.begin();
5356 const auto *IRHS = RHSExprs.begin();
5357 for (const Expr *E : ReductionOps) {
5358 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5359 cast<DeclRefExpr>(*IRHS));
5360 ++IPriv;
5361 ++ILHS;
5362 ++IRHS;
5363 }
5364 };
5365 RegionCodeGenTy RCG(CodeGen);
5366 CommonActionTy Action(
5367 nullptr, {},
5368 OMPBuilder.getOrCreateRuntimeFunction(
5369 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5370 : OMPRTL___kmpc_end_reduce),
5371 EndArgs);
5372 RCG.setAction(Action);
5373 RCG(CGF);
5374
5375 CGF.EmitBranch(DefaultBB);
5376
5377 // 7. Build case 2:
5378 // ...
5379 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5380 // ...
5381 // break;
5382 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5383 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5384 CGF.EmitBlock(Case2BB);
5385
5386 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5387 CodeGenFunction &CGF, PrePostActionTy &Action) {
5388 const auto *ILHS = LHSExprs.begin();
5389 const auto *IRHS = RHSExprs.begin();
5390 const auto *IPriv = Privates.begin();
5391 for (const Expr *E : ReductionOps) {
5392 const Expr *XExpr = nullptr;
5393 const Expr *EExpr = nullptr;
5394 const Expr *UpExpr = nullptr;
5395 BinaryOperatorKind BO = BO_Comma;
5396 if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5397 if (BO->getOpcode() == BO_Assign) {
5398 XExpr = BO->getLHS();
5399 UpExpr = BO->getRHS();
5400 }
5401 }
5402 // Try to emit update expression as a simple atomic.
5403 const Expr *RHSExpr = UpExpr;
5404 if (RHSExpr) {
5405 // Analyze RHS part of the whole expression.
5406 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5407 RHSExpr->IgnoreParenImpCasts())) {
5408 // If this is a conditional operator, analyze its condition for
5409 // min/max reduction operator.
5410 RHSExpr = ACO->getCond();
5411 }
5412 if (const auto *BORHS =
5413 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5414 EExpr = BORHS->getRHS();
5415 BO = BORHS->getOpcode();
5416 }
5417 }
5418 if (XExpr) {
5419 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5420 auto &&AtomicRedGen = [BO, VD,
5421 Loc](CodeGenFunction &CGF, const Expr *XExpr,
5422 const Expr *EExpr, const Expr *UpExpr) {
5423 LValue X = CGF.EmitLValue(XExpr);
5424 RValue E;
5425 if (EExpr)
5426 E = CGF.EmitAnyExpr(EExpr);
5427 CGF.EmitOMPAtomicSimpleUpdateExpr(
5428 X, E, BO, /*IsXLHSInRHSPart=*/true,
5429 llvm::AtomicOrdering::Monotonic, Loc,
5430 [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5431 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5432 Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5433 CGF.emitOMPSimpleStore(
5434 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5435 VD->getType().getNonReferenceType(), Loc);
5436 PrivateScope.addPrivate(VD, LHSTemp);
5437 (void)PrivateScope.Privatize();
5438 return CGF.EmitAnyExpr(UpExpr);
5439 });
5440 };
5441 if ((*IPriv)->getType()->isArrayType()) {
5442 // Emit atomic reduction for array section.
5443 const auto *RHSVar =
5444 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5445 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5446 AtomicRedGen, XExpr, EExpr, UpExpr);
5447 } else {
5448 // Emit atomic reduction for array subscript or single variable.
5449 AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5450 }
5451 } else {
5452 // Emit as a critical region.
5453 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5454 const Expr *, const Expr *) {
5455 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5456 std::string Name = RT.getName({"atomic_reduction"});
5457 RT.emitCriticalRegion(
5458 CGF, Name,
5459 [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5460 Action.Enter(CGF);
5461 CGF.EmitIgnoredExpr(E);
5462 },
5463 Loc);
5464 };
5465 if ((*IPriv)->getType()->isArrayType()) {
5466 const auto *LHSVar =
5467 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5468 const auto *RHSVar =
5469 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5470 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5471 CritRedGen);
5472 } else {
5473 CritRedGen(CGF, nullptr, nullptr, nullptr);
5474 }
5475 }
5476 ++ILHS;
5477 ++IRHS;
5478 ++IPriv;
5479 }
5480 };
5481 RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5482 if (!WithNowait) {
5483 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5484 llvm::Value *EndArgs[] = {
5485 IdentTLoc, // ident_t *<loc>
5486 ThreadId, // i32 <gtid>
5487 Lock // kmp_critical_name *&<lock>
5488 };
5489 CommonActionTy Action(nullptr, {},
5490 OMPBuilder.getOrCreateRuntimeFunction(
5491 CGM.getModule(), OMPRTL___kmpc_end_reduce),
5492 EndArgs);
5493 AtomicRCG.setAction(Action);
5494 AtomicRCG(CGF);
5495 } else {
5496 AtomicRCG(CGF);
5497 }
5498
5499 CGF.EmitBranch(DefaultBB);
5500 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5501 assert(OrgLHSExprs.size() == OrgPrivates.size() &&
5502 "PrivateVarReduction: Privates size mismatch");
5503 assert(OrgLHSExprs.size() == OrgReductionOps.size() &&
5504 "PrivateVarReduction: ReductionOps size mismatch");
5505 for (unsigned I : llvm::seq<unsigned>(
5506 std::min(OrgReductionOps.size(), OrgLHSExprs.size()))) {
5507 if (Options.IsPrivateVarReduction[I])
5508 emitPrivateReduction(CGF, Loc, OrgPrivates[I], OrgLHSExprs[I],
5509 OrgRHSExprs[I], OrgReductionOps[I]);
5510 }
5511}
5512
5513/// Generates unique name for artificial threadprivate variables.
5514/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5515static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5516 const Expr *Ref) {
5517 SmallString<256> Buffer;
5518 llvm::raw_svector_ostream Out(Buffer);
5519 const clang::DeclRefExpr *DE;
5520 const VarDecl *D = ::getBaseDecl(Ref, DE);
5521 if (!D)
5522 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5523 D = D->getCanonicalDecl();
5524 std::string Name = CGM.getOpenMPRuntime().getName(
5525 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5526 Out << Prefix << Name << "_"
5527 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5528 return std::string(Out.str());
5529}
5530
5531/// Emits reduction initializer function:
5532/// \code
5533/// void @.red_init(void* %arg, void* %orig) {
5534/// %0 = bitcast void* %arg to <type>*
5535/// store <type> <init>, <type>* %0
5536/// ret void
5537/// }
5538/// \endcode
5539static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5540 SourceLocation Loc,
5541 ReductionCodeGen &RCG, unsigned N) {
5542 ASTContext &C = CGM.getContext();
5543 QualType VoidPtrTy = C.VoidPtrTy;
5544 VoidPtrTy.addRestrict();
5545 FunctionArgList Args;
5546 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5547 ImplicitParamKind::Other);
5548 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5549 ImplicitParamKind::Other);
5550 Args.emplace_back(&Param);
5551 Args.emplace_back(&ParamOrig);
5552 const auto &FnInfo =
5553 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5554 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5555 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5556 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5557 Name, &CGM.getModule());
5558 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5559 Fn->setDoesNotRecurse();
5560 CodeGenFunction CGF(CGM);
5561 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5562 QualType PrivateType = RCG.getPrivateType(N);
5563 Address PrivateAddr = CGF.EmitLoadOfPointer(
5564 CGF.GetAddrOfLocalVar(&Param).withElementType(CGF.Builder.getPtrTy(0)),
5565 C.getPointerType(PrivateType)->castAs<PointerType>());
5566 llvm::Value *Size = nullptr;
5567 // If the size of the reduction item is non-constant, load it from the global
5568 // threadprivate variable.
5569 if (RCG.getSizes(N).second) {
5570 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5571 CGF, CGM.getContext().getSizeType(),
5572 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5573 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5574 CGM.getContext().getSizeType(), Loc);
5575 }
5576 RCG.emitAggregateType(CGF, N, Size);
5577 Address OrigAddr = Address::invalid();
5578 // If the initializer uses the initializer from a declare reduction construct,
5579 // emit a pointer to the address of the original reduction item (required by
5580 // the reduction initializer).
5581 if (RCG.usesReductionInitializer(N)) {
5582 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5583 OrigAddr = CGF.EmitLoadOfPointer(
5584 SharedAddr,
5585 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5586 }
5587 // Emit the initializer:
5588 // %0 = bitcast void* %arg to <type>*
5589 // store <type> <init>, <type>* %0
5590 RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
5591 [](CodeGenFunction &) { return false; });
5592 CGF.FinishFunction();
5593 return Fn;
5594}
5595
5596/// Emits reduction combiner function:
5597/// \code
5598/// void @.red_comb(void* %arg0, void* %arg1) {
5599/// %lhs = bitcast void* %arg0 to <type>*
5600/// %rhs = bitcast void* %arg1 to <type>*
5601/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5602/// store <type> %2, <type>* %lhs
5603/// ret void
5604/// }
5605/// \endcode
5606static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5607 SourceLocation Loc,
5608 ReductionCodeGen &RCG, unsigned N,
5609 const Expr *ReductionOp,
5610 const Expr *LHS, const Expr *RHS,
5611 const Expr *PrivateRef) {
5612 ASTContext &C = CGM.getContext();
5613 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5614 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5615 FunctionArgList Args;
5616 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5617 C.VoidPtrTy, ImplicitParamKind::Other);
5618 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5619 ImplicitParamKind::Other);
5620 Args.emplace_back(&ParamInOut);
5621 Args.emplace_back(&ParamIn);
5622 const auto &FnInfo =
5623 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5624 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5625 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5626 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5627 Name, &CGM.getModule());
5628 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5629 Fn->setDoesNotRecurse();
5630 CodeGenFunction CGF(CGM);
5631 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5632 llvm::Value *Size = nullptr;
5633 // If the size of the reduction item is non-constant, load it from the global
5634 // threadprivate variable.
5635 if (RCG.getSizes(N).second) {
5636 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5637 CGF, CGM.getContext().getSizeType(),
5638 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5639 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5640 CGM.getContext().getSizeType(), Loc);
5641 }
5642 RCG.emitAggregateType(CGF, N, Size);
5643 // Remap lhs and rhs variables to the addresses of the function arguments.
5644 // %lhs = bitcast void* %arg0 to <type>*
5645 // %rhs = bitcast void* %arg1 to <type>*
5646 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5647 PrivateScope.addPrivate(
5648 LHSVD,
5649 // Pull out the pointer to the variable.
5650 CGF.EmitLoadOfPointer(
5651 CGF.GetAddrOfLocalVar(&ParamInOut)
5652 .withElementType(CGF.Builder.getPtrTy(0)),
5653 C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
5654 PrivateScope.addPrivate(
5655 RHSVD,
5656 // Pull out the pointer to the variable.
5657 CGF.EmitLoadOfPointer(
5658 CGF.GetAddrOfLocalVar(&ParamIn).withElementType(
5659 CGF.Builder.getPtrTy(0)),
5660 C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
5661 PrivateScope.Privatize();
5662 // Emit the combiner body:
5663 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5664 // store <type> %2, <type>* %lhs
5665 CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5666 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5667 cast<DeclRefExpr>(RHS));
5668 CGF.FinishFunction();
5669 return Fn;
5670}
5671
5672/// Emits reduction finalizer function:
5673/// \code
5674/// void @.red_fini(void* %arg) {
5675/// %0 = bitcast void* %arg to <type>*
5676/// <destroy>(<type>* %0)
5677/// ret void
5678/// }
5679/// \endcode
5680static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5681 SourceLocation Loc,
5682 ReductionCodeGen &RCG, unsigned N) {
5683 if (!RCG.needCleanups(N))
5684 return nullptr;
5685 ASTContext &C = CGM.getContext();
5686 FunctionArgList Args;
5687 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5688 ImplicitParamKind::Other);
5689 Args.emplace_back(&Param);
5690 const auto &FnInfo =
5691 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5692 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5693 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5694 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5695 Name, &CGM.getModule());
5696 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5697 Fn->setDoesNotRecurse();
5698 CodeGenFunction CGF(CGM);
5699 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5700 Address PrivateAddr = CGF.EmitLoadOfPointer(
5701 CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
5702 llvm::Value *Size = nullptr;
5703 // If the size of the reduction item is non-constant, load it from the global
5704 // threadprivate variable.
5705 if (RCG.getSizes(N).second) {
5706 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5707 CGF, CGM.getContext().getSizeType(),
5708 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5709 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5710 CGM.getContext().getSizeType(), Loc);
5711 }
5712 RCG.emitAggregateType(CGF, N, Size);
5713 // Emit the finalizer body:
5714 // <destroy>(<type>* %0)
5715 RCG.emitCleanups(CGF, N, PrivateAddr);
5716 CGF.FinishFunction(Loc);
5717 return Fn;
5718}
5719
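/// Emits a call to __kmpc_taskred_init() (or __kmpc_taskred_modifier_init()
/// for reductions with a task modifier), passing an array of
/// kmp_taskred_input_t descriptors built from the reduction data.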
5720 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
5721 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
5722 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5723 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5724 return nullptr;
5725
5726 // Build typedef struct:
5727 // kmp_taskred_input {
5728 // void *reduce_shar; // shared reduction item
5729 // void *reduce_orig; // original reduction item used for initialization
5730 // size_t reduce_size; // size of data item
5731 // void *reduce_init; // data initialization routine
5732 // void *reduce_fini; // data finalization routine
5733 // void *reduce_comb; // data combiner routine
5734 // kmp_task_red_flags_t flags; // flags for additional info from compiler
5735 // } kmp_taskred_input_t;
5736 ASTContext &C = CGM.getContext();
5737 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
5738 RD->startDefinition();
5739 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5740 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5741 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
5742 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5743 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5744 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5745 const FieldDecl *FlagsFD = addFieldToRecordDecl(
5746 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
5747 RD->completeDefinition();
5748 CanQualType RDType = C.getCanonicalTagType(RD);
5749 unsigned Size = Data.ReductionVars.size();
5750 llvm::APInt ArraySize(/*numBits=*/64, Size);
5751 QualType ArrayRDType =
5752 C.getConstantArrayType(RDType, ArraySize, nullptr,
5753 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
5754 // kmp_task_red_input_t .rd_input.[Size];
5755 RawAddress TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
5756 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
5757 Data.ReductionCopies, Data.ReductionOps);
5758 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
5759 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
5760 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
5761 llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
5762 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
5763 TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
5764 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5765 ".rd_input.gep.");
5766 LValue ElemLVal = CGF.MakeNaturalAlignRawAddrLValue(GEP, RDType);
5767 // ElemLVal.reduce_shar = &Shareds[Cnt];
5768 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
5769 RCG.emitSharedOrigLValue(CGF, Cnt);
5770 llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF);
5771 CGF.EmitStoreOfScalar(Shared, SharedLVal);
5772 // ElemLVal.reduce_orig = &Origs[Cnt];
5773 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
5774 llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF);
5775 CGF.EmitStoreOfScalar(Orig, OrigLVal);
5776 RCG.emitAggregateType(CGF, Cnt);
5777 llvm::Value *SizeValInChars;
5778 llvm::Value *SizeVal;
5779 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
5780 // We use delayed creation/initialization for VLAs and array sections because
5781 // the runtime provides no way to pass their sizes to the
5782 // initializer/combiner/finalizer functions. Instead, threadprivate global
5783 // variables are used to store these values and make them available to those
5784 // functions.
5785 bool DelayedCreation = !!SizeVal;
5786 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
5787 /*isSigned=*/false);
5788 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
5789 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
5790 // ElemLVal.reduce_init = init;
5791 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
5792 llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);
5793 CGF.EmitStoreOfScalar(InitAddr, InitLVal);
5794 // ElemLVal.reduce_fini = fini;
5795 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
5796 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
5797 llvm::Value *FiniAddr =
5798 Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
5799 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
5800 // ElemLVal.reduce_comb = comb;
5801 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
5802 llvm::Value *CombAddr = emitReduceCombFunction(
5803 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
5804 RHSExprs[Cnt], Data.ReductionCopies[Cnt]);
5805 CGF.EmitStoreOfScalar(CombAddr, CombLVal);
5806 // ElemLVal.flags = 0;
5807 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
5808 if (DelayedCreation) {
5809 CGF.EmitStoreOfScalar(
5810 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
5811 FlagsLVal);
5812 } else
5813 CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
5814 }
5815 if (Data.IsReductionWithTaskMod) {
5816 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5817 // is_ws, int num, void *data);
5818 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5819 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5820 CGM.IntTy, /*isSigned=*/true);
5821 llvm::Value *Args[] = {
5822 IdentTLoc, GTid,
5823 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
5824 /*isSigned=*/true),
5825 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5826 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5827 TaskRedInput.getPointer(), CGM.VoidPtrTy)};
5828 return CGF.EmitRuntimeCall(
5829 OMPBuilder.getOrCreateRuntimeFunction(
5830 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
5831 Args);
5832 }
5833 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
5834 llvm::Value *Args[] = {
5835 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
5836 /*isSigned=*/true),
5837 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5838 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
5839 CGM.VoidPtrTy)};
5840 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5841 CGM.getModule(), OMPRTL___kmpc_taskred_init),
5842 Args);
5843}
5844
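/// Emits the __kmpc_task_reduction_modifier_fini() call that closes a
/// reduction region with the task modifier.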
5845 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
5846 SourceLocation Loc,
5847 bool IsWorksharingReduction) {
5848 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5849 // is_ws, int num, void *data);
5850 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5851 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5852 CGM.IntTy, /*isSigned=*/true);
5853 llvm::Value *Args[] = {IdentTLoc, GTid,
5854 llvm::ConstantInt::get(CGM.IntTy,
5855 IsWorksharingReduction ? 1 : 0,
5856 /*isSigned=*/true)};
5857 (void)CGF.EmitRuntimeCall(
5858 OMPBuilder.getOrCreateRuntimeFunction(
5859 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
5860 Args);
5861}
5862
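/// Stores the dynamic size of a VLA or array-section reduction item in an
/// artificial threadprivate variable so that the runtime-invoked
/// initializer/combiner/finalizer helpers can retrieve it.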
5863 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
5864 SourceLocation Loc,
5865 ReductionCodeGen &RCG,
5866 unsigned N) {
5867 auto Sizes = RCG.getSizes(N);
5868 // Emit the threadprivate global variable if the size is non-constant
5869 // (Sizes.second != nullptr).
5870 if (Sizes.second) {
5871 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
5872 /*isSigned=*/false);
5873 Address SizeAddr = getAddrOfArtificialThreadPrivate(
5874 CGF, CGM.getContext().getSizeType(),
5875 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5876 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
5877 }
5878}
5879
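/// Returns the address of the task-private copy of a reduction item by
/// calling __kmpc_task_reduction_get_th_data().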
5880 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
5881 SourceLocation Loc,
5882 llvm::Value *ReductionsPtr,
5883 LValue SharedLVal) {
5884 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
5885 // *d);
5886 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5887 CGM.IntTy,
5888 /*isSigned=*/true),
5889 ReductionsPtr,
5890 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5891 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
5892 return Address(
5893 CGF.EmitRuntimeCall(
5894 OMPBuilder.getOrCreateRuntimeFunction(
5895 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
5896 Args),
5897 CGF.Int8Ty, SharedLVal.getAlignment());
5898}
5899
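/// Emits a taskwait, either through the OpenMPIRBuilder or via
/// __kmpc_omp_taskwait() / __kmpc_omp_taskwait_deps_51() when 'depend'
/// clauses are present.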
5900 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
5901 const OMPTaskDataTy &Data) {
5902 if (!CGF.HaveInsertPoint())
5903 return;
5904
5905 if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
5906 // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
5907 OMPBuilder.createTaskwait(CGF.Builder);
5908 } else {
5909 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5910 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5911 auto &M = CGM.getModule();
5912 Address DependenciesArray = Address::invalid();
5913 llvm::Value *NumOfElements;
5914 std::tie(NumOfElements, DependenciesArray) =
5915 emitDependClause(CGF, Data.Dependences, Loc);
5916 if (!Data.Dependences.empty()) {
5917 llvm::Value *DepWaitTaskArgs[7];
5918 DepWaitTaskArgs[0] = UpLoc;
5919 DepWaitTaskArgs[1] = ThreadID;
5920 DepWaitTaskArgs[2] = NumOfElements;
5921 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
5922 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5923 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5924 DepWaitTaskArgs[6] =
5925 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
5926
5927 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5928
5929 // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
5930 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5931 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
5932 // kmp_int32 has_no_wait); if dependence info is specified.
5933 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5934 M, OMPRTL___kmpc_omp_taskwait_deps_51),
5935 DepWaitTaskArgs);
5936
5937 } else {
5938
5939 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
5940 // global_tid);
5941 llvm::Value *Args[] = {UpLoc, ThreadID};
5942 // Ignore return result until untied tasks are supported.
5943 CGF.EmitRuntimeCall(
5944 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
5945 Args);
5946 }
5947 }
5948
5949 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5950 Region->emitUntiedSwitch(CGF);
5951}
5952
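/// Emits the body of a construct that needs no outlined function, wrapping it
/// in an inlined region so cancellation and untied-task bookkeeping still
/// work.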
5953 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
5954 OpenMPDirectiveKind InnerKind,
5955 const RegionCodeGenTy &CodeGen,
5956 bool HasCancel) {
5957 if (!CGF.HaveInsertPoint())
5958 return;
5959 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
5960 InnerKind != OMPD_critical &&
5961 InnerKind != OMPD_master &&
5962 InnerKind != OMPD_masked);
5963 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
5964}
5965
5966namespace {
5967enum RTCancelKind {
5968 CancelNoreq = 0,
5969 CancelParallel = 1,
5970 CancelLoop = 2,
5971 CancelSections = 3,
5972 CancelTaskgroup = 4
5973};
5974} // anonymous namespace
5975
5976static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
5977 RTCancelKind CancelKind = CancelNoreq;
5978 if (CancelRegion == OMPD_parallel)
5979 CancelKind = CancelParallel;
5980 else if (CancelRegion == OMPD_for)
5981 CancelKind = CancelLoop;
5982 else if (CancelRegion == OMPD_sections)
5983 CancelKind = CancelSections;
5984 else {
5985 assert(CancelRegion == OMPD_taskgroup);
5986 CancelKind = CancelTaskgroup;
5987 }
5988 return CancelKind;
5989}
5990
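/// Emits the __kmpc_cancellationpoint() call together with the conditional
/// branch that exits the construct when cancellation has been requested.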
5991 void CGOpenMPRuntime::emitCancellationPointCall(
5992 CodeGenFunction &CGF, SourceLocation Loc,
5993 OpenMPDirectiveKind CancelRegion) {
5994 if (!CGF.HaveInsertPoint())
5995 return;
5996 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
5997 // global_tid, kmp_int32 cncl_kind);
5998 if (auto *OMPRegionInfo =
5999 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6000 // For 'cancellation point taskgroup', the task region info may not have a
6001 // cancel. This may instead happen in another adjacent task.
6002 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6003 llvm::Value *Args[] = {
6004 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6005 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6006 // Ignore return result until untied tasks are supported.
6007 llvm::Value *Result = CGF.EmitRuntimeCall(
6008 OMPBuilder.getOrCreateRuntimeFunction(
6009 CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
6010 Args);
6011 // if (__kmpc_cancellationpoint()) {
6012 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6013 // exit from construct;
6014 // }
6015 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6016 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6017 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6018 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6019 CGF.EmitBlock(ExitBB);
6020 if (CancelRegion == OMPD_parallel)
6021 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6022 // exit from construct;
6023 CodeGenFunction::JumpDest CancelDest =
6024 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6025 CGF.EmitBranchThroughCleanup(CancelDest);
6026 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6027 }
6028 }
6029}
6030
6031 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6032 const Expr *IfCond,
6033 OpenMPDirectiveKind CancelRegion) {
6034 if (!CGF.HaveInsertPoint())
6035 return;
6036 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6037 // kmp_int32 cncl_kind);
6038 auto &M = CGM.getModule();
6039 if (auto *OMPRegionInfo =
6040 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6041 auto &&ThenGen = [this, &M, Loc, CancelRegion,
6042 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
6043 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6044 llvm::Value *Args[] = {
6045 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6046 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6047 // Ignore return result until untied tasks are supported.
6048 llvm::Value *Result = CGF.EmitRuntimeCall(
6049 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
6050 // if (__kmpc_cancel()) {
6051 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6052 // exit from construct;
6053 // }
6054 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6055 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6056 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6057 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6058 CGF.EmitBlock(ExitBB);
6059 if (CancelRegion == OMPD_parallel)
6060 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6061 // exit from construct;
6062 CodeGenFunction::JumpDest CancelDest =
6063 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6064 CGF.EmitBranchThroughCleanup(CancelDest);
6065 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6066 };
6067 if (IfCond) {
6068 emitIfClause(CGF, IfCond, ThenGen,
6069 [](CodeGenFunction &, PrePostActionTy &) {});
6070 } else {
6071 RegionCodeGenTy ThenRCG(ThenGen);
6072 ThenRCG(CGF);
6073 }
6074 }
6075}
6076
6077namespace {
6078/// Cleanup action for uses_allocators support.
6079class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6080 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6081
6082public:
6083 OMPUsesAllocatorsActionTy(
6084 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6085 : Allocators(Allocators) {}
6086 void Enter(CodeGenFunction &CGF) override {
6087 if (!CGF.HaveInsertPoint())
6088 return;
6089 for (const auto &AllocatorData : Allocators) {
6090 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6091 CGF, AllocatorData.first, AllocatorData.second);
6092 }
6093 }
6094 void Exit(CodeGenFunction &CGF) override {
6095 if (!CGF.HaveInsertPoint())
6096 return;
6097 for (const auto &AllocatorData : Allocators) {
6098 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6099 AllocatorData.first);
6100 }
6101 }
6102};
6103} // namespace
6104
6105 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6106 const OMPExecutableDirective &D, StringRef ParentName,
6107 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6108 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6109 assert(!ParentName.empty() && "Invalid target entry parent name!");
6110 HasEmittedTargetRegion = true;
6111 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6112 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6113 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6114 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6115 if (!D.AllocatorTraits)
6116 continue;
6117 Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6118 }
6119 }
6120 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6121 CodeGen.setAction(UsesAllocatorAction);
6122 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6123 IsOffloadEntry, CodeGen);
6124}
6125
6126 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
6127 const Expr *Allocator,
6128 const Expr *AllocatorTraits) {
6129 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6130 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6131 // Use default memspace handle.
6132 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6133 llvm::Value *NumTraits = llvm::ConstantInt::get(
6134 CGF.IntTy, cast<ConstantArrayType>(
6135 AllocatorTraits->getType()->getAsArrayTypeUnsafe())
6136 ->getSize()
6137 .getLimitedValue());
6138 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
6139 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6140 AllocatorTraitsLVal.getAddress(), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
6141 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
6142 AllocatorTraitsLVal.getBaseInfo(),
6143 AllocatorTraitsLVal.getTBAAInfo());
6144 llvm::Value *Traits = Addr.emitRawPointer(CGF);
6145
6146 llvm::Value *AllocatorVal =
6147 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6148 CGM.getModule(), OMPRTL___kmpc_init_allocator),
6149 {ThreadId, MemSpaceHandle, NumTraits, Traits});
6150 // Store to allocator.
6151 CGF.EmitAutoVarAlloca(*cast<VarDecl>(
6152 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6153 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6154 AllocatorVal =
6155 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6156 Allocator->getType(), Allocator->getExprLoc());
6157 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6158}
6159
6160 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6161 const Expr *Allocator) {
6162 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6163 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6164 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6165 llvm::Value *AllocatorVal =
6166 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6167 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6168 CGF.getContext().VoidPtrTy,
6169 Allocator->getExprLoc());
6170 (void)CGF.EmitRuntimeCall(
6171 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6172 OMPRTL___kmpc_destroy_allocator),
6173 {ThreadId, AllocatorVal});
6174}
6175
6176 void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams(
6177 const OMPExecutableDirective &D, CodeGenFunction &CGF,
6178 llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs) {
6179 assert(Attrs.MaxTeams.size() == 1 && Attrs.MaxThreads.size() == 1 &&
6180 "invalid default attrs structure");
6181 int32_t &MaxTeamsVal = Attrs.MaxTeams.front();
6182 int32_t &MaxThreadsVal = Attrs.MaxThreads.front();
6183
6184 getNumTeamsExprForTargetDirective(CGF, D, Attrs.MinTeams, MaxTeamsVal);
6185 getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal,
6186 /*UpperBoundOnly=*/true);
6187
6188 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
6189 for (auto *A : C->getAttrs()) {
6190 int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
6191 int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
6192 if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
6193 CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal,
6194 &AttrMinBlocksVal, &AttrMaxBlocksVal);
6195 else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
6196 CGM.handleAMDGPUFlatWorkGroupSizeAttr(
6197 nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal,
6198 &AttrMaxThreadsVal);
6199 else
6200 continue;
6201
6202 Attrs.MinThreads = std::max(Attrs.MinThreads, AttrMinThreadsVal);
6203 if (AttrMaxThreadsVal > 0)
6204 MaxThreadsVal = MaxThreadsVal > 0
6205 ? std::min(MaxThreadsVal, AttrMaxThreadsVal)
6206 : AttrMaxThreadsVal;
6207 Attrs.MinTeams = std::max(Attrs.MinTeams, AttrMinBlocksVal);
6208 if (AttrMaxBlocksVal > 0)
6209 MaxTeamsVal = MaxTeamsVal > 0 ? std::min(MaxTeamsVal, AttrMaxBlocksVal)
6210 : AttrMaxBlocksVal;
6211 }
6212 }
6213}
6214
6215 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6216 const OMPExecutableDirective &D, StringRef ParentName,
6217 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6218 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6219
6220 llvm::TargetRegionEntryInfo EntryInfo =
6221 getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName);
6222
6223 CodeGenFunction CGF(CGM, true);
6224 llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
6225 [&CGF, &D, &CodeGen](StringRef EntryFnName) {
6226 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6227
6228 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6229 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6230 return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
6231 };
6232
6233 cantFail(OMPBuilder.emitTargetRegionFunction(
6234 EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
6235 OutlinedFnID));
6236
6237 if (!OutlinedFn)
6238 return;
6239
6240 CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
6241
6242 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
6243 for (auto *A : C->getAttrs()) {
6244 if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
6245 CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
6246 }
6247 }
6248}
6249
6250/// Checks if the expression is constant or does not have non-trivial function
6251/// calls.
6252 static bool isTrivial(ASTContext &Ctx, const Expr *E) {
6253 // We can skip constant expressions.
6254 // We can skip expressions with trivial calls or simple expressions.
6255 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6256 !E->hasNonTrivialCall(Ctx)) &&
6257 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6258}
6259
6260 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6261 const Stmt *Body) {
6262 const Stmt *Child = Body->IgnoreContainers();
6263 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6264 Child = nullptr;
6265 for (const Stmt *S : C->body()) {
6266 if (const auto *E = dyn_cast<Expr>(S)) {
6267 if (isTrivial(Ctx, E))
6268 continue;
6269 }
6270 // Some of the statements can be ignored.
6271 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6272 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6273 continue;
6274 // Analyze declarations.
6275 if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6276 if (llvm::all_of(DS->decls(), [](const Decl *D) {
6277 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6278 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6279 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6280 isa<UsingDirectiveDecl>(D) ||
6281 isa<OMPDeclareReductionDecl>(D) ||
6282 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6283 return true;
6284 const auto *VD = dyn_cast<VarDecl>(D);
6285 if (!VD)
6286 return false;
6287 return VD->hasGlobalStorage() || !VD->isUsed();
6288 }))
6289 continue;
6290 }
6291 // Found multiple children - cannot get the one child only.
6292 if (Child)
6293 return nullptr;
6294 Child = S;
6295 }
6296 if (Child)
6297 Child = Child->IgnoreContainers();
6298 }
6299 return Child;
6300}
6301
6302 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6303 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
6304 int32_t &MaxTeamsVal) {
6305
6306 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6307 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6308 "Expected target-based executable directive.");
6309 switch (DirectiveKind) {
6310 case OMPD_target: {
6311 const auto *CS = D.getInnermostCapturedStmt();
6312 const auto *Body =
6313 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6314 const Stmt *ChildStmt =
6315 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6316 if (const auto *NestedDir =
6317 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6318 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6319 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6320 const Expr *NumTeams = NestedDir->getSingleClause<OMPNumTeamsClause>()
6321 ->getNumTeams()
6322 .front();
6323 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6324 if (auto Constant =
6325 NumTeams->getIntegerConstantExpr(CGF.getContext()))
6326 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6327 return NumTeams;
6328 }
6329 MinTeamsVal = MaxTeamsVal = 0;
6330 return nullptr;
6331 }
6332 MinTeamsVal = MaxTeamsVal = 1;
6333 return nullptr;
6334 }
6335 // A value of -1 is used to check whether we need to emit no teams region.
6336 MinTeamsVal = MaxTeamsVal = -1;
6337 return nullptr;
6338 }
6339 case OMPD_target_teams_loop:
6340 case OMPD_target_teams:
6341 case OMPD_target_teams_distribute:
6342 case OMPD_target_teams_distribute_simd:
6343 case OMPD_target_teams_distribute_parallel_for:
6344 case OMPD_target_teams_distribute_parallel_for_simd: {
6345 if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6346 const Expr *NumTeams =
6347 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams().front();
6348 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6349 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6350 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6351 return NumTeams;
6352 }
6353 MinTeamsVal = MaxTeamsVal = 0;
6354 return nullptr;
6355 }
6356 case OMPD_target_parallel:
6357 case OMPD_target_parallel_for:
6358 case OMPD_target_parallel_for_simd:
6359 case OMPD_target_parallel_loop:
6360 case OMPD_target_simd:
6361 MinTeamsVal = MaxTeamsVal = 1;
6362 return nullptr;
6363 case OMPD_parallel:
6364 case OMPD_for:
6365 case OMPD_parallel_for:
6366 case OMPD_parallel_loop:
6367 case OMPD_parallel_master:
6368 case OMPD_parallel_sections:
6369 case OMPD_for_simd:
6370 case OMPD_parallel_for_simd:
6371 case OMPD_cancel:
6372 case OMPD_cancellation_point:
6373 case OMPD_ordered:
6374 case OMPD_threadprivate:
6375 case OMPD_allocate:
6376 case OMPD_task:
6377 case OMPD_simd:
6378 case OMPD_tile:
6379 case OMPD_unroll:
6380 case OMPD_sections:
6381 case OMPD_section:
6382 case OMPD_single:
6383 case OMPD_master:
6384 case OMPD_critical:
6385 case OMPD_taskyield:
6386 case OMPD_barrier:
6387 case OMPD_taskwait:
6388 case OMPD_taskgroup:
6389 case OMPD_atomic:
6390 case OMPD_flush:
6391 case OMPD_depobj:
6392 case OMPD_scan:
6393 case OMPD_teams:
6394 case OMPD_target_data:
6395 case OMPD_target_exit_data:
6396 case OMPD_target_enter_data:
6397 case OMPD_distribute:
6398 case OMPD_distribute_simd:
6399 case OMPD_distribute_parallel_for:
6400 case OMPD_distribute_parallel_for_simd:
6401 case OMPD_teams_distribute:
6402 case OMPD_teams_distribute_simd:
6403 case OMPD_teams_distribute_parallel_for:
6404 case OMPD_teams_distribute_parallel_for_simd:
6405 case OMPD_target_update:
6406 case OMPD_declare_simd:
6407 case OMPD_declare_variant:
6408 case OMPD_begin_declare_variant:
6409 case OMPD_end_declare_variant:
6410 case OMPD_declare_target:
6411 case OMPD_end_declare_target:
6412 case OMPD_declare_reduction:
6413 case OMPD_declare_mapper:
6414 case OMPD_taskloop:
6415 case OMPD_taskloop_simd:
6416 case OMPD_master_taskloop:
6417 case OMPD_master_taskloop_simd:
6418 case OMPD_parallel_master_taskloop:
6419 case OMPD_parallel_master_taskloop_simd:
6420 case OMPD_requires:
6421 case OMPD_metadirective:
6422 case OMPD_unknown:
6423 break;
6424 default:
6425 break;
6426 }
6427 llvm_unreachable("Unexpected directive kind.");
6428}
6429
6430 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6431 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6432 assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
6433 "Clauses associated with the teams directive expected to be emitted "
6434 "only for the host!");
6435 CGBuilderTy &Bld = CGF.Builder;
6436 int32_t MinNT = -1, MaxNT = -1;
6437 const Expr *NumTeams =
6438 getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT);
6439 if (NumTeams != nullptr) {
6440 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6441
6442 switch (DirectiveKind) {
6443 case OMPD_target: {
6444 const auto *CS = D.getInnermostCapturedStmt();
6445 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6446 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6447 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6448 /*IgnoreResultAssign*/ true);
6449 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6450 /*isSigned=*/true);
6451 }
6452 case OMPD_target_teams:
6453 case OMPD_target_teams_distribute:
6454 case OMPD_target_teams_distribute_simd:
6455 case OMPD_target_teams_distribute_parallel_for:
6456 case OMPD_target_teams_distribute_parallel_for_simd: {
6457 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6458 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6459 /*IgnoreResultAssign*/ true);
6460 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6461 /*isSigned=*/true);
6462 }
6463 default:
6464 break;
6465 }
6466 }
6467
6468 assert(MinNT == MaxNT && "Num teams ranges require handling here.");
6469 return llvm::ConstantInt::get(CGF.Int32Ty, MinNT);
6470}
6471
6472 /// Check for a num threads constant value (stored in \p UpperBound), or
6473 /// expression (stored in \p E). If the value is conditional (via an if-clause),
6474 /// store the condition in \p CondVal. If \p E and \p CondVal, respectively, are
6475 /// nullptr, no expression evaluation is performed.
6476static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6477 const Expr **E, int32_t &UpperBound,
6478 bool UpperBoundOnly, llvm::Value **CondVal) {
6479 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6480 CGF.getContext(), CS->getCapturedStmt());
6481 const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6482 if (!Dir)
6483 return;
6484
6485 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6486 // Handle the if clause: if present, the number of threads is
6487 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6488 if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
6489 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6490 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6491 const OMPIfClause *IfClause = nullptr;
6492 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6493 if (C->getNameModifier() == OMPD_unknown ||
6494 C->getNameModifier() == OMPD_parallel) {
6495 IfClause = C;
6496 break;
6497 }
6498 }
6499 if (IfClause) {
6500 const Expr *CondExpr = IfClause->getCondition();
6501 bool Result;
6502 if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6503 if (!Result) {
6504 UpperBound = 1;
6505 return;
6506 }
6507 } else {
6508 CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
6509 if (const auto *PreInit =
6510 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6511 for (const auto *I : PreInit->decls()) {
6512 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6513 CGF.EmitVarDecl(cast<VarDecl>(*I));
6514 } else {
6515 CodeGenFunction::AutoVarEmission Emission =
6516 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6517 CGF.EmitAutoVarCleanups(Emission);
6518 }
6519 }
6520 *CondVal = CGF.EvaluateExprAsBool(CondExpr);
6521 }
6522 }
6523 }
6524 }
6525 // Check the value of the num_threads clause iff the if clause was not
6526 // specified or did not evaluate to false.
6527 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6528 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6529 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6530 const auto *NumThreadsClause =
6531 Dir->getSingleClause<OMPNumThreadsClause>();
6532 const Expr *NTExpr = NumThreadsClause->getNumThreads();
6533 if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
6534 if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
6535 UpperBound =
6536 UpperBound
6537 ? Constant->getZExtValue()
6538 : std::min(UpperBound,
6539 static_cast<int32_t>(Constant->getZExtValue()));
6540 // If we haven't found an upper bound, remember we saw a thread limiting
6541 // clause.
6542 if (UpperBound == -1)
6543 UpperBound = 0;
6544 if (!E)
6545 return;
6546 CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
6547 if (const auto *PreInit =
6548 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6549 for (const auto *I : PreInit->decls()) {
6550 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6551 CGF.EmitVarDecl(cast<VarDecl>(*I));
6552 } else {
6553 CodeGenFunction::AutoVarEmission Emission =
6554 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6555 CGF.EmitAutoVarCleanups(Emission);
6556 }
6557 }
6558 }
6559 *E = NTExpr;
6560 }
6561 return;
6562 }
6563 if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6564 UpperBound = 1;
6565}
6566
6567 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6568 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
6569 bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
6570 assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
6571 "Clauses associated with the teams directive expected to be emitted "
6572 "only for the host!");
6573 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6574 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6575 "Expected target-based executable directive.");
6576
6577 const Expr *NT = nullptr;
6578 const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;
6579
6580 auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
6581 if (E->isIntegerConstantExpr(CGF.getContext())) {
6582 if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
6583 UpperBound = UpperBound ? Constant->getZExtValue()
6584 : std::min(UpperBound,
6585 int32_t(Constant->getZExtValue()));
6586 }
6587 // If we haven't found an upper bound, remember we saw a thread limiting
6588 // clause.
6589 if (UpperBound == -1)
6590 UpperBound = 0;
6591 if (EPtr)
6592 *EPtr = E;
6593 };
6594
6595 auto ReturnSequential = [&]() {
6596 UpperBound = 1;
6597 return NT;
6598 };
6599
6600 switch (DirectiveKind) {
6601 case OMPD_target: {
6602 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6603 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6604 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6605 CGF.getContext(), CS->getCapturedStmt());
6606 // TODO: The standard is not clear on how to resolve two thread_limit
6607 // clauses; pick the teams one if it's present, otherwise the target one.
6608 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6609 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6610 if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
6611 ThreadLimitClause = TLC;
6612 if (ThreadLimitExpr) {
6613 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6614 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6615 CodeGenFunction::LexicalScope Scope(
6616 CGF,
6617 ThreadLimitClause->getThreadLimit().front()->getSourceRange());
6618 if (const auto *PreInit =
6619 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6620 for (const auto *I : PreInit->decls()) {
6621 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6622 CGF.EmitVarDecl(cast<VarDecl>(*I));
6623 } else {
6624 CodeGenFunction::AutoVarEmission Emission =
6625 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6626 CGF.EmitAutoVarCleanups(Emission);
6627 }
6628 }
6629 }
6630 }
6631 }
6632 }
6633 if (ThreadLimitClause)
6634 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6635 ThreadLimitExpr);
6636 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6637 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6638 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6639 CS = Dir->getInnermostCapturedStmt();
6640 Child = CGOpenMPRuntime::getSingleCompoundChild(
6641 CGF.getContext(), CS->getCapturedStmt());
6642 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6643 }
6644 if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6645 CS = Dir->getInnermostCapturedStmt();
6646 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6647 } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6648 return ReturnSequential();
6649 }
6650 return NT;
6651 }
6652 case OMPD_target_teams: {
6653 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6654 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6655 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6656 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6657 ThreadLimitExpr);
6658 }
6659 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6660 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6661 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6662 CGF.getContext(), CS->getCapturedStmt());
6663 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6664 if (Dir->getDirectiveKind() == OMPD_distribute) {
6665 CS = Dir->getInnermostCapturedStmt();
6666 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6667 }
6668 }
6669 return NT;
6670 }
6671 case OMPD_target_teams_distribute:
6672 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6673 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6674 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6675 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6676 ThreadLimitExpr);
6677 }
6678 getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
6679 UpperBoundOnly, CondVal);
6680 return NT;
6681 case OMPD_target_teams_loop:
6682 case OMPD_target_parallel_loop:
6683 case OMPD_target_parallel:
6684 case OMPD_target_parallel_for:
6685 case OMPD_target_parallel_for_simd:
6686 case OMPD_target_teams_distribute_parallel_for:
6687 case OMPD_target_teams_distribute_parallel_for_simd: {
6688 if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
6689 const OMPIfClause *IfClause = nullptr;
6690 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6691 if (C->getNameModifier() == OMPD_unknown ||
6692 C->getNameModifier() == OMPD_parallel) {
6693 IfClause = C;
6694 break;
6695 }
6696 }
6697 if (IfClause) {
6698 const Expr *Cond = IfClause->getCondition();
6699 bool Result;
6700 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6701 if (!Result)
6702 return ReturnSequential();
6703 } else {
6704 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6705 *CondVal = CGF.EvaluateExprAsBool(Cond);
6706 }
6707 }
6708 }
6709 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6710 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6711 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6712 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6713 ThreadLimitExpr);
6714 }
6715 if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6716 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6717 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6718 CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
6719 return NumThreadsClause->getNumThreads();
6720 }
6721 return NT;
6722 }
6723 case OMPD_target_teams_distribute_simd:
6724 case OMPD_target_simd:
6725 return ReturnSequential();
6726 default:
6727 break;
6728 }
6729 llvm_unreachable("Unsupported directive kind.");
6730}
6731
6732 llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
6733 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6734 llvm::Value *NumThreadsVal = nullptr;
6735 llvm::Value *CondVal = nullptr;
6736 llvm::Value *ThreadLimitVal = nullptr;
6737 const Expr *ThreadLimitExpr = nullptr;
6738 int32_t UpperBound = -1;
6739
6740 const Expr *NT = getNumThreadsExprForTargetDirective(
6741 CGF, D, UpperBound, /* UpperBoundOnly */ false, &CondVal,
6742 &ThreadLimitExpr);
6743
6744 // Thread limit expressions are used below, emit them.
6745 if (ThreadLimitExpr) {
6746 ThreadLimitVal =
6747 CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
6748 ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
6749 /*isSigned=*/false);
6750 }
6751
6752 // Generate the num threads expression.
6753 if (UpperBound == 1) {
6754 NumThreadsVal = CGF.Builder.getInt32(UpperBound);
6755 } else if (NT) {
6756 NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
6757 NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
6758 /*isSigned=*/false);
6759 } else if (ThreadLimitVal) {
6760 // If we do not have a num threads value but a thread limit, replace the
6761 // former with the latter. We already handled the thread limit expression.
6762 NumThreadsVal = ThreadLimitVal;
6763 ThreadLimitVal = nullptr;
6764 } else {
6765 // Default to "0" which means runtime choice.
6766 assert(!ThreadLimitVal && "Default not applicable with thread limit value");
6767 NumThreadsVal = CGF.Builder.getInt32(0);
6768 }
6769
6770 // Handle the if clause: if present, the number of threads is
6771 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6772 if (CondVal) {
6773 CodeGenFunction::RunCleanupsScope IfScope(CGF);
6774 NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
6775 CGF.Builder.getInt32(1));
6776 }
6777
6778 // If both the thread limit and num threads expressions were present, take the
6779 // minimum.
6780 if (ThreadLimitVal) {
6781 NumThreadsVal = CGF.Builder.CreateSelect(
6782 CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
6783 ThreadLimitVal, NumThreadsVal);
6784 }
6785
6786 return NumThreadsVal;
6787}
6788
6789namespace {
6790 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
6791
6792// Utility to handle information from clauses associated with a given
6793// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6794// It provides a convenient interface to obtain the information and generate
6795// code for that information.
6796class MappableExprsHandler {
6797public:
6798 /// Get the offset of the OMP_MAP_MEMBER_OF field.
6799 static unsigned getFlagMemberOffset() {
6800 unsigned Offset = 0;
6801 for (uint64_t Remain =
6802 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
6803 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
6804 !(Remain & 1); Remain = Remain >> 1)
6805 Offset++;
6806 return Offset;
6807 }
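// For reference: OMP_MAP_MEMBER_OF occupies the high 16 bits of the 64-bit
// map type (0xffff000000000000), so this returns 48; the 1-based position
// of the parent struct argument is shifted into those bits to form the
// MEMBER_OF(n) annotations shown in the mapping examples further below.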
6808
6809 /// Class that holds debugging information for a data mapping to be passed to
6810 /// the runtime library.
6811 class MappingExprInfo {
6812 /// The variable declaration used for the data mapping.
6813 const ValueDecl *MapDecl = nullptr;
6814 /// The original expression used in the map clause, or null if there is
6815 /// none.
6816 const Expr *MapExpr = nullptr;
6817
6818 public:
6819 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
6820 : MapDecl(MapDecl), MapExpr(MapExpr) {}
6821
6822 const ValueDecl *getMapDecl() const { return MapDecl; }
6823 const Expr *getMapExpr() const { return MapExpr; }
6824 };
6825
6826 using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
6827 using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6828 using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6829 using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
6830 using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
6831 using MapNonContiguousArrayTy =
6832 llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
6833 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
6834 using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;
6835 using MapData =
6836 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
6837 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>,
6838 bool /*IsImplicit*/, const ValueDecl *, const Expr *>;
6839 using MapDataArrayTy = SmallVector<MapData, 4>;
6840
6841 /// This structure contains combined information generated for mappable
6842 /// clauses, including base pointers, pointers, sizes, map types, user-defined
6843 /// mappers, and non-contiguous information.
6844 struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
6845 MapExprsArrayTy Exprs;
6846 MapValueDeclsArrayTy Mappers;
6847 MapValueDeclsArrayTy DevicePtrDecls;
6848
6849 /// Append arrays in \a CurInfo.
6850 void append(MapCombinedInfoTy &CurInfo) {
6851 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
6852 DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),
6853 CurInfo.DevicePtrDecls.end());
6854 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
6855 llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
6856 }
6857 };
6858
6859 /// Map between a struct and its lowest & highest elements which have been
6860 /// mapped.
6861 /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
6862 /// HE(FieldIndex, Pointer)}
6863 struct StructRangeInfoTy {
6864 MapCombinedInfoTy PreliminaryMapData;
6865 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
6866 0, Address::invalid()};
6867 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
6868 0, Address::invalid()};
6869 Address Base = Address::invalid();
6870 Address LB = Address::invalid();
6871 bool IsArraySection = false;
6872 bool HasCompleteRecord = false;
6873 };
6874
6875private:
6876 /// Kind that defines how a device pointer has to be returned.
6877 struct MapInfo {
6878 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
6879 OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
6880 ArrayRef<OpenMPMapModifierKind> MapModifiers;
6881 ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
6882 bool ReturnDevicePointer = false;
6883 bool IsImplicit = false;
6884 const ValueDecl *Mapper = nullptr;
6885 const Expr *VarRef = nullptr;
6886 bool ForDeviceAddr = false;
6887
6888 MapInfo() = default;
6889 MapInfo(
6890 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
6891 OpenMPMapClauseKind MapType,
6892 ArrayRef<OpenMPMapModifierKind> MapModifiers,
6893 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
6894 bool ReturnDevicePointer, bool IsImplicit,
6895 const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
6896 bool ForDeviceAddr = false)
6897 : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
6898 MotionModifiers(MotionModifiers),
6899 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
6900 Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
6901 };
6902
6903 /// If use_device_ptr or use_device_addr is used on a decl which is a struct
6904 /// member and there is no map information about it, then emission of that
6905 /// entry is deferred until the whole struct has been processed.
6906 struct DeferredDevicePtrEntryTy {
6907 const Expr *IE = nullptr;
6908 const ValueDecl *VD = nullptr;
6909 bool ForDeviceAddr = false;
6910
6911 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
6912 bool ForDeviceAddr)
6913 : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
6914 };
6915
6916 /// The target directive from which the mappable clauses were extracted. It
6917 /// is either an executable directive or a user-defined mapper directive.
6918 llvm::PointerUnion<const OMPExecutableDirective *,
6919 const OMPDeclareMapperDecl *>
6920 CurDir;
6921
6922 /// Function the directive is being generated for.
6923 CodeGenFunction &CGF;
6924
6925 /// Set of all first private variables in the current directive.
6926 /// bool data is set to true if the variable is implicitly marked as
6927 /// firstprivate, false otherwise.
6928 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
6929
6930 /// Map between device pointer declarations and their expression components.
6931 /// The key value for declarations in 'this' is null.
6932 llvm::DenseMap<
6933 const ValueDecl *,
6934 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
6935 DevPointersMap;
6936
6937 /// Map between device addr declarations and their expression components.
6938 /// The key value for declarations in 'this' is null.
6939 llvm::DenseMap<
6940 const ValueDecl *,
6941 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
6942 HasDevAddrsMap;
6943
6944 /// Map between lambda declarations and their map type.
6945 llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
6946
6947 llvm::Value *getExprTypeSize(const Expr *E) const {
6948 QualType ExprTy = E->getType().getCanonicalType();
6949
6950 // Calculate the size for array shaping expression.
6951 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
6952 llvm::Value *Size =
6953 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
6954 for (const Expr *SE : OAE->getDimensions()) {
6955 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
6956 Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
6957 CGF.getContext().getSizeType(),
6958 SE->getExprLoc());
6959 Size = CGF.Builder.CreateNUWMul(Size, Sz);
6960 }
6961 return Size;
6962 }
6963
6964 // Reference types are ignored for mapping purposes.
6965 if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
6966 ExprTy = RefTy->getPointeeType().getCanonicalType();
6967
6968 // Given that an array section is considered a built-in type, we need to
6969 // do the calculation based on the length of the section instead of relying
6970 // on CGF.getTypeSize(E->getType()).
6971 if (const auto *OAE = dyn_cast<ArraySectionExpr>(E)) {
6972 QualType BaseTy = ArraySectionExpr::getBaseOriginalType(
6973 OAE->getBase()->IgnoreParenImpCasts())
6974 .getCanonicalType();
6975
6976 // If there is no length associated with the expression and lower bound is
6977 // not specified too, that means we are using the whole length of the
6978 // base.
6979 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
6980 !OAE->getLowerBound())
6981 return CGF.getTypeSize(BaseTy);
6982
6983 llvm::Value *ElemSize;
6984 if (const auto *PTy = BaseTy->getAs<PointerType>()) {
6985 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
6986 } else {
6987 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
6988 assert(ATy && "Expecting array type if not a pointer type.");
6989 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
6990 }
6991
6992 // If we don't have a length at this point, that is because we have an
6993 // array section with a single element.
6994 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
6995 return ElemSize;
6996
6997 if (const Expr *LenExpr = OAE->getLength()) {
6998 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
6999 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7000 CGF.getContext().getSizeType(),
7001 LenExpr->getExprLoc());
7002 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7003 }
7004 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7005 OAE->getLowerBound() && "expected array_section[lb:].");
7006 // Size = sizeof(base) - lb * sizeof(element) (clamped to zero below).
7007 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7008 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7009 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7010 CGF.getContext().getSizeType(),
7011 OAE->getLowerBound()->getExprLoc());
7012 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7013 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7014 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7015 LengthVal = CGF.Builder.CreateSelect(
7016 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7017 return LengthVal;
7018 }
7019 return CGF.getTypeSize(ExprTy);
7020 }
7021
7022 /// Return the corresponding bits for a given map clause modifier. Add
7023 /// a flag marking the map as a pointer if requested. Add a flag marking the
7024 /// map as the first one of a series of maps that relate to the same map
7025 /// expression.
7026 OpenMPOffloadMappingFlags getMapTypeBits(
7028 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7029 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7030 OpenMPOffloadMappingFlags Bits =
7031 IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
7032 : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
7033 switch (MapType) {
7034 case OMPC_MAP_alloc:
7035 case OMPC_MAP_release:
7036 // alloc and release are the default behavior in the runtime library, i.e.
7037 // if we don't pass any bits, alloc/release is what the runtime is
7038 // going to do. Therefore, we don't need to signal anything for these two
7039 // type modifiers.
7040 break;
7041 case OMPC_MAP_to:
7042 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
7043 break;
7044 case OMPC_MAP_from:
7045 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7046 break;
7047 case OMPC_MAP_tofrom:
7048 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
7049 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7050 break;
7051 case OMPC_MAP_delete:
7052 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
7053 break;
7054 case OMPC_MAP_unknown:
7055 llvm_unreachable("Unexpected map type!");
7056 }
7057 if (AddPtrFlag)
7058 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7059 if (AddIsTargetParamFlag)
7060 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
7061 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
7062 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
7063 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
7064 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
7065 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
7066 llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
7067 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
7068 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
7069 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
7070 if (IsNonContiguous)
7071 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
7072 return Bits;
7073 }
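// E.g. 'map(always, close, tofrom: x)' yields OMP_MAP_TO | OMP_MAP_FROM |
// OMP_MAP_ALWAYS | OMP_MAP_CLOSE; OMP_MAP_TARGET_PARAM is OR'ed in only
// when AddIsTargetParamFlag is set, i.e. when x heads a capture that is
// passed as a kernel argument.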
7074
7075 /// Return true if the provided expression is a final array section. A
7076 /// final array section is one whose length can't be proved to be one.
7077 bool isFinalArraySectionExpression(const Expr *E) const {
7078 const auto *OASE = dyn_cast<ArraySectionExpr>(E);
7079
7080 // It is not an array section and therefore not a unity-size one.
7081 if (!OASE)
7082 return false;
7083
7084 // An array section with no colon always refers to a single element.
7085 if (OASE->getColonLocFirst().isInvalid())
7086 return false;
7087
7088 const Expr *Length = OASE->getLength();
7089
7090 // If we don't have a length we have to check if the array has size 1
7091 // for this dimension. Also, we should always expect a length if the
7092 // base type is a pointer.
7093 if (!Length) {
7094 QualType BaseQTy = ArraySectionExpr::getBaseOriginalType(
7095 OASE->getBase()->IgnoreParenImpCasts())
7096 .getCanonicalType();
7097 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7098 return ATy->getSExtSize() != 1;
7099 // If we don't have a constant dimension length, we have to consider
7100 // the current section as having any size, so it is not necessarily
7101 // unitary. If it happens to be unity size, that's the user's fault.
7102 return true;
7103 }
7104
7105 // Check if the length evaluates to 1.
7106 Expr::EvalResult Result;
7107 if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7108 return true; // Can have more than size 1.
7109
7110 llvm::APSInt ConstLength = Result.Val.getInt();
7111 return ConstLength.getSExtValue() != 1;
7112 }
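// E.g. 'a[3]' and 'a[3:1]' are not final (a single element is provable),
// while 'a[3:n]' with a non-constant n is treated as final because its
// length can't be proved to be one.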
7113
7114 /// A helper class to copy structures with overlapped elements, i.e. those
7115 /// which have mappings of both "s" and "s.mem". Consecutive elements that
7116 /// are not explicitly copied have mapping nodes synthesized for them,
7117 /// taking care to avoid generating zero-sized copies.
7118 class CopyOverlappedEntryGaps {
7119 CodeGenFunction &CGF;
7120 MapCombinedInfoTy &CombinedInfo;
7121 OpenMPOffloadMappingFlags Flags = OpenMPOffloadMappingFlags::OMP_MAP_NONE;
7122 const ValueDecl *MapDecl = nullptr;
7123 const Expr *MapExpr = nullptr;
7124 Address BP = Address::invalid();
7125 bool IsNonContiguous = false;
7126 uint64_t DimSize = 0;
7127 // These elements track the position as the struct is iterated over
7128 // (in order of increasing element address).
7129 const RecordDecl *LastParent = nullptr;
7130 uint64_t Cursor = 0;
7131 unsigned LastIndex = -1u;
7132 Address LB = Address::invalid();
7133
7134 public:
7135 CopyOverlappedEntryGaps(CodeGenFunction &CGF,
7136 MapCombinedInfoTy &CombinedInfo,
7137 OpenMPOffloadMappingFlags Flags,
7138 const ValueDecl *MapDecl, const Expr *MapExpr,
7139 Address BP, Address LB, bool IsNonContiguous,
7140 uint64_t DimSize)
7141 : CGF(CGF), CombinedInfo(CombinedInfo), Flags(Flags), MapDecl(MapDecl),
7142 MapExpr(MapExpr), BP(BP), IsNonContiguous(IsNonContiguous),
7143 DimSize(DimSize), LB(LB) {}
7144
7145 void processField(
7146 const OMPClauseMappableExprCommon::MappableComponent &MC,
7147 const FieldDecl *FD,
7148 llvm::function_ref<LValue(CodeGenFunction &, const MemberExpr *)>
7149 EmitMemberExprBase) {
7150 const RecordDecl *RD = FD->getParent();
7151 const ASTRecordLayout &RL = CGF.getContext().getASTRecordLayout(RD);
7152 uint64_t FieldOffset = RL.getFieldOffset(FD->getFieldIndex());
7153 uint64_t FieldSize =
7154 CGF.getContext().getTypeSize(FD->getType());
7155 Address ComponentLB = Address::invalid();
7156
7157 if (FD->getType()->isLValueReferenceType()) {
7158 const auto *ME = cast<MemberExpr>(MC.getAssociatedExpression());
7159 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7160 ComponentLB =
7161 CGF.EmitLValueForFieldInitialization(BaseLVal, FD).getAddress();
7162 } else {
7163 ComponentLB =
7164 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()).getAddress();
7165 }
7166
7167 if (!LastParent)
7168 LastParent = RD;
7169 if (FD->getParent() == LastParent) {
7170 if (FD->getFieldIndex() != LastIndex + 1)
7171 copyUntilField(FD, ComponentLB);
7172 } else {
7173 LastParent = FD->getParent();
7174 if (((int64_t)FieldOffset - (int64_t)Cursor) > 0)
7175 copyUntilField(FD, ComponentLB);
7176 }
7177 Cursor = FieldOffset + FieldSize;
7178 LastIndex = FD->getFieldIndex();
7179 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7180 }
7181
7182 void copyUntilField(const FieldDecl *FD, Address ComponentLB) {
7183 llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
7184 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7185 llvm::Value *Size =
7186 CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr, LBPtr);
7187 copySizedChunk(LBPtr, Size);
7188 }
7189
7190 void copyUntilEnd(Address HB) {
7191 if (LastParent) {
7192 const ASTRecordLayout &RL =
7193 CGF.getContext().getASTRecordLayout(LastParent);
7194 if ((uint64_t)CGF.getContext().toBits(RL.getSize()) <= Cursor)
7195 return;
7196 }
7197 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7198 llvm::Value *Size = CGF.Builder.CreatePtrDiff(
7199 CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF),
7200 LBPtr);
7201 copySizedChunk(LBPtr, Size);
7202 }
7203
7204 void copySizedChunk(llvm::Value *Base, llvm::Value *Size) {
7205 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7206 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7207 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7208 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7209 CombinedInfo.Pointers.push_back(Base);
7210 CombinedInfo.Sizes.push_back(
7211 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7212 CombinedInfo.Types.push_back(Flags);
7213 CombinedInfo.Mappers.push_back(nullptr);
7214 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize : 1);
7215 }
7216 };
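// Sketch of the gap synthesis above, assuming 'struct S { int x, y, z; } s'
// mapped as map(s) with an overlapped explicit map(s.x): processField()
// visits x, finds no gap before it, and advances the cursor past it;
// copyUntilEnd() then emits one synthesized node covering y and z, and the
// record-size check keeps it from emitting a zero-sized trailing copy.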
7217
7218 /// Generate the base pointers, section pointers, sizes, map type bits, and
7219 /// user-defined mappers (all included in \a CombinedInfo) for the provided
7220 /// map type, map or motion modifiers, and expression components.
7221 /// \a IsFirstComponent should be set to true if the provided set of
7222 /// components is the first associated with a capture.
7223 void generateInfoForComponentList(
7224 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7225 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7226 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7227 MapCombinedInfoTy &CombinedInfo,
7228 MapCombinedInfoTy &StructBaseCombinedInfo,
7229 StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
7230 bool IsImplicit, bool GenerateAllInfoForClauses,
7231 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7232 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7233 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7234 OverlappedElements = {},
7235 bool AreBothBasePtrAndPteeMapped = false) const {
7236 // The following summarizes what has to be generated for each map and the
7237 // types below. The generated information is expressed in this order:
7238 // base pointer, section pointer, size, flags
7239 // (to add to the ones that come from the map type and modifier).
7240 //
7241 // double d;
7242 // int i[100];
7243 // float *p;
7244 // int **a = &i;
7245 //
7246 // struct S1 {
7247 // int i;
7248 // float f[50];
7249 // }
7250 // struct S2 {
7251 // int i;
7252 // float f[50];
7253 // S1 s;
7254 // double *p;
7255 // struct S2 *ps;
7256 // int &ref;
7257 // }
7258 // S2 s;
7259 // S2 *ps;
7260 //
7261 // map(d)
7262 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7263 //
7264 // map(i)
7265 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7266 //
7267 // map(i[1:23])
7268 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7269 //
7270 // map(p)
7271 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7272 //
7273 // map(p[1:24])
7274 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7275 // in unified shared memory mode or for local pointers
7276 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7277 //
7278 // map((*a)[0:3])
7279 // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
7280 // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM
7281 //
7282 // map(**a)
7283 // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
7284 // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM
7285 //
7286 // map(s)
7287 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7288 //
7289 // map(s.i)
7290 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7291 //
7292 // map(s.s.f)
7293 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7294 //
7295 // map(s.p)
7296 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7297 //
7298 // map(to: s.p[:22])
7299 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7300 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7301 // &(s.p), &(s.p[0]), 22*sizeof(double),
7302 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7303 // (*) alloc space for struct members, only this is a target parameter
7304 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7305 // optimizes this entry out, same in the examples below)
7306 // (***) map the pointee (map: to)
7307 //
7308 // map(to: s.ref)
7309 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7310 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7311 // (*) alloc space for struct members, only this is a target parameter
7312 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7313 // optimizes this entry out, same in the examples below)
7314 // (***) map the pointee (map: to)
7315 //
7316 // map(s.ps)
7317 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7318 //
7319 // map(from: s.ps->s.i)
7320 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7321 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7322 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7323 //
7324 // map(to: s.ps->ps)
7325 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7326 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7327 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
7328 //
7329 // map(s.ps->ps->ps)
7330 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7331 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7332 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7333 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7334 //
7335 // map(to: s.ps->ps->s.f[:22])
7336 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7337 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7338 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7339 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7340 //
7341 // map(ps)
7342 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7343 //
7344 // map(ps->i)
7345 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7346 //
7347 // map(ps->s.f)
7348 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7349 //
7350 // map(from: ps->p)
7351 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7352 //
7353 // map(to: ps->p[:22])
7354 // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7355 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7356 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7357 //
7358 // map(ps->ps)
7359 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7360 //
7361 // map(from: ps->ps->s.i)
7362 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7363 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7364 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7365 //
7366 // map(from: ps->ps->ps)
7367 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7368 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7369 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7370 //
7371 // map(ps->ps->ps->ps)
7372 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7373 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7374 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7375 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7376 //
7377 // map(to: ps->ps->ps->s.f[:22])
7378 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7379 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7380 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7381 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7382 //
7383 // map(to: s.f[:22]) map(from: s.p[:33])
7384 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7385 // sizeof(double*) (**), TARGET_PARAM
7386 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7387 // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7388 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7389 // (*) allocate contiguous space needed to fit all mapped members even if
7390 // we allocate space for members not mapped (in this example,
7391 // s.f[22..49] and s.s are not mapped, yet we must allocate space for
7392 // them as well because they fall between &s.f[0] and &s.p)
7393 //
7394 // map(from: s.f[:22]) map(to: ps->p[:33])
7395 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7396 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7397 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7398 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7399 // (*) the struct this entry pertains to is the 2nd element in the list of
7400 // arguments, hence MEMBER_OF(2)
7401 //
7402 // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7403 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7404 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7405 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7406 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7407 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7408 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7409 // (*) the struct this entry pertains to is the 4th element in the list
7410 // of arguments, hence MEMBER_OF(4)
7411 //
7412 // map(p, p[:100])
7413 // ===> map(p[:100])
7414 // &p, &p[0], 100*sizeof(float), TARGET_PARAM | PTR_AND_OBJ | TO | FROM
7415
7416 // Track if the map information being generated is the first for a capture.
7417 bool IsCaptureFirstInfo = IsFirstComponentList;
7418 // When the variable is on a declare target link or in a to clause with
7419 // unified memory, a reference is needed to hold the host/device address
7420 // of the variable.
7421 bool RequiresReference = false;
7422
7423 // Scan the components from the base to the complete expression.
7424 auto CI = Components.rbegin();
7425 auto CE = Components.rend();
7426 auto I = CI;
7427
7428 // Track if the map information being generated is the first for a list of
7429 // components.
7430 bool IsExpressionFirstInfo = true;
7431 bool FirstPointerInComplexData = false;
7432 Address BP = Address::invalid();
7433 const Expr *AssocExpr = I->getAssociatedExpression();
7434 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7435 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7436 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7437
7438 if (AreBothBasePtrAndPteeMapped && std::next(I) == CE)
7439 return;
7440 if (isa<MemberExpr>(AssocExpr)) {
7441 // The base is the 'this' pointer. The content of the pointer is going
7442 // to be the base of the field being mapped.
7443 BP = CGF.LoadCXXThisAddress();
7444 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7445 (OASE &&
7446 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7447 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7448 } else if (OAShE &&
7449 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7450 BP = Address(
7451 CGF.EmitScalarExpr(OAShE->getBase()),
7452 CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
7453 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7454 } else {
7455 // The base is the reference to the variable.
7456 // BP = &Var.
7457 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7458 if (const auto *VD =
7459 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7460 if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7461 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7462 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7463 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
7464 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
7465 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7466 RequiresReference = true;
7467 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7468 }
7469 }
7470 }
7471
7472 // If the variable is a pointer and is being dereferenced (i.e. is not
7473 // the last component), the base has to be the pointer itself, not its
7474 // reference. References are ignored for mapping purposes.
7475 QualType Ty =
7476 I->getAssociatedDeclaration()->getType().getNonReferenceType();
7477 if (Ty->isAnyPointerType() && std::next(I) != CE) {
7478 // No need to generate individual map information for the pointer, it
7479 // can be associated with the combined storage if shared memory mode is
7480 // active or the base declaration is not a global variable.
7481 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7482 if (!AreBothBasePtrAndPteeMapped &&
7483 (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7484 !VD || VD->hasLocalStorage()))
7485 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7486 else
7487 FirstPointerInComplexData = true;
7488 ++I;
7489 }
7490 }
7491
7492 // Track whether a component of the list should be marked as MEMBER_OF some
7493 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7494 // in a component list should be marked as MEMBER_OF, all subsequent entries
7495 // do not belong to the base struct. E.g.
7496 // struct S2 s;
7497 // s.ps->ps->ps->f[:]
7498 // (1) (2) (3) (4)
7499 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7500 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7501 // is the pointee of ps(2) which is not member of struct s, so it should not
7502 // be marked as such (it is still PTR_AND_OBJ).
7503 // The variable is initialized to false so that PTR_AND_OBJ entries which
7504 // are not struct members are not considered (e.g. array of pointers to
7505 // data).
7506 bool ShouldBeMemberOf = false;
7507
7508 // Variable keeping track of whether or not we have encountered a component
7509 // in the component list which is a member expression. Useful when we have a
7510 // pointer or a final array section, in which case it is the previous
7511 // component in the list which tells us whether we have a member expression.
7512 // E.g. X.f[:]
7513 // While processing the final array section "[:]" it is "f" which tells us
7514 // whether we are dealing with a member of a declared struct.
7515 const MemberExpr *EncounteredME = nullptr;
7516
7517 // Track the total number of dimensions. Start from one for the dummy
7518 // dimension.
7519 uint64_t DimSize = 1;
7520
7521 // Detects non-contiguous updates due to strided accesses.
7522 // Sets the 'IsNonContiguous' flag so that the 'MapType' bits are set
7523 // correctly when generating information to be passed to the runtime. The
7524 // flag is set to true if any array section has a stride not equal to 1, or
7525 // if the stride is not a constant expression (conservatively assumed
7526 // non-contiguous).
7527 bool IsNonContiguous =
7528 CombinedInfo.NonContigInfo.IsNonContiguous ||
7529 any_of(Components, [&](const auto &Component) {
7530 const auto *OASE =
7531 dyn_cast<ArraySectionExpr>(Component.getAssociatedExpression());
7532 if (!OASE)
7533 return false;
7534
7535 const Expr *StrideExpr = OASE->getStride();
7536 if (!StrideExpr)
7537 return false;
7538
7539 const auto Constant =
7540 StrideExpr->getIntegerConstantExpr(CGF.getContext());
7541 if (!Constant)
7542 return false;
7543
7544 return !Constant->isOne();
7545 });
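// E.g. for
//   #pragma omp target update to(arr[0:4:2])
// the stride 2 is a constant different from 1, so IsNonContiguous becomes
// true and offset/count/stride descriptors are emitted for the runtime
// instead of a single flat size.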
7546
7547 bool IsPrevMemberReference = false;
7548
7549 bool IsPartialMapped =
7550 !PartialStruct.PreliminaryMapData.BasePointers.empty();
7551
7552 // We need to check whether we will encounter any member expressions (MEs).
7553 // If we do not encounter any ME it means we will be mapping the whole struct.
7554 // In that case we need to skip adding an entry for the struct to the
7555 // CombinedInfo list and instead add an entry to the StructBaseCombinedInfo
7556 // list only when generating all info for clauses.
7557 bool IsMappingWholeStruct = true;
7558 if (!GenerateAllInfoForClauses) {
7559 IsMappingWholeStruct = false;
7560 } else {
7561 for (auto TempI = I; TempI != CE; ++TempI) {
7562 const MemberExpr *PossibleME =
7563 dyn_cast<MemberExpr>(TempI->getAssociatedExpression());
7564 if (PossibleME) {
7565 IsMappingWholeStruct = false;
7566 break;
7567 }
7568 }
7569 }
7570
7571 for (; I != CE; ++I) {
7572 // If the current component is member of a struct (parent struct) mark it.
7573 if (!EncounteredME) {
7574 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7575 // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7576 // as MEMBER_OF the parent struct.
7577 if (EncounteredME) {
7578 ShouldBeMemberOf = true;
7579 // Do not emit as complex pointer if this is actually not an array-like
7580 // expression.
7581 if (FirstPointerInComplexData) {
7582 QualType Ty = std::prev(I)
7583 ->getAssociatedDeclaration()
7584 ->getType()
7585 .getNonReferenceType();
7586 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7587 FirstPointerInComplexData = false;
7588 }
7589 }
7590 }
7591
7592 auto Next = std::next(I);
7593
7594 // We need to generate the addresses and sizes if this is the last
7595 // component, if the component is a pointer or if it is an array section
7596 // whose length can't be proved to be one. If this is a pointer, it
7597 // becomes the base address for the following components.
7598
7599 // A final array section is one whose length can't be proved to be one.
7600 // If the map item is non-contiguous then we don't treat any array section
7601 // as final array section.
7602 bool IsFinalArraySection =
7603 !IsNonContiguous &&
7604 isFinalArraySectionExpression(I->getAssociatedExpression());
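// E.g. 'a[3:1]' has a length provably equal to one and is treated like a
// single-element access, while 'a[0:n]' is a final array section because
// its length cannot be proved to be one at compile time.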
7605
7606 // If we have a declaration for the mapping use that, otherwise use
7607 // the base declaration of the map clause.
7608 const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7609 ? I->getAssociatedDeclaration()
7610 : BaseDecl;
7611 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7612 : MapExpr;
7613
7614 // Get information on whether the element is a pointer. Have to do a
7615 // special treatment for array sections given that they are built-in
7616 // types.
7617 const auto *OASE =
7618 dyn_cast<ArraySectionExpr>(I->getAssociatedExpression());
7619 const auto *OAShE =
7620 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7621 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7622 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7623 bool IsPointer =
7624 OAShE ||
7625 (OASE && ArraySectionExpr::getBaseOriginalType(OASE)
7626 .getCanonicalType()
7627 ->isAnyPointerType()) ||
7628 I->getAssociatedExpression()->getType()->isAnyPointerType();
7629 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7630 MapDecl &&
7631 MapDecl->getType()->isLValueReferenceType();
7632 bool IsNonDerefPointer = IsPointer &&
7633 !(UO && UO->getOpcode() != UO_Deref) && !BO &&
7634 !IsNonContiguous;
7635
7636 if (OASE)
7637 ++DimSize;
7638
7639 if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7640 IsFinalArraySection) {
7641 // If this is not the last component, we expect the pointer to be
7642 // associated with an array expression or member expression.
7643 assert((Next == CE ||
7644 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7645 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7646 isa<ArraySectionExpr>(Next->getAssociatedExpression()) ||
7647 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7648 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7649 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7650 "Unexpected expression");
7651
7652 Address LB = Address::invalid();
7653 Address LowestElem = Address::invalid();
7654 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7655 const MemberExpr *E) {
7656 const Expr *BaseExpr = E->getBase();
7657 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
7658 // scalar.
7659 LValue BaseLV;
7660 if (E->isArrow()) {
7661 LValueBaseInfo BaseInfo;
7662 TBAAAccessInfo TBAAInfo;
7663 Address Addr =
7664 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
7665 QualType PtrTy = BaseExpr->getType()->getPointeeType();
7666 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
7667 } else {
7668 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
7669 }
7670 return BaseLV;
7671 };
7672 if (OAShE) {
7673 LowestElem = LB =
7674 Address(CGF.EmitScalarExpr(OAShE->getBase()),
7675 CGF.ConvertTypeForMem(
7676 OAShE->getBase()->getType()->getPointeeType()),
7677 CGF.getContext().getTypeAlignInChars(
7678 OAShE->getBase()->getType()));
7679 } else if (IsMemberReference) {
7680 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
7681 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7682 LowestElem = CGF.EmitLValueForFieldInitialization(
7683 BaseLVal, cast<FieldDecl>(MapDecl))
7684 .getAddress();
7685 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
7686 .getAddress();
7687 } else {
7688 LowestElem = LB =
7689 CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7690 .getAddress();
7691 }
7692
7693 // If this component is a pointer inside the base struct then we don't
7694 // need to create any entry for it - it will be combined with the object
7695 // it is pointing to into a single PTR_AND_OBJ entry.
7696 bool IsMemberPointerOrAddr =
7697 EncounteredME &&
7698 (((IsPointer || ForDeviceAddr) &&
7699 I->getAssociatedExpression() == EncounteredME) ||
7700 (IsPrevMemberReference && !IsPointer) ||
7701 (IsMemberReference && Next != CE &&
7702 !Next->getAssociatedExpression()->getType()->isPointerType()));
7703 if (!OverlappedElements.empty() && Next == CE) {
7704 // Handle base element with the info for overlapped elements.
7705 assert(!PartialStruct.Base.isValid() && "The base element is set.");
7706 assert(!IsPointer &&
7707 "Unexpected base element with the pointer type.");
7708 // Mark the whole struct as the struct that requires allocation on the
7709 // device.
7710 PartialStruct.LowestElem = {0, LowestElem};
7711 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7712 I->getAssociatedExpression()->getType());
7713 Address HB = CGF.Builder.CreateConstGEP(
7714 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
7715 LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
7716 TypeSize.getQuantity() - 1);
7717 PartialStruct.HighestElem = {
7718 std::numeric_limits<decltype(
7719 PartialStruct.HighestElem.first)>::max(),
7720 HB};
7721 PartialStruct.Base = BP;
7722 PartialStruct.LB = LB;
7723 assert(
7724 PartialStruct.PreliminaryMapData.BasePointers.empty() &&
7725 "Overlapped elements must be used only once for the variable.");
7726 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
7727 // Emit data for non-overlapped data.
7728 OpenMPOffloadMappingFlags Flags =
7729 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
7730 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7731 /*AddPtrFlag=*/false,
7732 /*AddIsTargetParamFlag=*/false, IsNonContiguous);
7733 CopyOverlappedEntryGaps CopyGaps(CGF, CombinedInfo, Flags, MapDecl,
7734 MapExpr, BP, LB, IsNonContiguous,
7735 DimSize);
7736 // Do bitcopy of all non-overlapped structure elements.
7737 for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7738 Component : OverlappedElements) {
7739 for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7740 Component) {
7741 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
7742 if (const auto *FD = dyn_cast<FieldDecl>(VD)) {
7743 CopyGaps.processField(MC, FD, EmitMemberExprBase);
7744 }
7745 }
7746 }
7747 }
7748 CopyGaps.copyUntilEnd(HB);
7749 break;
7750 }
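// Rough illustration of the overlap handling above: with
//   #pragma omp target map(tofrom: s) map(to: s.x)
// 's.x' shows up in OverlappedElements, so the storage of 's' around
// 's.x' is copied as separate gap-sized chunks rather than as one
// contiguous block covering the whole struct.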
7751 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7752 // Skip adding an entry in the CurInfo of this combined entry if the
7753 // whole struct is currently being mapped. The struct needs to be added
7754 // in the first position before any data internal to the struct is being
7755 // mapped.
7756 // Also skip adding an entry in the CurInfo of this combined entry if the
7757 // PartialStruct.PreliminaryMapData.BasePointers have already been mapped.
7758 if ((!IsMemberPointerOrAddr && !IsPartialMapped) ||
7759 (Next == CE && MapType != OMPC_MAP_unknown)) {
7760 if (!IsMappingWholeStruct) {
7761 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7762 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7763 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7764 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7765 CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7766 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7767 Size, CGF.Int64Ty, /*isSigned=*/true));
7768 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7769 : 1);
7770 } else {
7771 StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7772 StructBaseCombinedInfo.BasePointers.push_back(
7773 BP.emitRawPointer(CGF));
7774 StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr);
7775 StructBaseCombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7776 StructBaseCombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7777 StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7778 Size, CGF.Int64Ty, /*isSigned=*/true));
7779 StructBaseCombinedInfo.NonContigInfo.Dims.push_back(
7780 IsNonContiguous ? DimSize : 1);
7781 }
7782
7783 // If Mapper is valid, the last component inherits the mapper.
7784 bool HasMapper = Mapper && Next == CE;
7785 if (!IsMappingWholeStruct)
7786 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
7787 else
7788 StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper
7789 : nullptr);
7790
7791 // We need to add a pointer flag for each map that comes from the
7792 // same expression except for the first one. We also need to signal
7793 // this map is the first one that relates with the current capture
7794 // (there is a set of entries for each capture).
7795 OpenMPOffloadMappingFlags Flags =
7796 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7797 !IsExpressionFirstInfo || RequiresReference ||
7798 FirstPointerInComplexData || IsMemberReference,
7799 AreBothBasePtrAndPteeMapped ||
7800 (IsCaptureFirstInfo && !RequiresReference),
7801 IsNonContiguous);
7802
7803 if (!IsExpressionFirstInfo || IsMemberReference) {
7804 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7805 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7806 if (IsPointer || (IsMemberReference && Next != CE))
7807 Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
7808 OpenMPOffloadMappingFlags::OMP_MAP_FROM |
7809 OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
7810 OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
7811 OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
7812
7813 if (ShouldBeMemberOf) {
7814 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7815 // should be later updated with the correct value of MEMBER_OF.
7816 Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
7817 // From now on, all subsequent PTR_AND_OBJ entries should not be
7818 // marked as MEMBER_OF.
7819 ShouldBeMemberOf = false;
7820 }
7821 }
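// E.g. for 'map(tofrom: s.p[0:n])' the section entry carries
// PTR_AND_OBJ | TO | FROM plus the MEMBER_OF placeholder, while for a
// chain like 's.p->q' the intermediate pointer entry has its
// TO/FROM/ALWAYS/DELETE/CLOSE bits cleared, since only the final pointee
// is actually copied.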
7822
7823 if (!IsMappingWholeStruct)
7824 CombinedInfo.Types.push_back(Flags);
7825 else
7826 StructBaseCombinedInfo.Types.push_back(Flags);
7827 }
7828
7829 // If we have encountered a member expression so far, keep track of the
7830 // mapped member. If the parent is "*this", then the value declaration
7831 // is nullptr.
7832 if (EncounteredME) {
7833 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7834 unsigned FieldIndex = FD->getFieldIndex();
7835
7836 // Update info about the lowest and highest elements for this struct
7837 if (!PartialStruct.Base.isValid()) {
7838 PartialStruct.LowestElem = {FieldIndex, LowestElem};
7839 if (IsFinalArraySection && OASE) {
7840 Address HB =
7841 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
7842 .getAddress();
7843 PartialStruct.HighestElem = {FieldIndex, HB};
7844 } else {
7845 PartialStruct.HighestElem = {FieldIndex, LowestElem};
7846 }
7847 PartialStruct.Base = BP;
7848 PartialStruct.LB = BP;
7849 } else if (FieldIndex < PartialStruct.LowestElem.first) {
7850 PartialStruct.LowestElem = {FieldIndex, LowestElem};
7851 } else if (FieldIndex > PartialStruct.HighestElem.first) {
7852 if (IsFinalArraySection && OASE) {
7853 Address HB =
7854 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
7855 .getAddress();
7856 PartialStruct.HighestElem = {FieldIndex, HB};
7857 } else {
7858 PartialStruct.HighestElem = {FieldIndex, LowestElem};
7859 }
7860 }
7861 }
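// For instance, if 's.b' is processed first and 's.a' (an earlier field)
// afterwards, LowestElem moves down to 'a' while HighestElem stays at
// 'b', so the combined entry emitted later spans [&s.a, &s.b + 1).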
7862
7863 // Need to emit combined struct for array sections.
7864 if (IsFinalArraySection || IsNonContiguous)
7865 PartialStruct.IsArraySection = true;
7866
7867 // If we have a final array section, we are done with this expression.
7868 if (IsFinalArraySection)
7869 break;
7870
7871 // The pointer becomes the base for the next element.
7872 if (Next != CE)
7873 BP = IsMemberReference ? LowestElem : LB;
7874 if (!IsPartialMapped)
7875 IsExpressionFirstInfo = false;
7876 IsCaptureFirstInfo = false;
7877 FirstPointerInComplexData = false;
7878 IsPrevMemberReference = IsMemberReference;
7879 } else if (FirstPointerInComplexData) {
7880 QualType Ty = Components.rbegin()
7881 ->getAssociatedDeclaration()
7882 ->getType()
7883 .getNonReferenceType();
7884 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7885 FirstPointerInComplexData = false;
7886 }
7887 }
7888 // If we ran into the whole component, allocate space for the whole
7889 // record.
7890 if (!EncounteredME)
7891 PartialStruct.HasCompleteRecord = true;
7892
7893 if (!IsNonContiguous)
7894 return;
7895
7896 const ASTContext &Context = CGF.getContext();
7897
7898 // To support strides in array sections, we need to initialize the first
7899 // dimension size as 1, the first offset as 0, and the first count as 1.
7900 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
7901 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7902 MapValuesArrayTy CurStrides;
7903 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7904 uint64_t ElementTypeSize;
7905
7906 // Collect Size information for each dimension and get the element size as
7907 // the first Stride. For example, for `int arr[10][10]`, the DimSizes
7908 // should be [10, 10] and the first stride is 4 bytes.
7909 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7910 Components) {
7911 const Expr *AssocExpr = Component.getAssociatedExpression();
7912 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7913
7914 if (!OASE)
7915 continue;
7916
7917 QualType Ty = ArraySectionExpr::getBaseOriginalType(OASE->getBase());
7918 auto *CAT = Context.getAsConstantArrayType(Ty);
7919 auto *VAT = Context.getAsVariableArrayType(Ty);
7920
7921 // We need all the dimension sizes except for the last dimension.
7922 assert((VAT || CAT || &Component == &*Components.begin()) &&
7923 "Should be either ConstantArray or VariableArray if not the "
7924 "first Component");
7925
7926 // Get element size if CurStrides is empty.
7927 if (CurStrides.empty()) {
7928 const Type *ElementType = nullptr;
7929 if (CAT)
7930 ElementType = CAT->getElementType().getTypePtr();
7931 else if (VAT)
7932 ElementType = VAT->getElementType().getTypePtr();
7933 else
7934 assert(&Component == &*Components.begin() &&
7935 "Only expect pointer (non CAT or VAT) when this is the "
7936 "first Component");
7937 // If ElementType is null, then it means the base is a pointer
7938 // (neither CAT nor VAT) and we'll attempt to get ElementType again
7939 // for next iteration.
7940 if (ElementType) {
7941 // For the case that having pointer as base, we need to remove one
7942 // level of indirection.
7943 if (&Component != &*Components.begin())
7944 ElementType = ElementType->getPointeeOrArrayElementType();
7945 ElementTypeSize =
7946 Context.getTypeSizeInChars(ElementType).getQuantity();
7947 CurStrides.push_back(
7948 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
7949 }
7950 }
7951 // Get dimension value except for the last dimension since we don't need
7952 // it.
7953 if (DimSizes.size() < Components.size() - 1) {
7954 if (CAT)
7955 DimSizes.push_back(
7956 llvm::ConstantInt::get(CGF.Int64Ty, CAT->getZExtSize()));
7957 else if (VAT)
7958 DimSizes.push_back(CGF.Builder.CreateIntCast(
7959 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
7960 /*IsSigned=*/false));
7961 }
7962 }
7963
7964 // Skip the dummy dimension since we already have its information.
7965 auto *DI = DimSizes.begin() + 1;
7966 // Running product of dimension sizes, seeded with the element size.
7967 llvm::Value *DimProd =
7968 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
7969
7970 // Collect info for non-contiguous. Notice that offset, count, and stride
7971 // are only meaningful for array-section, so we insert a null for anything
7972 // other than array-section.
7973 // Also, the sizes of offset, count, and stride are not the same as those
7974 // of pointers, base_pointers, sizes, or dims. Instead, the sizes of
7975 // offset, count, and stride are the same as the number of non-contiguous
7976 // declarations in the target update to/from clause.
7977 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7978 Components) {
7979 const Expr *AssocExpr = Component.getAssociatedExpression();
7980
7981 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
7982 llvm::Value *Offset = CGF.Builder.CreateIntCast(
7983 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
7984 /*isSigned=*/false);
7985 CurOffsets.push_back(Offset);
7986 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
7987 CurStrides.push_back(CurStrides.back());
7988 continue;
7989 }
7990
7991 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7992
7993 if (!OASE)
7994 continue;
7995
7996 // Offset
7997 const Expr *OffsetExpr = OASE->getLowerBound();
7998 llvm::Value *Offset = nullptr;
7999 if (!OffsetExpr) {
8000 // If offset is absent, then we just set it to zero.
8001 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8002 } else {
8003 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8004 CGF.Int64Ty,
8005 /*isSigned=*/false);
8006 }
8007 CurOffsets.push_back(Offset);
8008
8009 // Count
8010 const Expr *CountExpr = OASE->getLength();
8011 llvm::Value *Count = nullptr;
8012 if (!CountExpr) {
8013 // In Clang, once a higher dimension is an array section, all the lower
8014 // dimensions are constructed as array sections too. However, for a case
8015 // like arr[0:2][2], Clang constructs the inner dimension as an array
8016 // section even though it is not in array-section form according to the spec.
8017 if (!OASE->getColonLocFirst().isValid() &&
8018 !OASE->getColonLocSecond().isValid()) {
8019 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8020 } else {
8021 // OpenMP 5.0, 2.1.5 Array Sections, Description.
8022 // When the length is absent it defaults to ⌈(size −
8023 // lower-bound)/stride⌉, where size is the size of the array
8024 // dimension.
8025 const Expr *StrideExpr = OASE->getStride();
8026 llvm::Value *Stride =
8027 StrideExpr
8028 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8029 CGF.Int64Ty, /*isSigned=*/false)
8030 : nullptr;
8031 if (Stride)
8032 Count = CGF.Builder.CreateUDiv(
8033 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8034 else
8035 Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8036 }
8037 } else {
8038 Count = CGF.EmitScalarExpr(CountExpr);
8039 }
8040 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8041 CurCounts.push_back(Count);
8042
8043 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8044 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8045 // Offset Count Stride
8046 // D0 0 1 4 (int) <- dummy dimension
8047 // D1 0 2 8 (2 * (1) * 4)
8048 // D2 1 2 20 (1 * (1 * 5) * 4)
8049 // D3 0 2 200 (2 * (1 * 5 * 4) * 4)
8050 const Expr *StrideExpr = OASE->getStride();
8051 llvm::Value *Stride =
8052 StrideExpr
8053 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8054 CGF.Int64Ty, /*isSigned=*/false)
8055 : nullptr;
8056 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8057 if (Stride)
8058 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8059 else
8060 CurStrides.push_back(DimProd);
8061 if (DI != DimSizes.end())
8062 ++DI;
8063 }
8064
8065 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8066 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8067 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8068 }
8069
8070 /// Return the adjusted map modifiers if the declaration a capture refers to
8071 /// appears in a first-private clause. This is expected to be used only with
8072 /// directives that start with 'target'.
8073 OpenMPOffloadMappingFlags
8074 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8075 assert(Cap.capturesVariable() && "Expected capture by reference only!");
8076
8077 // A first private variable captured by reference will use only the
8078 // 'private ptr' and 'map to' flag. Return the right flags if the captured
8079 // declaration is known as first-private in this handler.
8080 if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8081 if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8082 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
8083 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
8084 return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
8085 OpenMPOffloadMappingFlags::OMP_MAP_TO;
8086 }
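// E.g. a firstprivate 'int *p' captured by reference becomes
// TO | PTR_AND_OBJ so the privatized pointer ends up pointing at mapped
// storage, while a firstprivate scalar 'int x' becomes PRIVATE | TO.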
8087 auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
8088 if (I != LambdasMap.end())
8089 // For map(to: lambda): use the user-specified map type.
8090 return getMapTypeBits(
8091 I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
8092 /*MotionModifiers=*/{}, I->getSecond()->isImplicit(),
8093 /*AddPtrFlag=*/false,
8094 /*AddIsTargetParamFlag=*/false,
8095 /*isNonContiguous=*/false);
8096 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
8097 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
8098 }
8099
8100 void getPlainLayout(const CXXRecordDecl *RD,
8101 llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8102 bool AsBase) const {
8103 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8104
8105 llvm::StructType *St =
8106 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8107
8108 unsigned NumElements = St->getNumElements();
8109 llvm::SmallVector<
8110 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8111 RecordLayout(NumElements);
8112
8113 // Fill bases.
8114 for (const auto &I : RD->bases()) {
8115 if (I.isVirtual())
8116 continue;
8117
8118 QualType BaseTy = I.getType();
8119 const auto *Base = BaseTy->getAsCXXRecordDecl();
8120 // Ignore empty bases.
8121 if (isEmptyRecordForLayout(CGF.getContext(), BaseTy) ||
8122 CGF.getContext()
8123 .getASTRecordLayout(RD)
8124 .getBaseClassOffset(Base)
8125 .isZero())
8126 continue;
8127
8128 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8129 RecordLayout[FieldIndex] = Base;
8130 }
8131 // Fill in virtual bases.
8132 for (const auto &I : RD->vbases()) {
8133 QualType BaseTy = I.getType();
8134 // Ignore empty bases.
8135 if (isEmptyRecordForLayout(CGF.getContext(), BaseTy))
8136 continue;
8137
8138 const auto *Base = BaseTy->getAsCXXRecordDecl();
8139 unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8140 if (RecordLayout[FieldIndex])
8141 continue;
8142 RecordLayout[FieldIndex] = Base;
8143 }
8144 // Fill in all the fields.
8145 assert(!RD->isUnion() && "Unexpected union.");
8146 for (const auto *Field : RD->fields()) {
8147 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8148 // will fill in later.)
8149 if (!Field->isBitField() &&
8150 !isEmptyFieldForLayout(CGF.getContext(), Field)) {
8151 unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8152 RecordLayout[FieldIndex] = Field;
8153 }
8154 }
8155 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8156 &Data : RecordLayout) {
8157 if (Data.isNull())
8158 continue;
8159 if (const auto *Base = dyn_cast<const CXXRecordDecl *>(Data))
8160 getPlainLayout(Base, Layout, /*AsBase=*/true);
8161 else
8162 Layout.push_back(cast<const FieldDecl *>(Data));
8163 }
8164 }
8165
8166 /// Generate all the base pointers, section pointers, sizes, map types, and
8167 /// mappers for the extracted mappable expressions (all included in \a
8168 /// CombinedInfo). Also, for each item that relates with a device pointer, a
8169 /// pair of the relevant declaration and index where it occurs is appended to
8170 /// the device pointers info array.
8171 void generateAllInfoForClauses(
8172 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8173 llvm::OpenMPIRBuilder &OMPBuilder,
8174 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8175 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8176 // We have to process the component lists that relate with the same
8177 // declaration in a single chunk so that we can generate the map flags
8178 // correctly. Therefore, we organize all lists in a map.
8179 enum MapKind { Present, Allocs, Other, Total };
8180 llvm::MapVector<CanonicalDeclPtr<const Decl>,
8181 SmallVector<SmallVector<MapInfo, 8>, 4>>
8182 Info;
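// E.g. 'map(present, to: x)' is filed under Present and 'map(alloc: y)'
// under Allocs; everything else lands in Other. Iterating the buckets in
// enum order then processes present and alloc entries first.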
8183
8184 // Helper function to fill the information map for the different supported
8185 // clauses.
8186 auto &&InfoGen =
8187 [&Info, &SkipVarSet](
8188 const ValueDecl *D, MapKind Kind,
8189 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8190 OpenMPMapClauseKind MapType,
8191 ArrayRef<OpenMPMapModifierKind> MapModifiers,
8192 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8193 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8194 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8195 if (SkipVarSet.contains(D))
8196 return;
8197 auto It = Info.try_emplace(D, Total).first;
8198 It->second[Kind].emplace_back(
8199 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8200 IsImplicit, Mapper, VarRef, ForDeviceAddr);
8201 };
8202
8203 for (const auto *Cl : Clauses) {
8204 const auto *C = dyn_cast<OMPMapClause>(Cl);
8205 if (!C)
8206 continue;
8207 MapKind Kind = Other;
8208 if (llvm::is_contained(C->getMapTypeModifiers(),
8209 OMPC_MAP_MODIFIER_present))
8210 Kind = Present;
8211 else if (C->getMapType() == OMPC_MAP_alloc)
8212 Kind = Allocs;
8213 const auto *EI = C->getVarRefs().begin();
8214 for (const auto L : C->component_lists()) {
8215 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8216 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8217 C->getMapTypeModifiers(), {},
8218 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8219 E);
8220 ++EI;
8221 }
8222 }
8223 for (const auto *Cl : Clauses) {
8224 const auto *C = dyn_cast<OMPToClause>(Cl);
8225 if (!C)
8226 continue;
8227 MapKind Kind = Other;
8228 if (llvm::is_contained(C->getMotionModifiers(),
8229 OMPC_MOTION_MODIFIER_present))
8230 Kind = Present;
8231 const auto *EI = C->getVarRefs().begin();
8232 for (const auto L : C->component_lists()) {
8233 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, {},
8234 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8235 C->isImplicit(), std::get<2>(L), *EI);
8236 ++EI;
8237 }
8238 }
8239 for (const auto *Cl : Clauses) {
8240 const auto *C = dyn_cast<OMPFromClause>(Cl);
8241 if (!C)
8242 continue;
8243 MapKind Kind = Other;
8244 if (llvm::is_contained(C->getMotionModifiers(),
8245 OMPC_MOTION_MODIFIER_present))
8246 Kind = Present;
8247 const auto *EI = C->getVarRefs().begin();
8248 for (const auto L : C->component_lists()) {
8249 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, {},
8250 C->getMotionModifiers(),
8251 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8252 *EI);
8253 ++EI;
8254 }
8255 }
8256
8257 // Look at the use_device_ptr and use_device_addr clauses information and
8258 // mark the existing map entries as such. If there is no map information for
8259 // an entry in the use_device_ptr and use_device_addr list, we create one
8260 // with map type 'alloc' and zero size section. It is the user's fault if that
8261 // was not mapped before. If there is no map information and the pointer is
8262 // a struct member, then we defer the emission of that entry until the whole
8263 // struct has been processed.
8264 llvm::MapVector<CanonicalDeclPtr<const Decl>,
8265 SmallVector<DeferredDevicePtrEntryTy, 4>>
8266 DeferredInfo;
8267 MapCombinedInfoTy UseDeviceDataCombinedInfo;
8268
8269 auto &&UseDeviceDataCombinedInfoGen =
8270 [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
8271 CodeGenFunction &CGF, bool IsDevAddr) {
8272 UseDeviceDataCombinedInfo.Exprs.push_back(VD);
8273 UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
8274 UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
8275 UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
8276 IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
8277 UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
8278 UseDeviceDataCombinedInfo.Sizes.push_back(
8279 llvm::Constant::getNullValue(CGF.Int64Ty));
8280 UseDeviceDataCombinedInfo.Types.push_back(
8281 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
8282 UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
8283 };
8284
8285 auto &&MapInfoGen =
8286 [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
8287 &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
8288 OMPClauseMappableExprCommon::MappableExprComponentListRef
8289 Components,
8290 bool IsImplicit, bool IsDevAddr) {
8291 // We didn't find any match in our map information - generate a zero
8292 // size array section - if the pointer is a struct member we defer
8293 // this action until the whole struct has been processed.
8294 if (isa<MemberExpr>(IE)) {
8295 // Insert the pointer into Info to be processed by
8296 // generateInfoForComponentList. Because it is a member pointer
8297 // without a pointee, no entry will be generated for it, therefore
8298 // we need to generate one after the whole struct has been
8299 // processed. Nonetheless, generateInfoForComponentList must be
8300 // called to take the pointer into account for the calculation of
8301 // the range of the partial struct.
8302 InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, {}, {},
8303 /*ReturnDevicePointer=*/false, IsImplicit, nullptr, nullptr,
8304 IsDevAddr);
8305 DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
8306 } else {
8307 llvm::Value *Ptr;
8308 if (IsDevAddr) {
8309 if (IE->isGLValue())
8310 Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8311 else
8312 Ptr = CGF.EmitScalarExpr(IE);
8313 } else {
8314 Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8315 }
8316 UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, IsDevAddr);
8317 }
8318 };
8319
8320 auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
8321 const Expr *IE, bool IsDevAddr) -> bool {
8322 // We potentially have map information for this declaration already.
8323 // Look for the first set of components that refer to it. If found,
8324 // return true.
8325 // If the first component is a member expression, we have to look into
8326 // 'this', which maps to null in the map of map information. Otherwise
8327 // look directly for the information.
8328 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8329 if (It != Info.end()) {
8330 bool Found = false;
8331 for (auto &Data : It->second) {
8332 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8333 return MI.Components.back().getAssociatedDeclaration() == VD;
8334 });
8335 // If we found a map entry, signal that the pointer has to be
8336 // returned and move on to the next declaration. Exclude cases where
8337 // the base pointer is mapped as array subscript, array section or
8338 // array shaping. The base address is passed as a pointer to base in
8339 // this case and cannot be used as a base for use_device_ptr list
8340 // item.
8341 if (CI != Data.end()) {
8342 if (IsDevAddr) {
8343 CI->ForDeviceAddr = IsDevAddr;
8344 CI->ReturnDevicePointer = true;
8345 Found = true;
8346 break;
8347 } else {
8348 auto PrevCI = std::next(CI->Components.rbegin());
8349 const auto *VarD = dyn_cast<VarDecl>(VD);
8350 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8351 isa<MemberExpr>(IE) ||
8352 !VD->getType().getNonReferenceType()->isPointerType() ||
8353 PrevCI == CI->Components.rend() ||
8354 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8355 VarD->hasLocalStorage()) {
8356 CI->ForDeviceAddr = IsDevAddr;
8357 CI->ReturnDevicePointer = true;
8358 Found = true;
8359 break;
8360 }
8361 }
8362 }
8363 }
8364 return Found;
8365 }
8366 return false;
8367 };
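// For illustration: with 'map(tofrom: p) use_device_ptr(p)' the existing
// entry for 'p' is found and merely marked to also return the device
// pointer. If a global 'p' is mapped only as 'p[0:n]', its base address is
// passed as a pointer to base and cannot serve as the use_device_ptr list
// item, so the lambda reports no match and a zero-size entry is created
// for 'p' instead.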
8368
8369 // Look at the use_device_ptr clause information and mark the existing map
8370 // entries as such. If there is no map information for an entry in the
8371 // use_device_ptr list, we create one with map type 'alloc' and zero size
8372 // section. It is the user's fault if that was not mapped before. If there is
8373 // no map information and the pointer is a struct member, then we defer the
8374 // emission of that entry until the whole struct has been processed.
8375 for (const auto *Cl : Clauses) {
8376 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8377 if (!C)
8378 continue;
8379 for (const auto L : C->component_lists()) {
8380 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8381 std::get<1>(L);
8382 assert(!Components.empty() &&
8383 "Not expecting empty list of components!");
8384 const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8385 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8386 const Expr *IE = Components.back().getAssociatedExpression();
8387 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
8388 continue;
8389 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8390 /*IsDevAddr=*/false);
8391 }
8392 }
8393
8394 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8395 for (const auto *Cl : Clauses) {
8396 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8397 if (!C)
8398 continue;
8399 for (const auto L : C->component_lists()) {
8400 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8401 std::get<1>(L);
8402 assert(!std::get<1>(L).empty() &&
8403 "Not expecting empty list of components!");
8404 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8405 if (!Processed.insert(VD).second)
8406 continue;
8407 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8408 const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8409 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
8410 continue;
8411 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8412 /*IsDevAddr=*/true);
8413 }
8414 }
8415
8416 for (const auto &Data : Info) {
8417 StructRangeInfoTy PartialStruct;
8418 // Current struct information:
8419 MapCombinedInfoTy CurInfo;
8420 // Current struct base information:
8421 MapCombinedInfoTy StructBaseCurInfo;
8422 const Decl *D = Data.first;
8423 const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8424 bool HasMapBasePtr = false;
8425 bool HasMapArraySec = false;
8426 if (VD && VD->getType()->isAnyPointerType()) {
8427 for (const auto &M : Data.second) {
8428 HasMapBasePtr = any_of(M, [](const MapInfo &L) {
8429 return isa_and_present<DeclRefExpr>(L.VarRef);
8430 });
8431 HasMapArraySec = any_of(M, [](const MapInfo &L) {
8432 return isa_and_present<ArraySectionExpr, ArraySubscriptExpr>(
8433 L.VarRef);
8434 });
8435 if (HasMapBasePtr && HasMapArraySec)
8436 break;
8437 }
8438 }
8439 for (const auto &M : Data.second) {
8440 for (const MapInfo &L : M) {
8441 assert(!L.Components.empty() &&
8442 "Not expecting declaration with no component lists.");
8443
8444 // Remember the current base pointer index.
8445 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8446 unsigned StructBasePointersIdx =
8447 StructBaseCurInfo.BasePointers.size();
8448 CurInfo.NonContigInfo.IsNonContiguous =
8449 L.Components.back().isNonContiguous();
8450 generateInfoForComponentList(
8451 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8452 CurInfo, StructBaseCurInfo, PartialStruct,
8453 /*IsFirstComponentList=*/false, L.IsImplicit,
8454 /*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr, VD,
8455 L.VarRef, /*OverlappedElements*/ {},
8456 HasMapBasePtr && HasMapArraySec);
8457
8458 // If this entry relates to a device pointer, set the relevant
8459 // declaration and add the 'return pointer' flag.
8460 if (L.ReturnDevicePointer) {
8461 // Check whether a value was added to either CurInfo or
8462 // StructBaseCurInfo and error if no value was added to either of
8463 // them:
8464 assert((CurrentBasePointersIdx < CurInfo.BasePointers.size() ||
8465 StructBasePointersIdx <
8466 StructBaseCurInfo.BasePointers.size()) &&
8467 "Unexpected number of mapped base pointers.");
8468
8469 // Choose a base pointer index which is always valid:
8470 const ValueDecl *RelevantVD =
8471 L.Components.back().getAssociatedDeclaration();
8472 assert(RelevantVD &&
8473 "No relevant declaration related with device pointer??");
8474
8475 // If StructBaseCurInfo has been updated this iteration then work on
8476 // the first new entry added to it i.e. make sure that when multiple
8477 // values are added to any of the lists, the first value added is
8478 // being modified by the assignments below (not the last value
8479 // added).
8480 if (StructBasePointersIdx < StructBaseCurInfo.BasePointers.size()) {
8481 StructBaseCurInfo.DevicePtrDecls[StructBasePointersIdx] =
8482 RelevantVD;
8483 StructBaseCurInfo.DevicePointers[StructBasePointersIdx] =
8484 L.ForDeviceAddr ? DeviceInfoTy::Address
8485 : DeviceInfoTy::Pointer;
8486 StructBaseCurInfo.Types[StructBasePointersIdx] |=
8487 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8488 } else {
8489 CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD;
8490 CurInfo.DevicePointers[CurrentBasePointersIdx] =
8491 L.ForDeviceAddr ? DeviceInfoTy::Address
8492 : DeviceInfoTy::Pointer;
8493 CurInfo.Types[CurrentBasePointersIdx] |=
8494 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8495 }
8496 }
8497 }
8498 }
8499
8500 // Append any pending zero-length pointers which are struct members and
8501 // used with use_device_ptr or use_device_addr.
8502 auto CI = DeferredInfo.find(Data.first);
8503 if (CI != DeferredInfo.end()) {
8504 for (const DeferredDevicePtrEntryTy &L : CI->second) {
8505 llvm::Value *BasePtr;
8506 llvm::Value *Ptr;
8507 if (L.ForDeviceAddr) {
8508 if (L.IE->isGLValue())
8509 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8510 else
8511 Ptr = this->CGF.EmitScalarExpr(L.IE);
8512 BasePtr = Ptr;
8513 // Entry is RETURN_PARAM. Also, set the placeholder value
8514 // MEMBER_OF=FFFF so that the entry is later updated with the
8515 // correct value of MEMBER_OF.
8516 CurInfo.Types.push_back(
8517 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8518 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8519 } else {
8520 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8521 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8522 L.IE->getExprLoc());
8523 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8524 // placeholder value MEMBER_OF=FFFF so that the entry is later
8525 // updated with the correct value of MEMBER_OF.
8526 CurInfo.Types.push_back(
8527 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8528 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8529 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8530 }
8531 CurInfo.Exprs.push_back(L.VD);
8532 CurInfo.BasePointers.emplace_back(BasePtr);
8533 CurInfo.DevicePtrDecls.emplace_back(L.VD);
8534 CurInfo.DevicePointers.emplace_back(
8535 L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
8536 CurInfo.Pointers.push_back(Ptr);
8537 CurInfo.Sizes.push_back(
8538 llvm::Constant::getNullValue(this->CGF.Int64Ty));
8539 CurInfo.Mappers.push_back(nullptr);
8540 }
8541 }
8542
8543 // Unify entries in one list making sure the struct mapping precedes the
8544 // individual fields:
8545 MapCombinedInfoTy UnionCurInfo;
8546 UnionCurInfo.append(StructBaseCurInfo);
8547 UnionCurInfo.append(CurInfo);
8548
8549 // If there is an entry in PartialStruct it means we have a struct with
8550 // individual members mapped. Emit an extra combined entry.
8551 if (PartialStruct.Base.isValid()) {
8552 UnionCurInfo.NonContigInfo.Dims.push_back(0);
8553 // Emit a combined entry:
8554 emitCombinedEntry(CombinedInfo, UnionCurInfo.Types, PartialStruct,
8555 /*IsMapThis*/ !VD, OMPBuilder, VD);
8556 }
8557
8558 // We need to append the results of this capture to what we already have.
8559 CombinedInfo.append(UnionCurInfo);
8560 }
8561 // Append data for use_device_ptr clauses.
8562 CombinedInfo.append(UseDeviceDataCombinedInfo);
8563 }
8564
8565public:
8566 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8567 : CurDir(&Dir), CGF(CGF) {
8568 // Extract firstprivate clause information.
8569 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8570 for (const auto *D : C->varlist())
8571 FirstPrivateDecls.try_emplace(
8572 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8573 // Extract implicit firstprivates from uses_allocators clauses.
8574 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8575 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8576 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8577 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8578 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8579 /*Implicit=*/true);
8580 else if (const auto *VD = dyn_cast<VarDecl>(
8581 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8582 ->getDecl()))
8583 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8584 }
8585 }
8586 // Extract device pointer clause information.
8587 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8588 for (auto L : C->component_lists())
8589 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8590 // Extract device addr clause information.
8591 for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
8592 for (auto L : C->component_lists())
8593 HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
8594 // Extract map information.
8595 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8596 if (C->getMapType() != OMPC_MAP_to)
8597 continue;
8598 for (auto L : C->component_lists()) {
8599 const ValueDecl *VD = std::get<0>(L);
8600 const auto *RD = VD ? VD->getType()
8601 .getCanonicalType()
8602 .getNonReferenceType()
8603 ->getAsCXXRecordDecl()
8604 : nullptr;
8605 if (RD && RD->isLambda())
8606 LambdasMap.try_emplace(std::get<0>(L), C);
8607 }
8608 }
8609 }
8610
8611 /// Constructor for the declare mapper directive.
8612 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
8613 : CurDir(&Dir), CGF(CGF) {}
8614
8615 /// Generate code for the combined entry if we have a partially mapped struct
8616 /// and take care of the mapping flags of the arguments corresponding to
8617 /// individual struct members.
8618 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8619 MapFlagsArrayTy &CurTypes,
8620 const StructRangeInfoTy &PartialStruct, bool IsMapThis,
8621 llvm::OpenMPIRBuilder &OMPBuilder,
8622 const ValueDecl *VD = nullptr,
8623 unsigned OffsetForMemberOfFlag = 0,
8624 bool NotTargetParams = true) const {
8625 if (CurTypes.size() == 1 &&
8626 ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
8627 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
8628 !PartialStruct.IsArraySection)
8629 return;
8630 Address LBAddr = PartialStruct.LowestElem.second;
8631 Address HBAddr = PartialStruct.HighestElem.second;
8632 if (PartialStruct.HasCompleteRecord) {
8633 LBAddr = PartialStruct.LB;
8634 HBAddr = PartialStruct.LB;
8635 }
8636 CombinedInfo.Exprs.push_back(VD);
8637 // Base is the base of the struct
8638 CombinedInfo.BasePointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
8639 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8640 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8641 // Pointer is the address of the lowest element
8642 llvm::Value *LB = LBAddr.emitRawPointer(CGF);
8643 const CXXMethodDecl *MD =
8644 CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
8645 const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
8646 bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
8647 // There should not be a mapper for a combined entry.
8648 if (HasBaseClass) {
8649 // OpenMP 5.2 148:21:
8650 // If the target construct is within a class non-static member function,
8651 // and a variable is an accessible data member of the object for which the
8652 // non-static data member function is invoked, the variable is treated as
8653 // if the this[:1] expression had appeared in a map clause with a map-type
8654 // of tofrom.
8655 // Emit this[:1]
8656 CombinedInfo.Pointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
8657 QualType Ty = MD->getFunctionObjectParameterType();
8658 llvm::Value *Size =
8659 CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
8660 /*isSigned=*/true);
8661 CombinedInfo.Sizes.push_back(Size);
8662 } else {
8663 CombinedInfo.Pointers.push_back(LB);
8664 // Size is (addr of {highest+1} element) - (addr of lowest element)
8665 llvm::Value *HB = HBAddr.emitRawPointer(CGF);
8666 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
8667 HBAddr.getElementType(), HB, /*Idx0=*/1);
8668 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8669 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8670 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
8671 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8672 /*isSigned=*/false);
8673 CombinedInfo.Sizes.push_back(Size);
8674 }
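// E.g. if the lowest mapped member lives at offset 8 and the highest one
// is a 4-byte int at offset 24, the emitted size is (24 + 4) - 8 = 20
// bytes, transferred starting at the lowest element's address.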
8675 CombinedInfo.Mappers.push_back(nullptr);
8676 // Map type is always TARGET_PARAM when generating info for captures.
8677 CombinedInfo.Types.push_back(
8678 NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
8679 : !PartialStruct.PreliminaryMapData.BasePointers.empty()
8680 ? OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ
8681 : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8682 // If any element has the present modifier, then make sure the runtime
8683 // doesn't attempt to allocate the struct.
8684 if (CurTypes.end() !=
8685 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8686 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8687 Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
8688 }))
8689 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
8690 // Remove TARGET_PARAM flag from the first element
8691 (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
8692 // If any element has the ompx_hold modifier, then make sure the runtime
8693 // uses the hold reference count for the struct as a whole so that it won't
8694 // be unmapped by an extra dynamic reference count decrement. Add it to all
8695 // elements as well so the runtime knows which reference count to check
8696 // when determining whether it's time for device-to-host transfers of
8697 // individual elements.
8698 if (CurTypes.end() !=
8699 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8700 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8701 Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
8702 })) {
8703 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8704 for (auto &M : CurTypes)
8705 M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8706 }
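// E.g. under 'map(ompx_hold, tofrom: s.x) map(tofrom: s.y)' the hold
// reference count applies to the combined struct entry and to both member
// entries, so a plain dynamic reference-count decrement cannot unmap any
// of them while the hold is in effect.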
8707
8708 // All other current entries will be MEMBER_OF the combined entry
8709 // (except for PTR_AND_OBJ entries which do not have a placeholder value
8710 // 0xFFFF in the MEMBER_OF field).
8711 OpenMPOffloadMappingFlags MemberOfFlag = OMPBuilder.getMemberOfFlag(
8712 OffsetForMemberOfFlag + CombinedInfo.BasePointers.size() - 1);
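// For example, if the combined entry sits at index 3 of the final
// argument arrays, the members get MEMBER_OF(4) - the encoding is
// position + 1 so that zero can mean "not a member" - replacing the
// 0xFFFF placeholder set earlier.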
8713 for (auto &M : CurTypes)
8714 OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag);
8715 }
8716
8717 /// Generate all the base pointers, section pointers, sizes, map types, and
8718 /// mappers for the extracted mappable expressions (all included in \a
8719 /// CombinedInfo). Also, for each item that relates with a device pointer, a
8720 /// pair of the relevant declaration and index where it occurs is appended to
8721 /// the device pointers info array.
8722 void generateAllInfo(
8723 MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
8724 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8725 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8726 assert(isa<const OMPExecutableDirective *>(CurDir) &&
8727 "Expect a executable directive");
8728 const auto *CurExecDir = cast<const OMPExecutableDirective *>(CurDir);
8729 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder,
8730 SkipVarSet);
8731 }
8732
8733 /// Generate all the base pointers, section pointers, sizes, map types, and
8734 /// mappers for the extracted map clauses of user-defined mapper (all included
8735 /// in \a CombinedInfo).
8736 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
8737 llvm::OpenMPIRBuilder &OMPBuilder) const {
8738 assert(isa<const OMPDeclareMapperDecl *>(CurDir) &&
8739 "Expect a declare mapper directive");
8740 const auto *CurMapperDir = cast<const OMPDeclareMapperDecl *>(CurDir);
8741 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo,
8742 OMPBuilder);
8743 }
8744
8745 /// Emit capture info for lambdas for variables captured by reference.
8746 void generateInfoForLambdaCaptures(
8747 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8748 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8749 QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
8750 const auto *RD = VDType->getAsCXXRecordDecl();
8751 if (!RD || !RD->isLambda())
8752 return;
8753 Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
8754 CGF.getContext().getDeclAlign(VD));
8755 LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
8756 llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
8757 FieldDecl *ThisCapture = nullptr;
8758 RD->getCaptureFields(Captures, ThisCapture);
8759 if (ThisCapture) {
8760 LValue ThisLVal =
8761 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8762 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8763 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8764 VDLVal.getPointer(CGF));
8765 CombinedInfo.Exprs.push_back(VD);
8766 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
8767 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8768 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8769 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
8770 CombinedInfo.Sizes.push_back(
8771 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8772 CGF.Int64Ty, /*isSigned=*/true));
8773 CombinedInfo.Types.push_back(
8774 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8775 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8776 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8777 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8778 CombinedInfo.Mappers.push_back(nullptr);
8779 }
8780 for (const LambdaCapture &LC : RD->captures()) {
8781 if (!LC.capturesVariable())
8782 continue;
8783 const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
8784 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8785 continue;
8786 auto It = Captures.find(VD);
8787 assert(It != Captures.end() && "Found lambda capture without field.");
8788 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8789 if (LC.getCaptureKind() == LCK_ByRef) {
8790 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8791 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8792 VDLVal.getPointer(CGF));
8793 CombinedInfo.Exprs.push_back(VD);
8794 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8795 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8796 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8797 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
8798 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8799 CGF.getTypeSize(
8800 VD->getType().getCanonicalType().getNonReferenceType()),
8801 CGF.Int64Ty, /*isSigned=*/true));
8802 } else {
8803 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8804 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8805 VDLVal.getPointer(CGF));
8806 CombinedInfo.Exprs.push_back(VD);
8807 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8808 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8809 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8810 CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
8811 CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8812 }
8813 CombinedInfo.Types.push_back(
8814 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8815 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8816 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8817 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8818 CombinedInfo.Mappers.push_back(nullptr);
8819 }
8820 }
8821
8822 /// Set correct indices for lambdas captures.
8823 void adjustMemberOfForLambdaCaptures(
8824 llvm::OpenMPIRBuilder &OMPBuilder,
8825 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8826 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8827 MapFlagsArrayTy &Types) const {
8828 for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8829 // Set correct member_of idx for all implicit lambda captures.
8830 if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8831 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8832 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8833 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
8834 continue;
8835 llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
8836 assert(BasePtr && "Unable to find base lambda address.");
8837 int TgtIdx = -1;
8838 for (unsigned J = I; J > 0; --J) {
8839 unsigned Idx = J - 1;
8840 if (Pointers[Idx] != BasePtr)
8841 continue;
8842 TgtIdx = Idx;
8843 break;
8844 }
8845 assert(TgtIdx != -1 && "Unable to find parent lambda.");
8846 // All other current entries will be MEMBER_OF the combined entry
8847 // (except for PTR_AND_OBJ entries which do not have a placeholder value
8848 // 0xFFFF in the MEMBER_OF field).
8849 OpenMPOffloadMappingFlags MemberOfFlag =
8850 OMPBuilder.getMemberOfFlag(TgtIdx);
8851 OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8852 }
8853 }
8854
8855 /// For a capture that has an associated clause, generate the base pointers,
8856 /// section pointers, sizes, map types, and mappers (all included in
8857 /// \a CurCaptureVarInfo).
8858 void generateInfoForCaptureFromClauseInfo(
8859 const CapturedStmt::Capture *Cap, llvm::Value *Arg,
8860 MapCombinedInfoTy &CurCaptureVarInfo, llvm::OpenMPIRBuilder &OMPBuilder,
8861 unsigned OffsetForMemberOfFlag) const {
8862 assert(!Cap->capturesVariableArrayType() &&
8863 "Not expecting to generate map info for a variable array type!");
8864
8865 // We need to know when we are generating information for the first component.
8866 const ValueDecl *VD = Cap->capturesThis()
8867 ? nullptr
8868 : Cap->getCapturedVar()->getCanonicalDecl();
8869
8870 // For map(to: lambda): skip here; it is processed in
8871 // generateDefaultMapInfo.
8872 if (LambdasMap.count(VD))
8873 return;
8874
8875 // If this declaration appears in an is_device_ptr clause we just have to
8876 // pass the pointer by value. If it is a reference to a declaration, we just
8877 // pass its value.
8878 if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
8879 CurCaptureVarInfo.Exprs.push_back(VD);
8880 CurCaptureVarInfo.BasePointers.emplace_back(Arg);
8881 CurCaptureVarInfo.DevicePtrDecls.emplace_back(VD);
8882 CurCaptureVarInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
8883 CurCaptureVarInfo.Pointers.push_back(Arg);
8884 CurCaptureVarInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8885 CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
8886 /*isSigned=*/true));
8887 CurCaptureVarInfo.Types.push_back(
8888 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8889 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8890 CurCaptureVarInfo.Mappers.push_back(nullptr);
8891 return;
8892 }
8893
8894 MapDataArrayTy DeclComponentLists;
8895 // For member-field lists in is_device_ptr, store them in
8896 // DeclComponentLists for generating the components info.
8897 static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
8898 auto It = DevPointersMap.find(VD);
8899 if (It != DevPointersMap.end())
8900 for (const auto &MCL : It->second)
8901 DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
8902 /*IsImplicit=*/true, nullptr,
8903 nullptr);
8904 auto I = HasDevAddrsMap.find(VD);
8905 if (I != HasDevAddrsMap.end())
8906 for (const auto &MCL : I->second)
8907 DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
8908 /*IsImplicit=*/true, nullptr,
8909 nullptr);
8910 assert(isa<const OMPExecutableDirective *>(CurDir) &&
8911 "Expect a executable directive");
8912 const auto *CurExecDir = cast<const OMPExecutableDirective *>(CurDir);
8913 bool HasMapBasePtr = false;
8914 bool HasMapArraySec = false;
8915 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8916 const auto *EI = C->getVarRefs().begin();
8917 for (const auto L : C->decl_component_lists(VD)) {
8918 const ValueDecl *VDecl, *Mapper;
8919 // The expression is not correct if the mapping is implicit.
8920 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8921 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8922 std::tie(VDecl, Components, Mapper) = L;
8923 assert(VDecl == VD && "We got information for the wrong declaration??");
8924 assert(!Components.empty() &&
8925 "Not expecting declaration with no component lists.");
8926 if (VD && E && VD->getType()->isAnyPointerType() && isa<DeclRefExpr>(E))
8927 HasMapBasePtr = true;
8928 if (VD && E && VD->getType()->isAnyPointerType() &&
8929 (isa<ArraySectionExpr>(E) || isa<ArraySubscriptExpr>(E)))
8930 HasMapArraySec = true;
8931 DeclComponentLists.emplace_back(Components, C->getMapType(),
8932 C->getMapTypeModifiers(),
8933 C->isImplicit(), Mapper, E);
8934 ++EI;
8935 }
8936 }
8937 llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
8938 const MapData &RHS) {
8939 ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
8940 OpenMPMapClauseKind MapType = std::get<1>(RHS);
8941 bool HasPresent =
8942 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8943 bool HasAllocs = MapType == OMPC_MAP_alloc;
8944 MapModifiers = std::get<2>(RHS);
8945 MapType = std::get<1>(LHS);
8946 bool HasPresentR =
8947 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8948 bool HasAllocsR = MapType == OMPC_MAP_alloc;
8949 return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
8950 });
8951
8952 auto GenerateInfoForComponentLists =
8953 [&](ArrayRef<MapData> DeclComponentLists,
8954 bool IsEligibleForTargetParamFlag) {
8955 MapCombinedInfoTy CurInfoForComponentLists;
8956 StructRangeInfoTy PartialStruct;
8957
8958 if (DeclComponentLists.empty())
8959 return;
8960
8961 generateInfoForCaptureFromComponentLists(
8962 VD, DeclComponentLists, CurInfoForComponentLists, PartialStruct,
8963 IsEligibleForTargetParamFlag,
8964 /*AreBothBasePtrAndPteeMapped=*/HasMapBasePtr && HasMapArraySec);
8965
8966 // If there is an entry in PartialStruct it means we have a
8967 // struct with individual members mapped. Emit an extra combined
8968 // entry.
8969 if (PartialStruct.Base.isValid()) {
8970 CurCaptureVarInfo.append(PartialStruct.PreliminaryMapData);
8971 emitCombinedEntry(
8972 CurCaptureVarInfo, CurInfoForComponentLists.Types,
8973 PartialStruct, Cap->capturesThis(), OMPBuilder, nullptr,
8974 OffsetForMemberOfFlag,
8975 /*NotTargetParams*/ !IsEligibleForTargetParamFlag);
8976 }
8977
8978 // Return if we didn't add any entries.
8979 if (CurInfoForComponentLists.BasePointers.empty())
8980 return;
8981
8982 CurCaptureVarInfo.append(CurInfoForComponentLists);
8983 };
8984
8985 GenerateInfoForComponentLists(DeclComponentLists,
8986 /*IsEligibleForTargetParamFlag=*/true);
8987 }
8988
8989 /// Generate the base pointers, section pointers, sizes, map types, and
8990 /// mappers associated to \a DeclComponentLists for a given capture
8991 /// \a VD (all included in \a CurComponentListInfo).
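/// As a hedged illustration (example code assumed, not from this file), for
/// \code
/// struct S { int x; double y; } s;
/// #pragma omp target map(tofrom : s) map(to : s.x)
/// \endcode
/// the component lists for 's' and 's.x' overlap: the whole-struct list is
/// chosen as the base entry and the member list is carved out of it by the
/// overlap analysis below.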
8992 void generateInfoForCaptureFromComponentLists(
8993 const ValueDecl *VD, ArrayRef<MapData> DeclComponentLists,
8994 MapCombinedInfoTy &CurComponentListInfo, StructRangeInfoTy &PartialStruct,
8995 bool IsListEligibleForTargetParamFlag,
8996 bool AreBothBasePtrAndPteeMapped = false) const {
8997 // Find overlapping elements (including the offset from the base element).
8998 llvm::SmallDenseMap<
8999 const MapData *,
9000 llvm::SmallVector<
9001 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
9002 4>
9003 OverlappedData;
9004 size_t Count = 0;
9005 for (const MapData &L : DeclComponentLists) {
9006 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9007 OpenMPMapClauseKind MapType;
9008 ArrayRef<OpenMPMapModifierKind> MapModifiers;
9009 bool IsImplicit;
9010 const ValueDecl *Mapper;
9011 const Expr *VarRef;
9012 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9013 L;
9014 ++Count;
9015 for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
9016 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
9017 std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
9018 VarRef) = L1;
9019 auto CI = Components.rbegin();
9020 auto CE = Components.rend();
9021 auto SI = Components1.rbegin();
9022 auto SE = Components1.rend();
9023 for (; CI != CE && SI != SE; ++CI, ++SI) {
9024 if (CI->getAssociatedExpression()->getStmtClass() !=
9025 SI->getAssociatedExpression()->getStmtClass())
9026 break;
9027 // Are we dealing with different variables/fields?
9028 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
9029 break;
9030 }
9031 // We found an overlap if, for at least one of the component lists, we
9032 // reached the head of the list.
9033 if (CI == CE || SI == SE) {
9034 // Ignore it if it is the same component.
9035 if (CI == CE && SI == SE)
9036 continue;
9037 const auto It = (SI == SE) ? CI : SI;
9038 // If one component is a pointer and another one is a kind of
9039 // dereference of this pointer (array subscript, section, dereference,
9040 // etc.), it is not an overlapping.
9041 // Same, if one component is a base and another component is a
9042 // dereferenced pointer memberexpr with the same base.
9043 if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
9044 (std::prev(It)->getAssociatedDeclaration() &&
9045 std::prev(It)
9046 ->getAssociatedDeclaration()
9047 ->getType()
9048 ->isPointerType()) ||
9049 (It->getAssociatedDeclaration() &&
9050 It->getAssociatedDeclaration()->getType()->isPointerType() &&
9051 std::next(It) != CE && std::next(It) != SE))
9052 continue;
9053 const MapData &BaseData = CI == CE ? L : L1;
9054 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
9055 SI == SE ? Components : Components1;
9056 OverlappedData[&BaseData].push_back(SubData);
9057 }
9058 }
9059 }
9060 // Sort the overlapped elements for each item.
9061 llvm::SmallVector<const FieldDecl *, 4> Layout;
9062 if (!OverlappedData.empty()) {
9063 const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
9064 const Type *OrigType = BaseType->getPointeeOrArrayElementType();
9065 while (BaseType != OrigType) {
9066 BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
9067 OrigType = BaseType->getPointeeOrArrayElementType();
9068 }
9069
9070 if (const auto *CRD = BaseType->getAsCXXRecordDecl())
9071 getPlainLayout(CRD, Layout, /*AsBase=*/false);
9072 else {
9073 const auto *RD = BaseType->getAsRecordDecl();
9074 Layout.append(RD->field_begin(), RD->field_end());
9075 }
9076 }
9077 for (auto &Pair : OverlappedData) {
9078 llvm::stable_sort(
9079 Pair.getSecond(),
9080 [&Layout](
9081 OMPClauseMappableExprCommon::MappableExprComponentListRef First,
9082 OMPClauseMappableExprCommon::MappableExprComponentListRef
9083 Second) {
9084 auto CI = First.rbegin();
9085 auto CE = First.rend();
9086 auto SI = Second.rbegin();
9087 auto SE = Second.rend();
9088 for (; CI != CE && SI != SE; ++CI, ++SI) {
9089 if (CI->getAssociatedExpression()->getStmtClass() !=
9090 SI->getAssociatedExpression()->getStmtClass())
9091 break;
9092 // Are we dealing with different variables/fields?
9093 if (CI->getAssociatedDeclaration() !=
9094 SI->getAssociatedDeclaration())
9095 break;
9096 }
9097
9098 // Lists contain the same elements.
9099 if (CI == CE && SI == SE)
9100 return false;
9101
9102 // A list with fewer elements is less than a list with more elements.
9103 if (CI == CE || SI == SE)
9104 return CI == CE;
9105
9106 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
9107 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
9108 if (FD1->getParent() == FD2->getParent())
9109 return FD1->getFieldIndex() < FD2->getFieldIndex();
9110 const auto *It =
9111 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
9112 return FD == FD1 || FD == FD2;
9113 });
9114 return *It == FD1;
9115 });
9116 }
9117
9118 // Associated with a capture, because the mapping flags depend on it.
9119 // Go through all of the elements with the overlapped elements.
9120 bool AddTargetParamFlag = IsListEligibleForTargetParamFlag;
9121 MapCombinedInfoTy StructBaseCombinedInfo;
9122 for (const auto &Pair : OverlappedData) {
9123 const MapData &L = *Pair.getFirst();
9124 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9125 OpenMPMapClauseKind MapType;
9126 ArrayRef<OpenMPMapModifierKind> MapModifiers;
9127 bool IsImplicit;
9128 const ValueDecl *Mapper;
9129 const Expr *VarRef;
9130 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9131 L;
9132 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
9133 OverlappedComponents = Pair.getSecond();
9134 generateInfoForComponentList(
9135 MapType, MapModifiers, {}, Components, CurComponentListInfo,
9136 StructBaseCombinedInfo, PartialStruct, AddTargetParamFlag, IsImplicit,
9137 /*GenerateAllInfoForClauses*/ false, Mapper,
9138 /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
9139 AddTargetParamFlag = false;
9140 }
9141 // Go through other elements without overlapped elements.
9142 for (const MapData &L : DeclComponentLists) {
9143 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9144 OpenMPMapClauseKind MapType;
9145 ArrayRef<OpenMPMapModifierKind> MapModifiers;
9146 bool IsImplicit;
9147 const ValueDecl *Mapper;
9148 const Expr *VarRef;
9149 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9150 L;
9151 auto It = OverlappedData.find(&L);
9152 if (It == OverlappedData.end())
9153 generateInfoForComponentList(
9154 MapType, MapModifiers, {}, Components, CurComponentListInfo,
9155 StructBaseCombinedInfo, PartialStruct, AddTargetParamFlag,
9156 IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
9157 /*ForDeviceAddr=*/false, VD, VarRef,
9158 /*OverlappedElements*/ {}, AreBothBasePtrAndPteeMapped);
9159 AddTargetParamFlag = false;
9160 }
9161 }
9162
9163 /// Generate the default map information for a given capture \a CI,
9164 /// record field declaration \a RI and captured value \a CV.
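/// For instance (an assumed example, not taken from this file), in
/// \code
/// int a = 0; S s;
/// #pragma omp target
/// { a += 1; s.work(); }
/// \endcode
/// 'a' is captured by copy and receives an implicit LITERAL map, while 's'
/// is captured by reference and receives an implicit 'tofrom' map; both are
/// additionally marked TARGET_PARAM below.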
9165 void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
9166 const FieldDecl &RI, llvm::Value *CV,
9167 MapCombinedInfoTy &CombinedInfo) const {
9168 bool IsImplicit = true;
9169 // Do the default mapping.
9170 if (CI.capturesThis()) {
9171 CombinedInfo.Exprs.push_back(nullptr);
9172 CombinedInfo.BasePointers.push_back(CV);
9173 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9174 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9175 CombinedInfo.Pointers.push_back(CV);
9176 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
9177 CombinedInfo.Sizes.push_back(
9178 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
9179 CGF.Int64Ty, /*isSigned=*/true));
9180 // Default map type.
9181 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
9182 OpenMPOffloadMappingFlags::OMP_MAP_FROM);
9183 } else if (CI.capturesVariableByCopy()) {
9184 const VarDecl *VD = CI.getCapturedVar();
9185 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9186 CombinedInfo.BasePointers.push_back(CV);
9187 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9188 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9189 CombinedInfo.Pointers.push_back(CV);
9190 if (!RI.getType()->isAnyPointerType()) {
9191 // We have to signal to the runtime that captures passed by value are
9192 // not pointers.
9193 CombinedInfo.Types.push_back(
9194 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
9195 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9196 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
9197 } else {
9198 // Pointers are implicitly mapped with a zero size and no flags
9199 // (other than first map that is added for all implicit maps).
9200 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
9201 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
9202 }
9203 auto I = FirstPrivateDecls.find(VD);
9204 if (I != FirstPrivateDecls.end())
9205 IsImplicit = I->getSecond();
9206 } else {
9207 assert(CI.capturesVariable() && "Expected captured reference.");
9208 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
9209 QualType ElementType = PtrTy->getPointeeType();
9210 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9211 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
9212 // The default map type for a scalar/complex type is 'to' because by
9213 // default the value doesn't have to be retrieved. For an aggregate
9214 // type, the default is 'tofrom'.
9215 CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
9216 const VarDecl *VD = CI.getCapturedVar();
9217 auto I = FirstPrivateDecls.find(VD);
9218 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9219 CombinedInfo.BasePointers.push_back(CV);
9220 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9221 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9222 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
9223 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
9224 CV, ElementType, CGF.getContext().getDeclAlign(VD),
9225 AlignmentSource::Decl));
9226 CombinedInfo.Pointers.push_back(PtrAddr.emitRawPointer(CGF));
9227 } else {
9228 CombinedInfo.Pointers.push_back(CV);
9229 }
9230 if (I != FirstPrivateDecls.end())
9231 IsImplicit = I->getSecond();
9232 }
9233 // Every default map produces a single argument which is a target parameter.
9234 CombinedInfo.Types.back() |=
9235 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
9236
9237 // Add flag stating this is an implicit map.
9238 if (IsImplicit)
9239 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
9240
9241 // No user-defined mapper for default mapping.
9242 CombinedInfo.Mappers.push_back(nullptr);
9243 }
9244};
9245} // anonymous namespace
9246
9247// Try to extract the base declaration from a `this->x` expression if possible.
9248 static ValueDecl *getDeclFromThisExpr(const Expr *E) {
9249 if (!E)
9250 return nullptr;
9251
9252 if (const auto *OASE = dyn_cast<ArraySectionExpr>(E->IgnoreParenCasts()))
9253 if (const MemberExpr *ME =
9254 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
9255 return ME->getMemberDecl();
9256 return nullptr;
9257}
9258
9259/// Emit a string constant containing the names of the values mapped to the
9260/// offloading runtime library.
9261static llvm::Constant *
9262emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9263 MappableExprsHandler::MappingExprInfo &MapExprs) {
9264
9265 uint32_t SrcLocStrSize;
9266 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
9267 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
9268
9269 SourceLocation Loc;
9270 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
9271 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
9272 Loc = VD->getLocation();
9273 else
9274 Loc = MapExprs.getMapExpr()->getExprLoc();
9275 } else {
9276 Loc = MapExprs.getMapDecl()->getLocation();
9277 }
9278
9279 std::string ExprName;
9280 if (MapExprs.getMapExpr()) {
9281 PrintingPolicy P(CGF.getContext().getLangOpts());
9282 llvm::raw_string_ostream OS(ExprName);
9283 MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9284 } else {
9285 ExprName = MapExprs.getMapDecl()->getNameAsString();
9286 }
9287
9288 std::string FileName;
9289 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9290 if (auto *DbgInfo = CGF.getDebugInfo())
9291 FileName = DbgInfo->remapDIPath(PLoc.getFilename());
9292 else
9293 FileName = PLoc.getFilename();
9294 return OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName, PLoc.getLine(),
9295 PLoc.getColumn(), SrcLocStrSize);
9296}
9297/// Emit the arrays used to pass the captures and map information to the
9298/// offloading runtime library. If there is no map or capture information,
9299/// return nullptr by reference.
9300 static void emitOffloadingArraysAndArgs(
9301 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9302 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
9303 bool IsNonContiguous = false, bool ForEndCall = false) {
9304 CodeGenModule &CGM = CGF.CGM;
9305
9306 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
9307 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
9308 CGF.AllocaInsertPt->getIterator());
9309 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
9310 CGF.Builder.GetInsertPoint());
9311
9312 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
9313 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
9314 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
9315 }
9316 };
9317
9318 auto CustomMapperCB = [&](unsigned int I) {
9319 llvm::Function *MFunc = nullptr;
9320 if (CombinedInfo.Mappers[I]) {
9321 Info.HasMapper = true;
9322 MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
9323 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
9324 }
9325 return MFunc;
9326 };
9327 cantFail(OMPBuilder.emitOffloadingArraysAndArgs(
9328 AllocaIP, CodeGenIP, Info, Info.RTArgs, CombinedInfo, CustomMapperCB,
9329 IsNonContiguous, ForEndCall, DeviceAddrCB));
9330}
9331
9332/// Check for inner distribute directive.
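/// For example (illustrative only), in
/// \code
/// #pragma omp target
/// #pragma omp teams distribute
/// for (int i = 0; i < N; ++i)
///   work(i);
/// \endcode
/// the nested 'teams distribute' directive is returned so the trip count of
/// the distribute loop can be precomputed for the target kernel.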
9333static const OMPExecutableDirective *
9334 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
9335 const auto *CS = D.getInnermostCapturedStmt();
9336 const auto *Body =
9337 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
9338 const Stmt *ChildStmt =
9339 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9340
9341 if (const auto *NestedDir =
9342 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9343 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
9344 switch (D.getDirectiveKind()) {
9345 case OMPD_target:
9346 // For now, treat 'target' with nested 'teams loop' as if it's
9347 // distributed (target teams distribute).
9348 if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
9349 return NestedDir;
9350 if (DKind == OMPD_teams) {
9351 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
9352 /*IgnoreCaptured=*/true);
9353 if (!Body)
9354 return nullptr;
9355 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9356 if (const auto *NND =
9357 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9358 DKind = NND->getDirectiveKind();
9359 if (isOpenMPDistributeDirective(DKind))
9360 return NND;
9361 }
9362 }
9363 return nullptr;
9364 case OMPD_target_teams:
9365 if (isOpenMPDistributeDirective(DKind))
9366 return NestedDir;
9367 return nullptr;
9368 case OMPD_target_parallel:
9369 case OMPD_target_simd:
9370 case OMPD_target_parallel_for:
9371 case OMPD_target_parallel_for_simd:
9372 return nullptr;
9373 case OMPD_target_teams_distribute:
9374 case OMPD_target_teams_distribute_simd:
9375 case OMPD_target_teams_distribute_parallel_for:
9376 case OMPD_target_teams_distribute_parallel_for_simd:
9377 case OMPD_parallel:
9378 case OMPD_for:
9379 case OMPD_parallel_for:
9380 case OMPD_parallel_master:
9381 case OMPD_parallel_sections:
9382 case OMPD_for_simd:
9383 case OMPD_parallel_for_simd:
9384 case OMPD_cancel:
9385 case OMPD_cancellation_point:
9386 case OMPD_ordered:
9387 case OMPD_threadprivate:
9388 case OMPD_allocate:
9389 case OMPD_task:
9390 case OMPD_simd:
9391 case OMPD_tile:
9392 case OMPD_unroll:
9393 case OMPD_sections:
9394 case OMPD_section:
9395 case OMPD_single:
9396 case OMPD_master:
9397 case OMPD_critical:
9398 case OMPD_taskyield:
9399 case OMPD_barrier:
9400 case OMPD_taskwait:
9401 case OMPD_taskgroup:
9402 case OMPD_atomic:
9403 case OMPD_flush:
9404 case OMPD_depobj:
9405 case OMPD_scan:
9406 case OMPD_teams:
9407 case OMPD_target_data:
9408 case OMPD_target_exit_data:
9409 case OMPD_target_enter_data:
9410 case OMPD_distribute:
9411 case OMPD_distribute_simd:
9412 case OMPD_distribute_parallel_for:
9413 case OMPD_distribute_parallel_for_simd:
9414 case OMPD_teams_distribute:
9415 case OMPD_teams_distribute_simd:
9416 case OMPD_teams_distribute_parallel_for:
9417 case OMPD_teams_distribute_parallel_for_simd:
9418 case OMPD_target_update:
9419 case OMPD_declare_simd:
9420 case OMPD_declare_variant:
9421 case OMPD_begin_declare_variant:
9422 case OMPD_end_declare_variant:
9423 case OMPD_declare_target:
9424 case OMPD_end_declare_target:
9425 case OMPD_declare_reduction:
9426 case OMPD_declare_mapper:
9427 case OMPD_taskloop:
9428 case OMPD_taskloop_simd:
9429 case OMPD_master_taskloop:
9430 case OMPD_master_taskloop_simd:
9431 case OMPD_parallel_master_taskloop:
9432 case OMPD_parallel_master_taskloop_simd:
9433 case OMPD_requires:
9434 case OMPD_metadirective:
9435 case OMPD_unknown:
9436 default:
9437 llvm_unreachable("Unexpected directive.");
9438 }
9439 }
9440
9441 return nullptr;
9442}
9443
9444/// Emit the user-defined mapper function. The code generation follows the
9445/// pattern in the example below.
9446/// \code
9447/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9448/// void *base, void *begin,
9449/// int64_t size, int64_t type,
9450/// void *name = nullptr) {
9451/// // Allocate space for an array section first or add a base/begin for
9452/// // pointer dereference.
9453/// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9454/// !maptype.IsDelete)
9455/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9456/// size*sizeof(Ty), clearToFromMember(type));
9457/// // Map members.
9458/// for (unsigned i = 0; i < size; i++) {
9459/// // For each component specified by this mapper:
9460/// for (auto c : begin[i]->all_components) {
9461/// if (c.hasMapper())
9462/// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9463/// c.arg_type, c.arg_name);
9464/// else
9465/// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9466/// c.arg_begin, c.arg_size, c.arg_type,
9467/// c.arg_name);
9468/// }
9469/// }
9470/// // Delete the array section.
9471/// if (size > 1 && maptype.IsDelete)
9472/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9473/// size*sizeof(Ty), clearToFromMember(type));
9474/// }
9475/// \endcode
9476 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9477 CodeGenFunction *CGF) {
9478 if (UDMMap.count(D) > 0)
9479 return;
9480 ASTContext &C = CGM.getContext();
9481 QualType Ty = D->getType();
9482 auto *MapperVarDecl =
9483 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9484 CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9485 llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);
9486
9487 CodeGenFunction MapperCGF(CGM);
9488 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
9489 auto PrivatizeAndGenMapInfoCB =
9490 [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP, llvm::Value *PtrPHI,
9491 llvm::Value *BeginArg) -> llvm::OpenMPIRBuilder::MapInfosTy & {
9492 MapperCGF.Builder.restoreIP(CodeGenIP);
9493
9494 // Privatize the declared variable of mapper to be the current array
9495 // element.
9496 Address PtrCurrent(
9497 PtrPHI, ElemTy,
9498 Address(BeginArg, MapperCGF.VoidPtrTy, CGM.getPointerAlign())
9499 .getAlignment()
9500 .alignmentOfArrayElement(ElementSize));
9501 CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9502 Scope.addPrivate(MapperVarDecl, PtrCurrent);
9503 (void)Scope.Privatize();
9504
9505 // Get map clause information.
9506 MappableExprsHandler MEHandler(*D, MapperCGF);
9507 MEHandler.generateAllInfoForMapper(CombinedInfo, OMPBuilder);
9508
9509 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9510 return emitMappingInformation(MapperCGF, OMPBuilder, MapExpr);
9511 };
9512 if (CGM.getCodeGenOpts().getDebugInfo() !=
9513 llvm::codegenoptions::NoDebugInfo) {
9514 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
9515 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
9516 FillInfoMap);
9517 }
9518
9519 return CombinedInfo;
9520 };
9521
9522 auto CustomMapperCB = [&](unsigned I) {
9523 llvm::Function *MapperFunc = nullptr;
9524 if (CombinedInfo.Mappers[I]) {
9525 // Call the corresponding mapper function.
9526 MapperFunc = getOrCreateUserDefinedMapperFunc(
9527 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
9528 assert(MapperFunc && "Expect a valid mapper function is available.");
9529 }
9530 return MapperFunc;
9531 };
9532
9533 SmallString<64> TyStr;
9534 llvm::raw_svector_ostream Out(TyStr);
9535 CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out);
9536 std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9537
9538 llvm::Function *NewFn = cantFail(OMPBuilder.emitUserDefinedMapper(
9539 PrivatizeAndGenMapInfoCB, ElemTy, Name, CustomMapperCB));
9540 UDMMap.try_emplace(D, NewFn);
9541 if (CGF)
9542 FunctionUDMMap[CGF->CurFn].push_back(D);
9543}
9544
9545 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9546 const OMPDeclareMapperDecl *D) {
9547 auto I = UDMMap.find(D);
9548 if (I != UDMMap.end())
9549 return I->second;
9550 emitUserDefinedMapper(D);
9551 return UDMMap.lookup(D);
9552}
9553
9554 llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
9555 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9556 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9557 const OMPLoopDirective &D)>
9558 SizeEmitter) {
9559 OpenMPDirectiveKind Kind = D.getDirectiveKind();
9560 const OMPExecutableDirective *TD = &D;
9561 // Get nested teams distribute kind directive, if any. For now, treat
9562 // 'target_teams_loop' as if it's really a target_teams_distribute.
9563 if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
9564 Kind != OMPD_target_teams_loop)
9565 TD = getNestedDistributeDirective(CGM.getContext(), D);
9566 if (!TD)
9567 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9568
9569 const auto *LD = cast<OMPLoopDirective>(TD);
9570 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
9571 return NumIterations;
9572 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9573}
9574
9575static void
9576emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9577 const OMPExecutableDirective &D,
9578 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9579 bool RequiresOuterTask, const CapturedStmt &CS,
9580 bool OffloadingMandatory, CodeGenFunction &CGF) {
9581 if (OffloadingMandatory) {
9582 CGF.Builder.CreateUnreachable();
9583 } else {
9584 if (RequiresOuterTask) {
9585 CapturedVars.clear();
9586 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9587 }
9588 OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
9589 CapturedVars);
9590 }
9591}
9592
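/// As a sketch (hypothetical user code): for a directive such as
/// \code
/// #pragma omp target device(2)
/// \endcode
/// the helper below emits the evaluated device number widened to i64 and
/// falls back to OMP_DEVICEID_UNDEF when no device clause is present.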
9593static llvm::Value *emitDeviceID(
9594 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9595 CodeGenFunction &CGF) {
9596 // Emit device ID if any.
9597 llvm::Value *DeviceID;
9598 if (Device.getPointer()) {
9599 assert((Device.getInt() == OMPC_DEVICE_unknown ||
9600 Device.getInt() == OMPC_DEVICE_device_num) &&
9601 "Expected device_num modifier.");
9602 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
9603 DeviceID =
9604 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
9605 } else {
9606 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9607 }
9608 return DeviceID;
9609}
9610
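/// For illustration (not from the original source), a directive such as
/// \code
/// #pragma omp target ompx_dyn_cgroup_mem(1024)
/// \endcode
/// makes the helper below emit the requested dynamic group-memory size as an
/// unsigned i32 kernel argument; without the clause it defaults to 0.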
9611static llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
9612 CodeGenFunction &CGF) {
9613 llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0);
9614
9615 if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
9616 CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
9617 llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
9618 DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
9619 DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty,
9620 /*isSigned=*/false);
9621 }
9622 return DynCGroupMem;
9623}
9625 MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
9626 const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9627 llvm::OpenMPIRBuilder &OMPBuilder,
9628 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet,
9629 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
9630
9631 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
9632 auto RI = CS.getCapturedRecordDecl()->field_begin();
9633 auto *CV = CapturedVars.begin();
9634 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
9635 CE = CS.capture_end();
9636 CI != CE; ++CI, ++RI, ++CV) {
9637 MappableExprsHandler::MapCombinedInfoTy CurInfo;
9638
9639 // VLA sizes are passed to the outlined region by copy and do not have map
9640 // information associated.
9641 if (CI->capturesVariableArrayType()) {
9642 CurInfo.Exprs.push_back(nullptr);
9643 CurInfo.BasePointers.push_back(*CV);
9644 CurInfo.DevicePtrDecls.push_back(nullptr);
9645 CurInfo.DevicePointers.push_back(
9646 MappableExprsHandler::DeviceInfoTy::None);
9647 CurInfo.Pointers.push_back(*CV);
9648 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9649 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
9650 // Copy to the device as an argument. No need to retrieve it.
9651 CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9652 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
9653 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
9654 CurInfo.Mappers.push_back(nullptr);
9655 } else {
9656 // If we have any information in the map clause, we use it, otherwise we
9657 // just do a default mapping.
9658 MEHandler.generateInfoForCaptureFromClauseInfo(
9659 CI, *CV, CurInfo, OMPBuilder,
9660 /*OffsetForMemberOfFlag=*/CombinedInfo.BasePointers.size());
9661
9662 if (!CI->capturesThis())
9663 MappedVarSet.insert(CI->getCapturedVar());
9664 else
9665 MappedVarSet.insert(nullptr);
9666
9667 if (CurInfo.BasePointers.empty())
9668 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
9669
9670 // Generate correct mapping for variables captured by reference in
9671 // lambdas.
9672 if (CI->capturesVariable())
9673 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
9674 CurInfo, LambdaPointers);
9675 }
9676 // We expect to have at least an element of information for this capture.
9677 assert(!CurInfo.BasePointers.empty() &&
9678 "Non-existing map pointer for capture!");
9679 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
9680 CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
9681 CurInfo.BasePointers.size() == CurInfo.Types.size() &&
9682 CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
9683 "Inconsistent map information sizes!");
9684
9685 // We need to append the results of this capture to what we already have.
9686 CombinedInfo.append(CurInfo);
9687 }
9688 // Adjust MEMBER_OF flags for the lambda captures.
9689 MEHandler.adjustMemberOfForLambdaCaptures(
9690 OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
9691 CombinedInfo.Pointers, CombinedInfo.Types);
9692}
9693static void
9694genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
9695 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9696 llvm::OpenMPIRBuilder &OMPBuilder,
9697 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkippedVarSet =
9698 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) {
9699
9700 CodeGenModule &CGM = CGF.CGM;
9701 // Map any list items in a map clause that were not captures because they
9702 // weren't referenced within the construct.
9703 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, SkippedVarSet);
9704
9705 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9706 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
9707 };
9708 if (CGM.getCodeGenOpts().getDebugInfo() !=
9709 llvm::codegenoptions::NoDebugInfo) {
9710 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
9711 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
9712 FillInfoMap);
9713 }
9714}
9715
9716 static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
9717 const CapturedStmt &CS,
9718 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9719 llvm::OpenMPIRBuilder &OMPBuilder,
9720 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
9721 // Get mappable expression information.
9722 MappableExprsHandler MEHandler(D, CGF);
9723 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
9724
9725 genMapInfoForCaptures(MEHandler, CGF, CS, CapturedVars, OMPBuilder,
9726 MappedVarSet, CombinedInfo);
9727 genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, MappedVarSet);
9728}
9729
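/// As an assumed example (not part of this file), the 'ompx_bare' extension
/// requires explicit launch bounds:
/// \code
/// #pragma omp target teams ompx_bare num_teams(8) thread_limit(128)
/// \endcode
/// The template below evaluates each expression of the given clause kind and
/// collects the values as i32 kernel-launch arguments.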
9730template <typename ClauseTy>
9731static void
9732 emitClauseForBareTargetDirective(CodeGenFunction &CGF,
9733 const OMPExecutableDirective &D,
9734 llvm::SmallVectorImpl<llvm::Value *> &Values) {
9735 const auto *C = D.getSingleClause<ClauseTy>();
9736 assert(!C->varlist_empty() &&
9737 "ompx_bare requires explicit num_teams and thread_limit");
9739 for (auto *E : C->varlist()) {
9740 llvm::Value *V = CGF.EmitScalarExpr(E);
9741 Values.push_back(
9742 CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true));
9743 }
9744}
9745
9746 static void emitTargetCallKernelLaunch(
9747 CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9748 const OMPExecutableDirective &D,
9749 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
9750 const CapturedStmt &CS, bool OffloadingMandatory,
9751 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9752 llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
9753 llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
9754 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9755 const OMPLoopDirective &D)>
9756 SizeEmitter,
9757 CodeGenFunction &CGF, CodeGenModule &CGM) {
9758 llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();
9759
9760 // Fill up the arrays with all the captured variables.
9761 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
9762 CGOpenMPRuntime::TargetDataInfo Info;
9763 genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo);
9764
9765 emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
9766 /*IsNonContiguous=*/true, /*ForEndCall=*/false);
9767
9768 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
9769 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
9770 CGF.VoidPtrTy, CGM.getPointerAlign());
9771 InputInfo.PointersArray =
9772 Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
9773 InputInfo.SizesArray =
9774 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
9775 InputInfo.MappersArray =
9776 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
9777 MapTypesArray = Info.RTArgs.MapTypesArray;
9778 MapNamesArray = Info.RTArgs.MapNamesArray;
9779
9780 auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
9781 RequiresOuterTask, &CS, OffloadingMandatory, Device,
9782 OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
9783 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
9784 bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;
9785
9786 if (IsReverseOffloading) {
9787 // Reverse offloading is not supported, so just execute on the host.
9788 // FIXME: This fallback solution is incorrect since it ignores the
9789 // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
9790 // assert here and ensure SEMA emits an error.
9791 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9792 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9793 return;
9794 }
9795
9796 bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
9797 unsigned NumTargetItems = InputInfo.NumberOfTargetItems;
9798
9799 llvm::Value *BasePointersArray =
9800 InputInfo.BasePointersArray.emitRawPointer(CGF);
9801 llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
9802 llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
9803 llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);
9804
9805 auto &&EmitTargetCallFallbackCB =
9806 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9807 OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
9808 -> llvm::OpenMPIRBuilder::InsertPointTy {
9809 CGF.Builder.restoreIP(IP);
9810 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9811 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9812 return CGF.Builder.saveIP();
9813 };
9814
9815 bool IsBare = D.hasClausesOfKind<OMPXBareClause>();
9816 SmallVector<llvm::Value *, 3> NumTeams;
9817 SmallVector<llvm::Value *, 3> NumThreads;
9818 if (IsBare) {
9819 emitClauseForBareTargetDirective<OMPNumTeamsClause>(CGF, D, NumTeams);
9820 emitClauseForBareTargetDirective<OMPThreadLimitClause>(CGF, D,
9821 NumThreads);
9822 } else {
9823 NumTeams.push_back(OMPRuntime->emitNumTeamsForTargetDirective(CGF, D));
9824 NumThreads.push_back(
9825 OMPRuntime->emitNumThreadsForTargetDirective(CGF, D));
9826 }
9827
9828 llvm::Value *DeviceID = emitDeviceID(Device, CGF);
9829 llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
9830 llvm::Value *NumIterations =
9831 OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
9832 llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
9833 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
9834 CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
9835
9836 llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
9837 BasePointersArray, PointersArray, SizesArray, MapTypesArray,
9838 nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);
9839
9840 llvm::OpenMPIRBuilder::TargetKernelArgs Args(
9841 NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
9842 DynCGGroupMem, HasNoWait);
9843
9844 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
9845 cantFail(OMPRuntime->getOMPBuilder().emitKernelLaunch(
9846 CGF.Builder, OutlinedFnID, EmitTargetCallFallbackCB, Args, DeviceID,
9847 RTLoc, AllocaIP));
9848 CGF.Builder.restoreIP(AfterIP);
9849 };
9850
9851 if (RequiresOuterTask)
9852 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
9853 else
9854 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
9855}
9856
9857static void
9858emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9859 const OMPExecutableDirective &D,
9860 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9861 bool RequiresOuterTask, const CapturedStmt &CS,
9862 bool OffloadingMandatory, CodeGenFunction &CGF) {
9863
9864 // Notify that the host version must be executed.
9865 auto &&ElseGen =
9866 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9867 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9868 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9869 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9870 };
9871
9872 if (RequiresOuterTask) {
9873 CodeGenFunction::OMPTargetDataInfo InputInfo;
9874 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
9875 } else {
9876 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
9877 }
9878}
9879
9880 void CGOpenMPRuntime::emitTargetCall(
9881 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9882 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
9883 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9884 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9885 const OMPLoopDirective &D)>
9886 SizeEmitter) {
9887 if (!CGF.HaveInsertPoint())
9888 return;
9889
9890 const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
9891 CGM.getLangOpts().OpenMPOffloadMandatory;
9892
9893 assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
9894
9895 const bool RequiresOuterTask =
9896 D.hasClausesOfKind<OMPDependClause>() ||
9897 D.hasClausesOfKind<OMPNowaitClause>() ||
9898 D.hasClausesOfKind<OMPInReductionClause>() ||
9899 (CGM.getLangOpts().OpenMP >= 51 &&
9900 needsTaskBasedThreadLimit(D.getDirectiveKind()) &&
9901 D.hasClausesOfKind<OMPThreadLimitClause>());
9902 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
9903 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
9904 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
9905 PrePostActionTy &) {
9906 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9907 };
9908 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
9909
9911 llvm::Value *MapTypesArray = nullptr;
9912 llvm::Value *MapNamesArray = nullptr;
9913
9914 auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
9915 RequiresOuterTask, &CS, OffloadingMandatory, Device,
9916 OutlinedFnID, &InputInfo, &MapTypesArray,
9917 &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
9918 PrePostActionTy &) {
9919 emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
9920 RequiresOuterTask, CS, OffloadingMandatory,
9921 Device, OutlinedFnID, InputInfo, MapTypesArray,
9922 MapNamesArray, SizeEmitter, CGF, CGM);
9923 };
9924
9925 auto &&TargetElseGen =
9926 [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9927 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9928 emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
9929 CS, OffloadingMandatory, CGF);
9930 };
9931
9932 // If we have a target function ID it means that we need to support
9933 // offloading; otherwise, just execute on the host. We need to execute on the
9934 // host regardless of the conditional in the if clause if, e.g., the user does
9935 // not specify any target triples.
9936 if (OutlinedFnID) {
9937 if (IfCond) {
9938 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
9939 } else {
9940 RegionCodeGenTy ThenRCG(TargetThenGen);
9941 ThenRCG(CGF);
9942 }
9943 } else {
9944 RegionCodeGenTy ElseRCG(TargetElseGen);
9945 ElseRCG(CGF);
9946 }
9947}
9948
9949 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
9950 StringRef ParentName) {
9951 if (!S)
9952 return;
9953
9954 // Codegen OMP target directives that offload compute to the device.
9955 bool RequiresDeviceCodegen =
9956 isa<OMPExecutableDirective>(S) &&
9957 isOpenMPTargetExecutionDirective(
9958 cast<OMPExecutableDirective>(S)->getDirectiveKind());
9959
9960 if (RequiresDeviceCodegen) {
9961 const auto &E = *cast<OMPExecutableDirective>(S);
9962
9963 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
9964 CGM, OMPBuilder, E.getBeginLoc(), ParentName);
9965
9966 // Is this a target region that should not be emitted as an entry point? If
9967 // so just signal we are done with this target region.
9968 if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
9969 return;
9970
9971 switch (E.getDirectiveKind()) {
9972 case OMPD_target:
9973 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
9974 cast<OMPTargetDirective>(E));
9975 break;
9976 case OMPD_target_parallel:
9977 CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
9978 CGM, ParentName, cast<OMPTargetParallelDirective>(E));
9979 break;
9980 case OMPD_target_teams:
9981 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
9982 CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
9983 break;
9984 case OMPD_target_teams_distribute:
9985 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
9986 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
9987 break;
9988 case OMPD_target_teams_distribute_simd:
9989 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
9990 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
9991 break;
9992 case OMPD_target_parallel_for:
9993 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
9994 CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
9995 break;
9996 case OMPD_target_parallel_for_simd:
9997 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
9998 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
9999 break;
10000 case OMPD_target_simd:
10001 CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
10002 CGM, ParentName, cast<OMPTargetSimdDirective>(E));
10003 break;
10004 case OMPD_target_teams_distribute_parallel_for:
10005 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
10006 CGM, ParentName,
10007 cast<OMPTargetTeamsDistributeParallelForDirective>(E));
10008 break;
10009 case OMPD_target_teams_distribute_parallel_for_simd:
10010 CodeGenFunction::
10011 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
10012 CGM, ParentName,
10013 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
10014 break;
10015 case OMPD_target_teams_loop:
10016 CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
10017 CGM, ParentName, cast<OMPTargetTeamsGenericLoopDirective>(E));
10018 break;
10019 case OMPD_target_parallel_loop:
10020 CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
10021 CGM, ParentName, cast<OMPTargetParallelGenericLoopDirective>(E));
10022 break;
10023 case OMPD_parallel:
10024 case OMPD_for:
10025 case OMPD_parallel_for:
10026 case OMPD_parallel_master:
10027 case OMPD_parallel_sections:
10028 case OMPD_for_simd:
10029 case OMPD_parallel_for_simd:
10030 case OMPD_cancel:
10031 case OMPD_cancellation_point:
10032 case OMPD_ordered:
10033 case OMPD_threadprivate:
10034 case OMPD_allocate:
10035 case OMPD_task:
10036 case OMPD_simd:
10037 case OMPD_tile:
10038 case OMPD_unroll:
10039 case OMPD_sections:
10040 case OMPD_section:
10041 case OMPD_single:
10042 case OMPD_master:
10043 case OMPD_critical:
10044 case OMPD_taskyield:
10045 case OMPD_barrier:
10046 case OMPD_taskwait:
10047 case OMPD_taskgroup:
10048 case OMPD_atomic:
10049 case OMPD_flush:
10050 case OMPD_depobj:
10051 case OMPD_scan:
10052 case OMPD_teams:
10053 case OMPD_target_data:
10054 case OMPD_target_exit_data:
10055 case OMPD_target_enter_data:
10056 case OMPD_distribute:
10057 case OMPD_distribute_simd:
10058 case OMPD_distribute_parallel_for:
10059 case OMPD_distribute_parallel_for_simd:
10060 case OMPD_teams_distribute:
10061 case OMPD_teams_distribute_simd:
10062 case OMPD_teams_distribute_parallel_for:
10063 case OMPD_teams_distribute_parallel_for_simd:
10064 case OMPD_target_update:
10065 case OMPD_declare_simd:
10066 case OMPD_declare_variant:
10067 case OMPD_begin_declare_variant:
10068 case OMPD_end_declare_variant:
10069 case OMPD_declare_target:
10070 case OMPD_end_declare_target:
10071 case OMPD_declare_reduction:
10072 case OMPD_declare_mapper:
10073 case OMPD_taskloop:
10074 case OMPD_taskloop_simd:
10075 case OMPD_master_taskloop:
10076 case OMPD_master_taskloop_simd:
10077 case OMPD_parallel_master_taskloop:
10078 case OMPD_parallel_master_taskloop_simd:
10079 case OMPD_requires:
10080 case OMPD_metadirective:
10081 case OMPD_unknown:
10082 default:
10083 llvm_unreachable("Unknown target directive for OpenMP device codegen.");
10084 }
10085 return;
10086 }
10087
10088 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
10089 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
10090 return;
10091
10092 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
10093 return;
10094 }
10095
10096 // If this is a lambda function, look into its body.
10097 if (const auto *L = dyn_cast<LambdaExpr>(S))
10098 S = L->getBody();
10099
10100 // Keep looking for target regions recursively.
10101 for (const Stmt *II : S->children())
10102 scanForTargetRegionsFunctions(II, ParentName);
10103}
10104
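/// For example (hypothetical declarations, not from this file):
/// \code
/// #pragma omp declare target device_type(nohost)
/// void only_on_device();
/// #pragma omp end declare target
/// \endcode
/// 'only_on_device' is skipped when emitting host code, and a
/// device_type(host) function is skipped when emitting device code, as
/// decided by the predicate below.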
10105static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10106 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10107 OMPDeclareTargetDeclAttr::getDeviceType(VD);
10108 if (!DevTy)
10109 return false;
10110 // Do not emit device_type(nohost) functions for the host.
10111 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10112 return true;
10113 // Do not emit device_type(host) functions for the device.
10114 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10115 return true;
10116 return false;
10117}
10118
10119 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10120 // If emitting code for the host, we do not process FD here. Instead we do
10121 // the normal code generation.
10122 if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
10123 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10124 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10125 CGM.getLangOpts().OpenMPIsTargetDevice))
10126 return true;
10127 return false;
10128 }
10129
10130 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10131 // Try to detect target regions in the function.
10132 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10133 StringRef Name = CGM.getMangledName(GD);
10134 scanForTargetRegionsFunctions(FD->getBody(), Name);
10135 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10136 CGM.getLangOpts().OpenMPIsTargetDevice))
10137 return true;
10138 }
10139
10140 // Do not emit the function if it is not marked as declare target.
10141 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10142 AlreadyEmittedTargetDecls.count(VD) == 0;
10143}
10144
10145 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10146 if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
10147 CGM.getLangOpts().OpenMPIsTargetDevice))
10148 return true;
10149
10150 if (!CGM.getLangOpts().OpenMPIsTargetDevice)
10151 return false;
10152
10153 // Check if there are Ctors/Dtors in this declaration and look for target
10154 // regions in it. We use the complete variant to produce the kernel name
10155 // mangling.
10156 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10157 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10158 for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10159 StringRef ParentName =
10160 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10161 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10162 }
10163 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10164 StringRef ParentName =
10165 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10166 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10167 }
10168 }
10169
10170 // Do not emit the variable if it is not marked as declare target.
10171 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10172 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10173 cast<VarDecl>(GD.getDecl()));
10174 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10175 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10176 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10177 HasRequiresUnifiedSharedMemory)) {
10178 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10179 return true;
10180 }
10181 return false;
10182}
10183
10184 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10185 llvm::Constant *Addr) {
10186 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10187 !CGM.getLangOpts().OpenMPIsTargetDevice)
10188 return;
10189
10190 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10191 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10192
10193 // If this is an 'extern' declaration we defer to the canonical definition and
10194 // do not emit an offloading entry.
10195 if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
10196 VD->hasExternalStorage())
10197 return;
10198
10199 if (!Res) {
10200 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
10201 // Register non-target variables being emitted in device code (debug info
10202 // may cause this).
10203 StringRef VarName = CGM.getMangledName(VD);
10204 EmittedNonTargetVariables.try_emplace(VarName, Addr);
10205 }
10206 return;
10207 }
10208
10209 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
10210 auto LinkageForVariable = [&VD, this]() {
10211 return CGM.getLLVMLinkageVarDefinition(VD);
10212 };
10213
10214 std::vector<llvm::GlobalVariable *> GeneratedRefs;
10215 OMPBuilder.registerTargetGlobalVariable(
10216 convertCaptureClause(VD), convertDeviceClause(VD),
10217 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
10218 VD->isExternallyVisible(),
10219 getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
10220 VD->getCanonicalDecl()->getBeginLoc()),
10221 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
10222 CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable,
10223 CGM.getTypes().ConvertTypeForMem(
10224 CGM.getContext().getPointerType(VD->getType())),
10225 Addr);
10226
10227 for (auto *ref : GeneratedRefs)
10228 CGM.addCompilerUsedGlobal(ref);
10229}
10230
10231 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10232 if (isa<FunctionDecl>(GD.getDecl()) ||
10233 isa<OMPDeclareReductionDecl>(GD.getDecl()))
10234 return emitTargetFunctions(GD);
10235
10236 return emitTargetGlobalVariable(GD);
10237}
10238
10239 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10240 for (const VarDecl *VD : DeferredGlobalVariables) {
10241 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10242 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10243 if (!Res)
10244 continue;
10245 if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10246 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10247 !HasRequiresUnifiedSharedMemory) {
10248 CGM.EmitGlobal(VD);
10249 } else {
10250 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10251 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10252 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10253 HasRequiresUnifiedSharedMemory)) &&
10254 "Expected link clause or to clause with unified memory.");
10256 }
10257 }
10258}
10259
10260 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10261 CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10262 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10263 " Expected target-based directive.");
10264}
10265
10266 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10267 for (const OMPClause *Clause : D->clauselists()) {
10268 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10269 HasRequiresUnifiedSharedMemory = true;
10270 OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
10271 } else if (const auto *AC =
10272 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10273 switch (AC->getAtomicDefaultMemOrderKind()) {
10274 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10275 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10276 break;
10277 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10278 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10279 break;
10280 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10281 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10282 break;
10283 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10284 break;
10285 }
10286 }
10287 }
10288}
10289
10290llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
10291 return RequiresAtomicOrdering;
10292}
10293
10294 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10295 LangAS &AS) {
10296 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10297 return false;
10298 const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10299 switch(A->getAllocatorType()) {
10300 case OMPAllocateDeclAttr::OMPNullMemAlloc:
10301 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10302 // Not supported; fall back to the default memory space.
10303 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10304 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10305 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10306 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10307 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10308 case OMPAllocateDeclAttr::OMPConstMemAlloc:
10309 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10310 AS = LangAS::Default;
10311 return true;
10312 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10313 llvm_unreachable("Expected predefined allocator for the variables with the "
10314 "static storage.");
10315 }
10316 return false;
10317}
10318
10319 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
10320 return HasRequiresUnifiedSharedMemory;
10321}
10322
10323 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10324 CodeGenModule &CGM)
10325 : CGM(CGM) {
10326 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
10327 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10328 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10329 }
10330}
10331
10332 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10333 if (CGM.getLangOpts().OpenMPIsTargetDevice)
10334 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10335}
10336
10337 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10338 if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
10339 return true;
10340
10341 const auto *D = cast<FunctionDecl>(GD.getDecl());
10342 // Do not emit the function if it is marked as declare target, as it was
10343 // already emitted.
10344 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10345 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10346 if (auto *F = dyn_cast_or_null<llvm::Function>(
10347 CGM.GetGlobalValue(CGM.getMangledName(GD))))
10348 return !F->isDeclaration();
10349 return false;
10350 }
10351 return true;
10352 }
10353
10354 return !AlreadyEmittedTargetDecls.insert(D).second;
10355}
10356
10357 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10358 const OMPExecutableDirective &D,
10359 SourceLocation Loc,
10360 llvm::Function *OutlinedFn,
10361 ArrayRef<llvm::Value *> CapturedVars) {
10362 if (!CGF.HaveInsertPoint())
10363 return;
10364
10365 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10366 CodeGenFunction::RunCleanupsScope Scope(CGF);
10367
10368 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10369 llvm::Value *Args[] = {
10370 RTLoc,
10371 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10372 OutlinedFn};
10373 llvm::SmallVector<llvm::Value *, 16> RealArgs;
10374 RealArgs.append(std::begin(Args), std::end(Args));
10375 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10376
10377 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10378 CGM.getModule(), OMPRTL___kmpc_fork_teams);
10379 CGF.EmitRuntimeCall(RTLFn, RealArgs);
10380}
10381
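/// As an illustrative sketch (user code assumed, not from this file),
/// \code
/// #pragma omp teams num_teams(4) thread_limit(64)
/// \endcode
/// lowers to a __kmpc_push_num_teams(&loc, tid, 4, 64) call ahead of the
/// teams fork, emitted by the method below; an absent clause is passed as 0.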
10382 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10383 const Expr *NumTeams,
10384 const Expr *ThreadLimit,
10385 SourceLocation Loc) {
10386 if (!CGF.HaveInsertPoint())
10387 return;
10388
10389 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10390
10391 llvm::Value *NumTeamsVal =
10392 NumTeams
10393 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10394 CGF.CGM.Int32Ty, /* isSigned = */ true)
10395 : CGF.Builder.getInt32(0);
10396
10397 llvm::Value *ThreadLimitVal =
10398 ThreadLimit
10399 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10400 CGF.CGM.Int32Ty, /* isSigned = */ true)
10401 : CGF.Builder.getInt32(0);
10402
10403 // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
10404 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10405 ThreadLimitVal};
10406 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10407 CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10408 PushNumTeamsArgs);
10409}
10410
10411 void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF,
10412 const Expr *ThreadLimit,
10413 SourceLocation Loc) {
10414 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10415 llvm::Value *ThreadLimitVal =
10416 ThreadLimit
10417 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10418 CGF.CGM.Int32Ty, /* isSigned = */ true)
10419 : CGF.Builder.getInt32(0);
10420
10421 // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
10422 llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
10423 ThreadLimitVal};
10424 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10425 CGM.getModule(), OMPRTL___kmpc_set_thread_limit),
10426 ThreadLimitArgs);
10427}
10428
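/// For illustration (hypothetical user code), a region such as
/// \code
/// #pragma omp target data map(tofrom : a[0:n]) use_device_ptr(p)
/// { work(a, p); }
/// \endcode
/// is lowered by the method below through OpenMPIRBuilder::createTargetData;
/// the body callback runs in privatized and non-privatized variants depending
/// on whether device addresses were captured.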
10429 void CGOpenMPRuntime::emitTargetDataCalls(
10430 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10431 const Expr *Device, const RegionCodeGenTy &CodeGen,
10432 CGOpenMPRuntime::TargetDataInfo &Info) {
10433 if (!CGF.HaveInsertPoint())
10434 return;
10435
10436 // Action used to replace the default codegen action and turn privatization
10437 // off.
10438 PrePostActionTy NoPrivAction;
10439
10440 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
10441
10442 llvm::Value *IfCondVal = nullptr;
10443 if (IfCond)
10444 IfCondVal = CGF.EvaluateExprAsBool(IfCond);
10445
10446 // Emit device ID if any.
10447 llvm::Value *DeviceID = nullptr;
10448 if (Device) {
10449 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10450 CGF.Int64Ty, /*isSigned=*/true);
10451 } else {
10452 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10453 }
10454
10455 // Fill up the arrays with all the mapped variables.
10456 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10457 auto GenMapInfoCB =
10458 [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
10459 CGF.Builder.restoreIP(CodeGenIP);
10460 // Get map clause information.
10461 MappableExprsHandler MEHandler(D, CGF);
10462 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);
10463
10464 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
10465 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
10466 };
10467 if (CGM.getCodeGenOpts().getDebugInfo() !=
10468 llvm::codegenoptions::NoDebugInfo) {
10469 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
10470 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
10471 FillInfoMap);
10472 }
10473
10474 return CombinedInfo;
10475 };
10476 using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
10477 auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
10478 CGF.Builder.restoreIP(CodeGenIP);
10479 switch (BodyGenType) {
10480 case BodyGenTy::Priv:
10481 if (!Info.CaptureDeviceAddrMap.empty())
10482 CodeGen(CGF);
10483 break;
10484 case BodyGenTy::DupNoPriv:
10485 if (!Info.CaptureDeviceAddrMap.empty()) {
10486 CodeGen.setAction(NoPrivAction);
10487 CodeGen(CGF);
10488 }
10489 break;
10490 case BodyGenTy::NoPriv:
10491 if (Info.CaptureDeviceAddrMap.empty()) {
10492 CodeGen.setAction(NoPrivAction);
10493 CodeGen(CGF);
10494 }
10495 break;
10496 }
10497 return InsertPointTy(CGF.Builder.GetInsertBlock(),
10498 CGF.Builder.GetInsertPoint());
10499 };
10500
10501 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
10502 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
10503 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
10504 }
10505 };
10506
10507 auto CustomMapperCB = [&](unsigned int I) {
10508 llvm::Function *MFunc = nullptr;
10509 if (CombinedInfo.Mappers[I]) {
10510 Info.HasMapper = true;
10511 MFunc = getOrCreateUserDefinedMapperFunc(
10512 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
10513 }
10514 return MFunc;
10515 };
10516
10517 // Source location for the ident struct
10518 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10519
10520 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
10521 CGF.AllocaInsertPt->getIterator());
10522 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
10523 CGF.Builder.GetInsertPoint());
10524 llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
10525 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
10526 cantFail(OMPBuilder.createTargetData(
10527 OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
10528 CustomMapperCB,
10529 /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, RTLoc));
10530 CGF.Builder.restoreIP(AfterIP);
10531}
10532
10533void CGOpenMPRuntime::emitTargetDataStandAloneCall(
10534 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10535 const Expr *Device) {
10536 if (!CGF.HaveInsertPoint())
10537 return;
10538
10539 assert((isa<OMPTargetEnterDataDirective>(D) ||
10540 isa<OMPTargetExitDataDirective>(D) ||
10541 isa<OMPTargetUpdateDirective>(D)) &&
10542 "Expecting either target enter, exit data, or update directives.");
10543
10544 CodeGenFunction::OMPTargetDataInfo InputInfo;
10545 llvm::Value *MapTypesArray = nullptr;
10546 llvm::Value *MapNamesArray = nullptr;
10547 // Generate the code for the opening of the data environment.
10548 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
10549 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
10550 // Emit device ID if any.
10551 llvm::Value *DeviceID = nullptr;
10552 if (Device) {
10553 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10554 CGF.Int64Ty, /*isSigned=*/true);
10555 } else {
10556 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10557 }
10558
10559 // Emit the number of elements in the offloading arrays.
10560 llvm::Constant *PointerNum =
10561 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10562
10563 // Source location for the ident struct
10564 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10565
10566 SmallVector<llvm::Value *, 13> OffloadingArgs(
10567 {RTLoc, DeviceID, PointerNum,
10568 InputInfo.BasePointersArray.emitRawPointer(CGF),
10569 InputInfo.PointersArray.emitRawPointer(CGF),
10570 InputInfo.SizesArray.emitRawPointer(CGF), MapTypesArray, MapNamesArray,
10571 InputInfo.MappersArray.emitRawPointer(CGF)});
10572
10573 // Select the right runtime function call for each standalone
10574 // directive.
10575 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10576 RuntimeFunction RTLFn;
10577 switch (D.getDirectiveKind()) {
10578 case OMPD_target_enter_data:
10579 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
10580 : OMPRTL___tgt_target_data_begin_mapper;
10581 break;
10582 case OMPD_target_exit_data:
10583 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
10584 : OMPRTL___tgt_target_data_end_mapper;
10585 break;
10586 case OMPD_target_update:
10587 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
10588 : OMPRTL___tgt_target_data_update_mapper;
10589 break;
10590 case OMPD_parallel:
10591 case OMPD_for:
10592 case OMPD_parallel_for:
10593 case OMPD_parallel_master:
10594 case OMPD_parallel_sections:
10595 case OMPD_for_simd:
10596 case OMPD_parallel_for_simd:
10597 case OMPD_cancel:
10598 case OMPD_cancellation_point:
10599 case OMPD_ordered:
10600 case OMPD_threadprivate:
10601 case OMPD_allocate:
10602 case OMPD_task:
10603 case OMPD_simd:
10604 case OMPD_tile:
10605 case OMPD_unroll:
10606 case OMPD_sections:
10607 case OMPD_section:
10608 case OMPD_single:
10609 case OMPD_master:
10610 case OMPD_critical:
10611 case OMPD_taskyield:
10612 case OMPD_barrier:
10613 case OMPD_taskwait:
10614 case OMPD_taskgroup:
10615 case OMPD_atomic:
10616 case OMPD_flush:
10617 case OMPD_depobj:
10618 case OMPD_scan:
10619 case OMPD_teams:
10620 case OMPD_target_data:
10621 case OMPD_distribute:
10622 case OMPD_distribute_simd:
10623 case OMPD_distribute_parallel_for:
10624 case OMPD_distribute_parallel_for_simd:
10625 case OMPD_teams_distribute:
10626 case OMPD_teams_distribute_simd:
10627 case OMPD_teams_distribute_parallel_for:
10628 case OMPD_teams_distribute_parallel_for_simd:
10629 case OMPD_declare_simd:
10630 case OMPD_declare_variant:
10631 case OMPD_begin_declare_variant:
10632 case OMPD_end_declare_variant:
10633 case OMPD_declare_target:
10634 case OMPD_end_declare_target:
10635 case OMPD_declare_reduction:
10636 case OMPD_declare_mapper:
10637 case OMPD_taskloop:
10638 case OMPD_taskloop_simd:
10639 case OMPD_master_taskloop:
10640 case OMPD_master_taskloop_simd:
10641 case OMPD_parallel_master_taskloop:
10642 case OMPD_parallel_master_taskloop_simd:
10643 case OMPD_target:
10644 case OMPD_target_simd:
10645 case OMPD_target_teams_distribute:
10646 case OMPD_target_teams_distribute_simd:
10647 case OMPD_target_teams_distribute_parallel_for:
10648 case OMPD_target_teams_distribute_parallel_for_simd:
10649 case OMPD_target_teams:
10650 case OMPD_target_parallel:
10651 case OMPD_target_parallel_for:
10652 case OMPD_target_parallel_for_simd:
10653 case OMPD_requires:
10654 case OMPD_metadirective:
10655 case OMPD_unknown:
10656 default:
10657 llvm_unreachable("Unexpected standalone target data directive.");
10658 break;
10659 }
10660 if (HasNowait) {
10661 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
10662 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
10663 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
10664 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
10665 }
10666 CGF.EmitRuntimeCall(
10667 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
10668 OffloadingArgs);
10669 };
10670
10671 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10672 &MapNamesArray](CodeGenFunction &CGF,
10673 PrePostActionTy &) {
10674 // Fill up the arrays with all the mapped variables.
10675 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10676 CGOpenMPRuntime::TargetDataInfo Info;
10677 MappableExprsHandler MEHandler(D, CGF);
10678 genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder);
10679 emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
10680 /*IsNonContiguous=*/true, /*ForEndCall=*/false);
10681
10682 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10683 D.hasClausesOfKind<OMPNowaitClause>();
10684
10685 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10686 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
10687 CGF.VoidPtrTy, CGM.getPointerAlign());
10688 InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
10689 CGM.getPointerAlign());
10690 InputInfo.SizesArray =
10691 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
10692 InputInfo.MappersArray =
10693 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10694 MapTypesArray = Info.RTArgs.MapTypesArray;
10695 MapNamesArray = Info.RTArgs.MapNamesArray;
10696 if (RequiresOuterTask)
10697 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10698 else
10699 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10700 };
10701
10702 if (IfCond) {
10703 emitIfClause(CGF, IfCond, TargetThenGen,
10704 [](CodeGenFunction &CGF, PrePostActionTy &) {});
10705 } else {
10706 RegionCodeGenTy ThenRCG(TargetThenGen);
10707 ThenRCG(CGF);
10708 }
10709}
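// Illustrative sketch (editorial): a standalone
//   #pragma omp target update to(x)
// selects __tgt_target_data_update_mapper above, while 'target enter data'
// and 'target exit data' select the _begin_mapper/_end_mapper entry points.
// With a 'nowait' clause the *_nowait_mapper variants are chosen instead and
// receive the four extra (here zeroed) dependence arguments appended above.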
10710
10711namespace {
10712 /// Kind of parameter in a function with 'declare simd' directive.
10713enum ParamKindTy {
10714 Linear,
10715 LinearRef,
10716 LinearUVal,
10717 LinearVal,
10718 Uniform,
10719 Vector,
10720};
10721/// Attribute set of the parameter.
10722struct ParamAttrTy {
10723 ParamKindTy Kind = Vector;
10724 llvm::APSInt StrideOrArg;
10725 llvm::APSInt Alignment;
10726 bool HasVarStride = false;
10727};
10728} // namespace
10729
10730static unsigned evaluateCDTSize(const FunctionDecl *FD,
10731 ArrayRef<ParamAttrTy> ParamAttrs) {
10732 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10733 // If the OpenMP clause "simdlen" is used, the VLEN is the value of the
10734 // argument of that clause. The VLEN value must be a power of 2.
10735 // Otherwise the notion of the function's "characteristic data type" (CDT)
10736 // is used to compute the vector length.
10737 // The CDT is defined in the following order:
10738 // a) For a non-void function, the CDT is the return type.
10739 // b) If the function has any non-uniform, non-linear parameters, the CDT is
10740 // the type of the first such parameter.
10741 // c) If the CDT determined by a) or b) above is a struct, union, or class
10742 // type which is passed by value (except for the type that maps to the
10743 // built-in complex data type), the CDT is int.
10744 // d) If none of the above three cases is applicable, the CDT is int.
10745 // The VLEN is then determined from the CDT and the size of the vector
10746 // register of the ISA for which the current vector version is generated. The
10747 // VLEN is computed using the formula below:
10748 // VLEN = sizeof(vector_register) / sizeof(CDT),
10749 // where the vector register size is specified in section 3.2.1 "Registers and
10750 // the Stack Frame" of the original AMD64 ABI document.
10751 QualType RetType = FD->getReturnType();
10752 if (RetType.isNull())
10753 return 0;
10754 ASTContext &C = FD->getASTContext();
10755 QualType CDT;
10756 if (!RetType.isNull() && !RetType->isVoidType()) {
10757 CDT = RetType;
10758 } else {
10759 unsigned Offset = 0;
10760 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10761 if (ParamAttrs[Offset].Kind == Vector)
10762 CDT = C.getPointerType(C.getCanonicalTagType(MD->getParent()));
10763 ++Offset;
10764 }
10765 if (CDT.isNull()) {
10766 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10767 if (ParamAttrs[I + Offset].Kind == Vector) {
10768 CDT = FD->getParamDecl(I)->getType();
10769 break;
10770 }
10771 }
10772 }
10773 }
10774 if (CDT.isNull())
10775 CDT = C.IntTy;
10776 CDT = CDT->getCanonicalTypeUnqualified();
10777 if (CDT->isRecordType() || CDT->isUnionType())
10778 CDT = C.IntTy;
10779 return C.getTypeSize(CDT);
10780}
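// Worked example (editorial): for
//   #pragma omp declare simd
//   double add(double x, double y);
// the CDT is 'double' (the return type), so this returns 64. On an ISA with
// 256-bit vector registers the resulting default VLEN is 256 / 64 == 4.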
10781
10782/// Mangle the parameter part of the vector function name according to
10783/// their OpenMP classification. The mangling function is defined in
10784/// section 4.5 of the AAVFABI(2021Q1).
10785static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10786 SmallString<256> Buffer;
10787 llvm::raw_svector_ostream Out(Buffer);
10788 for (const auto &ParamAttr : ParamAttrs) {
10789 switch (ParamAttr.Kind) {
10790 case Linear:
10791 Out << 'l';
10792 break;
10793 case LinearRef:
10794 Out << 'R';
10795 break;
10796 case LinearUVal:
10797 Out << 'U';
10798 break;
10799 case LinearVal:
10800 Out << 'L';
10801 break;
10802 case Uniform:
10803 Out << 'u';
10804 break;
10805 case Vector:
10806 Out << 'v';
10807 break;
10808 }
10809 if (ParamAttr.HasVarStride)
10810 Out << "s" << ParamAttr.StrideOrArg;
10811 else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
10812 ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
10813 // Don't print the step value if it is not present or if it is
10814 // equal to 1.
10815 if (ParamAttr.StrideOrArg < 0)
10816 Out << 'n' << -ParamAttr.StrideOrArg;
10817 else if (ParamAttr.StrideOrArg != 1)
10818 Out << ParamAttr.StrideOrArg;
10819 }
10820
10821 if (!!ParamAttr.Alignment)
10822 Out << 'a' << ParamAttr.Alignment;
10823 }
10824
10825 return std::string(Out.str());
10826}
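// Illustrative example (editorial): for
//   #pragma omp declare simd uniform(n) linear(i:2)
//   void f(int n, int i, float x);
// the parameter part mangles to "ul2v": 'u' for the uniform n, 'l2' for the
// linear i with constant stride 2, and 'v' for the vector parameter x.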
10827
10828static void
10829emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10830 const llvm::APSInt &VLENVal,
10831 ArrayRef<ParamAttrTy> ParamAttrs,
10832 OMPDeclareSimdDeclAttr::BranchStateTy State) {
10833 struct ISADataTy {
10834 char ISA;
10835 unsigned VecRegSize;
10836 };
10837 ISADataTy ISAData[] = {
10838 {
10839 'b', 128
10840 }, // SSE
10841 {
10842 'c', 256
10843 }, // AVX
10844 {
10845 'd', 256
10846 }, // AVX2
10847 {
10848 'e', 512
10849 }, // AVX512
10850 };
10851 llvm::SmallVector<char, 2> Masked;
10852 switch (State) {
10853 case OMPDeclareSimdDeclAttr::BS_Undefined:
10854 Masked.push_back('N');
10855 Masked.push_back('M');
10856 break;
10857 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10858 Masked.push_back('N');
10859 break;
10860 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10861 Masked.push_back('M');
10862 break;
10863 }
10864 for (char Mask : Masked) {
10865 for (const ISADataTy &Data : ISAData) {
10866 SmallString<256> Buffer;
10867 llvm::raw_svector_ostream Out(Buffer);
10868 Out << "_ZGV" << Data.ISA << Mask;
10869 if (!VLENVal) {
10870 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10871 assert(NumElts && "Non-zero simdlen/cdtsize expected");
10872 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10873 } else {
10874 Out << VLENVal;
10875 }
10876 Out << mangleVectorParameters(ParamAttrs);
10877 Out << '_' << Fn->getName();
10878 Fn->addFnAttr(Out.str());
10879 }
10880 }
10881}
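// Illustrative example (editorial): for
//   #pragma omp declare simd notinbranch
//   double foo(double x);
// no simdlen is given, so the VLEN per ISA is derived from the 64-bit CDT
// and the loop above attaches the attributes
//   _ZGVbN2v_foo _ZGVcN4v_foo _ZGVdN4v_foo _ZGVeN8v_foo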
10882
10883// These are the functions needed to mangle the names of the
10884// vector functions generated by the compiler, according to the rules
10885// defined in the "Vector Function ABI specification for AArch64",
10886// available at
10887// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10888
10889/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
10890static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10891 QT = QT.getCanonicalType();
10892
10893 if (QT->isVoidType())
10894 return false;
10895
10896 if (Kind == ParamKindTy::Uniform)
10897 return false;
10898
10899 if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
10900 return false;
10901
10902 if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
10903 !QT->isReferenceType())
10904 return false;
10905
10906 return true;
10907}
10908
10909/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10910static bool getAArch64PBV(QualType QT, ASTContext &C) {
10911 QT = QT.getCanonicalType();
10912 unsigned Size = C.getTypeSize(QT);
10913
10914 // Only scalars and complex types at most 16 bytes wide set PBV to true.
10915 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10916 return false;
10917
10918 if (QT->isFloatingType())
10919 return true;
10920
10921 if (QT->isIntegerType())
10922 return true;
10923
10924 if (QT->isPointerType())
10925 return true;
10926
10927 // TODO: Add support for complex types (section 3.1.2, item 2).
10928
10929 return false;
10930}
10931
10932/// Computes the lane size (LS) of a return type or of an input parameter,
10933/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10934/// TODO: Add support for references, section 3.2.1, item 1.
10935static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10936 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10937 QualType PTy = QT.getCanonicalType()->getPointeeType();
10938 if (getAArch64PBV(PTy, C))
10939 return C.getTypeSize(PTy);
10940 }
10941 if (getAArch64PBV(QT, C))
10942 return C.getTypeSize(QT);
10943
10944 return C.getTypeSize(C.getUIntPtrType());
10945}
10946
10947// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10948// signature of the scalar function, as defined in 3.2.2 of the
10949// AAVFABI.
10950static std::tuple<unsigned, unsigned, bool>
10951getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10952 QualType RetType = FD->getReturnType().getCanonicalType();
10953
10954 ASTContext &C = FD->getASTContext();
10955
10956 bool OutputBecomesInput = false;
10957
10958 llvm::SmallVector<unsigned, 8> Sizes;
10959 if (!RetType->isVoidType()) {
10960 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10961 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10962 OutputBecomesInput = true;
10963 }
10964 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10965 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10966 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10967 }
10968
10969 assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10970 // The LS of a function parameter / return value can only be a power
10971 // of 2, starting from 8 bits, up to 128.
10972 assert(llvm::all_of(Sizes,
10973 [](unsigned Size) {
10974 return Size == 8 || Size == 16 || Size == 32 ||
10975 Size == 64 || Size == 128;
10976 }) &&
10977 "Invalid size");
10978
10979 return std::make_tuple(*llvm::min_element(Sizes), *llvm::max_element(Sizes),
10980 OutputBecomesInput);
10981}
10982
10983// Function used to add the attribute. The parameter `VLEN` is
10984// templated to allow the use of "x" when targeting scalable functions
10985// for SVE.
10986template <typename T>
10987static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10988 char ISA, StringRef ParSeq,
10989 StringRef MangledName, bool OutputBecomesInput,
10990 llvm::Function *Fn) {
10991 SmallString<256> Buffer;
10992 llvm::raw_svector_ostream Out(Buffer);
10993 Out << Prefix << ISA << LMask << VLEN;
10994 if (OutputBecomesInput)
10995 Out << "v";
10996 Out << ParSeq << "_" << MangledName;
10997 Fn->addFnAttr(Out.str());
10998}
10999
11000// Helper function to generate the Advanced SIMD names depending on
11001// the value of the NDS when simdlen is not present.
11002static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11003 StringRef Prefix, char ISA,
11004 StringRef ParSeq, StringRef MangledName,
11005 bool OutputBecomesInput,
11006 llvm::Function *Fn) {
11007 switch (NDS) {
11008 case 8:
11009 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11010 OutputBecomesInput, Fn);
11011 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11012 OutputBecomesInput, Fn);
11013 break;
11014 case 16:
11015 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11016 OutputBecomesInput, Fn);
11017 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11018 OutputBecomesInput, Fn);
11019 break;
11020 case 32:
11021 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11022 OutputBecomesInput, Fn);
11023 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11024 OutputBecomesInput, Fn);
11025 break;
11026 case 64:
11027 case 128:
11028 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11029 OutputBecomesInput, Fn);
11030 break;
11031 default:
11032 llvm_unreachable("Scalar type is too wide.");
11033 }
11034}
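// Editorial note: e.g. an NDS of 32 (say, a float parameter) produces the
// two Advanced SIMD variants above with VLEN 2 and 4, i.e. one 64-bit and
// one 128-bit vector version.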
11035
11036/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
11037static void emitAArch64DeclareSimdFunction(
11038 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
11039 ArrayRef<ParamAttrTy> ParamAttrs,
11040 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
11041 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
11042
11043 // Get basic data for building the vector signature.
11044 const auto Data = getNDSWDS(FD, ParamAttrs);
11045 const unsigned NDS = std::get<0>(Data);
11046 const unsigned WDS = std::get<1>(Data);
11047 const bool OutputBecomesInput = std::get<2>(Data);
11048
11049 // Check the values provided via `simdlen` by the user.
11050 // 1. A `simdlen(1)` doesn't produce vector signatures,
11051 if (UserVLEN == 1) {
11052 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11053 DiagnosticsEngine::Warning,
11054 "The clause simdlen(1) has no effect when targeting aarch64.");
11055 CGM.getDiags().Report(SLoc, DiagID);
11056 return;
11057 }
11058
11059 // 2. Section 3.3.1, item 1: user input must be a power of 2 for
11060 // Advanced SIMD output.
11061 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
11062 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11063 DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
11064 "power of 2 when targeting Advanced SIMD.");
11065 CGM.getDiags().Report(SLoc, DiagID);
11066 return;
11067 }
11068
11069 // 3. Section 3.4.1. SVE fixed length must obey the architectural
11070 // limits.
11071 if (ISA == 's' && UserVLEN != 0) {
11072 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
11073 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11074 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
11075 "lanes in the architectural constraints "
11076 "for SVE (min is 128-bit, max is "
11077 "2048-bit, by steps of 128-bit)");
11078 CGM.getDiags().Report(SLoc, DiagID) << WDS;
11079 return;
11080 }
11081 }
11082
11083 // Sort out parameter sequence.
11084 const std::string ParSeq = mangleVectorParameters(ParamAttrs);
11085 StringRef Prefix = "_ZGV";
11086 // Generate simdlen from user input (if any).
11087 if (UserVLEN) {
11088 if (ISA == 's') {
11089 // SVE generates only a masked function.
11090 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11091 OutputBecomesInput, Fn);
11092 } else {
11093 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11094 // Advanced SIMD generates one or two functions, depending on
11095 // the `[not]inbranch` clause.
11096 switch (State) {
11097 case OMPDeclareSimdDeclAttr::BS_Undefined:
11098 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11099 OutputBecomesInput, Fn);
11100 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11101 OutputBecomesInput, Fn);
11102 break;
11103 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11104 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11105 OutputBecomesInput, Fn);
11106 break;
11107 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11108 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11109 OutputBecomesInput, Fn);
11110 break;
11111 }
11112 }
11113 } else {
11114 // If no user simdlen is provided, follow the AAVFABI rules for
11115 // generating the vector length.
11116 if (ISA == 's') {
11117 // SVE, section 3.4.1, item 1.
11118 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
11119 OutputBecomesInput, Fn);
11120 } else {
11121 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11122 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
11123 // two vector names depending on the use of the clause
11124 // `[not]inbranch`.
11125 switch (State) {
11126 case OMPDeclareSimdDeclAttr::BS_Undefined:
11127 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11128 OutputBecomesInput, Fn);
11129 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11130 OutputBecomesInput, Fn);
11131 break;
11132 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11133 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11134 OutputBecomesInput, Fn);
11135 break;
11136 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11137 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11138 OutputBecomesInput, Fn);
11139 break;
11140 }
11141 }
11142 }
11143}
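// Illustrative example (editorial): for
//   #pragma omp declare simd
//   float foo(float x);
// NEON ('n') emits fixed-width masked and unmasked variants such as
// _ZGVnN4v_foo, while SVE ('s') without a simdlen emits the single
// scalable masked variant _ZGVsMxv_foo.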
11144
11145void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11146 llvm::Function *Fn) {
11147 ASTContext &C = CGM.getContext();
11148 FD = FD->getMostRecentDecl();
11149 while (FD) {
11150 // Map params to their positions in function decl.
11151 llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11152 if (isa<CXXMethodDecl>(FD))
11153 ParamPositions.try_emplace(FD, 0);
11154 unsigned ParamPos = ParamPositions.size();
11155 for (const ParmVarDecl *P : FD->parameters()) {
11156 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11157 ++ParamPos;
11158 }
11159 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11160 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11161 // Mark uniform parameters.
11162 for (const Expr *E : Attr->uniforms()) {
11163 E = E->IgnoreParenImpCasts();
11164 unsigned Pos;
11165 if (isa<CXXThisExpr>(E)) {
11166 Pos = ParamPositions[FD];
11167 } else {
11168 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11169 ->getCanonicalDecl();
11170 auto It = ParamPositions.find(PVD);
11171 assert(It != ParamPositions.end() && "Function parameter not found");
11172 Pos = It->second;
11173 }
11174 ParamAttrs[Pos].Kind = Uniform;
11175 }
11176 // Get alignment info.
11177 auto *NI = Attr->alignments_begin();
11178 for (const Expr *E : Attr->aligneds()) {
11179 E = E->IgnoreParenImpCasts();
11180 unsigned Pos;
11181 QualType ParmTy;
11182 if (isa<CXXThisExpr>(E)) {
11183 Pos = ParamPositions[FD];
11184 ParmTy = E->getType();
11185 } else {
11186 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11187 ->getCanonicalDecl();
11188 auto It = ParamPositions.find(PVD);
11189 assert(It != ParamPositions.end() && "Function parameter not found");
11190 Pos = It->second;
11191 ParmTy = PVD->getType();
11192 }
11193 ParamAttrs[Pos].Alignment =
11194 (*NI)
11195 ? (*NI)->EvaluateKnownConstInt(C)
11196 : llvm::APSInt::getUnsigned(
11197 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11198 .getQuantity());
11199 ++NI;
11200 }
11201 // Mark linear parameters.
11202 auto *SI = Attr->steps_begin();
11203 auto *MI = Attr->modifiers_begin();
11204 for (const Expr *E : Attr->linears()) {
11205 E = E->IgnoreParenImpCasts();
11206 unsigned Pos;
11207 bool IsReferenceType = false;
11208 // Rescaling factor needed to compute the linear parameter
11209 // value in the mangled name.
11210 unsigned PtrRescalingFactor = 1;
11211 if (isa<CXXThisExpr>(E)) {
11212 Pos = ParamPositions[FD];
11213 auto *P = cast<PointerType>(E->getType());
11214 PtrRescalingFactor = CGM.getContext()
11215 .getTypeSizeInChars(P->getPointeeType())
11216 .getQuantity();
11217 } else {
11218 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11219 ->getCanonicalDecl();
11220 auto It = ParamPositions.find(PVD);
11221 assert(It != ParamPositions.end() && "Function parameter not found");
11222 Pos = It->second;
11223 if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11224 PtrRescalingFactor = CGM.getContext()
11225 .getTypeSizeInChars(P->getPointeeType())
11226 .getQuantity();
11227 else if (PVD->getType()->isReferenceType()) {
11228 IsReferenceType = true;
11229 PtrRescalingFactor =
11230 CGM.getContext()
11231 .getTypeSizeInChars(PVD->getType().getNonReferenceType())
11232 .getQuantity();
11233 }
11234 }
11235 ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11236 if (*MI == OMPC_LINEAR_ref)
11237 ParamAttr.Kind = LinearRef;
11238 else if (*MI == OMPC_LINEAR_uval)
11239 ParamAttr.Kind = LinearUVal;
11240 else if (IsReferenceType)
11241 ParamAttr.Kind = LinearVal;
11242 else
11243 ParamAttr.Kind = Linear;
11244 // Assuming a stride of 1, for `linear` without modifiers.
11245 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11246 if (*SI) {
11247 Expr::EvalResult Result;
11248 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11249 if (const auto *DRE =
11250 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11251 if (const auto *StridePVD =
11252 dyn_cast<ParmVarDecl>(DRE->getDecl())) {
11253 ParamAttr.HasVarStride = true;
11254 auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
11255 assert(It != ParamPositions.end() &&
11256 "Function parameter not found");
11257 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
11258 }
11259 }
11260 } else {
11261 ParamAttr.StrideOrArg = Result.Val.getInt();
11262 }
11263 }
11264 // If we are using a linear clause on a pointer, we need to
11265 // rescale the value of linear_step with the byte size of the
11266 // pointee type.
11267 if (!ParamAttr.HasVarStride &&
11268 (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
11269 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11270 ++SI;
11271 ++MI;
11272 }
11273 llvm::APSInt VLENVal;
11274 SourceLocation ExprLoc;
11275 const Expr *VLENExpr = Attr->getSimdlen();
11276 if (VLENExpr) {
11277 VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11278 ExprLoc = VLENExpr->getExprLoc();
11279 }
11280 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11281 if (CGM.getTriple().isX86()) {
11282 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11283 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11284 unsigned VLEN = VLENVal.getExtValue();
11285 StringRef MangledName = Fn->getName();
11286 if (CGM.getTarget().hasFeature("sve"))
11287 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11288 MangledName, 's', 128, Fn, ExprLoc);
11289 else if (CGM.getTarget().hasFeature("neon"))
11290 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11291 MangledName, 'n', 128, Fn, ExprLoc);
11292 }
11293 }
11294 FD = FD->getPreviousDecl();
11295 }
11296}
11297
11298namespace {
11299/// Cleanup action for doacross support.
11300class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11301public:
11302 static const int DoacrossFinArgs = 2;
11303
11304private:
11305 llvm::FunctionCallee RTLFn;
11306 llvm::Value *Args[DoacrossFinArgs];
11307
11308public:
11309 DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11310 ArrayRef<llvm::Value *> CallArgs)
11311 : RTLFn(RTLFn) {
11312 assert(CallArgs.size() == DoacrossFinArgs);
11313 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11314 }
11315 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11316 if (!CGF.HaveInsertPoint())
11317 return;
11318 CGF.EmitRuntimeCall(RTLFn, Args);
11319 }
11320};
11321} // namespace
11322
11323void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
11324 const OMPLoopDirective &D,
11325 ArrayRef<Expr *> NumIterations) {
11326 if (!CGF.HaveInsertPoint())
11327 return;
11328
11329 ASTContext &C = CGM.getContext();
11330 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
11331 RecordDecl *RD;
11332 if (KmpDimTy.isNull()) {
11333 // Build struct kmp_dim { // loop bounds info casted to kmp_int64
11334 // kmp_int64 lo; // lower
11335 // kmp_int64 up; // upper
11336 // kmp_int64 st; // stride
11337 // };
11338 RD = C.buildImplicitRecord("kmp_dim");
11339 RD->startDefinition();
11340 addFieldToRecordDecl(C, RD, Int64Ty);
11341 addFieldToRecordDecl(C, RD, Int64Ty);
11342 addFieldToRecordDecl(C, RD, Int64Ty);
11343 RD->completeDefinition();
11344 KmpDimTy = C.getCanonicalTagType(RD);
11345 } else {
11346 RD = KmpDimTy->castAsRecordDecl();
11347 }
11348 llvm::APInt Size(/*numBits=*/32, NumIterations.size());
11349 QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr,
11350 ArraySizeModifier::Normal, 0);
11351
11352 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
11353 CGF.EmitNullInitialization(DimsAddr, ArrayTy);
11354 enum { LowerFD = 0, UpperFD, StrideFD };
11355 // Fill dims with data.
11356 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
11357 LValue DimsLVal = CGF.MakeAddrLValue(
11358 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
11359 // dims.upper = num_iterations;
11360 LValue UpperLVal = CGF.EmitLValueForField(
11361 DimsLVal, *std::next(RD->field_begin(), UpperFD));
11362 llvm::Value *NumIterVal = CGF.EmitScalarConversion(
11363 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
11364 Int64Ty, NumIterations[I]->getExprLoc());
11365 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
11366 // dims.stride = 1;
11367 LValue StrideLVal = CGF.EmitLValueForField(
11368 DimsLVal, *std::next(RD->field_begin(), StrideFD));
11369 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
11370 StrideLVal);
11371 }
11372
11373 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
11374 // kmp_int32 num_dims, struct kmp_dim * dims);
11375 llvm::Value *Args[] = {
11376 emitUpdateLocation(CGF, D.getBeginLoc()),
11377 getThreadID(CGF, D.getBeginLoc()),
11378 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
11379 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11380 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).emitRawPointer(CGF),
11381 CGM.VoidPtrTy)};
11382
11383 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11384 CGM.getModule(), OMPRTL___kmpc_doacross_init);
11385 CGF.EmitRuntimeCall(RTLFn, Args);
11386 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
11387 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
11388 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11389 CGM.getModule(), OMPRTL___kmpc_doacross_fini);
11390 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11391 llvm::ArrayRef(FiniArgs));
11392}
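// Illustrative sketch (editorial): for
//   #pragma omp for ordered(1)
//   for (int i = 0; i < N; ++i) ...
// a one-element kmp_dim array is filled as {lo=0, up=N, st=1} (lo stays
// zero-initialized) and passed to
//   __kmpc_doacross_init(&loc, gtid, /*num_dims=*/1, dims);
// with the matching __kmpc_doacross_fini(&loc, gtid) registered as a cleanup.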
11393
11394template <typename T>
11395static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM,
11396 const T *C, llvm::Value *ULoc,
11397 llvm::Value *ThreadID) {
11398 QualType Int64Ty =
11399 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11400 llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11401 QualType ArrayTy = CGM.getContext().getConstantArrayType(
11402 Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0);
11403 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11404 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11405 const Expr *CounterVal = C->getLoopData(I);
11406 assert(CounterVal);
11407 llvm::Value *CntVal = CGF.EmitScalarConversion(
11408 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11409 CounterVal->getExprLoc());
11410 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11411 /*Volatile=*/false, Int64Ty);
11412 }
11413 llvm::Value *Args[] = {
11414 ULoc, ThreadID,
11415 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).emitRawPointer(CGF)};
11416 llvm::FunctionCallee RTLFn;
11417 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
11418 OMPDoacrossKind<T> ODK;
11419 if (ODK.isSource(C)) {
11420 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11421 OMPRTL___kmpc_doacross_post);
11422 } else {
11423 assert(ODK.isSink(C) && "Expect sink modifier.");
11424 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11425 OMPRTL___kmpc_doacross_wait);
11426 }
11427 CGF.EmitRuntimeCall(RTLFn, Args);
11428}
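// Illustrative sketch (editorial): inside such an ordered loop,
//   #pragma omp ordered depend(source)    -> __kmpc_doacross_post(&loc, gtid, vec)
//   #pragma omp ordered depend(sink: i-1) -> __kmpc_doacross_wait(&loc, gtid, vec)
// where vec is the kmp_int64 iteration vector materialized above.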
11429
11430void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11431 const OMPDependClause *C) {
11432 return EmitDoacrossOrdered<OMPDependClause>(
11433 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11434 getThreadID(CGF, C->getBeginLoc()));
11435}
11436
11437void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11438 const OMPDoacrossClause *C) {
11439 return EmitDoacrossOrdered<OMPDoacrossClause>(
11440 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11441 getThreadID(CGF, C->getBeginLoc()));
11442}
11443
11444void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11445 llvm::FunctionCallee Callee,
11446 ArrayRef<llvm::Value *> Args) const {
11447 assert(Loc.isValid() && "Outlined function call location must be valid.");
11448 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11449
11450 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11451 if (Fn->doesNotThrow()) {
11452 CGF.EmitNounwindRuntimeCall(Fn, Args);
11453 return;
11454 }
11455 }
11456 CGF.EmitRuntimeCall(Callee, Args);
11457}
11458
11459void CGOpenMPRuntime::emitOutlinedFunctionCall(
11460 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
11461 ArrayRef<llvm::Value *> Args) const {
11462 emitCall(CGF, Loc, OutlinedFn, Args);
11463}
11464
11465void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11466 if (const auto *FD = dyn_cast<FunctionDecl>(D))
11467 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11468 HasEmittedDeclareTargetRegion = true;
11469}
11470
11471Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
11472 const VarDecl *NativeParam,
11473 const VarDecl *TargetParam) const {
11474 return CGF.GetAddrOfLocalVar(NativeParam);
11475}
11476
11477/// Return allocator value from expression, or return a null allocator (default
11478/// when no allocator specified).
11479static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
11480 const Expr *Allocator) {
11481 llvm::Value *AllocVal;
11482 if (Allocator) {
11483 AllocVal = CGF.EmitScalarExpr(Allocator);
11484 // According to the standard, the original allocator type is an enum
11485 // (integer). Convert to pointer type, if required.
11486 AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
11487 CGF.getContext().VoidPtrTy,
11488 Allocator->getExprLoc());
11489 } else {
11490 // If no allocator specified, it defaults to the null allocator.
11491 AllocVal = llvm::Constant::getNullValue(
11492 CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
11493 }
11494 return AllocVal;
11495}
11496
11497/// Return the alignment from an allocate directive if present.
11498static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
11499 std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
11500
11501 if (!AllocateAlignment)
11502 return nullptr;
11503
11504 return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
11505}
11506
11507Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
11508 const VarDecl *VD) {
11509 if (!VD)
11510 return Address::invalid();
11511 Address UntiedAddr = Address::invalid();
11512 Address UntiedRealAddr = Address::invalid();
11513 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11514 if (It != FunctionToUntiedTaskStackMap.end()) {
11515 const UntiedLocalVarsAddressesMap &UntiedData =
11516 UntiedLocalVarsStack[It->second];
11517 auto I = UntiedData.find(VD);
11518 if (I != UntiedData.end()) {
11519 UntiedAddr = I->second.first;
11520 UntiedRealAddr = I->second.second;
11521 }
11522 }
11523 const VarDecl *CVD = VD->getCanonicalDecl();
11524 if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
11525 // Use the default allocation.
11526 if (!isAllocatableDecl(VD))
11527 return UntiedAddr;
11528 llvm::Value *Size;
11529 CharUnits Align = CGM.getContext().getDeclAlign(CVD);
11530 if (CVD->getType()->isVariablyModifiedType()) {
11531 Size = CGF.getTypeSize(CVD->getType());
11532 // Align the size: ((size + align - 1) / align) * align
11533 Size = CGF.Builder.CreateNUWAdd(
11534 Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
11535 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
11536 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
11537 } else {
11538 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
11539 Size = CGM.getSize(Sz.alignTo(Align));
11540 }
11541 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
11542 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
11543 const Expr *Allocator = AA->getAllocator();
11544 llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
11545 llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
11546 SmallVector<llvm::Value *, 4> Args;
11547 Args.push_back(ThreadID);
11548 if (Alignment)
11549 Args.push_back(Alignment);
11550 Args.push_back(Size);
11551 Args.push_back(AllocVal);
11552 llvm::omp::RuntimeFunction FnID =
11553 Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
11554 llvm::Value *Addr = CGF.EmitRuntimeCall(
11555 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
11556 getName({CVD->getName(), ".void.addr"}));
11557 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11558 CGM.getModule(), OMPRTL___kmpc_free);
11559 QualType Ty = CVD->getType();
11560 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11561 Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
11562 if (UntiedAddr.isValid())
11563 CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
11564
11565 // Cleanup action for allocate support.
11566 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11567 llvm::FunctionCallee RTLFn;
11568 SourceLocation::UIntTy LocEncoding;
11569 Address Addr;
11570 const Expr *AllocExpr;
11571
11572 public:
11573 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
11574 SourceLocation::UIntTy LocEncoding, Address Addr,
11575 const Expr *AllocExpr)
11576 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
11577 AllocExpr(AllocExpr) {}
11578 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11579 if (!CGF.HaveInsertPoint())
11580 return;
11581 llvm::Value *Args[3];
11582 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
11583 CGF, SourceLocation::getFromRawEncoding(LocEncoding));
11584 Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11585 Addr.emitRawPointer(CGF), CGF.VoidPtrTy);
11586 llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
11587 Args[2] = AllocVal;
11588 CGF.EmitRuntimeCall(RTLFn, Args);
11589 }
11590 };
11591 Address VDAddr =
11592 UntiedRealAddr.isValid()
11593 ? UntiedRealAddr
11594 : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
11595 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
11596 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
11597 VDAddr, Allocator);
11598 if (UntiedRealAddr.isValid())
11599 if (auto *Region =
11600 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
11601 Region->emitUntiedSwitch(CGF);
11602 return VDAddr;
11603 }
11604 return UntiedAddr;
11605}
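// Illustrative sketch (editorial): a local such as
//   int a;
//   #pragma omp allocate(a) allocator(omp_high_bw_mem_alloc) align(64)
// is lowered by the code above roughly to
//   void *p = __kmpc_aligned_alloc(gtid, 64, sizeof(int), allocator);
// (or __kmpc_alloc without the alignment argument), with a matching
//   __kmpc_free(gtid, p, allocator);
// pushed onto the cleanup stack for scope exit.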
11606
11607bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
11608 const VarDecl *VD) const {
11609 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11610 if (It == FunctionToUntiedTaskStackMap.end())
11611 return false;
11612 return UntiedLocalVarsStack[It->second].count(VD) > 0;
11613}
11614
11615 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11616 CodeGenModule &CGM, const OMPLoopDirective &S)
11617 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11618 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11619 if (!NeedToPush)
11620 return;
11621 NontemporalDeclsSet &DS =
11622 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11623 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11624 for (const Stmt *Ref : C->private_refs()) {
11625 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11626 const ValueDecl *VD;
11627 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11628 VD = DRE->getDecl();
11629 } else {
11630 const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11631 assert((ME->isImplicitCXXThis() ||
11632 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11633 "Expected member of current class.");
11634 VD = ME->getMemberDecl();
11635 }
11636 DS.insert(VD);
11637 }
11638 }
11639}
11640
11641 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11642 if (!NeedToPush)
11643 return;
11644 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11645}
11646
11647 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
11648 CodeGenFunction &CGF,
11649 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
11650 std::pair<Address, Address>> &LocalVars)
11651 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
11652 if (!NeedToPush)
11653 return;
11654 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
11655 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
11656 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
11657}
11658
11659 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
11660 if (!NeedToPush)
11661 return;
11662 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
11663}
11664
11665bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11666 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11667
11668 return llvm::any_of(
11669 CGM.getOpenMPRuntime().NontemporalDeclsStack,
11670 [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
11671}
11672
11673void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11674 const OMPExecutableDirective &S,
11675 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11676 const {
11677 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11678 // Vars in target/task regions must be excluded completely.
11679 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11680 isOpenMPTaskingDirective(S.getDirectiveKind())) {
11681 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11682 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11683 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11684 for (const CapturedStmt::Capture &Cap : CS->captures()) {
11685 if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11686 NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11687 }
11688 }
11689 // Exclude vars in private clauses.
11690 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11691 for (const Expr *Ref : C->varlist()) {
11692 if (!Ref->getType()->isScalarType())
11693 continue;
11694 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11695 if (!DRE)
11696 continue;
11697 NeedToCheckForLPCs.insert(DRE->getDecl());
11698 }
11699 }
11700 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11701 for (const Expr *Ref : C->varlist()) {
11702 if (!Ref->getType()->isScalarType())
11703 continue;
11704 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11705 if (!DRE)
11706 continue;
11707 NeedToCheckForLPCs.insert(DRE->getDecl());
11708 }
11709 }
11710 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11711 for (const Expr *Ref : C->varlist()) {
11712 if (!Ref->getType()->isScalarType())
11713 continue;
11714 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11715 if (!DRE)
11716 continue;
11717 NeedToCheckForLPCs.insert(DRE->getDecl());
11718 }
11719 }
11720 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11721 for (const Expr *Ref : C->varlist()) {
11722 if (!Ref->getType()->isScalarType())
11723 continue;
11724 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11725 if (!DRE)
11726 continue;
11727 NeedToCheckForLPCs.insert(DRE->getDecl());
11728 }
11729 }
11730 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11731 for (const Expr *Ref : C->varlist()) {
11732 if (!Ref->getType()->isScalarType())
11733 continue;
11734 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11735 if (!DRE)
11736 continue;
11737 NeedToCheckForLPCs.insert(DRE->getDecl());
11738 }
11739 }
11740 for (const Decl *VD : NeedToCheckForLPCs) {
11741 for (const LastprivateConditionalData &Data :
11742 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11743 if (Data.DeclToUniqueName.count(VD) > 0) {
11744 if (!Data.Disabled)
11745 NeedToAddForLPCsAsDisabled.insert(VD);
11746 break;
11747 }
11748 }
11749 }
11750}
11751
11752CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11753 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
11754 : CGM(CGF.CGM),
11755 Action((CGM.getLangOpts().OpenMP >= 50 &&
11756 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
11757 [](const OMPLastprivateClause *C) {
11758 return C->getKind() ==
11759 OMPC_LASTPRIVATE_conditional;
11760 }))
11761 ? ActionToDo::PushAsLastprivateConditional
11762 : ActionToDo::DoNotPush) {
11763 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11764 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
11765 return;
11766 assert(Action == ActionToDo::PushAsLastprivateConditional &&
11767 "Expected a push action.");
11768 LastprivateConditionalData &Data =
11769 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11770 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11771 if (C->getKind() != OMPC_LASTPRIVATE_conditional)
11772 continue;
11773
11774 for (const Expr *Ref : C->varlist()) {
11775 Data.DeclToUniqueName.insert(std::make_pair(
11776 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
11777 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
11778 }
11779 }
11780 Data.IVLVal = IVLVal;
11781 Data.Fn = CGF.CurFn;
11782}
11783
11784CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11785 CodeGenFunction &CGF, const OMPExecutableDirective &S)
11786 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
11787 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11788 if (CGM.getLangOpts().OpenMP < 50)
11789 return;
11790 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
11791 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
11792 if (!NeedToAddForLPCsAsDisabled.empty()) {
11793 Action = ActionToDo::DisableLastprivateConditional;
11794 LastprivateConditionalData &Data =
11795 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11796 for (const Decl *VD : NeedToAddForLPCsAsDisabled)
11797 Data.DeclToUniqueName.try_emplace(VD);
11798 Data.Fn = CGF.CurFn;
11799 Data.Disabled = true;
11800 }
11801}
11802
11803 CGOpenMPRuntime::LastprivateConditionalRAII
11804 CGOpenMPRuntime::LastprivateConditionalRAII::disable(
11805 CodeGenFunction &CGF, const OMPExecutableDirective &S) {
11806 return LastprivateConditionalRAII(CGF, S);
11807}
11808
11809 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11810 if (CGM.getLangOpts().OpenMP < 50)
11811 return;
11812 if (Action == ActionToDo::DisableLastprivateConditional) {
11813 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11814 "Expected list of disabled private vars.");
11815 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11816 }
11817 if (Action == ActionToDo::PushAsLastprivateConditional) {
11818 assert(
11819 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11820 "Expected list of lastprivate conditional vars.");
11821 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11822 }
11823}
11824
11825Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
11826 const VarDecl *VD) {
11827 ASTContext &C = CGM.getContext();
11828 auto I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
11829 QualType NewType;
11830 const FieldDecl *VDField;
11831 const FieldDecl *FiredField;
11832 LValue BaseLVal;
11833 auto VI = I->getSecond().find(VD);
11834 if (VI == I->getSecond().end()) {
11835 RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
11836 RD->startDefinition();
11837 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
11838 FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
11839 RD->completeDefinition();
11840 NewType = C.getCanonicalTagType(RD);
11841 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
11842 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
11843 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
11844 } else {
11845 NewType = std::get<0>(VI->getSecond());
11846 VDField = std::get<1>(VI->getSecond());
11847 FiredField = std::get<2>(VI->getSecond());
11848 BaseLVal = std::get<3>(VI->getSecond());
11849 }
11850 LValue FiredLVal =
11851 CGF.EmitLValueForField(BaseLVal, FiredField);
11852 CGF.EmitStoreOfScalar(
11853 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
11854 FiredLVal);
11855 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress();
11856}
11857
11858namespace {
11859/// Checks if the lastprivate conditional variable is referenced in LHS.
11860class LastprivateConditionalRefChecker final
11861 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11862 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11863 const Expr *FoundE = nullptr;
11864 const Decl *FoundD = nullptr;
11865 StringRef UniqueDeclName;
11866 LValue IVLVal;
11867 llvm::Function *FoundFn = nullptr;
11868 SourceLocation Loc;
11869
11870public:
11871 bool VisitDeclRefExpr(const DeclRefExpr *E) {
11872 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11873 llvm::reverse(LPM)) {
11874 auto It = D.DeclToUniqueName.find(E->getDecl());
11875 if (It == D.DeclToUniqueName.end())
11876 continue;
11877 if (D.Disabled)
11878 return false;
11879 FoundE = E;
11880 FoundD = E->getDecl()->getCanonicalDecl();
11881 UniqueDeclName = It->second;
11882 IVLVal = D.IVLVal;
11883 FoundFn = D.Fn;
11884 break;
11885 }
11886 return FoundE == E;
11887 }
11888 bool VisitMemberExpr(const MemberExpr *E) {
11889 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
11890 return false;
11891 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11892 llvm::reverse(LPM)) {
11893 auto It = D.DeclToUniqueName.find(E->getMemberDecl());
11894 if (It == D.DeclToUniqueName.end())
11895 continue;
11896 if (D.Disabled)
11897 return false;
11898 FoundE = E;
11899 FoundD = E->getMemberDecl()->getCanonicalDecl();
11900 UniqueDeclName = It->second;
11901 IVLVal = D.IVLVal;
11902 FoundFn = D.Fn;
11903 break;
11904 }
11905 return FoundE == E;
11906 }
11907 bool VisitStmt(const Stmt *S) {
11908 for (const Stmt *Child : S->children()) {
11909 if (!Child)
11910 continue;
11911 if (const auto *E = dyn_cast<Expr>(Child))
11912 if (!E->isGLValue())
11913 continue;
11914 if (Visit(Child))
11915 return true;
11916 }
11917 return false;
11918 }
11919 explicit LastprivateConditionalRefChecker(
11920 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
11921 : LPM(LPM) {}
11922 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
11923 getFoundData() const {
11924 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
11925 }
11926};
11927} // namespace
11928
11929void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
11930 LValue IVLVal,
11931 StringRef UniqueDeclName,
11932 LValue LVal,
11933 SourceLocation Loc) {
11934 // Last updated loop counter for the lastprivate conditional var.
11935 // int<xx> last_iv = 0;
11936 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
11937 llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
11938 LLIVTy, getName({UniqueDeclName, "iv"}));
11939 cast<llvm::GlobalVariable>(LastIV)->setAlignment(
11940 IVLVal.getAlignment().getAsAlign());
11941 LValue LastIVLVal =
11942 CGF.MakeNaturalAlignRawAddrLValue(LastIV, IVLVal.getType());
11943
11944 // Last value of the lastprivate conditional.
11945 // decltype(priv_a) last_a;
11946 llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
11947 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
11948 cast<llvm::GlobalVariable>(Last)->setAlignment(
11949 LVal.getAlignment().getAsAlign());
11950 LValue LastLVal =
11951 CGF.MakeRawAddrLValue(Last, LVal.getType(), LVal.getAlignment());
11952
11953 // Global loop counter. Required to handle inner parallel-for regions.
11954 // iv
11955 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
11956
11957 // #pragma omp critical(a)
11958 // if (last_iv <= iv) {
11959 // last_iv = iv;
11960 // last_a = priv_a;
11961 // }
11962 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
11963 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
11964 Action.Enter(CGF);
11965 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
11966 // (last_iv <= iv) ? Check if the variable is updated and store new
11967 // value in global var.
11968 llvm::Value *CmpRes;
11969 if (IVLVal.getType()->isSignedIntegerType()) {
11970 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
11971 } else {
11972 assert(IVLVal.getType()->isUnsignedIntegerType() &&
11973 "Loop iteration variable must be integer.");
11974 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
11975 }
11976 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
11977 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
11978 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
11979 // {
11980 CGF.EmitBlock(ThenBB);
11981
11982 // last_iv = iv;
11983 CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
11984
11985 // last_a = priv_a;
11986 switch (CGF.getEvaluationKind(LVal.getType())) {
11987 case TEK_Scalar: {
11988 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
11989 CGF.EmitStoreOfScalar(PrivVal, LastLVal);
11990 break;
11991 }
11992 case TEK_Complex: {
11993 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
11994 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
11995 break;
11996 }
11997 case TEK_Aggregate:
11998 llvm_unreachable(
11999 "Aggregates are not supported in lastprivate conditional.");
12000 }
12001 // }
12002 CGF.EmitBranch(ExitBB);
12003 // There is no need to emit line number for unconditional branch.
12004 (void)ApplyDebugLocation::CreateEmpty(CGF);
12005 CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
12006 };
12007
12008 if (CGM.getLangOpts().OpenMPSimd) {
12009 // Do not emit as a critical region as no parallel region could be emitted.
12010 RegionCodeGenTy ThenRCG(CodeGen);
12011 ThenRCG(CGF);
12012 } else {
12013 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
12014 }
12015}
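// Illustrative sketch (editorial): for
//   #pragma omp parallel for lastprivate(conditional: a)
//   for (int i = 0; i < N; ++i)
//     if (need(i)) a = compute(i);
// every store to the private 'a' funnels through the check above, so the
// global copy keeps the value from the highest iteration that actually
// assigned it; that value is copied back to the original 'a' after the loop.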
12016
12017void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12018 const Expr *LHS) {
12019 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12020 return;
12021 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12022 if (!Checker.Visit(LHS))
12023 return;
12024 const Expr *FoundE;
12025 const Decl *FoundD;
12026 StringRef UniqueDeclName;
12027 LValue IVLVal;
12028 llvm::Function *FoundFn;
12029 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12030 Checker.getFoundData();
12031 if (FoundFn != CGF.CurFn) {
12032 // Special codegen for inner parallel regions.
12033 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12034 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12035 assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12036 "Lastprivate conditional is not found in outer region.");
12037 QualType StructTy = std::get<0>(It->getSecond());
12038 const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12039 LValue PrivLVal = CGF.EmitLValue(FoundE);
12040 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12041 PrivLVal.getAddress(),
12042 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
12043 CGF.ConvertTypeForMem(StructTy));
12044 LValue BaseLVal =
12045 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12046 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12047 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12048 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12049 FiredLVal, llvm::AtomicOrdering::Unordered,
12050 /*IsVolatile=*/true, /*isInit=*/false);
12051 return;
12052 }
12053
12054 // Private address of the lastprivate conditional in the current context.
12055 // priv_a
12056 LValue LVal = CGF.EmitLValue(FoundE);
12057 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12058 FoundE->getExprLoc());
12059}
12060
12061void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12062 CodeGenFunction &CGF, const OMPExecutableDirective &D,
12063 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12064 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12065 return;
12066 auto Range = llvm::reverse(LastprivateConditionalStack);
12067 auto It = llvm::find_if(
12068 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12069 if (It == Range.end() || It->Fn != CGF.CurFn)
12070 return;
12071 auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12072 assert(LPCI != LastprivateConditionalToTypes.end() &&
12073 "Lastprivates must be registered already.");
12074 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12075 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12076 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12077 for (const auto &Pair : It->DeclToUniqueName) {
12078 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12079 if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
12080 continue;
12081 auto I = LPCI->getSecond().find(Pair.first);
12082 assert(I != LPCI->getSecond().end() &&
12083 "Lastprivate must be rehistered already.");
12084 // bool Cmp = priv_a.Fired != 0;
12085 LValue BaseLVal = std::get<3>(I->getSecond());
12086 LValue FiredLVal =
12087 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12088 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12089 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12090 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12091 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12092 // if (Cmp) {
12093 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12094 CGF.EmitBlock(ThenBB);
12095 Address Addr = CGF.GetAddrOfLocalVar(VD);
12096 LValue LVal;
12097 if (VD->getType()->isReferenceType())
12098 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12099 AlignmentSource::Decl);
12100 else
12101 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12102 AlignmentSource::Decl);
12103 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12104 D.getBeginLoc());
12105 auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12106 CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12107 // }
12108 }
12109}
12110
12111 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12112 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12113 SourceLocation Loc) {
12114 if (CGF.getLangOpts().OpenMP < 50)
12115 return;
12116 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12117 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12118 "Unknown lastprivate conditional variable.");
12119 StringRef UniqueName = It->second;
12120 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12121 // The variable was not updated in the region - exit.
12122 if (!GV)
12123 return;
12124 LValue LPLVal = CGF.MakeRawAddrLValue(
12125 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
12126 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12127 CGF.EmitStoreOfScalar(Res, PrivLVal);
12128}
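// Taken together, the three functions above implement the OpenMP 5.0
// lastprivate conditional protocol: conditional assignments update (or, from
// nested regions, merely mark) the tracked variable;
// checkAndEmitSharedLastprivateConditional scans the Fired flags at region
// boundaries; and emitLastprivateConditionalFinalUpdate copies the winning
// value from the global copy created by the update helper back into the
// original variable once the construct finishes.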
12129
12130 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
12131 CodeGenFunction &CGF, const OMPExecutableDirective &D,
12132 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
12133 const RegionCodeGenTy &CodeGen) {
12134 llvm_unreachable("Not supported in SIMD-only mode");
12135}
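// CGOpenMPSIMDRuntime is the runtime installed for SIMD-only mode
// (-fopenmp-simd). In that mode only simd-related directives are honored and
// everything else is emitted as plain serial code, so the SIMD-only stubs in
// the remainder of this file should never be invoked; each one traps with
// llvm_unreachable. A sketch of code that stays fully supported in this mode:
//
//   // clang -fopenmp-simd example.c
//   #pragma omp simd
//   for (int i = 0; i < n; ++i)
//     x[i] += y[i];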
12136
12137 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
12138 CodeGenFunction &CGF, const OMPExecutableDirective &D,
12139 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
12140 const RegionCodeGenTy &CodeGen) {
12141 llvm_unreachable("Not supported in SIMD-only mode");
12142}
12143
12144 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
12145 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12146 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
12147 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
12148 bool Tied, unsigned &NumberOfParts) {
12149 llvm_unreachable("Not supported in SIMD-only mode");
12150}
12151
12152 void CGOpenMPSIMDRuntime::emitParallelCall(
12153 CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
12154 ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond,
12155 llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier,
12156 OpenMPSeverityClauseKind Severity, const Expr *Message) {
12157 llvm_unreachable("Not supported in SIMD-only mode");
12158}
12159
12160 void CGOpenMPSIMDRuntime::emitCriticalRegion(
12161 CodeGenFunction &CGF, StringRef CriticalName,
12162 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
12163 const Expr *Hint) {
12164 llvm_unreachable("Not supported in SIMD-only mode");
12165}
12166
12167 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
12168 const RegionCodeGenTy &MasterOpGen,
12169 SourceLocation Loc) {
12170 llvm_unreachable("Not supported in SIMD-only mode");
12171}
12172
12173 void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
12174 const RegionCodeGenTy &MasterOpGen,
12175 SourceLocation Loc,
12176 const Expr *Filter) {
12177 llvm_unreachable("Not supported in SIMD-only mode");
12178}
12179
12180 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
12181 SourceLocation Loc) {
12182 llvm_unreachable("Not supported in SIMD-only mode");
12183}
12184
12185 void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
12186 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
12187 SourceLocation Loc) {
12188 llvm_unreachable("Not supported in SIMD-only mode");
12189}
12190
12191 void CGOpenMPSIMDRuntime::emitSingleRegion(
12192 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
12193 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
12194 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
12195 ArrayRef<const Expr *> AssignmentOps) {
12196 llvm_unreachable("Not supported in SIMD-only mode");
12197}
12198
12199 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
12200 const RegionCodeGenTy &OrderedOpGen,
12201 SourceLocation Loc,
12202 bool IsThreads) {
12203 llvm_unreachable("Not supported in SIMD-only mode");
12204}
12205
12206 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
12207 SourceLocation Loc,
12208 OpenMPDirectiveKind Kind,
12209 bool EmitChecks,
12210 bool ForceSimpleCall) {
12211 llvm_unreachable("Not supported in SIMD-only mode");
12212}
12213
12214 void CGOpenMPSIMDRuntime::emitForDispatchInit(
12215 CodeGenFunction &CGF, SourceLocation Loc,
12216 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
12217 bool Ordered, const DispatchRTInput &DispatchValues) {
12218 llvm_unreachable("Not supported in SIMD-only mode");
12219}
12220
12221 void CGOpenMPSIMDRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
12222 SourceLocation Loc) {
12223 llvm_unreachable("Not supported in SIMD-only mode");
12224}
12225
12226 void CGOpenMPSIMDRuntime::emitForStaticInit(
12227 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
12228 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
12229 llvm_unreachable("Not supported in SIMD-only mode");
12230}
12231
12232 void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
12233 CodeGenFunction &CGF, SourceLocation Loc,
12234 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
12235 llvm_unreachable("Not supported in SIMD-only mode");
12236}
12237
12238 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
12239 SourceLocation Loc,
12240 unsigned IVSize,
12241 bool IVSigned) {
12242 llvm_unreachable("Not supported in SIMD-only mode");
12243}
12244
12245 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
12246 SourceLocation Loc,
12247 OpenMPDirectiveKind DKind) {
12248 llvm_unreachable("Not supported in SIMD-only mode");
12249}
12250
12251 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
12252 SourceLocation Loc,
12253 unsigned IVSize, bool IVSigned,
12254 Address IL, Address LB,
12255 Address UB, Address ST) {
12256 llvm_unreachable("Not supported in SIMD-only mode");
12257}
12258
12259 void CGOpenMPSIMDRuntime::emitNumThreadsClause(
12260 CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
12261 OpenMPNumThreadsClauseModifier Modifier, OpenMPSeverityClauseKind Severity,
12262 const Expr *Message) {
12263 llvm_unreachable("Not supported in SIMD-only mode");
12264}
12265
12266 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
12267 ProcBindKind ProcBind,
12268 SourceLocation Loc) {
12269 llvm_unreachable("Not supported in SIMD-only mode");
12270}
12271
12272 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
12273 const VarDecl *VD,
12274 Address VDAddr,
12275 SourceLocation Loc) {
12276 llvm_unreachable("Not supported in SIMD-only mode");
12277}
12278
12279 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
12280 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
12281 CodeGenFunction *CGF) {
12282 llvm_unreachable("Not supported in SIMD-only mode");
12283}
12284
12285 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
12286 CodeGenFunction &CGF, QualType VarType, StringRef Name) {
12287 llvm_unreachable("Not supported in SIMD-only mode");
12288}
12289
12290 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
12291 ArrayRef<const Expr *> Vars,
12292 SourceLocation Loc,
12293 llvm::AtomicOrdering AO) {
12294 llvm_unreachable("Not supported in SIMD-only mode");
12295}
12296
12297 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
12298 const OMPExecutableDirective &D,
12299 llvm::Function *TaskFunction,
12300 QualType SharedsTy, Address Shareds,
12301 const Expr *IfCond,
12302 const OMPTaskDataTy &Data) {
12303 llvm_unreachable("Not supported in SIMD-only mode");
12304}
12305
12306 void CGOpenMPSIMDRuntime::emitTaskLoopCall(
12307 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
12308 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
12309 const Expr *IfCond, const OMPTaskDataTy &Data) {
12310 llvm_unreachable("Not supported in SIMD-only mode");
12311}
12312
12313 void CGOpenMPSIMDRuntime::emitReduction(
12314 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
12315 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
12316 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
12317 assert(Options.SimpleReduction && "Only simple reduction is expected.");
12318 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
12319 ReductionOps, Options);
12320}
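// This override is the notable exception among the SIMD-only stubs: a
// reduction on a simd loop is still lowered, but only in its "simple" form,
// where each private copy is combined directly into the original variable
// without runtime library calls (hence the assert above). Illustrative input:
//
//   // clang -fopenmp-simd example.c
//   #pragma omp simd reduction(+ : sum)
//   for (int i = 0; i < n; ++i)
//     sum += a[i];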
12321
12322 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
12323 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
12324 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
12325 llvm_unreachable("Not supported in SIMD-only mode");
12326}
12327
12328 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
12329 SourceLocation Loc,
12330 bool IsWorksharingReduction) {
12331 llvm_unreachable("Not supported in SIMD-only mode");
12332}
12333
12334 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
12335 SourceLocation Loc,
12336 ReductionCodeGen &RCG,
12337 unsigned N) {
12338 llvm_unreachable("Not supported in SIMD-only mode");
12339}
12340
12341 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
12342 SourceLocation Loc,
12343 llvm::Value *ReductionsPtr,
12344 LValue SharedLVal) {
12345 llvm_unreachable("Not supported in SIMD-only mode");
12346}
12347
12348 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
12349 SourceLocation Loc,
12350 const OMPTaskDataTy &Data) {
12351 llvm_unreachable("Not supported in SIMD-only mode");
12352}
12353
12354 void CGOpenMPSIMDRuntime::emitCancellationPointCall(
12355 CodeGenFunction &CGF, SourceLocation Loc,
12356 OpenMPDirectiveKind CancelRegion) {
12357 llvm_unreachable("Not supported in SIMD-only mode");
12358}
12359
12360 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
12361 SourceLocation Loc, const Expr *IfCond,
12362 OpenMPDirectiveKind CancelRegion) {
12363 llvm_unreachable("Not supported in SIMD-only mode");
12364}
12365
12366 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
12367 const OMPExecutableDirective &D, StringRef ParentName,
12368 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
12369 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
12370 llvm_unreachable("Not supported in SIMD-only mode");
12371}
12372
12373 void CGOpenMPSIMDRuntime::emitTargetCall(
12374 CodeGenFunction &CGF, const OMPExecutableDirective &D,
12375 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
12376 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
12377 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
12378 const OMPLoopDirective &D)>
12379 SizeEmitter) {
12380 llvm_unreachable("Not supported in SIMD-only mode");
12381}
12382
12383 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
12384 llvm_unreachable("Not supported in SIMD-only mode");
12385}
12386
12387 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
12388 llvm_unreachable("Not supported in SIMD-only mode");
12389}
12390
12391 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
12392 return false;
12393}
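// Note the deliberate non-trap here: CodeGenModule queries emitTargetGlobal
// for globals during ordinary host compilation, so in SIMD-only mode the
// query must simply answer "not handled here" and let default emission
// proceed.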
12394
12395 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
12396 const OMPExecutableDirective &D,
12397 SourceLocation Loc,
12398 llvm::Function *OutlinedFn,
12399 ArrayRef<llvm::Value *> CapturedVars) {
12400 llvm_unreachable("Not supported in SIMD-only mode");
12401}
12402
12403 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
12404 const Expr *NumTeams,
12405 const Expr *ThreadLimit,
12406 SourceLocation Loc) {
12407 llvm_unreachable("Not supported in SIMD-only mode");
12408}
12409
12410 void CGOpenMPSIMDRuntime::emitTargetDataCalls(
12411 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12412 const Expr *Device, const RegionCodeGenTy &CodeGen,
12413 CGOpenMPRuntime::TargetDataInfo &Info) {
12414 llvm_unreachable("Not supported in SIMD-only mode");
12415}
12416
12417 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
12418 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12419 const Expr *Device) {
12420 llvm_unreachable("Not supported in SIMD-only mode");
12421}
12422
12423 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12424 const OMPLoopDirective &D,
12425 ArrayRef<Expr *> NumIterations) {
12426 llvm_unreachable("Not supported in SIMD-only mode");
12427}
12428
12429 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12430 const OMPDependClause *C) {
12431 llvm_unreachable("Not supported in SIMD-only mode");
12432}
12433
12434 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12435 const OMPDoacrossClause *C) {
12436 llvm_unreachable("Not supported in SIMD-only mode");
12437}
12438
12439const VarDecl *
12440 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
12441 const VarDecl *NativeParam) const {
12442 llvm_unreachable("Not supported in SIMD-only mode");
12443}
12444
12445Address
12446 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
12447 const VarDecl *NativeParam,
12448 const VarDecl *TargetParam) const {
12449 llvm_unreachable("Not supported in SIMD-only mode");
12450}
#define V(N, I)
Definition: ASTContext.h:3597
StringRef P
static llvm::Value * emitCopyprivateCopyFunction(CodeGenModule &CGM, llvm::Type *ArgsElemType, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps, SourceLocation Loc)
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF, SourceLocation Loc, SmallString< 128 > &Buffer)
static void emitOffloadingArraysAndArgs(CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, bool IsNonContiguous=false, bool ForEndCall=false)
Emit the arrays used to pass the captures and map information to the offloading runtime library.
static RecordDecl * createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, ArrayRef< PrivateDataTy > Privates)
static void emitInitWithReductionInitializer(CodeGenFunction &CGF, const OMPDeclareReductionDecl *DRD, const Expr *InitOp, Address Private, Address Original, QualType Ty)
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, Address OriginalBaseAddress, llvm::Value *Addr)
static void emitPrivatesInit(CodeGenFunction &CGF, const OMPExecutableDirective &D, Address KmpTaskSharedsPtr, LValue TDBase, const RecordDecl *KmpTaskTWithPrivatesQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool ForDup)
Emit initialization for private variables in task-based directives.
static void emitClauseForBareTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &Values)
static llvm::Value * emitDestructorsFunction(CodeGenModule &CGM, SourceLocation Loc, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy)
static unsigned evaluateCDTSize(const FunctionDecl *FD, ArrayRef< ParamAttrTy > ParamAttrs)
static void EmitOMPAggregateReduction(CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, const VarDecl *RHSVar, const llvm::function_ref< void(CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *)> &RedOpGen, const Expr *XExpr=nullptr, const Expr *EExpr=nullptr, const Expr *UpExpr=nullptr)
Emit reduction operation for each element of array (required for array sections) LHS op = RHS.
static void emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, CodeGenFunction &CGF)
static llvm::Value * emitReduceInitFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction initializer function:
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion)
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, llvm::PointerUnion< unsigned *, LValue * > Pos, const OMPTaskDataTy::DependData &Data, Address DependenciesArray)
static llvm::Value * emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, const OMPTaskDataTy &Data, QualType PrivatesQTy, ArrayRef< PrivateDataTy > Privates)
Emit a privates mapping function for correct handling of private and firstprivate variables.
static llvm::Value * emitReduceCombFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N, const Expr *ReductionOp, const Expr *LHS, const Expr *RHS, const Expr *PrivateRef)
Emits reduction combiner function:
static RecordDecl * createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef< PrivateDataTy > Privates)
static llvm::Value * getAllocatorVal(CodeGenFunction &CGF, const Expr *Allocator)
Return allocator value from expression, or return a null allocator (default when no allocator specifi...
static llvm::Function * emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, QualType SharedsPtrTy, llvm::Function *TaskFunction, llvm::Value *TaskPrivatesMap)
Emit a proxy function which accepts kmp_task_t as the second argument.
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static bool isAllocatableDecl(const VarDecl *VD)
static llvm::Value * getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD)
Return the alignment from an allocate directive if present.
static void emitTargetCallKernelLaunch(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo, llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter, CodeGenFunction &CGF, CodeGenModule &CGM)
static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind convertCaptureClause(const VarDecl *VD)
static std::tuple< unsigned, unsigned, bool > getNDSWDS(const FunctionDecl *FD, ArrayRef< ParamAttrTy > ParamAttrs)
static const OMPExecutableDirective * getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D)
Check for inner distribute directive.
static std::pair< llvm::Value *, llvm::Value * > getPointerAndSize(CodeGenFunction &CGF, const Expr *E)
static const VarDecl * getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE)
static bool isTrivial(ASTContext &Ctx, const Expr *E)
Checks if the expression is constant or does not have non-trivial function calls.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, bool Chunked, bool Ordered)
Map the OpenMP loop schedule to the runtime enumeration.
static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, const Expr **E, int32_t &UpperBound, bool UpperBoundOnly, llvm::Value **CondVal)
Check for a num threads constant value (stored in DefaultVal), or expression (stored in E).
static llvm::Value * emitDeviceID(llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, CodeGenFunction &CGF)
static const OMPDeclareReductionDecl * getReductionInit(const Expr *ReductionOp)
Check if the combiner is a call to UDR combiner and if it is so return the UDR decl used for reductio...
static bool checkInitIsRequired(CodeGenFunction &CGF, ArrayRef< PrivateDataTy > Privates)
Check if duplication function is required for taskloops.
static bool checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, ArrayRef< PrivateDataTy > Privates)
Checks if destructor function is required to be generated.
static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder, SourceLocation BeginLoc, llvm::StringRef ParentName="")
static void genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder, const llvm::DenseSet< CanonicalDeclPtr< const Decl > > &SkippedVarSet=llvm::DenseSet< CanonicalDeclPtr< const Decl > >())
static void emitForStaticInitCall(CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, const CGOpenMPRuntime::StaticRTInput &Values)
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, LValue BaseLV)
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy)
Builds kmp_depend_info, if it is not built yet, and builds flags type.
static llvm::Constant * emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, MappableExprsHandler::MappingExprInfo &MapExprs)
Emit a string constant containing the names of the values mapped to the offloading runtime library.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, QualType &FlagsTy)
Builds kmp_depend_info, if it is not built yet, and builds flags type.
static llvm::Value * emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, const OMPExecutableDirective &D, QualType KmpTaskTWithPrivatesPtrQTy, const RecordDecl *KmpTaskTWithPrivatesQTyRD, const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool WithLastIter)
Emit task_dup function (for initialization of private/firstprivate/lastprivate vars and last_iter fla...
static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind convertDeviceClause(const VarDecl *VD)
static llvm::Value * emitReduceFiniFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction finalizer function:
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, QualType Type, bool EmitDeclareReductionInit, const Expr *Init, const OMPDeclareReductionDecl *DRD, Address SrcAddr=Address::invalid())
Emit initialization of arrays of complex types.
static bool getAArch64PBV(QualType QT, ASTContext &C)
Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C)
Computes the lane size (LS) of a return type or of an input parameter, as defined by LS(P) in 3....
static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM, const T *C, llvm::Value *ULoc, llvm::Value *ThreadID)
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K)
Translates internal dependency kind into the runtime kind.
static void emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, CodeGenFunction &CGF)
static llvm::Function * emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, const Expr *CombinerInitializer, const VarDecl *In, const VarDecl *Out, bool IsCombiner)
static void emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, const llvm::APSInt &VLENVal, ArrayRef< ParamAttrTy > ParamAttrs, OMPDeclareSimdDeclAttr::BranchStateTy State)
static void emitReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp)
Emit reduction combiner.
static std::string mangleVectorParameters(ArrayRef< ParamAttrTy > ParamAttrs)
Mangle the parameter part of the vector function name according to their OpenMP classification.
static std::string generateUniqueName(CodeGenModule &CGM, llvm::StringRef Prefix, const Expr *Ref)
static llvm::Function * emitParallelOrTeamsOutlinedFunction(CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen)
static void emitAArch64DeclareSimdFunction(CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, ArrayRef< ParamAttrTy > ParamAttrs, OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc)
Emit vector function attributes for AArch64, as defined in the AAVFABI.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, unsigned Index, const VarDecl *Var)
Given an array of pointers to variables, project the address of a given variable.
static llvm::Value * emitDynCGGroupMem(const OMPExecutableDirective &D, CodeGenFunction &CGF)
static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice)
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static FieldDecl * addFieldToRecordDecl(ASTContext &C, DeclContext *DC, QualType FieldTy)
static ValueDecl * getDeclFromThisExpr(const Expr *E)
static void genMapInfoForCaptures(MappableExprsHandler &MEHandler, CodeGenFunction &CGF, const CapturedStmt &CS, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, llvm::OpenMPIRBuilder &OMPBuilder, llvm::DenseSet< CanonicalDeclPtr< const Decl > > &MappedVarSet, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo)
static RecordDecl * createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpRoutineEntryPointerQTy)
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2)
static bool getAArch64MTV(QualType QT, ParamKindTy Kind)
Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
const Decl * D
enum clang::sema::@1840::IndirectLocalPathEntry::EntryKind Kind
Expr * E
int Priority
Definition: Format.cpp:3181
#define X(type, name)
Definition: Value.h:145
#define SM(sm)
Definition: OffloadArch.cpp:16
This file defines OpenMP AST classes for clauses.
Defines some OpenMP-specific enums and functions.
SourceRange Range
Definition: SemaObjC.cpp:753
SourceLocation Loc
Definition: SemaObjC.cpp:754
Defines the SourceManager interface.
const char * Data
This file defines OpenMP AST classes for executable directives and clauses.
SourceLocation Begin
__DEVICE__ int max(int __a, int __b)
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:188
SourceManager & getSourceManager()
Definition: ASTContext.h:801
const ConstantArrayType * getAsConstantArrayType(QualType T) const
Definition: ASTContext.h:3056
CharUnits getTypeAlignInChars(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in characters.
const ASTRecordLayout & getASTRecordLayout(const RecordDecl *D) const
Get or compute information about the layout of the specified record (struct/union/class) D,...
bool hasSameType(QualType T1, QualType T2) const
Determine whether the given types T1 and T2 are equivalent.
Definition: ASTContext.h:2867
QualType getPointerType(QualType T) const
Return the uniqued reference to the type for a pointer to the specified type.
CanQualType VoidPtrTy
Definition: ASTContext.h:1249
QualType getConstantArrayType(QualType EltTy, const llvm::APInt &ArySize, const Expr *SizeExpr, ArraySizeModifier ASM, unsigned IndexTypeQuals) const
Return the unique reference to the type for a constant array of the specified element type.
const LangOptions & getLangOpts() const
Definition: ASTContext.h:894
CanQualType BoolTy
Definition: ASTContext.h:1223
QualType getIntTypeForBitwidth(unsigned DestWidth, unsigned Signed) const
getIntTypeForBitwidth - sets integer QualTy according to specified details: bitwidth,...
CharUnits getDeclAlign(const Decl *D, bool ForAlignof=false) const
Return a conservative estimate of the alignment of the specified decl D.
int64_t toBits(CharUnits CharSize) const
Convert a size in characters to a size in bits.
const ArrayType * getAsArrayType(QualType T) const
Type Query functions.
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
Definition: ASTContext.h:2625
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
CanQualType VoidTy
Definition: ASTContext.h:1222
const VariableArrayType * getAsVariableArrayType(QualType T) const
Definition: ASTContext.h:3059
QualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
const TargetInfo & getTargetInfo() const
Definition: ASTContext.h:859
unsigned getTypeAlign(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in bits.
Definition: ASTContext.h:2656
ASTRecordLayout - This class contains layout information for one RecordDecl, which is a struct/union/...
Definition: RecordLayout.h:38
CharUnits getSize() const
getSize - Get the record size in characters.
Definition: RecordLayout.h:194
uint64_t getFieldOffset(unsigned FieldNo) const
getFieldOffset - Get the offset of the given field index, in bits.
Definition: RecordLayout.h:201
CharUnits getNonVirtualSize() const
getNonVirtualSize - Get the non-virtual size (in chars) of an object, which is the size of the object...
Definition: RecordLayout.h:211
static QualType getBaseOriginalType(const Expr *Base)
Return original type of the base expression for array section.
Definition: Expr.cpp:5224
Represents an array type, per C99 6.7.5.2 - Array Declarators.
Definition: TypeBase.h:3738
Attr - This represents one attribute.
Definition: Attr.h:44
Represents a C++ constructor within a class.
Definition: DeclCXX.h:2604
Represents a C++ destructor within a class.
Definition: DeclCXX.h:2869
Represents a static or instance method of a struct/union/class.
Definition: DeclCXX.h:2129
const CXXRecordDecl * getParent() const
Return the parent of this method declaration, which is the class in which this method is defined.
Definition: DeclCXX.h:2255
QualType getFunctionObjectParameterType() const
Definition: DeclCXX.h:2279
Represents a C++ struct/union/class.
Definition: DeclCXX.h:258
base_class_range bases()
Definition: DeclCXX.h:608
bool isLambda() const
Determine whether this class describes a lambda function object.
Definition: DeclCXX.h:1018
void getCaptureFields(llvm::DenseMap< const ValueDecl *, FieldDecl * > &Captures, FieldDecl *&ThisCapture) const
For a closure type, retrieve the mapping from captured variables and this to the non-static data memb...
Definition: DeclCXX.cpp:1784
unsigned getNumBases() const
Retrieves the number of base classes of this class.
Definition: DeclCXX.h:602
base_class_range vbases()
Definition: DeclCXX.h:625
capture_const_range captures() const
Definition: DeclCXX.h:1097
ctor_range ctors() const
Definition: DeclCXX.h:670
CXXDestructorDecl * getDestructor() const
Returns the destructor decl for this class.
Definition: DeclCXX.cpp:2121
CanProxy< U > castAs() const
A wrapper class around a pointer that always points to its canonical declaration.
Definition: Redeclarable.h:346
Describes the capture of either a variable, or 'this', or variable-length array type.
Definition: Stmt.h:3899
bool capturesVariableByCopy() const
Determine whether this capture handles a variable by copy.
Definition: Stmt.h:3933
VarDecl * getCapturedVar() const
Retrieve the declaration of the variable being captured.
Definition: Stmt.cpp:1349
bool capturesVariableArrayType() const
Determine whether this capture handles a variable-length array type.
Definition: Stmt.h:3939
bool capturesThis() const
Determine whether this capture handles the C++ 'this' pointer.
Definition: Stmt.h:3927
bool capturesVariable() const
Determine whether this capture handles a variable (by reference).
Definition: Stmt.h:3930
This captures a statement into a function.
Definition: Stmt.h:3886
capture_iterator capture_end() const
Retrieve an iterator pointing past the end of the sequence of captures.
Definition: Stmt.h:4037
const RecordDecl * getCapturedRecordDecl() const
Retrieve the record declaration for captured variables.
Definition: Stmt.h:4007
Stmt * getCapturedStmt()
Retrieve the statement being captured.
Definition: Stmt.h:3990
bool capturesVariable(const VarDecl *Var) const
True if this variable has been captured.
Definition: Stmt.cpp:1475
capture_iterator capture_begin()
Retrieve an iterator pointing to the first capture.
Definition: Stmt.h:4032
capture_range captures()
Definition: Stmt.h:4024
CharUnits - This is an opaque type for sizes expressed in character units.
Definition: CharUnits.h:38
bool isZero() const
isZero - Test whether the quantity equals zero.
Definition: CharUnits.h:122
llvm::Align getAsAlign() const
getAsAlign - Returns Quantity as a valid llvm::Align, Beware llvm::Align assumes power of two 8-bit b...
Definition: CharUnits.h:189
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition: CharUnits.h:185
CharUnits alignmentOfArrayElement(CharUnits elementSize) const
Given that this is the alignment of the first element of an array, return the minimum alignment of an...
Definition: CharUnits.h:214
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition: CharUnits.h:63
CharUnits alignTo(const CharUnits &Align) const
alignTo - Returns the next integer (mod 2**64) that is greater than or equal to this quantity and is ...
Definition: CharUnits.h:201
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...
Definition: Address.h:128
static Address invalid()
Definition: Address.h:176
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
Return the pointer contained in this class after authenticating it and adding offset to it if necessa...
Definition: Address.h:253
CharUnits getAlignment() const
Definition: Address.h:194
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition: Address.h:209
Address withPointer(llvm::Value *NewPointer, KnownNonNull_t IsKnownNonNull) const
Return address with different pointer, but same element type and alignment.
Definition: Address.h:261
Address withElementType(llvm::Type *ElemTy) const
Return address with different element type, but same pointer and alignment.
Definition: Address.h:276
Address withAlignment(CharUnits NewAlignment) const
Return address with different alignment, but same pointer and element type.
Definition: Address.h:269
bool isValid() const
Definition: Address.h:177
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition: Address.h:204
static ApplyDebugLocation CreateArtificial(CodeGenFunction &CGF)
Apply TemporaryLocation if it is valid.
Definition: CGDebugInfo.h:946
static ApplyDebugLocation CreateDefaultArtificial(CodeGenFunction &CGF, SourceLocation TemporaryLocation)
Apply TemporaryLocation if it is valid.
Definition: CGDebugInfo.h:953
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF)
Set the IRBuilder to not attach debug locations.
Definition: CGDebugInfo.h:963
CGBlockInfo - Information to generate a block literal.
Definition: CGBlocks.h:157
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Definition: CGBuilder.h:140
Address CreateGEP(CodeGenFunction &CGF, Address Addr, llvm::Value *Index, const llvm::Twine &Name="")
Definition: CGBuilder.h:296
Address CreatePointerBitCastOrAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Definition: CGBuilder.h:207
Address CreateConstArrayGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = [n x T]* ... produce name = getelementptr inbounds addr, i64 0, i64 index where i64 is a...
Definition: CGBuilder.h:245
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
Definition: CGBuilder.h:112
llvm::CallInst * CreateMemCpy(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:369
Address CreateConstGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = T* ... produce name = getelementptr inbounds addr, i64 index where i64 is actually the t...
Definition: CGBuilder.h:282
MangleContext & getMangleContext()
Gets the mangle context.
Definition: CGCXXABI.h:113
CGFunctionInfo - Class to encapsulate the information about a function definition.
Manages list of lastprivate conditional decls for the specified directive.
static LastprivateConditionalRAII disable(CodeGenFunction &CGF, const OMPExecutableDirective &S)
NontemporalDeclsRAII(CodeGenModule &CGM, const OMPLoopDirective &S)
Struct that keeps all the relevant information that should be kept throughout a 'target data' region.
llvm::DenseMap< const ValueDecl *, llvm::Value * > CaptureDeviceAddrMap
Map between the a declaration of a capture and the corresponding new llvm address where the runtime r...
UntiedTaskLocalDeclsRAII(CodeGenFunction &CGF, const llvm::MapVector< CanonicalDeclPtr< const VarDecl >, std::pair< Address, Address > > &LocalVars)
virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc)
Emits address of the word in a memory where current thread id is stored.
llvm::StringSet ThreadPrivateWithDefinition
Set of threadprivate variables with the generated initializer.
virtual void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
Emit task region for the task directive.
void createOffloadEntriesAndInfoMetadata()
Creates all the offload entries in the current compilation unit along with the associated metadata.
const Expr * getNumTeamsExprForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal, int32_t &MaxTeamsVal)
Emit the number of teams for a target directive.
virtual Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc)
Returns address of the threadprivate variable for the current thread.
void emitDeferredTargetDecls() const
Emit deferred declare target variables marked for deferred emission.
virtual llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST)
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
bool markAsGlobalTarget(GlobalDecl GD)
Marks the declaration as already emitted for the device code and returns true, if it was marked alrea...
virtual void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, const Expr *Message=nullptr)
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
virtual void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device)
Emit the data mapping/movement code associated with the directive D that should be of the form 'targe...
QualType SavedKmpTaskloopTQTy
Saved kmp_task_t for taskloop-based directive.
virtual void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps)
Emits a single region.
virtual bool emitTargetGlobal(GlobalDecl GD)
Emit the global GD if it is meaningful for the target.
void setLocThreadIdInsertPt(CodeGenFunction &CGF, bool AtCurrentPoint=false)
std::string getOutlinedHelperName(StringRef Name) const
Get the function name of an outlined region.
bool HasEmittedDeclareTargetRegion
Flag for keeping track of weather a device routine has been emitted.
llvm::Constant * getOrCreateThreadPrivateCache(const VarDecl *VD)
If the specified mangled name is not in the module, create and return threadprivate cache object.
virtual Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal)
Get the address of void * type of the privatue copy of the reduction item specified by the SharedLVal...
virtual void emitForDispatchDeinit(CodeGenFunction &CGF, SourceLocation Loc)
This is used for non static scheduled types and when the ordered clause is present on the loop constr...
void emitCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee Callee, ArrayRef< llvm::Value * > Args={}) const
Emits Callee function call with arguments Args with location Loc.
virtual void getDefaultScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const
Choose default schedule type and chunk value for the schedule clause.
virtual std::pair< llvm::Function *, llvm::Function * > getUserDefinedReduction(const OMPDeclareReductionDecl *D)
Get combiner/initializer for the specified user-defined reduction, if any.
virtual bool isGPU() const
Returns true if the current target is a GPU.
static const Stmt * getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body)
Checks if the Body is the CompoundStmt and returns its child statement iff there is only one that is ...
virtual void emitDeclareTargetFunction(const FunctionDecl *FD, llvm::GlobalValue *GV)
Emit code for handling declare target functions in the runtime.
bool HasRequiresUnifiedSharedMemory
Flag for keeping track of weather a requires unified_shared_memory directive is present.
llvm::Value * emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags=0, bool EmitLoc=false)
Emits object of ident_t type with info for source location.
bool isLocalVarInUntiedTask(CodeGenFunction &CGF, const VarDecl *VD) const
Returns true if the variable is a local variable in untied task.
virtual void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars)
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...
virtual void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancellation point' construct.
virtual llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr)
Emit a code for initialization of threadprivate variable.
virtual ConstantAddress getAddrOfDeclareTargetVar(const VarDecl *VD)
Returns the address of the variable marked as declare target with link clause OR as declare target wi...
llvm::MapVector< CanonicalDeclPtr< const VarDecl >, std::pair< Address, Address > > UntiedLocalVarsAddressesMap
llvm::Function * getOrCreateUserDefinedMapperFunc(const OMPDeclareMapperDecl *D)
Get the function for the specified user-defined mapper.
OpenMPLocThreadIDMapTy OpenMPLocThreadIDMap
virtual void functionFinished(CodeGenFunction &CGF)
Cleans up references to the objects in finished function.
virtual llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP teams directive D.
QualType KmpTaskTQTy
Type typedef struct kmp_task { void * shareds; /‍**< pointer to block of pointers to shared vars ‍/ k...
llvm::OpenMPIRBuilder OMPBuilder
An OpenMP-IR-Builder instance.
virtual void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr * > NumIterations)
Emit initialization for doacross loop nesting support.
virtual void adjustTargetSpecificDataForLambdas(CodeGenFunction &CGF, const OMPExecutableDirective &D) const
Adjust some parameters for the target-based directives, like addresses of the variables captured by r...
virtual void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, CGOpenMPRuntime::TargetDataInfo &Info)
Emit the target data mapping code associated with D.
virtual unsigned getDefaultLocationReserved2Flags() const
Returns additional flags that can be stored in reserved_2 field of the default location.
virtual Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const
Gets the address of the native argument basing on the address of the target-specific parameter.
void emitUsesAllocatorsFini(CodeGenFunction &CGF, const Expr *Allocator)
Destroys user defined allocators specified in the uses_allocators clause.
QualType KmpTaskAffinityInfoTy
Type typedef struct kmp_task_affinity_info { kmp_intptr_t base_addr; size_t len; struct { bool flag1 ...
llvm::SmallVector< NontemporalDeclsSet, 4 > NontemporalDeclsStack
Stack for list of declarations in current context marked as nontemporal.
void emitPrivateReduction(CodeGenFunction &CGF, SourceLocation Loc, const Expr *Privates, const Expr *LHSExprs, const Expr *RHSExprs, const Expr *ReductionOps)
Emits code for private variable reduction.
llvm::Value * emitNumTeamsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D)
virtual void emitTargetOutlinedFunctionHelper(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Helper to emit outlined function for 'target' directive.
void scanForTargetRegionsFunctions(const Stmt *S, StringRef ParentName)
Start scanning from statement S and emit all target regions found along the way.
SmallVector< llvm::Value *, 4 > emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy, const OMPTaskDataTy::DependData &Data)
virtual void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc)
Emit a taskgroup region.
virtual llvm::Value * emitMessageClause(CodeGenFunction &CGF, const Expr *Message)
llvm::DenseMap< llvm::Function *, llvm::DenseMap< CanonicalDeclPtr< const Decl >, std::tuple< QualType, const FieldDecl *, const FieldDecl *, LValue > > > LastprivateConditionalToTypes
Maps local variables marked as lastprivate conditional to their internal types.
virtual bool emitTargetGlobalVariable(GlobalDecl GD)
Emit the global variable if it is a valid device global variable.
virtual void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc)
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams,...
bool hasRequiresUnifiedSharedMemory() const
Return whether the unified_shared_memory has been specified.
virtual Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name)
Creates artificial threadprivate variable with name Name and type VarType.
void emitUserDefinedMapper(const OMPDeclareMapperDecl *D, CodeGenFunction *CGF=nullptr)
Emit the function for the user defined mapper construct.
bool HasEmittedTargetRegion
Flag for keeping track of weather a target region has been emitted.
void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy, LValue PosLVal, const OMPTaskDataTy::DependData &Data, Address DependenciesArray)
std::string getReductionFuncName(StringRef Name) const
Get the function name of a reduction function.
virtual void processRequiresDirective(const OMPRequiresDecl *D)
Perform check on requires decl to ensure that target architecture supports unified addressing.
llvm::DenseSet< CanonicalDeclPtr< const Decl > > AlreadyEmittedTargetDecls
List of the emitted declarations.
virtual llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, const OMPTaskDataTy &Data)
Emit a code for initialization of task reduction clause.
llvm::Value * getThreadID(CodeGenFunction &CGF, SourceLocation Loc)
Gets thread id value for the current thread.
void emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, OpenMPDependClauseKind NewDepKind, SourceLocation Loc)
Updates the dependency kind in the specified depobj object.
virtual void emitLastprivateConditionalFinalUpdate(CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, SourceLocation Loc)
Gets the address of the global copy used for lastprivate conditional update, if any.
virtual void emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc, Expr *ME, bool IsFatal)
Emit __kmpc_error call for error directive extern void __kmpc_error(ident_t *loc, int severity,...
void clearLocThreadIdInsertPt(CodeGenFunction &CGF)
virtual void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc)
Emits code for a taskyield directive.
std::string getName(ArrayRef< StringRef > Parts) const
Get the platform-specific name separator.
void computeMinAndMaxThreadsAndTeams(const OMPExecutableDirective &D, CodeGenFunction &CGF, llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs)
Helper to determine the min/max number of threads/teams for D.
virtual void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr * > Vars, SourceLocation Loc, llvm::AtomicOrdering AO)
Emit flush of the variables specified in 'omp flush' directive.
virtual void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPTaskDataTy &Data)
Emit code for 'taskwait' directive.
virtual void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc)
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generat...
void emitLastprivateConditionalUpdate(CodeGenFunction &CGF, LValue IVLVal, StringRef UniqueDeclName, LValue LVal, SourceLocation Loc)
Emit update for lastprivate conditional data.
virtual void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
Emit task region for the taskloop directive.
virtual void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false)
Emit an implicit/explicit barrier for OpenMP threads.
static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind)
Returns default flags for the barriers depending on the directive, for which this barier is going to ...
virtual bool emitTargetFunctions(GlobalDecl GD)
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
TaskResultTy emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const OMPTaskDataTy &Data)
Emit task region for the task directive.
llvm::Value * emitTargetNumIterationsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter)
Return the trip count of loops associated with constructs / 'target teams distribute' and 'teams dist...
llvm::StringMap< llvm::AssertingVH< llvm::GlobalVariable >, llvm::BumpPtrAllocator > InternalVars
An ordered map of auto-generated variables to their unique names.
virtual void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values)
llvm::SmallVector< UntiedLocalVarsAddressesMap, 4 > UntiedLocalVarsStack
virtual void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind)
Call the appropriate runtime routine to notify that we finished all the work with current loop.
virtual void emitThreadLimitClause(CodeGenFunction &CGF, const Expr *ThreadLimit, SourceLocation Loc)
Emits call to void __kmpc_set_thread_limit(ident_t *loc, kmp_int32 global_tid, kmp_int32 thread_limit...
void emitIfClause(CodeGenFunction &CGF, const Expr *Cond, const RegionCodeGenTy &ThenGen, const RegionCodeGenTy &ElseGen)
Emits code for OpenMP 'if' clause using specified CodeGen function.
Address emitDepobjDependClause(CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, SourceLocation Loc)
Emits list of dependecies based on the provided data (array of dependence/expression pairs) for depob...
virtual llvm::Value * emitSeverityClause(OpenMPSeverityClauseKind Severity)
bool isNontemporalDecl(const ValueDecl *VD) const
Checks if the VD variable is marked as nontemporal declaration in current context.
virtual llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP parallel directive D.
virtual void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc, OpenMPNumThreadsClauseModifier Modifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, const Expr *Message=nullptr)
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads)...
const Expr * getNumThreadsExprForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound, bool UpperBoundOnly, llvm::Value **CondExpr=nullptr, const Expr **ThreadLimitExpr=nullptr)
Check for a number of threads upper bound constant value (stored in UpperBound), or expression (retur...
llvm::SmallVector< LastprivateConditionalData, 4 > LastprivateConditionalStack
Stack for list of addresses of declarations in current context marked as lastprivate conditional.
virtual void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values)
Call the appropriate runtime routine to initialize it before start of loop.
virtual void emitDeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn)
Marks function Fn with properly mangled versions of vector functions.
llvm::AtomicOrdering getDefaultMemoryOrdering() const
Gets default memory ordering as specified in requires directive.
llvm::SmallDenseSet< CanonicalDeclPtr< const Decl > > NontemporalDeclsSet
virtual bool isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static non-chunked.
llvm::Value * getCriticalRegionLock(StringRef CriticalName)
Returns corresponding lock object for the specified critical region name.
virtual void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancel' construct.
QualType SavedKmpTaskTQTy
Saved kmp_task_t for task directive.
virtual void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc)
Emits a master region.
virtual llvm::Function * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts)
Emits outlined function for the OpenMP task directive D.
llvm::DenseMap< llvm::Function *, unsigned > FunctionToUntiedTaskStackMap
Maps function to the position of the untied task locals stack.
void emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, SourceLocation Loc)
Emits the code to destroy the dependency object provided in depobj directive.
virtual void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Required to resolve existing problems in the runtime.
llvm::ArrayType * KmpCriticalNameTy
Type kmp_critical_name, originally defined as typedef kmp_int32 kmp_critical_name[8];.
virtual void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C)
Emit code for doacross ordered directive with 'depend' clause.
llvm::DenseMap< const OMPDeclareMapperDecl *, llvm::Function * > UDMMap
Map from the user-defined mapper declaration to its corresponding functions.
virtual void checkAndEmitLastprivateConditional(CodeGenFunction &CGF, const Expr *LHS)
Checks if the provided LVal is lastprivate conditional and emits the code to update the value of the ...
std::pair< llvm::Value *, LValue > getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, SourceLocation Loc)
Returns the number of the elements and the address of the depobj dependency array.
llvm::SmallDenseSet< const VarDecl * > DeferredGlobalVariables
List of variables that can become declare target implicitly and, thus, must be emitted.
void emitUsesAllocatorsInit(CodeGenFunction &CGF, const Expr *Allocator, const Expr *AllocatorTraits)
Initializes user defined allocators specified in the uses_allocators clauses.
llvm::Type * KmpRoutineEntryPtrTy
Type typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *);.
llvm::Type * getIdentTyPointerTy()
Returns pointer to ident_t type.
void emitSingleReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp, const Expr *PrivateRef, const DeclRefExpr *LHS, const DeclRefExpr *RHS)
Emits single reduction combiner.
llvm::OpenMPIRBuilder & getOMPBuilder()
virtual void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Emit outilined function for 'target' directive.
virtual void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr)
Emits a critical region.
virtual void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned)
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
virtual void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef< llvm::Value * > Args={}) const
Emits call of the outlined function with the provided arguments, translating these arguments to corre...
llvm::Value * emitNumThreadsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D)
Emit an expression that denotes the number of threads a target region shall use.
void emitThreadPrivateVarInit(CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc)
Emits initialization code for the threadprivate variables.
virtual void emitUserDefinedReduction(CodeGenFunction *CGF, const OMPDeclareReductionDecl *D)
Emit code for the specified user defined reduction construct.
virtual void checkAndEmitSharedLastprivateConditional(CodeGenFunction &CGF, const OMPExecutableDirective &D, const llvm::DenseSet< CanonicalDeclPtr< const VarDecl > > &IgnoredDecls)
Checks if the lastprivate conditional was updated in inner region and writes the value.
QualType KmpDimTy
struct kmp_dim { // loop bounds info casted to kmp_int64 kmp_int64 lo; // lower kmp_int64 up; // uppe...
virtual void emitInlinedDirective(CodeGenFunction &CGF, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool HasCancel=false)
Emit code for the directive that does not require outlining.
virtual void registerTargetGlobalVariable(const VarDecl *VD, llvm::Constant *Addr)
Checks if the provided global decl GD is a declare target variable and registers it when emitting cod...
virtual void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D)
Emits OpenMP-specific function prolog.
void emitKmpRoutineEntryT(QualType KmpInt32Ty)
Build type kmp_routine_entry_t (if not built yet).
virtual bool isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static chunked.
virtual void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter)
Emit the target offloading code associated with D.
virtual bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS)
Checks if the variable has associated OMPAllocateDeclAttr attribute with the predefined allocator and...
llvm::AtomicOrdering RequiresAtomicOrdering
Atomic ordering from the omp requires directive.
virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options)
Emit code for the reduction clause.
std::pair< llvm::Value *, Address > emitDependClause(CodeGenFunction &CGF, ArrayRef< OMPTaskDataTy::DependData > Dependencies, SourceLocation Loc)
Emits the list of dependencies based on the provided data (array of dependence/expression pairs).
llvm::StringMap< llvm::WeakTrackingVH > EmittedNonTargetVariables
List of the global variables with their addresses that should not be emitted for the target.
virtual bool isDynamic(OpenMPScheduleClauseKind ScheduleKind) const
Check if the specified ScheduleKind is dynamic.
Address emitLastprivateConditionalInit(CodeGenFunction &CGF, const VarDecl *VD)
Create specialized alloca to handle lastprivate conditionals.
virtual void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads)
Emit an ordered region.
virtual Address getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD)
Gets the OpenMP-specific address of the local variable.
virtual void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, bool IsWorksharingReduction)
Emits the following code for reduction clause with task modifier:
virtual void emitMaskedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MaskedOpGen, SourceLocation Loc, const Expr *Filter=nullptr)
Emits a masked region.
QualType KmpDependInfoTy
Type of struct kmp_depend_info, the dependence descriptor emitted for 'depend' clauses (see the sketch below).
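The full descriptor, reconstructed from the libomp definition that the brief above abbreviates (a sketch):

  #include <cstddef>
  #include <cstdint>
  typedef intptr_t kmp_intptr_t;
  typedef struct kmp_depend_info {
    kmp_intptr_t base_addr; // address of the dependence object
    size_t len;             // length of the dependence object in bytes
    struct {
      bool in : 1;          // set for 'in' and 'inout' dependencies
      bool out : 1;         // set for 'out' and 'inout' dependencies
      bool mtx : 1;         // set for 'mutexinoutset' dependencies
    } flags;
  } kmp_depend_info_t;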
llvm::Function * emitReductionFunction(StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps)
Emits reduction function.
virtual void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues)
Call the appropriate runtime routine to initialize the dispatch schedule before the start of the loop.
Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal) override
Get the void *-typed address of the private copy of the reduction item specified by the SharedLVal...
void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr) override
Emits a critical region.
void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) override
void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) override
Call the appropriate runtime routine to initialize the static schedule before the start of the loop.
bool emitTargetGlobalVariable(GlobalDecl GD) override
Emit the global variable if it is a valid device global variable.
llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST) override
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
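The call this emits drives the dispatch loop built around a dynamically scheduled worksharing loop. Roughly, for the 32-bit signed IV, positive-stride case (a sketch with stand-in names; only the __kmpc_dispatch_next_4 declaration is taken from the libomp interface):

  #include <cstdint>
  typedef int32_t kmp_int32;
  struct ident_t; // opaque source-location record
  extern "C" int __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 gtid,
                                        kmp_int32 *p_last, kmp_int32 *p_lb,
                                        kmp_int32 *p_ub, kmp_int32 *p_st);

  // Shape of the emitted control flow: fetch chunks until the runtime
  // reports that no iterations remain, running the loop body per chunk.
  void dispatchLoop(ident_t *loc, kmp_int32 gtid, void (*body)(kmp_int32)) {
    kmp_int32 last, lb, ub, st;
    while (__kmpc_dispatch_next_4(loc, gtid, &last, &lb, &ub, &st))
      for (kmp_int32 iv = lb; iv <= ub; iv += st)
        body(iv);
  }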
void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc, OpenMPNumThreadsClauseModifier Modifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, const Expr *Message=nullptr) override
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads)...
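In source terms: for '#pragma omp parallel num_threads(4)' the generated code pushes the requested thread count immediately before the fork (a sketch with stand-in loc/gtid values; the declaration mirrors the signature quoted above):

  #include <cstdint>
  typedef int32_t kmp_int32;
  struct ident_t;
  extern "C" void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                                          kmp_int32 num_threads);

  void example(ident_t *loc, kmp_int32 gtid) {
    __kmpc_push_num_threads(loc, gtid, 4);
    // ... then __kmpc_fork_call(...) invokes the outlined parallel region.
  }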
llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr) override
Emit code for the initialization of a threadprivate variable.
void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device) override
Emit the data mapping/movement code associated with the directive D that should be of the form 'targe...
llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP teams directive D.
void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, const Expr *Message=nullptr) override
Emits code for a parallel or serial call of the OutlinedFn with variables captured in a record whose ad...
void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options) override
Emit code for the reduction clause.
void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr * > Vars, SourceLocation Loc, llvm::AtomicOrdering AO) override
Emit flush of the variables specified in 'omp flush' directive.
void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C) override
Emit code for doacross ordered directive with 'depend' clause.
void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc) override
Emits code for a taskyield directive.
Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name) override
Creates artificial threadprivate variable with name Name and type VarType.
Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc) override
Returns address of the threadprivate variable for the current thread.
void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps) override
Emits a single region.
void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N) override
Required to resolve existing problems in the runtime.
llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP parallel directive D.
void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion) override
Emit code for 'cancellation point' construct.
void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false) override
Emit an implicit/explicit barrier for OpenMP threads.
Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const override
Gets the address of the native argument based on the address of the target-specific parameter.
void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars) override
Emits code for a teams call of the OutlinedFn with variables captured in a record whose address is stor...
void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned) override
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
bool emitTargetGlobal(GlobalDecl GD) override
Emit the global GD if it is meaningful for the target.
void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, bool IsWorksharingReduction) override
Emits the following code for reduction clause with task modifier:
void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads) override
Emit an ordered region.
void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind) override
Call the appropriate runtime routine to notify that we finished all the work with current loop.
llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, const OMPTaskDataTy &Data) override
Emit code for the initialization of the task reduction clause.
void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc) override
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generat...
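For example, '#pragma omp parallel proc_bind(close)' pushes the numeric policy before the fork; the value 3 follows the libomp encoding (false=0, true=1, master=2, close=3, spread=4). A sketch with stand-in names:

  #include <cstdint>
  typedef int32_t kmp_int32;
  struct ident_t;
  extern "C" void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                                        int proc_bind);

  void example(ident_t *loc, kmp_int32 gtid) {
    __kmpc_push_proc_bind(loc, gtid, /*proc_bind_close=*/3);
  }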
void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) override
Emit the outlined function for the 'target' directive.
void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc) override
Emits a master region.
void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc) override
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams,...
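For example, '#pragma omp teams num_teams(8) thread_limit(64)' pushes both bounds before the teams fork (a sketch with stand-in names; per the libomp interface, the trailing parameter carries the thread limit):

  #include <cstdint>
  typedef int32_t kmp_int32;
  struct ident_t;
  extern "C" void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                                        kmp_int32 num_teams,
                                        kmp_int32 num_threads);

  void example(ident_t *loc, kmp_int32 gtid) {
    __kmpc_push_num_teams(loc, gtid, /*num_teams=*/8, /*thread_limit=*/64);
  }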
void emitForDispatchDeinit(CodeGenFunction &CGF, SourceLocation Loc) override
This is used for non-static scheduled types and when the ordered clause is present on the loop constr...
const VarDecl * translateParameter(const FieldDecl *FD, const VarDecl *NativeParam) const override
Translates the native parameter of outlined function if this is required for target.
void emitMaskedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MaskedOpGen, SourceLocation Loc, const Expr *Filter=nullptr) override
Emits a masked region.
void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
Emit task region for the task directive.
void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter) override
Emit the target offloading code associated with D.
bool emitTargetFunctions(GlobalDecl GD) override
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr * > NumIterations) override
Emit initialization for doacross loop nesting support.
void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion) override
Emit code for 'cancel' construct.
void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPTaskDataTy &Data) override
Emit code for 'taskwait' directive.
void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc) override
Emit a taskgroup region.
void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, CGOpenMPRuntime::TargetDataInfo &Info) override
Emit the target data mapping code associated with D.
void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues) override
This is used for non-static scheduled types and when the ordered clause is present on the loop constr...
llvm::Function * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts) override
Emits outlined function for the OpenMP task directive D.
void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
Emit task region for the taskloop directive.
CGRecordLayout - This class handles struct and union layout info while lowering AST types to LLVM typ...
unsigned getNonVirtualBaseLLVMFieldNo(const CXXRecordDecl *RD) const
llvm::StructType * getLLVMType() const
Return the "complete object" LLVM type associated with this record.
llvm::StructType * getBaseSubobjectLLVMType() const
Return the "base subobject" LLVM type associated with this record.
unsigned getLLVMFieldNo(const FieldDecl *FD) const
Return llvm::StructType element number that corresponds to the field FD.
unsigned getVirtualBaseIndex(const CXXRecordDecl *base) const
Return the LLVM field index corresponding to the given virtual base.
API for captured statement code generation.
static bool classof(const CGCapturedStmtInfo *)
virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S)
Emit the captured statement body.
virtual const FieldDecl * lookup(const VarDecl *VD) const
Lookup the captured field decl for a variable.
RAII for correct setting/restoring of CapturedStmtInfo.
The scope used to remap some variables as private in the OpenMP loop body (or other captured region e...
bool Privatize()
Privatizes local variables previously registered as private.
bool addPrivate(const VarDecl *LocalVD, Address Addr)
Registers LocalVD variable as a private with Addr as the address of the corresponding private variabl...
An RAII object to set (and then clear) a mapping for an OpaqueValueExpr.
Enters a new scope for capturing cleanups, all of which will be executed once the scope is exited.
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
llvm::Function * GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, SourceLocation Loc)
LValue EmitLoadOfReferenceLValue(LValue RefLVal)
Definition: CGExpr.cpp:3030
void EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *TrueBlock, llvm::BasicBlock *FalseBlock, uint64_t TrueCount, Stmt::Likelihood LH=Stmt::LH_None, const Expr *ConditionalOp=nullptr, const VarDecl *ConditionalDecl=nullptr)
EmitBranchOnBoolExpr - Emit a branch on a boolean condition (e.g.
void emitDestroy(Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray)
emitDestroy - Immediately perform the destruction of the given object.
Definition: CGDecl.cpp:2395
JumpDest getJumpDestInCurrentScope(llvm::BasicBlock *Target)
The given basic block lies in the current EH scope, but may be a target of a potentially scope-crossi...
static void EmitOMPTargetParallelDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelDirective &S)
void EmitNullInitialization(Address DestPtr, QualType Ty)
EmitNullInitialization - Generate code to set a value of the given type to null. If the type contains...
CGCapturedStmtInfo * CapturedStmtInfo
ComplexPairTy EmitLoadOfComplex(LValue src, SourceLocation loc)
EmitLoadOfComplex - Load a complex number from the specified l-value.
static void EmitOMPTargetDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetDirective &S)
Emit device code for the target directive.
static void EmitOMPTargetTeamsDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDirective &S)
Emit device code for the target teams directive.
static void EmitOMPTargetTeamsDistributeDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeDirective &S)
Emit device code for the target teams distribute directive.
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
const LangOptions & getLangOpts() const
AutoVarEmission EmitAutoVarAlloca(const VarDecl &var)
EmitAutoVarAlloca - Emit the alloca and debug information for a local variable.
Definition: CGDecl.cpp:1483
void pushDestroy(QualType::DestructionKind dtorKind, Address addr, QualType type)
pushDestroy - Push the standard destructor for the given type as at least a normal cleanup.
Definition: CGDecl.cpp:2279
const CodeGen::CGBlockInfo * BlockInfo
Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
Load a pointer with type PtrTy stored at address Ptr.
Definition: CGExpr.cpp:3039
void EmitBranchThroughCleanup(JumpDest Dest)
EmitBranchThroughCleanup - Emit a branch from the current insert block through the normal cleanup han...
Definition: CGCleanup.cpp:1112
const Decl * CurCodeDecl
CurCodeDecl - This is the inner-most code context, which includes blocks.
Destroyer * getDestroyer(QualType::DestructionKind destructionKind)
Definition: CGDecl.cpp:2252
llvm::AssertingVH< llvm::Instruction > AllocaInsertPt
AllocaInsertPoint - This is an instruction in the entry block before which we prefer to insert alloca...
void EmitAggregateAssign(LValue Dest, LValue Src, QualType EltTy)
Emit an aggregate assignment.
JumpDest ReturnBlock
ReturnBlock - Unified return block.
void EmitAggregateCopy(LValue Dest, LValue Src, QualType EltTy, AggValueSlot::Overlap_t MayOverlap, bool isVolatile=false)
EmitAggregateCopy - Emit an aggregate copy.
Definition: CGExprAgg.cpp:2283
LValue EmitLValueForField(LValue Base, const FieldDecl *Field, bool IsInBounds=true)
Definition: CGExpr.cpp:5253
RawAddress CreateDefaultAlignTempAlloca(llvm::Type *Ty, const Twine &Name="tmp")
CreateDefaultAlignTempAlloca - This creates an alloca with the default ABI alignment of the given L...
Definition: CGExpr.cpp:174
void GenerateOpenMPCapturedVars(const CapturedStmt &S, SmallVectorImpl< llvm::Value * > &CapturedVars)
void EmitIgnoredExpr(const Expr *E)
EmitIgnoredExpr - Emit an expression in a context which ignores the result.
Definition: CGExpr.cpp:242
RValue EmitLoadOfLValue(LValue V, SourceLocation Loc)
EmitLoadOfLValue - Given an expression that represents a value lvalue, this method emits the address ...
Definition: CGExpr.cpp:2336
LValue EmitArraySectionExpr(const ArraySectionExpr *E, bool IsLowerBound=true)
Definition: CGExpr.cpp:4836
LValue EmitOMPSharedLValue(const Expr *E)
Emits the lvalue for the expression with possibly captured variable.
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
void EmitOMPCopy(QualType OriginalType, Address DestAddr, Address SrcAddr, const VarDecl *DestVD, const VarDecl *SrcVD, const Expr *Copy)
Emit proper copying of data from one variable to another.
llvm::Value * EvaluateExprAsBool(const Expr *E)
EvaluateExprAsBool - Perform the usual unary conversions on the specified expression and compare the ...
Definition: CGExpr.cpp:223
JumpDest getOMPCancelDestination(OpenMPDirectiveKind Kind)
llvm::Value * emitArrayLength(const ArrayType *arrayType, QualType &baseType, Address &addr)
emitArrayLength - Compute the length of an array, even if it's a VLA, and drill down to the base elem...
void EmitOMPAggregateAssign(Address DestAddr, Address SrcAddr, QualType OriginalType, const llvm::function_ref< void(Address, Address)> CopyGen)
Perform element by element copying of arrays with type OriginalType from SrcAddr to DestAddr using co...
bool HaveInsertPoint() const
HaveInsertPoint - True if an insertion point is defined.
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
LValue MakeRawAddrLValue(llvm::Value *V, QualType T, CharUnits Alignment, AlignmentSource Source=AlignmentSource::Type)
Same as MakeAddrLValue above except that the pointer is known to be unsigned.
LValue EmitLValueForFieldInitialization(LValue Base, const FieldDecl *Field)
EmitLValueForFieldInitialization - Like EmitLValueForField, except that if the Field is a reference,...
Definition: CGExpr.cpp:5427
void incrementProfileCounter(const Stmt *S, llvm::Value *StepV=nullptr)
Increment the profiler's counter for the given statement by StepV.
VlaSizePair getVLASize(const VariableArrayType *vla)
Returns an LLVM value that corresponds to the size, in non-variably-sized elements,...
llvm::CallInst * EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
void EmitStoreOfComplex(ComplexPairTy V, LValue dest, bool isInit)
EmitStoreOfComplex - Store a complex number into the specified l-value.
const Decl * CurFuncDecl
CurFuncDecl - Holds the Decl for the current outermost non-closure context.
void EmitAutoVarCleanups(const AutoVarEmission &emission)
Definition: CGDecl.cpp:2203
void EmitStoreThroughLValue(RValue Src, LValue Dst, bool isInit=false)
EmitStoreThroughLValue - Store the specified rvalue into the specified lvalue, where both are guarant...
Definition: CGExpr.cpp:2533
LValue EmitLoadOfPointerLValue(Address Ptr, const PointerType *PtrTy)
Definition: CGExpr.cpp:3049
void EmitAnyExprToMem(const Expr *E, Address Location, Qualifiers Quals, bool IsInitializer)
EmitAnyExprToMem - Emits the code necessary to evaluate an arbitrary expression into the given memory...
Definition: CGExpr.cpp:293
bool needsEHCleanup(QualType::DestructionKind kind)
Determines whether an EH cleanup is required to destroy a type with the given destruction kind.
llvm::DenseMap< const ValueDecl *, FieldDecl * > LambdaCaptureFields
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Type * ConvertTypeForMem(QualType T)
static void EmitOMPTargetTeamsDistributeParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForDirective &S)
static void EmitOMPTargetParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForSimdDirective &S)
Emit device code for the target parallel for simd directive.
CodeGenTypes & getTypes() const
static TypeEvaluationKind getEvaluationKind(QualType T)
getEvaluationKind - Return the TypeEvaluationKind of QualType T.
void EmitOMPTargetTaskBasedDirective(const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen, OMPTargetDataInfo &InputInfo)
Address EmitPointerWithAlignment(const Expr *Addr, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitPointerWithAlignment - Given an expression with a pointer type, emit the value and compute our be...
Definition: CGExpr.cpp:1515
static void EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForSimdDirective &S)
Emit device code for the target teams distribute parallel for simd directive.
void EmitBranch(llvm::BasicBlock *Block)
EmitBranch - Emit a branch to the specified basic block from the current insert block,...
Definition: CGStmt.cpp:672
RawAddress CreateMemTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignment and cas...
Definition: CGExpr.cpp:186
Address EmitLoadOfReference(LValue RefLVal, LValueBaseInfo *PointeeBaseInfo=nullptr, TBAAAccessInfo *PointeeTBAAInfo=nullptr)
Definition: CGExpr.cpp:2997
void EmitVarDecl(const VarDecl &D)
EmitVarDecl - Emit a local variable declaration.
Definition: CGDecl.cpp:202
llvm::Value * EmitCheckedInBoundsGEP(llvm::Type *ElemTy, llvm::Value *Ptr, ArrayRef< llvm::Value * > IdxList, bool SignedIndices, bool IsSubtraction, SourceLocation Loc, const Twine &Name="")
Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to detect undefined behavior whe...
static void EmitOMPTargetParallelGenericLoopDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelGenericLoopDirective &S)
Emit device code for the target parallel loop directive.
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type,...
static bool IsWrappedCXXThis(const Expr *E)
Check if E is a C++ "this" pointer wrapped in value-preserving casts.
Definition: CGExpr.cpp:1573
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
void EmitAtomicStore(RValue rvalue, LValue lvalue, bool isInit)
Definition: CGAtomic.cpp:1973
static void EmitOMPTargetSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S)
Emit device code for the target simd directive.
static void EmitOMPTargetParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForDirective &S)
Emit device code for the target parallel for directive.
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
bool ConstantFoldsToSimpleInteger(const Expr *Cond, bool &Result, bool AllowLabels=false)
ConstantFoldsToSimpleInteger - If the specified expression does not fold to a constant,...
static void EmitOMPTargetTeamsGenericLoopDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsGenericLoopDirective &S)
Emit device code for the target teams loop directive.
std::pair< llvm::Value *, llvm::Value * > ComplexPairTy
Address ReturnValue
ReturnValue - The temporary alloca to hold the return value.
LValue EmitLValue(const Expr *E, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitLValue - Emit code to compute a designator that specifies the location of the expression.
Definition: CGExpr.cpp:1631
static void EmitOMPTargetTeamsDistributeSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeSimdDirective &S)
Emit device code for the target teams distribute simd directive.
llvm::Value * EmitScalarConversion(llvm::Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc)
Emit a conversion from the specified type to the specified destination type, both of which are LLVM s...
void EmitVariablyModifiedType(QualType Ty)
EmitVariablyModifiedType - Capture all the sizes for the VLA expressions in the given variably-modified type and s...
bool isTrivialInitializer(const Expr *Init)
Determine whether the given initializer is trivial in the sense that it requires no code to be genera...
Definition: CGDecl.cpp:1808
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
Definition: CGStmt.cpp:652
void EmitExprAsInit(const Expr *init, const ValueDecl *D, LValue lvalue, bool capturedByInit)
EmitExprAsInit - Emits the code necessary to initialize a location in memory with the given initializ...
Definition: CGDecl.cpp:2093
LValue MakeNaturalAlignRawAddrLValue(llvm::Value *V, QualType T)
This class organizes the cross-function state that is used while generating LLVM code.
void handleCUDALaunchBoundsAttr(llvm::Function *F, const CUDALaunchBoundsAttr *A, int32_t *MaxThreadsVal=nullptr, int32_t *MinBlocksVal=nullptr, int32_t *MaxClusterRankVal=nullptr)
Emit the IR encoding to attach the CUDA launch bounds attribute to F.
Definition: NVPTX.cpp:320
void SetInternalFunctionAttributes(GlobalDecl GD, llvm::Function *F, const CGFunctionInfo &FI)
Set the attributes on the LLVM function for the given decl and function info.
llvm::Module & getModule() const
void addCompilerUsedGlobal(llvm::GlobalValue *GV)
Add a global to a list to be added to the llvm.compiler.used metadata.
CharUnits GetTargetTypeStoreSize(llvm::Type *Ty) const
Return the store size, in character units, of the given LLVM type.
void handleAMDGPUWavesPerEUAttr(llvm::Function *F, const AMDGPUWavesPerEUAttr *A)
Emit the IR encoding to attach the AMD GPU waves-per-eu attribute to F.
Definition: AMDGPU.cpp:737
DiagnosticsEngine & getDiags() const
const LangOptions & getLangOpts() const
CharUnits getNaturalTypeAlignment(QualType T, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, bool forPointeeType=false)
const TargetInfo & getTarget() const
void EmitGlobal(GlobalDecl D)
Emit code for a single global function or var decl.
void handleAMDGPUFlatWorkGroupSizeAttr(llvm::Function *F, const AMDGPUFlatWorkGroupSizeAttr *A, const ReqdWorkGroupSizeAttr *ReqdWGS=nullptr, int32_t *MinThreadsVal=nullptr, int32_t *MaxThreadsVal=nullptr)
Emit the IR encoding to attach the AMD GPU flat-work-group-size attribute to F.
Definition: AMDGPU.cpp:706
llvm::GlobalValue::LinkageTypes getLLVMLinkageVarDefinition(const VarDecl *VD)
Returns LLVM linkage for a declarator.
CGCXXABI & getCXXABI() const
CGOpenMPRuntime & getOpenMPRuntime()
Return a reference to the configured OpenMP runtime.
const llvm::Triple & getTriple() const
TBAAAccessInfo getTBAAInfoForSubobject(LValue Base, QualType AccessType)
getTBAAInfoForSubobject - Get TBAA information for an access with a given base lvalue.
llvm::Constant * GetAddrOfGlobal(GlobalDecl GD, ForDefinition_t IsForDefinition=NotForDefinition)
ASTContext & getContext() const
const TargetCodeGenInfo & getTargetCodeGenInfo()
const CodeGenOptions & getCodeGenOpts() const
StringRef getMangledName(GlobalDecl GD)
std::optional< CharUnits > getOMPAllocateAlignment(const VarDecl *VD)
Return the alignment specified in an allocate directive, if present.
Definition: CGDecl.cpp:2945
llvm::GlobalValue * GetGlobalValue(StringRef Ref)
llvm::Constant * EmitNullConstant(QualType T)
Return the result of value-initializing the given type, i.e.
llvm::Function * CreateGlobalInitOrCleanUpFunction(llvm::FunctionType *ty, const Twine &name, const CGFunctionInfo &FI, SourceLocation Loc=SourceLocation(), bool TLS=false, llvm::GlobalVariable::LinkageTypes Linkage=llvm::GlobalVariable::InternalLinkage)
Definition: CGDeclCXX.cpp:438
llvm::ConstantInt * getSize(CharUnits numChars)
Emit the given number of characters as a value of type size_t.
llvm::Type * ConvertType(QualType T)
ConvertType - Convert type T into a llvm::Type.
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for the given CGFunctionInfo.
Definition: CGCall.cpp:1702
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
Definition: CGCall.cpp:739
const CGRecordLayout & getCGRecordLayout(const RecordDecl *)
getCGRecordLayout - Return record layout info for the given record decl.
llvm::Type * ConvertTypeForMem(QualType T)
ConvertTypeForMem - Convert type T into a llvm::Type.
const CGFunctionInfo & arrangeNullaryFunction()
A nullary function is a freestanding function of type 'void ()'.
Definition: CGCall.cpp:788
A specialization of Address that requires the address to be an LLVM Constant.
Definition: Address.h:296
static ConstantAddress invalid()
Definition: Address.h:304
Information for lazily generating a cleanup.
Definition: EHScopeStack.h:146
void popTerminate()
Pops a terminate handler off the stack.
Definition: CGCleanup.h:639
void pushTerminate()
Push a terminate handler on the stack.
Definition: CGCleanup.cpp:250
FunctionArgList - Type for representing both the decl and type of parameters to a function.
Definition: CGCall.h:375
LValue - This represents an lvalue reference.
Definition: CGValue.h:182
CharUnits getAlignment() const
Definition: CGValue.h:343
llvm::Value * getPointer(CodeGenFunction &CGF) const
const Qualifiers & getQuals() const
Definition: CGValue.h:338
Address getAddress() const
Definition: CGValue.h:361
LValueBaseInfo getBaseInfo() const
Definition: CGValue.h:346
QualType getType() const
Definition: CGValue.h:291
TBAAAccessInfo getTBAAInfo() const
Definition: CGValue.h:335
A basic class for pre- and post-actions in advanced codegen sequences for OpenMP regions.
virtual void Enter(CodeGenFunction &CGF)
RValue - This trivial value class is used to represent the result of an expression that is evaluated.
Definition: CGValue.h:42
static RValue get(llvm::Value *V)
Definition: CGValue.h:98
static RValue getComplex(llvm::Value *V1, llvm::Value *V2)
Definition: CGValue.h:108
llvm::Value * getScalarVal() const
getScalarVal() - Return the Value* of this scalar value.
Definition: CGValue.h:71
An abstract representation of an aligned address.
Definition: Address.h:42
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition: Address.h:77
llvm::Value * getPointer() const
Definition: Address.h:66
static RawAddress invalid()
Definition: Address.h:61
bool isValid() const
Definition: Address.h:62
Class intended to support codegen for all kinds of reduction clauses.
LValue getSharedLValue(unsigned N) const
Returns LValue for the reduction item.
const Expr * getRefExpr(unsigned N) const
Returns the reference expression of the reduction item.
LValue getOrigLValue(unsigned N) const
Returns LValue for the original reduction item.
bool needCleanups(unsigned N)
Returns true if the private copy requires cleanups.
void emitAggregateType(CodeGenFunction &CGF, unsigned N)
Emits the code for the variably-modified type, if required.
const VarDecl * getBaseDecl(unsigned N) const
Returns the base declaration of the reduction item.
QualType getPrivateType(unsigned N) const
Return the type of the private item.
bool usesReductionInitializer(unsigned N) const
Returns true if the initialization of the reduction item uses initializer from declare reduction cons...
void emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N)
Emits lvalue for the shared and original reduction item.
void emitInitialization(CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr, llvm::function_ref< bool(CodeGenFunction &)> DefaultInit)
Performs initialization of the private copy for the reduction item.
std::pair< llvm::Value *, llvm::Value * > getSizes(unsigned N) const
Returns the size of the reduction item (in chars and total number of elements in the item),...
ReductionCodeGen(ArrayRef< const Expr * > Shareds, ArrayRef< const Expr * > Origs, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > ReductionOps)
void emitCleanups(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Emits cleanup code for the reduction item.
Address adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Adjusts PrivateAddr to be used instead of the original variable address in normal operations.
Class that provides a way to call the simple version of codegen for an OpenMP region, or an advanced one with possibl...
void operator()(CodeGenFunction &CGF) const
void setAction(PrePostActionTy &Action) const
virtual void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const
setTargetAttributes - Provides a convenient hook to handle extra target-specific attributes for the g...
Definition: TargetInfo.h:80
ConstStmtVisitor - This class implements a simple visitor for Stmt subclasses.
Definition: StmtVisitor.h:196
DeclContext - This is used only as base class of specific decl types that can act as declaration cont...
Definition: DeclBase.h:1449
void addDecl(Decl *D)
Add the declaration D into this context.
Definition: DeclBase.cpp:1793
A reference to a declared variable, function, enum, etc.
Definition: Expr.h:1272
ValueDecl * getDecl()
Definition: Expr.h:1340
Decl - This represents one declaration (or definition), e.g.
Definition: DeclBase.h:86
SourceLocation getEndLoc() const LLVM_READONLY
Definition: DeclBase.h:435
T * getAttr() const
Definition: DeclBase.h:573
bool hasAttrs() const
Definition: DeclBase.h:518
ASTContext & getASTContext() const LLVM_READONLY
Definition: DeclBase.cpp:524
void addAttr(Attr *A)
Definition: DeclBase.cpp:1022
virtual Stmt * getBody() const
getBody - If this Decl represents a declaration for a body of code, such as a function or method defi...
Definition: DeclBase.h:1087
virtual bool hasBody() const
Returns true if this Decl represents a declaration for a body of code, such as a function or method d...
Definition: DeclBase.h:1093
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
Definition: DeclBase.h:559
SourceLocation getLocation() const
Definition: DeclBase.h:439
DeclContext * getDeclContext()
Definition: DeclBase.h:448
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: DeclBase.h:431
AttrVec & getAttrs()
Definition: DeclBase.h:524
bool hasAttr() const
Definition: DeclBase.h:577
virtual Decl * getCanonicalDecl()
Retrieves the "canonical" declaration of the given declaration.
Definition: DeclBase.h:978
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Decl.h:830
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
Definition: Diagnostic.h:1529
unsigned getCustomDiagID(Level L, const char(&FormatString)[N])
Return an ID for a diagnostic with the specified format string and level.
Definition: Diagnostic.h:904
The return type of classify().
Definition: Expr.h:337
This represents one expression.
Definition: Expr.h:112
bool isIntegerConstantExpr(const ASTContext &Ctx) const
bool isGLValue() const
Definition: Expr.h:287
@ SE_AllowSideEffects
Allow any unmodeled side effect.
Definition: Expr.h:674
@ SE_AllowUndefinedBehavior
Allow UB that we can give a value, but not arbitrary unmodeled side effects.
Definition: Expr.h:672
Expr * IgnoreParenCasts() LLVM_READONLY
Skip past any parentheses and casts which might surround this expression until reaching a fixed point...
Definition: Expr.cpp:3078
llvm::APSInt EvaluateKnownConstInt(const ASTContext &Ctx, SmallVectorImpl< PartialDiagnosticAt > *Diag=nullptr) const
EvaluateKnownConstInt - Call EvaluateAsRValue and return the folded integer.
Expr * IgnoreParenImpCasts() LLVM_READONLY
Skip past any parentheses and implicit casts which might surround this expression until reaching a fi...
Definition: Expr.cpp:3073
bool isEvaluatable(const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects) const
isEvaluatable - Call EvaluateAsRValue to see if this expression can be constant folded without side-e...
std::optional< llvm::APSInt > getIntegerConstantExpr(const ASTContext &Ctx) const
getIntegerConstantExpr - Return the value if this expression is a valid integer constant expression.
bool HasSideEffects(const ASTContext &Ctx, bool IncludePossibleEffects=true) const
HasSideEffects - This routine returns true for all those expressions which have any effect other than...
Definition: Expr.cpp:3624
bool EvaluateAsBooleanCondition(bool &Result, const ASTContext &Ctx, bool InConstantContext=false) const
EvaluateAsBooleanCondition - Return true if this is a constant which we can fold and convert to a boo...
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition: Expr.cpp:273
QualType getType() const
Definition: Expr.h:144
bool hasNonTrivialCall(const ASTContext &Ctx) const
Determine whether this expression involves a call to any function that is not trivial.
Definition: Expr.cpp:3989
Represents a member of a struct/union/class.
Definition: Decl.h:3157
unsigned getFieldIndex() const
Returns the index of this field within its record, as appropriate for passing to ASTRecordLayout::get...
Definition: Decl.h:3242
const RecordDecl * getParent() const
Returns the parent of this field declaration, which is the struct in which this field is defined.
Definition: Decl.h:3393
static FieldDecl * Create(const ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, const IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, Expr *BW, bool Mutable, InClassInitStyle InitStyle)
Definition: Decl.cpp:4641
Represents a function declaration or definition.
Definition: Decl.h:1999
const ParmVarDecl * getParamDecl(unsigned i) const
Definition: Decl.h:2794
QualType getReturnType() const
Definition: Decl.h:2842
ArrayRef< ParmVarDecl * > parameters() const
Definition: Decl.h:2771
FunctionDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition: Decl.cpp:3688
unsigned getNumParams() const
Return the number of parameters this function must have based on its FunctionType.
Definition: Decl.cpp:3767
GlobalDecl - represents a global declaration.
Definition: GlobalDecl.h:57
const Decl * getDecl() const
Definition: GlobalDecl.h:106
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
Definition: Decl.cpp:5470
static IntegerLiteral * Create(const ASTContext &C, const llvm::APInt &V, QualType type, SourceLocation l)
Returns a new integer literal with value 'V' and type 'type'.
Definition: Expr.cpp:971
Describes the capture of a variable or of this, or of a C++1y init-capture.
Definition: LambdaCapture.h:25
std::string OMPHostIRFile
Name of the IR file that contains the result of the OpenMP target host code generation.
Definition: LangOptions.h:512
std::vector< llvm::Triple > OMPTargetTriples
Triples of the OpenMP targets that the host code codegen should take into account in order to generat...
Definition: LangOptions.h:508
virtual void mangleCanonicalTypeName(QualType T, raw_ostream &, bool NormalizeIntegers=false)=0
Generates a unique string for an externally visible type for use with TBAA or type uniquing.
MemberExpr - [C99 6.5.2.3] Structure and Union Members.
Definition: Expr.h:3300
ValueDecl * getMemberDecl() const
Retrieve the member declaration to which this expression refers.
Definition: Expr.h:3383
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition: Decl.h:300
bool isExternallyVisible() const
Definition: Decl.h:432
This represents clause 'affinity' in the '#pragma omp task'-based directives.
Class that represents a component of a mappable expression.
ArrayRef< MappableComponent > MappableExprComponentListRef
const Stmt * getPreInitStmt() const
Get pre-initialization statement for the clause.
Definition: OpenMPClause.h:219
This is a basic class for representing single OpenMP clause.
Definition: OpenMPClause.h:55
This represents '#pragma omp declare mapper ...' directive.
Definition: DeclOpenMP.h:287
This represents '#pragma omp declare reduction ...' directive.
Definition: DeclOpenMP.h:177
Expr * getInitializer()
Get initializer expression (if specified) of the declare reduction construct.
Definition: DeclOpenMP.h:238
This represents implicit clause 'depend' for the '#pragma omp task' directive.
This represents 'detach' clause in the '#pragma omp task' directive.
This represents 'device' clause in the '#pragma omp ...' directive.
This represents the 'doacross' clause for the '#pragma omp ordered' directive.
This is a basic class for representing single OpenMP executable directive.
Definition: StmtOpenMP.h:266
static llvm::iterator_range< specific_clause_iterator< SpecificClause > > getClausesOfKind(ArrayRef< OMPClause * > Clauses)
Definition: StmtOpenMP.h:445
This represents clause 'firstprivate' in the '#pragma omp ...' directives.
This represents clause 'has_device_ptr' in the '#pragma omp ...' directives.
This represents 'if' clause in the '#pragma omp ...' directive.
Definition: OpenMPClause.h:676
Expr * getCondition() const
Returns condition.
Definition: OpenMPClause.h:745
This represents clause 'in_reduction' in the '#pragma omp task' directives.
This represents clause 'is_device_ptr' in the '#pragma omp ...' directives.
OpenMP 5.0 [2.1.6 Iterators] Iterators are identifiers that expand to multiple values in the clause o...
Definition: ExprOpenMP.h:151
This represents clause 'lastprivate' in the '#pragma omp ...' directives.
This represents clause 'linear' in the '#pragma omp ...' directives.
This is a common base class for loop directives ('omp simd', 'omp for', 'omp for simd' etc....
Definition: StmtOpenMP.h:1004
This represents clause 'map' in the '#pragma omp ...' directives.
This represents the 'message' clause in the '#pragma omp error' and the '#pragma omp parallel' direct...
Expr * getMessageString() const
Returns message string of the clause.
This represents clause 'nontemporal' in the '#pragma omp ...' directives.
This represents 'nowait' clause in the '#pragma omp ...' directive.
This represents 'num_teams' clause in the '#pragma omp ...' directive.
This represents 'num_threads' clause in the '#pragma omp ...' directive.
Definition: OpenMPClause.h:825
This represents 'ordered' clause in the '#pragma omp ...' directive.
This represents clause 'private' in the '#pragma omp ...' directives.
This represents clause 'reduction' in the '#pragma omp ...' directives.
This represents '#pragma omp requires...' directive.
Definition: DeclOpenMP.h:417
This represents the 'severity' clause in the '#pragma omp error' and the '#pragma omp parallel' direc...
OpenMPSeverityClauseKind getSeverityKind() const
Returns kind of the clause.
This represents 'thread_limit' clause in the '#pragma omp ...' directive.
This represents clause 'uses_allocators' in the '#pragma omp target'-based directives.
This represents 'ompx_attribute' clause in a directive that might generate an outlined function.
This represents 'ompx_bare' clause in the '#pragma omp target teams ...' directive.
This represents 'ompx_dyn_cgroup_mem' clause in the '#pragma omp target ...' directive.
OpaqueValueExpr - An expression referring to an opaque object of a fixed type and value class.
Definition: Expr.h:1180
Represents a parameter to a function.
Definition: Decl.h:1789
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition: TypeBase.h:3346
Represents an unpacked "presumed" location which can be presented to the user.
unsigned getColumn() const
Return the presumed column number of this location.
const char * getFilename() const
Return the presumed filename of this location.
unsigned getLine() const
Return the presumed line number of this location.
A (possibly-)qualified type.
Definition: TypeBase.h:937
void addRestrict()
Add the restrict qualifier to this QualType.
Definition: TypeBase.h:1172
QualType withRestrict() const
Definition: TypeBase.h:1175
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition: TypeBase.h:1004
const Type * getTypePtr() const
Retrieves a pointer to the underlying (unqualified) type.
Definition: TypeBase.h:8343
Qualifiers getQualifiers() const
Retrieve the set of qualifiers applied to this type.
Definition: TypeBase.h:8383
QualType getNonReferenceType() const
If Type is a reference type (e.g., const int&), returns the type that the reference refers to ("const...
Definition: TypeBase.h:8528
QualType getCanonicalType() const
Definition: TypeBase.h:8395
DestructionKind isDestructedType() const
Returns a nonzero value if objects of this type require non-trivial work to clean up after.
Definition: TypeBase.h:1545
Represents a struct/union/class.
Definition: Decl.h:4309
field_iterator field_end() const
Definition: Decl.h:4515
field_range fields() const
Definition: Decl.h:4512
virtual void completeDefinition()
Note that the definition of this type is now complete.
Definition: Decl.cpp:5166
bool field_empty() const
Definition: Decl.h:4520
field_iterator field_begin() const
Definition: Decl.cpp:5154
decl_type * getPreviousDecl()
Return the previous declaration of this declaration or NULL if this is the first declaration.
Definition: Redeclarable.h:201
decl_type * getMostRecentDecl()
Returns the most recent (re)declaration of this declaration.
Definition: Redeclarable.h:223
Base for LValueReferenceType and RValueReferenceType.
Definition: TypeBase.h:3589
Scope - A scope is a transient data structure that is used while parsing the program.
Definition: Scope.h:41
Encodes a location in the source.
static SourceLocation getFromRawEncoding(UIntTy Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
bool isValid() const
Return true if this is a valid SourceLocation object.
UIntTy getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it.
This class handles loading and caching of source files into memory.
PresumedLoc getPresumedLoc(SourceLocation Loc, bool UseLineDirectives=true) const
Returns the "presumed" location of the specified SourceLocation.
fileinfo_iterator fileinfo_end() const
SourceLocation translateFileLineCol(const FileEntry *SourceFile, unsigned Line, unsigned Col) const
Get the source location for the given file:line:col triplet.
fileinfo_iterator fileinfo_begin() const
Stmt - This represents one statement.
Definition: Stmt.h:85
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition: Stmt.cpp:334
Stmt * IgnoreContainers(bool IgnoreCaptured=false)
Skip no-op (attributed, compound) container stmts and skip captured stmt at the top,...
Definition: Stmt.cpp:205
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Stmt.cpp:346
void startDefinition()
Starts the definition of this tag declaration.
Definition: Decl.cpp:4847
bool isUnion() const
Definition: Decl.h:3919
bool isTLSSupported() const
Whether the target supports thread-local storage.
Definition: TargetInfo.h:1616
virtual bool hasFeature(StringRef Feature) const
Determine whether the given target has the given feature.
Definition: TargetInfo.h:1526
The base class of the type hierarchy.
Definition: TypeBase.h:1833
bool isVoidType() const
Definition: TypeBase.h:8936
bool isSignedIntegerOrEnumerationType() const
Determines whether this is an integer type that is signed or an enumeration types whose underlying ty...
Definition: Type.cpp:2229
const Type * getPointeeOrArrayElementType() const
If this is a pointer type, return the pointee type.
Definition: TypeBase.h:9116
bool isSignedIntegerType() const
Return true if this is an integer type that is signed, according to C99 6.2.5p4 [char,...
Definition: Type.cpp:2209
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or beca...
Definition: Type.h:26
RecordDecl * getAsRecordDecl() const
Retrieves the RecordDecl this type refers to.
Definition: Type.h:41
bool isArrayType() const
Definition: TypeBase.h:8679
bool isPointerType() const
Definition: TypeBase.h:8580
CanQualType getCanonicalTypeUnqualified() const
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition: TypeBase.h:8980
const T * castAs() const
Member-template castAs<specific type>.
Definition: TypeBase.h:9226
bool isReferenceType() const
Definition: TypeBase.h:8604
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition: Type.cpp:752
bool isLValueReferenceType() const
Definition: TypeBase.h:8608
bool isAggregateType() const
Determines whether the type is a C++ aggregate type or C aggregate or union type.
Definition: Type.cpp:2415
RecordDecl * castAsRecordDecl() const
Definition: Type.h:48
QualType getCanonicalTypeInternal() const
Definition: TypeBase.h:3137
const Type * getBaseElementTypeUnsafe() const
Get the base element type of this type, potentially discarding type qualifiers.
Definition: TypeBase.h:9109
bool isVariablyModifiedType() const
Whether this type is a variably-modified type (C99 6.7.5).
Definition: TypeBase.h:2818
const ArrayType * getAsArrayTypeUnsafe() const
A variant of getAs<> for array types which silently discards qualifiers from the outermost type.
Definition: TypeBase.h:9212
bool isFloatingType() const
Definition: Type.cpp:2308
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true...
Definition: Type.cpp:2257
bool isAnyPointerType() const
Definition: TypeBase.h:8588
const T * getAs() const
Member-template getAs<specific type>.
Definition: TypeBase.h:9159
bool isRecordType() const
Definition: TypeBase.h:8707
bool isUnionType() const
Definition: Type.cpp:718
Represent the declaration of a variable (in which case it is an lvalue), a function (in which case it ...
Definition: Decl.h:711
QualType getType() const
Definition: Decl.h:722
Represents a variable declaration or definition.
Definition: Decl.h:925
VarDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition: Decl.cpp:2257
VarDecl * getDefinition(ASTContext &)
Get the real (not just tentative) definition for this declaration.
Definition: Decl.cpp:2366
const Expr * getInit() const
Definition: Decl.h:1367
bool hasExternalStorage() const
Returns true if a variable has extern or private_extern storage.
Definition: Decl.h:1216
bool hasLocalStorage() const
Returns true if a variable with function scope is a non-static local variable.
Definition: Decl.h:1183
@ DeclarationOnly
This declaration is only a declaration.
Definition: Decl.h:1294
DefinitionKind hasDefinition(ASTContext &) const
Check whether this variable is defined in this translation unit.
Definition: Decl.cpp:2375
bool isLocalVarDeclOrParm() const
Similar to isLocalVarDecl but also includes parameters.
Definition: Decl.h:1261
const Expr * getAnyInitializer() const
Get the initializer for this variable, no matter which declaration it is attached to.
Definition: Decl.h:1357
Represents a C array with a specified size that is not an integer-constant-expression.
Definition: TypeBase.h:3982
Expr * getSizeExpr() const
Definition: TypeBase.h:3996
specific_attr_iterator - Iterates over a subrange of an AttrVec, only providing attributes that are o...
Definition: AttrIterator.h:36
Definition: SPIR.cpp:35
bool isEmptyRecordForLayout(const ASTContext &Context, QualType T)
isEmptyRecordForLayout - Return true iff a structure contains only empty base classes (per isEmptyRec...
@ Decl
The l-value was an access to a declared entity or something equivalently strong, like the address of ...
bool isEmptyFieldForLayout(const ASTContext &Context, const FieldDecl *FD)
isEmptyFieldForLayout - Return true iff the field is "empty", that is, either a zero-width bit-field ...
@ NotKnownNonNull
Definition: Address.h:33
The JSON file list parser is used to communicate input to InstallAPI.
bool isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a worksharing directive.
bool needsTaskBasedThreadLimit(OpenMPDirectiveKind DKind)
Checks if the specified target directive, combined or not, needs task based thread_limit.
@ Ctor_Complete
Complete object ctor.
Definition: ABI.h:25
bool isOpenMPTargetDataManagementDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target data offload directive.
llvm::omp::Directive OpenMPDirectiveKind
OpenMP directives.
Definition: OpenMPKinds.h:25
@ Conditional
A conditional (?:) operator.
@ ICIS_NoInit
No in-class initializer.
Definition: Specifiers.h:272
bool isOpenMPDistributeDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a distribute directive.
@ LCK_ByRef
Capturing by reference.
Definition: Lambda.h:37
BinaryOperatorKind
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
@ Private
'private' clause, allowed on 'parallel', 'serial', 'loop', 'parallel loop', and 'serial loop' constru...
@ Vector
'vector' clause, allowed on 'loop', Combined, and 'routine' directives.
@ Reduction
'reduction' clause, allowed on Parallel, Serial, Loop, and the combined constructs.
@ Present
'present' clause, allowed on Compute and Combined constructs, plus 'data' and 'declare'.
OpenMPScheduleClauseModifier
OpenMP modifiers for 'schedule' clause.
Definition: OpenMPKinds.h:39
@ OMPC_SCHEDULE_MODIFIER_last
Definition: OpenMPKinds.h:44
@ OMPC_SCHEDULE_MODIFIER_unknown
Definition: OpenMPKinds.h:40
@ AS_public
Definition: Specifiers.h:124
@ CR_OpenMP
Definition: CapturedStmt.h:19
bool isOpenMPParallelDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a parallel-kind directive.
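These classification predicates compose naturally for combined directives; a minimal usage sketch (the OMPD_parallel_for enumerator from llvm/Frontend/OpenMP is assumed for illustration):
OpenMPDirectiveKind DKind = llvm::omp::OMPD_parallel_for;
bool HasWorksharing = isOpenMPWorksharingDirective(DKind); // the 'for' part is a worksharing loop
bool HasParallel = isOpenMPParallelDirective(DKind);       // the 'parallel' part spawns a team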
OpenMPDistScheduleClauseKind
OpenMP attributes for 'dist_schedule' clause.
Definition: OpenMPKinds.h:104
bool isOpenMPTaskingDirective(OpenMPDirectiveKind Kind)
Checks if the specified directive kind is one of tasking directives - task, taskloop,...
bool isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target code offload directive.
@ Result
The result type of a method or function.
bool isOpenMPTeamsDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a teams-kind directive.
OpenMPDependClauseKind
OpenMP attributes for 'depend' clause.
Definition: OpenMPKinds.h:55
@ OMPC_DEPEND_unknown
Definition: OpenMPKinds.h:59
@ Dtor_Complete
Complete object dtor.
Definition: ABI.h:35
@ Union
The "union" keyword.
bool isOpenMPLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a directive with an associated loop construct.
OpenMPSeverityClauseKind
OpenMP attributes for 'severity' clause.
Definition: OpenMPKinds.h:143
@ OMPC_SEVERITY_unknown
Definition: OpenMPKinds.h:146
LangAS
Defines the address space values used by the address space qualifier of QualType.
Definition: AddressSpaces.h:25
bool isOpenMPSimdDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a simd directive.
@ VK_PRValue
A pr-value expression (in the C++11 taxonomy) produces a temporary value.
Definition: Specifiers.h:135
@ VK_LValue
An l-value expression is a reference to an object with independent storage.
Definition: Specifiers.h:139
const FunctionProtoType * T
void getOpenMPCaptureRegions(llvm::SmallVectorImpl< OpenMPDirectiveKind > &CaptureRegions, OpenMPDirectiveKind DKind)
Return the captured regions of an OpenMP directive.
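A hedged sketch of querying the capture regions of a combined target directive (the directive enumerator is an assumption for illustration; the exact contents of the result depend on the directive and the OpenMP version):
llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
getOpenMPCaptureRegions(CaptureRegions, llvm::omp::OMPD_target_parallel);
// One entry per nested capture region of 'target parallel', in nesting order;
// callers iterate this list to emit the matching captured statements.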
OpenMPNumThreadsClauseModifier
Definition: OpenMPKinds.h:226
@ OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown
Definition: OpenMPKinds.h:132
@ OMPC_DEVICE_unknown
Definition: OpenMPKinds.h:51
OpenMPMapModifierKind
OpenMP modifier kind for 'map' clause.
Definition: OpenMPKinds.h:79
@ OMPC_MAP_MODIFIER_unknown
Definition: OpenMPKinds.h:80
@ Other
Other implicit parameter.
OpenMPScheduleClauseKind
OpenMP attributes for 'schedule' clause.
Definition: OpenMPKinds.h:31
@ OMPC_SCHEDULE_unknown
Definition: OpenMPKinds.h:35
bool isOpenMPTaskLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a taskloop directive.
OpenMPMapClauseKind
OpenMP mapping kind for 'map' clause.
Definition: OpenMPKinds.h:71
@ OMPC_MAP_unknown
Definition: OpenMPKinds.h:75
unsigned long uint64_t
Diagnostic wrappers for TextAPI types for error reporting.
Definition: Dominators.h:30
#define false
Definition: stdbool.h:26
Struct with the values to be passed to the dispatch runtime function.
llvm::Value * Chunk
Chunk size specified using 'schedule' clause (nullptr if chunk was not specified)
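Only the Chunk member is indexed here; a sketch of the argument block such a dispatch-init call plausibly receives (the struct name and the loop-bound members are assumptions, not taken from this index):
struct DispatchRTInputSketch {
  llvm::Value *LB = nullptr;    // assumed: loop lower bound
  llvm::Value *UB = nullptr;    // assumed: loop upper bound
  llvm::Value *Chunk = nullptr; // chunk size from the 'schedule' clause, nullptr if unspecified
};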
Maps the expression for the lastprivate variable to the global copy used to store new value because o...
Struct with the values to be passed to the static runtime function.
bool IVSigned
Sign of the iteration variable.
Address UB
Address of the output variable in which the upper iteration number is returned.
Address IL
Address of the output variable in which the flag of the last iteration is returned.
llvm::Value * Chunk
Value of the chunk for the static_chunked scheduled loop.
unsigned IVSize
Size of the iteration variable in bits.
Address ST
Address of the output variable in which the stride value is returned, necessary to generate the stati...
bool Ordered
true if loop is ordered, false otherwise.
Address LB
Address of the output variable in which the lower iteration number is returned.
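Reassembled from the eight entries above, the static-scheduling argument block reads as follows (the struct name and the Address::invalid()/nullptr defaults are assumptions; the members and their meanings are taken from this index):
struct StaticRTInputSketch {
  unsigned IVSize = 0;             // size of the iteration variable in bits
  bool IVSigned = false;           // sign of the iteration variable
  bool Ordered = false;            // true if the loop is ordered
  Address IL = Address::invalid(); // out: flag of the last iteration
  Address LB = Address::invalid(); // out: lower iteration number
  Address UB = Address::invalid(); // out: upper iteration number
  Address ST = Address::invalid(); // out: stride for the static_chunked loop
  llvm::Value *Chunk = nullptr;    // chunk value for static_chunked scheduling
};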
A jump destination is an abstract label, branching to which may require a jump out through normal cle...
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
llvm::CallingConv::ID getRuntimeCC() const
llvm::IntegerType * IntTy
int
SmallVector< const Expr *, 4 > DepExprs
EvalResult is a struct with detailed info about an evaluated expression.
Definition: Expr.h:645
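A minimal sketch of consuming such a result when folding an expression to an integer (E and Ctx stand for an arbitrary Expr* and ASTContext; both are assumptions here):
Expr::EvalResult Result;
if (E->EvaluateAsInt(Result, Ctx)) {
  llvm::APSInt Value = Result.Val.getInt(); // the detailed info lives in Result.Val
}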
Extra information about a function prototype.
Definition: TypeBase.h:5367
Helper expressions and declaration for OMPIteratorExpr class for each iteration space.
Definition: ExprOpenMP.h:111
Expr * CounterUpdate
Updater for the internal counter: ++CounterVD;.
Definition: ExprOpenMP.h:121
Expr * Update
Update expression for the originally specified iteration variable, calculated as VD = Begin + Counter...
Definition: ExprOpenMP.h:119
VarDecl * CounterVD
Internal normalized counter.
Definition: ExprOpenMP.h:113
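The three helpers above describe one iterator space; a plain-C++ analog of the loop they model (Begin, Step, NumIters, and use() are hypothetical placeholders):
for (uint64_t Counter = 0; Counter < NumIters; ++Counter) { // CounterVD; '++' is CounterUpdate
  int64_t VD = Begin + Counter * Step;                      // Update: VD = Begin + Counter * Step
  use(VD);
}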
Data for list of allocators.
Scheduling data for loop-based OpenMP directives.
Definition: OpenMPKinds.h:180
OpenMPScheduleClauseModifier M2
Definition: OpenMPKinds.h:183
OpenMPScheduleClauseModifier M1
Definition: OpenMPKinds.h:182
OpenMPScheduleClauseKind Schedule
Definition: OpenMPKinds.h:181
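Reassembled from the three entries above, the scheduling record plausibly looks like this (the _unknown defaults are taken from the enumerators also listed in this index; treat it as a sketch, not the verbatim definition):
struct OpenMPScheduleTy {
  OpenMPScheduleClauseKind Schedule = OMPC_SCHEDULE_unknown;
  OpenMPScheduleClauseModifier M1 = OMPC_SCHEDULE_MODIFIER_unknown;
  OpenMPScheduleClauseModifier M2 = OMPC_SCHEDULE_MODIFIER_unknown;
};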
Describes how types, statements, expressions, and declarations should be printed.
Definition: PrettyPrinter.h:57