Skip to content

Commit e6d2583

Browse files
committed
[OPENMP50]Track changes of lastprivate conditional in parallel-based
regions with reductions, lastprivates or linears clauses. If the lastprivate conditional variable is updated in inner parallel region with reduction, lastprivate or linear clause, the value must be considred as a candidate for lastprivate conditional. Also, tracking in inner parallel regions is not required.
1 parent c72a6ac commit e6d2583

File tree

6 files changed

+80
-91
lines changed

6 files changed

+80
-91
lines changed

clang/lib/CodeGen/CGOpenMPRuntime.cpp

Lines changed: 6 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -11357,25 +11357,7 @@ CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
1135711357
}
1135811358
}
1135911359
Data.IVLVal = IVLVal;
11360-
// In simd only mode or for simd directives no need to generate threadprivate
11361-
// references for the loop iteration counter, we can use the original one
11362-
// since outlining cannot happen in simd regions.
11363-
if (CGF.getLangOpts().OpenMPSimd ||
11364-
isOpenMPSimdDirective(S.getDirectiveKind())) {
11365-
Data.UseOriginalIV = true;
11366-
return;
11367-
}
11368-
PresumedLoc PLoc =
11369-
CGM.getContext().getSourceManager().getPresumedLoc(S.getBeginLoc());
11370-
assert(PLoc.isValid() && "Source location is expected to be always valid.");
11371-
11372-
llvm::sys::fs::UniqueID ID;
11373-
if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
11374-
CGM.getDiags().Report(diag::err_cannot_open_file)
11375-
<< PLoc.getFilename() << EC.message();
11376-
Data.IVName = CGM.getOpenMPRuntime().getName(
11377-
{"pl_cond", llvm::utostr(ID.getDevice()), llvm::utostr(ID.getFile()),
11378-
llvm::utostr(PLoc.getLine()), llvm::utostr(PLoc.getColumn()), "iv"});
11360+
Data.CGF = &CGF;
1137911361
}
1138011362

1138111363
CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
@@ -11384,27 +11366,6 @@ CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
1138411366
CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
1138511367
}
1138611368

11387-
void CGOpenMPRuntime::initLastprivateConditionalCounter(
11388-
CodeGenFunction &CGF, const OMPExecutableDirective &S) {
11389-
if (CGM.getLangOpts().OpenMPSimd ||
11390-
!llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
11391-
[](const OMPLastprivateClause *C) {
11392-
return C->getKind() == OMPC_LASTPRIVATE_conditional;
11393-
}))
11394-
return;
11395-
const CGOpenMPRuntime::LastprivateConditionalData &Data =
11396-
LastprivateConditionalStack.back();
11397-
if (Data.UseOriginalIV)
11398-
return;
11399-
// Global loop counter. Required to handle inner parallel-for regions.
11400-
// global_iv = iv;
11401-
Address GlobIVAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
11402-
CGF, Data.IVLVal.getType(), Data.IVName);
11403-
LValue GlobIVLVal = CGF.MakeAddrLValue(GlobIVAddr, Data.IVLVal.getType());
11404-
llvm::Value *IVVal = CGF.EmitLoadOfScalar(Data.IVLVal, S.getBeginLoc());
11405-
CGF.EmitStoreOfScalar(IVVal, GlobIVLVal);
11406-
}
11407-
1140811369
namespace {
1140911370
/// Checks if the lastprivate conditional variable is referenced in LHS.
1141011371
class LastprivateConditionalRefChecker final
@@ -11415,9 +11376,7 @@ class LastprivateConditionalRefChecker final
1141511376
const Decl *FoundD = nullptr;
1141611377
StringRef UniqueDeclName;
1141711378
LValue IVLVal;
11418-
StringRef IVName;
1141911379
SourceLocation Loc;
11420-
bool UseOriginalIV = false;
1142111380

1142211381
public:
1142311382
bool VisitDeclRefExpr(const DeclRefExpr *E) {
@@ -11430,8 +11389,6 @@ class LastprivateConditionalRefChecker final
1143011389
FoundD = E->getDecl()->getCanonicalDecl();
1143111390
UniqueDeclName = It->getSecond();
1143211391
IVLVal = D.IVLVal;
11433-
IVName = D.IVName;
11434-
UseOriginalIV = D.UseOriginalIV;
1143511392
break;
1143611393
}
1143711394
return FoundE == E;
@@ -11448,8 +11405,6 @@ class LastprivateConditionalRefChecker final
1144811405
FoundD = E->getMemberDecl()->getCanonicalDecl();
1144911406
UniqueDeclName = It->getSecond();
1145011407
IVLVal = D.IVLVal;
11451-
IVName = D.IVName;
11452-
UseOriginalIV = D.UseOriginalIV;
1145311408
break;
1145411409
}
1145511410
return FoundE == E;
@@ -11470,17 +11425,17 @@ class LastprivateConditionalRefChecker final
1147011425
CodeGenFunction &CGF,
1147111426
ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
1147211427
: CGF(CGF), LPM(LPM) {}
11473-
std::tuple<const Expr *, const Decl *, StringRef, LValue, StringRef, bool>
11428+
std::tuple<const Expr *, const Decl *, StringRef, LValue>
1147411429
getFoundData() const {
11475-
return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, IVName,
11476-
UseOriginalIV);
11430+
return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal);
1147711431
}
1147811432
};
1147911433
} // namespace
1148011434

1148111435
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
1148211436
const Expr *LHS) {
11483-
if (CGF.getLangOpts().OpenMP < 50)
11437+
if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty() ||
11438+
LastprivateConditionalStack.back().CGF != &CGF)
1148411439
return;
1148511440
LastprivateConditionalRefChecker Checker(CGF, LastprivateConditionalStack);
1148611441
if (!Checker.Visit(LHS))
@@ -11489,10 +11444,7 @@ void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
1148911444
const Decl *FoundD;
1149011445
StringRef UniqueDeclName;
1149111446
LValue IVLVal;
11492-
StringRef IVName;
11493-
bool UseOriginalIV;
11494-
std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, IVName, UseOriginalIV) =
11495-
Checker.getFoundData();
11447+
std::tie(FoundE, FoundD, UniqueDeclName, IVLVal) = Checker.getFoundData();
1149611448

1149711449
// Last updated loop counter for the lastprivate conditional var.
1149811450
// int<xx> last_iv = 0;
@@ -11517,11 +11469,6 @@ void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
1151711469

1151811470
// Global loop counter. Required to handle inner parallel-for regions.
1151911471
// global_iv
11520-
if (!UseOriginalIV) {
11521-
Address IVAddr =
11522-
getAddrOfArtificialThreadPrivate(CGF, IVLVal.getType(), IVName);
11523-
IVLVal = CGF.MakeAddrLValue(IVAddr, IVLVal.getType());
11524-
}
1152511472
llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, FoundE->getExprLoc());
1152611473

1152711474
// #pragma omp critical(a)

clang/lib/CodeGen/CGOpenMPRuntime.h

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -233,11 +233,7 @@ class CGOpenMPRuntime {
233233
llvm::SmallDenseMap<CanonicalDeclPtr<const Decl>, SmallString<16>>
234234
DeclToUniqeName;
235235
LValue IVLVal;
236-
SmallString<16> IVName;
237-
/// True if original lvalue for loop counter can be used in codegen (simd
238-
/// region or simd only mode) and no need to create threadprivate
239-
/// references.
240-
bool UseOriginalIV = false;
236+
CodeGenFunction *CGF = nullptr;
241237
};
242238
/// Manages list of lastprivate conditional decls for the specified directive.
243239
class LastprivateConditionalRAII {
@@ -1692,11 +1688,6 @@ class CGOpenMPRuntime {
16921688
/// current context.
16931689
bool isNontemporalDecl(const ValueDecl *VD) const;
16941690

1695-
/// Initializes global counter for lastprivate conditional.
1696-
virtual void
1697-
initLastprivateConditionalCounter(CodeGenFunction &CGF,
1698-
const OMPExecutableDirective &S);
1699-
17001691
/// Checks if the provided \p LVal is lastprivate conditional and emits the
17011692
/// code to update the value of the original variable.
17021693
/// \code

clang/lib/CodeGen/CGStmtOpenMP.cpp

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "clang/AST/ASTContext.h"
1919
#include "clang/AST/Attr.h"
2020
#include "clang/AST/DeclOpenMP.h"
21+
#include "clang/AST/OpenMPClause.h"
2122
#include "clang/AST/Stmt.h"
2223
#include "clang/AST/StmtOpenMP.h"
2324
#include "clang/Basic/PrettyStackTrace.h"
@@ -1332,6 +1333,19 @@ static void emitCommonOMPParallelDirective(
13321333
CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
13331334
CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
13341335
CapturedVars, IfCond);
1336+
// Check for outer lastprivate conditional update.
1337+
for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
1338+
for (const Expr *Ref : C->varlists())
1339+
CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1340+
}
1341+
for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
1342+
for (const Expr *Ref : C->varlists())
1343+
CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1344+
}
1345+
for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
1346+
for (const Expr *Ref : C->varlists())
1347+
CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1348+
}
13351349
}
13361350

13371351
static void emitEmptyBoundParameters(CodeGenFunction &,
@@ -1890,7 +1904,6 @@ void CodeGenFunction::EmitOMPSimdFinal(
18901904
static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
18911905
const OMPLoopDirective &S,
18921906
CodeGenFunction::JumpDest LoopExit) {
1893-
CGF.CGM.getOpenMPRuntime().initLastprivateConditionalCounter(CGF, S);
18941907
CGF.EmitOMPLoopBody(S, LoopExit);
18951908
CGF.EmitStopPoint(&S);
18961909
}
@@ -2011,8 +2024,6 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
20112024
CGF.EmitOMPInnerLoop(
20122025
S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
20132026
[&S](CodeGenFunction &CGF) {
2014-
CGF.CGM.getOpenMPRuntime().initLastprivateConditionalCounter(
2015-
CGF, S);
20162027
CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest());
20172028
CGF.EmitStopPoint(&S);
20182029
},
@@ -2667,8 +2678,6 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
26672678
: S.getCond(),
26682679
StaticChunkedOne ? S.getDistInc() : S.getInc(),
26692680
[&S, LoopExit](CodeGenFunction &CGF) {
2670-
CGF.CGM.getOpenMPRuntime()
2671-
.initLastprivateConditionalCounter(CGF, S);
26722681
CGF.EmitOMPLoopBody(S, LoopExit);
26732682
CGF.EmitStopPoint(&S);
26742683
},
@@ -2851,7 +2860,6 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
28512860
// break;
28522861
// }
28532862
// .omp.sections.exit:
2854-
CGF.CGM.getOpenMPRuntime().initLastprivateConditionalCounter(CGF, S);
28552863
llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
28562864
llvm::SwitchInst *SwitchStmt =
28572865
CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()),

clang/test/OpenMP/for_lastprivate_codegen.cpp

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,6 @@ char cnt;
176176
// CHECK-DAG: [[X:@.+]] = global double 0.0
177177
// CHECK-DAG: [[F:@.+]] = global float 0.0
178178
// CHECK-DAG: [[CNT:@.+]] = global i8 0
179-
// OMP50-DAG: [[IV_REF:@.+]] = {{.*}}common global i32 0
180179
// OMP50-DAG: [[LAST_IV_F:@.+]] = {{.*}}common global i32 0
181180
// OMP50-DAG: [[LAST_F:@.+]] = {{.*}}common global float 0.000000e+00,
182181

@@ -674,16 +673,10 @@ int main() {
674673
// CHECK-NEXT: [[LB:%.+]] = load i32, i32* [[OMP_LB]]
675674
// CHECK-NEXT: store i32 [[LB]], i32* [[OMP_IV:[^,]+]]
676675
// <Skip loop body>
677-
// OMP50: [[LOCAL_IV_REF:%.+]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @{{.+}}, i32 [[GTID]], i8* bitcast (i32* [[IV_REF]] to i8*), i64 4, i8*** @{{.+}})
678-
// OMP50: [[BC:%.+]] = bitcast i8* [[LOCAL_IV_REF]] to i32*
679-
// OMP50: store i32 %{{.+}}, i32* [[BC]],
680676
// CHECK: store float 0.000000e+00, float* [[F_PRIV:%.+]],
681-
// OMP50: [[LOCAL_IV_REF:%.+]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @{{.+}}, i32 [[GTID]], i8* bitcast (i32* [[IV_REF]] to i8*), i64 4, i8*** @{{.+}})
682-
// OMP50: [[BC:%.+]] = bitcast i8* [[LOCAL_IV_REF]] to i32*
683-
// OMP50: [[IV:%.+]] = load i32, i32* [[BC]],
684677
// OMP50: call void @__kmpc_critical(%struct.ident_t* @{{.+}}, i32 [[GTID]], [8 x i32]* [[F_REGION:@.+]])
685678
// OMP50: [[LAST_IV:%.+]] = load i32, i32* [[LAST_IV_F]],
686-
// OMP50: [[CMP:%.+]] = icmp sle i32 [[LAST_IV]], [[IV]]
679+
// OMP50: [[CMP:%.+]] = icmp sle i32 [[LAST_IV]], [[IV:%.+]]
687680
// OMP50: br i1 [[CMP]], label %[[LP_THEN:.+]], label %[[LP_DONE:[^,]+]]
688681

689682
// OMP50: [[LP_THEN]]:
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck %s
2+
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
3+
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
4+
5+
// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -DOMP5 -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
6+
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -DOMP5 -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
7+
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -DOMP5 -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
8+
// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
9+
// expected-no-diagnostics
10+
11+
#ifndef HEADER
12+
#define HEADER
13+
14+
int main() {
15+
int a = 0;
16+
#pragma omp parallel for lastprivate(conditional: a)
17+
for (int i = 0; i < 10; ++i) {
18+
if (i < 5) {
19+
a = 0;
20+
#pragma omp parallel reduction(+:a) num_threads(10)
21+
a += i;
22+
}
23+
}
24+
return 0;
25+
}
26+
27+
// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}})
28+
29+
// CHECK: define internal void [[OUTLINED]](
30+
// CHECK: call void @__kmpc_push_num_threads(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 10)
31+
// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @{{.+}} to void (i32*, i32*, ...)*), i32* {{.+}} i32* %{{.+}})
32+
// CHECK: call void @__kmpc_critical(%struct.ident_t* @{{.+}}, i32 %{{.+}}, [8 x i32]* @{{.+}})
33+
// CHECK: [[LAST_IV_VAL:%.+]] = load i32, i32* [[LAST_IV:@.+]],
34+
// CHECK: [[RES:%.+]] = icmp sle i32 [[LAST_IV_VAL]], [[IV:%.+]]
35+
// CHECK: br i1 [[RES]], label %[[THEN:.+]], label %[[DONE:.+]]
36+
// CHECK: [[THEN]]:
37+
// CHECK: store i32 [[IV]], i32* [[LAST_IV]],
38+
// CHECK: [[A_VAL:%.+]] = load i32, i32* [[A_PRIV:%.+]],
39+
// CHECK: store i32 [[A_VAL]], i32* [[A_GLOB:@.+]],
40+
// CHECK: br label %[[DONE]]
41+
// CHECK: [[DONE]]:
42+
// CHECK: call void @__kmpc_end_critical(%struct.ident_t* @{{.+}}, i32 %{{.+}}, [8 x i32]* @{{.+}})
43+
// CHECK: call void @__kmpc_for_static_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}})
44+
// CHECK: [[IS_LAST:%.+]] = load i32, i32* %{{.+}},
45+
// CHECK: [[RES:%.+]] = icmp ne i32 [[IS_LAST]], 0
46+
// CHECK: call void @__kmpc_barrier(%struct.ident_t* @{{.+}}, i32 %{{.+}})
47+
// CHECK: br i1 [[RES]], label %[[THEN:.+]], label %[[DONE:.+]]
48+
// CHECK: [[THEN]]:
49+
// CHECK: [[A_VAL:%.+]] = load i32, i32* [[A_GLOB]],
50+
// CHECK: store i32 [[A_VAL]], i32* [[A_PRIV]],
51+
// CHECK: [[A_VAL:%.+]] = load i32, i32* [[A_PRIV]],
52+
// CHECK: store i32 [[A_VAL]], i32* %{{.+}},
53+
// CHECK: br label %[[DONE]]
54+
// CHECK: [[DONE]]:
55+
// CHECK: ret void
56+
57+
#endif // HEADER

clang/test/OpenMP/sections_lastprivate_codegen.cpp

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,6 @@ volatile int g = 1212;
4646
// CHECK: [[S_INT_TY:%.+]] = type { i32 }
4747
// CHECK-DAG: [[SECTIONS_BARRIER_LOC:@.+]] = private unnamed_addr global %{{.+}} { i32 0, i32 194, i32 0, i32 0, i8*
4848
// CHECK-DAG: [[X:@.+]] = global double 0.0
49-
// OMP50-DAG: [[IV_REF:@.+]] = common global i32 0
5049
// OMP50-DAG: [[LAST_IV_X:@.+]] = {{.*}}common global i32 0
5150
// OMP50-DAG: [[LAST_X:@.+]] = {{.*}}common global double 0.000000e+00,
5251
template <typename T>
@@ -294,15 +293,9 @@ int main() {
294293
// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
295294
// CHECK: call void @__kmpc_for_static_init_4(%{{.+}}* @{{.+}}, i32 [[GTID]], i32 34, i32* [[IS_LAST_ADDR:%.+]], i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1)
296295
// <Skip loop body>
297-
// OMP50: [[IV_GLOB_REF:%.+]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @{{.+}}, i32 [[GTID]], i8* bitcast (i32* [[IV_REF]] to i8*), i64 4, i8*** @{{.+}})
298-
// OMP50: [[BC:%.+]] = bitcast i8* [[IV_GLOB_REF]] to i32*
299-
// OMP50: store i32 %{{.+}}, i32* [[BC]],
300-
// OMP50: [[LOCAL_IV_REF:%.+]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @{{.+}}, i32 [[GTID]], i8* bitcast (i32* [[IV_REF]] to i8*), i64 4, i8*** @{{.+}})
301-
// OMP50: [[BC:%.+]] = bitcast i8* [[LOCAL_IV_REF]] to i32*
302-
// OMP50: [[IV:%.+]] = load i32, i32* [[BC]],
303296
// OMP50: call void @__kmpc_critical(%struct.ident_t* @{{.+}}, i32 [[GTID]], [8 x i32]* [[X_REGION:@.+]])
304297
// OMP50: [[LAST_IV:%.+]] = load i32, i32* [[LAST_IV_X]],
305-
// OMP50: [[CMP:%.+]] = icmp sle i32 [[LAST_IV]], [[IV]]
298+
// OMP50: [[CMP:%.+]] = icmp sle i32 [[LAST_IV]], [[IV:%.+]]
306299
// OMP50: br i1 [[CMP]], label %[[LP_THEN:.+]], label %[[LP_DONE:[^,]+]]
307300

308301
// OMP50: [[LP_THEN]]:

0 commit comments

Comments
 (0)