@@ -9374,22 +9374,16 @@ static Comparison compareEnableIfAttrs(const Sema &S, const FunctionDecl *Cand1,
9374
9374
return Comparison::Equal;
9375
9375
}
9376
9376
9377
- static Comparison
9378
- isBetterMultiversionCandidate(const OverloadCandidate &Cand1,
9379
- const OverloadCandidate &Cand2) {
9377
+ static bool isBetterMultiversionCandidate(const OverloadCandidate &Cand1,
9378
+ const OverloadCandidate &Cand2) {
9380
9379
if (!Cand1.Function || !Cand1.Function->isMultiVersion() || !Cand2.Function ||
9381
9380
!Cand2.Function->isMultiVersion())
9382
- return Comparison::Equal ;
9381
+ return false ;
9383
9382
9384
- // If both are invalid, they are equal. If one of them is invalid, the other
9385
- // is better.
9386
- if (Cand1.Function->isInvalidDecl()) {
9387
- if (Cand2.Function->isInvalidDecl())
9388
- return Comparison::Equal;
9389
- return Comparison::Worse;
9390
- }
9391
- if (Cand2.Function->isInvalidDecl())
9392
- return Comparison::Better;
9383
+ // If Cand1 is invalid, it cannot be a better match; if Cand2 is invalid,
9384
+ // Cand1 is obviously better.
9385
+ if (Cand1.Function->isInvalidDecl()) return false;
9386
+ if (Cand2.Function->isInvalidDecl()) return true;
9393
9387
9394
9388
// If this is a cpu_dispatch/cpu_specific multiversion situation, prefer
9395
9389
// cpu_dispatch, else arbitrarily based on the identifiers.
@@ -9399,18 +9393,16 @@ isBetterMultiversionCandidate(const OverloadCandidate &Cand1,
9399
9393
const auto *Cand2CPUSpec = Cand2.Function->getAttr<CPUSpecificAttr>();
9400
9394
9401
9395
if (!Cand1CPUDisp && !Cand2CPUDisp && !Cand1CPUSpec && !Cand2CPUSpec)
9402
- return Comparison::Equal ;
9396
+ return false ;
9403
9397
9404
9398
if (Cand1CPUDisp && !Cand2CPUDisp)
9405
- return Comparison::Better ;
9399
+ return true ;
9406
9400
if (Cand2CPUDisp && !Cand1CPUDisp)
9407
- return Comparison::Worse ;
9401
+ return false ;
9408
9402
9409
9403
if (Cand1CPUSpec && Cand2CPUSpec) {
9410
9404
if (Cand1CPUSpec->cpus_size() != Cand2CPUSpec->cpus_size())
9411
- return Cand1CPUSpec->cpus_size() < Cand2CPUSpec->cpus_size()
9412
- ? Comparison::Better
9413
- : Comparison::Worse;
9405
+ return Cand1CPUSpec->cpus_size() < Cand2CPUSpec->cpus_size();
9414
9406
9415
9407
std::pair<CPUSpecificAttr::cpus_iterator, CPUSpecificAttr::cpus_iterator>
9416
9408
FirstDiff = std::mismatch(
@@ -9423,9 +9415,7 @@ isBetterMultiversionCandidate(const OverloadCandidate &Cand1,
9423
9415
assert(FirstDiff.first != Cand1CPUSpec->cpus_end() &&
9424
9416
"Two different cpu-specific versions should not have the same "
9425
9417
"identifier list, otherwise they'd be the same decl!");
9426
- return (*FirstDiff.first)->getName() < (*FirstDiff.second)->getName()
9427
- ? Comparison::Better
9428
- : Comparison::Worse;
9418
+ return (*FirstDiff.first)->getName() < (*FirstDiff.second)->getName();
9429
9419
}
9430
9420
llvm_unreachable("No way to get here unless both had cpu_dispatch");
9431
9421
}
@@ -9485,50 +9475,6 @@ bool clang::isBetterOverloadCandidate(
9485
9475
else if (!Cand1.Viable)
9486
9476
return false;
9487
9477
9488
- // [CUDA] A function with 'never' preference is marked not viable, therefore
9489
- // is never shown up here. The worst preference shown up here is 'wrong side',
9490
- // e.g. a host function called by a device host function in device
9491
- // compilation. This is valid AST as long as the host device function is not
9492
- // emitted, e.g. it is an inline function which is called only by a host
9493
- // function. A deferred diagnostic will be triggered if it is emitted.
9494
- // However a wrong-sided function is still a viable candidate here.
9495
- //
9496
- // If Cand1 can be emitted and Cand2 cannot be emitted in the current
9497
- // context, Cand1 is better than Cand2. If Cand1 can not be emitted and Cand2
9498
- // can be emitted, Cand1 is not better than Cand2. This rule should have
9499
- // precedence over other rules.
9500
- //
9501
- // If both Cand1 and Cand2 can be emitted, or neither can be emitted, then
9502
- // other rules should be used to determine which is better. This is because
9503
- // host/device based overloading resolution is mostly for determining
9504
- // viability of a function. If two functions are both viable, other factors
9505
- // should take precedence in preference, e.g. the standard-defined preferences
9506
- // like argument conversion ranks or enable_if partial-ordering. The
9507
- // preference for pass-object-size parameters is probably most similar to a
9508
- // type-based-overloading decision and so should take priority.
9509
- //
9510
- // If other rules cannot determine which is better, CUDA preference will be
9511
- // used again to determine which is better.
9512
- //
9513
- // TODO: Currently IdentifyCUDAPreference does not return correct values
9514
- // for functions called in global variable initializers due to missing
9515
- // correct context about device/host. Therefore we can only enforce this
9516
- // rule when there is a caller. We should enforce this rule for functions
9517
- // in global variable initializers once proper context is added.
9518
- if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) {
9519
- if (FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext)) {
9520
- auto P1 = S.IdentifyCUDAPreference(Caller, Cand1.Function);
9521
- auto P2 = S.IdentifyCUDAPreference(Caller, Cand2.Function);
9522
- assert(P1 != Sema::CFP_Never && P2 != Sema::CFP_Never);
9523
- auto Cand1Emittable = P1 > Sema::CFP_WrongSide;
9524
- auto Cand2Emittable = P2 > Sema::CFP_WrongSide;
9525
- if (Cand1Emittable && !Cand2Emittable)
9526
- return true;
9527
- if (!Cand1Emittable && Cand2Emittable)
9528
- return false;
9529
- }
9530
- }
9531
-
9532
9478
// C++ [over.match.best]p1:
9533
9479
//
9534
9480
// -- if F is a static member function, ICS1(F) is defined such
@@ -9763,29 +9709,20 @@ bool clang::isBetterOverloadCandidate(
9763
9709
return Cmp == Comparison::Better;
9764
9710
}
9765
9711
9712
+ if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) {
9713
+ FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext);
9714
+ return S.IdentifyCUDAPreference(Caller, Cand1.Function) >
9715
+ S.IdentifyCUDAPreference(Caller, Cand2.Function);
9716
+ }
9717
+
9766
9718
bool HasPS1 = Cand1.Function != nullptr &&
9767
9719
functionHasPassObjectSizeParams(Cand1.Function);
9768
9720
bool HasPS2 = Cand2.Function != nullptr &&
9769
9721
functionHasPassObjectSizeParams(Cand2.Function);
9770
9722
if (HasPS1 != HasPS2 && HasPS1)
9771
9723
return true;
9772
9724
9773
- auto MV = isBetterMultiversionCandidate(Cand1, Cand2);
9774
- if (MV == Comparison::Better)
9775
- return true;
9776
- if (MV == Comparison::Worse)
9777
- return false;
9778
-
9779
- // If other rules cannot determine which is better, CUDA preference is used
9780
- // to determine which is better.
9781
- if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) {
9782
- if (FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext)) {
9783
- return S.IdentifyCUDAPreference(Caller, Cand1.Function) >
9784
- S.IdentifyCUDAPreference(Caller, Cand2.Function);
9785
- }
9786
- }
9787
-
9788
- return false;
9725
+ return isBetterMultiversionCandidate(Cand1, Cand2);
9789
9726
}
9790
9727
9791
9728
/// Determine whether two declarations are "equivalent" for the purposes of
@@ -9871,6 +9808,33 @@ OverloadCandidateSet::BestViableFunction(Sema &S, SourceLocation Loc,
9871
9808
std::transform(begin(), end(), std::back_inserter(Candidates),
9872
9809
[](OverloadCandidate &Cand) { return &Cand; });
9873
9810
9811
+ // [CUDA] HD->H or HD->D calls are technically not allowed by CUDA but
9812
+ // are accepted by both clang and NVCC. However, during a particular
9813
+ // compilation mode only one call variant is viable. We need to
9814
+ // exclude non-viable overload candidates from consideration based
9815
+ // only on their host/device attributes. Specifically, if one
9816
+ // candidate call is WrongSide and the other is SameSide, we ignore
9817
+ // the WrongSide candidate.
9818
+ if (S.getLangOpts().CUDA) {
9819
+ const FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext);
9820
+ bool ContainsSameSideCandidate =
9821
+ llvm::any_of(Candidates, [&](OverloadCandidate *Cand) {
9822
+ // Check viable function only.
9823
+ return Cand->Viable && Cand->Function &&
9824
+ S.IdentifyCUDAPreference(Caller, Cand->Function) ==
9825
+ Sema::CFP_SameSide;
9826
+ });
9827
+ if (ContainsSameSideCandidate) {
9828
+ auto IsWrongSideCandidate = [&](OverloadCandidate *Cand) {
9829
+ // Check viable function only to avoid unnecessary data copying/moving.
9830
+ return Cand->Viable && Cand->Function &&
9831
+ S.IdentifyCUDAPreference(Caller, Cand->Function) ==
9832
+ Sema::CFP_WrongSide;
9833
+ };
9834
+ llvm::erase_if(Candidates, IsWrongSideCandidate);
9835
+ }
9836
+ }
9837
+
9874
9838
// Find the best viable function.
9875
9839
Best = end();
9876
9840
for (auto *Cand : Candidates) {
0 commit comments