@@ -9374,16 +9374,22 @@ static Comparison compareEnableIfAttrs(const Sema &S, const FunctionDecl *Cand1,
9374
9374
return Comparison::Equal;
9375
9375
}
9376
9376
9377
- static bool isBetterMultiversionCandidate(const OverloadCandidate &Cand1,
9378
- const OverloadCandidate &Cand2) {
9377
+ static Comparison
9378
+ isBetterMultiversionCandidate(const OverloadCandidate &Cand1,
9379
+ const OverloadCandidate &Cand2) {
9379
9380
if (!Cand1.Function || !Cand1.Function->isMultiVersion() || !Cand2.Function ||
9380
9381
!Cand2.Function->isMultiVersion())
9381
- return false ;
9382
+ return Comparison::Equal ;
9382
9383
9383
- // If Cand1 is invalid, it cannot be a better match, if Cand2 is invalid, this
9384
- // is obviously better.
9385
- if (Cand1.Function->isInvalidDecl()) return false;
9386
- if (Cand2.Function->isInvalidDecl()) return true;
9384
+ // If both are invalid, they are equal. If one of them is invalid, the other
9385
+ // is better.
9386
+ if (Cand1.Function->isInvalidDecl()) {
9387
+ if (Cand2.Function->isInvalidDecl())
9388
+ return Comparison::Equal;
9389
+ return Comparison::Worse;
9390
+ }
9391
+ if (Cand2.Function->isInvalidDecl())
9392
+ return Comparison::Better;
9387
9393
9388
9394
// If this is a cpu_dispatch/cpu_specific multiversion situation, prefer
9389
9395
// cpu_dispatch, else arbitrarily based on the identifiers.
@@ -9393,16 +9399,18 @@ static bool isBetterMultiversionCandidate(const OverloadCandidate &Cand1,
9393
9399
const auto *Cand2CPUSpec = Cand2.Function->getAttr<CPUSpecificAttr>();
9394
9400
9395
9401
if (!Cand1CPUDisp && !Cand2CPUDisp && !Cand1CPUSpec && !Cand2CPUSpec)
9396
- return false ;
9402
+ return Comparison::Equal ;
9397
9403
9398
9404
if (Cand1CPUDisp && !Cand2CPUDisp)
9399
- return true ;
9405
+ return Comparison::Better ;
9400
9406
if (Cand2CPUDisp && !Cand1CPUDisp)
9401
- return false ;
9407
+ return Comparison::Worse ;
9402
9408
9403
9409
if (Cand1CPUSpec && Cand2CPUSpec) {
9404
9410
if (Cand1CPUSpec->cpus_size() != Cand2CPUSpec->cpus_size())
9405
- return Cand1CPUSpec->cpus_size() < Cand2CPUSpec->cpus_size();
9411
+ return Cand1CPUSpec->cpus_size() < Cand2CPUSpec->cpus_size()
9412
+ ? Comparison::Better
9413
+ : Comparison::Worse;
9406
9414
9407
9415
std::pair<CPUSpecificAttr::cpus_iterator, CPUSpecificAttr::cpus_iterator>
9408
9416
FirstDiff = std::mismatch(
@@ -9415,7 +9423,9 @@ static bool isBetterMultiversionCandidate(const OverloadCandidate &Cand1,
9415
9423
assert(FirstDiff.first != Cand1CPUSpec->cpus_end() &&
9416
9424
"Two different cpu-specific versions should not have the same "
9417
9425
"identifier list, otherwise they'd be the same decl!");
9418
- return (*FirstDiff.first)->getName() < (*FirstDiff.second)->getName();
9426
+ return (*FirstDiff.first)->getName() < (*FirstDiff.second)->getName()
9427
+ ? Comparison::Better
9428
+ : Comparison::Worse;
9419
9429
}
9420
9430
llvm_unreachable("No way to get here unless both had cpu_dispatch");
9421
9431
}
@@ -9475,6 +9485,50 @@ bool clang::isBetterOverloadCandidate(
9475
9485
else if (!Cand1.Viable)
9476
9486
return false;
9477
9487
9488
+ // [CUDA] A function with 'never' preference is marked not viable, therefore
9489
+ // never shows up here. The worst preference that shows up here is 'wrong side',
9490
+ // e.g. a host function called by a host device function in device
9491
+ // compilation. This is valid AST as long as the host device function is not
9492
+ // emitted, e.g. it is an inline function which is called only by a host
9493
+ // function. A deferred diagnostic will be triggered if it is emitted.
9494
+ // However a wrong-sided function is still a viable candidate here.
9495
+ //
9496
+ // If Cand1 can be emitted and Cand2 cannot be emitted in the current
9497
+ // context, Cand1 is better than Cand2. If Cand1 cannot be emitted and Cand2
9498
+ // can be emitted, Cand1 is not better than Cand2. This rule should have
9499
+ // precedence over other rules.
9500
+ //
9501
+ // If both Cand1 and Cand2 can be emitted, or neither can be emitted, then
9502
+ // other rules should be used to determine which is better. This is because
9503
+ // host/device-based overload resolution is mostly for determining
9504
+ // viability of a function. If two functions are both viable, other factors
9505
+ // should take precedence in preference, e.g. the standard-defined preferences
9506
+ // like argument conversion ranks or enable_if partial-ordering. The
9507
+ // preference for pass-object-size parameters is probably most similar to a
9508
+ // type-based-overloading decision and so should take priority.
9509
+ //
9510
+ // If other rules cannot determine which is better, CUDA preference will be
9511
+ // used again to determine which is better.
9512
+ //
9513
+ // TODO: Currently IdentifyCUDAPreference does not return correct values
9514
+ // for functions called in global variable initializers due to missing
9515
+ // correct context about device/host. Therefore we can only enforce this
9516
+ // rule when there is a caller. We should enforce this rule for functions
9517
+ // in global variable initializers once proper context is added.
9518
+ if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) {
9519
+ if (FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext)) {
9520
+ auto P1 = S.IdentifyCUDAPreference(Caller, Cand1.Function);
9521
+ auto P2 = S.IdentifyCUDAPreference(Caller, Cand2.Function);
9522
+ assert(P1 != Sema::CFP_Never && P2 != Sema::CFP_Never);
9523
+ auto Cand1Emittable = P1 > Sema::CFP_WrongSide;
9524
+ auto Cand2Emittable = P2 > Sema::CFP_WrongSide;
9525
+ if (Cand1Emittable && !Cand2Emittable)
9526
+ return true;
9527
+ if (!Cand1Emittable && Cand2Emittable)
9528
+ return false;
9529
+ }
9530
+ }
9531
+
9478
9532
// C++ [over.match.best]p1:
9479
9533
//
9480
9534
// -- if F is a static member function, ICS1(F) is defined such
@@ -9709,20 +9763,28 @@ bool clang::isBetterOverloadCandidate(
9709
9763
return Cmp == Comparison::Better;
9710
9764
}
9711
9765
9712
- if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) {
9713
- FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext);
9714
- return S.IdentifyCUDAPreference(Caller, Cand1.Function) >
9715
- S.IdentifyCUDAPreference(Caller, Cand2.Function);
9716
- }
9717
-
9718
9766
bool HasPS1 = Cand1.Function != nullptr &&
9719
9767
functionHasPassObjectSizeParams(Cand1.Function);
9720
9768
bool HasPS2 = Cand2.Function != nullptr &&
9721
9769
functionHasPassObjectSizeParams(Cand2.Function);
9722
9770
if (HasPS1 != HasPS2 && HasPS1)
9723
9771
return true;
9724
9772
9725
- return isBetterMultiversionCandidate(Cand1, Cand2);
9773
+ auto MV = isBetterMultiversionCandidate(Cand1, Cand2);
9774
+ if (MV == Comparison::Better)
9775
+ return true;
9776
+ if (MV == Comparison::Worse)
9777
+ return false;
9778
+
9779
+ // If other rules cannot determine which is better, CUDA preference is used
9780
+ // to determine which is better.
9781
+ if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) {
9782
+ FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext);
9783
+ return S.IdentifyCUDAPreference(Caller, Cand1.Function) >
9784
+ S.IdentifyCUDAPreference(Caller, Cand2.Function);
9785
+ }
9786
+
9787
+ return false;
9726
9788
}
9727
9789
9728
9790
/// Determine whether two declarations are "equivalent" for the purposes of
@@ -9808,33 +9870,6 @@ OverloadCandidateSet::BestViableFunction(Sema &S, SourceLocation Loc,
9808
9870
std::transform(begin(), end(), std::back_inserter(Candidates),
9809
9871
[](OverloadCandidate &Cand) { return &Cand; });
9810
9872
9811
- // [CUDA] HD->H or HD->D calls are technically not allowed by CUDA but
9812
- // are accepted by both clang and NVCC. However, during a particular
9813
- // compilation mode only one call variant is viable. We need to
9814
- // exclude non-viable overload candidates from consideration based
9815
- // only on their host/device attributes. Specifically, if one
9816
- // candidate call is WrongSide and the other is SameSide, we ignore
9817
- // the WrongSide candidate.
9818
- if (S.getLangOpts().CUDA) {
9819
- const FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext);
9820
- bool ContainsSameSideCandidate =
9821
- llvm::any_of(Candidates, [&](OverloadCandidate *Cand) {
9822
- // Check viable function only.
9823
- return Cand->Viable && Cand->Function &&
9824
- S.IdentifyCUDAPreference(Caller, Cand->Function) ==
9825
- Sema::CFP_SameSide;
9826
- });
9827
- if (ContainsSameSideCandidate) {
9828
- auto IsWrongSideCandidate = [&](OverloadCandidate *Cand) {
9829
- // Check viable function only to avoid unnecessary data copying/moving.
9830
- return Cand->Viable && Cand->Function &&
9831
- S.IdentifyCUDAPreference(Caller, Cand->Function) ==
9832
- Sema::CFP_WrongSide;
9833
- };
9834
- llvm::erase_if(Candidates, IsWrongSideCandidate);
9835
- }
9836
- }
9837
-
9838
9873
// Find the best viable function.
9839
9874
Best = end();
9840
9875
for (auto *Cand : Candidates) {
0 commit comments