Skip to content

Commit 55bcb96

Browse files
committed
recommit c77a407 with fix
https://reviews.llvm.org/D77954 caused a regression about ambiguity of new operator in file scope. This patch recovered the previous behavior for comparison without a caller. This is a workaround. For real fix we need D71227 https://reviews.llvm.org/D78970
1 parent 44ad58b commit 55bcb96

File tree

2 files changed

+137
-58
lines changed

2 files changed

+137
-58
lines changed

clang/lib/Sema/SemaOverload.cpp

Lines changed: 81 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -9374,16 +9374,22 @@ static Comparison compareEnableIfAttrs(const Sema &S, const FunctionDecl *Cand1,
93749374
return Comparison::Equal;
93759375
}
93769376

9377-
static bool isBetterMultiversionCandidate(const OverloadCandidate &Cand1,
9378-
const OverloadCandidate &Cand2) {
9377+
static Comparison
9378+
isBetterMultiversionCandidate(const OverloadCandidate &Cand1,
9379+
const OverloadCandidate &Cand2) {
93799380
if (!Cand1.Function || !Cand1.Function->isMultiVersion() || !Cand2.Function ||
93809381
!Cand2.Function->isMultiVersion())
9381-
return false;
9382+
return Comparison::Equal;
93829383

9383-
// If Cand1 is invalid, it cannot be a better match, if Cand2 is invalid, this
9384-
// is obviously better.
9385-
if (Cand1.Function->isInvalidDecl()) return false;
9386-
if (Cand2.Function->isInvalidDecl()) return true;
9384+
// If both are invalid, they are equal. If one of them is invalid, the other
9385+
// is better.
9386+
if (Cand1.Function->isInvalidDecl()) {
9387+
if (Cand2.Function->isInvalidDecl())
9388+
return Comparison::Equal;
9389+
return Comparison::Worse;
9390+
}
9391+
if (Cand2.Function->isInvalidDecl())
9392+
return Comparison::Better;
93879393

93889394
// If this is a cpu_dispatch/cpu_specific multiversion situation, prefer
93899395
// cpu_dispatch, else arbitrarily based on the identifiers.
@@ -9393,16 +9399,18 @@ static bool isBetterMultiversionCandidate(const OverloadCandidate &Cand1,
93939399
const auto *Cand2CPUSpec = Cand2.Function->getAttr<CPUSpecificAttr>();
93949400

93959401
if (!Cand1CPUDisp && !Cand2CPUDisp && !Cand1CPUSpec && !Cand2CPUSpec)
9396-
return false;
9402+
return Comparison::Equal;
93979403

93989404
if (Cand1CPUDisp && !Cand2CPUDisp)
9399-
return true;
9405+
return Comparison::Better;
94009406
if (Cand2CPUDisp && !Cand1CPUDisp)
9401-
return false;
9407+
return Comparison::Worse;
94029408

94039409
if (Cand1CPUSpec && Cand2CPUSpec) {
94049410
if (Cand1CPUSpec->cpus_size() != Cand2CPUSpec->cpus_size())
9405-
return Cand1CPUSpec->cpus_size() < Cand2CPUSpec->cpus_size();
9411+
return Cand1CPUSpec->cpus_size() < Cand2CPUSpec->cpus_size()
9412+
? Comparison::Better
9413+
: Comparison::Worse;
94069414

94079415
std::pair<CPUSpecificAttr::cpus_iterator, CPUSpecificAttr::cpus_iterator>
94089416
FirstDiff = std::mismatch(
@@ -9415,7 +9423,9 @@ static bool isBetterMultiversionCandidate(const OverloadCandidate &Cand1,
94159423
assert(FirstDiff.first != Cand1CPUSpec->cpus_end() &&
94169424
"Two different cpu-specific versions should not have the same "
94179425
"identifier list, otherwise they'd be the same decl!");
9418-
return (*FirstDiff.first)->getName() < (*FirstDiff.second)->getName();
9426+
return (*FirstDiff.first)->getName() < (*FirstDiff.second)->getName()
9427+
? Comparison::Better
9428+
: Comparison::Worse;
94199429
}
94209430
llvm_unreachable("No way to get here unless both had cpu_dispatch");
94219431
}
@@ -9475,6 +9485,50 @@ bool clang::isBetterOverloadCandidate(
94759485
else if (!Cand1.Viable)
94769486
return false;
94779487

9488+
// [CUDA] A function with 'never' preference is marked not viable, therefore
9489+
// is never shown up here. The worst preference shown up here is 'wrong side',
9490+
// e.g. a host function called by a device host function in device
9491+
// compilation. This is valid AST as long as the host device function is not
9492+
// emitted, e.g. it is an inline function which is called only by a host
9493+
// function. A deferred diagnostic will be triggered if it is emitted.
9494+
// However a wrong-sided function is still a viable candidate here.
9495+
//
9496+
// If Cand1 can be emitted and Cand2 cannot be emitted in the current
9497+
// context, Cand1 is better than Cand2. If Cand1 can not be emitted and Cand2
9498+
// can be emitted, Cand1 is not better than Cand2. This rule should have
9499+
// precedence over other rules.
9500+
//
9501+
// If both Cand1 and Cand2 can be emitted, or neither can be emitted, then
9502+
// other rules should be used to determine which is better. This is because
9503+
// host/device based overloading resolution is mostly for determining
9504+
// viability of a function. If two functions are both viable, other factors
9505+
// should take precedence in preference, e.g. the standard-defined preferences
9506+
// like argument conversion ranks or enable_if partial-ordering. The
9507+
// preference for pass-object-size parameters is probably most similar to a
9508+
// type-based-overloading decision and so should take priority.
9509+
//
9510+
// If other rules cannot determine which is better, CUDA preference will be
9511+
// used again to determine which is better.
9512+
//
9513+
// TODO: Currently IdentifyCUDAPreference does not return correct values
9514+
// for functions called in global variable initializers due to missing
9515+
// correct context about device/host. Therefore we can only enforce this
9516+
// rule when there is a caller. We should enforce this rule for functions
9517+
// in global variable initializers once proper context is added.
9518+
if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) {
9519+
if (FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext)) {
9520+
auto P1 = S.IdentifyCUDAPreference(Caller, Cand1.Function);
9521+
auto P2 = S.IdentifyCUDAPreference(Caller, Cand2.Function);
9522+
assert(P1 != Sema::CFP_Never && P2 != Sema::CFP_Never);
9523+
auto Cand1Emittable = P1 > Sema::CFP_WrongSide;
9524+
auto Cand2Emittable = P2 > Sema::CFP_WrongSide;
9525+
if (Cand1Emittable && !Cand2Emittable)
9526+
return true;
9527+
if (!Cand1Emittable && Cand2Emittable)
9528+
return false;
9529+
}
9530+
}
9531+
94789532
// C++ [over.match.best]p1:
94799533
//
94809534
// -- if F is a static member function, ICS1(F) is defined such
@@ -9709,20 +9763,28 @@ bool clang::isBetterOverloadCandidate(
97099763
return Cmp == Comparison::Better;
97109764
}
97119765

9712-
if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) {
9713-
FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext);
9714-
return S.IdentifyCUDAPreference(Caller, Cand1.Function) >
9715-
S.IdentifyCUDAPreference(Caller, Cand2.Function);
9716-
}
9717-
97189766
bool HasPS1 = Cand1.Function != nullptr &&
97199767
functionHasPassObjectSizeParams(Cand1.Function);
97209768
bool HasPS2 = Cand2.Function != nullptr &&
97219769
functionHasPassObjectSizeParams(Cand2.Function);
97229770
if (HasPS1 != HasPS2 && HasPS1)
97239771
return true;
97249772

9725-
return isBetterMultiversionCandidate(Cand1, Cand2);
9773+
auto MV = isBetterMultiversionCandidate(Cand1, Cand2);
9774+
if (MV == Comparison::Better)
9775+
return true;
9776+
if (MV == Comparison::Worse)
9777+
return false;
9778+
9779+
// If other rules cannot determine which is better, CUDA preference is used
9780+
// to determine which is better.
9781+
if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) {
9782+
FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext);
9783+
return S.IdentifyCUDAPreference(Caller, Cand1.Function) >
9784+
S.IdentifyCUDAPreference(Caller, Cand2.Function);
9785+
}
9786+
9787+
return false;
97269788
}
97279789

97289790
/// Determine whether two declarations are "equivalent" for the purposes of
@@ -9808,33 +9870,6 @@ OverloadCandidateSet::BestViableFunction(Sema &S, SourceLocation Loc,
98089870
std::transform(begin(), end(), std::back_inserter(Candidates),
98099871
[](OverloadCandidate &Cand) { return &Cand; });
98109872

9811-
// [CUDA] HD->H or HD->D calls are technically not allowed by CUDA but
9812-
// are accepted by both clang and NVCC. However, during a particular
9813-
// compilation mode only one call variant is viable. We need to
9814-
// exclude non-viable overload candidates from consideration based
9815-
// only on their host/device attributes. Specifically, if one
9816-
// candidate call is WrongSide and the other is SameSide, we ignore
9817-
// the WrongSide candidate.
9818-
if (S.getLangOpts().CUDA) {
9819-
const FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext);
9820-
bool ContainsSameSideCandidate =
9821-
llvm::any_of(Candidates, [&](OverloadCandidate *Cand) {
9822-
// Check viable function only.
9823-
return Cand->Viable && Cand->Function &&
9824-
S.IdentifyCUDAPreference(Caller, Cand->Function) ==
9825-
Sema::CFP_SameSide;
9826-
});
9827-
if (ContainsSameSideCandidate) {
9828-
auto IsWrongSideCandidate = [&](OverloadCandidate *Cand) {
9829-
// Check viable function only to avoid unnecessary data copying/moving.
9830-
return Cand->Viable && Cand->Function &&
9831-
S.IdentifyCUDAPreference(Caller, Cand->Function) ==
9832-
Sema::CFP_WrongSide;
9833-
};
9834-
llvm::erase_if(Candidates, IsWrongSideCandidate);
9835-
}
9836-
}
9837-
98389873
// Find the best viable function.
98399874
Best = end();
98409875
for (auto *Cand : Candidates) {

clang/test/SemaCUDA/function-overload.cu

Lines changed: 56 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
// REQUIRES: x86-registered-target
22
// REQUIRES: nvptx-registered-target
33

4-
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only -verify %s
5-
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device -verify %s
4+
// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -fsyntax-only -verify %s
5+
// RUN: %clang_cc1 -std=c++11 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device -verify %s
66

77
#include "Inputs/cuda.h"
88

@@ -331,9 +331,6 @@ __device__ void test_device_calls_template_fn() {
331331
// If we have a mix of HD and H-only or D-only candidates in the overload set,
332332
// normal C++ overload resolution rules apply first.
333333
template <typename T> TemplateReturnTy template_vs_hd_function(T arg)
334-
#ifdef __CUDA_ARCH__
335-
//expected-note@-2 {{declared here}}
336-
#endif
337334
{
338335
return TemplateReturnTy();
339336
}
@@ -342,11 +339,13 @@ __host__ __device__ HostDeviceReturnTy template_vs_hd_function(float arg) {
342339
}
343340

344341
__host__ __device__ void test_host_device_calls_hd_template() {
345-
HostDeviceReturnTy ret1 = template_vs_hd_function(1.0f);
346-
TemplateReturnTy ret2 = template_vs_hd_function(1);
347342
#ifdef __CUDA_ARCH__
348-
// expected-error@-2 {{reference to __host__ function 'template_vs_hd_function<int>' in __host__ __device__ function}}
343+
typedef HostDeviceReturnTy ExpectedReturnTy;
344+
#else
345+
typedef TemplateReturnTy ExpectedReturnTy;
349346
#endif
347+
HostDeviceReturnTy ret1 = template_vs_hd_function(1.0f);
348+
ExpectedReturnTy ret2 = template_vs_hd_function(1);
350349
}
351350

352351
__host__ void test_host_calls_hd_template() {
@@ -367,14 +366,14 @@ __device__ void test_device_calls_hd_template() {
367366
__device__ DeviceReturnTy device_only_function(int arg) { return DeviceReturnTy(); }
368367
__device__ DeviceReturnTy2 device_only_function(float arg) { return DeviceReturnTy2(); }
369368
#ifndef __CUDA_ARCH__
370-
// expected-note@-3 {{'device_only_function' declared here}}
371-
// expected-note@-3 {{'device_only_function' declared here}}
369+
// expected-note@-3 2{{'device_only_function' declared here}}
370+
// expected-note@-3 2{{'device_only_function' declared here}}
372371
#endif
373372
__host__ HostReturnTy host_only_function(int arg) { return HostReturnTy(); }
374373
__host__ HostReturnTy2 host_only_function(float arg) { return HostReturnTy2(); }
375374
#ifdef __CUDA_ARCH__
376-
// expected-note@-3 {{'host_only_function' declared here}}
377-
// expected-note@-3 {{'host_only_function' declared here}}
375+
// expected-note@-3 2{{'host_only_function' declared here}}
376+
// expected-note@-3 2{{'host_only_function' declared here}}
378377
#endif
379378

380379
__host__ __device__ void test_host_device_single_side_overloading() {
@@ -392,6 +391,37 @@ __host__ __device__ void test_host_device_single_side_overloading() {
392391
#endif
393392
}
394393

394+
// wrong-sided overloading should not cause diagnostic unless it is emitted.
395+
// This inline function is not emitted.
396+
inline __host__ __device__ void test_host_device_wrong_side_overloading_inline_no_diag() {
397+
DeviceReturnTy ret1 = device_only_function(1);
398+
DeviceReturnTy2 ret2 = device_only_function(1.0f);
399+
HostReturnTy ret3 = host_only_function(1);
400+
HostReturnTy2 ret4 = host_only_function(1.0f);
401+
}
402+
403+
// wrong-sided overloading should cause diagnostic if it is emitted.
404+
// This inline function is emitted since it is called by an emitted function.
405+
inline __host__ __device__ void test_host_device_wrong_side_overloading_inline_diag() {
406+
DeviceReturnTy ret1 = device_only_function(1);
407+
DeviceReturnTy2 ret2 = device_only_function(1.0f);
408+
#ifndef __CUDA_ARCH__
409+
// expected-error@-3 {{reference to __device__ function 'device_only_function' in __host__ __device__ function}}
410+
// expected-error@-3 {{reference to __device__ function 'device_only_function' in __host__ __device__ function}}
411+
#endif
412+
HostReturnTy ret3 = host_only_function(1);
413+
HostReturnTy2 ret4 = host_only_function(1.0f);
414+
#ifdef __CUDA_ARCH__
415+
// expected-error@-3 {{reference to __host__ function 'host_only_function' in __host__ __device__ function}}
416+
// expected-error@-3 {{reference to __host__ function 'host_only_function' in __host__ __device__ function}}
417+
#endif
418+
}
419+
420+
__host__ __device__ void test_host_device_wrong_side_overloading_inline_diag_caller() {
421+
test_host_device_wrong_side_overloading_inline_diag();
422+
// expected-note@-1 {{called by 'test_host_device_wrong_side_overloading_inline_diag_caller'}}
423+
}
424+
395425
// Verify that we allow overloading function templates.
396426
template <typename T> __host__ T template_overload(const T &a) { return a; };
397427
template <typename T> __device__ T template_overload(const T &a) { return a; };
@@ -419,3 +449,17 @@ __host__ __device__ int constexpr_overload(const T &x, const T &y) {
419449
int test_constexpr_overload(C2 &x, C2 &y) {
420450
return constexpr_overload(x, y);
421451
}
452+
453+
// Verify no ambiguity for new operator.
454+
void *a = new int;
455+
__device__ void *b = new int;
456+
// expected-error@-1{{dynamic initialization is not supported for __device__, __constant__, and __shared__ variables.}}
457+
458+
// Verify no ambiguity for new operator.
459+
template<typename _Tp> _Tp&& f();
460+
template<typename _Tp, typename = decltype(new _Tp(f<_Tp>()))>
461+
void __test();
462+
463+
void foo() {
464+
__test<int>();
465+
}

0 commit comments

Comments
 (0)