[CUDA][HIP] Fix template argument deduction
nvcc allows using std::malloc and std::free in device code.
When std::malloc or std::free is passed as a template
function argument with template argument deduction,
there is no diagnostics. e.g.
__global__ void kern() {
void *p = std::malloc(1);
std::free(p);
}
int main()
{
std::shared_ptr<float> a;
a = std::shared_ptr<float>(
(float*)std::malloc(sizeof(float) * 100),
std::free
);
return 0;
}
However, the same code fails to compile with clang
(https://godbolt.org/z/1roGvo6YY). The reason is
that clang does not have logic to choose a function
argument from an overloaded set of candidates
based on host/device attributes for template argument
deduction.
Currently, clang does have a logic to choose a candidate
based on the constraints of the candidates. This patch
extends that logic to account for the CUDA host/device-based
preference.
Reviewed by: Artem Belevich
Differential Revision: https://reviews.llvm.org/D154300
This commit is contained in:
@@ -12770,6 +12770,13 @@ Sema::resolveAddressOfSingleOverloadCandidate(Expr *E, DeclAccessPair &Pair) {
|
||||
DeclAccessPair DAP;
|
||||
SmallVector<FunctionDecl *, 2> AmbiguousDecls;
|
||||
|
||||
// Return positive for better, negative for worse, 0 for equal preference.
|
||||
auto CheckCUDAPreference = [&](FunctionDecl *FD1, FunctionDecl *FD2) {
|
||||
FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true);
|
||||
return static_cast<int>(IdentifyCUDAPreference(Caller, FD1)) -
|
||||
static_cast<int>(IdentifyCUDAPreference(Caller, FD2));
|
||||
};
|
||||
|
||||
auto CheckMoreConstrained = [&](FunctionDecl *FD1,
|
||||
FunctionDecl *FD2) -> std::optional<bool> {
|
||||
if (FunctionDecl *MF = FD1->getInstantiatedFromMemberFunction())
|
||||
@@ -12800,9 +12807,31 @@ Sema::resolveAddressOfSingleOverloadCandidate(Expr *E, DeclAccessPair &Pair) {
|
||||
if (!checkAddressOfFunctionIsAvailable(FD))
|
||||
continue;
|
||||
|
||||
// If we found a better result, update Result.
|
||||
auto FoundBetter = [&]() {
|
||||
IsResultAmbiguous = false;
|
||||
DAP = I.getPair();
|
||||
Result = FD;
|
||||
};
|
||||
|
||||
// We have more than one result - see if it is more constrained than the
|
||||
// previous one.
|
||||
if (Result) {
|
||||
// Check CUDA preference first. If the candidates have differennt CUDA
|
||||
// preference, choose the one with higher CUDA preference. Otherwise,
|
||||
// choose the one with more constraints.
|
||||
if (getLangOpts().CUDA) {
|
||||
int PreferenceByCUDA = CheckCUDAPreference(FD, Result);
|
||||
// FD has different preference than Result.
|
||||
if (PreferenceByCUDA != 0) {
|
||||
// FD is more preferable than Result.
|
||||
if (PreferenceByCUDA > 0)
|
||||
FoundBetter();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
// FD has the same CUDA prefernece than Result. Continue check
|
||||
// constraints.
|
||||
std::optional<bool> MoreConstrainedThanPrevious =
|
||||
CheckMoreConstrained(FD, Result);
|
||||
if (!MoreConstrainedThanPrevious) {
|
||||
@@ -12814,9 +12843,7 @@ Sema::resolveAddressOfSingleOverloadCandidate(Expr *E, DeclAccessPair &Pair) {
|
||||
continue;
|
||||
// FD is more constrained - replace Result with it.
|
||||
}
|
||||
IsResultAmbiguous = false;
|
||||
DAP = I.getPair();
|
||||
Result = FD;
|
||||
FoundBetter();
|
||||
}
|
||||
|
||||
if (IsResultAmbiguous)
|
||||
@@ -12826,9 +12853,15 @@ Sema::resolveAddressOfSingleOverloadCandidate(Expr *E, DeclAccessPair &Pair) {
|
||||
SmallVector<const Expr *, 1> ResultAC;
|
||||
// We skipped over some ambiguous declarations which might be ambiguous with
|
||||
// the selected result.
|
||||
for (FunctionDecl *Skipped : AmbiguousDecls)
|
||||
for (FunctionDecl *Skipped : AmbiguousDecls) {
|
||||
// If skipped candidate has different CUDA preference than the result,
|
||||
// there is no ambiguity. Otherwise check whether they have different
|
||||
// constraints.
|
||||
if (getLangOpts().CUDA && CheckCUDAPreference(Skipped, Result) != 0)
|
||||
continue;
|
||||
if (!CheckMoreConstrained(Skipped, Result))
|
||||
return nullptr;
|
||||
}
|
||||
Pair = DAP;
|
||||
}
|
||||
return Result;
|
||||
|
||||
27
clang/test/SemaCUDA/template-arg-deduction.cu
Normal file
27
clang/test/SemaCUDA/template-arg-deduction.cu
Normal file
@@ -0,0 +1,27 @@
|
||||
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only -verify %s
|
||||
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device -verify %s
|
||||
|
||||
// expected-no-diagnostics
|
||||
|
||||
#include "Inputs/cuda.h"
|
||||
|
||||
void foo();
|
||||
__device__ void foo();
|
||||
|
||||
template<class F>
|
||||
void host_temp(F f);
|
||||
|
||||
template<class F>
|
||||
__device__ void device_temp(F f);
|
||||
|
||||
void host_caller() {
|
||||
host_temp(foo);
|
||||
}
|
||||
|
||||
__global__ void kernel_caller() {
|
||||
device_temp(foo);
|
||||
}
|
||||
|
||||
__device__ void device_caller() {
|
||||
device_temp(foo);
|
||||
}
|
||||
Reference in New Issue
Block a user