Skip to content

Commit 82d335e

Browse files
authored
[NVPTX] Add support for -march=native in standalone NVPTX (llvm#79373)
Summary: We support `--target=nvptx64-nvidia-cuda` as a way to target the NVPTX architecture from standard CPU. This patch simply reuses the existing handling of `--offload-arch=native` so that `-march=native` also works with the standalone NVPTX toolchain.
1 parent c2e5f4d commit 82d335e

File tree

4 files changed

+61
-31
lines changed

4 files changed

+61
-31
lines changed

clang/lib/Driver/ToolChains/Cuda.cpp

Lines changed: 39 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -738,9 +738,22 @@ NVPTXToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
738738
if (!llvm::is_contained(*DAL, A))
739739
DAL->append(A);
740740

741-
if (!DAL->hasArg(options::OPT_march_EQ))
741+
if (!DAL->hasArg(options::OPT_march_EQ)) {
742742
DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
743743
CudaArchToString(CudaArch::CudaDefault));
744+
} else if (DAL->getLastArgValue(options::OPT_march_EQ) == "native") {
745+
auto GPUsOrErr = getSystemGPUArchs(Args);
746+
if (!GPUsOrErr) {
747+
getDriver().Diag(diag::err_drv_undetermined_gpu_arch)
748+
<< getArchName() << llvm::toString(GPUsOrErr.takeError()) << "-march";
749+
} else {
750+
if (GPUsOrErr->size() > 1)
751+
getDriver().Diag(diag::warn_drv_multi_gpu_arch)
752+
<< getArchName() << llvm::join(*GPUsOrErr, ", ") << "-march";
753+
DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
754+
Args.MakeArgString(GPUsOrErr->front()));
755+
}
756+
}
744757

745758
return DAL;
746759
}
@@ -783,6 +796,31 @@ void NVPTXToolChain::adjustDebugInfoKind(
783796
}
784797
}
785798

799+
Expected<SmallVector<std::string>>
800+
NVPTXToolChain::getSystemGPUArchs(const ArgList &Args) const {
801+
// Detect NVIDIA GPUs available on the system.
802+
std::string Program;
803+
if (Arg *A = Args.getLastArg(options::OPT_nvptx_arch_tool_EQ))
804+
Program = A->getValue();
805+
else
806+
Program = GetProgramPath("nvptx-arch");
807+
808+
auto StdoutOrErr = executeToolChainProgram(Program);
809+
if (!StdoutOrErr)
810+
return StdoutOrErr.takeError();
811+
812+
SmallVector<std::string, 1> GPUArchs;
813+
for (StringRef Arch : llvm::split((*StdoutOrErr)->getBuffer(), "\n"))
814+
if (!Arch.empty())
815+
GPUArchs.push_back(Arch.str());
816+
817+
if (GPUArchs.empty())
818+
return llvm::createStringError(std::error_code(),
819+
"No NVIDIA GPU detected in the system");
820+
821+
return std::move(GPUArchs);
822+
}
823+
786824
/// CUDA toolchain. Our assembler is ptxas, and our "linker" is fatbinary,
787825
/// which isn't properly a linker but nonetheless performs the step of stitching
788826
/// together object files from the assembler into a single blob.
@@ -948,31 +986,6 @@ CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
948986
return DAL;
949987
}
950988

951-
Expected<SmallVector<std::string>>
952-
CudaToolChain::getSystemGPUArchs(const ArgList &Args) const {
953-
// Detect NVIDIA GPUs availible on the system.
954-
std::string Program;
955-
if (Arg *A = Args.getLastArg(options::OPT_nvptx_arch_tool_EQ))
956-
Program = A->getValue();
957-
else
958-
Program = GetProgramPath("nvptx-arch");
959-
960-
auto StdoutOrErr = executeToolChainProgram(Program);
961-
if (!StdoutOrErr)
962-
return StdoutOrErr.takeError();
963-
964-
SmallVector<std::string, 1> GPUArchs;
965-
for (StringRef Arch : llvm::split((*StdoutOrErr)->getBuffer(), "\n"))
966-
if (!Arch.empty())
967-
GPUArchs.push_back(Arch.str());
968-
969-
if (GPUArchs.empty())
970-
return llvm::createStringError(std::error_code(),
971-
"No NVIDIA GPU detected in the system");
972-
973-
return std::move(GPUArchs);
974-
}
975-
976989
Tool *NVPTXToolChain::buildAssembler() const {
977990
return new tools::NVPTX::Assembler(*this);
978991
}

clang/lib/Driver/ToolChains/Cuda.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,11 @@ class LLVM_LIBRARY_VISIBILITY NVPTXToolChain : public ToolChain {
168168
unsigned GetDefaultDwarfVersion() const override { return 2; }
169169
unsigned getMaxDwarfVersion() const override { return 2; }
170170

171+
/// Uses nvptx-arch tool to get arch of the system GPU. Will return error
172+
/// if unable to find one.
173+
virtual Expected<SmallVector<std::string>>
174+
getSystemGPUArchs(const llvm::opt::ArgList &Args) const override;
175+
171176
CudaInstallationDetector CudaInstallation;
172177

173178
protected:
@@ -223,11 +228,6 @@ class LLVM_LIBRARY_VISIBILITY CudaToolChain : public NVPTXToolChain {
223228

224229
const ToolChain &HostTC;
225230

226-
/// Uses nvptx-arch tool to get arch of the system GPU. Will return error
227-
/// if unable to find one.
228-
virtual Expected<SmallVector<std::string>>
229-
getSystemGPUArchs(const llvm::opt::ArgList &Args) const override;
230-
231231
protected:
232232
Tool *buildAssembler() const override; // ptxas
233233
Tool *buildLinker() const override; // fatbinary (ok, not really a linker)
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
#!/bin/sh
2+
echo sm_89
3+
echo sm_80
4+
exit 0

clang/test/Driver/nvptx-cuda-system-arch.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,11 @@
66
// RUN: mkdir -p %t
77
// RUN: cp %S/Inputs/nvptx-arch/nvptx_arch_fail %t/
88
// RUN: cp %S/Inputs/nvptx-arch/nvptx_arch_sm_70 %t/
9+
// RUN: cp %S/Inputs/nvptx-arch/nvptx_arch_sm_89_sm_80 %t/
910
// RUN: echo '#!/bin/sh' > %t/nvptx_arch_empty
1011
// RUN: chmod +x %t/nvptx_arch_fail
1112
// RUN: chmod +x %t/nvptx_arch_sm_70
13+
// RUN: chmod +x %t/nvptx_arch_sm_89_sm_80
1214
// RUN: chmod +x %t/nvptx_arch_empty
1315

1416
// case when nvptx-arch returns nothing or fails
@@ -31,3 +33,14 @@
3133
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-arch=native --offload-new-driver --nvptx-arch-tool=%t/nvptx_arch_sm_70 -x cuda --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda %s 2>&1 \
3234
// RUN: | FileCheck %s --check-prefix=ARCH-sm_70
3335
// ARCH-sm_70: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_70"
36+
37+
// case when nvptx-arch is used via '-march=native'
38+
// RUN: %clang -### --target=nvptx64-nvidia-cuda -nogpulib -march=native --nvptx-arch-tool=%t/nvptx_arch_sm_70 \
39+
// RUN: --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda %s 2>&1 | FileCheck %s --check-prefix=MARCH-sm_70
40+
// MARCH-sm_70: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_70"
41+
42+
// case when nvptx-arch reports multiple GPUs via '-march=native' (only the first one is used)
43+
// RUN: %clang -### --target=nvptx64-nvidia-cuda -nogpulib -march=native --nvptx-arch-tool=%t/nvptx_arch_sm_89_sm_80 \
44+
// RUN: --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda %s 2>&1 | FileCheck %s --check-prefix=MARCH-sm_89
45+
// MARCH-sm_89: warning: multiple nvptx64 architectures are detected: sm_89, sm_80; only the first one is used for '-march' [-Wmulti-gpu]
46+
// MARCH-sm_89: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_89"

0 commit comments

Comments
 (0)