Coverage Report

Created: 2023-09-30 09:22

/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/Driver/ToolChains/Cuda.cpp
Line
Count
Source (jump to first uncovered line)
1
//===--- Cuda.cpp - Cuda Tool and ToolChain Implementations -----*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
9
#include "Cuda.h"
10
#include "CommonArgs.h"
11
#include "clang/Basic/Cuda.h"
12
#include "clang/Config/config.h"
13
#include "clang/Driver/Compilation.h"
14
#include "clang/Driver/Distro.h"
15
#include "clang/Driver/Driver.h"
16
#include "clang/Driver/DriverDiagnostic.h"
17
#include "clang/Driver/InputInfo.h"
18
#include "clang/Driver/Options.h"
19
#include "llvm/ADT/StringExtras.h"
20
#include "llvm/Option/ArgList.h"
21
#include "llvm/Support/FileSystem.h"
22
#include "llvm/Support/FormatAdapters.h"
23
#include "llvm/Support/FormatVariadic.h"
24
#include "llvm/Support/Path.h"
25
#include "llvm/Support/Process.h"
26
#include "llvm/Support/Program.h"
27
#include "llvm/Support/VirtualFileSystem.h"
28
#include "llvm/TargetParser/Host.h"
29
#include "llvm/TargetParser/TargetParser.h"
30
#include <system_error>
31
32
using namespace clang::driver;
33
using namespace clang::driver::toolchains;
34
using namespace clang::driver::tools;
35
using namespace clang;
36
using namespace llvm::opt;
37
38
namespace {
39
40
18
/// Translate a raw CUDA_VERSION number (for example 11020) into the matching
/// CudaVersion enumerator.
///
/// Each table row pairs an exclusive upper bound with the version reported
/// for every raw value below that bound. Rows are sorted by increasing bound,
/// so the first row whose bound exceeds \p raw_version wins. Values at or
/// above the last bound are reported as CudaVersion::NEW.
CudaVersion getCudaVersion(uint32_t raw_version) {
  struct VersionBound {
    uint32_t UpperBound;
    CudaVersion Version;
  };
  static constexpr VersionBound Bounds[] = {
      {7050, CudaVersion::CUDA_70},   {8000, CudaVersion::CUDA_75},
      {9000, CudaVersion::CUDA_80},   {9010, CudaVersion::CUDA_90},
      {9020, CudaVersion::CUDA_91},   {10000, CudaVersion::CUDA_92},
      {10010, CudaVersion::CUDA_100}, {10020, CudaVersion::CUDA_101},
      {11000, CudaVersion::CUDA_102}, {11010, CudaVersion::CUDA_110},
      {11020, CudaVersion::CUDA_111}, {11030, CudaVersion::CUDA_112},
      {11040, CudaVersion::CUDA_113}, {11050, CudaVersion::CUDA_114},
      {11060, CudaVersion::CUDA_115}, {11070, CudaVersion::CUDA_116},
      {11080, CudaVersion::CUDA_117}, {11090, CudaVersion::CUDA_118},
      {12010, CudaVersion::CUDA_120}, {12020, CudaVersion::CUDA_121},
  };

  for (const VersionBound &Entry : Bounds)
    if (raw_version < Entry.UpperBound)
      return Entry.Version;
  return CudaVersion::NEW;
}
83
84
18
/// Extract the CUDA version from the text of a cuda.h header.
///
/// Scans \p Input line by line for a line that starts with the tokens `#`,
/// `define` and `CUDA_VERSION` (optionally separated by whitespace) followed
/// by a decimal integer, and converts that integer with getCudaVersion().
///
/// \returns CudaVersion::UNKNOWN when no such line with a parsable integer is
/// found.
CudaVersion parseCudaHFile(llvm::StringRef Input) {
  // Helper lambda which skips the words if the line starts with them or returns
  // std::nullopt otherwise. The word list is taken by reference to avoid
  // copying the vector on every call.
  auto StartsWithWords =
      [](llvm::StringRef Line,
         const SmallVector<StringRef, 3> &words) -> std::optional<StringRef> {
    for (StringRef word : words) {
      if (!Line.consume_front(word))
        return {};
      Line = Line.ltrim();
    }
    return Line;
  };

  Input = Input.ltrim();
  while (!Input.empty()) {
    if (auto Line =
            StartsWithWords(Input.ltrim(), {"#", "define", "CUDA_VERSION"})) {
      uint32_t RawVersion = 0;
      // consumeInteger() returns true on failure. Only use the value when it
      // was actually parsed; otherwise keep scanning the remaining lines
      // instead of reading an indeterminate integer.
      if (!Line->consumeInteger(10, RawVersion))
        return getCudaVersion(RawVersion);
    }
    // Find next non-empty line. substr() clamps its start index, so a final
    // line without a trailing newline (find_first_of() == npos) simply yields
    // an empty remainder rather than tripping drop_front()'s size assertion.
    Input = Input.substr(Input.find_first_of("\n\r")).ltrim();
  }
  return CudaVersion::UNKNOWN;
}
111
} // namespace
112
113
18
void CudaInstallationDetector::WarnIfUnsupportedVersion() {
114
18
  if (Version > CudaVersion::PARTIALLY_SUPPORTED) {
115
0
    std::string VersionString = CudaVersionToString(Version);
116
0
    if (!VersionString.empty())
117
0
      VersionString.insert(0, " ");
118
0
    D.Diag(diag::warn_drv_new_cuda_version)
119
0
        << VersionString
120
0
        << (CudaVersion::PARTIALLY_SUPPORTED != CudaVersion::FULLY_SUPPORTED)
121
0
        << CudaVersionToString(CudaVersion::PARTIALLY_SUPPORTED);
122
18
  } else if (Version > CudaVersion::FULLY_SUPPORTED)
123
0
    D.Diag(diag::warn_drv_partially_supported_cuda_version)
124
0
        << CudaVersionToString(Version);
125
18
}
126
127
/// Search for a usable CUDA installation and record its layout.
///
/// A list of candidate directories is built first:
///  - only the OPT_cuda_path_EQ value, when that option is given;
///  - otherwise, for a Windows host triple, the versioned "Program Files"
///    toolkit directories below the sysroot;
///  - otherwise the parent of the 'bin' directory holding 'ptxas' (unless
///    OPT_cuda_path_ignore_env is given), /usr/local/cuda, the versioned
///    /usr/local/cuda-X.Y directories and, on Debian/Ubuntu, /usr/lib/cuda.
///
/// The first candidate that has 'bin' and 'include' directories and (unless
/// OPT_nogpulib is given) at least one libdevice bitcode file is accepted:
/// IsValid is set and InstallPath, BinPath, IncludePath, LibDevicePath,
/// Version and LibDeviceMap describe it.
CudaInstallationDetector::CudaInstallationDetector(
    const Driver &D, const llvm::Triple &HostTriple,
    const llvm::opt::ArgList &Args)
    : D(D) {
  // A directory to probe. StrictChecking requires the libdevice directory to
  // exist even when OPT_nogpulib was given.
  struct Candidate {
    std::string Path;
    bool StrictChecking;

    Candidate(std::string Path, bool StrictChecking = false)
        : Path(Path), StrictChecking(StrictChecking) {}
  };
  SmallVector<Candidate, 4> Candidates;

  // In decreasing order so we prefer newer versions to older versions.
  std::initializer_list<const char *> Versions = {"8.0", "7.5", "7.0"};
  auto &FS = D.getVFS();

  if (Args.hasArg(clang::driver::options::OPT_cuda_path_EQ)) {
    Candidates.emplace_back(
        Args.getLastArgValue(clang::driver::options::OPT_cuda_path_EQ).str());
  } else if (HostTriple.isOSWindows()) {
    for (const char *Ver : Versions)
      Candidates.emplace_back(
          D.SysRoot + "/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" +
          Ver);
  } else {
    if (!Args.hasArg(clang::driver::options::OPT_cuda_path_ignore_env)) {
      // Try to find ptxas binary. If the executable is located in a directory
      // called 'bin/', its parent directory might be a good guess for a valid
      // CUDA installation.
      // However, some distributions might install 'ptxas' to /usr/bin. In that
      // case the candidate would be '/usr' which passes the following checks
      // because '/usr/include' exists as well. To avoid this case, we always
      // check for the directory potentially containing files for libdevice,
      // even if the user passes -nocudalib.
      if (llvm::ErrorOr<std::string> ptxas =
              llvm::sys::findProgramByName("ptxas")) {
        SmallString<256> ptxasAbsolutePath;
        // Only derive a candidate when the path could actually be resolved;
        // a failed real_path() must not feed an unspecified buffer into the
        // path computations below.
        if (!llvm::sys::fs::real_path(*ptxas, ptxasAbsolutePath)) {
          StringRef ptxasDir = llvm::sys::path::parent_path(ptxasAbsolutePath);
          if (llvm::sys::path::filename(ptxasDir) == "bin")
            Candidates.emplace_back(
                std::string(llvm::sys::path::parent_path(ptxasDir)),
                /*StrictChecking=*/true);
        }
      }
    }

    Candidates.emplace_back(D.SysRoot + "/usr/local/cuda");
    for (const char *Ver : Versions)
      Candidates.emplace_back(D.SysRoot + "/usr/local/cuda-" + Ver);

    Distro Dist(FS, llvm::Triple(llvm::sys::getProcessTriple()));
    if (Dist.IsDebian() || Dist.IsUbuntu())
      // Special case for Debian to have nvidia-cuda-toolkit work
      // out of the box. More info on http://bugs.debian.org/882505
      Candidates.emplace_back(D.SysRoot + "/usr/lib/cuda");
  }

  bool NoCudaLib = Args.hasArg(options::OPT_nogpulib);

  for (const auto &Cand : Candidates) {
    InstallPath = Cand.Path;
    if (InstallPath.empty() || !FS.exists(InstallPath))
      continue;

    BinPath = InstallPath + "/bin";
    IncludePath = InstallPath + "/include";
    LibDevicePath = InstallPath + "/nvvm/libdevice";

    if (!(FS.exists(IncludePath) && FS.exists(BinPath)))
      continue;
    bool CheckLibDevice = (!NoCudaLib || Cand.StrictChecking);
    if (CheckLibDevice && !FS.exists(LibDevicePath))
      continue;

    Version = CudaVersion::UNKNOWN;
    if (auto CudaHFile = FS.getBufferForFile(InstallPath + "/include/cuda.h"))
      Version = parseCudaHFile((*CudaHFile)->getBuffer());
    // As the last resort, make an educated guess between CUDA-7.0, which had
    // old-style libdevice bitcode, and an unknown recent CUDA version.
    if (Version == CudaVersion::UNKNOWN) {
      Version = FS.exists(LibDevicePath + "/libdevice.10.bc")
                    ? CudaVersion::NEW
                    : CudaVersion::CUDA_70;
    }

    if (Version >= CudaVersion::CUDA_90) {
      // CUDA-9+ uses single libdevice file for all GPU variants.
      std::string FilePath = LibDevicePath + "/libdevice.10.bc";
      if (FS.exists(FilePath)) {
        for (int Arch = static_cast<int>(CudaArch::SM_30),
                 E = static_cast<int>(CudaArch::LAST);
             Arch < E; ++Arch) {
          CudaArch GpuArch = static_cast<CudaArch>(Arch);
          if (!IsNVIDIAGpuArch(GpuArch))
            continue;
          std::string GpuArchName(CudaArchToString(GpuArch));
          LibDeviceMap[GpuArchName] = FilePath;
        }
      }
    } else {
      std::error_code EC;
      for (llvm::vfs::directory_iterator LI = FS.dir_begin(LibDevicePath, EC),
                                         LE;
           !EC && LI != LE; LI = LI.increment(EC)) {
        StringRef FilePath = LI->path();
        StringRef FileName = llvm::sys::path::filename(FilePath);
        // Process all bitcode filenames that look like
        // libdevice.compute_XX.YY.bc
        const StringRef LibDeviceName = "libdevice.";
        if (!(FileName.startswith(LibDeviceName) && FileName.endswith(".bc")))
          continue;
        StringRef GpuArch = FileName.slice(
            LibDeviceName.size(), FileName.find('.', LibDeviceName.size()));
        LibDeviceMap[GpuArch] = FilePath.str();

        // Registers the current bitcode file for each listed device name.
        auto MapArchs = [&](std::initializer_list<const char *> Archs) {
          for (const char *ArchName : Archs)
            LibDeviceMap[ArchName] = std::string(FilePath);
        };

        // Insert map entries for specific devices with this compute
        // capability. NVCC's choice of the libdevice library version is
        // rather peculiar and depends on the CUDA version.
        if (GpuArch == "compute_20") {
          MapArchs({"sm_20", "sm_21", "sm_32"});
        } else if (GpuArch == "compute_30") {
          MapArchs({"sm_30"});
          if (Version < CudaVersion::CUDA_80)
            MapArchs({"sm_50", "sm_52", "sm_53"});
          MapArchs({"sm_60", "sm_61", "sm_62"});
        } else if (GpuArch == "compute_35") {
          MapArchs({"sm_35", "sm_37"});
        } else if (GpuArch == "compute_50") {
          if (Version >= CudaVersion::CUDA_80)
            MapArchs({"sm_50", "sm_52", "sm_53"});
        }
      }
    }

    // Check that we have found at least one libdevice that we can link in if
    // -nocudalib hasn't been specified.
    if (LibDeviceMap.empty() && !NoCudaLib)
      continue;

    IsValid = true;
    break;
  }
}
281
282
void CudaInstallationDetector::AddCudaIncludeArgs(
283
134
    const ArgList &DriverArgs, ArgStringList &CC1Args) const {
284
134
  if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
285
    // Add cuda_wrappers/* to our system include path.  This lets us wrap
286
    // standard library headers.
287
134
    SmallString<128> P(D.ResourceDir);
288
134
    llvm::sys::path::append(P, "include");
289
134
    llvm::sys::path::append(P, "cuda_wrappers");
290
134
    CC1Args.push_back("-internal-isystem");
291
134
    CC1Args.push_back(DriverArgs.MakeArgString(P));
292
134
  }
293
294
134
  if (DriverArgs.hasArg(options::OPT_nogpuinc))
295
56
    return;
296
297
78
  if (!isValid()) {
298
64
    D.Diag(diag::err_drv_no_cuda_installation);
299
64
    return;
300
64
  }
301
302
14
  CC1Args.push_back("-include");
303
14
  CC1Args.push_back("__clang_cuda_runtime_wrapper.h");
304
14
}
305
306
void CudaInstallationDetector::CheckCudaVersionSupportsArch(
307
86
    CudaArch Arch) const {
308
86
  if (Arch == CudaArch::UNKNOWN || Version == CudaVersion::UNKNOWN ||
309
86
      
ArchsWithBadVersion[(int)Arch]22
)
310
64
    return;
311
312
22
  auto MinVersion = MinVersionForCudaArch(Arch);
313
22
  auto MaxVersion = MaxVersionForCudaArch(Arch);
314
22
  if (Version < MinVersion || Version > MaxVersion) {
315
0
    ArchsWithBadVersion[(int)Arch] = true;
316
0
    D.Diag(diag::err_drv_cuda_version_unsupported)
317
0
        << CudaArchToString(Arch) << CudaVersionToString(MinVersion)
318
0
        << CudaVersionToString(MaxVersion) << InstallPath
319
0
        << CudaVersionToString(Version);
320
0
  }
321
22
}
322
323
148
/// Write a one-line description of the detected installation (path and
/// version) to \p OS. Prints nothing when no valid installation was found.
void CudaInstallationDetector::print(raw_ostream &OS) const {
  if (!isValid())
    return;

  OS << "Found CUDA installation: " << InstallPath << ", version "
     << CudaVersionToString(Version) << "\n";
}
328
329
namespace {
330
/// Debug info level for the NVPTX devices. We may need to emit a different
/// debug info level for the host and for the device itself. This type controls
/// emission of the debug info for the devices: it either disables debug info
/// emission completely, or emits debug directives only, or emits the same
/// debug info as for the host.
enum DeviceDebugInfoLevel {
  /// Do not emit debug info for the devices.
  DisableDebugInfo,
  /// Emit only debug directives.
  DebugDirectivesOnly,
  /// Use the same debug info level as for the host.
  EmitSameDebugInfoAsHost,
};
341
} // anonymous namespace
342
343
/// Define debug info level for the NVPTX devices. If the debug info for both
344
/// the host and device are disabled (-g0/-ggdb0 or no debug options at all). If
345
/// only debug directives are requested for the both host and device
346
/// (-gline-directvies-only), or the debug info only for the device is disabled
347
/// (optimization is on and --cuda-noopt-device-debug was not specified), the
348
/// debug directves only must be emitted for the device. Otherwise, use the same
349
/// debug info level just like for the host (with the limitations of only
350
/// supported DWARF2 standard).
351
160
static DeviceDebugInfoLevel mustEmitDebugInfo(const ArgList &Args) {
352
160
  const Arg *A = Args.getLastArg(options::OPT_O_Group);
353
160
  bool IsDebugEnabled = !A || 
A->getOption().matches(options::OPT_O0)36
||
354
160
                        Args.hasFlag(options::OPT_cuda_noopt_device_debug,
355
27
                                     options::OPT_no_cuda_noopt_device_debug,
356
27
                                     /*Default=*/false);
357
160
  if (const Arg *A = Args.getLastArg(options::OPT_g_Group)) {
358
48
    const Option &Opt = A->getOption();
359
48
    if (Opt.matches(options::OPT_gN_Group)) {
360
27
      if (Opt.matches(options::OPT_g0) || 
Opt.matches(options::OPT_ggdb0)24
)
361
6
        return DisableDebugInfo;
362
21
      if (Opt.matches(options::OPT_gline_directives_only))
363
3
        return DebugDirectivesOnly;
364
21
    }
365
39
    return IsDebugEnabled ? 
EmitSameDebugInfoAsHost30
:
DebugDirectivesOnly9
;
366
48
  }
367
112
  return willEmitRemarks(Args) ? 
DebugDirectivesOnly4
:
DisableDebugInfo108
;
368
160
}
369
370
/// Build the 'ptxas' command line for one device assembly job and add it to
/// the compilation \p C.
///
/// The GPU architecture comes from the job's offloading arch for CUDA device
/// actions and from OPT_march_EQ otherwise. The command receives the pointer
/// width, debug/optimization flags, the GPU name, the output file, the input
/// files, any OPT_Xcuda_ptxas values and, for relocatable output, "-c".
void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
                                    const InputInfo &Output,
                                    const InputInfoList &Inputs,
                                    const ArgList &Args,
                                    const char *LinkingOutput) const {
  const auto &TC =
      static_cast<const toolchains::NVPTXToolChain &>(getToolChain());
  assert(TC.getTriple().isNVPTX() && "Wrong platform");

  // If this is a CUDA action we need to extract the device architecture
  // from the Job's associated architecture, otherwise use the -march=arch
  // option. This option may come from -Xopenmp-target flag or the default
  // value.
  StringRef GPUArchName;
  if (!JA.isDeviceOffloading(Action::OFK_Cuda)) {
    GPUArchName = Args.getLastArgValue(options::OPT_march_EQ);
    assert(!GPUArchName.empty() && "Must have an architecture passed in.");
  } else {
    GPUArchName = JA.getOffloadingArch();
  }

  // Obtain architecture from the action.
  CudaArch gpu_arch = StringToCudaArch(GPUArchName);
  assert(gpu_arch != CudaArch::UNKNOWN &&
         "Device action expected to have an architecture.");

  // Check that our installation's ptxas supports gpu_arch.
  if (!Args.hasArg(options::OPT_no_cuda_version_check))
    TC.CudaInstallation.CheckCudaVersionSupportsArch(gpu_arch);

  ArgStringList CmdArgs;
  if (TC.getTriple().isArch64Bit())
    CmdArgs.push_back("-m64");
  else
    CmdArgs.push_back("-m32");

  // Maps the last -O option to the digit passed to ptxas. -O4 and -Ofast map
  // to 3, -O0 to 0, -O1/-O3 to themselves and every other -O<value> to 2.
  // -O3 seems like the least-bad option when -Osomething is specified to
  // clang but it isn't handled here.
  //
  // TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's
  // default, so it may correspond more closely to the spirit of clang -O2.
  auto PtxasOptLevel = [](const Arg &OArg) -> StringRef {
    const Option &Opt = OArg.getOption();
    if (Opt.matches(options::OPT_O4) || Opt.matches(options::OPT_Ofast))
      return "3";
    if (Opt.matches(options::OPT_O0))
      return "0";
    if (Opt.matches(options::OPT_O))
      return llvm::StringSwitch<const char *>(OArg.getValue())
          .Case("1", "1")
          .Case("3", "3")
          .Default("2");
    return "3";
  };

  const DeviceDebugInfoLevel DIKind = mustEmitDebugInfo(Args);
  if (DIKind == EmitSameDebugInfoAsHost) {
    // ptxas does not accept -g option if optimization is enabled, so
    // we ignore the compiler's -O* options if we want debug info.
    CmdArgs.push_back("-g");
    CmdArgs.push_back("--dont-merge-basicblocks");
    CmdArgs.push_back("--return-at-end");
  } else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
    StringRef OOpt = PtxasOptLevel(*A);
    CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-O") + OOpt));
  } else {
    // If no -O was passed, pass -O0 to ptxas -- no opt flag should correspond
    // to no optimizations, but ptxas's default is -O3.
    CmdArgs.push_back("-O0");
  }
  if (DIKind == DebugDirectivesOnly)
    CmdArgs.push_back("-lineinfo");

  // Pass -v to ptxas if it was passed to the driver.
  if (Args.hasArg(options::OPT_v))
    CmdArgs.push_back("-v");

  CmdArgs.push_back("--gpu-name");
  CmdArgs.push_back(Args.MakeArgString(CudaArchToString(gpu_arch)));
  CmdArgs.push_back("--output-file");
  std::string OutputFileName = TC.getInputFilename(Output);

  // If we are invoking `nvlink` internally we need to output a `.cubin` file.
  // FIXME: This should hopefully be removed if NVIDIA updates their tooling.
  if (!C.getInputArgs().getLastArg(options::OPT_c)) {
    SmallString<256> CubinName(Output.getFilename());
    llvm::sys::path::replace_extension(CubinName, "cubin");
    OutputFileName = CubinName.str();
  }
  // A renamed output is an intermediate file that must be cleaned up.
  if (Output.isFilename() && OutputFileName != Output.getFilename())
    C.addTempFile(Args.MakeArgString(OutputFileName));

  CmdArgs.push_back(Args.MakeArgString(OutputFileName));
  for (const auto &II : Inputs)
    CmdArgs.push_back(Args.MakeArgString(II.getFilename()));

  for (const auto &Extra : Args.getAllArgValues(options::OPT_Xcuda_ptxas))
    CmdArgs.push_back(Args.MakeArgString(Extra));

  bool Relocatable = true;
  if (JA.isOffloading(Action::OFK_OpenMP)) {
    // In OpenMP we need to generate relocatable code (on unless disabled).
    Relocatable = Args.hasFlag(options::OPT_fopenmp_relocatable_target,
                               options::OPT_fnoopenmp_relocatable_target,
                               /*Default=*/true);
  } else if (JA.isOffloading(Action::OFK_Cuda)) {
    // In CUDA relocatable code is generated only when -fgpu-rdc is in effect.
    Relocatable = Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
                               /*Default=*/false);
  }
  // Otherwise, we are compiling directly and should create linkable output.

  if (Relocatable)
    CmdArgs.push_back("-c");

  // An explicit OPT_ptxas_path_EQ value overrides the program lookup.
  const char *Exec = nullptr;
  if (Arg *A = Args.getLastArg(options::OPT_ptxas_path_EQ))
    Exec = A->getValue();
  else
    Exec = Args.MakeArgString(TC.GetProgramPath("ptxas"));

  C.addCommand(std::make_unique<Command>(
      JA, *this,
      ResponseFileSupport{ResponseFileSupport::RF_Full, llvm::sys::WEM_UTF8,
                          "--options-file"},
      Exec, CmdArgs, Inputs, Output));
}
497
498
40
static bool shouldIncludePTX(const ArgList &Args, const char *gpu_arch) {
499
40
  bool includePTX = true;
500
392
  for (Arg *A : Args) {
501
392
    if (!(A->getOption().matches(options::OPT_cuda_include_ptx_EQ) ||
502
392
          A->getOption().matches(options::OPT_no_cuda_include_ptx_EQ)))
503
392
      continue;
504
0
    A->claim();
505
0
    const StringRef ArchStr = A->getValue();
506
0
    if (ArchStr == "all" || ArchStr == gpu_arch) {
507
0
      includePTX = A->getOption().matches(options::OPT_cuda_include_ptx_EQ);
508
0
      continue;
509
0
    }
510
0
  }
511
40
  return includePTX;
512
40
}
513
514
// All inputs to this linker must be from CudaDeviceActions, as we need to look
515
// at the Inputs' Actions in order to figure out which GPU architecture they
516
// correspond to.
517
void NVPTX::FatBinary::ConstructJob(Compilation &C, const JobAction &JA,
518
                                    const InputInfo &Output,
519
                                    const InputInfoList &Inputs,
520
                                    const ArgList &Args,
521
40
                                    const char *LinkingOutput) const {
522
40
  const auto &TC =
523
40
      static_cast<const toolchains::CudaToolChain &>(getToolChain());
524
40
  assert(TC.getTriple().isNVPTX() && "Wrong platform");
525
526
40
  ArgStringList CmdArgs;
527
40
  if (TC.CudaInstallation.version() <= CudaVersion::CUDA_100)
528
40
    CmdArgs.push_back("--cuda");
529
40
  CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-64" : 
"-32"0
);
530
40
  CmdArgs.push_back(Args.MakeArgString("--create"));
531
40
  CmdArgs.push_back(Args.MakeArgString(Output.getFilename()));
532
40
  if (mustEmitDebugInfo(Args) == EmitSameDebugInfoAsHost)
533
10
    CmdArgs.push_back("-g");
534
535
80
  for (const auto &II : Inputs) {
536
80
    auto *A = II.getAction();
537
80
    assert(A->getInputs().size() == 1 &&
538
80
           "Device offload action is expected to have a single input");
539
80
    const char *gpu_arch_str = A->getOffloadingArch();
540
80
    assert(gpu_arch_str &&
541
80
           "Device action expected to have associated a GPU architecture!");
542
80
    CudaArch gpu_arch = StringToCudaArch(gpu_arch_str);
543
544
80
    if (II.getType() == types::TY_PP_Asm &&
545
80
        
!shouldIncludePTX(Args, gpu_arch_str)40
)
546
0
      continue;
547
    // We need to pass an Arch of the form "sm_XX" for cubin files and
548
    // "compute_XX" for ptx.
549
80
    const char *Arch = (II.getType() == types::TY_PP_Asm)
550
80
                           ? 
CudaArchToVirtualArchString(gpu_arch)40
551
80
                           : 
gpu_arch_str40
;
552
80
    CmdArgs.push_back(
553
80
        Args.MakeArgString(llvm::Twine("--image=profile=") + Arch +
554
80
                           ",file=" + getToolChain().getInputFilename(II)));
555
80
  }
556
557
40
  for (const auto &A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary))
558
0
    CmdArgs.push_back(Args.MakeArgString(A));
559
560
40
  const char *Exec = Args.MakeArgString(TC.GetProgramPath("fatbinary"));
561
40
  C.addCommand(std::make_unique<Command>(
562
40
      JA, *this,
563
40
      ResponseFileSupport{ResponseFileSupport::RF_Full, llvm::sys::WEM_UTF8,
564
40
                          "--options-file"},
565
40
      Exec, CmdArgs, Inputs, Output));
566
40
}
567
568
/// Build the 'nvlink' command line that links the device inputs into
/// \p Output, and add it to the compilation \p C.
///
/// LLVM IR / bitcode inputs are rejected with
/// err_drv_no_linker_llvm_support, non-file inputs are ignored, and every
/// remaining input is passed with a '.cubin' extension.
void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                 const InputInfo &Output,
                                 const InputInfoList &Inputs,
                                 const ArgList &Args,
                                 const char *LinkingOutput) const {
  const auto &TC =
      static_cast<const toolchains::NVPTXToolChain &>(getToolChain());
  ArgStringList CmdArgs;

  assert(TC.getTriple().isNVPTX() && "Wrong platform");
  assert((Output.isFilename() || Output.isNothing()) && "Invalid output.");

  if (Output.isFilename()) {
    CmdArgs.push_back("-o");
    CmdArgs.push_back(Output.getFilename());
  }

  if (mustEmitDebugInfo(Args) == EmitSameDebugInfoAsHost)
    CmdArgs.push_back("-g");

  if (Args.hasArg(options::OPT_v))
    CmdArgs.push_back("-v");

  StringRef GPUArch = Args.getLastArgValue(options::OPT_march_EQ);
  assert(!GPUArch.empty() && "At least one GPU Arch required for nvlink.");

  CmdArgs.push_back("-arch");
  CmdArgs.push_back(Args.MakeArgString(GPUArch));

  // Add paths specified in LIBRARY_PATH environment variable as -L options.
  addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");

  // Add paths for the default clang library path.
  SmallString<256> DefaultLibPath =
      llvm::sys::path::parent_path(TC.getDriver().Dir);
  llvm::sys::path::append(DefaultLibPath, CLANG_INSTALL_LIBDIR_BASENAME);
  CmdArgs.push_back(Args.MakeArgString(Twine("-L") + DefaultLibPath));

  for (const auto &II : Inputs) {
    const auto InputType = II.getType();
    const bool IsLLVMInput =
        InputType == types::TY_LLVM_IR || InputType == types::TY_LTO_IR ||
        InputType == types::TY_LTO_BC || InputType == types::TY_LLVM_BC;
    if (IsLLVMInput) {
      C.getDriver().Diag(diag::err_drv_no_linker_llvm_support)
          << getToolChain().getTripleString();
      continue;
    }

    // Currently, we only pass the input files to the linker, we do not pass
    // any libraries that may be valid only for the host.
    if (!II.isFilename())
      continue;

    // The 'nvlink' application performs RDC-mode linking when given a '.o'
    // file and device linking when given a '.cubin' file. We always want to
    // perform device linking, so just rename any '.o' files.
    // FIXME: This should hopefully be removed if NVIDIA updates their tooling.
    auto InputFile = getToolChain().getInputFilename(II);
    if (llvm::sys::path::extension(InputFile) == ".cubin") {
      CmdArgs.push_back(Args.MakeArgString(InputFile));
      continue;
    }

    // If there are no actions above this one then this is direct input and we
    // can copy it. Otherwise the input is internal so a `.cubin` file should
    // exist.
    const bool IsDirectInput =
        II.getAction() && II.getAction()->getInputs().size() == 0;
    if (!IsDirectInput) {
      SmallString<256> CubinName(InputFile);
      llvm::sys::path::replace_extension(CubinName, "cubin");
      CmdArgs.push_back(Args.MakeArgString(CubinName));
      continue;
    }

    const char *CubinF =
        Args.MakeArgString(getToolChain().getDriver().GetTemporaryPath(
            llvm::sys::path::stem(InputFile), "cubin"));
    // The input is only passed on when the copy succeeded.
    if (!llvm::sys::fs::copy_file(InputFile, C.addTempFile(CubinF)))
      CmdArgs.push_back(CubinF);
  }

  C.addCommand(std::make_unique<Command>(
      JA, *this,
      ResponseFileSupport{ResponseFileSupport::RF_Full, llvm::sys::WEM_UTF8,
                          "--options-file"},
      Args.MakeArgString(getToolChain().GetProgramPath("nvlink")), CmdArgs,
      Inputs, Output));
}
653
654
/// Append the PTX version feature string for the NVPTX back-end to
/// \p Features.
///
/// When OPT_cuda_feature_EQ is given, its last value is used verbatim.
/// Otherwise a CUDA installation is detected and the feature is chosen from
/// its version; versions without a table entry fall back to "+ptx42".
void NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple,
                                   const llvm::opt::ArgList &Args,
                                   std::vector<StringRef> &Features) {
  if (Args.hasArg(options::OPT_cuda_feature_EQ)) {
    StringRef Requested =
        Args.getLastArgValue(options::OPT_cuda_feature_EQ, "+ptx42");
    Features.push_back(Args.MakeArgString(Requested));
    return;
  }
  CudaInstallationDetector CudaInstallation(D, Triple, Args);

  // New CUDA versions often introduce new instructions that are only supported
  // by new PTX version, so we need to raise PTX level to enable them in NVPTX
  // back-end.
  struct PtxLevel {
    CudaVersion Cuda;
    const char *Feature;
  };
  static constexpr PtxLevel Levels[] = {
      {CudaVersion::CUDA_121, "+ptx81"}, {CudaVersion::CUDA_120, "+ptx80"},
      {CudaVersion::CUDA_118, "+ptx78"}, {CudaVersion::CUDA_117, "+ptx77"},
      {CudaVersion::CUDA_116, "+ptx76"}, {CudaVersion::CUDA_115, "+ptx75"},
      {CudaVersion::CUDA_114, "+ptx74"}, {CudaVersion::CUDA_113, "+ptx73"},
      {CudaVersion::CUDA_112, "+ptx72"}, {CudaVersion::CUDA_111, "+ptx71"},
      {CudaVersion::CUDA_110, "+ptx70"}, {CudaVersion::CUDA_102, "+ptx65"},
      {CudaVersion::CUDA_101, "+ptx64"}, {CudaVersion::CUDA_100, "+ptx63"},
      {CudaVersion::CUDA_92, "+ptx61"},  {CudaVersion::CUDA_91, "+ptx61"},
      {CudaVersion::CUDA_90, "+ptx60"},
  };

  const char *PtxFeature = "+ptx42";
  const CudaVersion Installed = CudaInstallation.version();
  for (const PtxLevel &Level : Levels) {
    if (Level.Cuda == Installed) {
      PtxFeature = Level.Feature;
      break;
    }
  }
  Features.push_back(PtxFeature);
}
697
698
/// NVPTX toolchain. Our assembler is ptxas, and our linker is nvlink. This
699
/// operates as a stand-alone version of the NVPTX tools without the host
700
/// toolchain.
701
NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
702
                               const llvm::Triple &HostTriple,
703
                               const ArgList &Args, bool Freestanding = false)
704
101
    : ToolChain(D, Triple, Args), CudaInstallation(D, HostTriple, Args),
705
101
      Freestanding(Freestanding) {
706
101
  if (CudaInstallation.isValid())
707
18
    getProgramPaths().push_back(std::string(CudaInstallation.getBinPath()));
708
  // Lookup binaries into the driver directory, this is used to
709
  // discover the 'nvptx-arch' executable.
710
101
  getProgramPaths().push_back(getDriver().Dir);
711
101
}
712
713
/// We only need the host triple to locate the CUDA binary utilities, use the
714
/// system's default triple if not provided.
715
NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
716
                               const ArgList &Args)
717
0
    : NVPTXToolChain(D, Triple, llvm::Triple(LLVM_HOST_TRIPLE), Args,
718
0
                     /*Freestanding=*/true) {}
Unexecuted instantiation: clang::driver::toolchains::NVPTXToolChain::NVPTXToolChain(clang::driver::Driver const&, llvm::Triple const&, llvm::opt::ArgList const&)
Unexecuted instantiation: clang::driver::toolchains::NVPTXToolChain::NVPTXToolChain(clang::driver::Driver const&, llvm::Triple const&, llvm::opt::ArgList const&)
719
720
/// Builds the derived argument list for the stand-alone NVPTX toolchain.
///
/// Starts from the base ToolChain translation (or a fresh list when that
/// yields none), appends every argument of \p Args that is not already
/// present, and adds a default -march= value when none was given.
///
/// \returns the derived argument list, allocated with `new` when the base
/// translation produced none.
llvm::opt::DerivedArgList *
NVPTXToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
                              StringRef BoundArch,
                              Action::OffloadKind DeviceOffloadKind) const {
  DerivedArgList *Translated =
      ToolChain::TranslateArgs(Args, BoundArch, DeviceOffloadKind);
  if (Translated == nullptr)
    Translated = new DerivedArgList(Args.getBaseArgs());

  // Carry over the incoming arguments without introducing duplicates.
  for (Arg *Candidate : Args) {
    if (llvm::is_contained(*Translated, Candidate))
      continue;
    Translated->append(Candidate);
  }

  // An explicit architecture needs no further work.
  if (Translated->hasArg(options::OPT_march_EQ))
    return Translated;

  const OptTable &Opts = getDriver().getOpts();
  Translated->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
                           CudaArchToString(CudaArch::CudaDefault));
  return Translated;
}
741
742
void NVPTXToolChain::addClangTargetOptions(
743
    const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
744
0
    Action::OffloadKind DeviceOffloadingKind) const {
745
  // If we are compiling with a standalone NVPTX toolchain we want to try to
746
  // mimic a standard environment as much as possible. So we enable lowering
747
  // ctor / dtor functions to global symbols that can be registered.
748
0
  if (Freestanding)
749
0
    CC1Args.append({"-mllvm", "--nvptx-lower-global-ctor-dtor"});
750
0
}
751
752
20
/// Reports whether the debug-info option carried by \p A is accepted by this
/// toolchain.
///
/// Options in the gN group are accepted except for -gmodules; beyond that
/// only the explicitly listed options below are accepted.
bool NVPTXToolChain::supportsDebugInfoOption(const llvm::opt::Arg *A) const {
  const Option &Opt = A->getOption();

  // Everything in the gN group is fine, with -gmodules as the one exception.
  if (Opt.matches(options::OPT_gN_Group) &&
      !Opt.matches(options::OPT_gmodules))
    return true;

  for (auto Accepted :
       {options::OPT_g_Flag, options::OPT_ggdbN_Group, options::OPT_ggdb,
        options::OPT_gdwarf, options::OPT_gdwarf_2, options::OPT_gdwarf_3,
        options::OPT_gdwarf_4, options::OPT_gdwarf_5,
        options::OPT_gcolumn_info}) {
    if (Opt.matches(Accepted))
      return true;
  }
  return false;
}
763
764
void NVPTXToolChain::adjustDebugInfoKind(
765
    llvm::codegenoptions::DebugInfoKind &DebugInfoKind,
766
73
    const ArgList &Args) const {
767
73
  switch (mustEmitDebugInfo(Args)) {
768
57
  case DisableDebugInfo:
769
57
    DebugInfoKind = llvm::codegenoptions::NoDebugInfo;
770
57
    break;
771
6
  case DebugDirectivesOnly:
772
6
    DebugInfoKind = llvm::codegenoptions::DebugDirectivesOnly;
773
6
    break;
774
10
  case EmitSameDebugInfoAsHost:
775
    // Use same debug info level as the host.
776
10
    break;
777
73
  }
778
73
}
779
780
/// CUDA toolchain.  Our assembler is ptxas, and our "linker" is fatbinary,
781
/// which isn't properly a linker but nonetheless performs the step of stitching
782
/// together object files from the assembler into a single blob.
783
784
CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple,
785
                             const ToolChain &HostTC, const ArgList &Args)
786
101
    : NVPTXToolChain(D, Triple, HostTC.getTriple(), Args), HostTC(HostTC) {}
787
788
void CudaToolChain::addClangTargetOptions(
789
    const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
790
73
    Action::OffloadKind DeviceOffloadingKind) const {
791
73
  HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
792
793
73
  StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
794
73
  assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
795
73
  assert((DeviceOffloadingKind == Action::OFK_OpenMP ||
796
73
          DeviceOffloadingKind == Action::OFK_Cuda) &&
797
73
         "Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.");
798
799
73
  if (DeviceOffloadingKind == Action::OFK_Cuda) {
800
73
    CC1Args.append(
801
73
        {"-fcuda-is-device", "-mllvm", "-enable-memcpyopt-without-libcalls"});
802
803
    // Unsized function arguments used for variadics were introduced in CUDA-9.0
804
    // We still do not support generating code that actually uses variadic
805
    // arguments yet, but we do need to allow parsing them as recent CUDA
806
    // headers rely on that. https://github.com/llvm/llvm-project/issues/58410
807
73
    if (CudaInstallation.version() >= CudaVersion::CUDA_90)
808
0
      CC1Args.push_back("-fcuda-allow-variadic-functions");
809
73
  }
810
811
73
  if (DriverArgs.hasArg(options::OPT_nogpulib))
812
30
    return;
813
814
43
  if (DeviceOffloadingKind == Action::OFK_OpenMP &&
815
43
      
DriverArgs.hasArg(options::OPT_S)0
)
816
0
    return;
817
818
43
  std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(GpuArch);
819
43
  if (LibDeviceFile.empty()) {
820
34
    getDriver().Diag(diag::err_drv_no_cuda_libdevice) << GpuArch;
821
34
    return;
822
34
  }
823
824
9
  CC1Args.push_back("-mlink-builtin-bitcode");
825
9
  CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
826
827
9
  clang::CudaVersion CudaInstallationVersion = CudaInstallation.version();
828
829
9
  if (DriverArgs.hasFlag(options::OPT_fcuda_short_ptr,
830
9
                         options::OPT_fno_cuda_short_ptr, false))
831
0
    CC1Args.append({"-mllvm", "--nvptx-short-ptr"});
832
833
9
  if (CudaInstallationVersion >= CudaVersion::UNKNOWN)
834
9
    CC1Args.push_back(
835
9
        DriverArgs.MakeArgString(Twine("-target-sdk-version=") +
836
9
                                 CudaVersionToString(CudaInstallationVersion)));
837
838
9
  if (DeviceOffloadingKind == Action::OFK_OpenMP) {
839
0
    if (CudaInstallationVersion < CudaVersion::CUDA_92) {
840
0
      getDriver().Diag(
841
0
          diag::err_drv_omp_offload_target_cuda_version_not_support)
842
0
          << CudaVersionToString(CudaInstallationVersion);
843
0
      return;
844
0
    }
845
846
    // Link the bitcode library late if we're using device LTO.
847
0
    if (getDriver().isUsingLTO(/* IsOffload */ true))
848
0
      return;
849
850
0
    addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, GpuArch.str(),
851
0
                       getTriple());
852
0
  }
853
9
}
854
855
/// Picks the default denormal handling mode for the device job \p JA.
///
/// \returns the preserve-sign mode for CUDA device jobs when \p FPType is
/// IEEE single precision and flushing of GPU denormals to zero is enabled
/// (it is off by default); the IEEE mode in every other case.
llvm::DenormalMode CudaToolChain::getDefaultDenormalModeForType(
    const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
    const llvm::fltSemantics *FPType) const {
  const bool IsCudaDevice = JA.getOffloadingDeviceKind() == Action::OFK_Cuda;

  // A null FPType can never equal the address of the single-precision
  // semantics, so no separate null check is required.
  if (IsCudaDevice && FPType == &llvm::APFloat::IEEEsingle() &&
      DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero,
                         options::OPT_fno_gpu_flush_denormals_to_zero, false))
    return llvm::DenormalMode::getPreserveSign();

  assert(JA.getOffloadingDeviceKind() != Action::OFK_Host);
  return llvm::DenormalMode::getIEEE();
}
868
869
void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
870
71
                                       ArgStringList &CC1Args) const {
871
  // Check our CUDA version if we're going to include the CUDA headers.
872
71
  if (!DriverArgs.hasArg(options::OPT_nogpuinc) &&
873
71
      
!DriverArgs.hasArg(options::OPT_no_cuda_version_check)39
) {
874
39
    StringRef Arch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
875
39
    assert(!Arch.empty() && "Must have an explicit GPU arch.");
876
39
    CudaInstallation.CheckCudaVersionSupportsArch(StringToCudaArch(Arch));
877
39
  }
878
71
  CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
879
71
}
880
881
127
std::string CudaToolChain::getInputFilename(const InputInfo &Input) const {
882
  // Only object files are changed, for example assembly files keep their .s
883
  // extensions. If the user requested device-only compilation don't change it.
884
127
  if (Input.getType() != types::TY_Object || 
getDriver().offloadDeviceOnly()87
)
885
47
    return ToolChain::getInputFilename(Input);
886
887
  // Replace extension for object files with cubin because nvlink relies on
888
  // these particular file names.
889
80
  SmallString<256> Filename(ToolChain::getInputFilename(Input));
890
80
  llvm::sys::path::replace_extension(Filename, "cubin");
891
80
  return std::string(Filename.str());
892
127
}
893
894
/// Builds the derived argument list used for device compilation.
///
/// Starts from the host toolchain's translation (or a fresh list when that
/// yields none). For OpenMP offloading the incoming arguments are appended
/// without duplicates and, if no -march= is present, one is added from
/// \p BoundArch, from the GPUs detected on the system, or - after a
/// diagnostic - from the default CUDA architecture. For every other
/// offloading kind all arguments are appended and a non-empty \p BoundArch
/// replaces any existing -march=.
///
/// \returns the derived argument list, allocated with `new` when the host
/// translation produced none.
llvm::opt::DerivedArgList *
CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
                             StringRef BoundArch,
                             Action::OffloadKind DeviceOffloadKind) const {
  DerivedArgList *Translated =
      HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
  if (Translated == nullptr)
    Translated = new DerivedArgList(Args.getBaseArgs());

  const OptTable &Opts = getDriver().getOpts();

  if (DeviceOffloadKind != Action::OFK_OpenMP) {
    for (Arg *A : Args)
      Translated->append(A);

    // The bound architecture overrides whatever -march= was passed along.
    if (!BoundArch.empty()) {
      Translated->eraseArg(options::OPT_march_EQ);
      Translated->AddJoinedArg(
          nullptr, Opts.getOption(options::OPT_march_EQ), BoundArch);
    }
    return Translated;
  }

  // OpenMP device offloading: append the derived arguments, taking care not
  // to duplicate flags, and make sure a compute capability is present.
  for (Arg *A : Args) {
    if (llvm::is_contained(*Translated, A))
      continue;
    Translated->append(A);
  }

  if (Translated->hasArg(options::OPT_march_EQ))
    return Translated;

  StringRef Arch = BoundArch;
  if (Arch.empty()) {
    if (auto ArchsOrErr = getSystemGPUArchs(Args)) {
      Arch = Args.MakeArgString(ArchsOrErr->front());
    } else {
      // Detection failed: report why and fall back to the default arch.
      std::string ErrMsg =
          llvm::formatv("{0}", llvm::fmt_consume(ArchsOrErr.takeError()));
      getDriver().Diag(diag::err_drv_undetermined_gpu_arch)
          << llvm::Triple::getArchTypeName(getArch()) << ErrMsg << "-march";
      Arch = CudaArchToString(CudaArch::CudaDefault);
    }
  }
  Translated->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
                           Arch);
  return Translated;
}
944
945
/// Detects the NVIDIA GPUs available on the system.
///
/// Runs the tool named by the OPT_nvptx_arch_tool_EQ option, or the
/// 'nvptx-arch' program found through GetProgramPath() when the option is
/// absent, and treats every non-empty line of its output as one architecture.
///
/// \returns the detected architectures, the error from running the tool, or
/// an error when the tool reported no GPU at all.
Expected<SmallVector<std::string>>
CudaToolChain::getSystemGPUArchs(const ArgList &Args) const {
  const Arg *ToolArg = Args.getLastArg(options::OPT_nvptx_arch_tool_EQ);
  const std::string Program = ToolArg ? std::string(ToolArg->getValue())
                                      : GetProgramPath("nvptx-arch");

  auto StdoutOrErr = executeToolChainProgram(Program);
  if (!StdoutOrErr)
    return StdoutOrErr.takeError();

  SmallVector<std::string, 1> GPUArchs;
  for (StringRef Line : llvm::split((*StdoutOrErr)->getBuffer(), "\n")) {
    if (Line.empty())
      continue;
    GPUArchs.push_back(Line.str());
  }

  if (GPUArchs.empty())
    return llvm::createStringError(std::error_code(),
                                   "No NVIDIA GPU detected in the system");

  return std::move(GPUArchs);
}
969
970
0
/// Creates the assembler tool of the stand-alone NVPTX toolchain.
/// \returns a newly allocated NVPTX::Assembler bound to this toolchain.
Tool *NVPTXToolChain::buildAssembler() const {
  Tool *Assembler = new tools::NVPTX::Assembler(*this);
  return Assembler;
}
973
974
0
/// Creates the linker tool of the stand-alone NVPTX toolchain.
/// \returns a newly allocated NVPTX::Linker bound to this toolchain.
Tool *NVPTXToolChain::buildLinker() const {
  Tool *Linker = new tools::NVPTX::Linker(*this);
  return Linker;
}
977
978
47
/// Creates the assembler tool of the CUDA toolchain.
/// \returns a newly allocated NVPTX::Assembler bound to this toolchain.
Tool *CudaToolChain::buildAssembler() const {
  Tool *Assembler = new tools::NVPTX::Assembler(*this);
  return Assembler;
}
981
982
40
/// Creates the "linker" tool of the CUDA toolchain, which is the fatbinary
/// step rather than a real linker.
/// \returns a newly allocated NVPTX::FatBinary bound to this toolchain.
Tool *CudaToolChain::buildLinker() const {
  Tool *FatBinaryTool = new tools::NVPTX::FatBinary(*this);
  return FatBinaryTool;
}
985
986
73
/// Adds warning options to \p CC1Args by deferring entirely to the host
/// toolchain.
void CudaToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
  HostTC.addClangWarningOptions(CC1Args);
}
989
990
/// \returns the C++ standard library type selected by the host toolchain for
/// \p Args.
ToolChain::CXXStdlibType
CudaToolChain::GetCXXStdlibType(const ArgList &Args) const {
  return HostTC.GetCXXStdlibType(Args);
}
994
995
void CudaToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
996
139
                                              ArgStringList &CC1Args) const {
997
139
  HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
998
999
139
  if (!DriverArgs.hasArg(options::OPT_nogpuinc) && 
CudaInstallation.isValid()78
)
1000
14
    CC1Args.append(
1001
14
        {"-internal-isystem",
1002
14
         DriverArgs.MakeArgString(CudaInstallation.getIncludePath())});
1003
139
}
1004
1005
void CudaToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args,
1006
139
                                                 ArgStringList &CC1Args) const {
1007
139
  HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args);
1008
139
}
1009
1010
void CudaToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
1011
0
                                        ArgStringList &CC1Args) const {
1012
0
  HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
1013
0
}
1014
1015
73
/// \returns the sanitizers supported by the host toolchain.
SanitizerMask CudaToolChain::getSupportedSanitizers() const {
  // "Supported" here only means "accepted on the command line": the
  // CudaToolChain allows sanitizer arguments that the host toolchain
  // supports, but it ignores them, so no sanitization of device code actually
  // takes place at this time.
  //
  // This is required because host and device toolchain invocations often
  // share one command line, so the device toolchain has to tolerate flags
  // that are meant only for the host toolchain.
  return HostTC.getSupportedSanitizers();
}
1027
1028
/// \returns the MSVC version computed by the host toolchain for \p D and
/// \p Args.
VersionTuple CudaToolChain::computeMSVCVersion(const Driver *D,
                                               const ArgList &Args) const {
  return HostTC.computeMSVCVersion(D, Args);
}