Coverage Report

Created: 2020-09-22 08:39

/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/Driver/ToolChains/Cuda.cpp
Line
Count
Source
1
//===--- Cuda.cpp - Cuda Tool and ToolChain Implementations -----*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
9
#include "Cuda.h"
10
#include "CommonArgs.h"
11
#include "InputInfo.h"
12
#include "clang/Basic/Cuda.h"
13
#include "clang/Config/config.h"
14
#include "clang/Driver/Compilation.h"
15
#include "clang/Driver/Distro.h"
16
#include "clang/Driver/Driver.h"
17
#include "clang/Driver/DriverDiagnostic.h"
18
#include "clang/Driver/Options.h"
19
#include "llvm/Option/ArgList.h"
20
#include "llvm/Support/FileSystem.h"
21
#include "llvm/Support/Host.h"
22
#include "llvm/Support/Path.h"
23
#include "llvm/Support/Process.h"
24
#include "llvm/Support/Program.h"
25
#include "llvm/Support/TargetParser.h"
26
#include "llvm/Support/VirtualFileSystem.h"
27
#include <system_error>
28
29
using namespace clang::driver;
30
using namespace clang::driver::toolchains;
31
using namespace clang::driver::tools;
32
using namespace clang;
33
using namespace llvm::opt;
34
35
// Parses the contents of version.txt in a CUDA installation. It should
36
// contain one line of the form, e.g., "CUDA Version 7.5.2".
37
39
void CudaInstallationDetector::ParseCudaVersionFile(llvm::StringRef V) {
38
39
  Version = CudaVersion::UNKNOWN;
39
39
  if (!V.startswith("CUDA Version "))
40
0
    return;
41
39
  V = V.substr(strlen("CUDA Version "));
42
39
  SmallVector<StringRef,4> VersionParts;
43
39
  V.split(VersionParts, '.');
44
39
  if (VersionParts.size() < 2)
45
0
    return;
46
39
  DetectedVersion = join_items(".", VersionParts[0], VersionParts[1]);
47
39
  Version = CudaStringToVersion(DetectedVersion);
48
39
  if (Version != CudaVersion::UNKNOWN) {
49
    // TODO(tra): remove the warning once we have all features of 10.2 and 11.0
50
    // implemented.
51
36
    DetectedVersionIsNotSupported = Version > CudaVersion::LATEST_SUPPORTED;
52
36
    return;
53
36
  }
54
55
3
  Version = CudaVersion::LATEST_SUPPORTED;
56
3
  DetectedVersionIsNotSupported = true;
57
3
}
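ParseCudaVersionFile above keeps only the major.minor prefix of a line such as "CUDA Version 7.5.2". A minimal standalone sketch of the same parsing idea, assuming plain std::string input rather than the driver's StringRef (hypothetical helper, not part of Cuda.cpp):

#include <optional>
#include <string>

// Hypothetical helper mirroring the parse above: extract "major.minor"
// from a line like "CUDA Version 7.5.2"; returns std::nullopt when the
// prefix is missing or fewer than two version components are present.
static std::optional<std::string> extractMajorMinor(const std::string &Line) {
  const std::string Prefix = "CUDA Version ";
  if (Line.compare(0, Prefix.size(), Prefix) != 0)
    return std::nullopt;
  std::string Rest = Line.substr(Prefix.size());
  size_t FirstDot = Rest.find('.');
  if (FirstDot == std::string::npos)
    return std::nullopt; // need at least "major.minor"
  size_t SecondDot = Rest.find('.', FirstDot + 1);
  return Rest.substr(0, SecondDot); // npos keeps the whole remainder
}
// extractMajorMinor("CUDA Version 7.5.2") yields "7.5".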
58
59
42
void CudaInstallationDetector::WarnIfUnsupportedVersion() {
60
42
  if (DetectedVersionIsNotSupported)
61
1
    D.Diag(diag::warn_drv_unknown_cuda_version)
62
1
        << DetectedVersion
63
1
        << CudaVersionToString(CudaVersion::LATEST_SUPPORTED);
64
42
}
65
66
CudaInstallationDetector::CudaInstallationDetector(
67
    const Driver &D, const llvm::Triple &HostTriple,
68
    const llvm::opt::ArgList &Args)
69
46.6k
    : D(D) {
70
46.6k
  struct Candidate {
71
46.6k
    std::string Path;
72
46.6k
    bool StrictChecking;
73
74
46.6k
    Candidate(std::string Path, bool StrictChecking = false)
75
178k
        : Path(Path), StrictChecking(StrictChecking) {}
76
46.6k
  };
77
46.6k
  SmallVector<Candidate, 4> Candidates;
78
79
  // In decreasing order so we prefer newer versions to older versions.
80
46.6k
  std::initializer_list<const char *> Versions = {"8.0", "7.5", "7.0"};
81
46.6k
  auto &FS = D.getVFS();
82
83
46.6k
  if (Args.hasArg(clang::driver::options::OPT_cuda_path_EQ)) {
84
95
    Candidates.emplace_back(
85
95
        Args.getLastArgValue(clang::driver::options::OPT_cuda_path_EQ).str());
86
46.5k
  } else if (HostTriple.isOSWindows()) {
87
7.80k
    for (const char *Ver : Versions)
88
23.4k
      Candidates.emplace_back(
89
23.4k
          D.SysRoot + "/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" +
90
23.4k
          Ver);
91
38.7k
  } else {
92
38.7k
    if (!Args.hasArg(clang::driver::options::OPT_cuda_path_ignore_env)) {
93
      // Try to find the ptxas binary. If the executable is located in a directory
94
      // called 'bin/', its parent directory might be a good guess for a valid
95
      // CUDA installation.
96
      // However, some distributions might install 'ptxas' to /usr/bin. In that
97
      // case the candidate would be '/usr' which passes the following checks
98
      // because '/usr/include' exists as well. To avoid this case, we always
99
      // check for the directory potentially containing files for libdevice,
100
      // even if the user passes -nocudalib.
101
38.7k
      if (llvm::ErrorOr<std::string> ptxas =
102
16
              llvm::sys::findProgramByName("ptxas")) {
103
16
        SmallString<256> ptxasAbsolutePath;
104
16
        llvm::sys::fs::real_path(*ptxas, ptxasAbsolutePath);
105
106
16
        StringRef ptxasDir = llvm::sys::path::parent_path(ptxasAbsolutePath);
107
16
        if (llvm::sys::path::filename(ptxasDir) == "bin")
108
16
          Candidates.emplace_back(
109
16
              std::string(llvm::sys::path::parent_path(ptxasDir)),
110
16
              /*StrictChecking=*/true);
111
16
      }
112
38.7k
    }
113
114
38.7k
    Candidates.emplace_back(D.SysRoot + "/usr/local/cuda");
115
38.7k
    for (const char *Ver : Versions)
116
116k
      Candidates.emplace_back(D.SysRoot + "/usr/local/cuda-" + Ver);
117
118
38.7k
    Distro Dist(FS, llvm::Triple(llvm::sys::getProcessTriple()));
119
38.7k
    if (Dist.IsDebian() || Dist.IsUbuntu())
120
      // Special case for Debian to have nvidia-cuda-toolkit work
121
      // out of the box. More info on http://bugs.debian.org/882505
122
0
      Candidates.emplace_back(D.SysRoot + "/usr/lib/cuda");
123
38.7k
  }
124
125
46.6k
  bool NoCudaLib = Args.hasArg(options::OPT_nogpulib);
126
127
178k
  for (const auto &Candidate : Candidates) {
128
178k
    InstallPath = Candidate.Path;
129
178k
    if (InstallPath.empty() || !FS.exists(InstallPath))
130
178k
      continue;
131
132
114
    BinPath = InstallPath + "/bin";
133
114
    IncludePath = InstallPath + "/include";
134
114
    LibDevicePath = InstallPath + "/nvvm/libdevice";
135
136
114
    if (!(FS.exists(IncludePath) && FS.exists(BinPath)))
137
0
      continue;
138
114
    bool CheckLibDevice = (!NoCudaLib || Candidate.StrictChecking);
139
114
    if (CheckLibDevice && !FS.exists(LibDevicePath))
140
12
      continue;
141
142
    // On Linux, we have both lib and lib64 directories, and we need to choose
143
    // based on our triple.  On MacOS, we have only a lib directory.
144
    //
145
    // It's sufficient for our purposes to be flexible: If both lib and lib64
146
    // exist, we choose whichever one matches our triple.  Otherwise, if only
147
    // lib exists, we use it.
148
102
    if (HostTriple.isArch64Bit() && FS.exists(InstallPath + "/lib64"))
149
55
      LibPath = InstallPath + "/lib64";
150
47
    else if (FS.exists(InstallPath + "/lib"))
151
47
      LibPath = InstallPath + "/lib";
152
0
    else
153
0
      continue;
154
155
102
    llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile =
156
102
        FS.getBufferForFile(InstallPath + "/version.txt");
157
102
    if (!VersionFile) {
158
      // CUDA 7.0 doesn't have a version.txt, so guess that's our version if
159
      // version.txt isn't present.
160
63
      Version = CudaVersion::CUDA_70;
161
39
    } else {
162
39
      ParseCudaVersionFile((*VersionFile)->getBuffer());
163
39
    }
164
165
102
    if (Version >= CudaVersion::CUDA_90) {
166
      // CUDA-9+ uses single libdevice file for all GPU variants.
167
11
      std::string FilePath = LibDevicePath + "/libdevice.10.bc";
168
11
      if (FS.exists(FilePath)) {
169
407
        for (int Arch = (int)CudaArch::SM_30, E = (int)CudaArch::LAST; Arch < E;
170
396
             ++Arch) {
171
396
          CudaArch GpuArch = static_cast<CudaArch>(Arch);
172
396
          if (!IsNVIDIAGpuArch(GpuArch))
173
242
            continue;
174
154
          std::string GpuArchName(CudaArchToString(GpuArch));
175
154
          LibDeviceMap[GpuArchName] = FilePath;
176
154
        }
177
11
      }
178
91
    } else {
179
91
      std::error_code EC;
180
91
      for (llvm::vfs::directory_iterator LI = FS.dir_begin(LibDevicePath, EC),
181
91
                                         LE;
182
321
           !EC && LI != LE; LI = LI.increment(EC)) {
183
230
        StringRef FilePath = LI->path();
184
230
        StringRef FileName = llvm::sys::path::filename(FilePath);
185
        // Process all bitcode filenames that look like
186
        // libdevice.compute_XX.YY.bc
187
230
        const StringRef LibDeviceName = "libdevice.";
188
230
        if (!(FileName.startswith(LibDeviceName) && FileName.endswith(".bc")))
189
0
          continue;
190
230
        StringRef GpuArch = FileName.slice(
191
230
            LibDeviceName.size(), FileName.find('.', LibDeviceName.size()));
192
230
        LibDeviceMap[GpuArch] = FilePath.str();
193
        // Insert map entries for specific devices with this compute
194
        // capability. NVCC's choice of the libdevice library version is
195
        // rather peculiar and depends on the CUDA version.
196
230
        if (GpuArch == "compute_20") {
197
28
          LibDeviceMap["sm_20"] = std::string(FilePath);
198
28
          LibDeviceMap["sm_21"] = std::string(FilePath);
199
28
          LibDeviceMap["sm_32"] = std::string(FilePath);
200
202
        } else if (GpuArch == "compute_30") {
201
87
          LibDeviceMap["sm_30"] = std::string(FilePath);
202
87
          if (Version < CudaVersion::CUDA_80) {
203
59
            LibDeviceMap["sm_50"] = std::string(FilePath);
204
59
            LibDeviceMap["sm_52"] = std::string(FilePath);
205
59
            LibDeviceMap["sm_53"] = std::string(FilePath);
206
59
          }
207
87
          LibDeviceMap["sm_60"] = std::string(FilePath);
208
87
          LibDeviceMap["sm_61"] = std::string(FilePath);
209
87
          LibDeviceMap["sm_62"] = std::string(FilePath);
210
115
        } else if (GpuArch == "compute_35") {
211
87
          LibDeviceMap["sm_35"] = std::string(FilePath);
212
87
          LibDeviceMap["sm_37"] = std::string(FilePath);
213
28
        } else if (GpuArch == "compute_50") {
214
28
          if (Version >= CudaVersion::CUDA_80) {
215
28
            LibDeviceMap["sm_50"] = std::string(FilePath);
216
28
            LibDeviceMap["sm_52"] = std::string(FilePath);
217
28
            LibDeviceMap["sm_53"] = std::string(FilePath);
218
28
          }
219
28
        }
220
230
      }
221
91
    }
222
223
    // Check that we have found at least one libdevice that we can link in if
224
    // -nocudalib hasn't been specified.
225
102
    if (LibDeviceMap.empty() && !NoCudaLib)
226
0
      continue;
227
228
102
    IsValid = true;
229
102
    break;
230
102
  }
231
46.6k
}
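For pre-9.0 toolkits, the directory walk above derives each LibDeviceMap key from file names of the form libdevice.compute_XX.YY.bc. A rough sketch of that slicing using plain std::string (hypothetical helper, for illustration only; the driver itself uses StringRef::slice):

#include <string>

// Hypothetical sketch: map "libdevice.compute_35.10.bc" to "compute_35".
// Returns an empty string for names that do not match the pattern.
static std::string libDeviceArchKey(const std::string &FileName) {
  const std::string Prefix = "libdevice.";
  const std::string Suffix = ".bc";
  if (FileName.size() <= Prefix.size() + Suffix.size() ||
      FileName.compare(0, Prefix.size(), Prefix) != 0 ||
      FileName.compare(FileName.size() - Suffix.size(), Suffix.size(),
                       Suffix) != 0)
    return "";
  // The key runs from the end of the prefix up to the next '.'.
  size_t End = FileName.find('.', Prefix.size());
  return FileName.substr(Prefix.size(), End - Prefix.size());
}
// libDeviceArchKey("libdevice.compute_35.10.bc") yields "compute_35".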
232
233
void CudaInstallationDetector::AddCudaIncludeArgs(
234
471
    const ArgList &DriverArgs, ArgStringList &CC1Args) const {
235
471
  if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
236
    // Add cuda_wrappers/* to our system include path.  This lets us wrap
237
    // standard library headers.
238
471
    SmallString<128> P(D.ResourceDir);
239
471
    llvm::sys::path::append(P, "include");
240
471
    llvm::sys::path::append(P, "cuda_wrappers");
241
471
    CC1Args.push_back("-internal-isystem");
242
471
    CC1Args.push_back(DriverArgs.MakeArgString(P));
243
471
  }
244
245
471
  if (DriverArgs.hasArg(options::OPT_nogpuinc))
246
93
    return;
247
248
378
  if (!isValid()) {
249
314
    D.Diag(diag::err_drv_no_cuda_installation);
250
314
    return;
251
314
  }
252
253
64
  CC1Args.push_back("-internal-isystem");
254
64
  CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath()));
255
64
  CC1Args.push_back("-include");
256
64
  CC1Args.push_back("__clang_cuda_runtime_wrapper.h");
257
64
}
258
259
void CudaInstallationDetector::CheckCudaVersionSupportsArch(
260
462
    CudaArch Arch) const {
261
462
  if (Arch == CudaArch::UNKNOWN || Version == CudaVersion::UNKNOWN ||
262
70
      ArchsWithBadVersion[(int)Arch])
263
397
    return;
264
265
65
  auto MinVersion = MinVersionForCudaArch(Arch);
266
65
  auto MaxVersion = MaxVersionForCudaArch(Arch);
267
65
  if (Version < MinVersion || Version > MaxVersion) {
268
7
    ArchsWithBadVersion[(int)Arch] = true;
269
7
    D.Diag(diag::err_drv_cuda_version_unsupported)
270
7
        << CudaArchToString(Arch) << CudaVersionToString(MinVersion)
271
7
        << CudaVersionToString(MaxVersion) << InstallPath
272
7
        << CudaVersionToString(Version);
273
7
  }
274
65
}
275
276
193
void CudaInstallationDetector::print(raw_ostream &OS) const {
277
193
  if (isValid())
278
52
    OS << "Found CUDA installation: " << InstallPath << ", version "
279
52
       << CudaVersionToString(Version) << "\n";
280
193
}
281
282
namespace {
283
/// Debug info level for the NVPTX devices. We may need to emit a different
284
/// debug info level for the host and for the device itself. This type controls
285
/// emission of debug info for the devices: it either disables debug info
286
/// emission completely, emits debug directives only, or emits the same debug
287
/// info as for the host.
288
enum DeviceDebugInfoLevel {
289
  DisableDebugInfo,        /// Do not emit debug info for the devices.
290
  DebugDirectivesOnly,     /// Emit only debug directives.
291
  EmitSameDebugInfoAsHost, /// Use the same debug info level just like for the
292
                           /// host.
293
};
294
} // anonymous namespace
295
296
/// Define the debug info level for the NVPTX devices. If debug info for both
297
/// the host and device is disabled (-g0/-ggdb0 or no debug options at all),
298
/// no debug info is emitted. If only debug directives are requested for both
299
/// the host and device (-gline-directives-only), or debug info for the device
300
/// alone is disabled (optimization is on and --cuda-noopt-device-debug was not
301
/// specified), only debug directives are emitted for the device. Otherwise,
302
/// use the same debug info level as for the host (limited to the DWARF2
303
/// standard).
304
809
static DeviceDebugInfoLevel mustEmitDebugInfo(const ArgList &Args) {
305
809
  const Arg *A = Args.getLastArg(options::OPT_O_Group);
306
809
  bool IsDebugEnabled = !A || A->getOption().matches(options::OPT_O0) ||
307
78
                        Args.hasFlag(options::OPT_cuda_noopt_device_debug,
308
78
                                     options::OPT_no_cuda_noopt_device_debug,
309
78
                                     /*Default=*/false);
310
809
  if (const Arg *A = Args.getLastArg(options::OPT_g_Group)) {
311
110
    const Option &Opt = A->getOption();
312
110
    if (Opt.matches(options::OPT_gN_Group)) {
313
54
      if (Opt.matches(options::OPT_g0) || Opt.matches(options::OPT_ggdb0))
314
12
        return DisableDebugInfo;
315
42
      if (Opt.matches(options::OPT_gline_directives_only))
316
6
        return DebugDirectivesOnly;
317
92
    }
318
92
    return IsDebugEnabled ? EmitSameDebugInfoAsHost : DebugDirectivesOnly;
319
92
  }
320
699
  return DisableDebugInfo;
321
699
}
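The net effect of the function above can be summarized as a decision over a handful of booleans. The following sketch is a simplified restatement for illustration only (hypothetical names; the real code inspects Arg/Option objects from the ArgList):

// Simplified, hypothetical restatement of mustEmitDebugInfo's decision.
enum Level { Disable, DirectivesOnly, SameAsHost };

static Level deviceDebugLevel(bool HasGOption, bool IsG0,
                              bool IsLineDirectivesOnly, bool OptimizationOn,
                              bool CudaNooptDeviceDebug) {
  if (!HasGOption || IsG0)
    return Disable;                   // no -g at all, or -g0/-ggdb0
  if (IsLineDirectivesOnly)
    return DirectivesOnly;            // -gline-directives-only
  if (OptimizationOn && !CudaNooptDeviceDebug)
    return DirectivesOnly;            // device debug info suppressed by -O
  return SameAsHost;                  // full debug info (DWARF2 only)
}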
322
323
void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
324
                                    const InputInfo &Output,
325
                                    const InputInfoList &Inputs,
326
                                    const ArgList &Args,
327
257
                                    const char *LinkingOutput) const {
328
257
  const auto &TC =
329
257
      static_cast<const toolchains::CudaToolChain &>(getToolChain());
330
257
  assert(TC.getTriple().isNVPTX() && "Wrong platform");
331
332
257
  StringRef GPUArchName;
333
  // If this is an OpenMP action we need to extract the device architecture
334
  // from the -march=arch option. This option may come from the -Xopenmp-target
335
  // flag or the default value.
336
257
  if (JA.isDeviceOffloading(Action::OFK_OpenMP)) {
337
43
    GPUArchName = Args.getLastArgValue(options::OPT_march_EQ);
338
43
    assert(!GPUArchName.empty() && "Must have an architecture passed in.");
339
43
  } else
340
214
    GPUArchName = JA.getOffloadingArch();
341
342
  // Obtain architecture from the action.
343
257
  CudaArch gpu_arch = StringToCudaArch(GPUArchName);
344
257
  assert(gpu_arch != CudaArch::UNKNOWN &&
345
257
         "Device action expected to have an architecture.");
346
347
  // Check that our installation's ptxas supports gpu_arch.
348
257
  if (!Args.hasArg(options::OPT_no_cuda_version_check)) {
349
256
    TC.CudaInstallation.CheckCudaVersionSupportsArch(gpu_arch);
350
256
  }
351
352
257
  ArgStringList CmdArgs;
353
232
  CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : 
"-m32"25
);
354
257
  DeviceDebugInfoLevel DIKind = mustEmitDebugInfo(Args);
355
257
  if (DIKind == EmitSameDebugInfoAsHost) {
356
    // ptxas does not accept the -g option if optimization is enabled, so
357
    // we ignore the compiler's -O* options if we want debug info.
358
25
    CmdArgs.push_back("-g");
359
25
    CmdArgs.push_back("--dont-merge-basicblocks");
360
25
    CmdArgs.push_back("--return-at-end");
361
232
  } else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
362
    // Map the -O we received to -O{0,1,2,3}.
363
    //
364
    // TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's
365
    // default, so it may correspond more closely to the spirit of clang -O2.
366
367
    // -O3 seems like the least-bad option when -Osomething is specified to
368
    // clang but it isn't handled below.
369
20
    StringRef OOpt = "3";
370
20
    if (A->getOption().matches(options::OPT_O4) ||
371
19
        A->getOption().matches(options::OPT_Ofast))
372
2
      OOpt = "3";
373
18
    else if (A->getOption().matches(options::OPT_O0))
374
3
      OOpt = "0";
375
15
    else if (A->getOption().matches(options::OPT_O)) {
376
      // -Os, -Oz, and -O(anything else) map to -O2, for lack of better options.
377
15
      OOpt = llvm::StringSwitch<const char *>(A->getValue())
378
15
                 .Case("1", "1")
379
15
                 .Case("2", "2")
380
15
                 .Case("3", "3")
381
15
                 .Case("s", "2")
382
15
                 .Case("z", "2")
383
15
                 .Default("2");
384
15
    }
385
20
    CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-O") + OOpt));
386
212
  } else {
387
    // If no -O was passed, pass -O0 to ptxas -- no opt flag should correspond
388
    // to no optimizations, but ptxas's default is -O3.
389
212
    CmdArgs.push_back("-O0");
390
212
  }
391
257
  if (DIKind == DebugDirectivesOnly)
392
8
    CmdArgs.push_back("-lineinfo");
393
394
  // Pass -v to ptxas if it was passed to the driver.
395
257
  if (Args.hasArg(options::OPT_v))
396
38
    CmdArgs.push_back("-v");
397
398
257
  CmdArgs.push_back("--gpu-name");
399
257
  CmdArgs.push_back(Args.MakeArgString(CudaArchToString(gpu_arch)));
400
257
  CmdArgs.push_back("--output-file");
401
257
  CmdArgs.push_back(Args.MakeArgString(TC.getInputFilename(Output)));
402
257
  for (const auto& II : Inputs)
403
257
    CmdArgs.push_back(Args.MakeArgString(II.getFilename()));
404
405
257
  for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_ptxas))
406
2
    CmdArgs.push_back(Args.MakeArgString(A));
407
408
257
  bool Relocatable = false;
409
257
  if (JA.isOffloading(Action::OFK_OpenMP))
410
    // In OpenMP we need to generate relocatable code.
411
43
    Relocatable = Args.hasFlag(options::OPT_fopenmp_relocatable_target,
412
43
                               options::OPT_fnoopenmp_relocatable_target,
413
43
                               /*Default=*/true);
414
214
  else if (JA.isOffloading(Action::OFK_Cuda))
415
214
    Relocatable = Args.hasFlag(options::OPT_fgpu_rdc,
416
214
                               options::OPT_fno_gpu_rdc, /*Default=*/false);
417
418
257
  if (Relocatable)
419
49
    CmdArgs.push_back("-c");
420
421
257
  const char *Exec;
422
257
  if (Arg *A = Args.getLastArg(options::OPT_ptxas_path_EQ))
423
1
    Exec = A->getValue();
424
256
  else
425
256
    Exec = Args.MakeArgString(TC.GetProgramPath("ptxas"));
426
257
  C.addCommand(std::make_unique<Command>(
427
257
      JA, *this,
428
257
      ResponseFileSupport{ResponseFileSupport::RF_Full, llvm::sys::WEM_UTF8,
429
257
                          "--options-file"},
430
257
      Exec, CmdArgs, Inputs));
431
257
}
432
433
191
static bool shouldIncludePTX(const ArgList &Args, const char *gpu_arch) {
434
191
  bool includePTX = true;
435
1.52k
  for (Arg *A : Args) {
436
1.52k
    if (!(A->getOption().matches(options::OPT_cuda_include_ptx_EQ) ||
437
1.51k
          A->getOption().matches(options::OPT_no_cuda_include_ptx_EQ)))
438
1.49k
      continue;
439
28
    A->claim();
440
28
    const StringRef ArchStr = A->getValue();
441
28
    if (ArchStr == "all" || ArchStr == gpu_arch) {
442
22
      includePTX = A->getOption().matches(options::OPT_cuda_include_ptx_EQ);
443
22
      continue;
444
22
    }
445
28
  }
446
191
  return includePTX;
447
191
}
448
449
// All inputs to this linker must be from CudaDeviceActions, as we need to look
450
// at the Inputs' Actions in order to figure out which GPU architecture they
451
// correspond to.
452
void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
453
                                 const InputInfo &Output,
454
                                 const InputInfoList &Inputs,
455
                                 const ArgList &Args,
456
175
                                 const char *LinkingOutput) const {
457
175
  const auto &TC =
458
175
      static_cast<const toolchains::CudaToolChain &>(getToolChain());
459
175
  assert(TC.getTriple().isNVPTX() && "Wrong platform");
460
461
175
  ArgStringList CmdArgs;
462
175
  if (TC.CudaInstallation.version() <= CudaVersion::CUDA_100)
463
174
    CmdArgs.push_back("--cuda");
464
150
  CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-64" : "-32");
465
175
  CmdArgs.push_back(Args.MakeArgString("--create"));
466
175
  CmdArgs.push_back(Args.MakeArgString(Output.getFilename()));
467
175
  if (mustEmitDebugInfo(Args) == EmitSameDebugInfoAsHost)
468
14
    CmdArgs.push_back("-g");
469
470
382
  for (const auto& II : Inputs) {
471
382
    auto *A = II.getAction();
472
382
    assert(A->getInputs().size() == 1 &&
473
382
           "Device offload action is expected to have a single input");
474
382
    const char *gpu_arch_str = A->getOffloadingArch();
475
382
    assert(gpu_arch_str &&
476
382
           "Device action expected to have associated a GPU architecture!");
477
382
    CudaArch gpu_arch = StringToCudaArch(gpu_arch_str);
478
479
382
    if (II.getType() == types::TY_PP_Asm &&
480
191
        !shouldIncludePTX(Args, gpu_arch_str))
481
8
      continue;
482
    // We need to pass an Arch of the form "sm_XX" for cubin files and
483
    // "compute_XX" for ptx.
484
374
    const char *Arch = (II.getType() == types::TY_PP_Asm)
485
183
                           ? CudaArchToVirtualArchString(gpu_arch)
486
191
                           : gpu_arch_str;
487
374
    CmdArgs.push_back(Args.MakeArgString(llvm::Twine("--image=profile=") +
488
374
                                         Arch + ",file=" + II.getFilename()));
489
374
  }
490
491
175
  for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary))
492
2
    CmdArgs.push_back(Args.MakeArgString(A));
493
494
175
  const char *Exec = Args.MakeArgString(TC.GetProgramPath("fatbinary"));
495
175
  C.addCommand(std::make_unique<Command>(
496
175
      JA, *this,
497
175
      ResponseFileSupport{ResponseFileSupport::RF_Full, llvm::sys::WEM_UTF8,
498
175
                          "--options-file"},
499
175
      Exec, CmdArgs, Inputs));
500
175
}
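Each device input above contributes one --image=profile=...,file=... argument to the fatbinary command: cubin inputs are registered under the real architecture (sm_XX) and PTX assembly under the virtual one (compute_XX). A small sketch of that selection (hypothetical helper, illustration only):

#include <string>

// Hypothetical sketch of the per-input profile selection above.
static std::string fatbinaryImageArg(bool IsPtxAssembly,
                                     const std::string &SmArch,
                                     const std::string &ComputeArch,
                                     const std::string &File) {
  const std::string &Profile = IsPtxAssembly ? ComputeArch : SmArch;
  return "--image=profile=" + Profile + ",file=" + File;
}
// fatbinaryImageArg(false, "sm_35", "compute_35", "a.cubin")
//   yields "--image=profile=sm_35,file=a.cubin"
// fatbinaryImageArg(true,  "sm_35", "compute_35", "a.s")
//   yields "--image=profile=compute_35,file=a.s"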
501
502
void NVPTX::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
503
                                       const InputInfo &Output,
504
                                       const InputInfoList &Inputs,
505
                                       const ArgList &Args,
506
44
                                       const char *LinkingOutput) const {
507
44
  const auto &TC =
508
44
      static_cast<const toolchains::CudaToolChain &>(getToolChain());
509
44
  assert(TC.getTriple().isNVPTX() && "Wrong platform");
510
511
44
  ArgStringList CmdArgs;
512
513
  // OpenMP uses nvlink to link cubin files. The result will be embedded in the
514
  // host binary by the host linker.
515
44
  assert(!JA.isHostOffloading(Action::OFK_OpenMP) &&
516
44
         "CUDA toolchain not expected for an OpenMP host device.");
517
518
44
  if (Output.isFilename()) {
519
44
    CmdArgs.push_back("-o");
520
44
    CmdArgs.push_back(Output.getFilename());
521
44
  } else
522
44
    assert(Output.isNothing() && "Invalid output.");
523
44
  if (mustEmitDebugInfo(Args) == EmitSameDebugInfoAsHost)
524
10
    CmdArgs.push_back("-g");
525
526
44
  if (Args.hasArg(options::OPT_v))
527
0
    CmdArgs.push_back("-v");
528
529
44
  StringRef GPUArch =
530
44
      Args.getLastArgValue(options::OPT_march_EQ);
531
44
  assert(!GPUArch.empty() && "At least one GPU Arch required for ptxas.");
532
533
44
  CmdArgs.push_back("-arch");
534
44
  CmdArgs.push_back(Args.MakeArgString(GPUArch));
535
536
  // Assume that the directory specified with --libomptarget_nvptx_path
537
  // contains the static library libomptarget-nvptx.a.
538
44
  if (const Arg *A = Args.getLastArg(options::OPT_libomptarget_nvptx_path_EQ))
539
2
    CmdArgs.push_back(Args.MakeArgString(Twine("-L") + A->getValue()));
540
541
  // Add paths specified in the LIBRARY_PATH environment variable as -L options.
542
44
  addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
543
544
  // Add paths for the default clang library path.
545
44
  SmallString<256> DefaultLibPath =
546
44
      llvm::sys::path::parent_path(TC.getDriver().Dir);
547
44
  llvm::sys::path::append(DefaultLibPath, "lib" CLANG_LIBDIR_SUFFIX);
548
44
  CmdArgs.push_back(Args.MakeArgString(Twine("-L") + DefaultLibPath));
549
550
  // Add linking against the library implementing OpenMP calls on the NVPTX target.
551
44
  CmdArgs.push_back("-lomptarget-nvptx");
552
553
46
  for (const auto &II : Inputs) {
554
46
    if (II.getType() == types::TY_LLVM_IR ||
555
46
        II.getType() == types::TY_LTO_IR ||
556
46
        II.getType() == types::TY_LTO_BC ||
557
46
        II.getType() == types::TY_LLVM_BC) {
558
0
      C.getDriver().Diag(diag::err_drv_no_linker_llvm_support)
559
0
          << getToolChain().getTripleString();
560
0
      continue;
561
0
    }
562
563
    // Currently, we only pass the input files to the linker; we do not pass
564
    // any libraries that may be valid only for the host.
565
46
    if (!II.isFilename())
566
0
      continue;
567
568
46
    const char *CubinF = C.addTempFile(
569
46
        C.getArgs().MakeArgString(getToolChain().getInputFilename(II)));
570
571
46
    CmdArgs.push_back(CubinF);
572
46
  }
573
574
44
  const char *Exec =
575
44
      Args.MakeArgString(getToolChain().GetProgramPath("nvlink"));
576
44
  C.addCommand(std::make_unique<Command>(
577
44
      JA, *this,
578
44
      ResponseFileSupport{ResponseFileSupport::RF_Full, llvm::sys::WEM_UTF8,
579
44
                          "--options-file"},
580
44
      Exec, CmdArgs, Inputs));
581
44
}
582
583
/// CUDA toolchain.  Our assembler is ptxas, and our "linker" is fatbinary,
584
/// which isn't properly a linker but nonetheless performs the step of stitching
585
/// together object files from the assembler into a single blob.
586
587
CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple,
588
                             const ToolChain &HostTC, const ArgList &Args,
589
                             const Action::OffloadKind OK)
590
    : ToolChain(D, Triple, Args), HostTC(HostTC),
591
337
      CudaInstallation(D, HostTC.getTriple(), Args), OK(OK) {
592
337
  if (CudaInstallation.isValid()) {
593
42
    CudaInstallation.WarnIfUnsupportedVersion();
594
42
    getProgramPaths().push_back(std::string(CudaInstallation.getBinPath()));
595
42
  }
596
  // Look up binaries in the driver directory; this is used to
597
  // discover the clang-offload-bundler executable.
598
337
  getProgramPaths().push_back(getDriver().Dir);
599
337
}
600
601
314
std::string CudaToolChain::getInputFilename(const InputInfo &Input) const {
602
  // Only object files are changed; assembly files, for example, keep their .s
603
  // extensions. CUDA also continues to use .o, since it uses fatbinary rather
604
  // than nvlink.
605
314
  if (!(OK == Action::OFK_OpenMP && Input.getType() == types::TY_Object))
606
216
    return ToolChain::getInputFilename(Input);
607
608
  // Replace extension for object files with cubin because nvlink relies on
609
  // these particular file names.
610
98
  SmallString<256> Filename(ToolChain::getInputFilename(Input));
611
98
  llvm::sys::path::replace_extension(Filename, "cubin");
612
98
  return std::string(Filename.str());
613
98
}
614
615
void CudaToolChain::addClangTargetOptions(
616
    const llvm::opt::ArgList &DriverArgs,
617
    llvm::opt::ArgStringList &CC1Args,
618
333
    Action::OffloadKind DeviceOffloadingKind) const {
619
333
  HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
620
621
333
  StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
622
333
  assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
623
333
  assert((DeviceOffloadingKind == Action::OFK_OpenMP ||
624
333
          DeviceOffloadingKind == Action::OFK_Cuda) &&
625
333
         "Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.");
626
627
333
  if (DeviceOffloadingKind == Action::OFK_Cuda) {
628
272
    CC1Args.push_back("-fcuda-is-device");
629
630
272
    if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
631
272
                           options::OPT_fno_cuda_approx_transcendentals, false))
632
1
      CC1Args.push_back("-fcuda-approx-transcendentals");
633
634
272
    if (DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
635
272
                           false))
636
7
      CC1Args.push_back("-fgpu-rdc");
637
272
  }
638
639
333
  if (DriverArgs.hasArg(options::OPT_nogpulib))
640
56
    return;
641
642
277
  std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(GpuArch);
643
644
277
  if (LibDeviceFile.empty()) {
645
240
    if (DeviceOffloadingKind == Action::OFK_OpenMP &&
646
49
        DriverArgs.hasArg(options::OPT_S))
647
3
      return;
648
649
237
    getDriver().Diag(diag::err_drv_no_cuda_libdevice) << GpuArch;
650
237
    return;
651
237
  }
652
653
37
  CC1Args.push_back("-mlink-builtin-bitcode");
654
37
  CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
655
656
  // New CUDA versions often introduce new instructions that are only supported
657
  // by a new PTX version, so we need to raise the PTX level to enable them in
658
  // the NVPTX back-end.
659
37
  const char *PtxFeature = nullptr;
660
37
  switch (CudaInstallation.version()) {
661
0
  case CudaVersion::CUDA_110:
662
0
    PtxFeature = "+ptx70";
663
0
    break;
664
0
  case CudaVersion::CUDA_102:
665
0
    PtxFeature = "+ptx65";
666
0
    break;
667
1
  case CudaVersion::CUDA_101:
668
1
    PtxFeature = "+ptx64";
669
1
    break;
670
0
  case CudaVersion::CUDA_100:
671
0
    PtxFeature = "+ptx63";
672
0
    break;
673
0
  case CudaVersion::CUDA_92:
674
0
    PtxFeature = "+ptx61";
675
0
    break;
676
0
  case CudaVersion::CUDA_91:
677
0
    PtxFeature = "+ptx61";
678
0
    break;
679
3
  case CudaVersion::CUDA_90:
680
3
    PtxFeature = "+ptx60";
681
3
    break;
682
33
  default:
683
33
    PtxFeature = "+ptx42";
684
37
  }
685
37
  CC1Args.append({"-target-feature", PtxFeature});
686
37
  if (DriverArgs.hasFlag(options::OPT_fcuda_short_ptr,
687
37
                         options::OPT_fno_cuda_short_ptr, false))
688
0
    CC1Args.append({"-mllvm", "--nvptx-short-ptr"});
689
690
37
  if (CudaInstallation.version() >= CudaVersion::UNKNOWN)
691
37
    CC1Args.push_back(DriverArgs.MakeArgString(
692
37
        Twine("-target-sdk-version=") +
693
37
        CudaVersionToString(CudaInstallation.version())));
694
695
37
  if (DeviceOffloadingKind == Action::OFK_OpenMP) {
696
9
    SmallVector<StringRef, 8> LibraryPaths;
697
9
    if (const Arg *A = DriverArgs.getLastArg(options::OPT_libomptarget_nvptx_path_EQ))
698
3
      LibraryPaths.push_back(A->getValue());
699
700
    // Add user defined library paths from LIBRARY_PATH.
701
9
    llvm::Optional<std::string> LibPath =
702
9
        llvm::sys::Process::GetEnv("LIBRARY_PATH");
703
9
    if (LibPath) {
704
3
      SmallVector<StringRef, 8> Frags;
705
3
      const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator, '\0'};
706
3
      llvm::SplitString(*LibPath, Frags, EnvPathSeparatorStr);
707
3
      for (StringRef Path : Frags)
708
3
        LibraryPaths.emplace_back(Path.trim());
709
3
    }
710
711
    // Add path to lib / lib64 folder.
712
9
    SmallString<256> DefaultLibPath =
713
9
        llvm::sys::path::parent_path(getDriver().Dir);
714
9
    llvm::sys::path::append(DefaultLibPath, Twine("lib") + CLANG_LIBDIR_SUFFIX);
715
9
    LibraryPaths.emplace_back(DefaultLibPath.c_str());
716
717
9
    std::string LibOmpTargetName =
718
9
      "libomptarget-nvptx-" + GpuArch.str() + ".bc";
719
9
    bool FoundBCLibrary = false;
720
9
    for (StringRef LibraryPath : LibraryPaths) {
721
9
      SmallString<128> LibOmpTargetFile(LibraryPath);
722
9
      llvm::sys::path::append(LibOmpTargetFile, LibOmpTargetName);
723
9
      if (llvm::sys::fs::exists(LibOmpTargetFile)) {
724
6
        CC1Args.push_back("-mlink-builtin-bitcode");
725
6
        CC1Args.push_back(DriverArgs.MakeArgString(LibOmpTargetFile));
726
6
        FoundBCLibrary = true;
727
6
        break;
728
6
      }
729
9
    }
730
9
    if (!FoundBCLibrary)
731
3
      getDriver().Diag(diag::warn_drv_omp_offload_target_missingbcruntime)
732
3
          << LibOmpTargetName;
733
9
  }
734
37
}
735
736
llvm::DenormalMode CudaToolChain::getDefaultDenormalModeForType(
737
    const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
738
666
    const llvm::fltSemantics *FPType) const {
739
666
  if (JA.getOffloadingDeviceKind() == Action::OFK_Cuda) {
740
544
    if (FPType && FPType == &llvm::APFloat::IEEEsingle() &&
741
272
        DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
742
272
                           options::OPT_fno_cuda_flush_denormals_to_zero,
743
272
                           false))
744
3
      return llvm::DenormalMode::getPreserveSign();
745
663
  }
746
747
663
  assert(JA.getOffloadingDeviceKind() != Action::OFK_Host);
748
663
  return llvm::DenormalMode::getIEEE();
749
663
}
750
751
48
bool CudaToolChain::supportsDebugInfoOption(const llvm::opt::Arg *A) const {
752
48
  const Option &O = A->getOption();
753
48
  return (O.matches(options::OPT_gN_Group) &&
754
26
          !O.matches(options::OPT_gmodules)) ||
755
22
         O.matches(options::OPT_g_Flag) ||
756
4
         O.matches(options::OPT_ggdbN_Group) || O.matches(options::OPT_ggdb) ||
757
4
         O.matches(options::OPT_gdwarf) || O.matches(options::OPT_gdwarf_2) ||
758
4
         O.matches(options::OPT_gdwarf_3) || O.matches(options::OPT_gdwarf_4) ||
759
4
         O.matches(options::OPT_gdwarf_5) ||
760
4
         O.matches(options::OPT_gcolumn_info);
761
48
}
762
763
void CudaToolChain::adjustDebugInfoKind(
764
333
    codegenoptions::DebugInfoKind &DebugInfoKind, const ArgList &Args) const {
765
333
  switch (mustEmitDebugInfo(Args)) {
766
300
  case DisableDebugInfo:
767
300
    DebugInfoKind = codegenoptions::NoDebugInfo;
768
300
    break;
769
8
  case DebugDirectivesOnly:
770
8
    DebugInfoKind = codegenoptions::DebugDirectivesOnly;
771
8
    break;
772
25
  case EmitSameDebugInfoAsHost:
773
    // Use same debug info level as the host.
774
25
    break;
775
333
  }
776
333
}
777
778
void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
779
262
                                       ArgStringList &CC1Args) const {
780
  // Check our CUDA version if we're going to include the CUDA headers.
781
262
  if (!DriverArgs.hasArg(options::OPT_nogpuinc) &&
782
207
      !DriverArgs.hasArg(options::OPT_no_cuda_version_check)) {
783
206
    StringRef Arch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
784
206
    assert(!Arch.empty() && "Must have an explicit GPU arch.");
785
206
    CudaInstallation.CheckCudaVersionSupportsArch(StringToCudaArch(Arch));
786
206
  }
787
262
  CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
788
262
}
789
790
llvm::opt::DerivedArgList *
791
CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
792
                             StringRef BoundArch,
793
685
                             Action::OffloadKind DeviceOffloadKind) const {
794
685
  DerivedArgList *DAL =
795
685
      HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
796
685
  if (!DAL)
797
545
    DAL = new DerivedArgList(Args.getBaseArgs());
798
799
685
  const OptTable &Opts = getDriver().getOpts();
800
801
  // For OpenMP device offloading, append derived arguments. Make sure
802
  // flags are not duplicated.
803
  // Also append the compute capability.
804
685
  if (DeviceOffloadKind == Action::OFK_OpenMP) {
805
405
    for (Arg *A : Args) {
806
405
      bool IsDuplicate = false;
807
2.09k
      for (Arg *DALArg : *DAL) {
808
2.09k
        if (A == DALArg) {
809
405
          IsDuplicate = true;
810
405
          break;
811
405
        }
812
2.09k
      }
813
405
      if (!IsDuplicate)
814
0
        DAL->append(A);
815
405
    }
816
817
47
    StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ);
818
47
    if (Arch.empty())
819
15
      DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
820
15
                        CLANG_OPENMP_NVPTX_DEFAULT_ARCH);
821
822
47
    return DAL;
823
47
  }
824
825
4.64k
  for (Arg *A : Args) {
826
4.64k
    DAL->append(A);
827
4.64k
  }
828
829
638
  if (!BoundArch.empty()) {
830
274
    DAL->eraseArg(options::OPT_march_EQ);
831
274
    DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), BoundArch);
832
274
  }
833
638
  return DAL;
834
638
}
835
836
241
Tool *CudaToolChain::buildAssembler() const {
837
241
  return new tools::NVPTX::Assembler(*this);
838
241
}
839
840
221
Tool *CudaToolChain::buildLinker() const {
841
221
  if (OK == Action::OFK_OpenMP)
842
44
    return new tools::NVPTX::OpenMPLinker(*this);
843
177
  return new tools::NVPTX::Linker(*this);
844
177
}
845
846
333
void CudaToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
847
333
  HostTC.addClangWarningOptions(CC1Args);
848
333
}
849
850
ToolChain::CXXStdlibType
851
0
CudaToolChain::GetCXXStdlibType(const ArgList &Args) const {
852
0
  return HostTC.GetCXXStdlibType(Args);
853
0
}
854
855
void CudaToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
856
600
                                              ArgStringList &CC1Args) const {
857
600
  HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
858
600
}
859
860
void CudaToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args,
861
475
                                                 ArgStringList &CC1Args) const {
862
475
  HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args);
863
475
}
864
865
void CudaToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
866
0
                                        ArgStringList &CC1Args) const {
867
0
  HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
868
0
}
869
870
274
SanitizerMask CudaToolChain::getSupportedSanitizers() const {
871
  // The CudaToolChain only supports sanitizers in the sense that it allows
872
  // sanitizer arguments on the command line if they are supported by the host
873
  // toolchain. The CudaToolChain will actually ignore any command line
874
  // arguments for any of these "supported" sanitizers. That means that no
875
  // sanitization of device code is actually supported at this time.
876
  //
877
  // This behavior is necessary because the host and device toolchains
878
  // invocations often share the command line, so the device toolchain must
879
  // tolerate flags meant only for the host toolchain.
880
274
  return HostTC.getSupportedSanitizers();
881
274
}
882
883
VersionTuple CudaToolChain::computeMSVCVersion(const Driver *D,
884
333
                                               const ArgList &Args) const {
885
333
  return HostTC.computeMSVCVersion(D, Args);
886
333
}