Coverage Report

Created: 2020-02-15 09:57

/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/Driver/ToolChains/Cuda.cpp
Line
Count
Source (jump to first uncovered line)
1
//===--- Cuda.cpp - Cuda Tool and ToolChain Implementations -----*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
9
#include "Cuda.h"
10
#include "CommonArgs.h"
11
#include "InputInfo.h"
12
#include "clang/Basic/Cuda.h"
13
#include "clang/Config/config.h"
14
#include "clang/Driver/Compilation.h"
15
#include "clang/Driver/Distro.h"
16
#include "clang/Driver/Driver.h"
17
#include "clang/Driver/DriverDiagnostic.h"
18
#include "clang/Driver/Options.h"
19
#include "llvm/Option/ArgList.h"
20
#include "llvm/Support/FileSystem.h"
21
#include "llvm/Support/Path.h"
22
#include "llvm/Support/Process.h"
23
#include "llvm/Support/Program.h"
24
#include "llvm/Support/TargetParser.h"
25
#include "llvm/Support/VirtualFileSystem.h"
26
#include <system_error>
27
28
using namespace clang::driver;
29
using namespace clang::driver::toolchains;
30
using namespace clang::driver::tools;
31
using namespace clang;
32
using namespace llvm::opt;
33
34
// Parses the contents of version.txt in an CUDA installation.  It should
35
// contain one line of the from e.g. "CUDA Version 7.5.2".
36
32
static CudaVersion ParseCudaVersionFile(const Driver &D, llvm::StringRef V) {
37
32
  if (!V.startswith("CUDA Version "))
38
0
    return CudaVersion::UNKNOWN;
39
32
  V = V.substr(strlen("CUDA Version "));
40
32
  SmallVector<StringRef,4> VersionParts;
41
32
  V.split(VersionParts, '.');
42
32
  if (VersionParts.size() < 2)
43
0
    return CudaVersion::UNKNOWN;
44
32
  std::string MajorMinor = join_items(".", VersionParts[0], VersionParts[1]);
45
32
  CudaVersion Version = CudaStringToVersion(MajorMinor);
46
32
  if (Version != CudaVersion::UNKNOWN)
47
30
    return Version;
48
2
49
2
  // Issue a warning and assume that the version we've found is compatible with
50
2
  // the latest version we support.
51
2
  D.Diag(diag::warn_drv_unknown_cuda_version)
52
2
      << MajorMinor << CudaVersionToString(CudaVersion::LATEST);
53
2
  return CudaVersion::LATEST;
54
2
}
55
56
CudaInstallationDetector::CudaInstallationDetector(
57
    const Driver &D, const llvm::Triple &HostTriple,
58
    const llvm::opt::ArgList &Args)
59
29.6k
    : D(D) {
60
29.6k
  struct Candidate {
61
29.6k
    std::string Path;
62
29.6k
    bool StrictChecking;
63
29.6k
64
29.6k
    Candidate(std::string Path, bool StrictChecking = false)
65
117k
        : Path(Path), StrictChecking(StrictChecking) {}
66
29.6k
  };
67
29.6k
  SmallVector<Candidate, 4> Candidates;
68
29.6k
69
29.6k
  // In decreasing order so we prefer newer versions to older versions.
70
29.6k
  std::initializer_list<const char *> Versions = {"8.0", "7.5", "7.0"};
71
29.6k
72
29.6k
  if (Args.hasArg(clang::driver::options::OPT_cuda_path_EQ)) {
73
82
    Candidates.emplace_back(
74
82
        Args.getLastArgValue(clang::driver::options::OPT_cuda_path_EQ).str());
75
29.5k
  } else if (HostTriple.isOSWindows()) {
76
815
    for (const char *Ver : Versions)
77
2.44k
      Candidates.emplace_back(
78
2.44k
          D.SysRoot + "/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" +
79
2.44k
          Ver);
80
28.7k
  } else {
81
28.7k
    if (!Args.hasArg(clang::driver::options::OPT_cuda_path_ignore_env)) {
82
28.7k
      // Try to find ptxas binary. If the executable is located in a directory
83
28.7k
      // called 'bin/', its parent directory might be a good guess for a valid
84
28.7k
      // CUDA installation.
85
28.7k
      // However, some distributions might installs 'ptxas' to /usr/bin. In that
86
28.7k
      // case the candidate would be '/usr' which passes the following checks
87
28.7k
      // because '/usr/include' exists as well. To avoid this case, we always
88
28.7k
      // check for the directory potentially containing files for libdevice,
89
28.7k
      // even if the user passes -nocudalib.
90
28.7k
      if (llvm::ErrorOr<std::string> ptxas =
91
16
              llvm::sys::findProgramByName("ptxas")) {
92
16
        SmallString<256> ptxasAbsolutePath;
93
16
        llvm::sys::fs::real_path(*ptxas, ptxasAbsolutePath);
94
16
95
16
        StringRef ptxasDir = llvm::sys::path::parent_path(ptxasAbsolutePath);
96
16
        if (llvm::sys::path::filename(ptxasDir) == "bin")
97
16
          Candidates.emplace_back(
98
16
              std::string(llvm::sys::path::parent_path(ptxasDir)),
99
16
              /*StrictChecking=*/true);
100
16
      }
101
28.7k
    }
102
28.7k
103
28.7k
    Candidates.emplace_back(D.SysRoot + "/usr/local/cuda");
104
28.7k
    for (const char *Ver : Versions)
105
86.2k
      Candidates.emplace_back(D.SysRoot + "/usr/local/cuda-" + Ver);
106
28.7k
107
28.7k
    Distro Dist(D.getVFS(), llvm::Triple(llvm::sys::getProcessTriple()));
108
28.7k
    if (Dist.IsDebian() || 
Dist.IsUbuntu()28.7k
)
109
0
      // Special case for Debian to have nvidia-cuda-toolkit work
110
0
      // out of the box. More info on http://bugs.debian.org/882505
111
0
      Candidates.emplace_back(D.SysRoot + "/usr/lib/cuda");
112
28.7k
  }
113
29.6k
114
29.6k
  bool NoCudaLib = Args.hasArg(options::OPT_nogpulib);
115
29.6k
116
117k
  for (const auto &Candidate : Candidates) {
117
117k
    InstallPath = Candidate.Path;
118
117k
    if (
InstallPath.empty()117k
|| !D.getVFS().exists(InstallPath))
119
117k
      continue;
120
100
121
100
    BinPath = InstallPath + "/bin";
122
100
    IncludePath = InstallPath + "/include";
123
100
    LibDevicePath = InstallPath + "/nvvm/libdevice";
124
100
125
100
    auto &FS = D.getVFS();
126
101
    if (
!(100
FS.exists(IncludePath)100
&& FS.exists(BinPath)))
127
0
      continue;
128
100
    bool CheckLibDevice = (!NoCudaLib || 
Candidate.StrictChecking14
);
129
100
    if (CheckLibDevice && 
!FS.exists(LibDevicePath)91
)
130
12
      continue;
131
88
132
88
    // On Linux, we have both lib and lib64 directories, and we need to choose
133
88
    // based on our triple.  On MacOS, we have only a lib directory.
134
88
    //
135
88
    // It's sufficient for our purposes to be flexible: If both lib and lib64
136
88
    // exist, we choose whichever one matches our triple.  Otherwise, if only
137
88
    // lib exists, we use it.
138
88
    if (HostTriple.isArch64Bit() && 
FS.exists(InstallPath + "/lib64")44
)
139
42
      LibPath = InstallPath + "/lib64";
140
46
    else if (FS.exists(InstallPath + "/lib"))
141
47
      LibPath = InstallPath + "/lib";
142
18.4E
    else
143
18.4E
      continue;
144
89
145
89
    llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile =
146
89
        FS.getBufferForFile(InstallPath + "/version.txt");
147
89
    if (!VersionFile) {
148
57
      // CUDA 7.0 doesn't have a version.txt, so guess that's our version if
149
57
      // version.txt isn't present.
150
57
      Version = CudaVersion::CUDA_70;
151
57
    } else {
152
32
      Version = ParseCudaVersionFile(D, (*VersionFile)->getBuffer());
153
32
    }
154
89
155
89
    if (Version >= CudaVersion::CUDA_90) {
156
4
      // CUDA-9+ uses single libdevice file for all GPU variants.
157
4
      std::string FilePath = LibDevicePath + "/libdevice.10.bc";
158
4
      if (FS.exists(FilePath)) {
159
4
        for (const char *GpuArchName :
160
4
             {"sm_30", "sm_32", "sm_35", "sm_37", "sm_50", "sm_52", "sm_53",
161
52
              "sm_60", "sm_61", "sm_62", "sm_70", "sm_72", "sm_75"}) {
162
52
          const CudaArch GpuArch = StringToCudaArch(GpuArchName);
163
52
          if (Version >= MinVersionForCudaArch(GpuArch) &&
164
52
              
Version <= MaxVersionForCudaArch(GpuArch)48
)
165
48
            LibDeviceMap[GpuArchName] = FilePath;
166
52
        }
167
4
      }
168
85
    } else {
169
85
      std::error_code EC;
170
85
      for (llvm::sys::fs::directory_iterator LI(LibDevicePath, EC), LE;
171
303
           !EC && 
LI != LE299
;
LI = LI.increment(EC)218
) {
172
218
        StringRef FilePath = LI->path();
173
218
        StringRef FileName = llvm::sys::path::filename(FilePath);
174
218
        // Process all bitcode filenames that look like
175
218
        // libdevice.compute_XX.YY.bc
176
218
        const StringRef LibDeviceName = "libdevice.";
177
218
        if (!(FileName.startswith(LibDeviceName) && FileName.endswith(".bc")))
178
0
          continue;
179
218
        StringRef GpuArch = FileName.slice(
180
218
            LibDeviceName.size(), FileName.find('.', LibDeviceName.size()));
181
218
        LibDeviceMap[GpuArch] = FilePath.str();
182
218
        // Insert map entries for specific devices with this compute
183
218
        // capability. NVCC's choice of the libdevice library version is
184
218
        // rather peculiar and depends on the CUDA version.
185
218
        if (GpuArch == "compute_20") {
186
28
          LibDeviceMap["sm_20"] = std::string(FilePath);
187
28
          LibDeviceMap["sm_21"] = std::string(FilePath);
188
28
          LibDeviceMap["sm_32"] = std::string(FilePath);
189
190
        } else if (GpuArch == "compute_30") {
190
81
          LibDeviceMap["sm_30"] = std::string(FilePath);
191
81
          if (Version < CudaVersion::CUDA_80) {
192
53
            LibDeviceMap["sm_50"] = std::string(FilePath);
193
53
            LibDeviceMap["sm_52"] = std::string(FilePath);
194
53
            LibDeviceMap["sm_53"] = std::string(FilePath);
195
53
          }
196
81
          LibDeviceMap["sm_60"] = std::string(FilePath);
197
81
          LibDeviceMap["sm_61"] = std::string(FilePath);
198
81
          LibDeviceMap["sm_62"] = std::string(FilePath);
199
109
        } else if (GpuArch == "compute_35") {
200
81
          LibDeviceMap["sm_35"] = std::string(FilePath);
201
81
          LibDeviceMap["sm_37"] = std::string(FilePath);
202
81
        } else 
if (28
GpuArch == "compute_50"28
) {
203
28
          if (Version >= CudaVersion::CUDA_80) {
204
28
            LibDeviceMap["sm_50"] = std::string(FilePath);
205
28
            LibDeviceMap["sm_52"] = std::string(FilePath);
206
28
            LibDeviceMap["sm_53"] = std::string(FilePath);
207
28
          }
208
28
        }
209
218
      }
210
85
    }
211
89
212
89
    // Check that we have found at least one libdevice that we can link in if
213
89
    // -nocudalib hasn't been specified.
214
89
    if (LibDeviceMap.empty() && 
!NoCudaLib4
)
215
0
      continue;
216
89
217
89
    IsValid = true;
218
89
    break;
219
89
  }
220
29.6k
}
221
222
void CudaInstallationDetector::AddCudaIncludeArgs(
223
455
    const ArgList &DriverArgs, ArgStringList &CC1Args) const {
224
455
  if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
225
455
    // Add cuda_wrappers/* to our system include path.  This lets us wrap
226
455
    // standard library headers.
227
455
    SmallString<128> P(D.ResourceDir);
228
455
    llvm::sys::path::append(P, "include");
229
455
    llvm::sys::path::append(P, "cuda_wrappers");
230
455
    CC1Args.push_back("-internal-isystem");
231
455
    CC1Args.push_back(DriverArgs.MakeArgString(P));
232
455
  }
233
455
234
455
  if (DriverArgs.hasArg(options::OPT_nocudainc))
235
89
    return;
236
366
237
366
  if (!isValid()) {
238
310
    D.Diag(diag::err_drv_no_cuda_installation);
239
310
    return;
240
310
  }
241
56
242
56
  CC1Args.push_back("-internal-isystem");
243
56
  CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath()));
244
56
  CC1Args.push_back("-include");
245
56
  CC1Args.push_back("__clang_cuda_runtime_wrapper.h");
246
56
}
247
248
void CudaInstallationDetector::CheckCudaVersionSupportsArch(
249
448
    CudaArch Arch) const {
250
448
  if (Arch == CudaArch::UNKNOWN || Version == CudaVersion::UNKNOWN ||
251
448
      
ArchsWithBadVersion.count(Arch) > 062
)
252
391
    return;
253
57
254
57
  auto MinVersion = MinVersionForCudaArch(Arch);
255
57
  auto MaxVersion = MaxVersionForCudaArch(Arch);
256
57
  if (Version < MinVersion || 
Version > MaxVersion51
) {
257
7
    ArchsWithBadVersion.insert(Arch);
258
7
    D.Diag(diag::err_drv_cuda_version_unsupported)
259
7
        << CudaArchToString(Arch) << CudaVersionToString(MinVersion)
260
7
        << CudaVersionToString(MaxVersion) << InstallPath
261
7
        << CudaVersionToString(Version);
262
7
  }
263
57
}
264
265
354
void CudaInstallationDetector::print(raw_ostream &OS) const {
266
354
  if (isValid())
267
48
    OS << "Found CUDA installation: " << InstallPath << ", version "
268
48
       << CudaVersionToString(Version) << "\n";
269
354
}
270
271
namespace {
272
/// Debug info level for the NVPTX devices. We may need to emit different debug
273
/// info level for the host and for the device itselfi. This type controls
274
/// emission of the debug info for the devices. It either prohibits disable info
275
/// emission completely, or emits debug directives only, or emits same debug
276
/// info as for the host.
277
enum DeviceDebugInfoLevel {
278
  DisableDebugInfo,        /// Do not emit debug info for the devices.
279
  DebugDirectivesOnly,     /// Emit only debug directives.
280
  EmitSameDebugInfoAsHost, /// Use the same debug info level just like for the
281
                           /// host.
282
};
283
} // anonymous namespace
284
285
/// Define debug info level for the NVPTX devices. If the debug info for both
286
/// the host and device are disabled (-g0/-ggdb0 or no debug options at all). If
287
/// only debug directives are requested for the both host and device
288
/// (-gline-directvies-only), or the debug info only for the device is disabled
289
/// (optimization is on and --cuda-noopt-device-debug was not specified), the
290
/// debug directves only must be emitted for the device. Otherwise, use the same
291
/// debug info level just like for the host (with the limitations of only
292
/// supported DWARF2 standard).
293
785
static DeviceDebugInfoLevel mustEmitDebugInfo(const ArgList &Args) {
294
785
  const Arg *A = Args.getLastArg(options::OPT_O_Group);
295
785
  bool IsDebugEnabled = !A || 
A->getOption().matches(options::OPT_O0)105
||
296
785
                        Args.hasFlag(options::OPT_cuda_noopt_device_debug,
297
78
                                     options::OPT_no_cuda_noopt_device_debug,
298
78
                                     /*Default=*/false);
299
785
  if (const Arg *A = Args.getLastArg(options::OPT_g_Group)) {
300
110
    const Option &Opt = A->getOption();
301
110
    if (Opt.matches(options::OPT_gN_Group)) {
302
54
      if (Opt.matches(options::OPT_g0) || 
Opt.matches(options::OPT_ggdb0)48
)
303
12
        return DisableDebugInfo;
304
42
      if (Opt.matches(options::OPT_gline_directives_only))
305
6
        return DebugDirectivesOnly;
306
92
    }
307
92
    return IsDebugEnabled ? 
EmitSameDebugInfoAsHost74
:
DebugDirectivesOnly18
;
308
92
  }
309
675
  return DisableDebugInfo;
310
675
}
311
312
void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
313
                                    const InputInfo &Output,
314
                                    const InputInfoList &Inputs,
315
                                    const ArgList &Args,
316
249
                                    const char *LinkingOutput) const {
317
249
  const auto &TC =
318
249
      static_cast<const toolchains::CudaToolChain &>(getToolChain());
319
249
  assert(TC.getTriple().isNVPTX() && "Wrong platform");
320
249
321
249
  StringRef GPUArchName;
322
249
  // If this is an OpenMP action we need to extract the device architecture
323
249
  // from the -march=arch option. This option may come from -Xopenmp-target
324
249
  // flag or the default value.
325
249
  if (JA.isDeviceOffloading(Action::OFK_OpenMP)) {
326
43
    GPUArchName = Args.getLastArgValue(options::OPT_march_EQ);
327
43
    assert(!GPUArchName.empty() && "Must have an architecture passed in.");
328
43
  } else
329
206
    GPUArchName = JA.getOffloadingArch();
330
249
331
249
  // Obtain architecture from the action.
332
249
  CudaArch gpu_arch = StringToCudaArch(GPUArchName);
333
249
  assert(gpu_arch != CudaArch::UNKNOWN &&
334
249
         "Device action expected to have an architecture.");
335
249
336
249
  // Check that our installation's ptxas supports gpu_arch.
337
249
  if (!Args.hasArg(options::OPT_no_cuda_version_check)) {
338
248
    TC.CudaInstallation.CheckCudaVersionSupportsArch(gpu_arch);
339
248
  }
340
249
341
249
  ArgStringList CmdArgs;
342
249
  CmdArgs.push_back(TC.getTriple().isArch64Bit() ? 
"-m64"224
:
"-m32"25
);
343
249
  DeviceDebugInfoLevel DIKind = mustEmitDebugInfo(Args);
344
249
  if (DIKind == EmitSameDebugInfoAsHost) {
345
25
    // ptxas does not accept -g option if optimization is enabled, so
346
25
    // we ignore the compiler's -O* options if we want debug info.
347
25
    CmdArgs.push_back("-g");
348
25
    CmdArgs.push_back("--dont-merge-basicblocks");
349
25
    CmdArgs.push_back("--return-at-end");
350
224
  } else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
351
20
    // Map the -O we received to -O{0,1,2,3}.
352
20
    //
353
20
    // TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's
354
20
    // default, so it may correspond more closely to the spirit of clang -O2.
355
20
356
20
    // -O3 seems like the least-bad option when -Osomething is specified to
357
20
    // clang but it isn't handled below.
358
20
    StringRef OOpt = "3";
359
20
    if (A->getOption().matches(options::OPT_O4) ||
360
20
        
A->getOption().matches(options::OPT_Ofast)19
)
361
2
      OOpt = "3";
362
18
    else if (A->getOption().matches(options::OPT_O0))
363
3
      OOpt = "0";
364
15
    else if (A->getOption().matches(options::OPT_O)) {
365
15
      // -Os, -Oz, and -O(anything else) map to -O2, for lack of better options.
366
15
      OOpt = llvm::StringSwitch<const char *>(A->getValue())
367
15
                 .Case("1", "1")
368
15
                 .Case("2", "2")
369
15
                 .Case("3", "3")
370
15
                 .Case("s", "2")
371
15
                 .Case("z", "2")
372
15
                 .Default("2");
373
15
    }
374
20
    CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-O") + OOpt));
375
204
  } else {
376
204
    // If no -O was passed, pass -O0 to ptxas -- no opt flag should correspond
377
204
    // to no optimizations, but ptxas's default is -O3.
378
204
    CmdArgs.push_back("-O0");
379
204
  }
380
249
  if (DIKind == DebugDirectivesOnly)
381
8
    CmdArgs.push_back("-lineinfo");
382
249
383
249
  // Pass -v to ptxas if it was passed to the driver.
384
249
  if (Args.hasArg(options::OPT_v))
385
35
    CmdArgs.push_back("-v");
386
249
387
249
  CmdArgs.push_back("--gpu-name");
388
249
  CmdArgs.push_back(Args.MakeArgString(CudaArchToString(gpu_arch)));
389
249
  CmdArgs.push_back("--output-file");
390
249
  CmdArgs.push_back(Args.MakeArgString(TC.getInputFilename(Output)));
391
249
  for (const auto& II : Inputs)
392
249
    CmdArgs.push_back(Args.MakeArgString(II.getFilename()));
393
249
394
249
  for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_ptxas))
395
2
    CmdArgs.push_back(Args.MakeArgString(A));
396
249
397
249
  bool Relocatable = false;
398
249
  if (JA.isOffloading(Action::OFK_OpenMP))
399
43
    // In OpenMP we need to generate relocatable code.
400
43
    Relocatable = Args.hasFlag(options::OPT_fopenmp_relocatable_target,
401
43
                               options::OPT_fnoopenmp_relocatable_target,
402
43
                               /*Default=*/true);
403
206
  else if (JA.isOffloading(Action::OFK_Cuda))
404
206
    Relocatable = Args.hasFlag(options::OPT_fgpu_rdc,
405
206
                               options::OPT_fno_gpu_rdc, /*Default=*/false);
406
249
407
249
  if (Relocatable)
408
49
    CmdArgs.push_back("-c");
409
249
410
249
  const char *Exec;
411
249
  if (Arg *A = Args.getLastArg(options::OPT_ptxas_path_EQ))
412
1
    Exec = A->getValue();
413
248
  else
414
248
    Exec = Args.MakeArgString(TC.GetProgramPath("ptxas"));
415
249
  C.addCommand(std::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
416
249
}
417
418
183
static bool shouldIncludePTX(const ArgList &Args, const char *gpu_arch) {
419
183
  bool includePTX = true;
420
1.45k
  for (Arg *A : Args) {
421
1.45k
    if (!(A->getOption().matches(options::OPT_cuda_include_ptx_EQ) ||
422
1.45k
          
A->getOption().matches(options::OPT_no_cuda_include_ptx_EQ)1.44k
))
423
1.42k
      continue;
424
28
    A->claim();
425
28
    const StringRef ArchStr = A->getValue();
426
28
    if (ArchStr == "all" || 
ArchStr == gpu_arch12
) {
427
22
      includePTX = A->getOption().matches(options::OPT_cuda_include_ptx_EQ);
428
22
      continue;
429
22
    }
430
28
  }
431
183
  return includePTX;
432
183
}
433
434
// All inputs to this linker must be from CudaDeviceActions, as we need to look
435
// at the Inputs' Actions in order to figure out which GPU architecture they
436
// correspond to.
437
void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
438
                                 const InputInfo &Output,
439
                                 const InputInfoList &Inputs,
440
                                 const ArgList &Args,
441
167
                                 const char *LinkingOutput) const {
442
167
  const auto &TC =
443
167
      static_cast<const toolchains::CudaToolChain &>(getToolChain());
444
167
  assert(TC.getTriple().isNVPTX() && "Wrong platform");
445
167
446
167
  ArgStringList CmdArgs;
447
167
  if (TC.CudaInstallation.version() <= CudaVersion::CUDA_100)
448
166
    CmdArgs.push_back("--cuda");
449
167
  CmdArgs.push_back(TC.getTriple().isArch64Bit() ? 
"-64"142
:
"-32"25
);
450
167
  CmdArgs.push_back(Args.MakeArgString("--create"));
451
167
  CmdArgs.push_back(Args.MakeArgString(Output.getFilename()));
452
167
  if (mustEmitDebugInfo(Args) == EmitSameDebugInfoAsHost)
453
14
    CmdArgs.push_back("-g");
454
167
455
366
  for (const auto& II : Inputs) {
456
366
    auto *A = II.getAction();
457
366
    assert(A->getInputs().size() == 1 &&
458
366
           "Device offload action is expected to have a single input");
459
366
    const char *gpu_arch_str = A->getOffloadingArch();
460
366
    assert(gpu_arch_str &&
461
366
           "Device action expected to have associated a GPU architecture!");
462
366
    CudaArch gpu_arch = StringToCudaArch(gpu_arch_str);
463
366
464
366
    if (II.getType() == types::TY_PP_Asm &&
465
366
        
!shouldIncludePTX(Args, gpu_arch_str)183
)
466
8
      continue;
467
358
    // We need to pass an Arch of the form "sm_XX" for cubin files and
468
358
    // "compute_XX" for ptx.
469
358
    const char *Arch =
470
358
        (II.getType() == types::TY_PP_Asm)
471
358
            ? 
CudaVirtualArchToString(VirtualArchForCudaArch(gpu_arch))175
472
358
            : 
gpu_arch_str183
;
473
358
    CmdArgs.push_back(Args.MakeArgString(llvm::Twine("--image=profile=") +
474
358
                                         Arch + ",file=" + II.getFilename()));
475
358
  }
476
167
477
167
  for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary))
478
2
    CmdArgs.push_back(Args.MakeArgString(A));
479
167
480
167
  const char *Exec = Args.MakeArgString(TC.GetProgramPath("fatbinary"));
481
167
  C.addCommand(std::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
482
167
}
483
484
void NVPTX::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
485
                                       const InputInfo &Output,
486
                                       const InputInfoList &Inputs,
487
                                       const ArgList &Args,
488
44
                                       const char *LinkingOutput) const {
489
44
  const auto &TC =
490
44
      static_cast<const toolchains::CudaToolChain &>(getToolChain());
491
44
  assert(TC.getTriple().isNVPTX() && "Wrong platform");
492
44
493
44
  ArgStringList CmdArgs;
494
44
495
44
  // OpenMP uses nvlink to link cubin files. The result will be embedded in the
496
44
  // host binary by the host linker.
497
44
  assert(!JA.isHostOffloading(Action::OFK_OpenMP) &&
498
44
         "CUDA toolchain not expected for an OpenMP host device.");
499
44
500
44
  if (Output.isFilename()) {
501
44
    CmdArgs.push_back("-o");
502
44
    CmdArgs.push_back(Output.getFilename());
503
44
  } else
504
44
    assert(Output.isNothing() && "Invalid output.");
505
44
  if (mustEmitDebugInfo(Args) == EmitSameDebugInfoAsHost)
506
10
    CmdArgs.push_back("-g");
507
44
508
44
  if (Args.hasArg(options::OPT_v))
509
0
    CmdArgs.push_back("-v");
510
44
511
44
  StringRef GPUArch =
512
44
      Args.getLastArgValue(options::OPT_march_EQ);
513
44
  assert(!GPUArch.empty() && "At least one GPU Arch required for ptxas.");
514
44
515
44
  CmdArgs.push_back("-arch");
516
44
  CmdArgs.push_back(Args.MakeArgString(GPUArch));
517
44
518
44
  // Assume that the directory specified with --libomptarget_nvptx_path
519
44
  // contains the static library libomptarget-nvptx.a.
520
44
  if (const Arg *A = Args.getLastArg(options::OPT_libomptarget_nvptx_path_EQ))
521
2
    CmdArgs.push_back(Args.MakeArgString(Twine("-L") + A->getValue()));
522
44
523
44
  // Add paths specified in LIBRARY_PATH environment variable as -L options.
524
44
  addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
525
44
526
44
  // Add paths for the default clang library path.
527
44
  SmallString<256> DefaultLibPath =
528
44
      llvm::sys::path::parent_path(TC.getDriver().Dir);
529
44
  llvm::sys::path::append(DefaultLibPath, "lib" CLANG_LIBDIR_SUFFIX);
530
44
  CmdArgs.push_back(Args.MakeArgString(Twine("-L") + DefaultLibPath));
531
44
532
44
  // Add linking against library implementing OpenMP calls on NVPTX target.
533
44
  CmdArgs.push_back("-lomptarget-nvptx");
534
44
535
46
  for (const auto &II : Inputs) {
536
46
    if (II.getType() == types::TY_LLVM_IR ||
537
46
        II.getType() == types::TY_LTO_IR ||
538
46
        II.getType() == types::TY_LTO_BC ||
539
46
        II.getType() == types::TY_LLVM_BC) {
540
0
      C.getDriver().Diag(diag::err_drv_no_linker_llvm_support)
541
0
          << getToolChain().getTripleString();
542
0
      continue;
543
0
    }
544
46
545
46
    // Currently, we only pass the input files to the linker, we do not pass
546
46
    // any libraries that may be valid only for the host.
547
46
    if (!II.isFilename())
548
0
      continue;
549
46
550
46
    const char *CubinF = C.addTempFile(
551
46
        C.getArgs().MakeArgString(getToolChain().getInputFilename(II)));
552
46
553
46
    CmdArgs.push_back(CubinF);
554
46
  }
555
44
556
44
  const char *Exec =
557
44
      Args.MakeArgString(getToolChain().GetProgramPath("nvlink"));
558
44
  C.addCommand(std::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
559
44
}
560
561
/// CUDA toolchain.  Our assembler is ptxas, and our "linker" is fatbinary,
562
/// which isn't properly a linker but nonetheless performs the step of stitching
563
/// together object files from the assembler into a single blob.
564
565
CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple,
566
                             const ToolChain &HostTC, const ArgList &Args,
567
                             const Action::OffloadKind OK)
568
    : ToolChain(D, Triple, Args), HostTC(HostTC),
569
329
      CudaInstallation(D, HostTC.getTriple(), Args), OK(OK) {
570
329
  if (CudaInstallation.isValid())
571
36
    getProgramPaths().push_back(std::string(CudaInstallation.getBinPath()));
572
329
  // Lookup binaries into the driver directory, this is used to
573
329
  // discover the clang-offload-bundler executable.
574
329
  getProgramPaths().push_back(getDriver().Dir);
575
329
}
576
577
306
std::string CudaToolChain::getInputFilename(const InputInfo &Input) const {
578
306
  // Only object files are changed, for example assembly files keep their .s
579
306
  // extensions. CUDA also continues to use .o as they don't use nvlink but
580
306
  // fatbinary.
581
306
  if (!(OK == Action::OFK_OpenMP && 
Input.getType() == types::TY_Object100
))
582
208
    return ToolChain::getInputFilename(Input);
583
98
584
98
  // Replace extension for object files with cubin because nvlink relies on
585
98
  // these particular file names.
586
98
  SmallString<256> Filename(ToolChain::getInputFilename(Input));
587
98
  llvm::sys::path::replace_extension(Filename, "cubin");
588
98
  return std::string(Filename.str());
589
98
}
590
591
void CudaToolChain::addClangTargetOptions(
592
    const llvm::opt::ArgList &DriverArgs,
593
    llvm::opt::ArgStringList &CC1Args,
594
325
    Action::OffloadKind DeviceOffloadingKind) const {
595
325
  HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
596
325
597
325
  StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
598
325
  assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
599
325
  assert((DeviceOffloadingKind == Action::OFK_OpenMP ||
600
325
          DeviceOffloadingKind == Action::OFK_Cuda) &&
601
325
         "Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.");
602
325
603
325
  if (DeviceOffloadingKind == Action::OFK_Cuda) {
604
264
    CC1Args.push_back("-fcuda-is-device");
605
264
606
264
    if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
607
264
                           options::OPT_fno_cuda_approx_transcendentals, false))
608
1
      CC1Args.push_back("-fcuda-approx-transcendentals");
609
264
610
264
    if (DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
611
264
                           false))
612
7
      CC1Args.push_back("-fgpu-rdc");
613
264
  }
614
325
615
325
  if (DriverArgs.hasArg(options::OPT_nogpulib))
616
54
    return;
617
271
618
271
  std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(GpuArch);
619
271
620
271
  if (LibDeviceFile.empty()) {
621
237
    if (DeviceOffloadingKind == Action::OFK_OpenMP &&
622
237
        
DriverArgs.hasArg(options::OPT_S)49
)
623
3
      return;
624
234
625
234
    getDriver().Diag(diag::err_drv_no_cuda_libdevice) << GpuArch;
626
234
    return;
627
234
  }
628
34
629
34
  CC1Args.push_back("-mlink-builtin-bitcode");
630
34
  CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
631
34
632
34
  // New CUDA versions often introduce new instructions that are only supported
633
34
  // by new PTX version, so we need to raise PTX level to enable them in NVPTX
634
34
  // back-end.
635
34
  const char *PtxFeature = nullptr;
636
34
  switch(CudaInstallation.version()) {
637
1
    case CudaVersion::CUDA_101:
638
1
      PtxFeature = "+ptx64";
639
1
      break;
640
0
    case CudaVersion::CUDA_100:
641
0
      PtxFeature = "+ptx63";
642
0
      break;
643
0
    case CudaVersion::CUDA_92:
644
0
      PtxFeature = "+ptx61";
645
0
      break;
646
0
    case CudaVersion::CUDA_91:
647
0
      PtxFeature = "+ptx61";
648
0
      break;
649
0
    case CudaVersion::CUDA_90:
650
0
      PtxFeature = "+ptx60";
651
0
      break;
652
33
    default:
653
33
      PtxFeature = "+ptx42";
654
34
  }
655
34
  CC1Args.append({"-target-feature", PtxFeature});
656
34
  if (DriverArgs.hasFlag(options::OPT_fcuda_short_ptr,
657
34
                         options::OPT_fno_cuda_short_ptr, false))
658
0
    CC1Args.append({"-mllvm", "--nvptx-short-ptr"});
659
34
660
34
  if (CudaInstallation.version() >= CudaVersion::UNKNOWN)
661
34
    CC1Args.push_back(DriverArgs.MakeArgString(
662
34
        Twine("-target-sdk-version=") +
663
34
        CudaVersionToString(CudaInstallation.version())));
664
34
665
34
  if (DeviceOffloadingKind == Action::OFK_OpenMP) {
666
9
    SmallVector<StringRef, 8> LibraryPaths;
667
9
    if (const Arg *A = DriverArgs.getLastArg(options::OPT_libomptarget_nvptx_path_EQ))
668
3
      LibraryPaths.push_back(A->getValue());
669
9
670
9
    // Add user defined library paths from LIBRARY_PATH.
671
9
    llvm::Optional<std::string> LibPath =
672
9
        llvm::sys::Process::GetEnv("LIBRARY_PATH");
673
9
    if (LibPath) {
674
3
      SmallVector<StringRef, 8> Frags;
675
3
      const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator, '\0'};
676
3
      llvm::SplitString(*LibPath, Frags, EnvPathSeparatorStr);
677
3
      for (StringRef Path : Frags)
678
3
        LibraryPaths.emplace_back(Path.trim());
679
3
    }
680
9
681
9
    // Add path to lib / lib64 folder.
682
9
    SmallString<256> DefaultLibPath =
683
9
        llvm::sys::path::parent_path(getDriver().Dir);
684
9
    llvm::sys::path::append(DefaultLibPath, Twine("lib") + CLANG_LIBDIR_SUFFIX);
685
9
    LibraryPaths.emplace_back(DefaultLibPath.c_str());
686
9
687
9
    std::string LibOmpTargetName =
688
9
      "libomptarget-nvptx-" + GpuArch.str() + ".bc";
689
9
    bool FoundBCLibrary = false;
690
9
    for (StringRef LibraryPath : LibraryPaths) {
691
9
      SmallString<128> LibOmpTargetFile(LibraryPath);
692
9
      llvm::sys::path::append(LibOmpTargetFile, LibOmpTargetName);
693
9
      if (llvm::sys::fs::exists(LibOmpTargetFile)) {
694
6
        CC1Args.push_back("-mlink-builtin-bitcode");
695
6
        CC1Args.push_back(DriverArgs.MakeArgString(LibOmpTargetFile));
696
6
        FoundBCLibrary = true;
697
6
        break;
698
6
      }
699
9
    }
700
9
    if (!FoundBCLibrary)
701
3
      getDriver().Diag(diag::warn_drv_omp_offload_target_missingbcruntime)
702
3
          << LibOmpTargetName;
703
9
  }
704
34
}
705
706
llvm::DenormalMode CudaToolChain::getDefaultDenormalModeForType(
707
    const llvm::opt::ArgList &DriverArgs, Action::OffloadKind DeviceOffloadKind,
708
650
    const llvm::fltSemantics *FPType) const {
709
650
  if (DeviceOffloadKind == Action::OFK_Cuda) {
710
528
    if (FPType && 
FPType == &llvm::APFloat::IEEEsingle()264
&&
711
528
        DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
712
264
                           options::OPT_fno_cuda_flush_denormals_to_zero,
713
264
                           false))
714
2
      return llvm::DenormalMode::getPreserveSign();
715
648
  }
716
648
717
648
  assert(DeviceOffloadKind != Action::OFK_Host);
718
648
  return llvm::DenormalMode::getIEEE();
719
648
}
720
721
48
// Reports whether the debug-info related argument A is accepted by this
// toolchain.
//
// Returns true when A's option matches OPT_gN_Group (excluding OPT_gmodules),
// OPT_g_Flag, OPT_ggdbN_Group, OPT_ggdb, OPT_gdwarf, OPT_gdwarf_2 through
// OPT_gdwarf_5, or OPT_gcolumn_info; returns false for any other option.
// A is dereferenced unconditionally, so it must not be null.
bool CudaToolChain::supportsDebugInfoOption(const llvm::opt::Arg *A) const {
722
48
  const Option &O = A->getOption();
723
48
  return (O.matches(options::OPT_gN_Group) &&
724
48
          
!O.matches(options::OPT_gmodules)26
) ||
725
48
         
O.matches(options::OPT_g_Flag)22
||
726
48
         
O.matches(options::OPT_ggdbN_Group)4
||
O.matches(options::OPT_ggdb)4
||
727
48
         
O.matches(options::OPT_gdwarf)4
||
O.matches(options::OPT_gdwarf_2)4
||
728
48
         
O.matches(options::OPT_gdwarf_3)4
||
O.matches(options::OPT_gdwarf_4)4
||
729
48
         
O.matches(options::OPT_gdwarf_5)4
||
730
48
         
O.matches(options::OPT_gcolumn_info)4
;
731
48
}
732
733
// Adjusts the requested debug info kind for device compilation according to
// the result of mustEmitDebugInfo(Args).
//
// DebugInfoKind - in/out: overwritten with NoDebugInfo or DebugDirectivesOnly
//                 for the corresponding results, and left untouched when the
//                 result is EmitSameDebugInfoAsHost.
// Args          - driver arguments forwarded to mustEmitDebugInfo.
void CudaToolChain::adjustDebugInfoKind(
734
325
    codegenoptions::DebugInfoKind &DebugInfoKind, const ArgList &Args) const {
735
325
  switch (mustEmitDebugInfo(Args)) {
736
292
  case DisableDebugInfo:
737
292
    DebugInfoKind = codegenoptions::NoDebugInfo;
738
292
    break;
739
8
  case DebugDirectivesOnly:
740
8
    DebugInfoKind = codegenoptions::DebugDirectivesOnly;
741
8
    break;
742
25
  case EmitSameDebugInfoAsHost:
743
25
    // Use same debug info level as the host.
744
25
    break;
745
325
  }
746
325
}
747
748
// Adds the CUDA include arguments to CC1Args by forwarding to
// CudaInstallation.AddCudaIncludeArgs.
//
// Before forwarding, unless OPT_nocudainc or OPT_no_cuda_version_check is
// present in DriverArgs, the GPU architecture given by the last OPT_march_EQ
// value is passed to CudaInstallation.CheckCudaVersionSupportsArch. That
// value is asserted to be non-empty.
void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
749
254
                                       ArgStringList &CC1Args) const {
750
254
  // Check our CUDA version if we're going to include the CUDA headers.
751
254
  if (!DriverArgs.hasArg(options::OPT_nocudainc) &&
752
254
      
!DriverArgs.hasArg(options::OPT_no_cuda_version_check)201
) {
753
200
    StringRef Arch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
754
200
    assert(!Arch.empty() && "Must have an explicit GPU arch.");
755
200
    CudaInstallation.CheckCudaVersionSupportsArch(StringToCudaArch(Arch));
756
200
  }
757
254
  // The include arguments are added regardless of whether the check ran.
  CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
758
254
}
759
760
// Builds the derived argument list used for device-side compilation.
//
// The list starts from whatever HostTC.TranslateArgs returns; if that is
// null, a fresh DerivedArgList over Args.getBaseArgs() is created.
//
// For Action::OFK_OpenMP, every argument of Args that is not already in the
// list is appended, a default OPT_march_EQ (CLANG_OPENMP_NVPTX_DEFAULT_ARCH)
// is added when none has a value, and the list is returned immediately.
//
// For all other offload kinds, every argument of Args is appended, except
// that OPT_Xarch__ arguments are either dropped (architecture does not match
// BoundArch, or the wrapped argument is invalid) or replaced by the argument
// they wrap. Finally, when BoundArch is non-empty, any OPT_march_EQ argument
// is replaced by one carrying BoundArch.
//
// NOTE(review): the returned list may have been allocated here with `new`;
// ownership presumably passes to the caller - confirm against callers.
llvm::opt::DerivedArgList *
761
CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
762
                             StringRef BoundArch,
763
661
                             Action::OffloadKind DeviceOffloadKind) const {
764
661
  DerivedArgList *DAL =
765
661
      HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
766
661
  // The host toolchain produced no list; start from the base arguments.
  if (!DAL)
767
530
    DAL = new DerivedArgList(Args.getBaseArgs());
768
661
769
661
  const OptTable &Opts = getDriver().getOpts();
770
661
771
661
  // For OpenMP device offloading, append derived arguments. Make sure
772
661
  // flags are not duplicated.
773
661
  // Also append the compute capability.
774
661
  if (DeviceOffloadKind == Action::OFK_OpenMP) {
775
405
    for (Arg *A : Args) {
776
405
      bool IsDuplicate = false;
777
2.09k
      for (Arg *DALArg : *DAL) {
778
2.09k
        // Duplicates are detected by Arg pointer identity, not by value.
        if (A == DALArg) {
779
405
          IsDuplicate = true;
780
405
          break;
781
405
        }
782
2.09k
      }
783
405
      if (!IsDuplicate)
784
0
        DAL->append(A);
785
405
    }
786
47
787
47
    StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ);
788
47
    if (Arch.empty())
789
15
      DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
790
15
                        CLANG_OPENMP_NVPTX_DEFAULT_ARCH);
791
47
792
47
    return DAL;
793
47
  }
794
614
795
4.43k
  
for (Arg *A : Args)614
{
796
4.43k
    if (A->getOption().matches(options::OPT_Xarch__)) {
797
0
      // Skip this argument unless the architecture matches BoundArch
798
0
      if (BoundArch.empty() || A->getValue(0) != BoundArch)
799
0
        continue;
800
0
801
0
      // Re-parse the wrapped option text (value 1) as a standalone argument.
      unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(1));
802
0
      unsigned Prev = Index;
803
0
      std::unique_ptr<Arg> XarchArg(Opts.ParseOneArg(Args, Index));
804
0
805
0
      // If the argument parsing failed or more than one argument was
806
0
      // consumed, the -Xarch_ argument's parameter tried to consume
807
0
      // extra arguments. Emit an error and ignore.
808
0
      //
809
0
      // We also want to disallow any options which would alter the
810
0
      // driver behavior; that isn't going to work in our model. We
811
0
      // use isDriverOption() as an approximation, although things
812
0
      // like -O4 are going to slip through.
813
0
      if (!XarchArg || Index > Prev + 1) {
814
0
        getDriver().Diag(diag::err_drv_invalid_Xarch_argument_with_args)
815
0
            << A->getAsString(Args);
816
0
        continue;
817
0
      } else if (XarchArg->getOption().hasFlag(options::DriverOption)) {
818
0
        getDriver().Diag(diag::err_drv_invalid_Xarch_argument_isdriver)
819
0
            << A->getAsString(Args);
820
0
        continue;
821
0
      }
822
0
      XarchArg->setBaseArg(A);
823
0
      // Release the parsed argument from the unique_ptr and register it with
      // DAL as a synthesized argument; A now refers to the wrapped argument.
      A = XarchArg.release();
824
0
      DAL->AddSynthesizedArg(A);
825
0
    }
826
4.43k
    DAL->append(A);
827
4.43k
  }
828
614
829
614
  // Replace any OPT_march_EQ argument with one carrying BoundArch.
  if (!BoundArch.empty()) {
830
266
    DAL->eraseArg(options::OPT_march_EQ);
831
266
    DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), BoundArch);
832
266
  }
833
614
  return DAL;
834
614
}
835
836
233
// Creates the assembler tool for this toolchain: a tools::NVPTX::Assembler
// bound to *this, allocated with `new`.
Tool *CudaToolChain::buildAssembler() const {
837
233
  return new tools::NVPTX::Assembler(*this);
838
233
}
839
840
213
// Creates the linker tool for this toolchain, allocated with `new`:
// a tools::NVPTX::OpenMPLinker when OK equals Action::OFK_OpenMP, and a
// tools::NVPTX::Linker otherwise.
Tool *CudaToolChain::buildLinker() const {
841
213
  if (OK == Action::OFK_OpenMP)
842
44
    return new tools::NVPTX::OpenMPLinker(*this);
843
169
  return new tools::NVPTX::Linker(*this);
844
169
}
845
846
325
// Forwards to the host toolchain so that CC1Args receives the same warning
// options as HostTC.addClangWarningOptions would add.
void CudaToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
847
325
  HostTC.addClangWarningOptions(CC1Args);
848
325
}
849
850
// Returns the C++ standard library type chosen by the host toolchain for
// the given arguments.
ToolChain::CXXStdlibType
851
0
CudaToolChain::GetCXXStdlibType(const ArgList &Args) const {
852
0
  return HostTC.GetCXXStdlibType(Args);
853
0
}
854
855
// Forwards to the host toolchain to add its system include arguments to
// CC1Args.
void CudaToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
856
584
                                              ArgStringList &CC1Args) const {
857
584
  HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
858
584
}
859
860
// Forwards to the host toolchain to add its C++ standard library include
// arguments to CC1Args.
void CudaToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args,
861
459
                                                 ArgStringList &CC1Args) const {
862
459
  HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args);
863
459
}
864
865
// Forwards to the host toolchain's AddIAMCUIncludeArgs with the same Args
// and CC1Args.
void CudaToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
866
0
                                        ArgStringList &CC1Args) const {
867
0
  HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
868
0
}
869
870
266
// Returns the sanitizer mask reported by the host toolchain; see the
// comment in the body for why the device toolchain mirrors the host.
SanitizerMask CudaToolChain::getSupportedSanitizers() const {
871
266
  // The CudaToolChain only supports sanitizers in the sense that it allows
872
266
  // sanitizer arguments on the command line if they are supported by the host
873
266
  // toolchain. The CudaToolChain will actually ignore any command line
874
266
  // arguments for any of these "supported" sanitizers. That means that no
875
266
  // sanitization of device code is actually supported at this time.
876
266
  //
877
266
  // This behavior is necessary because the host and device toolchains
878
266
  // invocations often share the command line, so the device toolchain must
879
266
  // tolerate flags meant only for the host toolchain.
880
266
  return HostTC.getSupportedSanitizers();
881
266
}
882
883
// Returns the MSVC version computed by the host toolchain for the given
// driver and arguments.
VersionTuple CudaToolChain::computeMSVCVersion(const Driver *D,
884
325
                                               const ArgList &Args) const {
885
325
  return HostTC.computeMSVCVersion(D, Args);
886
325
}