Coverage Report

Created: 2021-01-19 06:58

/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/Driver/ToolChains/AMDGPU.cpp
Line
Count
Source (jump to first uncovered line)
1
//===--- AMDGPU.cpp - AMDGPU ToolChain Implementations ----------*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
9
#include "AMDGPU.h"
10
#include "CommonArgs.h"
11
#include "InputInfo.h"
12
#include "clang/Basic/TargetID.h"
13
#include "clang/Driver/Compilation.h"
14
#include "clang/Driver/DriverDiagnostic.h"
15
#include "llvm/Option/ArgList.h"
16
#include "llvm/Support/Path.h"
17
#include "llvm/Support/VirtualFileSystem.h"
18
19
using namespace clang::driver;
20
using namespace clang::driver::tools;
21
using namespace clang::driver::toolchains;
22
using namespace clang;
23
using namespace llvm::opt;
24
25
281
void RocmInstallationDetector::scanLibDevicePath(llvm::StringRef Path) {
26
281
  assert(!Path.empty());
27
28
281
  const StringRef Suffix(".bc");
29
281
  const StringRef Suffix2(".amdgcn.bc");
30
31
281
  std::error_code EC;
32
281
  for (llvm::vfs::directory_iterator LI = D.getVFS().dir_begin(Path, EC), LE;
33
80.7k
       !EC && LI != LE; 
LI = LI.increment(EC)80.4k
) {
34
80.4k
    StringRef FilePath = LI->path();
35
80.4k
    StringRef FileName = llvm::sys::path::filename(FilePath);
36
80.4k
    if (!FileName.endswith(Suffix))
37
79.4k
      continue;
38
39
1.04k
    StringRef BaseName;
40
1.04k
    if (FileName.endswith(Suffix2))
41
0
      BaseName = FileName.drop_back(Suffix2.size());
42
1.04k
    else if (FileName.endswith(Suffix))
43
1.04k
      BaseName = FileName.drop_back(Suffix.size());
44
45
1.04k
    if (BaseName == "ocml") {
46
52
      OCML = FilePath;
47
997
    } else if (BaseName == "ockl") {
48
52
      OCKL = FilePath;
49
945
    } else if (BaseName == "opencl") {
50
52
      OpenCL = FilePath;
51
893
    } else if (BaseName == "hip") {
52
52
      HIP = FilePath;
53
841
    } else if (BaseName == "oclc_finite_only_off") {
54
52
      FiniteOnly.Off = FilePath;
55
789
    } else if (BaseName == "oclc_finite_only_on") {
56
52
      FiniteOnly.On = FilePath;
57
737
    } else if (BaseName == "oclc_daz_opt_on") {
58
52
      DenormalsAreZero.On = FilePath;
59
685
    } else if (BaseName == "oclc_daz_opt_off") {
60
52
      DenormalsAreZero.Off = FilePath;
61
633
    } else if (BaseName == "oclc_correctly_rounded_sqrt_on") {
62
52
      CorrectlyRoundedSqrt.On = FilePath;
63
581
    } else if (BaseName == "oclc_correctly_rounded_sqrt_off") {
64
52
      CorrectlyRoundedSqrt.Off = FilePath;
65
529
    } else if (BaseName == "oclc_unsafe_math_on") {
66
52
      UnsafeMath.On = FilePath;
67
477
    } else if (BaseName == "oclc_unsafe_math_off") {
68
52
      UnsafeMath.Off = FilePath;
69
425
    } else if (BaseName == "oclc_wavefrontsize64_on") {
70
52
      WavefrontSize64.On = FilePath;
71
373
    } else if (BaseName == "oclc_wavefrontsize64_off") {
72
52
      WavefrontSize64.Off = FilePath;
73
321
    } else {
74
      // Process all bitcode filenames that look like
75
      // ocl_isa_version_XXX.amdgcn.bc
76
321
      const StringRef DeviceLibPrefix = "oclc_isa_version_";
77
321
      if (!BaseName.startswith(DeviceLibPrefix))
78
9
        continue;
79
80
312
      StringRef IsaVersionNumber =
81
312
        BaseName.drop_front(DeviceLibPrefix.size());
82
83
312
      llvm::Twine GfxName = Twine("gfx") + IsaVersionNumber;
84
312
      SmallString<8> Tmp;
85
312
      LibDeviceMap.insert(
86
312
        std::make_pair(GfxName.toStringRef(Tmp), FilePath.str()));
87
312
    }
88
1.04k
  }
89
281
}
90
91
// Parse and extract version numbers from `.hipVersion`. Return `true` if
92
// the parsing fails.
93
79
bool RocmInstallationDetector::parseHIPVersionFile(llvm::StringRef V) {
94
79
  SmallVector<StringRef, 4> VersionParts;
95
79
  V.split(VersionParts, '\n');
96
79
  unsigned Major = ~0U;
97
79
  unsigned Minor = ~0U;
98
553
  for (auto Part : VersionParts) {
99
553
    auto Splits = Part.rtrim().split('=');
100
553
    if (Splits.first == "HIP_VERSION_MAJOR") {
101
79
      if (Splits.second.getAsInteger(0, Major))
102
0
        return true;
103
474
    } else if (Splits.first == "HIP_VERSION_MINOR") {
104
79
      if (Splits.second.getAsInteger(0, Minor))
105
0
        return true;
106
395
    } else if (Splits.first == "HIP_VERSION_PATCH")
107
79
      VersionPatch = Splits.second.str();
108
553
  }
109
79
  if (Major == ~0U || Minor == ~0U)
110
0
    return true;
111
79
  VersionMajorMinor = llvm::VersionTuple(Major, Minor);
112
79
  DetectedVersion =
113
79
      (Twine(Major) + "." + Twine(Minor) + "." + VersionPatch).str();
114
79
  return false;
115
79
}
116
117
// For candidate specified by --rocm-path we do not do strict check.
118
SmallVector<RocmInstallationDetector::Candidate, 4>
119
47.9k
RocmInstallationDetector::getInstallationPathCandidates() {
120
47.9k
  SmallVector<Candidate, 4> Candidates;
121
47.9k
  if (!RocmPathArg.empty()) {
122
139
    Candidates.emplace_back(RocmPathArg.str());
123
139
    return Candidates;
124
139
  }
125
126
  // Try to find relative to the compiler binary.
127
47.7k
  const char *InstallDir = D.getInstalledDir();
128
129
  // Check both a normal Unix prefix position of the clang binary, as well as
130
  // the Windows-esque layout the ROCm packages use with the host architecture
131
  // subdirectory of bin.
132
133
  // Strip off directory (usually bin)
134
47.7k
  StringRef ParentDir = llvm::sys::path::parent_path(InstallDir);
135
47.7k
  StringRef ParentName = llvm::sys::path::filename(ParentDir);
136
137
  // Some builds use bin/{host arch}, so go up again.
138
47.7k
  if (ParentName == "bin") {
139
6
    ParentDir = llvm::sys::path::parent_path(ParentDir);
140
6
    ParentName = llvm::sys::path::filename(ParentDir);
141
6
  }
142
143
  // Some versions of the rocm llvm package install to /opt/rocm/llvm/bin
144
47.7k
  if (ParentName == "llvm")
145
0
    ParentDir = llvm::sys::path::parent_path(ParentDir);
146
147
47.7k
  Candidates.emplace_back(ParentDir.str(), /*StrictChecking=*/true);
148
149
  // Device library may be installed in clang resource directory.
150
47.7k
  Candidates.emplace_back(D.ResourceDir, /*StrictChecking=*/true);
151
152
47.7k
  Candidates.emplace_back(D.SysRoot + "/opt/rocm", /*StrictChecking=*/true);
153
47.7k
  return Candidates;
154
47.7k
}
155
156
RocmInstallationDetector::RocmInstallationDetector(
157
    const Driver &D, const llvm::Triple &HostTriple,
158
    const llvm::opt::ArgList &Args, bool DetectHIPRuntime, bool DetectDeviceLib)
159
47.6k
    : D(D) {
160
47.6k
  RocmPathArg = Args.getLastArgValue(clang::driver::options::OPT_rocm_path_EQ);
161
47.6k
  RocmDeviceLibPathArg =
162
47.6k
      Args.getAllArgValues(clang::driver::options::OPT_rocm_device_lib_path_EQ);
163
47.6k
  if (auto *A = Args.getLastArg(clang::driver::options::OPT_hip_version_EQ)) {
164
4
    HIPVersionArg = A->getValue();
165
4
    unsigned Major = 0;
166
4
    unsigned Minor = 0;
167
4
    SmallVector<StringRef, 3> Parts;
168
4
    HIPVersionArg.split(Parts, '.');
169
4
    if (Parts.size())
170
4
      Parts[0].getAsInteger(0, Major);
171
4
    if (Parts.size() > 1)
172
4
      Parts[1].getAsInteger(0, Minor);
173
4
    if (Parts.size() > 2)
174
2
      VersionPatch = Parts[2].str();
175
4
    if (VersionPatch.empty())
176
2
      VersionPatch = "0";
177
4
    if (Major == 0 || 
Minor == 03
)
178
1
      D.Diag(diag::err_drv_invalid_value)
179
1
          << A->getAsString(Args) << HIPVersionArg;
180
181
4
    VersionMajorMinor = llvm::VersionTuple(Major, Minor);
182
4
    DetectedVersion =
183
4
        (Twine(Major) + "." + Twine(Minor) + "." + VersionPatch).str();
184
47.6k
  } else {
185
47.6k
    VersionPatch = DefaultVersionPatch;
186
47.6k
    VersionMajorMinor =
187
47.6k
        llvm::VersionTuple(DefaultVersionMajor, DefaultVersionMinor);
188
47.6k
    DetectedVersion = (Twine(DefaultVersionMajor) + "." +
189
47.6k
                       Twine(DefaultVersionMinor) + "." + VersionPatch)
190
47.6k
                          .str();
191
47.6k
  }
192
193
47.6k
  if (DetectHIPRuntime)
194
47.6k
    detectHIPRuntime();
195
47.6k
  if (DetectDeviceLib)
196
0
    detectDeviceLibrary();
197
47.6k
}
198
199
285
void RocmInstallationDetector::detectDeviceLibrary() {
200
285
  assert(LibDevicePath.empty());
201
202
285
  if (!RocmDeviceLibPathArg.empty())
203
11
    LibDevicePath = RocmDeviceLibPathArg[RocmDeviceLibPathArg.size() - 1];
204
274
  else if (const char *LibPathEnv = ::getenv("HIP_DEVICE_LIB_PATH"))
205
2
    LibDevicePath = LibPathEnv;
206
207
285
  auto &FS = D.getVFS();
208
285
  if (!LibDevicePath.empty()) {
209
    // Maintain compatability with HIP flag/envvar pointing directly at the
210
    // bitcode library directory. This points directly at the library path instead
211
    // of the rocm root installation.
212
13
    if (!FS.exists(LibDevicePath))
213
0
      return;
214
215
13
    scanLibDevicePath(LibDevicePath);
216
13
    HasDeviceLibrary = allGenericLibsValid() && 
!LibDeviceMap.empty()4
;
217
13
    return;
218
13
  }
219
220
  // The install path situation in old versions of ROCm is a real mess, and
221
  // use a different install layout. Multiple copies of the device libraries
222
  // exist for each frontend project, and differ depending on which build
223
  // system produced the packages. Standalone OpenCL builds also have a
224
  // different directory structure from the ROCm OpenCL package.
225
272
  auto Candidates = getInstallationPathCandidates();
226
714
  for (const auto &Candidate : Candidates) {
227
714
    auto CandidatePath = Candidate.Path;
228
229
    // Check device library exists at the given path.
230
2.04k
    auto CheckDeviceLib = [&](StringRef Path) {
231
2.04k
      bool CheckLibDevice = (!NoBuiltinLibs || 
Candidate.StrictChecking0
);
232
2.04k
      if (CheckLibDevice && !FS.exists(Path))
233
1.77k
        return false;
234
235
268
      scanLibDevicePath(Path);
236
237
268
      if (!NoBuiltinLibs) {
238
        // Check that the required non-target libraries are all available.
239
268
        if (!allGenericLibsValid())
240
220
          return false;
241
242
        // Check that we have found at least one libdevice that we can link in
243
        // if -nobuiltinlib hasn't been specified.
244
48
        if (LibDeviceMap.empty())
245
0
          return false;
246
48
      }
247
48
      return true;
248
48
    };
249
250
    // The possible structures are:
251
    // - ${ROCM_ROOT}/amdgcn/bitcode/*
252
    // - ${ROCM_ROOT}/lib/*
253
    // - ${ROCM_ROOT}/lib/bitcode/*
254
    // so try to detect these layouts.
255
714
    static constexpr std::array<const char *, 2> SubDirsList[] = {
256
714
        {"amdgcn", "bitcode"},
257
714
        {"lib", ""},
258
714
        {"lib", "bitcode"},
259
714
    };
260
261
    // Make a path by appending sub-directories to InstallPath.
262
2.04k
    auto MakePath = [&](const llvm::ArrayRef<const char *> &SubDirs) {
263
2.04k
      auto Path = CandidatePath;
264
2.04k
      for (auto SubDir : SubDirs)
265
4.09k
        llvm::sys::path::append(Path, SubDir);
266
2.04k
      return Path;
267
2.04k
    };
268
269
2.04k
    for (auto SubDirs : SubDirsList) {
270
2.04k
      LibDevicePath = MakePath(SubDirs);
271
2.04k
      HasDeviceLibrary = CheckDeviceLib(LibDevicePath);
272
2.04k
      if (HasDeviceLibrary)
273
48
        return;
274
2.04k
    }
275
714
  }
276
272
}
277
278
47.6k
void RocmInstallationDetector::detectHIPRuntime() {
279
47.6k
  auto Candidates = getInstallationPathCandidates();
280
47.6k
  auto &FS = D.getVFS();
281
282
142k
  for (const auto &Candidate : Candidates) {
283
142k
    InstallPath = Candidate.Path;
284
142k
    if (InstallPath.empty() || 
!FS.exists(InstallPath)115k
)
285
100k
      continue;
286
287
42.2k
    BinPath = InstallPath;
288
42.2k
    llvm::sys::path::append(BinPath, "bin");
289
42.2k
    IncludePath = InstallPath;
290
42.2k
    llvm::sys::path::append(IncludePath, "include");
291
42.2k
    LibPath = InstallPath;
292
42.2k
    llvm::sys::path::append(LibPath, "lib");
293
294
42.2k
    llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile =
295
42.2k
        FS.getBufferForFile(BinPath + "/.hipVersion");
296
42.2k
    if (!VersionFile && 
Candidate.StrictChecking42.1k
)
297
42.1k
      continue;
298
299
82
    if (HIPVersionArg.empty() && 
VersionFile81
)
300
79
      if (parseHIPVersionFile((*VersionFile)->getBuffer()))
301
0
        continue;
302
303
82
    HasHIPRuntime = true;
304
82
    return;
305
82
  }
306
47.5k
  HasHIPRuntime = false;
307
47.5k
}
308
309
197
void RocmInstallationDetector::print(raw_ostream &OS) const {
310
197
  if (hasHIPRuntime())
311
13
    OS << "Found HIP installation: " << InstallPath << ", version "
312
13
       << DetectedVersion << '\n';
313
197
}
314
315
void RocmInstallationDetector::AddHIPIncludeArgs(const ArgList &DriverArgs,
316
323
                                                 ArgStringList &CC1Args) const {
317
323
  bool UsesRuntimeWrapper = VersionMajorMinor > llvm::VersionTuple(3, 5);
318
319
323
  if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
320
    // HIP header includes standard library wrapper headers under clang
321
    // cuda_wrappers directory. Since these wrapper headers include_next
322
    // standard C++ headers, whereas libc++ headers include_next other clang
323
    // headers. The include paths have to follow this order:
324
    // - wrapper include path
325
    // - standard C++ include path
326
    // - other clang include path
327
    // Since standard C++ and other clang include paths are added in other
328
    // places after this function, here we only need to make sure wrapper
329
    // include path is added.
330
    //
331
    // ROCm 3.5 does not fully support the wrapper headers. Therefore it needs
332
    // a workaround.
333
321
    SmallString<128> P(D.ResourceDir);
334
321
    if (UsesRuntimeWrapper)
335
49
      llvm::sys::path::append(P, "include", "cuda_wrappers");
336
321
    CC1Args.push_back("-internal-isystem");
337
321
    CC1Args.push_back(DriverArgs.MakeArgString(P));
338
321
  }
339
340
323
  if (DriverArgs.hasArg(options::OPT_nogpuinc))
341
127
    return;
342
343
196
  if (!hasHIPRuntime()) {
344
147
    D.Diag(diag::err_drv_no_hip_runtime);
345
147
    return;
346
147
  }
347
348
49
  CC1Args.push_back("-internal-isystem");
349
49
  CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath()));
350
49
  if (UsesRuntimeWrapper)
351
49
    CC1Args.append({"-include", "__clang_hip_runtime_wrapper.h"});
352
49
}
353
354
void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
355
                                  const InputInfo &Output,
356
                                  const InputInfoList &Inputs,
357
                                  const ArgList &Args,
358
77
                                  const char *LinkingOutput) const {
359
360
77
  std::string Linker = getToolChain().GetProgramPath(getShortName());
361
77
  ArgStringList CmdArgs;
362
77
  addLinkerCompressDebugSectionsOption(getToolChain(), Args, CmdArgs);
363
77
  AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA);
364
77
  CmdArgs.push_back("-shared");
365
77
  CmdArgs.push_back("-o");
366
77
  CmdArgs.push_back(Output.getFilename());
367
77
  C.addCommand(std::make_unique<Command>(
368
77
      JA, *this, ResponseFileSupport::AtFileCurCP(), Args.MakeArgString(Linker),
369
77
      CmdArgs, Inputs, Output));
370
77
}
371
372
void amdgpu::getAMDGPUTargetFeatures(const Driver &D,
373
                                     const llvm::Triple &Triple,
374
                                     const llvm::opt::ArgList &Args,
375
663
                                     std::vector<StringRef> &Features) {
376
  // Add target ID features to -target-feature options. No diagnostics should
377
  // be emitted here since invalid target ID is diagnosed at other places.
378
663
  StringRef TargetID = Args.getLastArgValue(options::OPT_mcpu_EQ);
379
663
  if (!TargetID.empty()) {
380
625
    llvm::StringMap<bool> FeatureMap;
381
625
    auto OptionalGpuArch = parseTargetID(Triple, TargetID, &FeatureMap);
382
625
    if (OptionalGpuArch) {
383
611
      StringRef GpuArch = OptionalGpuArch.getValue();
384
      // Iterate through all possible target ID features for the given GPU.
385
      // If it is mapped to true, add +feature.
386
      // If it is mapped to false, add -feature.
387
      // If it is not in the map (default), do not add it
388
611
      for (auto &&Feature : getAllPossibleTargetIDFeatures(Triple, GpuArch)) {
389
611
        auto Pos = FeatureMap.find(Feature);
390
611
        if (Pos == FeatureMap.end())
391
518
          continue;
392
93
        Features.push_back(Args.MakeArgStringRef(
393
59
            (Twine(Pos->second ? "+" : 
"-"34
) + Feature).str()));
394
93
      }
395
611
    }
396
625
  }
397
398
663
  if (Args.hasFlag(options::OPT_mwavefrontsize64,
399
663
                   options::OPT_mno_wavefrontsize64, false))
400
13
    Features.push_back("+wavefrontsize64");
401
402
663
  handleTargetFeaturesGroup(
403
663
    Args, Features, options::OPT_m_amdgpu_Features_Group);
404
663
}
405
406
/// AMDGPU Toolchain
407
AMDGPUToolChain::AMDGPUToolChain(const Driver &D, const llvm::Triple &Triple,
408
                                 const ArgList &Args)
409
    : Generic_ELF(D, Triple, Args),
410
      OptionsDefault(
411
308
          {{options::OPT_O, "3"}, {options::OPT_cl_std_EQ, "CL1.2"}}) {
412
  // Check code object version options. Emit warnings for legacy options
413
  // and errors for the last invalid code object version options.
414
  // It is done here to avoid repeated warning or error messages for
415
  // each tool invocation.
416
308
  (void)getOrCheckAMDGPUCodeObjectVersion(D, Args, /*Diagnose=*/true);
417
308
}
418
419
77
Tool *AMDGPUToolChain::buildLinker() const {
420
77
  return new tools::amdgpu::Linker(*this);
421
77
}
422
423
DerivedArgList *
424
AMDGPUToolChain::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch,
425
120
                               Action::OffloadKind DeviceOffloadKind) const {
426
427
120
  DerivedArgList *DAL =
428
120
      Generic_ELF::TranslateArgs(Args, BoundArch, DeviceOffloadKind);
429
430
120
  const OptTable &Opts = getDriver().getOpts();
431
432
120
  if (!DAL)
433
112
    DAL = new DerivedArgList(Args.getBaseArgs());
434
435
838
  for (Arg *A : Args) {
436
838
    if (!shouldSkipArgument(A))
437
838
      DAL->append(A);
438
838
  }
439
440
120
  checkTargetID(*DAL);
441
442
120
  if (!Args.getLastArgValue(options::OPT_x).equals("cl"))
443
88
    return DAL;
444
445
  // Phase 1 (.cl -> .bc)
446
32
  if (Args.hasArg(options::OPT_c) && 
Args.hasArg(options::OPT_emit_llvm)15
) {
447
15
    DAL->AddFlagArg(nullptr, Opts.getOption(getTriple().isArch64Bit()
448
15
                                                ? options::OPT_m64
449
0
                                                : options::OPT_m32));
450
451
    // Have to check OPT_O4, OPT_O0 & OPT_Ofast separately
452
    // as they defined that way in Options.td
453
15
    if (!Args.hasArg(options::OPT_O, options::OPT_O0, options::OPT_O4,
454
15
                     options::OPT_Ofast))
455
7
      DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_O),
456
7
                        getOptionDefault(options::OPT_O));
457
15
  }
458
459
32
  return DAL;
460
32
}
461
462
bool AMDGPUToolChain::getDefaultDenormsAreZeroForTarget(
463
367
    llvm::AMDGPU::GPUKind Kind) {
464
465
  // Assume nothing without a specific target.
466
367
  if (Kind == llvm::AMDGPU::GK_NONE)
467
33
    return false;
468
469
334
  const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind);
470
471
  // Default to enabling f32 denormals by default on subtargets where fma is
472
  // fast with denormals
473
334
  const bool BothDenormAndFMAFast =
474
334
      (ArchAttr & llvm::AMDGPU::FEATURE_FAST_FMA_F32) &&
475
218
      (ArchAttr & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32);
476
334
  return !BothDenormAndFMAFast;
477
334
}
478
479
llvm::DenormalMode AMDGPUToolChain::getDefaultDenormalModeForType(
480
    const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
481
654
    const llvm::fltSemantics *FPType) const {
482
  // Denormals should always be enabled for f16 and f64.
483
654
  if (!FPType || 
FPType != &llvm::APFloat::IEEEsingle()327
)
484
327
    return llvm::DenormalMode::getIEEE();
485
486
327
  if (JA.getOffloadingDeviceKind() == Action::OFK_HIP ||
487
224
      
JA.getOffloadingDeviceKind() == Action::OFK_Cuda103
) {
488
224
    auto Arch = getProcessorFromTargetID(getTriple(), JA.getOffloadingArch());
489
224
    auto Kind = llvm::AMDGPU::parseArchAMDGCN(Arch);
490
224
    if (FPType && FPType == &llvm::APFloat::IEEEsingle() &&
491
224
        DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
492
224
                           options::OPT_fno_cuda_flush_denormals_to_zero,
493
224
                           getDefaultDenormsAreZeroForTarget(Kind)))
494
71
      return llvm::DenormalMode::getPreserveSign();
495
496
153
    return llvm::DenormalMode::getIEEE();
497
153
  }
498
499
103
  const StringRef GpuArch = getGPUArch(DriverArgs);
500
103
  auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
501
502
  // TODO: There are way too many flags that change this. Do we need to check
503
  // them all?
504
103
  bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
505
96
             getDefaultDenormsAreZeroForTarget(Kind);
506
507
  // Outputs are flushed to zero (FTZ), preserving sign. Denormal inputs are
508
  // also implicit treated as zero (DAZ).
509
39
  return DAZ ? llvm::DenormalMode::getPreserveSign() :
510
64
               llvm::DenormalMode::getIEEE();
511
103
}
512
513
bool AMDGPUToolChain::isWave64(const llvm::opt::ArgList &DriverArgs,
514
49
                               llvm::AMDGPU::GPUKind Kind) {
515
49
  const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind);
516
49
  bool HasWave32 = (ArchAttr & llvm::AMDGPU::FEATURE_WAVE32);
517
518
49
  return !HasWave32 || DriverArgs.hasFlag(
519
7
    options::OPT_mwavefrontsize64, options::OPT_mno_wavefrontsize64, false);
520
49
}
521
522
523
/// ROCM Toolchain
524
ROCMToolChain::ROCMToolChain(const Driver &D, const llvm::Triple &Triple,
525
                             const ArgList &Args)
526
285
    : AMDGPUToolChain(D, Triple, Args) {
527
285
  RocmInstallation.detectDeviceLibrary();
528
285
}
529
530
void AMDGPUToolChain::addClangTargetOptions(
531
    const llvm::opt::ArgList &DriverArgs,
532
    llvm::opt::ArgStringList &CC1Args,
533
103
    Action::OffloadKind DeviceOffloadingKind) const {
534
  // Default to "hidden" visibility, as object level linking will not be
535
  // supported for the foreseeable future.
536
103
  if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ,
537
99
                         options::OPT_fvisibility_ms_compat)) {
538
99
    CC1Args.push_back("-fvisibility");
539
99
    CC1Args.push_back("hidden");
540
99
    CC1Args.push_back("-fapply-global-visibility-to-externs");
541
99
  }
542
103
}
543
544
StringRef
545
345
AMDGPUToolChain::getGPUArch(const llvm::opt::ArgList &DriverArgs) const {
546
345
  return getProcessorFromTargetID(
547
345
      getTriple(), DriverArgs.getLastArgValue(options::OPT_mcpu_EQ));
548
345
}
549
550
void AMDGPUToolChain::checkTargetID(
551
333
    const llvm::opt::ArgList &DriverArgs) const {
552
333
  StringRef TargetID = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ);
553
333
  if (TargetID.empty())
554
35
    return;
555
556
298
  llvm::StringMap<bool> FeatureMap;
557
298
  auto OptionalGpuArch = parseTargetID(getTriple(), TargetID, &FeatureMap);
558
298
  if (!OptionalGpuArch) {
559
14
    getDriver().Diag(clang::diag::err_drv_bad_target_id) << TargetID;
560
14
  }
561
298
}
562
563
void ROCMToolChain::addClangTargetOptions(
564
    const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
565
84
    Action::OffloadKind DeviceOffloadingKind) const {
566
84
  AMDGPUToolChain::addClangTargetOptions(DriverArgs, CC1Args,
567
84
                                         DeviceOffloadingKind);
568
569
  // For the OpenCL case where there is no offload target, accept -nostdlib to
570
  // disable bitcode linking.
571
84
  if (DeviceOffloadingKind == Action::OFK_None &&
572
76
      DriverArgs.hasArg(options::OPT_nostdlib))
573
8
    return;
574
575
76
  if (DriverArgs.hasArg(options::OPT_nogpulib))
576
16
    return;
577
578
60
  if (!RocmInstallation.hasDeviceLibrary()) {
579
42
    getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 0;
580
42
    return;
581
42
  }
582
583
  // Get the device name and canonicalize it
584
18
  const StringRef GpuArch = getGPUArch(DriverArgs);
585
18
  auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
586
18
  const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);
587
18
  std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch);
588
18
  if (LibDeviceFile.empty()) {
589
1
    getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 1 << GpuArch;
590
1
    return;
591
1
  }
592
593
17
  bool Wave64 = isWave64(DriverArgs, Kind);
594
595
  // TODO: There are way too many flags that change this. Do we need to check
596
  // them all?
597
17
  bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
598
15
             getDefaultDenormsAreZeroForTarget(Kind);
599
17
  bool FiniteOnly = DriverArgs.hasArg(options::OPT_cl_finite_math_only);
600
601
17
  bool UnsafeMathOpt =
602
17
      DriverArgs.hasArg(options::OPT_cl_unsafe_math_optimizations);
603
17
  bool FastRelaxedMath = DriverArgs.hasArg(options::OPT_cl_fast_relaxed_math);
604
17
  bool CorrectSqrt =
605
17
      DriverArgs.hasArg(options::OPT_cl_fp32_correctly_rounded_divide_sqrt);
606
607
  // Add the OpenCL specific bitcode library.
608
17
  CC1Args.push_back("-mlink-builtin-bitcode");
609
17
  CC1Args.push_back(DriverArgs.MakeArgString(RocmInstallation.getOpenCLPath()));
610
611
  // Add the generic set of libraries.
612
17
  RocmInstallation.addCommonBitcodeLibCC1Args(
613
17
      DriverArgs, CC1Args, LibDeviceFile, Wave64, DAZ, FiniteOnly,
614
17
      UnsafeMathOpt, FastRelaxedMath, CorrectSqrt);
615
17
}
616
617
void RocmInstallationDetector::addCommonBitcodeLibCC1Args(
618
    const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
619
    StringRef LibDeviceFile, bool Wave64, bool DAZ, bool FiniteOnly,
620
49
    bool UnsafeMathOpt, bool FastRelaxedMath, bool CorrectSqrt) const {
621
49
  static const char LinkBitcodeFlag[] = "-mlink-builtin-bitcode";
622
623
49
  CC1Args.push_back(LinkBitcodeFlag);
624
49
  CC1Args.push_back(DriverArgs.MakeArgString(getOCMLPath()));
625
626
49
  CC1Args.push_back(LinkBitcodeFlag);
627
49
  CC1Args.push_back(DriverArgs.MakeArgString(getOCKLPath()));
628
629
49
  CC1Args.push_back(LinkBitcodeFlag);
630
49
  CC1Args.push_back(DriverArgs.MakeArgString(getDenormalsAreZeroPath(DAZ)));
631
632
49
  CC1Args.push_back(LinkBitcodeFlag);
633
49
  CC1Args.push_back(DriverArgs.MakeArgString(
634
49
      getUnsafeMathPath(UnsafeMathOpt || 
FastRelaxedMath48
)));
635
636
49
  CC1Args.push_back(LinkBitcodeFlag);
637
49
  CC1Args.push_back(DriverArgs.MakeArgString(
638
49
      getFiniteOnlyPath(FiniteOnly || 
FastRelaxedMath48
)));
639
640
49
  CC1Args.push_back(LinkBitcodeFlag);
641
49
  CC1Args.push_back(
642
49
      DriverArgs.MakeArgString(getCorrectlyRoundedSqrtPath(CorrectSqrt)));
643
644
49
  CC1Args.push_back(LinkBitcodeFlag);
645
49
  CC1Args.push_back(DriverArgs.MakeArgString(getWavefrontSize64Path(Wave64)));
646
647
49
  CC1Args.push_back(LinkBitcodeFlag);
648
49
  CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
649
49
}
650
651
4.86k
bool AMDGPUToolChain::shouldSkipArgument(const llvm::opt::Arg *A) const {
652
4.86k
  Option O = A->getOption();
653
4.86k
  if (O.matches(options::OPT_fPIE) || 
O.matches(options::OPT_fpie)4.86k
)
654
6
    return true;
655
4.86k
  return false;
656
4.86k
}