Coverage Report

Created: 2022-05-21 09:15

/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- clang-linker-wrapper/ClangLinkerWrapper.cpp - wrapper over linker-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===---------------------------------------------------------------------===//
8
//
9
// This tool works as a wrapper over a linking job. This tool is used to create
10
// linked device images for offloading. It scans the linker's input for embedded
11
// device offloading data stored in sections `.llvm.offloading.<triple>.<arch>`
12
// and extracts it as a temporary file. The extracted device files will then be
13
// passed to a device linking job to create a final device image.
14
//
15
//===---------------------------------------------------------------------===//
16
17
#include "OffloadWrapper.h"
18
#include "clang/Basic/Version.h"
19
#include "llvm/BinaryFormat/Magic.h"
20
#include "llvm/Bitcode/BitcodeWriter.h"
21
#include "llvm/CodeGen/CommandFlags.h"
22
#include "llvm/IR/Constants.h"
23
#include "llvm/IR/DiagnosticPrinter.h"
24
#include "llvm/IR/Module.h"
25
#include "llvm/IRReader/IRReader.h"
26
#include "llvm/LTO/LTO.h"
27
#include "llvm/MC/TargetRegistry.h"
28
#include "llvm/Object/Archive.h"
29
#include "llvm/Object/ArchiveWriter.h"
30
#include "llvm/Object/Binary.h"
31
#include "llvm/Object/ObjectFile.h"
32
#include "llvm/Object/OffloadBinary.h"
33
#include "llvm/Support/CommandLine.h"
34
#include "llvm/Support/Errc.h"
35
#include "llvm/Support/FileOutputBuffer.h"
36
#include "llvm/Support/FileSystem.h"
37
#include "llvm/Support/Host.h"
38
#include "llvm/Support/InitLLVM.h"
39
#include "llvm/Support/MemoryBuffer.h"
40
#include "llvm/Support/Path.h"
41
#include "llvm/Support/Program.h"
42
#include "llvm/Support/Signals.h"
43
#include "llvm/Support/SourceMgr.h"
44
#include "llvm/Support/StringSaver.h"
45
#include "llvm/Support/TargetSelect.h"
46
#include "llvm/Support/WithColor.h"
47
#include "llvm/Support/raw_ostream.h"
48
#include "llvm/Target/TargetMachine.h"
49
50
using namespace llvm;
51
using namespace llvm::object;
52
53
/// `-h` is accepted as a hidden alias for `-help`.
static cl::opt<bool> Help("h", cl::desc("Alias for -help"), cl::Hidden);

/// Amount of debug information requested for the device link steps.
enum DebugKind {
  NoDebugInfo,    ///< No debug information.
  DirectivesOnly, ///< Line directives only.
  FullDebugInfo,  ///< Full debug information.
};

// Every option of this tool is registered in the category below; options
// outside of it (except for -help) will be hidden.
static cl::OptionCategory
    ClangLinkerWrapperCategory("clang-linker-wrapper options");

/// Path of the host linker that is wrapped by this tool (required).
static cl::opt<std::string>
    LinkerUserPath("linker-path", cl::Required,
                   cl::desc("Path of linker binary"),
                   cl::cat(ClangLinkerWrapperCategory));

/// Target features, parsed later as `<triple>=<features>`.
static cl::opt<std::string>
    TargetFeatures("target-feature", cl::ZeroOrMore,
                   cl::desc("Target features for triple"),
                   cl::cat(ClangLinkerWrapperCategory));

/// Optimization level string; defaults to "O2".
static cl::opt<std::string>
    OptLevel("opt-level", cl::ZeroOrMore,
             cl::desc("Optimization level for LTO"), cl::init("O2"),
             cl::cat(ClangLinkerWrapperCategory));

/// Additional bitcode libraries to link into the device code.
static cl::list<std::string>
    BitcodeLibraries("target-library", cl::ZeroOrMore,
                     cl::desc("Path for the target bitcode library"),
                     cl::cat(ClangLinkerWrapperCategory));

/// When set, linked bitcode is embedded instead of a device executable.
static cl::opt<bool> EmbedBitcode(
    "target-embed-bc", cl::ZeroOrMore,
    cl::desc("Embed linked bitcode instead of an executable device image"),
    cl::init(false), cl::cat(ClangLinkerWrapperCategory));

/// When set, commands are only printed and never executed.
static cl::opt<bool> DryRun(
    "dry-run", cl::ZeroOrMore,
    cl::desc("List the linker commands to be run without executing them"),
    cl::init(false), cl::cat(ClangLinkerWrapperCategory));

/// Testing aid that prints the IR of the wrapped module.
static cl::opt<bool>
    PrintWrappedModule("print-wrapped-module", cl::ZeroOrMore,
                       cl::desc("Print the wrapped module's IR for testing"),
                       cl::init(false), cl::cat(ClangLinkerWrapperCategory));

/// Host triple; defaults to the default target triple of this build.
static cl::opt<std::string>
    HostTriple("host-triple", cl::ZeroOrMore,
               cl::desc("Triple to use for the host compilation"),
               cl::init(sys::getDefaultTargetTriple()),
               cl::cat(ClangLinkerWrapperCategory));

/// Extra arguments that are forwarded verbatim to ptxas.
static cl::list<std::string>
    PtxasArgs("ptxas-args", cl::ZeroOrMore,
              cl::desc("Argument to pass to the ptxas invocation"),
              cl::cat(ClangLinkerWrapperCategory));

/// Enables verbose output; also makes executed commands get printed.
static cl::opt<bool> Verbose("v", cl::ZeroOrMore,
                             cl::desc("Verbose output from tools"),
                             cl::init(false),
                             cl::cat(ClangLinkerWrapperCategory));
115
116
/// Debug information level, selected with one of the flags `-g0`,
/// `-gline-directives-only` or `-g`. Defaults to no debug information.
///
/// The option is registered in the tool's own category like every other
/// option of this file, so that it is not hidden together with unrelated
/// options.
static cl::opt<DebugKind> DebugInfo(
    cl::desc("Choose debugging level:"), cl::init(NoDebugInfo),
    cl::values(clEnumValN(NoDebugInfo, "g0", "No debug information"),
               clEnumValN(DirectivesOnly, "gline-directives-only",
                          "Line directive information only"),
               clEnumValN(FullDebugInfo, "g", "Full debugging support")),
    cl::cat(ClangLinkerWrapperCategory));
122
123
/// When set, intermediate files are written next to the output with
/// predictable names instead of being created as temporary files.
static cl::opt<bool> SaveTemps("save-temps", cl::ZeroOrMore,
                               cl::desc("Save intermediary results."),
                               cl::cat(ClangLinkerWrapperCategory));

/// Location of the CUDA installation. The help text used to be a copy of the
/// `-save-temps` description, which made `-help` misleading.
static cl::opt<std::string> CudaPath("cuda-path", cl::ZeroOrMore,
                                     cl::desc("Path of the CUDA installation"),
                                     cl::cat(ClangLinkerWrapperCategory));

// Do not parse linker options: everything positional is collected verbatim
// and handed to the host linker.
static cl::list<std::string>
    HostLinkerArgs(cl::Positional,
                   cl::desc("<options to be passed to linker>..."));
135
136
/// Path of the current binary.
137
static const char *LinkerExecutable;
138
139
/// Filename of the executable being created.
140
static StringRef ExecutableName;
141
142
/// System root if passed in to the linker via. '--sysroot='.
143
static StringRef Sysroot = "";
144
145
/// Binary path for the CUDA installation.
146
static std::string CudaBinaryPath;
147
148
/// Temporary files created by the linker wrapper.
149
static SmallVector<std::string, 16> TempFiles;
150
151
/// Codegen flags for LTO backend.
152
static codegen::RegisterCodeGenFlags CodeGenFlags;
153
154
/// Magic section string that marks the existence of offloading data. The
155
/// section will contain one or more offloading binaries stored contiguously.
156
0
#define OFFLOAD_SECTION_MAGIC_STR ".llvm.offloading"
157
158
/// The magic offset for the first object inside CUDA's fatbinary. This can be
159
/// different but it should work for what is passed here.
160
static constexpr unsigned FatbinaryOffset = 0x50;
161
162
/// Information for a device offloading file extracted from the host.
163
struct DeviceFile {
164
  DeviceFile(OffloadKind Kind, StringRef TheTriple, StringRef Arch,
165
             StringRef Filename)
166
0
      : Kind(Kind), TheTriple(TheTriple), Arch(Arch), Filename(Filename) {}
167
168
  OffloadKind Kind;
169
  std::string TheTriple;
170
  std::string Arch;
171
  std::string Filename;
172
};
173
174
namespace llvm {
175
/// Helper that allows DeviceFile to be used as a key in a DenseMap. For now we
176
/// assume device files with matching architectures and triples but different
177
/// offloading kinds should be handled together, this may not be true in the
178
/// future.
179
180
// Provide DenseMapInfo for OffloadKind.
181
template <> struct DenseMapInfo<OffloadKind> {
182
0
  static inline OffloadKind getEmptyKey() { return OFK_LAST; }
183
0
  static inline OffloadKind getTombstoneKey() {
184
0
    return static_cast<OffloadKind>(OFK_LAST + 1);
185
0
  }
186
0
  static unsigned getHashValue(const OffloadKind &Val) { return Val * 37U; }
187
188
0
  static bool isEqual(const OffloadKind &LHS, const OffloadKind &RHS) {
189
0
    return LHS == RHS;
190
0
  }
191
};
192
template <> struct DenseMapInfo<DeviceFile> {
193
0
  static DeviceFile getEmptyKey() {
194
0
    return {DenseMapInfo<OffloadKind>::getEmptyKey(),
195
0
            DenseMapInfo<StringRef>::getEmptyKey(),
196
0
            DenseMapInfo<StringRef>::getEmptyKey(),
197
0
            DenseMapInfo<StringRef>::getEmptyKey()};
198
0
  }
199
0
  static DeviceFile getTombstoneKey() {
200
0
    return {DenseMapInfo<OffloadKind>::getTombstoneKey(),
201
0
            DenseMapInfo<StringRef>::getTombstoneKey(),
202
0
            DenseMapInfo<StringRef>::getTombstoneKey(),
203
0
            DenseMapInfo<StringRef>::getTombstoneKey()};
204
0
  }
205
0
  static unsigned getHashValue(const DeviceFile &I) {
206
0
    return DenseMapInfo<StringRef>::getHashValue(I.TheTriple) ^
207
0
           DenseMapInfo<StringRef>::getHashValue(I.Arch);
208
0
  }
209
0
  static bool isEqual(const DeviceFile &LHS, const DeviceFile &RHS) {
210
0
    return LHS.TheTriple == RHS.TheTriple && LHS.Arch == RHS.Arch;
211
0
  }
212
};
213
} // namespace llvm
214
215
namespace {
216
217
Error extractFromBuffer(std::unique_ptr<MemoryBuffer> Buffer,
218
                        SmallVectorImpl<DeviceFile> &DeviceFiles);
219
220
0
/// Prints the command line \p CmdArgs to stderr on a single line. The first
/// element (the executable) is quoted, the remaining arguments are separated
/// by single spaces. Nothing is printed for an empty command.
void printCommands(ArrayRef<StringRef> CmdArgs) {
  if (CmdArgs.empty())
    return;

  raw_ostream &OS = llvm::errs();
  OS << " \"" << CmdArgs.front() << "\" ";

  const ArrayRef<StringRef> Arguments = CmdArgs.drop_front();
  for (size_t I = 0, E = Arguments.size(); I != E; ++I)
    OS << (I == 0 ? "" : " ") << Arguments[I];

  // Always terminate the line. Previously the newline was only written after
  // the last argument, so a command without arguments ran into the next
  // output.
  OS << "\n";
}
228
229
0
std::string getMainExecutable(const char *Name) {
230
0
  void *Ptr = (void *)(intptr_t)&getMainExecutable;
231
0
  auto COWPath = sys::fs::getMainExecutable(Name, Ptr);
232
0
  return sys::path::parent_path(COWPath).str();
233
0
}
234
235
/// Extract the device file from the string '<kind>-<triple>-<arch>=<library>'.
236
0
DeviceFile getBitcodeLibrary(StringRef LibraryStr) {
237
0
  auto DeviceAndPath = StringRef(LibraryStr).split('=');
238
0
  auto StringAndArch = DeviceAndPath.first.rsplit('-');
239
0
  auto KindAndTriple = StringAndArch.first.split('-');
240
0
  return DeviceFile(getOffloadKind(KindAndTriple.first), KindAndTriple.second,
241
0
                    StringAndArch.second, DeviceAndPath.second);
242
0
}
243
244
/// Get a temporary filename suitable for output.
245
Error createOutputFile(const Twine &Prefix, StringRef Extension,
246
0
                       SmallString<128> &NewFilename) {
247
0
  if (!SaveTemps) {
248
0
    if (std::error_code EC =
249
0
            sys::fs::createTemporaryFile(Prefix, Extension, NewFilename))
250
0
      return createFileError(NewFilename, EC);
251
0
    TempFiles.push_back(static_cast<std::string>(NewFilename));
252
0
  } else {
253
0
    const Twine &Filename = Prefix + "." + Extension;
254
0
    Filename.toNullTerminatedStringRef(NewFilename);
255
0
  }
256
257
0
  return Error::success();
258
0
}
259
260
/// Execute the command \p ExecutablePath with the arguments \p Args.
261
0
Error executeCommands(StringRef ExecutablePath, ArrayRef<StringRef> Args) {
262
0
  if (Verbose || DryRun)
263
0
    printCommands(Args);
264
265
0
  if (!DryRun)
266
0
    if (sys::ExecuteAndWait(ExecutablePath, Args))
267
0
      return createStringError(inconvertibleErrorCode(),
268
0
                               "'" + sys::path::filename(ExecutablePath) + "'" +
269
0
                                   " failed");
270
0
  return Error::success();
271
0
}
272
273
0
/// Locates the program \p Name. The directories in \p Paths are searched
/// first; if that fails the lookup is repeated without explicit directories.
/// In dry-run mode a program that cannot be found is reported by its bare
/// name instead of failing.
Expected<std::string> findProgram(StringRef Name, ArrayRef<StringRef> Paths) {
  ErrorOr<std::string> Location = sys::findProgramByName(Name, Paths);
  if (!Location)
    Location = sys::findProgramByName(Name);

  if (Location)
    return *Location;

  if (DryRun)
    return Name.str();

  return createStringError(Location.getError(),
                           "Unable to find '" + Name + "' in path");
}
285
286
0
/// Invokes the linker at \p LinkerPath with the arguments \p Args. The linker
/// path itself is passed as the first element of the argument vector.
Error runLinker(std::string &LinkerPath, SmallVectorImpl<std::string> &Args) {
  std::vector<StringRef> CommandLine;
  CommandLine.reserve(Args.size() + 1);
  CommandLine.emplace_back(LinkerPath);
  for (const std::string &Argument : Args)
    CommandLine.emplace_back(Argument);

  return executeCommands(LinkerPath, CommandLine);
}
296
297
0
/// Writes the full Clang tool version string for "clang-linker-wrapper",
/// followed by a newline, to \p OS.
void PrintVersion(raw_ostream &OS) {
  const std::string Version =
      clang::getClangToolFullVersion("clang-linker-wrapper");
  OS << Version << '\n';
}
300
301
/// Attempts to extract all the embedded device images contained inside the
302
/// buffer \p Contents. The buffer is expected to contain a valid offloading
303
/// binary format.
304
Error extractOffloadFiles(StringRef Contents, StringRef Prefix,
305
0
                          SmallVectorImpl<DeviceFile> &DeviceFiles) {
306
0
  uint64_t Offset = 0;
307
  // There could be multiple offloading binaries stored at this section.
308
0
  while (Offset < Contents.size()) {
309
0
    std::unique_ptr<MemoryBuffer> Buffer =
310
0
        MemoryBuffer::getMemBuffer(Contents.drop_front(Offset), "",
311
0
                                   /*RequiresNullTerminator*/ false);
312
0
    auto BinaryOrErr = OffloadBinary::create(*Buffer);
313
0
    if (!BinaryOrErr)
314
0
      return BinaryOrErr.takeError();
315
0
    OffloadBinary &Binary = **BinaryOrErr;
316
317
0
    if (Binary.getVersion() != 1)
318
0
      return createStringError(inconvertibleErrorCode(),
319
0
                               "Incompatible device image version");
320
321
0
    StringRef Kind = getOffloadKindName(Binary.getOffloadKind());
322
0
    StringRef Suffix = getImageKindName(Binary.getImageKind());
323
324
0
    SmallString<128> TempFile;
325
0
    if (Error Err =
326
0
            createOutputFile(Prefix + "-" + Kind + "-" + Binary.getTriple() +
327
0
                                 "-" + Binary.getArch(),
328
0
                             Suffix, TempFile))
329
0
      return Err;
330
331
0
    Expected<std::unique_ptr<FileOutputBuffer>> OutputOrErr =
332
0
        FileOutputBuffer::create(TempFile, Binary.getImage().size());
333
0
    if (!OutputOrErr)
334
0
      return OutputOrErr.takeError();
335
0
    std::unique_ptr<FileOutputBuffer> Output = std::move(*OutputOrErr);
336
0
    std::copy(Binary.getImage().bytes_begin(), Binary.getImage().bytes_end(),
337
0
              Output->getBufferStart());
338
0
    if (Error E = Output->commit())
339
0
      return E;
340
341
0
    DeviceFiles.emplace_back(Binary.getOffloadKind(), Binary.getTriple(),
342
0
                             Binary.getArch(), TempFile);
343
344
0
    Offset += Binary.getSize();
345
0
  }
346
347
0
  return Error::success();
348
0
}
349
350
/// Extracts the device images embedded in the object file \p Obj and appends
/// them to \p DeviceFiles. Only sections named `.llvm.offloading` are
/// inspected; the stem of the object's file name is used as the prefix of the
/// extracted files.
///
/// \returns an error if the contents of an offloading section cannot be read
/// or extracted. Sections whose name cannot be read are skipped.
Error extractFromBinary(const ObjectFile &Obj,
                        SmallVectorImpl<DeviceFile> &DeviceFiles) {
  StringRef Prefix = sys::path::stem(Obj.getFileName());

  // Extract offloading binaries from sections with the name `.llvm.offloading`.
  for (const SectionRef &Sec : Obj.sections()) {
    Expected<StringRef> Name = Sec.getName();
    if (!Name) {
      // A failed Expected must have its error consumed before it is
      // destroyed; otherwise builds with error checking enabled abort.
      consumeError(Name.takeError());
      continue;
    }
    if (!Name->equals(OFFLOAD_SECTION_MAGIC_STR))
      continue;

    Expected<StringRef> Contents = Sec.getContents();
    if (!Contents)
      return Contents.takeError();

    if (Error Err = extractOffloadFiles(*Contents, Prefix, DeviceFiles))
      return Err;
  }

  return Error::success();
}
370
371
/// Extracts the device images embedded in the bitcode file held by
/// \p Buffer and appends them to \p DeviceFiles. The images are taken from
/// the initializers of global variables placed in the `.llvm.offloading`
/// section. The prefix of the extracted files is the stem of the module name
/// up to its first '-'.
///
/// \returns an error if the module cannot be loaded or an image cannot be
/// extracted.
Error extractFromBitcode(std::unique_ptr<MemoryBuffer> Buffer,
                         SmallVectorImpl<DeviceFile> &DeviceFiles) {
  LLVMContext Context;
  SMDiagnostic Err;
  std::unique_ptr<Module> M = getLazyIRModule(std::move(Buffer), Err, Context);
  if (!M)
    return createStringError(inconvertibleErrorCode(),
                             "Failed to create module");

  StringRef Prefix =
      sys::path::stem(M->getName()).take_until([](char C) { return C == '-'; });

  // Extract offloading data from globals with the `.llvm.offloading` section
  // name.
  for (GlobalVariable &GV : M->globals()) {
    if (!GV.hasSection() || !GV.getSection().equals(OFFLOAD_SECTION_MAGIC_STR))
      continue;

    // A global without an initializer carries no data; asking for its
    // initializer is not valid, so skip it.
    if (!GV.hasInitializer())
      continue;

    auto *CDS = dyn_cast<ConstantDataSequential>(GV.getInitializer());
    if (!CDS)
      continue;

    StringRef Contents = CDS->getAsString();

    if (Error Err = extractOffloadFiles(Contents, Prefix, DeviceFiles))
      return Err;
  }

  return Error::success();
}
401
402
/// Extracts device images from every member of the static archive
/// \p Library and appends them to \p DeviceFiles. Members whose data does not
/// start at an address aligned as required by OffloadBinary are copied into a
/// fresh buffer before they are inspected.
Error extractFromArchive(const Archive &Library,
                         SmallVectorImpl<DeviceFile> &DeviceFiles) {
  Error IterationErr = Error::success();
  for (auto Member : Library.children(IterationErr)) {
    auto MemberRefOrErr = Member.getMemoryBufferRef();
    if (!MemberRefOrErr)
      return MemberRefOrErr.takeError();

    // Reference the member in place when its address is suitably aligned,
    // otherwise work on a copy.
    const bool IsAligned =
        isAddrAligned(Align(OffloadBinary::getAlignment()),
                      MemberRefOrErr->getBufferStart());
    std::unique_ptr<MemoryBuffer> MemberBuffer =
        IsAligned ? MemoryBuffer::getMemBuffer(*MemberRefOrErr, false)
                  : MemoryBuffer::getMemBufferCopy(
                        MemberRefOrErr->getBuffer(),
                        MemberRefOrErr->getBufferIdentifier());

    if (Error ExtractErr =
            extractFromBuffer(std::move(MemberBuffer), DeviceFiles))
      return ExtractErr;
  }

  // Report a failure of the archive iteration itself.
  if (IterationErr)
    return IterationErr;
  return Error::success();
}
428
429
/// Extracts embedded device offloading code from a memory \p Buffer to a list
430
/// of \p DeviceFiles.
431
Error extractFromBuffer(std::unique_ptr<MemoryBuffer> Buffer,
432
0
                        SmallVectorImpl<DeviceFile> &DeviceFiles) {
433
0
  file_magic Type = identify_magic(Buffer->getBuffer());
434
0
  switch (Type) {
435
0
  case file_magic::bitcode:
436
0
    return extractFromBitcode(std::move(Buffer), DeviceFiles);
437
0
  case file_magic::elf_relocatable:
438
0
  case file_magic::macho_object:
439
0
  case file_magic::coff_object: {
440
0
    Expected<std::unique_ptr<ObjectFile>> ObjFile =
441
0
        ObjectFile::createObjectFile(*Buffer, Type);
442
0
    if (!ObjFile)
443
0
      return ObjFile.takeError();
444
0
    return extractFromBinary(*ObjFile->get(), DeviceFiles);
445
0
  }
446
0
  case file_magic::archive: {
447
0
    Expected<std::unique_ptr<llvm::object::Archive>> LibFile =
448
0
        object::Archive::create(*Buffer);
449
0
    if (!LibFile)
450
0
      return LibFile.takeError();
451
0
    return extractFromArchive(*LibFile->get(), DeviceFiles);
452
0
  }
453
0
  default:
454
0
    return Error::success();
455
0
  }
456
0
}
457
458
// TODO: Move these to a separate file.
459
namespace nvptx {
460
/// Assembles \p InputFile for \p Arch by invoking ptxas.
///
/// \param InputFile file handed to ptxas as its input.
/// \param TheTriple selects `-m64` or `-m32` and names the output file.
/// \param Arch value passed to `--gpu-name`.
/// \param RDC when true, `-c` is added to the command line.
/// \returns the name of the produced cubin file, or an error if ptxas could
/// not be found, the output file could not be created or the command failed.
Expected<std::string> assemble(StringRef InputFile, Triple TheTriple,
                               StringRef Arch, bool RDC = true) {
  // NVPTX uses the ptxas binary to create device object files.
  Expected<std::string> Ptxas = findProgram("ptxas", {CudaBinaryPath});
  if (!Ptxas)
    return Ptxas.takeError();

  // Create a new file to write the assembled device object to.
  SmallString<128> OutputFile;
  if (Error Err =
          createOutputFile(sys::path::filename(ExecutableName) + "-device-" +
                               TheTriple.getArchName() + "-" + Arch,
                           "cubin", OutputFile))
    return std::move(Err);

  // Keep the optimization flag alive for as long as the argument list, which
  // only stores references.
  std::string OptFlag = "-" + OptLevel;

  SmallVector<StringRef, 16> Command;
  Command.push_back(*Ptxas);
  Command.push_back(TheTriple.isArch64Bit() ? "-m64" : "-m32");
  if (Verbose)
    Command.push_back("-v");

  // Debug flags are only forwarded for unoptimized ('O0') builds.
  if (DebugInfo != NoDebugInfo && OptLevel[1] == '0')
    Command.push_back(DebugInfo == DirectivesOnly ? "-lineinfo" : "-g");

  for (const std::string &Extra : PtxasArgs)
    Command.push_back(Extra);

  Command.push_back("-o");
  Command.push_back(OutputFile);
  Command.push_back(OptFlag);
  Command.push_back("--gpu-name");
  Command.push_back(Arch);
  if (RDC)
    Command.push_back("-c");

  Command.push_back(InputFile);

  if (Error Err = executeCommands(*Ptxas, Command))
    return std::move(Err);

  return static_cast<std::string>(OutputFile);
}
502
503
/// Links the device object files \p InputFiles for \p Arch by invoking
/// nvlink.
///
/// \returns the name of the linked device image, or an error if nvlink could
/// not be found, the output file could not be created or the command failed.
Expected<std::string> link(ArrayRef<std::string> InputFiles, Triple TheTriple,
                           StringRef Arch) {
  // NVPTX uses the nvlink binary to link device object files.
  Expected<std::string> Nvlink = findProgram("nvlink", {CudaBinaryPath});
  if (!Nvlink)
    return Nvlink.takeError();

  // Create a new file to write the linked device image to.
  SmallString<128> OutputFile;
  if (Error Err =
          createOutputFile(sys::path::filename(ExecutableName) + "-device-" +
                               TheTriple.getArchName() + "-" + Arch,
                           "out", OutputFile))
    return std::move(Err);

  SmallVector<StringRef, 16> Command;
  Command.push_back(*Nvlink);
  Command.push_back(TheTriple.isArch64Bit() ? "-m64" : "-m32");
  if (Verbose)
    Command.push_back("-v");
  if (DebugInfo != NoDebugInfo)
    Command.push_back("-g");
  Command.push_back("-o");
  Command.push_back(OutputFile);
  Command.push_back("-arch");
  Command.push_back(Arch);

  // The extracted input files come last.
  for (const std::string &Input : InputFiles)
    Command.push_back(Input);

  if (Error Err = executeCommands(*Nvlink, Command))
    return std::move(Err);

  return static_cast<std::string>(OutputFile);
}
539
540
/// Bundles the linked images \p InputFiles into one fatbinary by invoking the
/// fatbinary program. The i-th input file is registered with the i-th entry
/// of \p Archs as its profile.
///
/// \returns the name of the created fatbinary, or an error if the program
/// could not be found, the output file could not be created or the command
/// failed.
Expected<std::string> fatbinary(ArrayRef<StringRef> InputFiles,
                                Triple TheTriple, ArrayRef<StringRef> Archs) {
  // NVPTX uses the fatbinary program to bundle the linked images.
  Expected<std::string> Tool = findProgram("fatbinary", {CudaBinaryPath});
  if (!Tool)
    return Tool.takeError();

  // Create a new file to write the bundled device images to.
  SmallString<128> OutputFile;
  if (Error Err = createOutputFile(sys::path::filename(ExecutableName) +
                                       "-device-" + TheTriple.getArchName(),
                                   "fatbin", OutputFile))
    return std::move(Err);

  // The image arguments are built on the fly; the saver keeps them alive
  // while the argument list references them.
  BumpPtrAllocator Allocator;
  StringSaver ArgSaver(Allocator);

  SmallVector<StringRef, 16> Command;
  Command.push_back(*Tool);
  Command.push_back(TheTriple.isArch64Bit() ? "-64" : "-32");
  Command.push_back("--create");
  Command.push_back(OutputFile);
  for (const auto &Entry : llvm::zip(InputFiles, Archs)) {
    StringRef File = std::get<0>(Entry);
    StringRef Profile = std::get<1>(Entry);
    Command.push_back(
        ArgSaver.save("--image=profile=" + Profile + ",file=" + File));
  }

  if (Error Err = executeCommands(*Tool, Command))
    return std::move(Err);

  return static_cast<std::string>(OutputFile);
}
572
} // namespace nvptx
573
namespace amdgcn {
574
/// Links the device object files \p InputFiles by invoking lld with the gnu
/// flavor, producing a shared object with no undefined symbols allowed.
///
/// \returns the name of the linked device image, or an error if lld could not
/// be found, the output file could not be created or the command failed.
Expected<std::string> link(ArrayRef<std::string> InputFiles, Triple TheTriple,
                           StringRef Arch) {
  // AMDGPU uses lld to link device object files. Look next to this
  // executable first.
  Expected<std::string> Linker =
      findProgram("lld", {getMainExecutable("lld")});
  if (!Linker)
    return Linker.takeError();

  // Create a new file to write the linked device image to.
  SmallString<128> OutputFile;
  if (Error Err = createOutputFile(sys::path::filename(ExecutableName) + "-" +
                                       TheTriple.getArchName() + "-" + Arch,
                                   "out", OutputFile))
    return std::move(Err);

  SmallVector<StringRef, 16> Command;
  Command.push_back(*Linker);
  for (const char *Flag :
       {"-flavor", "gnu", "--no-undefined", "-shared", "-o"})
    Command.push_back(Flag);
  Command.push_back(OutputFile);

  // The extracted input files come last.
  for (const std::string &Input : InputFiles)
    Command.push_back(Input);

  if (Error Err = executeCommands(*Linker, Command))
    return std::move(Err);

  return static_cast<std::string>(OutputFile);
}
607
} // namespace amdgcn
608
609
namespace generic {
610
611
0
/// Maps the architecture of \p T to the name passed to the linker's `-m`
/// option. Returns nullptr for architectures without an entry.
const char *getLDMOption(const llvm::Triple &T) {
  switch (T.getArch()) {
  case llvm::Triple::x86:
    return T.isOSIAMCU() ? "elf_iamcu" : "elf_i386";
  case llvm::Triple::x86_64:
    return T.isX32() ? "elf32_x86_64" : "elf_x86_64";
  case llvm::Triple::aarch64:
    return "aarch64linux";
  case llvm::Triple::aarch64_be:
    return "aarch64linuxb";
  case llvm::Triple::ppc64:
    return "elf64ppc";
  case llvm::Triple::ppc64le:
    return "elf64lppc";
  case llvm::Triple::ve:
    return "elf64ve";
  default:
    return nullptr;
  }
}
635
636
/// Links the device object files \p InputFiles into a shared object using
/// the host linker given by `-linker-path`. Library search paths, libraries,
/// `--as-needed` / `--no-as-needed`, `-rpath` and `-dynamic-linker` are taken
/// over from the host link command.
///
/// \returns the name of the linked device image, or an error if the
/// architecture has no linker emulation, the output file could not be
/// created or the command failed.
Expected<std::string> link(ArrayRef<std::string> InputFiles, Triple TheTriple,
                           StringRef Arch) {
  // Without a known emulation there is nothing valid to pass to `-m`.
  const char *Emulation = getLDMOption(TheTriple);
  if (!Emulation)
    return createStringError(inconvertibleErrorCode(),
                             TheTriple.getArchName() +
                                 " linking is not supported");

  // Create a new file to write the linked device image to.
  SmallString<128> TempFile;
  if (Error Err = createOutputFile(sys::path::filename(ExecutableName) + "-" +
                                       TheTriple.getArchName() + "-" + Arch,
                                   "out", TempFile))
    return std::move(Err);

  // Use the host linker to perform generic offloading. Use the same libraries
  // and paths as the host application does.
  SmallVector<StringRef, 16> CmdArgs;
  CmdArgs.push_back(LinkerUserPath);
  CmdArgs.push_back("-m");
  CmdArgs.push_back(Emulation);
  CmdArgs.push_back("-shared");
  for (auto AI = HostLinkerArgs.begin(), AE = HostLinkerArgs.end(); AI != AE;
       ++AI) {
    StringRef Arg = *AI;
    if (Arg.startswith("-L") || Arg.startswith("-l") ||
        Arg.startswith("--as-needed") || Arg.startswith("--no-as-needed")) {
      CmdArgs.push_back(Arg);
    } else if (Arg.startswith("-rpath") ||
               Arg.startswith("-dynamic-linker")) {
      // These options carry their value in the following argument. Only
      // forward it if there is one; reading past the end of the list is
      // undefined behavior.
      CmdArgs.push_back(Arg);
      auto Value = std::next(AI);
      if (Value != AE)
        CmdArgs.push_back(*Value);
    }
  }
  CmdArgs.push_back("-Bsymbolic");
  CmdArgs.push_back("-o");
  CmdArgs.push_back(TempFile);

  // Add extracted input files.
  for (StringRef Input : InputFiles)
    CmdArgs.push_back(Input);

  if (Error Err = executeCommands(LinkerUserPath, CmdArgs))
    return std::move(Err);

  return static_cast<std::string>(TempFile);
}
684
} // namespace generic
685
686
/// Links \p InputFiles with the device linker that matches the architecture
/// of \p TheTriple: nvlink for NVPTX, lld for AMDGCN and the host linker for
/// x86, x86_64, aarch64, aarch64_be, ppc64 and ppc64le.
///
/// \returns the name of the linked image or an error; every other
/// architecture is reported as unsupported.
Expected<std::string> linkDevice(ArrayRef<std::string> InputFiles,
                                 Triple TheTriple, StringRef Arch) {
  const Triple::ArchType Target = TheTriple.getArch();

  if (Target == Triple::nvptx || Target == Triple::nvptx64)
    return nvptx::link(InputFiles, TheTriple, Arch);

  if (Target == Triple::amdgcn)
    return amdgcn::link(InputFiles, TheTriple, Arch);

  const bool UsesHostLinker =
      Target == Triple::x86 || Target == Triple::x86_64 ||
      Target == Triple::aarch64 || Target == Triple::aarch64_be ||
      Target == Triple::ppc64 || Target == Triple::ppc64le;
  if (UsesHostLinker)
    return generic::link(InputFiles, TheTriple, Arch);

  return createStringError(inconvertibleErrorCode(),
                           TheTriple.getArchName() +
                               " linking is not supported");
}
707
708
0
void diagnosticHandler(const DiagnosticInfo &DI) {
709
0
  std::string ErrStorage;
710
0
  raw_string_ostream OS(ErrStorage);
711
0
  DiagnosticPrinterRawOStream DP(OS);
712
0
  DI.print(DP);
713
714
0
  switch (DI.getSeverity()) {
715
0
  case DS_Error:
716
0
    WithColor::error(errs(), LinkerExecutable) << ErrStorage << "\n";
717
0
    break;
718
0
  case DS_Warning:
719
0
    WithColor::warning(errs(), LinkerExecutable) << ErrStorage << "\n";
720
0
    break;
721
0
  case DS_Note:
722
0
    WithColor::note(errs(), LinkerExecutable) << ErrStorage << "\n";
723
0
    break;
724
0
  case DS_Remark:
725
0
    WithColor::remark(errs()) << ErrStorage << "\n";
726
0
    break;
727
0
  }
728
0
}
729
730
// Get the target features passed in from the driver as <triple>=<features>.
731
0
std::vector<std::string> getTargetFeatures(const Triple &TheTriple) {
732
0
  std::vector<std::string> Features;
733
0
  auto TargetAndFeatures = StringRef(TargetFeatures).split('=');
734
0
  if (TargetAndFeatures.first != TheTriple.getTriple())
735
0
    return Features;
736
737
0
  for (auto Feature : llvm::split(TargetAndFeatures.second, ','))
738
0
    Features.push_back(Feature.str());
739
0
  return Features;
740
0
}
741
742
0
/// Translates the numeric optimization level \p OptLevel (0 to 3) into the
/// matching code generation level. Any other value is treated as
/// unreachable.
CodeGenOpt::Level getCGOptLevel(unsigned OptLevel) {
  if (OptLevel == 0)
    return CodeGenOpt::None;
  if (OptLevel == 1)
    return CodeGenOpt::Less;
  if (OptLevel == 2)
    return CodeGenOpt::Default;
  if (OptLevel == 3)
    return CodeGenOpt::Aggressive;

  llvm_unreachable("Invalid optimization level");
}
755
756
template <typename ModuleHook = function_ref<bool(size_t, const Module &)>>
757
std::unique_ptr<lto::LTO> createLTO(
758
    const Triple &TheTriple, StringRef Arch, bool WholeProgram,
759
0
    ModuleHook Hook = [](size_t, const Module &) { return true; }) {
760
0
  lto::Config Conf;
761
0
  lto::ThinBackend Backend;
762
  // TODO: Handle index-only thin-LTO
763
0
  Backend =
764
0
      lto::createInProcessThinBackend(llvm::heavyweight_hardware_concurrency());
765
766
0
  Conf.CPU = Arch.str();
767
0
  Conf.Options = codegen::InitTargetOptionsFromCodeGenFlags(TheTriple);
768
769
0
  Conf.MAttrs = getTargetFeatures(TheTriple);
770
0
  Conf.CGOptLevel = getCGOptLevel(OptLevel[1] - '0');
771
0
  Conf.OptLevel = OptLevel[1] - '0';
772
0
  if (Conf.OptLevel > 0)
773
0
    Conf.UseDefaultPipeline = true;
774
0
  Conf.DefaultTriple = TheTriple.getTriple();
775
0
  Conf.DiagHandler = diagnosticHandler;
776
777
0
  Conf.PTO.LoopVectorization = Conf.OptLevel > 1;
778
0
  Conf.PTO.SLPVectorization = Conf.OptLevel > 1;
779
780
0
  if (SaveTemps) {
781
0
    auto HandleError = [&](Error Err) {
782
0
      logAllUnhandledErrors(std::move(Err),
783
0
                            WithColor::error(errs(), LinkerExecutable));
784
0
      exit(1);
785
0
    };
Unexecuted instantiation: ClangLinkerWrapper.cpp:std::__1::unique_ptr<llvm::lto::LTO, std::__1::default_delete<llvm::lto::LTO> > (anonymous namespace)::createLTO<(anonymous namespace)::linkBitcodeFiles(llvm::SmallVectorImpl<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >&, llvm::Triple const&, llvm::StringRef, bool&)::$_5>(llvm::Triple const&, llvm::StringRef, bool, (anonymous namespace)::linkBitcodeFiles(llvm::SmallVectorImpl<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >&, llvm::Triple const&, llvm::StringRef, bool&)::$_5)::'lambda'(llvm::Error)::operator()(llvm::Error) const
Unexecuted instantiation: ClangLinkerWrapper.cpp:std::__1::unique_ptr<llvm::lto::LTO, std::__1::default_delete<llvm::lto::LTO> > (anonymous namespace)::createLTO<llvm::function_ref<bool (unsigned long, llvm::Module const&)> >(llvm::Triple const&, llvm::StringRef, bool, llvm::function_ref<bool (unsigned long, llvm::Module const&)>)::'lambda'(llvm::Error)::operator()(llvm::Error) const
786
0
    Conf.PostInternalizeModuleHook = [&](size_t, const Module &M) {
787
0
      SmallString<128> TempFile;
788
0
      if (Error Err = createOutputFile(sys::path::filename(ExecutableName) +
789
0
                                           "-device-" + TheTriple.getTriple(),
790
0
                                       "bc", TempFile))
791
0
        HandleError(std::move(Err));
792
793
0
      std::error_code EC;
794
0
      raw_fd_ostream LinkedBitcode(TempFile, EC, sys::fs::OF_None);
795
0
      if (EC)
796
0
        HandleError(errorCodeToError(EC));
797
0
      WriteBitcodeToFile(M, LinkedBitcode);
798
0
      return true;
799
0
    };
Unexecuted instantiation: ClangLinkerWrapper.cpp:std::__1::unique_ptr<llvm::lto::LTO, std::__1::default_delete<llvm::lto::LTO> > (anonymous namespace)::createLTO<(anonymous namespace)::linkBitcodeFiles(llvm::SmallVectorImpl<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >&, llvm::Triple const&, llvm::StringRef, bool&)::$_5>(llvm::Triple const&, llvm::StringRef, bool, (anonymous namespace)::linkBitcodeFiles(llvm::SmallVectorImpl<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >&, llvm::Triple const&, llvm::StringRef, bool&)::$_5)::'lambda'(unsigned long, llvm::Module const&)::operator()(unsigned long, llvm::Module const&) const
Unexecuted instantiation: ClangLinkerWrapper.cpp:std::__1::unique_ptr<llvm::lto::LTO, std::__1::default_delete<llvm::lto::LTO> > (anonymous namespace)::createLTO<llvm::function_ref<bool (unsigned long, llvm::Module const&)> >(llvm::Triple const&, llvm::StringRef, bool, llvm::function_ref<bool (unsigned long, llvm::Module const&)>)::'lambda'(unsigned long, llvm::Module const&)::operator()(unsigned long, llvm::Module const&) const
800
0
  }
801
0
  Conf.PostOptModuleHook = Hook;
802
0
  if (TheTriple.isNVPTX())
803
0
    Conf.CGFileType = CGFT_AssemblyFile;
804
0
  else
805
0
    Conf.CGFileType = CGFT_ObjectFile;
806
807
  // TODO: Handle remark files
808
0
  Conf.HasWholeProgramVisibility = WholeProgram;
809
810
0
  return std::make_unique<lto::LTO>(std::move(Conf), Backend);
811
0
}
Unexecuted instantiation: ClangLinkerWrapper.cpp:std::__1::unique_ptr<llvm::lto::LTO, std::__1::default_delete<llvm::lto::LTO> > (anonymous namespace)::createLTO<(anonymous namespace)::linkBitcodeFiles(llvm::SmallVectorImpl<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >&, llvm::Triple const&, llvm::StringRef, bool&)::$_5>(llvm::Triple const&, llvm::StringRef, bool, (anonymous namespace)::linkBitcodeFiles(llvm::SmallVectorImpl<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >&, llvm::Triple const&, llvm::StringRef, bool&)::$_5)
Unexecuted instantiation: ClangLinkerWrapper.cpp:std::__1::unique_ptr<llvm::lto::LTO, std::__1::default_delete<llvm::lto::LTO> > (anonymous namespace)::createLTO<llvm::function_ref<bool (unsigned long, llvm::Module const&)> >(llvm::Triple const&, llvm::StringRef, bool, llvm::function_ref<bool (unsigned long, llvm::Module const&)>)
812
813
// Returns true if \p S is valid as a C language identifier and will be given
814
// `__start_` and `__stop_` symbols.
815
0
bool isValidCIdentifier(StringRef S) {
816
0
  return !S.empty() && (isAlpha(S[0]) || S[0] == '_') &&
817
0
         std::all_of(S.begin() + 1, S.end(),
818
0
                     [](char C) { return C == '_' || isAlnum(C); });
819
0
}
820
821
/// Runs LTO over the bitcode files found in \p InputFiles.
///
/// Object files, shared objects and CUDA fatbinaries are scanned for their
/// symbol names, which feed the simplified symbol resolution handed to LTO.
/// If no bitcode file is present the function returns successfully without
/// touching its arguments.
///
/// \param InputFiles   On input, the files for one device. If any bitcode
///                     was found, it is replaced by the object-like inputs
///                     that could be parsed followed by the files produced
///                     by LTO.
/// \param TheTriple    Target triple of the device.
/// \param Arch         Target architecture of the device.
/// \param WholeProgram Set to true if every input file was bitcode; only
///                     written when at least one bitcode file was found.
/// \returns an error if a file cannot be read or parsed, or if LTO or the
///          NVPTX assembler fails.
Error linkBitcodeFiles(SmallVectorImpl<std::string> &InputFiles,
                       const Triple &TheTriple, StringRef Arch,
                       bool &WholeProgram) {
  SmallVector<std::unique_ptr<MemoryBuffer>, 4> SavedBuffers;
  SmallVector<std::unique_ptr<lto::InputFile>, 4> BitcodeFiles;
  SmallVector<std::string, 4> NewInputFiles;
  DenseSet<StringRef> UsedInRegularObj;
  DenseSet<StringRef> UsedInSharedLib;
  BumpPtrAllocator Alloc;
  StringSaver Saver(Alloc);

  // Search for bitcode files in the input and create an LTO input file. If it
  // is not a bitcode file, scan its symbol table for symbols we need to
  // save.
  for (StringRef File : InputFiles) {
    ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
        MemoryBuffer::getFileOrSTDIN(File);
    if (std::error_code EC = BufferOrErr.getError())
      return createFileError(File, EC);
    MemoryBufferRef Buffer = **BufferOrErr;

    file_magic Type = identify_magic((*BufferOrErr)->getBuffer());
    switch (Type) {
    case file_magic::bitcode: {
      Expected<std::unique_ptr<lto::InputFile>> InputFileOrErr =
          llvm::lto::InputFile::create(Buffer);
      if (!InputFileOrErr)
        return InputFileOrErr.takeError();

      // Save the input file and the buffer associated with its memory.
      BitcodeFiles.push_back(std::move(*InputFileOrErr));
      SavedBuffers.push_back(std::move(*BufferOrErr));
      continue;
    }
    case file_magic::cuda_fatbinary: {
      // Cuda fatbinaries made by Clang almost always have an object eighty
      // bytes from the beginning. This should be sufficient to identify the
      // symbols.
      Buffer = MemoryBufferRef(
          (*BufferOrErr)->getBuffer().drop_front(FatbinaryOffset), "FatBinary");
      LLVM_FALLTHROUGH;
    }
    case file_magic::elf_relocatable:
    case file_magic::elf_shared_object:
    case file_magic::macho_object:
    case file_magic::coff_object: {
      Expected<std::unique_ptr<ObjectFile>> ObjFile =
          ObjectFile::createObjectFile(Buffer);
      if (!ObjFile) {
        // Files that cannot be parsed are skipped on purpose, but the error
        // still has to be consumed before the Expected is destroyed.
        consumeError(ObjFile.takeError());
        continue;
      }

      NewInputFiles.push_back(File.str());
      for (auto &Sym : (*ObjFile)->symbols()) {
        Expected<StringRef> Name = Sym.getName();
        if (!Name)
          return Name.takeError();

        // Record if we've seen these symbols in any object or shared libraries.
        if ((*ObjFile)->isRelocatableObject())
          UsedInRegularObj.insert(Saver.save(*Name));
        else
          UsedInSharedLib.insert(Saver.save(*Name));
      }
      continue;
    }
    default:
      continue;
    }
  }

  if (BitcodeFiles.empty())
    return Error::success();

  auto HandleError = [&](Error Err) {
    logAllUnhandledErrors(std::move(Err),
                          WithColor::error(errs(), LinkerExecutable));
    exit(1);
  };

  // LTO Module hook to output bitcode without running the backend.
  auto OutputBitcode = [&](size_t Task, const Module &M) {
    SmallString<128> TempFile;
    if (Error Err = createOutputFile(sys::path::filename(ExecutableName) +
                                         "-jit-" + TheTriple.getTriple(),
                                     "bc", TempFile))
      HandleError(std::move(Err));

    std::error_code EC;
    raw_fd_ostream LinkedBitcode(TempFile, EC, sys::fs::OF_None);
    if (EC)
      HandleError(errorCodeToError(EC));
    WriteBitcodeToFile(M, LinkedBitcode);
    NewInputFiles.push_back(static_cast<std::string>(TempFile));
    return false;
  };

  // We assume visibility of the whole program if every input file was bitcode.
  WholeProgram = BitcodeFiles.size() == InputFiles.size();
  auto LTOBackend =
      (EmbedBitcode) ? createLTO(TheTriple, Arch, WholeProgram, OutputBitcode)
                     : createLTO(TheTriple, Arch, WholeProgram);

  // We need to resolve the symbols so the LTO backend knows which symbols need
  // to be kept or can be internalized. This is a simplified symbol resolution
  // scheme to approximate the full resolution a linker would do.
  DenseSet<StringRef> PrevailingSymbols;
  for (auto &BitcodeFile : BitcodeFiles) {
    const auto Symbols = BitcodeFile->symbols();
    SmallVector<lto::SymbolResolution, 16> Resolutions(Symbols.size());
    size_t Idx = 0;
    for (auto &Sym : Symbols) {
      lto::SymbolResolution &Res = Resolutions[Idx++];

      // We will use this as the prevailing symbol definition in LTO unless
      // it is undefined or another definition has already been used.
      Res.Prevailing =
          !Sym.isUndefined() &&
          PrevailingSymbols.insert(Saver.save(Sym.getName())).second;

      // We need LTO to preserve the following global symbols:
      // 1) Symbols used in regular objects.
      // 2) Sections that will be given a __start/__stop symbol.
      // 3) Prevailing symbols that are needed visible to external libraries.
      Res.VisibleToRegularObj =
          UsedInRegularObj.contains(Sym.getName()) ||
          isValidCIdentifier(Sym.getSectionName()) ||
          (Res.Prevailing &&
           (Sym.getVisibility() != GlobalValue::HiddenVisibility &&
            !Sym.canBeOmittedFromSymbolTable()));

      // Identify symbols that must be exported dynamically and can be
      // referenced by other files.
      Res.ExportDynamic =
          Sym.getVisibility() != GlobalValue::HiddenVisibility &&
          (UsedInSharedLib.contains(Sym.getName()) ||
           !Sym.canBeOmittedFromSymbolTable());

      // The final definition will reside in this linkage unit if the symbol is
      // defined and local to the module. This only checks for bitcode files,
      // full assertion will require complete symbol resolution.
      Res.FinalDefinitionInLinkageUnit =
          Sym.getVisibility() != GlobalValue::DefaultVisibility &&
          (!Sym.isUndefined() && !Sym.isCommon());

      // We do not support linker redefined symbols (e.g. --wrap) for device
      // image linking, so the symbols will not be changed after LTO.
      Res.LinkerRedefined = false;
    }

    // Add the bitcode file with its resolved symbols to the LTO job.
    if (Error Err = LTOBackend->add(std::move(BitcodeFile), Resolutions))
      return Err;
  }

  // Run the LTO job to compile the bitcode.
  size_t MaxTasks = LTOBackend->getMaxTasks();
  std::vector<SmallString<128>> Files(MaxTasks);
  auto AddStream = [&](size_t Task) -> std::unique_ptr<CachedFileStream> {
    int FD = -1;
    auto &TempFile = Files[Task];
    StringRef Extension = (TheTriple.isNVPTX()) ? "s" : "o";
    if (Error Err = createOutputFile(sys::path::filename(ExecutableName) +
                                         "-device-" + TheTriple.getTriple(),
                                     Extension, TempFile))
      HandleError(std::move(Err));
    if (std::error_code EC = sys::fs::openFileForWrite(TempFile, FD))
      HandleError(errorCodeToError(EC));
    return std::make_unique<CachedFileStream>(
        std::make_unique<llvm::raw_fd_ostream>(FD, true));
  };

  if (Error Err = LTOBackend->run(AddStream))
    return Err;

  // If we are compiling for NVPTX we need to run the assembler first.
  if (TheTriple.isNVPTX() && !EmbedBitcode) {
    for (auto &File : Files) {
      // A task that never requested an output stream leaves its entry empty;
      // there is nothing to assemble for it.
      if (File.empty())
        continue;
      auto FileOrErr = nvptx::assemble(File, TheTriple, Arch, !WholeProgram);
      if (!FileOrErr)
        return FileOrErr.takeError();
      File = *FileOrErr;
    }
  }

  // Append the new inputs to the device linker input. Entries that were never
  // written (e.g. when the module hook skipped the backend) are left out so
  // that no empty file name is passed on.
  for (auto &File : Files)
    if (!File.empty())
      NewInputFiles.push_back(static_cast<std::string>(File));
  InputFiles = NewInputFiles;

  return Error::success();
}
1012
1013
/// Runs the appropriate linking action on all the device files specified in \p
1014
/// DeviceFiles. The linked device images are returned in \p LinkedImages.
1015
Error linkDeviceFiles(ArrayRef<DeviceFile> DeviceFiles,
1016
                      ArrayRef<DeviceFile> LibraryFiles,
1017
0
                      SmallVectorImpl<DeviceFile> &LinkedImages) {
1018
  // Get the list of inputs and active offload kinds for a specific device.
1019
0
  DenseMap<DeviceFile, SmallVector<std::string, 4>> LinkerInputMap;
1020
0
  DenseMap<DeviceFile, DenseSet<OffloadKind>> ActiveOffloadKinds;
1021
0
  for (auto &File : DeviceFiles) {
1022
0
    LinkerInputMap[File].push_back(File.Filename);
1023
0
    ActiveOffloadKinds[File].insert(File.Kind);
1024
0
  }
1025
1026
  // Static libraries are loaded lazily as-needed, only add them if other files
1027
  // are present.
1028
  // TODO: We need to check the symbols as well, static libraries are only
1029
  //       loaded if they contain symbols that are currently undefined or common
1030
  //       in the symbol table.
1031
0
  for (auto &File : LibraryFiles)
1032
0
    if (LinkerInputMap.count(File))
1033
0
      LinkerInputMap[File].push_back(File.Filename);
1034
1035
  // Try to link each device toolchain.
1036
0
  for (auto &LinkerInput : LinkerInputMap) {
1037
0
    DeviceFile &File = LinkerInput.getFirst();
1038
0
    Triple TheTriple = Triple(File.TheTriple);
1039
0
    auto &LinkerInputFiles = LinkerInput.getSecond();
1040
0
    bool WholeProgram = false;
1041
1042
    // Run LTO on any bitcode files and replace the input with the result.
1043
0
    if (Error Err = linkBitcodeFiles(LinkerInputFiles, TheTriple, File.Arch,
1044
0
                                     WholeProgram))
1045
0
      return Err;
1046
1047
0
    if (EmbedBitcode) {
1048
      // If we are embedding bitcode for JIT, skip the final device linking.
1049
0
      if (LinkerInputFiles.size() != 1 || !WholeProgram)
1050
0
        return createStringError(inconvertibleErrorCode(),
1051
0
                                 "Unable to embed bitcode image for JIT");
1052
0
      LinkedImages.emplace_back(OFK_OpenMP, TheTriple.getTriple(), File.Arch,
1053
0
                                LinkerInputFiles.front());
1054
0
      continue;
1055
0
    }
1056
0
    if (WholeProgram && TheTriple.isNVPTX()) {
1057
      // If we performed LTO on NVPTX and had whole program visibility, we can
1058
      // use CUDA in non-RDC mode.
1059
0
      if (LinkerInputFiles.size() != 1)
1060
0
        return createStringError(inconvertibleErrorCode(),
1061
0
                                 "Invalid number of inputs for non-RDC mode");
1062
0
      for (OffloadKind Kind : ActiveOffloadKinds[LinkerInput.getFirst()])
1063
0
        LinkedImages.emplace_back(Kind, TheTriple.getTriple(), File.Arch,
1064
0
                                  LinkerInputFiles.front());
1065
0
      continue;
1066
0
    }
1067
1068
0
    auto ImageOrErr = linkDevice(LinkerInputFiles, TheTriple, File.Arch);
1069
0
    if (!ImageOrErr)
1070
0
      return ImageOrErr.takeError();
1071
1072
    // Create separate images for all the active offload kinds.
1073
0
    for (OffloadKind Kind : ActiveOffloadKinds[LinkerInput.getFirst()])
1074
0
      LinkedImages.emplace_back(Kind, TheTriple.getTriple(), File.Arch,
1075
0
                                *ImageOrErr);
1076
0
  }
1077
0
  return Error::success();
1078
0
}
1079
1080
// Compile the module to an object file using the appropriate target machine for
1081
// the host triple.
1082
0
Expected<std::string> compileModule(Module &M) {
1083
0
  std::string Msg;
1084
0
  const Target *T = TargetRegistry::lookupTarget(M.getTargetTriple(), Msg);
1085
0
  if (!T)
1086
0
    return createStringError(inconvertibleErrorCode(), Msg);
1087
1088
0
  auto Options =
1089
0
      codegen::InitTargetOptionsFromCodeGenFlags(Triple(M.getTargetTriple()));
1090
0
  StringRef CPU = "";
1091
0
  StringRef Features = "";
1092
0
  std::unique_ptr<TargetMachine> TM(T->createTargetMachine(
1093
0
      HostTriple, CPU, Features, Options, Reloc::PIC_, M.getCodeModel()));
1094
1095
0
  if (M.getDataLayout().isDefault())
1096
0
    M.setDataLayout(TM->createDataLayout());
1097
1098
0
  SmallString<128> ObjectFile;
1099
0
  int FD = -1;
1100
0
  if (Error Err = createOutputFile(
1101
0
          sys::path::filename(ExecutableName) + "-wrapper", "o", ObjectFile))
1102
0
    return std::move(Err);
1103
0
  if (std::error_code EC = sys::fs::openFileForWrite(ObjectFile, FD))
1104
0
    return errorCodeToError(EC);
1105
1106
0
  auto OS = std::make_unique<llvm::raw_fd_ostream>(FD, true);
1107
1108
0
  legacy::PassManager CodeGenPasses;
1109
0
  TargetLibraryInfoImpl TLII(Triple(M.getTargetTriple()));
1110
0
  CodeGenPasses.add(new TargetLibraryInfoWrapperPass(TLII));
1111
0
  if (TM->addPassesToEmitFile(CodeGenPasses, *OS, nullptr, CGFT_ObjectFile))
1112
0
    return createStringError(inconvertibleErrorCode(),
1113
0
                             "Failed to execute host backend");
1114
0
  CodeGenPasses.run(M);
1115
1116
0
  return static_cast<std::string>(ObjectFile);
1117
0
}
1118
1119
/// Load all of the OpenMP images into a buffer and pass it to the binary
1120
/// wrapping function to create the registration code in the module \p M.
1121
0
Error wrapOpenMPImages(Module &M, ArrayRef<DeviceFile> Images) {
1122
0
  SmallVector<std::unique_ptr<MemoryBuffer>, 4> SavedBuffers;
1123
0
  SmallVector<ArrayRef<char>, 4> ImagesToWrap;
1124
0
  for (const DeviceFile &File : Images) {
1125
0
    llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> ImageOrError =
1126
0
        llvm::MemoryBuffer::getFileOrSTDIN(File.Filename);
1127
0
    if (std::error_code EC = ImageOrError.getError())
1128
0
      return createFileError(File.Filename, EC);
1129
0
    ImagesToWrap.emplace_back((*ImageOrError)->getBufferStart(),
1130
0
                              (*ImageOrError)->getBufferSize());
1131
0
    SavedBuffers.emplace_back(std::move(*ImageOrError));
1132
0
  }
1133
1134
0
  if (Error Err = wrapOpenMPBinaries(M, ImagesToWrap))
1135
0
    return Err;
1136
0
  return Error::success();
1137
0
}
1138
1139
/// Combine all of the CUDA images into a single fatbinary and pass it to the
1140
/// binary wrapping function to create the registration code in the module \p M.
1141
0
Error wrapCudaImages(Module &M, ArrayRef<DeviceFile> Images) {
1142
0
  SmallVector<StringRef, 4> InputFiles;
1143
0
  SmallVector<StringRef, 4> Architectures;
1144
0
  for (const DeviceFile &File : Images) {
1145
0
    InputFiles.push_back(File.Filename);
1146
0
    Architectures.push_back(File.Arch);
1147
0
  }
1148
1149
  // CUDA expects its embedded device images to be a fatbinary.
1150
0
  Triple TheTriple = Triple(Images.front().TheTriple);
1151
0
  auto FileOrErr = nvptx::fatbinary(InputFiles, TheTriple, Architectures);
1152
0
  if (!FileOrErr)
1153
0
    return FileOrErr.takeError();
1154
1155
0
  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> ImageOrError =
1156
0
      llvm::MemoryBuffer::getFileOrSTDIN(*FileOrErr);
1157
0
  if (std::error_code EC = ImageOrError.getError())
1158
0
    return createFileError(*FileOrErr, EC);
1159
1160
0
  auto ImageToWrap = ArrayRef<char>((*ImageOrError)->getBufferStart(),
1161
0
                                    (*ImageOrError)->getBufferSize());
1162
1163
0
  if (Error Err = wrapCudaBinary(M, ImageToWrap))
1164
0
    return Err;
1165
0
  return Error::success();
1166
0
}
1167
1168
/// Creates the object file containing the device image and runtime
1169
/// registration code from the device images stored in \p Images.
1170
Expected<SmallVector<std::string, 2>>
1171
0
wrapDeviceImages(ArrayRef<DeviceFile> Images) {
1172
0
  DenseMap<OffloadKind, SmallVector<DeviceFile, 2>> ImagesForKind;
1173
0
  for (const DeviceFile &Image : Images)
1174
0
    ImagesForKind[Image.Kind].push_back(Image);
1175
1176
0
  SmallVector<std::string, 2> WrappedImages;
1177
0
  for (const auto &KindAndImages : ImagesForKind) {
1178
0
    LLVMContext Context;
1179
0
    Module M("offload.wrapper.module", Context);
1180
0
    M.setTargetTriple(HostTriple);
1181
1182
    // Create registration code for the given offload kinds in the Module.
1183
0
    switch (KindAndImages.getFirst()) {
1184
0
    case OFK_OpenMP:
1185
0
      if (Error Err = wrapOpenMPImages(M, KindAndImages.getSecond()))
1186
0
        return std::move(Err);
1187
0
      break;
1188
0
    case OFK_Cuda:
1189
0
      if (Error Err = wrapCudaImages(M, KindAndImages.getSecond()))
1190
0
        return std::move(Err);
1191
0
      break;
1192
0
    default:
1193
0
      return createStringError(inconvertibleErrorCode(),
1194
0
                               getOffloadKindName(KindAndImages.getFirst()) +
1195
0
                                   " wrapping is not supported");
1196
0
    }
1197
1198
0
    if (PrintWrappedModule)
1199
0
      llvm::errs() << M;
1200
1201
0
    auto FileOrErr = compileModule(M);
1202
0
    if (!FileOrErr)
1203
0
      return FileOrErr.takeError();
1204
0
    WrappedImages.push_back(*FileOrErr);
1205
0
  }
1206
1207
0
  return WrappedImages;
1208
0
}
1209
1210
0
/// Looks for the file \p Name inside the directory \p Dir. A directory that
/// starts with `=` is resolved relative to the sysroot. Returns the full path
/// if the file exists and None otherwise.
Optional<std::string> findFile(StringRef Dir, const Twine &Name) {
  SmallString<128> Candidate;
  const bool RelativeToSysroot = Dir.startswith("=");
  if (RelativeToSysroot)
    sys::path::append(Candidate, Sysroot, Dir.substr(1), Name);
  else
    sys::path::append(Candidate, Dir, Name);

  if (!sys::fs::exists(Candidate))
    return None;
  return static_cast<std::string>(Candidate);
}
1221
1222
/// Returns the path of the first file called \p Name found in one of the
/// directories in \p SearchPaths, searched in order, or None if there is none.
Optional<std::string> findFromSearchPaths(StringRef Name,
                                          ArrayRef<StringRef> SearchPaths) {
  for (StringRef Dir : SearchPaths) {
    Optional<std::string> Found = findFile(Dir, Name);
    if (Found)
      return Found;
  }
  return None;
}
1229
1230
/// Searches the directories in \p SearchPaths, in order, for the static
/// library `lib<Name>.a`. The search stops and returns None as soon as a
/// directory contains the shared library `lib<Name>.so`.
Optional<std::string> searchLibraryBaseName(StringRef Name,
                                            ArrayRef<StringRef> SearchPaths) {
  for (StringRef Dir : SearchPaths) {
    // A shared library found first ends the search without a result.
    if (findFile(Dir, "lib" + Name + ".so"))
      return None;

    Optional<std::string> Archive = findFile(Dir, "lib" + Name + ".a");
    if (Archive)
      return Archive;
  }
  return None;
}
1240
1241
/// Search for static libraries in the linker's library path given input like
1242
/// `-lfoo` or `-l:libfoo.a`.
1243
Optional<std::string> searchLibrary(StringRef Input,
1244
0
                                    ArrayRef<StringRef> SearchPaths) {
1245
0
  if (!Input.startswith("-l"))
1246
0
    return None;
1247
0
  StringRef Name = Input.drop_front(2);
1248
0
  if (Name.startswith(":"))
1249
0
    return findFromSearchPaths(Name.drop_front(), SearchPaths);
1250
0
  return searchLibraryBaseName(Name, SearchPaths);
1251
0
}
1252
1253
} // namespace
1254
1255
int main(int argc, const char **argv) {
1256
  InitLLVM X(argc, argv);
1257
  InitializeAllTargetInfos();
1258
  InitializeAllTargets();
1259
  InitializeAllTargetMCs();
1260
  InitializeAllAsmParsers();
1261
  InitializeAllAsmPrinters();
1262
1263
  LinkerExecutable = argv[0];
1264
  sys::PrintStackTraceOnErrorSignal(argv[0]);
1265
  cl::SetVersionPrinter(PrintVersion);
1266
  cl::HideUnrelatedOptions(ClangLinkerWrapperCategory);
1267
  cl::ParseCommandLineOptions(
1268
      argc, argv,
1269
      "A wrapper utility over the host linker. It scans the input files for\n"
1270
      "sections that require additional processing prior to linking. The tool\n"
1271
      "will then transparently pass all arguments and input to the specified\n"
1272
      "host linker to create the final binary.\n");
1273
1274
  if (Help) {
1275
    cl::PrintHelpMessage();
1276
    return EXIT_SUCCESS;
1277
  }
1278
1279
0
  auto reportError = [argv](Error E) {
1280
0
    logAllUnhandledErrors(std::move(E), WithColor::error(errs(), argv[0]));
1281
0
    return EXIT_FAILURE;
1282
0
  };
1283
1284
  if (!CudaPath.empty())
1285
    CudaBinaryPath = CudaPath + "/bin";
1286
1287
0
  auto RootIt = llvm::find_if(HostLinkerArgs, [](StringRef Arg) {
1288
0
    return Arg.startswith("--sysroot=");
1289
0
  });
1290
  if (RootIt != HostLinkerArgs.end())
1291
    Sysroot = StringRef(*RootIt).split('=').second;
1292
1293
  ExecutableName = *std::next(llvm::find(HostLinkerArgs, "-o"));
1294
  SmallVector<std::string, 16> LinkerArgs;
1295
  for (const std::string &Arg : HostLinkerArgs)
1296
    LinkerArgs.push_back(Arg);
1297
1298
  SmallVector<StringRef, 16> LibraryPaths;
1299
  for (StringRef Arg : LinkerArgs) {
1300
    if (Arg.startswith("-L"))
1301
      LibraryPaths.push_back(Arg.drop_front(2));
1302
  }
1303
1304
  // Try to extract device code from the linker input.
1305
  SmallVector<DeviceFile, 4> DeviceFiles;
1306
  SmallVector<DeviceFile, 4> LibraryFiles;
1307
  for (StringRef Arg : LinkerArgs) {
1308
    if (Arg == ExecutableName)
1309
      continue;
1310
1311
    // Search the inpuot argument for embedded device files if it is a static
1312
    // library or regular input file.
1313
    if (Optional<std::string> Library = searchLibrary(Arg, LibraryPaths)) {
1314
      ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
1315
          MemoryBuffer::getFileOrSTDIN(*Library);
1316
      if (std::error_code EC = BufferOrErr.getError())
1317
        return reportError(createFileError(*Library, EC));
1318
1319
      if (Error Err = extractFromBuffer(std::move(*BufferOrErr), LibraryFiles))
1320
        return reportError(std::move(Err));
1321
    } else if (sys::fs::exists(Arg) && !sys::fs::is_directory(Arg)) {
1322
      ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
1323
          MemoryBuffer::getFileOrSTDIN(Arg);
1324
      if (std::error_code EC = BufferOrErr.getError())
1325
        return reportError(createFileError(Arg, EC));
1326
1327
      if (Error Err = extractFromBuffer(std::move(*BufferOrErr), DeviceFiles))
1328
        return reportError(std::move(Err));
1329
    }
1330
  }
1331
1332
  // Add the device bitcode libraries to the device files if any were passed in.
1333
  for (StringRef LibraryStr : BitcodeLibraries)
1334
    DeviceFiles.push_back(getBitcodeLibrary(LibraryStr));
1335
1336
  // Link the device images extracted from the linker input.
1337
  SmallVector<DeviceFile, 4> LinkedImages;
1338
  if (Error Err = linkDeviceFiles(DeviceFiles, LibraryFiles, LinkedImages))
1339
    return reportError(std::move(Err));
1340
1341
  // Wrap each linked device image into a linkable host binary and add it to the
1342
  // link job's inputs.
1343
  auto FileOrErr = wrapDeviceImages(LinkedImages);
1344
  if (!FileOrErr)
1345
    return reportError(FileOrErr.takeError());
1346
1347
  // We need to insert the new files next to the old ones to make sure they're
1348
  // linked with the same libraries / arguments.
1349
  if (!FileOrErr->empty()) {
1350
0
    auto *FirstInput = std::next(llvm::find_if(LinkerArgs, [](StringRef Str) {
1351
0
      return sys::fs::exists(Str) && !sys::fs::is_directory(Str) &&
1352
0
             Str != ExecutableName;
1353
0
    }));
1354
    LinkerArgs.insert(FirstInput, FileOrErr->begin(), FileOrErr->end());
1355
  }
1356
1357
  // Run the host linking job.
1358
  if (Error Err = runLinker(LinkerUserPath, LinkerArgs))
1359
    return reportError(std::move(Err));
1360
1361
  // Remove the temporary files created.
1362
  for (const auto &TempFile : TempFiles)
1363
    if (std::error_code EC = sys::fs::remove(TempFile))
1364
      reportError(createFileError(TempFile, EC));
1365
1366
  return EXIT_SUCCESS;
1367
}