Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- NVPTXTargetMachine.cpp - Define TargetMachine for NVPTX -----------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// Top-level implementation for the NVPTX target.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "NVPTXTargetMachine.h"
14
#include "NVPTX.h"
15
#include "NVPTXAllocaHoisting.h"
16
#include "NVPTXLowerAggrCopies.h"
17
#include "NVPTXTargetObjectFile.h"
18
#include "NVPTXTargetTransformInfo.h"
19
#include "TargetInfo/NVPTXTargetInfo.h"
20
#include "llvm/ADT/STLExtras.h"
21
#include "llvm/ADT/Triple.h"
22
#include "llvm/Analysis/TargetTransformInfo.h"
23
#include "llvm/CodeGen/Passes.h"
24
#include "llvm/CodeGen/TargetPassConfig.h"
25
#include "llvm/IR/LegacyPassManager.h"
26
#include "llvm/Pass.h"
27
#include "llvm/Support/CommandLine.h"
28
#include "llvm/Support/TargetRegistry.h"
29
#include "llvm/Target/TargetMachine.h"
30
#include "llvm/Target/TargetOptions.h"
31
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
32
#include "llvm/Transforms/Scalar.h"
33
#include "llvm/Transforms/Scalar/GVN.h"
34
#include "llvm/Transforms/Vectorize.h"
35
#include <cassert>
36
#include <string>
37
38
using namespace llvm;
39
40
// LSV is still relatively new; this switch lets us turn it off in case we
41
// encounter (or suspect) a bug.
42
static cl::opt<bool>
43
    DisableLoadStoreVectorizer("disable-nvptx-load-store-vectorizer",
44
                               cl::desc("Disable load/store vectorizer"),
45
                               cl::init(false), cl::Hidden);
46
47
// TODO: Remove this flag when we are confident with no regressions.
48
static cl::opt<bool> DisableRequireStructuredCFG(
49
    "disable-nvptx-require-structured-cfg",
50
    cl::desc("Transitional flag to turn off NVPTX's requirement on preserving "
51
             "structured CFG. The requirement should be disabled only when "
52
             "unexpected regressions happen."),
53
    cl::init(false), cl::Hidden);
54
55
static cl::opt<bool> UseShortPointersOpt(
56
    "nvptx-short-ptr",
57
    cl::desc(
58
        "Use 32-bit pointers for accessing const/local/shared address spaces."),
59
    cl::init(false), cl::Hidden);
60
61
namespace llvm {
62
63
void initializeNVVMIntrRangePass(PassRegistry&);
64
void initializeNVVMReflectPass(PassRegistry&);
65
void initializeGenericToNVVMPass(PassRegistry&);
66
void initializeNVPTXAllocaHoistingPass(PassRegistry &);
67
void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
68
void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
69
void initializeNVPTXLowerArgsPass(PassRegistry &);
70
void initializeNVPTXLowerAllocaPass(PassRegistry &);
71
void initializeNVPTXProxyRegErasurePass(PassRegistry &);
72
73
} // end namespace llvm
74
75
139k
extern "C" void LLVMInitializeNVPTXTarget() {
76
139k
  // Register the target.
77
139k
  RegisterTargetMachine<NVPTXTargetMachine32> X(getTheNVPTXTarget32());
78
139k
  RegisterTargetMachine<NVPTXTargetMachine64> Y(getTheNVPTXTarget64());
79
139k
80
139k
  // FIXME: This pass is really intended to be invoked during IR optimization,
81
139k
  // but it's very NVPTX-specific.
82
139k
  PassRegistry &PR = *PassRegistry::getPassRegistry();
83
139k
  initializeNVVMReflectPass(PR);
84
139k
  initializeNVVMIntrRangePass(PR);
85
139k
  initializeGenericToNVVMPass(PR);
86
139k
  initializeNVPTXAllocaHoistingPass(PR);
87
139k
  initializeNVPTXAssignValidGlobalNamesPass(PR);
88
139k
  initializeNVPTXLowerArgsPass(PR);
89
139k
  initializeNVPTXLowerAllocaPass(PR);
90
139k
  initializeNVPTXLowerAggrCopiesPass(PR);
91
139k
  initializeNVPTXProxyRegErasurePass(PR);
92
139k
}
93
94
455
static std::string computeDataLayout(bool is64Bit, bool UseShortPointers) {
95
455
  std::string Ret = "e";
96
455
97
455
  if (!is64Bit)
98
240
    Ret += "-p:32:32";
99
215
  else if (UseShortPointers)
100
3
    Ret += "-p3:32:32-p4:32:32-p5:32:32";
101
455
102
455
  Ret += "-i64:64-i128:128-v16:16-v32:32-n16:32:64";
103
455
104
455
  return Ret;
105
455
}
106
107
NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT,
108
                                       StringRef CPU, StringRef FS,
109
                                       const TargetOptions &Options,
110
                                       Optional<Reloc::Model> RM,
111
                                       Optional<CodeModel::Model> CM,
112
                                       CodeGenOpt::Level OL, bool is64bit)
113
    // The pic relocation model is used regardless of what the client has
114
    // specified, as it is the only relocation model currently supported.
115
    : LLVMTargetMachine(T, computeDataLayout(is64bit, UseShortPointersOpt), TT,
116
                        CPU, FS, Options, Reloc::PIC_,
117
                        getEffectiveCodeModel(CM, CodeModel::Small), OL),
118
      is64bit(is64bit), UseShortPointers(UseShortPointersOpt),
119
      TLOF(llvm::make_unique<NVPTXTargetObjectFile>()),
120
455
      Subtarget(TT, CPU, FS, *this) {
121
455
  if (TT.getOS() == Triple::NVCL)
122
4
    drvInterface = NVPTX::NVCL;
123
451
  else
124
451
    drvInterface = NVPTX::CUDA;
125
455
  if (!DisableRequireStructuredCFG)
126
455
    setRequiresStructuredCFG(true);
127
455
  initAsmInfo();
128
455
}
129
130
447
NVPTXTargetMachine::~NVPTXTargetMachine() = default;
131
132
0
void NVPTXTargetMachine32::anchor() {}
133
134
NVPTXTargetMachine32::NVPTXTargetMachine32(const Target &T, const Triple &TT,
135
                                           StringRef CPU, StringRef FS,
136
                                           const TargetOptions &Options,
137
                                           Optional<Reloc::Model> RM,
138
                                           Optional<CodeModel::Model> CM,
139
                                           CodeGenOpt::Level OL, bool JIT)
140
240
    : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
141
142
0
void NVPTXTargetMachine64::anchor() {}
143
144
NVPTXTargetMachine64::NVPTXTargetMachine64(const Target &T, const Triple &TT,
145
                                           StringRef CPU, StringRef FS,
146
                                           const TargetOptions &Options,
147
                                           Optional<Reloc::Model> RM,
148
                                           Optional<CodeModel::Model> CM,
149
                                           CodeGenOpt::Level OL, bool JIT)
150
215
    : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
151
152
namespace {
153
154
class NVPTXPassConfig : public TargetPassConfig {
155
public:
156
  NVPTXPassConfig(NVPTXTargetMachine &TM, PassManagerBase &PM)
157
306
      : TargetPassConfig(TM, PM) {}
158
159
526
  NVPTXTargetMachine &getNVPTXTargetMachine() const {
160
526
    return getTM<NVPTXTargetMachine>();
161
526
  }
162
163
  void addIRPasses() override;
164
  bool addInstSelector() override;
165
  void addPreRegAlloc() override;
166
  void addPostRegAlloc() override;
167
  void addMachineSSAOptimization() override;
168
169
  FunctionPass *createTargetRegisterAllocator(bool) override;
170
  void addFastRegAlloc() override;
171
  void addOptimizedRegAlloc() override;
172
173
0
  bool addRegAssignmentFast() override {
174
0
    llvm_unreachable("should not be used");
175
0
  }
176
177
0
  bool addRegAssignmentOptimized() override {
178
0
    llvm_unreachable("should not be used");
179
0
  }
180
181
private:
182
  // If the opt level is aggressive, add GVN; otherwise, add EarlyCSE. This
183
  // function is only called in opt mode.
184
  void addEarlyCSEOrGVNPass();
185
186
  // Add passes that propagate special memory spaces.
187
  void addAddressSpaceInferencePasses();
188
189
  // Add passes that perform straight-line scalar optimizations.
190
  void addStraightLineScalarOptimizationPasses();
191
};
192
193
} // end anonymous namespace
194
195
306
TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
196
306
  return new NVPTXPassConfig(*this, PM);
197
306
}
198
199
125
void NVPTXTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
200
125
  Builder.addExtension(
201
125
    PassManagerBuilder::EP_EarlyAsPossible,
202
125
    [&](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
203
125
      PM.add(createNVVMReflectPass(Subtarget.getSmVersion()));
204
125
      PM.add(createNVVMIntrRangePass(Subtarget.getSmVersion()));
205
125
    });
206
125
}
207
208
TargetTransformInfo
209
19.6k
NVPTXTargetMachine::getTargetTransformInfo(const Function &F) {
210
19.6k
  return TargetTransformInfo(NVPTXTTIImpl(this, F));
211
19.6k
}
212
213
468
void NVPTXPassConfig::addEarlyCSEOrGVNPass() {
214
468
  if (getOptLevel() == CodeGenOpt::Aggressive)
215
2
    addPass(createGVNPass());
216
466
  else
217
466
    addPass(createEarlyCSEPass());
218
468
}
219
220
234
void NVPTXPassConfig::addAddressSpaceInferencePasses() {
221
234
  // NVPTXLowerArgs emits alloca for byval parameters which can often
222
234
  // be eliminated by SROA.
223
234
  addPass(createSROAPass());
224
234
  addPass(createNVPTXLowerAllocaPass());
225
234
  addPass(createInferAddressSpacesPass());
226
234
}
227
228
234
void NVPTXPassConfig::addStraightLineScalarOptimizationPasses() {
229
234
  addPass(createSeparateConstOffsetFromGEPPass());
230
234
  addPass(createSpeculativeExecutionPass());
231
234
  // ReassociateGEPs exposes more opportunities for SLSR. See
232
234
  // the example in reassociate-geps-and-slsr.ll.
233
234
  addPass(createStraightLineStrengthReducePass());
234
234
  // SeparateConstOffsetFromGEP and SLSR create common expressions which GVN or
235
234
  // EarlyCSE can reuse. GVN generates significantly better code than EarlyCSE
236
234
  // for some of our benchmarks.
237
234
  addEarlyCSEOrGVNPass();
238
234
  // Run NaryReassociate after EarlyCSE/GVN to be more effective.
239
234
  addPass(createNaryReassociatePass());
240
234
  // NaryReassociate on GEPs creates redundant common expressions, so run
241
234
  // EarlyCSE after it.
242
234
  addPass(createEarlyCSEPass());
243
234
}
244
245
263
void NVPTXPassConfig::addIRPasses() {
246
263
  // The following passes are known to not play well with virtual regs hanging
247
263
  // around after register allocation (which in our case, is *all* registers).
248
263
  // We explicitly disable them here.  We do, however, need some functionality
249
263
  // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the
250
263
  // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp).
251
263
  disablePass(&PrologEpilogCodeInserterID);
252
263
  disablePass(&MachineCopyPropagationID);
253
263
  disablePass(&TailDuplicateID);
254
263
  disablePass(&StackMapLivenessID);
255
263
  disablePass(&LiveDebugValuesID);
256
263
  disablePass(&PostRAMachineSinkingID);
257
263
  disablePass(&PostRASchedulerID);
258
263
  disablePass(&FuncletLayoutID);
259
263
  disablePass(&PatchableFunctionID);
260
263
  disablePass(&ShrinkWrapID);
261
263
262
263
  // NVVMReflectPass is added in addEarlyAsPossiblePasses, so hopefully running
263
263
  // it here does nothing.  But since we need it for correctness when lowering
264
263
  // to NVPTX, run it here too, in case whoever built our pass pipeline didn't
265
263
  // call addEarlyAsPossiblePasses.
266
263
  const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl();
267
263
  addPass(createNVVMReflectPass(ST.getSmVersion()));
268
263
269
263
  if (getOptLevel() != CodeGenOpt::None)
270
234
    addPass(createNVPTXImageOptimizerPass());
271
263
  addPass(createNVPTXAssignValidGlobalNamesPass());
272
263
  addPass(createGenericToNVVMPass());
273
263
274
263
  // NVPTXLowerArgs is required for correctness and should be run right
275
263
  // before the address space inference passes.
276
263
  addPass(createNVPTXLowerArgsPass(&getNVPTXTargetMachine()));
277
263
  if (getOptLevel() != CodeGenOpt::None) {
278
234
    addAddressSpaceInferencePasses();
279
234
    if (!DisableLoadStoreVectorizer)
280
233
      addPass(createLoadStoreVectorizerPass());
281
234
    addStraightLineScalarOptimizationPasses();
282
234
  }
283
263
284
263
  // === LSR and other generic IR passes ===
285
263
  TargetPassConfig::addIRPasses();
286
263
  // EarlyCSE is not always strong enough to clean up what LSR produces. For
287
263
  // example, GVN can combine
288
263
  //
289
263
  //   %0 = add %a, %b
290
263
  //   %1 = add %b, %a
291
263
  //
292
263
  // and
293
263
  //
294
263
  //   %0 = shl nsw %a, 2
295
263
  //   %1 = shl %a, 2
296
263
  //
297
263
  // but EarlyCSE can do neither of them.
298
263
  if (getOptLevel() != CodeGenOpt::None)
299
234
    addEarlyCSEOrGVNPass();
300
263
}
301
302
263
bool NVPTXPassConfig::addInstSelector() {
303
263
  const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl();
304
263
305
263
  addPass(createLowerAggrCopies());
306
263
  addPass(createAllocaHoisting());
307
263
  addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel()));
308
263
309
263
  if (!ST.hasImageHandles())
310
199
    addPass(createNVPTXReplaceImageHandlesPass());
311
263
312
263
  return false;
313
263
}
314
315
263
void NVPTXPassConfig::addPreRegAlloc() {
316
263
  // Remove Proxy Register pseudo instructions used to keep `callseq_end` alive.
317
263
  addPass(createNVPTXProxyRegErasurePass());
318
263
}
319
320
263
void NVPTXPassConfig::addPostRegAlloc() {
321
263
  addPass(createNVPTXPrologEpilogPass(), false);
322
263
  if (getOptLevel() != CodeGenOpt::None) {
323
234
    // NVPTXPrologEpilogPass calculates frame object offset and replace frame
324
234
    // index with VRFrame register. NVPTXPeephole need to be run after that and
325
234
    // will replace VRFrame with VRFrameLocal when possible.
326
234
    addPass(createNVPTXPeephole());
327
234
  }
328
263
}
329
330
0
FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) {
331
0
  return nullptr; // No reg alloc
332
0
}
333
334
29
void NVPTXPassConfig::addFastRegAlloc() {
335
29
  addPass(&PHIEliminationID);
336
29
  addPass(&TwoAddressInstructionPassID);
337
29
}
338
339
234
void NVPTXPassConfig::addOptimizedRegAlloc() {
340
234
  addPass(&ProcessImplicitDefsID);
341
234
  addPass(&LiveVariablesID);
342
234
  addPass(&MachineLoopInfoID);
343
234
  addPass(&PHIEliminationID);
344
234
345
234
  addPass(&TwoAddressInstructionPassID);
346
234
  addPass(&RegisterCoalescerID);
347
234
348
234
  // PreRA instruction scheduling.
349
234
  if (addPass(&MachineSchedulerID))
350
234
    printAndVerify("After Machine Scheduling");
351
234
352
234
353
234
  addPass(&StackSlotColoringID);
354
234
355
234
  // FIXME: Needs physical registers
356
234
  //addPass(&MachineLICMID);
357
234
358
234
  printAndVerify("After StackSlotColoring");
359
234
}
360
361
234
void NVPTXPassConfig::addMachineSSAOptimization() {
362
234
  // Pre-ra tail duplication.
363
234
  if (addPass(&EarlyTailDuplicateID))
364
234
    printAndVerify("After Pre-RegAlloc TailDuplicate");
365
234
366
234
  // Optimize PHIs before DCE: removing dead PHI cycles may make more
367
234
  // instructions dead.
368
234
  addPass(&OptimizePHIsID);
369
234
370
234
  // This pass merges large allocas. StackSlotColoring is a different pass
371
234
  // which merges spill slots.
372
234
  addPass(&StackColoringID);
373
234
374
234
  // If the target requests it, assign local variables to stack slots relative
375
234
  // to one another and simplify frame index references where possible.
376
234
  addPass(&LocalStackSlotAllocationID);
377
234
378
234
  // With optimization, dead code should already be eliminated. However
379
234
  // there is one known exception: lowered code for arguments that are only
380
234
  // used by tail calls, where the tail calls reuse the incoming stack
381
234
  // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
382
234
  addPass(&DeadMachineInstructionElimID);
383
234
  printAndVerify("After codegen DCE pass");
384
234
385
234
  // Allow targets to insert passes that improve instruction level parallelism,
386
234
  // like if-conversion. Such passes will typically need dominator trees and
387
234
  // loop info, just like LICM and CSE below.
388
234
  if (addILPOpts())
389
0
    printAndVerify("After ILP optimizations");
390
234
391
234
  addPass(&EarlyMachineLICMID);
392
234
  addPass(&MachineCSEID);
393
234
394
234
  addPass(&MachineSinkingID);
395
234
  printAndVerify("After Machine LICM, CSE and Sinking passes");
396
234
397
234
  addPass(&PeepholeOptimizerID);
398
234
  printAndVerify("After codegen peephole optimization pass");
399
234
}