/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===-- NVPTXTargetMachine.cpp - Define TargetMachine for NVPTX -----------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // Top-level implementation for the NVPTX target. |
11 | | // |
12 | | //===----------------------------------------------------------------------===// |
13 | | |
14 | | #include "NVPTXTargetMachine.h" |
15 | | #include "NVPTX.h" |
16 | | #include "NVPTXAllocaHoisting.h" |
17 | | #include "NVPTXLowerAggrCopies.h" |
18 | | #include "NVPTXTargetObjectFile.h" |
19 | | #include "NVPTXTargetTransformInfo.h" |
20 | | #include "llvm/ADT/STLExtras.h" |
21 | | #include "llvm/ADT/Triple.h" |
22 | | #include "llvm/Analysis/TargetTransformInfo.h" |
23 | | #include "llvm/CodeGen/Passes.h" |
24 | | #include "llvm/CodeGen/TargetPassConfig.h" |
25 | | #include "llvm/IR/LegacyPassManager.h" |
26 | | #include "llvm/Pass.h" |
27 | | #include "llvm/Support/CommandLine.h" |
28 | | #include "llvm/Support/TargetRegistry.h" |
29 | | #include "llvm/Target/TargetMachine.h" |
30 | | #include "llvm/Target/TargetOptions.h" |
31 | | #include "llvm/Transforms/IPO/PassManagerBuilder.h" |
32 | | #include "llvm/Transforms/Scalar.h" |
33 | | #include "llvm/Transforms/Scalar/GVN.h" |
34 | | #include "llvm/Transforms/Vectorize.h" |
35 | | #include <cassert> |
36 | | #include <string> |
37 | | |
38 | | using namespace llvm; |
39 | | |
// LSV is still relatively new; this switch lets us turn it off in case we
// encounter (or suspect) a bug.
// Hidden command-line escape hatch: -disable-nvptx-load-store-vectorizer.
// Defaults to false, i.e. the load/store vectorizer runs (see addIRPasses).
static cl::opt<bool>
    DisableLoadStoreVectorizer("disable-nvptx-load-store-vectorizer",
                               cl::desc("Disable load/store vectorizer"),
                               cl::init(false), cl::Hidden);
46 | | |
namespace llvm {

// Forward declarations of the pass initializers invoked from
// LLVMInitializeNVPTXTarget() below.  The definitions live in the
// respective pass implementation files.
void initializeNVVMIntrRangePass(PassRegistry&);
void initializeNVVMReflectPass(PassRegistry&);
void initializeGenericToNVVMPass(PassRegistry&);
void initializeNVPTXAllocaHoistingPass(PassRegistry &);
void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
void initializeNVPTXLowerArgsPass(PassRegistry &);
void initializeNVPTXLowerAllocaPass(PassRegistry &);

} // end namespace llvm
59 | | |
60 | 123k | extern "C" void LLVMInitializeNVPTXTarget() { |
61 | 123k | // Register the target. |
62 | 123k | RegisterTargetMachine<NVPTXTargetMachine32> X(getTheNVPTXTarget32()); |
63 | 123k | RegisterTargetMachine<NVPTXTargetMachine64> Y(getTheNVPTXTarget64()); |
64 | 123k | |
65 | 123k | // FIXME: This pass is really intended to be invoked during IR optimization, |
66 | 123k | // but it's very NVPTX-specific. |
67 | 123k | PassRegistry &PR = *PassRegistry::getPassRegistry(); |
68 | 123k | initializeNVVMReflectPass(PR); |
69 | 123k | initializeNVVMIntrRangePass(PR); |
70 | 123k | initializeGenericToNVVMPass(PR); |
71 | 123k | initializeNVPTXAllocaHoistingPass(PR); |
72 | 123k | initializeNVPTXAssignValidGlobalNamesPass(PR); |
73 | 123k | initializeNVPTXLowerArgsPass(PR); |
74 | 123k | initializeNVPTXLowerAllocaPass(PR); |
75 | 123k | initializeNVPTXLowerAggrCopiesPass(PR); |
76 | 123k | } |
77 | | |
78 | 351 | static std::string computeDataLayout(bool is64Bit) { |
79 | 351 | std::string Ret = "e"; |
80 | 351 | |
81 | 351 | if (!is64Bit) |
82 | 207 | Ret += "-p:32:32"; |
83 | 351 | |
84 | 351 | Ret += "-i64:64-i128:128-v16:16-v32:32-n16:32:64"; |
85 | 351 | |
86 | 351 | return Ret; |
87 | 351 | } |
88 | | |
89 | 351 | static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM) { |
90 | 351 | if (CM) |
91 | 0 | return *CM; |
92 | 351 | return CodeModel::Small; |
93 | 351 | } |
94 | | |
95 | | NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT, |
96 | | StringRef CPU, StringRef FS, |
97 | | const TargetOptions &Options, |
98 | | Optional<Reloc::Model> RM, |
99 | | Optional<CodeModel::Model> CM, |
100 | | CodeGenOpt::Level OL, bool is64bit) |
101 | | // The pic relocation model is used regardless of what the client has |
102 | | // specified, as it is the only relocation model currently supported. |
103 | | : LLVMTargetMachine(T, computeDataLayout(is64bit), TT, CPU, FS, Options, |
104 | | Reloc::PIC_, getEffectiveCodeModel(CM), OL), |
105 | | is64bit(is64bit), TLOF(llvm::make_unique<NVPTXTargetObjectFile>()), |
106 | 351 | Subtarget(TT, CPU, FS, *this) { |
107 | 351 | if (TT.getOS() == Triple::NVCL) |
108 | 4 | drvInterface = NVPTX::NVCL; |
109 | 351 | else |
110 | 347 | drvInterface = NVPTX::CUDA; |
111 | 351 | initAsmInfo(); |
112 | 351 | } |
113 | | |
// Defaulted destructor, defined out of line in this translation unit.
NVPTXTargetMachine::~NVPTXTargetMachine() = default;
115 | | |
// Out-of-line virtual method (LLVM "anchor" convention) — presumably here to
// give the class a home translation unit; intentionally empty.
void NVPTXTargetMachine32::anchor() {}
117 | | |
// 32-bit variant: forwards to the common constructor with is64bit == false.
// Note the JIT flag is accepted for interface parity but not used here.
NVPTXTargetMachine32::NVPTXTargetMachine32(const Target &T, const Triple &TT,
                                           StringRef CPU, StringRef FS,
                                           const TargetOptions &Options,
                                           Optional<Reloc::Model> RM,
                                           Optional<CodeModel::Model> CM,
                                           CodeGenOpt::Level OL, bool JIT)
    : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
125 | | |
// Out-of-line virtual method (LLVM "anchor" convention); intentionally empty.
void NVPTXTargetMachine64::anchor() {}
127 | | |
// 64-bit variant: forwards to the common constructor with is64bit == true.
// Note the JIT flag is accepted for interface parity but not used here.
NVPTXTargetMachine64::NVPTXTargetMachine64(const Target &T, const Triple &TT,
                                           StringRef CPU, StringRef FS,
                                           const TargetOptions &Options,
                                           Optional<Reloc::Model> RM,
                                           Optional<CodeModel::Model> CM,
                                           CodeGenOpt::Level OL, bool JIT)
    : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
135 | | |
namespace {

/// NVPTX-specific codegen pass pipeline configuration.  Overrides the generic
/// TargetPassConfig hooks to schedule NVPTX IR passes and to run codegen
/// without a register allocator (createTargetRegisterAllocator returns null;
/// PTX output keeps virtual registers).
class NVPTXPassConfig : public TargetPassConfig {
public:
  NVPTXPassConfig(NVPTXTargetMachine &TM, PassManagerBase &PM)
      : TargetPassConfig(TM, PM) {}

  /// Convenience accessor: the owning target machine, typed as NVPTX.
  NVPTXTargetMachine &getNVPTXTargetMachine() const {
    return getTM<NVPTXTargetMachine>();
  }

  void addIRPasses() override;
  bool addInstSelector() override;
  void addPostRegAlloc() override;
  void addMachineSSAOptimization() override;

  // Register-allocation hooks: NVPTX supplies no allocator and instead
  // schedules only the pre-RA lowering passes it still requires.
  FunctionPass *createTargetRegisterAllocator(bool) override;
  void addFastRegAlloc(FunctionPass *RegAllocPass) override;
  void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;

private:
  // If the opt level is aggressive, add GVN; otherwise, add EarlyCSE. This
  // function is only called in opt mode.
  void addEarlyCSEOrGVNPass();

  // Add passes that propagate special memory spaces.
  void addAddressSpaceInferencePasses();

  // Add passes that perform straight-line scalar optimizations.
  void addStraightLineScalarOptimizationPasses();
};

} // end anonymous namespace
169 | | |
170 | 268 | TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) { |
171 | 268 | return new NVPTXPassConfig(*this, PM); |
172 | 268 | } |
173 | | |
// Hook NVPTX IR passes into the front end's optimization pipeline at the
// earliest extension point: NVVMReflect plus NVVMIntrRange parameterized by
// the subtarget's SM version.
void NVPTXTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
  Builder.addExtension(
      PassManagerBuilder::EP_EarlyAsPossible,
      [&](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
        // NOTE(review): captures `this` by reference via [&]; assumes the
        // target machine outlives the pass-manager builder callback.
        PM.add(createNVVMReflectPass());
        PM.add(createNVVMIntrRangePass(Subtarget.getSmVersion()));
      });
}
182 | | |
183 | 528 | TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() { |
184 | 15.6k | return TargetIRAnalysis([this](const Function &F) { |
185 | 15.6k | return TargetTransformInfo(NVPTXTTIImpl(this, F)); |
186 | 15.6k | }); |
187 | 528 | } |
188 | | |
189 | 412 | void NVPTXPassConfig::addEarlyCSEOrGVNPass() { |
190 | 412 | if (getOptLevel() == CodeGenOpt::Aggressive) |
191 | 2 | addPass(createGVNPass()); |
192 | 412 | else |
193 | 410 | addPass(createEarlyCSEPass()); |
194 | 412 | } |
195 | | |
// Propagate special memory spaces: SROA first, then alloca lowering, then
// the generic address-space inference pass.  Order matters.
void NVPTXPassConfig::addAddressSpaceInferencePasses() {
  // NVPTXLowerArgs emits alloca for byval parameters which can often
  // be eliminated by SROA.
  addPass(createSROAPass());
  addPass(createNVPTXLowerAllocaPass());
  addPass(createInferAddressSpacesPass());
}
203 | | |
// Straight-line scalar optimizations.  The sequence is deliberate: each pass
// exposes redundancy that the next one cleans up.
void NVPTXPassConfig::addStraightLineScalarOptimizationPasses() {
  addPass(createSeparateConstOffsetFromGEPPass());
  addPass(createSpeculativeExecutionPass());
  // ReassociateGEPs exposes more opportunites for SLSR. See
  // the example in reassociate-geps-and-slsr.ll.
  addPass(createStraightLineStrengthReducePass());
  // SeparateConstOffsetFromGEP and SLSR creates common expressions which GVN or
  // EarlyCSE can reuse. GVN generates significantly better code than EarlyCSE
  // for some of our benchmarks.
  addEarlyCSEOrGVNPass();
  // Run NaryReassociate after EarlyCSE/GVN to be more effective.
  addPass(createNaryReassociatePass());
  // NaryReassociate on GEPs creates redundant common expressions, so run
  // EarlyCSE after it.
  addPass(createEarlyCSEPass());
}
220 | | |
221 | 232 | void NVPTXPassConfig::addIRPasses() { |
222 | 232 | // The following passes are known to not play well with virtual regs hanging |
223 | 232 | // around after register allocation (which in our case, is *all* registers). |
224 | 232 | // We explicitly disable them here. We do, however, need some functionality |
225 | 232 | // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the |
226 | 232 | // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp). |
227 | 232 | disablePass(&PrologEpilogCodeInserterID); |
228 | 232 | disablePass(&MachineCopyPropagationID); |
229 | 232 | disablePass(&TailDuplicateID); |
230 | 232 | disablePass(&StackMapLivenessID); |
231 | 232 | disablePass(&LiveDebugValuesID); |
232 | 232 | disablePass(&PostRASchedulerID); |
233 | 232 | disablePass(&FuncletLayoutID); |
234 | 232 | disablePass(&PatchableFunctionID); |
235 | 232 | |
236 | 232 | // NVVMReflectPass is added in addEarlyAsPossiblePasses, so hopefully running |
237 | 232 | // it here does nothing. But since we need it for correctness when lowering |
238 | 232 | // to NVPTX, run it here too, in case whoever built our pass pipeline didn't |
239 | 232 | // call addEarlyAsPossiblePasses. |
240 | 232 | addPass(createNVVMReflectPass()); |
241 | 232 | |
242 | 232 | if (getOptLevel() != CodeGenOpt::None) |
243 | 206 | addPass(createNVPTXImageOptimizerPass()); |
244 | 232 | addPass(createNVPTXAssignValidGlobalNamesPass()); |
245 | 232 | addPass(createGenericToNVVMPass()); |
246 | 232 | |
247 | 232 | // NVPTXLowerArgs is required for correctness and should be run right |
248 | 232 | // before the address space inference passes. |
249 | 232 | addPass(createNVPTXLowerArgsPass(&getNVPTXTargetMachine())); |
250 | 232 | if (getOptLevel() != CodeGenOpt::None232 ) { |
251 | 206 | addAddressSpaceInferencePasses(); |
252 | 206 | if (!DisableLoadStoreVectorizer) |
253 | 205 | addPass(createLoadStoreVectorizerPass()); |
254 | 206 | addStraightLineScalarOptimizationPasses(); |
255 | 206 | } |
256 | 232 | |
257 | 232 | // === LSR and other generic IR passes === |
258 | 232 | TargetPassConfig::addIRPasses(); |
259 | 232 | // EarlyCSE is not always strong enough to clean up what LSR produces. For |
260 | 232 | // example, GVN can combine |
261 | 232 | // |
262 | 232 | // %0 = add %a, %b |
263 | 232 | // %1 = add %b, %a |
264 | 232 | // |
265 | 232 | // and |
266 | 232 | // |
267 | 232 | // %0 = shl nsw %a, 2 |
268 | 232 | // %1 = shl %a, 2 |
269 | 232 | // |
270 | 232 | // but EarlyCSE can do neither of them. |
271 | 232 | if (getOptLevel() != CodeGenOpt::None) |
272 | 206 | addEarlyCSEOrGVNPass(); |
273 | 232 | } |
274 | | |
275 | 232 | bool NVPTXPassConfig::addInstSelector() { |
276 | 232 | const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl(); |
277 | 232 | |
278 | 232 | addPass(createLowerAggrCopies()); |
279 | 232 | addPass(createAllocaHoisting()); |
280 | 232 | addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel())); |
281 | 232 | |
282 | 232 | if (!ST.hasImageHandles()) |
283 | 173 | addPass(createNVPTXReplaceImageHandlesPass()); |
284 | 232 | |
285 | 232 | return false; |
286 | 232 | } |
287 | | |
288 | 232 | void NVPTXPassConfig::addPostRegAlloc() { |
289 | 232 | addPass(createNVPTXPrologEpilogPass(), false); |
290 | 232 | if (getOptLevel() != CodeGenOpt::None232 ) { |
291 | 206 | // NVPTXPrologEpilogPass calculates frame object offset and replace frame |
292 | 206 | // index with VRFrame register. NVPTXPeephole need to be run after that and |
293 | 206 | // will replace VRFrame with VRFrameLocal when possible. |
294 | 206 | addPass(createNVPTXPeephole()); |
295 | 206 | } |
296 | 232 | } |
297 | | |
// NVPTX performs no register allocation: returning null suppresses the
// generic allocator entirely (PTX output keeps virtual registers).
FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) {
  return nullptr; // No reg alloc
}
301 | | |
// -O0 path: no allocator may be handed in (we returned null above); run only
// the pre-RA lowering passes that take the code out of SSA form.
void NVPTXPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
  assert(!RegAllocPass && "NVPTX uses no regalloc!");
  addPass(&PHIEliminationID);
  addPass(&TwoAddressInstructionPassID);
}
307 | | |
// Optimized path: no allocator may be handed in; schedule the usual pre-RA
// machine passes (SSA destruction, coalescing, scheduling) without any
// actual register allocation.  Pass order matters.
void NVPTXPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
  assert(!RegAllocPass && "NVPTX uses no regalloc!");

  addPass(&ProcessImplicitDefsID);
  addPass(&LiveVariablesID);
  addPass(&MachineLoopInfoID);
  addPass(&PHIEliminationID);

  addPass(&TwoAddressInstructionPassID);
  addPass(&RegisterCoalescerID);

  // PreRA instruction scheduling.
  if (addPass(&MachineSchedulerID))
    printAndVerify("After Machine Scheduling");


  addPass(&StackSlotColoringID);

  // FIXME: Needs physical registers
  //addPass(&PostRAMachineLICMID);

  printAndVerify("After StackSlotColoring");
}
331 | | |
// Machine-level SSA optimizations run while the code is still in SSA form.
// The sequence below mirrors the generic pipeline; order is significant.
void NVPTXPassConfig::addMachineSSAOptimization() {
  // Pre-ra tail duplication.
  if (addPass(&EarlyTailDuplicateID))
    printAndVerify("After Pre-RegAlloc TailDuplicate");

  // Optimize PHIs before DCE: removing dead PHI cycles may make more
  // instructions dead.
  addPass(&OptimizePHIsID);

  // This pass merges large allocas. StackSlotColoring is a different pass
  // which merges spill slots.
  addPass(&StackColoringID);

  // If the target requests it, assign local variables to stack slots relative
  // to one another and simplify frame index references where possible.
  addPass(&LocalStackSlotAllocationID);

  // With optimization, dead code should already be eliminated. However
  // there is one known exception: lowered code for arguments that are only
  // used by tail calls, where the tail calls reuse the incoming stack
  // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
  addPass(&DeadMachineInstructionElimID);
  printAndVerify("After codegen DCE pass");

  // Allow targets to insert passes that improve instruction level parallelism,
  // like if-conversion. Such passes will typically need dominator trees and
  // loop info, just like LICM and CSE below.
  if (addILPOpts())
    printAndVerify("After ILP optimizations");

  addPass(&MachineLICMID);
  addPass(&MachineCSEID);

  addPass(&MachineSinkingID);
  printAndVerify("After Machine LICM, CSE and Sinking passes");

  addPass(&PeepholeOptimizerID);
  printAndVerify("After codegen peephole optimization pass");
}
369 | 206 | printAndVerify("After codegen peephole optimization pass"); |
370 | 206 | } |