/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===-- PPCTargetMachine.cpp - Define TargetMachine for PowerPC -----------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // Top-level implementation for the PowerPC target. |
10 | | // |
11 | | //===----------------------------------------------------------------------===// |
12 | | |
13 | | #include "PPCTargetMachine.h" |
14 | | #include "MCTargetDesc/PPCMCTargetDesc.h" |
15 | | #include "PPC.h" |
16 | | #include "PPCMachineScheduler.h" |
17 | | #include "PPCSubtarget.h" |
18 | | #include "PPCTargetObjectFile.h" |
19 | | #include "PPCTargetTransformInfo.h" |
20 | | #include "TargetInfo/PowerPCTargetInfo.h" |
21 | | #include "llvm/ADT/Optional.h" |
22 | | #include "llvm/ADT/STLExtras.h" |
23 | | #include "llvm/ADT/StringRef.h" |
24 | | #include "llvm/ADT/Triple.h" |
25 | | #include "llvm/Analysis/TargetTransformInfo.h" |
26 | | #include "llvm/CodeGen/Passes.h" |
27 | | #include "llvm/CodeGen/TargetPassConfig.h" |
28 | | #include "llvm/CodeGen/MachineScheduler.h" |
29 | | #include "llvm/IR/Attributes.h" |
30 | | #include "llvm/IR/DataLayout.h" |
31 | | #include "llvm/IR/Function.h" |
32 | | #include "llvm/Pass.h" |
33 | | #include "llvm/Support/CodeGen.h" |
34 | | #include "llvm/Support/CommandLine.h" |
35 | | #include "llvm/Support/TargetRegistry.h" |
36 | | #include "llvm/Target/TargetLoweringObjectFile.h" |
37 | | #include "llvm/Target/TargetOptions.h" |
38 | | #include "llvm/Transforms/Scalar.h" |
39 | | #include <cassert> |
40 | | #include <memory> |
41 | | #include <string> |
42 | | |
43 | | using namespace llvm; |
44 | | |
45 | | |
46 | | static cl::opt<bool> |
47 | | EnableBranchCoalescing("enable-ppc-branch-coalesce", cl::Hidden, |
48 | | cl::desc("enable coalescing of duplicate branches for PPC")); |
49 | | static cl:: |
50 | | opt<bool> DisableCTRLoops("disable-ppc-ctrloops", cl::Hidden, |
51 | | cl::desc("Disable CTR loops for PPC")); |
52 | | |
53 | | static cl:: |
54 | | opt<bool> DisablePreIncPrep("disable-ppc-preinc-prep", cl::Hidden, |
55 | | cl::desc("Disable PPC loop preinc prep")); |
56 | | |
57 | | static cl::opt<bool> |
58 | | VSXFMAMutateEarly("schedule-ppc-vsx-fma-mutation-early", |
59 | | cl::Hidden, cl::desc("Schedule VSX FMA instruction mutation early")); |
60 | | |
61 | | static cl:: |
62 | | opt<bool> DisableVSXSwapRemoval("disable-ppc-vsx-swap-removal", cl::Hidden, |
63 | | cl::desc("Disable VSX Swap Removal for PPC")); |
64 | | |
65 | | static cl:: |
66 | | opt<bool> DisableQPXLoadSplat("disable-ppc-qpx-load-splat", cl::Hidden, |
67 | | cl::desc("Disable QPX load splat simplification")); |
68 | | |
69 | | static cl:: |
70 | | opt<bool> DisableMIPeephole("disable-ppc-peephole", cl::Hidden, |
71 | | cl::desc("Disable machine peepholes for PPC")); |
72 | | |
73 | | static cl::opt<bool> |
74 | | EnableGEPOpt("ppc-gep-opt", cl::Hidden, |
75 | | cl::desc("Enable optimizations on complex GEPs"), |
76 | | cl::init(true)); |
77 | | |
78 | | static cl::opt<bool> |
79 | | EnablePrefetch("enable-ppc-prefetching", |
80 | | cl::desc("disable software prefetching on PPC"), |
81 | | cl::init(false), cl::Hidden); |
82 | | |
83 | | static cl::opt<bool> |
84 | | EnableExtraTOCRegDeps("enable-ppc-extra-toc-reg-deps", |
85 | | cl::desc("Add extra TOC register dependencies"), |
86 | | cl::init(true), cl::Hidden); |
87 | | |
88 | | static cl::opt<bool> |
89 | | EnableMachineCombinerPass("ppc-machine-combiner", |
90 | | cl::desc("Enable the machine combiner pass"), |
91 | | cl::init(true), cl::Hidden); |
92 | | |
93 | | static cl::opt<bool> |
94 | | ReduceCRLogical("ppc-reduce-cr-logicals", |
95 | | cl::desc("Expand eligible cr-logical binary ops to branches"), |
96 | | cl::init(false), cl::Hidden); |
97 | 139k | extern "C" void LLVMInitializePowerPCTarget() { |
98 | 139k | // Register the targets |
99 | 139k | RegisterTargetMachine<PPCTargetMachine> A(getThePPC32Target()); |
100 | 139k | RegisterTargetMachine<PPCTargetMachine> B(getThePPC64Target()); |
101 | 139k | RegisterTargetMachine<PPCTargetMachine> C(getThePPC64LETarget()); |
102 | 139k | |
103 | 139k | PassRegistry &PR = *PassRegistry::getPassRegistry(); |
104 | | #ifndef NDEBUG |
105 | | initializePPCCTRLoopsVerifyPass(PR); |
106 | | #endif |
107 | | initializePPCLoopPreIncPrepPass(PR); |
108 | 139k | initializePPCTOCRegDepsPass(PR); |
109 | 139k | initializePPCEarlyReturnPass(PR); |
110 | 139k | initializePPCVSXCopyPass(PR); |
111 | 139k | initializePPCVSXFMAMutatePass(PR); |
112 | 139k | initializePPCVSXSwapRemovalPass(PR); |
113 | 139k | initializePPCReduceCRLogicalsPass(PR); |
114 | 139k | initializePPCBSelPass(PR); |
115 | 139k | initializePPCBranchCoalescingPass(PR); |
116 | 139k | initializePPCQPXLoadSplatPass(PR); |
117 | 139k | initializePPCBoolRetToIntPass(PR); |
118 | 139k | initializePPCExpandISELPass(PR); |
119 | 139k | initializePPCPreEmitPeepholePass(PR); |
120 | 139k | initializePPCTLSDynamicCallPass(PR); |
121 | 139k | initializePPCMIPeepholePass(PR); |
122 | 139k | } |
123 | | |
124 | | /// Return the datalayout string of a subtarget. |
125 | 3.59k | static std::string getDataLayoutString(const Triple &T) { |
126 | 3.59k | bool is64Bit = T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le2.62k ; |
127 | 3.59k | std::string Ret; |
128 | 3.59k | |
129 | 3.59k | // Most PPC* platforms are big endian, PPC64LE is little endian. |
130 | 3.59k | if (T.getArch() == Triple::ppc64le) |
131 | 2.19k | Ret = "e"; |
132 | 1.39k | else |
133 | 1.39k | Ret = "E"; |
134 | 3.59k | |
135 | 3.59k | Ret += DataLayout::getManglingComponent(T); |
136 | 3.59k | |
137 | 3.59k | // PPC32 has 32 bit pointers. The PS3 (OS Lv2) is a PPC64 machine with 32 bit |
138 | 3.59k | // pointers. |
139 | 3.59k | if (!is64Bit || T.getOS() == Triple::Lv23.15k ) |
140 | 433 | Ret += "-p:32:32"; |
141 | 3.59k | |
142 | 3.59k | // Note, the alignment values for f64 and i64 on ppc64 in Darwin |
143 | 3.59k | // documentation are wrong; these are correct (i.e. "what gcc does"). |
144 | 3.59k | if (is64Bit || !T.isOSDarwin()432 ) |
145 | 3.59k | Ret += "-i64:64"; |
146 | 0 | else |
147 | 0 | Ret += "-f64:32:64"; |
148 | 3.59k | |
149 | 3.59k | // PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones. |
150 | 3.59k | if (is64Bit) |
151 | 3.15k | Ret += "-n32:64"; |
152 | 432 | else |
153 | 432 | Ret += "-n32"; |
154 | 3.59k | |
155 | 3.59k | return Ret; |
156 | 3.59k | } |
157 | | |
158 | | static std::string computeFSAdditions(StringRef FS, CodeGenOpt::Level OL, |
159 | 5.44k | const Triple &TT) { |
160 | 5.44k | std::string FullFS = FS; |
161 | 5.44k | |
162 | 5.44k | // Make sure 64-bit features are available when CPUname is generic |
163 | 5.44k | if (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le3.61k ) { |
164 | 4.62k | if (!FullFS.empty()) |
165 | 3.29k | FullFS = "+64bit," + FullFS; |
166 | 1.32k | else |
167 | 1.32k | FullFS = "+64bit"; |
168 | 4.62k | } |
169 | 5.44k | |
170 | 5.44k | if (OL >= CodeGenOpt::Default) { |
171 | 3.30k | if (!FullFS.empty()) |
172 | 2.96k | FullFS = "+crbits," + FullFS; |
173 | 337 | else |
174 | 337 | FullFS = "+crbits"; |
175 | 3.30k | } |
176 | 5.44k | |
177 | 5.44k | if (OL != CodeGenOpt::None) { |
178 | 3.35k | if (!FullFS.empty()) |
179 | 3.35k | FullFS = "+invariant-function-descriptors," + FullFS; |
180 | 3 | else |
181 | 3 | FullFS = "+invariant-function-descriptors"; |
182 | 3.35k | } |
183 | 5.44k | |
184 | 5.44k | return FullFS; |
185 | 5.44k | } |
186 | | |
187 | 3.58k | static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { |
188 | 3.58k | if (TT.isOSDarwin()) |
189 | 0 | return llvm::make_unique<TargetLoweringObjectFileMachO>(); |
190 | 3.58k | |
191 | 3.58k | if (TT.isOSAIX()) |
192 | 9 | return llvm::make_unique<TargetLoweringObjectFileXCOFF>(); |
193 | 3.58k | |
194 | 3.58k | return llvm::make_unique<PPC64LinuxTargetObjectFile>(); |
195 | 3.58k | } |
196 | | |
197 | | static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT, |
198 | 3.58k | const TargetOptions &Options) { |
199 | 3.58k | if (TT.isOSDarwin()) |
200 | 0 | report_fatal_error("Darwin is no longer supported for PowerPC"); |
201 | 3.58k | |
202 | 3.58k | if (Options.MCOptions.getABIName().startswith("elfv1")) |
203 | 22 | return PPCTargetMachine::PPC_ABI_ELFv1; |
204 | 3.56k | else if (Options.MCOptions.getABIName().startswith("elfv2")) |
205 | 18 | return PPCTargetMachine::PPC_ABI_ELFv2; |
206 | 3.54k | |
207 | 3.54k | assert(Options.MCOptions.getABIName().empty() && |
208 | 3.54k | "Unknown target-abi option!"); |
209 | 3.54k | |
210 | 3.54k | if (TT.isMacOSX()) |
211 | 0 | return PPCTargetMachine::PPC_ABI_UNKNOWN; |
212 | 3.54k | |
213 | 3.54k | switch (TT.getArch()) { |
214 | 3.54k | case Triple::ppc64le: |
215 | 2.17k | return PPCTargetMachine::PPC_ABI_ELFv2; |
216 | 3.54k | case Triple::ppc64: |
217 | 942 | if (TT.getEnvironment() == llvm::Triple::ELFv2) |
218 | 1 | return PPCTargetMachine::PPC_ABI_ELFv2; |
219 | 941 | return PPCTargetMachine::PPC_ABI_ELFv1; |
220 | 941 | default: |
221 | 430 | return PPCTargetMachine::PPC_ABI_UNKNOWN; |
222 | 3.54k | } |
223 | 3.54k | } |
224 | | |
225 | | static Reloc::Model getEffectiveRelocModel(const Triple &TT, |
226 | 3.59k | Optional<Reloc::Model> RM) { |
227 | 3.59k | if (RM.hasValue()) |
228 | 1.83k | return *RM; |
229 | 1.75k | |
230 | 1.75k | // Darwin defaults to dynamic-no-pic. |
231 | 1.75k | if (TT.isOSDarwin()) |
232 | 0 | return Reloc::DynamicNoPIC; |
233 | 1.75k | |
234 | 1.75k | // Big Endian PPC is PIC by default. |
235 | 1.75k | if (TT.getArch() == Triple::ppc64) |
236 | 821 | return Reloc::PIC_; |
237 | 932 | |
238 | 932 | // Rest are static by default. |
239 | 932 | return Reloc::Static; |
240 | 932 | } |
241 | | |
242 | | static CodeModel::Model getEffectivePPCCodeModel(const Triple &TT, |
243 | | Optional<CodeModel::Model> CM, |
244 | 3.59k | bool JIT) { |
245 | 3.59k | if (CM) { |
246 | 55 | if (*CM == CodeModel::Tiny) |
247 | 1 | report_fatal_error("Target does not support the tiny CodeModel", false); |
248 | 54 | if (*CM == CodeModel::Kernel) |
249 | 1 | report_fatal_error("Target does not support the kernel CodeModel", false); |
250 | 53 | return *CM; |
251 | 53 | } |
252 | 3.53k | if (!TT.isOSDarwin() && !JIT && |
253 | 3.53k | (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le2.61k )) |
254 | 3.10k | return CodeModel::Medium; |
255 | 430 | return CodeModel::Small; |
256 | 430 | } |
257 | | |
258 | | |
259 | 10.4k | static ScheduleDAGInstrs *createPPCMachineScheduler(MachineSchedContext *C) { |
260 | 10.4k | const PPCSubtarget &ST = C->MF->getSubtarget<PPCSubtarget>(); |
261 | 10.4k | ScheduleDAGMILive *DAG = |
262 | 10.4k | new ScheduleDAGMILive(C, ST.usePPCPreRASchedStrategy() ? |
263 | 2.46k | llvm::make_unique<PPCPreRASchedStrategy>(C) : |
264 | 10.4k | llvm::make_unique<GenericScheduler>(C)8.00k ); |
265 | 10.4k | // add DAG Mutations here. |
266 | 10.4k | DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI)); |
267 | 10.4k | return DAG; |
268 | 10.4k | } |
269 | | |
270 | | static ScheduleDAGInstrs *createPPCPostMachineScheduler( |
271 | 10.4k | MachineSchedContext *C) { |
272 | 10.4k | const PPCSubtarget &ST = C->MF->getSubtarget<PPCSubtarget>(); |
273 | 10.4k | ScheduleDAGMI *DAG = |
274 | 10.4k | new ScheduleDAGMI(C, ST.usePPCPostRASchedStrategy() ? |
275 | 2.46k | llvm::make_unique<PPCPostRASchedStrategy>(C) : |
276 | 10.4k | llvm::make_unique<PostGenericScheduler>(C)7.98k , true); |
277 | 10.4k | // add DAG Mutations here. |
278 | 10.4k | return DAG; |
279 | 10.4k | } |
280 | | |
281 | | // The FeatureString here is a little subtle. We are modifying the feature |
282 | | // string with what are (currently) non-function specific overrides as it goes |
283 | | // into the LLVMTargetMachine constructor and then using the stored value in the |
284 | | // Subtarget constructor below it. |
285 | | PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT, |
286 | | StringRef CPU, StringRef FS, |
287 | | const TargetOptions &Options, |
288 | | Optional<Reloc::Model> RM, |
289 | | Optional<CodeModel::Model> CM, |
290 | | CodeGenOpt::Level OL, bool JIT) |
291 | | : LLVMTargetMachine(T, getDataLayoutString(TT), TT, CPU, |
292 | | computeFSAdditions(FS, OL, TT), Options, |
293 | | getEffectiveRelocModel(TT, RM), |
294 | | getEffectivePPCCodeModel(TT, CM, JIT), OL), |
295 | | TLOF(createTLOF(getTargetTriple())), |
296 | 3.59k | TargetABI(computeTargetABI(TT, Options)) { |
297 | 3.59k | initAsmInfo(); |
298 | 3.59k | } |
299 | | |
300 | 3.55k | PPCTargetMachine::~PPCTargetMachine() = default; |
301 | | |
302 | | const PPCSubtarget * |
303 | 186k | PPCTargetMachine::getSubtargetImpl(const Function &F) const { |
304 | 186k | Attribute CPUAttr = F.getFnAttribute("target-cpu"); |
305 | 186k | Attribute FSAttr = F.getFnAttribute("target-features"); |
306 | 186k | |
307 | 186k | std::string CPU = !CPUAttr.hasAttribute(Attribute::None) |
308 | 186k | ? CPUAttr.getValueAsString().str()133k |
309 | 186k | : TargetCPU53.1k ; |
310 | 186k | std::string FS = !FSAttr.hasAttribute(Attribute::None) |
311 | 186k | ? FSAttr.getValueAsString().str()27.4k |
312 | 186k | : TargetFS158k ; |
313 | 186k | |
314 | 186k | // FIXME: This is related to the code below to reset the target options, |
315 | 186k | // we need to know whether or not the soft float flag is set on the |
316 | 186k | // function before we can generate a subtarget. We also need to use |
317 | 186k | // it as a key for the subtarget since that can be the only difference |
318 | 186k | // between two functions. |
319 | 186k | bool SoftFloat = |
320 | 186k | F.getFnAttribute("use-soft-float").getValueAsString() == "true"; |
321 | 186k | // If the soft float attribute is set on the function turn on the soft float |
322 | 186k | // subtarget feature. |
323 | 186k | if (SoftFloat) |
324 | 301 | FS += FS.empty() ? "-hard-float"119 : ",-hard-float"182 ; |
325 | 186k | |
326 | 186k | auto &I = SubtargetMap[CPU + FS]; |
327 | 186k | if (!I) { |
328 | 1.85k | // This needs to be done before we create a new subtarget since any |
329 | 1.85k | // creation will depend on the TM and the code generation flags on the |
330 | 1.85k | // function that reside in TargetOptions. |
331 | 1.85k | resetTargetOptions(F); |
332 | 1.85k | I = llvm::make_unique<PPCSubtarget>( |
333 | 1.85k | TargetTriple, CPU, |
334 | 1.85k | // FIXME: It would be good to have the subtarget additions here |
335 | 1.85k | // not necessary. Anything that turns them on/off (overrides) ends |
336 | 1.85k | // up being put at the end of the feature string, but the defaults |
337 | 1.85k | // shouldn't require adding them. Fixing this means pulling Feature64Bit |
338 | 1.85k | // out of most of the target cpus in the .td file and making it set only |
339 | 1.85k | // as part of initialization via the TargetTriple. |
340 | 1.85k | computeFSAdditions(FS, getOptLevel(), getTargetTriple()), *this); |
341 | 1.85k | } |
342 | 186k | return I.get(); |
343 | 186k | } |
344 | | |
345 | | //===----------------------------------------------------------------------===// |
346 | | // Pass Pipeline Configuration |
347 | | //===----------------------------------------------------------------------===// |
348 | | |
349 | | namespace { |
350 | | |
351 | | /// PPC Code Generator Pass Configuration Options. |
352 | | class PPCPassConfig : public TargetPassConfig { |
353 | | public: |
354 | | PPCPassConfig(PPCTargetMachine &TM, PassManagerBase &PM) |
355 | 1.92k | : TargetPassConfig(TM, PM) { |
356 | 1.92k | // At any optimization level above -O0 we use the Machine Scheduler and not |
357 | 1.92k | // the default Post RA List Scheduler. |
358 | 1.92k | if (TM.getOptLevel() != CodeGenOpt::None) |
359 | 1.69k | substitutePass(&PostRASchedulerID, &PostMachineSchedulerID); |
360 | 1.92k | } |
361 | | |
362 | 5.27k | PPCTargetMachine &getPPCTargetMachine() const { |
363 | 5.27k | return getTM<PPCTargetMachine>(); |
364 | 5.27k | } |
365 | | |
366 | | void addIRPasses() override; |
367 | | bool addPreISel() override; |
368 | | bool addILPOpts() override; |
369 | | bool addInstSelector() override; |
370 | | void addMachineSSAOptimization() override; |
371 | | void addPreRegAlloc() override; |
372 | | void addPreSched2() override; |
373 | | void addPreEmitPass() override; |
374 | | ScheduleDAGInstrs * |
375 | 10.4k | createMachineScheduler(MachineSchedContext *C) const override { |
376 | 10.4k | return createPPCMachineScheduler(C); |
377 | 10.4k | } |
378 | | ScheduleDAGInstrs * |
379 | 10.4k | createPostMachineScheduler(MachineSchedContext *C) const override { |
380 | 10.4k | return createPPCPostMachineScheduler(C); |
381 | 10.4k | } |
382 | | }; |
383 | | |
384 | | } // end anonymous namespace |
385 | | |
386 | 1.92k | TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) { |
387 | 1.92k | return new PPCPassConfig(*this, PM); |
388 | 1.92k | } |
389 | | |
390 | 1.80k | void PPCPassConfig::addIRPasses() { |
391 | 1.80k | if (TM->getOptLevel() != CodeGenOpt::None) |
392 | 1.66k | addPass(createPPCBoolRetToIntPass()); |
393 | 1.80k | addPass(createAtomicExpandPass()); |
394 | 1.80k | |
395 | 1.80k | // For the BG/Q (or if explicitly requested), add explicit data prefetch |
396 | 1.80k | // intrinsics. |
397 | 1.80k | bool UsePrefetching = TM->getTargetTriple().getVendor() == Triple::BGQ && |
398 | 1.80k | getOptLevel() != CodeGenOpt::None35 ; |
399 | 1.80k | if (EnablePrefetch.getNumOccurrences() > 0) |
400 | 7 | UsePrefetching = EnablePrefetch; |
401 | 1.80k | if (UsePrefetching) |
402 | 40 | addPass(createLoopDataPrefetchPass()); |
403 | 1.80k | |
404 | 1.80k | if (TM->getOptLevel() >= CodeGenOpt::Default && EnableGEPOpt1.63k ) { |
405 | 1.63k | // Call SeparateConstOffsetFromGEP pass to extract constants within indices |
406 | 1.63k | // and lower a GEP with multiple indices to either arithmetic operations or |
407 | 1.63k | // multiple GEPs with single index. |
408 | 1.63k | addPass(createSeparateConstOffsetFromGEPPass(true)); |
409 | 1.63k | // Call EarlyCSE pass to find and remove subexpressions in the lowered |
410 | 1.63k | // result. |
411 | 1.63k | addPass(createEarlyCSEPass()); |
412 | 1.63k | // Do loop invariant code motion in case part of the lowered result is |
413 | 1.63k | // invariant. |
414 | 1.63k | addPass(createLICMPass()); |
415 | 1.63k | } |
416 | 1.80k | |
417 | 1.80k | TargetPassConfig::addIRPasses(); |
418 | 1.80k | } |
419 | | |
420 | 1.80k | bool PPCPassConfig::addPreISel() { |
421 | 1.80k | if (!DisablePreIncPrep && getOptLevel() != CodeGenOpt::None1.80k ) |
422 | 1.66k | addPass(createPPCLoopPreIncPrepPass(getPPCTargetMachine())); |
423 | 1.80k | |
424 | 1.80k | if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None1.80k ) |
425 | 1.66k | addPass(createHardwareLoopsPass()); |
426 | 1.80k | |
427 | 1.80k | return false; |
428 | 1.80k | } |
429 | | |
430 | 1.66k | bool PPCPassConfig::addILPOpts() { |
431 | 1.66k | addPass(&EarlyIfConverterID); |
432 | 1.66k | |
433 | 1.66k | if (EnableMachineCombinerPass) |
434 | 1.66k | addPass(&MachineCombinerID); |
435 | 1.66k | |
436 | 1.66k | return true; |
437 | 1.66k | } |
438 | | |
439 | 1.80k | bool PPCPassConfig::addInstSelector() { |
440 | 1.80k | // Install an instruction selector. |
441 | 1.80k | addPass(createPPCISelDag(getPPCTargetMachine(), getOptLevel())); |
442 | 1.80k | |
443 | | #ifndef NDEBUG |
444 | | if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None) |
445 | | addPass(createPPCCTRLoopsVerify()); |
446 | | #endif |
447 | | |
448 | 1.80k | addPass(createPPCVSXCopyPass()); |
449 | 1.80k | return false; |
450 | 1.80k | } |
451 | | |
452 | 1.66k | void PPCPassConfig::addMachineSSAOptimization() { |
453 | 1.66k | // PPCBranchCoalescingPass need to be done before machine sinking |
454 | 1.66k | // since it merges empty blocks. |
455 | 1.66k | if (EnableBranchCoalescing && getOptLevel() != CodeGenOpt::None4 ) |
456 | 4 | addPass(createPPCBranchCoalescingPass()); |
457 | 1.66k | TargetPassConfig::addMachineSSAOptimization(); |
458 | 1.66k | // For little endian, remove where possible the vector swap instructions |
459 | 1.66k | // introduced at code generation to normalize vector element order. |
460 | 1.66k | if (TM->getTargetTriple().getArch() == Triple::ppc64le && |
461 | 1.66k | !DisableVSXSwapRemoval560 ) |
462 | 558 | addPass(createPPCVSXSwapRemovalPass()); |
463 | 1.66k | // Reduce the number of cr-logical ops. |
464 | 1.66k | if (ReduceCRLogical && getOptLevel() != CodeGenOpt::None8 ) |
465 | 8 | addPass(createPPCReduceCRLogicalsPass()); |
466 | 1.66k | // Target-specific peephole cleanups performed after instruction |
467 | 1.66k | // selection. |
468 | 1.66k | if (!DisableMIPeephole) { |
469 | 1.66k | addPass(createPPCMIPeepholePass()); |
470 | 1.66k | addPass(&DeadMachineInstructionElimID); |
471 | 1.66k | } |
472 | 1.66k | } |
473 | | |
474 | 1.80k | void PPCPassConfig::addPreRegAlloc() { |
475 | 1.80k | if (getOptLevel() != CodeGenOpt::None) { |
476 | 1.66k | initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry()); |
477 | 1.66k | insertPass(VSXFMAMutateEarly ? &RegisterCoalescerID0 : &MachineSchedulerID, |
478 | 1.66k | &PPCVSXFMAMutateID); |
479 | 1.66k | } |
480 | 1.80k | |
481 | 1.80k | // FIXME: We probably don't need to run these for -fPIE. |
482 | 1.80k | if (getPPCTargetMachine().isPositionIndependent()) { |
483 | 874 | // FIXME: LiveVariables should not be necessary here! |
484 | 874 | // PPCTLSDynamicCallPass uses LiveIntervals which previously dependent on |
485 | 874 | // LiveVariables. This (unnecessary) dependency has been removed now, |
486 | 874 | // however a stage-2 clang build fails without LiveVariables computed here. |
487 | 874 | addPass(&LiveVariablesID, false); |
488 | 874 | addPass(createPPCTLSDynamicCallPass()); |
489 | 874 | } |
490 | 1.80k | if (EnableExtraTOCRegDeps) |
491 | 1.80k | addPass(createPPCTOCRegDepsPass()); |
492 | 1.80k | |
493 | 1.80k | if (getOptLevel() != CodeGenOpt::None) |
494 | 1.66k | addPass(&MachinePipelinerID); |
495 | 1.80k | } |
496 | | |
497 | 1.80k | void PPCPassConfig::addPreSched2() { |
498 | 1.80k | if (getOptLevel() != CodeGenOpt::None) { |
499 | 1.66k | addPass(&IfConverterID); |
500 | 1.66k | |
501 | 1.66k | // This optimization must happen after anything that might do store-to-load |
502 | 1.66k | // forwarding. Here we're after RA (and, thus, when spills are inserted) |
503 | 1.66k | // but before post-RA scheduling. |
504 | 1.66k | if (!DisableQPXLoadSplat) |
505 | 1.66k | addPass(createPPCQPXLoadSplatPass()); |
506 | 1.66k | } |
507 | 1.80k | } |
508 | | |
509 | 1.80k | void PPCPassConfig::addPreEmitPass() { |
510 | 1.80k | addPass(createPPCPreEmitPeepholePass()); |
511 | 1.80k | addPass(createPPCExpandISELPass()); |
512 | 1.80k | |
513 | 1.80k | if (getOptLevel() != CodeGenOpt::None) |
514 | 1.66k | addPass(createPPCEarlyReturnPass(), false); |
515 | 1.80k | // Must run branch selection immediately preceding the asm printer. |
516 | 1.80k | addPass(createPPCBranchSelectionPass(), false); |
517 | 1.80k | } |
518 | | |
519 | | TargetTransformInfo |
520 | 97.9k | PPCTargetMachine::getTargetTransformInfo(const Function &F) { |
521 | 97.9k | return TargetTransformInfo(PPCTTIImpl(this, F)); |
522 | 97.9k | } |
523 | | |
524 | | static MachineSchedRegistry |
525 | | PPCPreRASchedRegistry("ppc-prera", |
526 | | "Run PowerPC PreRA specific scheduler", |
527 | | createPPCMachineScheduler); |
528 | | |
529 | | static MachineSchedRegistry |
530 | | PPCPostRASchedRegistry("ppc-postra", |
531 | | "Run PowerPC PostRA specific scheduler", |
532 | | createPPCPostMachineScheduler); |