/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===-- AArch64TargetMachine.cpp - Define TargetMachine for AArch64 -------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // |
11 | | //===----------------------------------------------------------------------===// |
12 | | |
13 | | #include "AArch64TargetMachine.h" |
14 | | #include "AArch64.h" |
15 | | #include "AArch64MacroFusion.h" |
16 | | #include "AArch64Subtarget.h" |
17 | | #include "AArch64TargetObjectFile.h" |
18 | | #include "AArch64TargetTransformInfo.h" |
19 | | #include "MCTargetDesc/AArch64MCTargetDesc.h" |
20 | | #include "llvm/ADT/STLExtras.h" |
21 | | #include "llvm/ADT/Triple.h" |
22 | | #include "llvm/Analysis/TargetTransformInfo.h" |
23 | | #include "llvm/CodeGen/GlobalISel/IRTranslator.h" |
24 | | #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" |
25 | | #include "llvm/CodeGen/GlobalISel/Legalizer.h" |
26 | | #include "llvm/CodeGen/GlobalISel/Localizer.h" |
27 | | #include "llvm/CodeGen/GlobalISel/RegBankSelect.h" |
28 | | #include "llvm/CodeGen/MachineScheduler.h" |
29 | | #include "llvm/CodeGen/Passes.h" |
30 | | #include "llvm/CodeGen/TargetPassConfig.h" |
31 | | #include "llvm/IR/Attributes.h" |
32 | | #include "llvm/IR/Function.h" |
33 | | #include "llvm/MC/MCTargetOptions.h" |
34 | | #include "llvm/Pass.h" |
35 | | #include "llvm/Support/CodeGen.h" |
36 | | #include "llvm/Support/CommandLine.h" |
37 | | #include "llvm/Support/TargetRegistry.h" |
38 | | #include "llvm/Target/TargetLoweringObjectFile.h" |
39 | | #include "llvm/Target/TargetOptions.h" |
40 | | #include "llvm/Transforms/Scalar.h" |
41 | | #include <memory> |
42 | | #include <string> |
43 | | |
44 | | using namespace llvm; |
45 | | |
46 | | static cl::opt<bool> EnableCCMP("aarch64-enable-ccmp", |
47 | | cl::desc("Enable the CCMP formation pass"), |
48 | | cl::init(true), cl::Hidden); |
49 | | |
50 | | static cl::opt<bool> |
51 | | EnableCondBrTuning("aarch64-enable-cond-br-tune", |
52 | | cl::desc("Enable the conditional branch tuning pass"), |
53 | | cl::init(true), cl::Hidden); |
54 | | |
55 | | static cl::opt<bool> EnableMCR("aarch64-enable-mcr", |
56 | | cl::desc("Enable the machine combiner pass"), |
57 | | cl::init(true), cl::Hidden); |
58 | | |
59 | | static cl::opt<bool> EnableStPairSuppress("aarch64-enable-stp-suppress", |
60 | | cl::desc("Suppress STP for AArch64"), |
61 | | cl::init(true), cl::Hidden); |
62 | | |
63 | | static cl::opt<bool> EnableAdvSIMDScalar( |
64 | | "aarch64-enable-simd-scalar", |
65 | | cl::desc("Enable use of AdvSIMD scalar integer instructions"), |
66 | | cl::init(false), cl::Hidden); |
67 | | |
68 | | static cl::opt<bool> |
69 | | EnablePromoteConstant("aarch64-enable-promote-const", |
70 | | cl::desc("Enable the promote constant pass"), |
71 | | cl::init(true), cl::Hidden); |
72 | | |
73 | | static cl::opt<bool> EnableCollectLOH( |
74 | | "aarch64-enable-collect-loh", |
75 | | cl::desc("Enable the pass that emits the linker optimization hints (LOH)"), |
76 | | cl::init(true), cl::Hidden); |
77 | | |
78 | | static cl::opt<bool> |
79 | | EnableDeadRegisterElimination("aarch64-enable-dead-defs", cl::Hidden, |
80 | | cl::desc("Enable the pass that removes dead" |
81 | | " definitons and replaces stores to" |
82 | | " them with stores to the zero" |
83 | | " register"), |
84 | | cl::init(true)); |
85 | | |
86 | | static cl::opt<bool> EnableRedundantCopyElimination( |
87 | | "aarch64-enable-copyelim", |
88 | | cl::desc("Enable the redundant copy elimination pass"), cl::init(true), |
89 | | cl::Hidden); |
90 | | |
91 | | static cl::opt<bool> EnableLoadStoreOpt("aarch64-enable-ldst-opt", |
92 | | cl::desc("Enable the load/store pair" |
93 | | " optimization pass"), |
94 | | cl::init(true), cl::Hidden); |
95 | | |
96 | | static cl::opt<bool> EnableAtomicTidy( |
97 | | "aarch64-enable-atomic-cfg-tidy", cl::Hidden, |
98 | | cl::desc("Run SimplifyCFG after expanding atomic operations" |
99 | | " to make use of cmpxchg flow-based information"), |
100 | | cl::init(true)); |
101 | | |
102 | | static cl::opt<bool> |
103 | | EnableEarlyIfConversion("aarch64-enable-early-ifcvt", cl::Hidden, |
104 | | cl::desc("Run early if-conversion"), |
105 | | cl::init(true)); |
106 | | |
107 | | static cl::opt<bool> |
108 | | EnableCondOpt("aarch64-enable-condopt", |
109 | | cl::desc("Enable the condition optimizer pass"), |
110 | | cl::init(true), cl::Hidden); |
111 | | |
112 | | static cl::opt<bool> |
113 | | EnableA53Fix835769("aarch64-fix-cortex-a53-835769", cl::Hidden, |
114 | | cl::desc("Work around Cortex-A53 erratum 835769"), |
115 | | cl::init(false)); |
116 | | |
117 | | static cl::opt<bool> |
118 | | EnableGEPOpt("aarch64-enable-gep-opt", cl::Hidden, |
119 | | cl::desc("Enable optimizations on complex GEPs"), |
120 | | cl::init(false)); |
121 | | |
122 | | static cl::opt<bool> |
123 | | BranchRelaxation("aarch64-enable-branch-relax", cl::Hidden, cl::init(true), |
124 | | cl::desc("Relax out of range conditional branches")); |
125 | | |
126 | | // FIXME: Unify control over GlobalMerge. |
127 | | static cl::opt<cl::boolOrDefault> |
128 | | EnableGlobalMerge("aarch64-enable-global-merge", cl::Hidden, |
129 | | cl::desc("Enable the global merge pass")); |
130 | | |
131 | | static cl::opt<bool> |
132 | | EnableLoopDataPrefetch("aarch64-enable-loop-data-prefetch", cl::Hidden, |
133 | | cl::desc("Enable the loop data prefetch pass"), |
134 | | cl::init(true)); |
135 | | |
136 | | static cl::opt<int> EnableGlobalISelAtO( |
137 | | "aarch64-enable-global-isel-at-O", cl::Hidden, |
138 | | cl::desc("Enable GlobalISel at or below an opt level (-1 to disable)"), |
139 | | cl::init(-1)); |
140 | | |
141 | | static cl::opt<bool> EnableFalkorHWPFFix("aarch64-enable-falkor-hwpf-fix", |
142 | | cl::init(true), cl::Hidden); |
143 | | |
144 | 123k | extern "C" void LLVMInitializeAArch64Target() { |
145 | 123k | // Register the target. |
146 | 123k | RegisterTargetMachine<AArch64leTargetMachine> X(getTheAArch64leTarget()); |
147 | 123k | RegisterTargetMachine<AArch64beTargetMachine> Y(getTheAArch64beTarget()); |
148 | 123k | RegisterTargetMachine<AArch64leTargetMachine> Z(getTheARM64Target()); |
149 | 123k | auto PR = PassRegistry::getPassRegistry(); |
150 | 123k | initializeGlobalISel(*PR); |
151 | 123k | initializeAArch64A53Fix835769Pass(*PR); |
152 | 123k | initializeAArch64A57FPLoadBalancingPass(*PR); |
153 | 123k | initializeAArch64AdvSIMDScalarPass(*PR); |
154 | 123k | initializeAArch64CollectLOHPass(*PR); |
155 | 123k | initializeAArch64ConditionalComparesPass(*PR); |
156 | 123k | initializeAArch64ConditionOptimizerPass(*PR); |
157 | 123k | initializeAArch64DeadRegisterDefinitionsPass(*PR); |
158 | 123k | initializeAArch64ExpandPseudoPass(*PR); |
159 | 123k | initializeAArch64LoadStoreOptPass(*PR); |
160 | 123k | initializeAArch64VectorByElementOptPass(*PR); |
161 | 123k | initializeAArch64PromoteConstantPass(*PR); |
162 | 123k | initializeAArch64RedundantCopyEliminationPass(*PR); |
163 | 123k | initializeAArch64StorePairSuppressPass(*PR); |
164 | 123k | initializeFalkorHWPFFixPass(*PR); |
165 | 123k | initializeFalkorMarkStridedAccessesLegacyPass(*PR); |
166 | 123k | initializeLDTLSCleanupPass(*PR); |
167 | 123k | } |
168 | | |
169 | | //===----------------------------------------------------------------------===// |
170 | | // AArch64 Lowering public interface. |
171 | | //===----------------------------------------------------------------------===// |
172 | 14.3k | static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { |
173 | 14.3k | if (TT.isOSBinFormatMachO()) |
174 | 13.3k | return llvm::make_unique<AArch64_MachoTargetObjectFile>(); |
175 | 1.02k | if (1.02k TT.isOSBinFormatCOFF()1.02k ) |
176 | 11 | return llvm::make_unique<AArch64_COFFTargetObjectFile>(); |
177 | 1.01k | |
178 | 1.01k | return llvm::make_unique<AArch64_ELFTargetObjectFile>(); |
179 | 1.01k | } |
180 | | |
181 | | // Helper function to build a DataLayout string |
182 | | static std::string computeDataLayout(const Triple &TT, |
183 | | const MCTargetOptions &Options, |
184 | 14.3k | bool LittleEndian) { |
185 | 14.3k | if (Options.getABIName() == "ilp32") |
186 | 0 | return "e-m:e-p:32:32-i8:8-i16:16-i64:64-S128"; |
187 | 14.3k | if (14.3k TT.isOSBinFormatMachO()14.3k ) |
188 | 13.3k | return "e-m:o-i64:64-i128:128-n32:64-S128"; |
189 | 1.02k | if (1.02k TT.isOSBinFormatCOFF()1.02k ) |
190 | 11 | return "e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128"; |
191 | 1.01k | if (1.01k LittleEndian1.01k ) |
192 | 983 | return "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"; |
193 | 29 | return "E-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"; |
194 | 29 | } |
195 | | |
196 | | static Reloc::Model getEffectiveRelocModel(const Triple &TT, |
197 | 14.3k | Optional<Reloc::Model> RM) { |
198 | 14.3k | // AArch64 Darwin is always PIC. |
199 | 14.3k | if (TT.isOSDarwin()) |
200 | 13.3k | return Reloc::PIC_; |
201 | 1.02k | // On ELF platforms the default static relocation model has a smart enough |
202 | 1.02k | // linker to cope with referencing external symbols defined in a shared |
203 | 1.02k | // library. Hence DynamicNoPIC doesn't need to be promoted to PIC. |
204 | 1.02k | if (1.02k !RM.hasValue() || 1.02k *RM == Reloc::DynamicNoPIC127 ) |
205 | 898 | return Reloc::Static; |
206 | 127 | return *RM; |
207 | 127 | } |
208 | | |
209 | | static CodeModel::Model getEffectiveCodeModel(const Triple &TT, |
210 | | Optional<CodeModel::Model> CM, |
211 | 14.3k | bool JIT) { |
212 | 14.3k | if (CM14.3k ) { |
213 | 26 | if (*CM != CodeModel::Small && 26 *CM != CodeModel::Large22 ) { |
214 | 6 | if (!TT.isOSFuchsia()) |
215 | 0 | report_fatal_error( |
216 | 0 | "Only small and large code models are allowed on AArch64"); |
217 | 6 | else if (6 CM != CodeModel::Kernel6 ) |
218 | 0 | report_fatal_error( |
219 | 0 | "Only small, kernel, and large code models are allowed on AArch64"); |
220 | 26 | } |
221 | 26 | return *CM; |
222 | 26 | } |
223 | 14.3k | // The default MCJIT memory managers make no guarantees about where they can |
224 | 14.3k | // find an executable page; JITed code needs to be able to refer to globals |
225 | 14.3k | // no matter how far away they are. |
226 | 14.3k | if (14.3k JIT14.3k ) |
227 | 0 | return CodeModel::Large; |
228 | 14.3k | return CodeModel::Small; |
229 | 14.3k | } |
230 | | |
231 | | /// Create an AArch64 architecture model. |
232 | | /// |
233 | | AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT, |
234 | | StringRef CPU, StringRef FS, |
235 | | const TargetOptions &Options, |
236 | | Optional<Reloc::Model> RM, |
237 | | Optional<CodeModel::Model> CM, |
238 | | CodeGenOpt::Level OL, bool JIT, |
239 | | bool LittleEndian) |
240 | | : LLVMTargetMachine(T, |
241 | | computeDataLayout(TT, Options.MCOptions, LittleEndian), |
242 | | TT, CPU, FS, Options, getEffectiveRelocModel(TT, RM), |
243 | | getEffectiveCodeModel(TT, CM, JIT), OL), |
244 | 14.3k | TLOF(createTLOF(getTargetTriple())), isLittle(LittleEndian) { |
245 | 14.3k | initAsmInfo(); |
246 | 14.3k | } |
247 | | |
248 | 1.37k | AArch64TargetMachine::~AArch64TargetMachine() = default; |
249 | | |
250 | | const AArch64Subtarget * |
251 | 23.4M | AArch64TargetMachine::getSubtargetImpl(const Function &F) const { |
252 | 23.4M | Attribute CPUAttr = F.getFnAttribute("target-cpu"); |
253 | 23.4M | Attribute FSAttr = F.getFnAttribute("target-features"); |
254 | 23.4M | |
255 | 23.4M | std::string CPU = !CPUAttr.hasAttribute(Attribute::None) |
256 | 4.19M | ? CPUAttr.getValueAsString().str() |
257 | 19.2M | : TargetCPU; |
258 | 23.4M | std::string FS = !FSAttr.hasAttribute(Attribute::None) |
259 | 4.23M | ? FSAttr.getValueAsString().str() |
260 | 19.1M | : TargetFS; |
261 | 23.4M | |
262 | 23.4M | auto &I = SubtargetMap[CPU + FS]; |
263 | 23.4M | if (!I23.4M ) { |
264 | 13.9k | // This needs to be done before we create a new subtarget since any |
265 | 13.9k | // creation will depend on the TM and the code generation flags on the |
266 | 13.9k | // function that reside in TargetOptions. |
267 | 13.9k | resetTargetOptions(F); |
268 | 13.9k | I = llvm::make_unique<AArch64Subtarget>(TargetTriple, CPU, FS, *this, |
269 | 13.9k | isLittle); |
270 | 13.9k | } |
271 | 23.4M | return I.get(); |
272 | 23.4M | } |
273 | | |
274 | 0 | void AArch64leTargetMachine::anchor() { } |
275 | | |
276 | | AArch64leTargetMachine::AArch64leTargetMachine( |
277 | | const Target &T, const Triple &TT, StringRef CPU, StringRef FS, |
278 | | const TargetOptions &Options, Optional<Reloc::Model> RM, |
279 | | Optional<CodeModel::Model> CM, CodeGenOpt::Level OL, bool JIT) |
280 | 14.3k | : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, JIT, true) {} |
281 | | |
282 | 0 | void AArch64beTargetMachine::anchor() { } |
283 | | |
284 | | AArch64beTargetMachine::AArch64beTargetMachine( |
285 | | const Target &T, const Triple &TT, StringRef CPU, StringRef FS, |
286 | | const TargetOptions &Options, Optional<Reloc::Model> RM, |
287 | | Optional<CodeModel::Model> CM, CodeGenOpt::Level OL, bool JIT) |
288 | 29 | : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, JIT, false) {} |
289 | | |
290 | | namespace { |
291 | | |
292 | | /// AArch64 Code Generator Pass Configuration Options. |
293 | | class AArch64PassConfig : public TargetPassConfig { |
294 | | public: |
295 | | AArch64PassConfig(AArch64TargetMachine &TM, PassManagerBase &PM) |
296 | 14.1k | : TargetPassConfig(TM, PM) { |
297 | 14.1k | if (TM.getOptLevel() != CodeGenOpt::None) |
298 | 13.8k | substitutePass(&PostRASchedulerID, &PostMachineSchedulerID); |
299 | 14.1k | } |
300 | | |
301 | 13.9k | AArch64TargetMachine &getAArch64TargetMachine() const { |
302 | 13.9k | return getTM<AArch64TargetMachine>(); |
303 | 13.9k | } |
304 | | |
305 | | ScheduleDAGInstrs * |
306 | 455k | createMachineScheduler(MachineSchedContext *C) const override { |
307 | 455k | const AArch64Subtarget &ST = C->MF->getSubtarget<AArch64Subtarget>(); |
308 | 455k | ScheduleDAGMILive *DAG = createGenericSchedLive(C); |
309 | 455k | DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI)); |
310 | 455k | DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); |
311 | 455k | if (ST.hasFusion()) |
312 | 455k | DAG->addMutation(createAArch64MacroFusionDAGMutation()); |
313 | 455k | return DAG; |
314 | 455k | } |
315 | | |
316 | | ScheduleDAGInstrs * |
317 | 8.59k | createPostMachineScheduler(MachineSchedContext *C) const override { |
318 | 8.59k | const AArch64Subtarget &ST = C->MF->getSubtarget<AArch64Subtarget>(); |
319 | 8.59k | if (ST.hasFusion()8.59k ) { |
320 | 8.49k | // Run the Macro Fusion after RA again since literals are expanded from |
321 | 8.49k | // pseudos then (v. addPreSched2()). |
322 | 8.49k | ScheduleDAGMI *DAG = createGenericSchedPostRA(C); |
323 | 8.49k | DAG->addMutation(createAArch64MacroFusionDAGMutation()); |
324 | 8.49k | return DAG; |
325 | 8.49k | } |
326 | 101 | |
327 | 101 | return nullptr; |
328 | 101 | } |
329 | | |
330 | | void addIRPasses() override; |
331 | | bool addPreISel() override; |
332 | | bool addInstSelector() override; |
333 | | bool addIRTranslator() override; |
334 | | bool addLegalizeMachineIR() override; |
335 | | bool addRegBankSelect() override; |
336 | | void addPreGlobalInstructionSelect() override; |
337 | | bool addGlobalInstructionSelect() override; |
338 | | bool addILPOpts() override; |
339 | | void addPreRegAlloc() override; |
340 | | void addPostRegAlloc() override; |
341 | | void addPreSched2() override; |
342 | | void addPreEmitPass() override; |
343 | | |
344 | | bool isGlobalISelEnabled() const override; |
345 | | }; |
346 | | |
347 | | } // end anonymous namespace |
348 | | |
349 | 53.3k | TargetIRAnalysis AArch64TargetMachine::getTargetIRAnalysis() { |
350 | 15.5M | return TargetIRAnalysis([this](const Function &F) { |
351 | 15.5M | return TargetTransformInfo(AArch64TTIImpl(this, F)); |
352 | 15.5M | }); |
353 | 53.3k | } |
354 | | |
355 | 14.1k | TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) { |
356 | 14.1k | return new AArch64PassConfig(*this, PM); |
357 | 14.1k | } |
358 | | |
359 | 13.9k | void AArch64PassConfig::addIRPasses() { |
360 | 13.9k | // Always expand atomic operations, we don't deal with atomicrmw or cmpxchg |
361 | 13.9k | // ourselves. |
362 | 13.9k | addPass(createAtomicExpandPass()); |
363 | 13.9k | |
364 | 13.9k | // Cmpxchg instructions are often used with a subsequent comparison to |
365 | 13.9k | // determine whether it succeeded. We can exploit existing control-flow in |
366 | 13.9k | // ldrex/strex loops to simplify this, but it needs tidying up. |
367 | 13.9k | if (TM->getOptLevel() != CodeGenOpt::None && 13.9k EnableAtomicTidy13.8k ) |
368 | 13.7k | addPass(createCFGSimplificationPass()); |
369 | 13.9k | |
370 | 13.9k | // Run LoopDataPrefetch |
371 | 13.9k | // |
372 | 13.9k | // Run this before LSR to remove the multiplies involved in computing the |
373 | 13.9k | // pointer values N iterations ahead. |
374 | 13.9k | if (TM->getOptLevel() != CodeGenOpt::None13.9k ) { |
375 | 13.8k | if (EnableLoopDataPrefetch) |
376 | 13.8k | addPass(createLoopDataPrefetchPass()); |
377 | 13.8k | if (EnableFalkorHWPFFix) |
378 | 13.8k | addPass(createFalkorMarkStridedAccessesPass()); |
379 | 13.8k | } |
380 | 13.9k | |
381 | 13.9k | TargetPassConfig::addIRPasses(); |
382 | 13.9k | |
383 | 13.9k | // Match interleaved memory accesses to ldN/stN intrinsics. |
384 | 13.9k | if (TM->getOptLevel() != CodeGenOpt::None) |
385 | 13.8k | addPass(createInterleavedAccessPass()); |
386 | 13.9k | |
387 | 13.9k | if (TM->getOptLevel() == CodeGenOpt::Aggressive && 13.9k EnableGEPOpt12.9k ) { |
388 | 6 | // Call SeparateConstOffsetFromGEP pass to extract constants within indices |
389 | 6 | // and lower a GEP with multiple indices to either arithmetic operations or |
390 | 6 | // multiple GEPs with single index. |
391 | 6 | addPass(createSeparateConstOffsetFromGEPPass(TM, true)); |
392 | 6 | // Call EarlyCSE pass to find and remove subexpressions in the lowered |
393 | 6 | // result. |
394 | 6 | addPass(createEarlyCSEPass()); |
395 | 6 | // Do loop invariant code motion in case part of the lowered result is |
396 | 6 | // invariant. |
397 | 6 | addPass(createLICMPass()); |
398 | 6 | } |
399 | 13.9k | } |
400 | | |
401 | | // Pass Pipeline Configuration |
402 | 13.9k | bool AArch64PassConfig::addPreISel() { |
403 | 13.9k | // Run promote constant before global merge, so that the promoted constants |
404 | 13.9k | // get a chance to be merged |
405 | 13.9k | if (TM->getOptLevel() != CodeGenOpt::None && 13.9k EnablePromoteConstant13.8k ) |
406 | 13.8k | addPass(createAArch64PromoteConstantPass()); |
407 | 13.9k | // FIXME: On AArch64, this depends on the type. |
408 | 13.9k | // Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes(). |
409 | 13.9k | // and the offset has to be a multiple of the related size in bytes. |
410 | 13.9k | if ((TM->getOptLevel() != CodeGenOpt::None && |
411 | 13.8k | EnableGlobalMerge == cl::BOU_UNSET) || |
412 | 13.9k | EnableGlobalMerge == cl::BOU_TRUE146 ) { |
413 | 13.8k | bool OnlyOptimizeForSize = (TM->getOptLevel() < CodeGenOpt::Aggressive) && |
414 | 880 | (EnableGlobalMerge == cl::BOU_UNSET); |
415 | 13.8k | addPass(createGlobalMergePass(TM, 4095, OnlyOptimizeForSize)); |
416 | 13.8k | } |
417 | 13.9k | |
418 | 13.9k | return false; |
419 | 13.9k | } |
420 | | |
421 | 13.9k | bool AArch64PassConfig::addInstSelector() { |
422 | 13.9k | addPass(createAArch64ISelDag(getAArch64TargetMachine(), getOptLevel())); |
423 | 13.9k | |
424 | 13.9k | // For ELF, cleanup any local-dynamic TLS accesses (i.e. combine as many |
425 | 13.9k | // references to _TLS_MODULE_BASE_ as possible. |
426 | 13.9k | if (TM->getTargetTriple().isOSBinFormatELF() && |
427 | 691 | getOptLevel() != CodeGenOpt::None) |
428 | 656 | addPass(createAArch64CleanupLocalDynamicTLSPass()); |
429 | 13.9k | |
430 | 13.9k | return false; |
431 | 13.9k | } |
432 | | |
433 | 6.27k | bool AArch64PassConfig::addIRTranslator() { |
434 | 6.27k | addPass(new IRTranslator()); |
435 | 6.27k | return false; |
436 | 6.27k | } |
437 | | |
438 | 6.27k | bool AArch64PassConfig::addLegalizeMachineIR() { |
439 | 6.27k | addPass(new Legalizer()); |
440 | 6.27k | return false; |
441 | 6.27k | } |
442 | | |
443 | 6.27k | bool AArch64PassConfig::addRegBankSelect() { |
444 | 6.27k | addPass(new RegBankSelect()); |
445 | 6.27k | return false; |
446 | 6.27k | } |
447 | | |
448 | 6.27k | void AArch64PassConfig::addPreGlobalInstructionSelect() { |
449 | 6.27k | // Workaround the deficiency of the fast register allocator. |
450 | 6.27k | if (TM->getOptLevel() == CodeGenOpt::None) |
451 | 36 | addPass(new Localizer()); |
452 | 6.27k | } |
453 | | |
454 | 6.27k | bool AArch64PassConfig::addGlobalInstructionSelect() { |
455 | 6.27k | addPass(new InstructionSelect()); |
456 | 6.27k | return false; |
457 | 6.27k | } |
458 | | |
459 | 7.65k | bool AArch64PassConfig::isGlobalISelEnabled() const { |
460 | 7.65k | return TM->getOptLevel() <= EnableGlobalISelAtO; |
461 | 7.65k | } |
462 | | |
463 | 13.8k | bool AArch64PassConfig::addILPOpts() { |
464 | 13.8k | if (EnableCondOpt) |
465 | 13.8k | addPass(createAArch64ConditionOptimizerPass()); |
466 | 13.8k | if (EnableCCMP) |
467 | 13.8k | addPass(createAArch64ConditionalCompares()); |
468 | 13.8k | if (EnableMCR) |
469 | 13.8k | addPass(&MachineCombinerID); |
470 | 13.8k | if (EnableCondBrTuning) |
471 | 13.7k | addPass(createAArch64CondBrTuning()); |
472 | 13.8k | if (EnableEarlyIfConversion) |
473 | 13.8k | addPass(&EarlyIfConverterID); |
474 | 13.8k | if (EnableStPairSuppress) |
475 | 13.7k | addPass(createAArch64StorePairSuppressPass()); |
476 | 13.8k | addPass(createAArch64VectorByElementOptPass()); |
477 | 13.8k | return true; |
478 | 13.8k | } |
479 | | |
480 | 13.9k | void AArch64PassConfig::addPreRegAlloc() { |
481 | 13.9k | // Change dead register definitions to refer to the zero register. |
482 | 13.9k | if (TM->getOptLevel() != CodeGenOpt::None && 13.9k EnableDeadRegisterElimination13.8k ) |
483 | 13.8k | addPass(createAArch64DeadRegisterDefinitions()); |
484 | 13.9k | |
485 | 13.9k | // Use AdvSIMD scalar instructions whenever profitable. |
486 | 13.9k | if (TM->getOptLevel() != CodeGenOpt::None && 13.9k EnableAdvSIMDScalar13.8k ) { |
487 | 5 | addPass(createAArch64AdvSIMDScalar()); |
488 | 5 | // The AdvSIMD pass may produce copies that can be rewritten to |
489 | 5 | // be register coaleascer friendly. |
490 | 5 | addPass(&PeepholeOptimizerID); |
491 | 5 | } |
492 | 13.9k | } |
493 | | |
494 | 13.9k | void AArch64PassConfig::addPostRegAlloc() { |
495 | 13.9k | // Remove redundant copy instructions. |
496 | 13.9k | if (TM->getOptLevel() != CodeGenOpt::None && 13.9k EnableRedundantCopyElimination13.8k ) |
497 | 13.8k | addPass(createAArch64RedundantCopyEliminationPass()); |
498 | 13.9k | |
499 | 13.9k | if (TM->getOptLevel() != CodeGenOpt::None && 13.9k usingDefaultRegAlloc()13.8k ) |
500 | 13.9k | // Improve performance for some FP/SIMD code for A57. |
501 | 13.7k | addPass(createAArch64A57FPLoadBalancing()); |
502 | 13.9k | } |
503 | | |
504 | 13.9k | void AArch64PassConfig::addPreSched2() { |
505 | 13.9k | // Expand some pseudo instructions to allow proper scheduling. |
506 | 13.9k | addPass(createAArch64ExpandPseudoPass()); |
507 | 13.9k | // Use load/store pair instructions when possible. |
508 | 13.9k | if (TM->getOptLevel() != CodeGenOpt::None13.9k ) { |
509 | 13.8k | if (EnableLoadStoreOpt) |
510 | 13.7k | addPass(createAArch64LoadStoreOptimizationPass()); |
511 | 13.8k | if (EnableFalkorHWPFFix) |
512 | 13.8k | addPass(createFalkorHWPFFixPass()); |
513 | 13.8k | } |
514 | 13.9k | } |
515 | | |
516 | 13.9k | void AArch64PassConfig::addPreEmitPass() { |
517 | 13.9k | if (EnableA53Fix835769) |
518 | 3 | addPass(createAArch64A53Fix835769()); |
519 | 13.9k | // Relax conditional branch instructions if they're otherwise out of |
520 | 13.9k | // range of their destination. |
521 | 13.9k | if (BranchRelaxation) |
522 | 13.9k | addPass(&BranchRelaxationPassID); |
523 | 13.9k | |
524 | 13.9k | if (TM->getOptLevel() != CodeGenOpt::None && 13.9k EnableCollectLOH13.8k && |
525 | 13.7k | TM->getTargetTriple().isOSBinFormatMachO()) |
526 | 13.1k | addPass(createAArch64CollectLOHPass()); |
527 | 13.9k | } |