/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Target/X86/X86TargetMachine.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===-- X86TargetMachine.cpp - Define TargetMachine for the X86 -----------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // This file defines the X86 specific subclass of TargetMachine. |
11 | | // |
12 | | //===----------------------------------------------------------------------===// |
13 | | |
14 | | #include "MCTargetDesc/X86MCTargetDesc.h" |
15 | | #include "X86.h" |
16 | | #include "X86CallLowering.h" |
17 | | #include "X86LegalizerInfo.h" |
18 | | #include "X86MacroFusion.h" |
19 | | #include "X86Subtarget.h" |
20 | | #include "X86TargetMachine.h" |
21 | | #include "X86TargetObjectFile.h" |
22 | | #include "X86TargetTransformInfo.h" |
23 | | #include "llvm/ADT/Optional.h" |
24 | | #include "llvm/ADT/STLExtras.h" |
25 | | #include "llvm/ADT/SmallString.h" |
26 | | #include "llvm/ADT/StringRef.h" |
27 | | #include "llvm/ADT/Triple.h" |
28 | | #include "llvm/Analysis/TargetTransformInfo.h" |
29 | | #include "llvm/CodeGen/ExecutionDepsFix.h" |
30 | | #include "llvm/CodeGen/GlobalISel/CallLowering.h" |
31 | | #include "llvm/CodeGen/GlobalISel/IRTranslator.h" |
32 | | #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" |
33 | | #include "llvm/CodeGen/GlobalISel/Legalizer.h" |
34 | | #include "llvm/CodeGen/GlobalISel/RegBankSelect.h" |
35 | | #include "llvm/CodeGen/MachineScheduler.h" |
36 | | #include "llvm/CodeGen/Passes.h" |
37 | | #include "llvm/CodeGen/TargetPassConfig.h" |
38 | | #include "llvm/IR/Attributes.h" |
39 | | #include "llvm/IR/DataLayout.h" |
40 | | #include "llvm/IR/Function.h" |
41 | | #include "llvm/Pass.h" |
42 | | #include "llvm/Support/CodeGen.h" |
43 | | #include "llvm/Support/CommandLine.h" |
44 | | #include "llvm/Support/ErrorHandling.h" |
45 | | #include "llvm/Support/TargetRegistry.h" |
46 | | #include "llvm/Target/TargetLoweringObjectFile.h" |
47 | | #include "llvm/Target/TargetOptions.h" |
48 | | #include <memory> |
49 | | #include <string> |
50 | | |
51 | | using namespace llvm; |
52 | | |
53 | | static cl::opt<bool> EnableMachineCombinerPass("x86-machine-combiner", |
54 | | cl::desc("Enable the machine combiner pass"), |
55 | | cl::init(true), cl::Hidden); |
56 | | |
57 | | namespace llvm { |
58 | | |
59 | | void initializeWinEHStatePassPass(PassRegistry &); |
60 | | void initializeFixupLEAPassPass(PassRegistry &); |
61 | | void initializeX86ExecutionDepsFixPass(PassRegistry &); |
62 | | |
63 | | } // end namespace llvm |
64 | | |
65 | 124k | extern "C" void LLVMInitializeX86Target() { |
66 | 124k | // Register the target. |
67 | 124k | RegisterTargetMachine<X86TargetMachine> X(getTheX86_32Target()); |
68 | 124k | RegisterTargetMachine<X86TargetMachine> Y(getTheX86_64Target()); |
69 | 124k | |
70 | 124k | PassRegistry &PR = *PassRegistry::getPassRegistry(); |
71 | 124k | initializeGlobalISel(PR); |
72 | 124k | initializeWinEHStatePassPass(PR); |
73 | 124k | initializeFixupBWInstPassPass(PR); |
74 | 124k | initializeEvexToVexInstPassPass(PR); |
75 | 124k | initializeFixupLEAPassPass(PR); |
76 | 124k | initializeX86ExecutionDepsFixPass(PR); |
77 | 124k | } |
78 | | |
79 | 15.2k | static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { |
80 | 15.2k | if (TT.isOSBinFormatMachO()15.2k ) { |
81 | 7.15k | if (TT.getArch() == Triple::x86_64) |
82 | 5.85k | return llvm::make_unique<X86_64MachoTargetObjectFile>(); |
83 | 1.30k | return llvm::make_unique<TargetLoweringObjectFileMachO>(); |
84 | 1.30k | } |
85 | 8.04k | |
86 | 8.04k | if (8.04k TT.isOSFreeBSD()8.04k ) |
87 | 50 | return llvm::make_unique<X86FreeBSDTargetObjectFile>(); |
88 | 7.99k | if (7.99k TT.isOSLinux() || 7.99k TT.isOSNaCl()4.59k || TT.isOSIAMCU()4.58k ) |
89 | 3.41k | return llvm::make_unique<X86LinuxNaClTargetObjectFile>(); |
90 | 4.57k | if (4.57k TT.isOSSolaris()4.57k ) |
91 | 10 | return llvm::make_unique<X86SolarisTargetObjectFile>(); |
92 | 4.56k | if (4.56k TT.isOSFuchsia()4.56k ) |
93 | 6 | return llvm::make_unique<X86FuchsiaTargetObjectFile>(); |
94 | 4.56k | if (4.56k TT.isOSBinFormatELF()4.56k ) |
95 | 3.46k | return llvm::make_unique<X86ELFTargetObjectFile>(); |
96 | 1.09k | if (1.09k TT.isKnownWindowsMSVCEnvironment() || 1.09k TT.isWindowsCoreCLREnvironment()186 ) |
97 | 909 | return llvm::make_unique<X86WindowsTargetObjectFile>(); |
98 | 187 | if (187 TT.isOSBinFormatCOFF()187 ) |
99 | 181 | return llvm::make_unique<TargetLoweringObjectFileCOFF>(); |
100 | 6 | llvm_unreachable6 ("unknown subtarget type"); |
101 | 6 | } |
102 | | |
103 | 15.1k | static std::string computeDataLayout(const Triple &TT) { |
104 | 15.1k | // X86 is little endian |
105 | 15.1k | std::string Ret = "e"; |
106 | 15.1k | |
107 | 15.1k | Ret += DataLayout::getManglingComponent(TT); |
108 | 15.1k | // X86 and x32 have 32 bit pointers. |
109 | 15.1k | if ((TT.isArch64Bit() && |
110 | 11.5k | (TT.getEnvironment() == Triple::GNUX32 || 11.5k TT.isOSNaCl()11.4k )) || |
111 | 15.1k | !TT.isArch64Bit()) |
112 | 3.74k | Ret += "-p:32:32"; |
113 | 15.1k | |
114 | 15.1k | // Some ABIs align 64 bit integers and doubles to 64 bits, others to 32. |
115 | 15.1k | if (TT.isArch64Bit() || 15.1k TT.isOSWindows()3.67k || TT.isOSNaCl()3.13k ) |
116 | 12.0k | Ret += "-i64:64"; |
117 | 3.13k | else if (3.13k TT.isOSIAMCU()3.13k ) |
118 | 7 | Ret += "-i64:32-f64:32"; |
119 | 3.13k | else |
120 | 3.12k | Ret += "-f64:32:64"; |
121 | 15.1k | |
122 | 15.1k | // Some ABIs align long double to 128 bits, others to 32. |
123 | 15.1k | if (TT.isOSNaCl() || 15.1k TT.isOSIAMCU()15.1k ) |
124 | 26 | ; // No f80 |
125 | 15.1k | else if (15.1k TT.isArch64Bit() || 15.1k TT.isOSDarwin()3.66k ) |
126 | 12.8k | Ret += "-f80:128"; |
127 | 15.1k | else |
128 | 2.36k | Ret += "-f80:32"; |
129 | 15.1k | |
130 | 15.1k | if (TT.isOSIAMCU()) |
131 | 7 | Ret += "-f128:32"; |
132 | 15.1k | |
133 | 15.1k | // The registers can hold 8, 16, 32 or, in x86-64, 64 bits. |
134 | 15.1k | if (TT.isArch64Bit()) |
135 | 11.5k | Ret += "-n8:16:32:64"; |
136 | 15.1k | else |
137 | 3.67k | Ret += "-n8:16:32"; |
138 | 15.1k | |
139 | 15.1k | // The stack is aligned to 32 bits on some ABIs and 128 bits on others. |
140 | 15.1k | if ((!TT.isArch64Bit() && 15.1k TT.isOSWindows()3.67k ) || TT.isOSIAMCU()14.6k ) |
141 | 548 | Ret += "-a:0:32-S32"; |
142 | 15.1k | else |
143 | 14.6k | Ret += "-S128"; |
144 | 15.1k | |
145 | 15.1k | return Ret; |
146 | 15.1k | } |
147 | | |
148 | | static Reloc::Model getEffectiveRelocModel(const Triple &TT, |
149 | 15.1k | Optional<Reloc::Model> RM) { |
150 | 15.1k | bool is64Bit = TT.getArch() == Triple::x86_64; |
151 | 15.1k | if (!RM.hasValue()15.1k ) { |
152 | 8.67k | // Darwin defaults to PIC in 64 bit mode and dynamic-no-pic in 32 bit mode. |
153 | 8.67k | // Win64 requires rip-rel addressing, thus we force it to PIC. Otherwise we |
154 | 8.67k | // use static relocation model by default. |
155 | 8.67k | if (TT.isOSDarwin()8.67k ) { |
156 | 2.67k | if (is64Bit) |
157 | 2.21k | return Reloc::PIC_; |
158 | 456 | return Reloc::DynamicNoPIC; |
159 | 456 | } |
160 | 6.00k | if (6.00k TT.isOSWindows() && 6.00k is64Bit609 ) |
161 | 335 | return Reloc::PIC_; |
162 | 5.66k | return Reloc::Static; |
163 | 5.66k | } |
164 | 6.52k | |
165 | 6.52k | // ELF and X86-64 don't have a distinct DynamicNoPIC model. DynamicNoPIC |
166 | 6.52k | // is defined as a model for code which may be used in static or dynamic |
167 | 6.52k | // executables but not necessarily a shared library. On X86-32 we just |
168 | 6.52k | // compile in -static mode, in x86-64 we use PIC. |
169 | 6.52k | if (6.52k *RM == Reloc::DynamicNoPIC6.52k ) { |
170 | 9 | if (is64Bit) |
171 | 3 | return Reloc::PIC_; |
172 | 6 | if (6 !TT.isOSDarwin()6 ) |
173 | 1 | return Reloc::Static; |
174 | 6.52k | } |
175 | 6.52k | |
176 | 6.52k | // If we are on Darwin, disallow static relocation model in X86-64 mode, since |
177 | 6.52k | // the Mach-O file format doesn't support it. |
178 | 6.52k | if (6.52k *RM == Reloc::Static && 6.52k TT.isOSDarwin()711 && is64Bit549 ) |
179 | 370 | return Reloc::PIC_; |
180 | 6.15k | |
181 | 6.15k | return *RM; |
182 | 6.15k | } |
183 | | |
184 | | static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM, |
185 | 15.1k | bool JIT, bool Is64Bit) { |
186 | 15.1k | if (CM) |
187 | 54 | return *CM; |
188 | 15.1k | if (15.1k JIT15.1k ) |
189 | 220 | return Is64Bit ? 220 CodeModel::Large220 : CodeModel::Small0 ; |
190 | 14.9k | return CodeModel::Small; |
191 | 14.9k | } |
192 | | |
193 | | /// Create an X86 target. |
194 | | /// |
195 | | X86TargetMachine::X86TargetMachine(const Target &T, const Triple &TT, |
196 | | StringRef CPU, StringRef FS, |
197 | | const TargetOptions &Options, |
198 | | Optional<Reloc::Model> RM, |
199 | | Optional<CodeModel::Model> CM, |
200 | | CodeGenOpt::Level OL, bool JIT) |
201 | | : LLVMTargetMachine( |
202 | | T, computeDataLayout(TT), TT, CPU, FS, Options, |
203 | | getEffectiveRelocModel(TT, RM), |
204 | | getEffectiveCodeModel(CM, JIT, TT.getArch() == Triple::x86_64), OL), |
205 | 15.1k | TLOF(createTLOF(getTargetTriple())) { |
206 | 15.1k | // Windows stack unwinder gets confused when execution flow "falls through" |
207 | 15.1k | // after a call to 'noreturn' function. |
208 | 15.1k | // To prevent that, we emit a trap for 'unreachable' IR instructions. |
209 | 15.1k | // (which on X86, happens to be the 'ud2' instruction) |
210 | 15.1k | // On PS4, the "return address" of a 'noreturn' call must still be within |
211 | 15.1k | // the calling function, and TrapUnreachable is an easy way to get that. |
212 | 15.1k | // The check here for 64-bit windows is a bit icky, but as we're unlikely |
213 | 15.1k | // to ever want to mix 32 and 64-bit windows code in a single module |
214 | 15.1k | // this should be fine. |
215 | 15.1k | if ((TT.isOSWindows() && 15.1k TT.getArch() == Triple::x86_641.10k ) || TT.isPS4()14.6k ) |
216 | 595 | this->Options.TrapUnreachable = true; |
217 | 15.1k | |
218 | 15.1k | initAsmInfo(); |
219 | 15.1k | } |
220 | | |
221 | 12.9k | X86TargetMachine::~X86TargetMachine() = default; |
222 | | |
223 | | const X86Subtarget * |
224 | 1.25M | X86TargetMachine::getSubtargetImpl(const Function &F) const { |
225 | 1.25M | Attribute CPUAttr = F.getFnAttribute("target-cpu"); |
226 | 1.25M | Attribute FSAttr = F.getFnAttribute("target-features"); |
227 | 1.25M | |
228 | 1.25M | StringRef CPU = !CPUAttr.hasAttribute(Attribute::None) |
229 | 622k | ? CPUAttr.getValueAsString() |
230 | 632k | : (StringRef)TargetCPU; |
231 | 1.25M | StringRef FS = !FSAttr.hasAttribute(Attribute::None) |
232 | 975k | ? FSAttr.getValueAsString() |
233 | 279k | : (StringRef)TargetFS; |
234 | 1.25M | |
235 | 1.25M | SmallString<512> Key; |
236 | 1.25M | Key.reserve(CPU.size() + FS.size()); |
237 | 1.25M | Key += CPU; |
238 | 1.25M | Key += FS; |
239 | 1.25M | |
240 | 1.25M | // FIXME: This is related to the code below to reset the target options, |
241 | 1.25M | // we need to know whether or not the soft float flag is set on the |
242 | 1.25M | // function before we can generate a subtarget. We also need to use |
243 | 1.25M | // it as a key for the subtarget since that can be the only difference |
244 | 1.25M | // between two functions. |
245 | 1.25M | bool SoftFloat = |
246 | 1.25M | F.getFnAttribute("use-soft-float").getValueAsString() == "true"; |
247 | 1.25M | // If the soft float attribute is set on the function turn on the soft float |
248 | 1.25M | // subtarget feature. |
249 | 1.25M | if (SoftFloat) |
250 | 780 | Key += FS.empty() ? 780 "+soft-float"780 : ",+soft-float"0 ; |
251 | 1.25M | |
252 | 1.25M | FS = Key.substr(CPU.size()); |
253 | 1.25M | |
254 | 1.25M | auto &I = SubtargetMap[Key]; |
255 | 1.25M | if (!I1.25M ) { |
256 | 9.65k | // This needs to be done before we create a new subtarget since any |
257 | 9.65k | // creation will depend on the TM and the code generation flags on the |
258 | 9.65k | // function that reside in TargetOptions. |
259 | 9.65k | resetTargetOptions(F); |
260 | 9.65k | I = llvm::make_unique<X86Subtarget>(TargetTriple, CPU, FS, *this, |
261 | 9.65k | Options.StackAlignmentOverride); |
262 | 9.65k | } |
263 | 1.25M | return I.get(); |
264 | 1.25M | } |
265 | | |
266 | | //===----------------------------------------------------------------------===// |
267 | | // Command line options for x86 |
268 | | //===----------------------------------------------------------------------===// |
269 | | static cl::opt<bool> |
270 | | UseVZeroUpper("x86-use-vzeroupper", cl::Hidden, |
271 | | cl::desc("Minimize AVX to SSE transition penalty"), |
272 | | cl::init(true)); |
273 | | |
274 | | //===----------------------------------------------------------------------===// |
275 | | // X86 TTI query. |
276 | | //===----------------------------------------------------------------------===// |
277 | | |
278 | 29.0k | TargetIRAnalysis X86TargetMachine::getTargetIRAnalysis() { |
279 | 660k | return TargetIRAnalysis([this](const Function &F) { |
280 | 660k | return TargetTransformInfo(X86TTIImpl(this, F)); |
281 | 660k | }); |
282 | 29.0k | } |
283 | | |
284 | | //===----------------------------------------------------------------------===// |
285 | | // Pass Pipeline Configuration |
286 | | //===----------------------------------------------------------------------===// |
287 | | |
288 | | namespace { |
289 | | |
290 | | /// X86 Code Generator Pass Configuration Options. |
291 | | class X86PassConfig : public TargetPassConfig { |
292 | | public: |
293 | | X86PassConfig(X86TargetMachine &TM, PassManagerBase &PM) |
294 | 10.9k | : TargetPassConfig(TM, PM) {} |
295 | | |
296 | 8.49k | X86TargetMachine &getX86TargetMachine() const { |
297 | 8.49k | return getTM<X86TargetMachine>(); |
298 | 8.49k | } |
299 | | |
300 | | ScheduleDAGInstrs * |
301 | 70.8k | createMachineScheduler(MachineSchedContext *C) const override { |
302 | 70.8k | ScheduleDAGMILive *DAG = createGenericSchedLive(C); |
303 | 70.8k | DAG->addMutation(createX86MacroFusionDAGMutation()); |
304 | 70.8k | return DAG; |
305 | 70.8k | } |
306 | | |
307 | | void addIRPasses() override; |
308 | | bool addInstSelector() override; |
309 | | bool addIRTranslator() override; |
310 | | bool addLegalizeMachineIR() override; |
311 | | bool addRegBankSelect() override; |
312 | | bool addGlobalInstructionSelect() override; |
313 | | bool addILPOpts() override; |
314 | | bool addPreISel() override; |
315 | | void addPreRegAlloc() override; |
316 | | void addPostRegAlloc() override; |
317 | | void addPreEmitPass() override; |
318 | | void addPreSched2() override; |
319 | | }; |
320 | | |
321 | | class X86ExecutionDepsFix : public ExecutionDepsFix { |
322 | | public: |
323 | | static char ID; |
324 | 7.89k | X86ExecutionDepsFix() : ExecutionDepsFix(ID, X86::VR128XRegClass) {} |
325 | 7.88k | StringRef getPassName() const override { |
326 | 7.88k | return "X86 Execution Dependency Fix"; |
327 | 7.88k | } |
328 | | }; |
329 | | char X86ExecutionDepsFix::ID; |
330 | | |
331 | | } // end anonymous namespace |
332 | | |
333 | | INITIALIZE_PASS(X86ExecutionDepsFix, "x86-execution-deps-fix", |
334 | | "X86 Execution Dependency Fix", false, false) |
335 | | |
336 | 10.9k | TargetPassConfig *X86TargetMachine::createPassConfig(PassManagerBase &PM) { |
337 | 10.9k | return new X86PassConfig(*this, PM); |
338 | 10.9k | } |
339 | | |
340 | 8.54k | void X86PassConfig::addIRPasses() { |
341 | 8.54k | addPass(createAtomicExpandPass()); |
342 | 8.54k | |
343 | 8.54k | TargetPassConfig::addIRPasses(); |
344 | 8.54k | |
345 | 8.54k | if (TM->getOptLevel() != CodeGenOpt::None) |
346 | 7.89k | addPass(createInterleavedAccessPass()); |
347 | 8.54k | } |
348 | | |
349 | 8.49k | bool X86PassConfig::addInstSelector() { |
350 | 8.49k | // Install an instruction selector. |
351 | 8.49k | addPass(createX86ISelDag(getX86TargetMachine(), getOptLevel())); |
352 | 8.49k | |
353 | 8.49k | // For ELF, cleanup any local-dynamic TLS accesses. |
354 | 8.49k | if (TM->getTargetTriple().isOSBinFormatELF() && |
355 | 4.16k | getOptLevel() != CodeGenOpt::None) |
356 | 3.99k | addPass(createCleanupLocalDynamicTLSPass()); |
357 | 8.49k | |
358 | 8.49k | addPass(createX86GlobalBaseRegPass()); |
359 | 8.49k | return false; |
360 | 8.49k | } |
361 | | |
362 | 58 | bool X86PassConfig::addIRTranslator() { |
363 | 58 | addPass(new IRTranslator()); |
364 | 58 | return false; |
365 | 58 | } |
366 | | |
367 | 58 | bool X86PassConfig::addLegalizeMachineIR() { |
368 | 58 | addPass(new Legalizer()); |
369 | 58 | return false; |
370 | 58 | } |
371 | | |
372 | 58 | bool X86PassConfig::addRegBankSelect() { |
373 | 58 | addPass(new RegBankSelect()); |
374 | 58 | return false; |
375 | 58 | } |
376 | | |
377 | 58 | bool X86PassConfig::addGlobalInstructionSelect() { |
378 | 58 | addPass(new InstructionSelect()); |
379 | 58 | return false; |
380 | 58 | } |
381 | | |
382 | 7.89k | bool X86PassConfig::addILPOpts() { |
383 | 7.89k | addPass(&EarlyIfConverterID); |
384 | 7.89k | if (EnableMachineCombinerPass) |
385 | 7.89k | addPass(&MachineCombinerID); |
386 | 7.89k | addPass(createX86CmovConverterPass()); |
387 | 7.89k | return true; |
388 | 7.89k | } |
389 | | |
390 | 8.54k | bool X86PassConfig::addPreISel() { |
391 | 8.54k | // Only add this pass for 32-bit x86 Windows. |
392 | 8.54k | const Triple &TT = TM->getTargetTriple(); |
393 | 8.54k | if (TT.isOSWindows() && 8.54k TT.getArch() == Triple::x86543 ) |
394 | 234 | addPass(createX86WinEHStatePass()); |
395 | 8.54k | return true; |
396 | 8.54k | } |
397 | | |
398 | 8.54k | void X86PassConfig::addPreRegAlloc() { |
399 | 8.54k | if (getOptLevel() != CodeGenOpt::None8.54k ) { |
400 | 7.89k | addPass(&LiveRangeShrinkID); |
401 | 7.89k | addPass(createX86FixupSetCC()); |
402 | 7.89k | addPass(createX86OptimizeLEAs()); |
403 | 7.89k | addPass(createX86CallFrameOptimization()); |
404 | 7.89k | } |
405 | 8.54k | |
406 | 8.54k | addPass(createX86WinAllocaExpander()); |
407 | 8.54k | } |
408 | | |
409 | 8.54k | void X86PassConfig::addPostRegAlloc() { |
410 | 8.54k | addPass(createX86FloatingPointStackifierPass()); |
411 | 8.54k | } |
412 | | |
413 | 8.54k | void X86PassConfig::addPreSched2() { addPass(createX86ExpandPseudoPass()); } |
414 | | |
415 | 8.54k | void X86PassConfig::addPreEmitPass() { |
416 | 8.54k | if (getOptLevel() != CodeGenOpt::None) |
417 | 7.89k | addPass(new X86ExecutionDepsFix()); |
418 | 8.54k | |
419 | 8.54k | if (UseVZeroUpper) |
420 | 8.54k | addPass(createX86IssueVZeroUpperPass()); |
421 | 8.54k | |
422 | 8.54k | if (getOptLevel() != CodeGenOpt::None8.54k ) { |
423 | 7.89k | addPass(createX86FixupBWInsts()); |
424 | 7.89k | addPass(createX86PadShortFunctions()); |
425 | 7.89k | addPass(createX86FixupLEAs()); |
426 | 7.89k | addPass(createX86EvexToVexInsts()); |
427 | 7.89k | } |
428 | 8.54k | } |