// Source: llvm/lib/Target/X86/X86RegisterInfo.cpp
// (clang coverage listing; the original report columns were "Line | Count | Source").
1 | | //===-- X86RegisterInfo.cpp - X86 Register Information --------------------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // This file contains the X86 implementation of the TargetRegisterInfo class. |
11 | | // This file is responsible for the frame pointer elimination optimization |
12 | | // on X86. |
13 | | // |
14 | | //===----------------------------------------------------------------------===// |
15 | | |
16 | | #include "X86RegisterInfo.h" |
17 | | #include "X86FrameLowering.h" |
18 | | #include "X86InstrBuilder.h" |
19 | | #include "X86MachineFunctionInfo.h" |
20 | | #include "X86Subtarget.h" |
21 | | #include "X86TargetMachine.h" |
22 | | #include "llvm/ADT/BitVector.h" |
23 | | #include "llvm/ADT/STLExtras.h" |
24 | | #include "llvm/CodeGen/MachineFrameInfo.h" |
25 | | #include "llvm/CodeGen/MachineFunction.h" |
26 | | #include "llvm/CodeGen/MachineFunctionPass.h" |
27 | | #include "llvm/CodeGen/MachineInstrBuilder.h" |
28 | | #include "llvm/CodeGen/MachineModuleInfo.h" |
29 | | #include "llvm/CodeGen/MachineRegisterInfo.h" |
30 | | #include "llvm/IR/Constants.h" |
31 | | #include "llvm/IR/Function.h" |
32 | | #include "llvm/IR/Type.h" |
33 | | #include "llvm/MC/MCAsmInfo.h" |
34 | | #include "llvm/Support/CommandLine.h" |
35 | | #include "llvm/Support/ErrorHandling.h" |
36 | | #include "llvm/Target/TargetFrameLowering.h" |
37 | | #include "llvm/Target/TargetInstrInfo.h" |
38 | | #include "llvm/Target/TargetMachine.h" |
39 | | #include "llvm/Target/TargetOptions.h" |
40 | | |
41 | | using namespace llvm; |
42 | | |
43 | | #define GET_REGINFO_TARGET_DESC |
44 | | #include "X86GenRegisterInfo.inc" |
45 | | |
// Hidden escape hatch (default: on). When cleared, hasBasePointer() always
// answers false, so no function will dedicate a callee-saved register as a
// base pointer for complex stack frames.
static cl::opt<bool>
EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true),
          cl::desc("Enable use of a base pointer for complex stack frames"));
49 | | |
/// Construct the X86 register-info object for the given target triple.
/// The return-address register passed to the generated base class is RIP in
/// 64-bit mode and EIP otherwise (used for both DWARF flavours).
X86RegisterInfo::X86RegisterInfo(const Triple &TT)
    : X86GenRegisterInfo((TT.isArch64Bit() ? X86::RIP : X86::EIP),
                         X86_MC::getDwarfRegFlavour(TT, false),
                         X86_MC::getDwarfRegFlavour(TT, true),
                         (TT.isArch64Bit() ? X86::RIP : X86::EIP)) {
  X86_MC::initLLVMToSEHAndCVRegMapping(this);

  // Cache some information.
  Is64Bit = TT.isArch64Bit();
  IsWin64 = Is64Bit && TT.isOSWindows();

  // Use a callee-saved register as the base pointer. These registers must
  // not conflict with any ABI requirements. For example, in 32-bit mode PIC
  // requires GOT in the EBX register before function calls via PLT GOT
  // pointer, which is why the 32-bit base pointer is ESI rather than EBX.
  if (Is64Bit) {
    SlotSize = 8;
    // This matches the simplified 32-bit pointer code in the data layout
    // computation: x32 (GNUX32) keeps 32-bit stack/frame/base pointers even
    // though the architecture is 64-bit.
    // FIXME: Should use the data layout?
    bool Use64BitReg = TT.getEnvironment() != Triple::GNUX32;
    StackPtr = Use64BitReg ? X86::RSP : X86::ESP;
    FramePtr = Use64BitReg ? X86::RBP : X86::EBP;
    BasePtr = Use64BitReg ? X86::RBX : X86::EBX;
  } else {
    SlotSize = 4;
    StackPtr = X86::ESP;
    FramePtr = X86::EBP;
    BasePtr = X86::ESI;
  }
}
80 | | |
81 | | bool |
82 | 75.5k | X86RegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const { |
83 | 75.5k | // ExecutionDepsFixer and PostRAScheduler require liveness. |
84 | 75.5k | return true; |
85 | 75.5k | } |
86 | | |
87 | | int |
88 | 607 | X86RegisterInfo::getSEHRegNum(unsigned i) const { |
89 | 607 | return getEncodingValue(i); |
90 | 607 | } |
91 | | |
92 | | const TargetRegisterClass * |
93 | | X86RegisterInfo::getSubClassWithSubReg(const TargetRegisterClass *RC, |
94 | 87.8k | unsigned Idx) const { |
95 | 87.8k | // The sub_8bit sub-register index is more constrained in 32-bit mode. |
96 | 87.8k | // It behaves just like the sub_8bit_hi index. |
97 | 87.8k | if (!Is64Bit && 87.8k Idx == X86::sub_8bit7.69k ) |
98 | 3.93k | Idx = X86::sub_8bit_hi; |
99 | 87.8k | |
100 | 87.8k | // Forward to TableGen's default version. |
101 | 87.8k | return X86GenRegisterInfo::getSubClassWithSubReg(RC, Idx); |
102 | 87.8k | } |
103 | | |
104 | | const TargetRegisterClass * |
105 | | X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, |
106 | | const TargetRegisterClass *B, |
107 | 123k | unsigned SubIdx) const { |
108 | 123k | // The sub_8bit sub-register index is more constrained in 32-bit mode. |
109 | 123k | if (!Is64Bit && 123k SubIdx == X86::sub_8bit14.2k ) { |
110 | 8.00k | A = X86GenRegisterInfo::getSubClassWithSubReg(A, X86::sub_8bit_hi); |
111 | 8.00k | if (!A) |
112 | 0 | return nullptr; |
113 | 123k | } |
114 | 123k | return X86GenRegisterInfo::getMatchingSuperRegClass(A, B, SubIdx); |
115 | 123k | } |
116 | | |
/// Return the largest super-class of RC that is still legal to inflate to on
/// this subtarget, walking RC's super-class list and stopping at the first
/// acceptable candidate of the same spill size.
const TargetRegisterClass *
X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
                                           const MachineFunction &MF) const {
  // Don't allow super-classes of GR8_NOREX. This class is only used after
  // extracting sub_8bit_hi sub-registers. The H sub-registers cannot be
  // copied to the full GR8 register class in 64-bit mode, so we cannot allow
  // the register class inflation.
  //
  // The GR8_NOREX class is always used in a way that won't be constrained to
  // a sub-class, so sub-classes like GR8_ABCD_L are allowed to expand to the
  // full GR8 class.
  if (RC == &X86::GR8_NOREXRegClass)
    return RC;

  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();

  // Walk Super through RC itself followed by RC's super-classes.
  const TargetRegisterClass *Super = RC;
  TargetRegisterClass::sc_iterator I = RC->getSuperClasses();
  do {
    switch (Super->getID()) {
    case X86::FR32RegClassID:
    case X86::FR64RegClassID:
      // If AVX-512 isn't supported we should only inflate to these classes.
      if (!Subtarget.hasAVX512() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::VR128RegClassID:
    case X86::VR256RegClassID:
      // If VLX isn't supported we should only inflate to these classes.
      if (!Subtarget.hasVLX() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::VR128XRegClassID:
    case X86::VR256XRegClassID:
      // If VLX isn't supported we shouldn't inflate to these classes.
      if (Subtarget.hasVLX() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::FR32XRegClassID:
    case X86::FR64XRegClassID:
      // If AVX-512 isn't supported we shouldn't inflate to these classes.
      if (Subtarget.hasAVX512() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::GR8RegClassID:
    case X86::GR16RegClassID:
    case X86::GR32RegClassID:
    case X86::GR64RegClassID:
    case X86::RFP32RegClassID:
    case X86::RFP64RegClassID:
    case X86::RFP80RegClassID:
    case X86::VR512RegClassID:
      // Don't return a super-class that would shrink the spill size.
      // That can happen with the vector and float classes.
      if (getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
    }
    Super = *I++;
  } while (Super);
  // No acceptable super-class found; keep the original class.
  return RC;
}
182 | | |
/// Return the register class that pointer values of the given Kind live in.
/// Kind encodes the constraint: 0 = normal GPRs, 1 = no stack pointer,
/// 2 = NOREX GPRs, 3 = NOREX without SP, 4 = tail-call-safe GPRs.
const TargetRegisterClass *
X86RegisterInfo::getPointerRegClass(const MachineFunction &MF,
                                    unsigned Kind) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  switch (Kind) {
  default: llvm_unreachable("Unexpected Kind in getPointerRegClass!");
  case 0: // Normal GPRs.
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64RegClass;
    // If the target is 64bit but we have been told to use 32bit addresses,
    // we can still use 64-bit register as long as we know the high bits
    // are zeros.
    // Reflect that in the returned register class.
    if (Is64Bit) {
      // When the target also allows 64-bit frame pointer and we do have a
      // frame, this is fine to use it for the address accesses as well.
      const X86FrameLowering *TFI = getFrameLowering(MF);
      return TFI->hasFP(MF) && TFI->Uses64BitFramePtr
                 ? &X86::LOW32_ADDR_ACCESS_RBPRegClass
                 : &X86::LOW32_ADDR_ACCESSRegClass;
    }
    return &X86::GR32RegClass;
  case 1: // Normal GPRs except the stack pointer (for encoding reasons).
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64_NOSPRegClass;
    // NOSP does not contain RIP, so no special case here.
    return &X86::GR32_NOSPRegClass;
  case 2: // NOREX GPRs.
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64_NOREXRegClass;
    return &X86::GR32_NOREXRegClass;
  case 3: // NOREX GPRs except the stack pointer (for encoding reasons).
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64_NOREX_NOSPRegClass;
    // NOSP does not contain RIP, so no special case here.
    return &X86::GR32_NOREX_NOSPRegClass;
  case 4: // Available for tailcall (not callee-saved GPRs).
    return getGPRsForTailCall(MF);
  }
}
223 | | |
224 | | const TargetRegisterClass * |
225 | 4.13k | X86RegisterInfo::getGPRsForTailCall(const MachineFunction &MF) const { |
226 | 4.13k | const Function *F = MF.getFunction(); |
227 | 4.13k | if (IsWin64 || 4.13k (F && 3.94k F->getCallingConv() == CallingConv::Win643.94k )) |
228 | 206 | return &X86::GR64_TCW64RegClass; |
229 | 3.93k | else if (3.93k Is64Bit3.93k ) |
230 | 2.96k | return &X86::GR64_TCRegClass; |
231 | 962 | |
232 | 962 | bool hasHipeCC = (F ? 962 F->getCallingConv() == CallingConv::HiPE961 : false1 ); |
233 | 962 | if (hasHipeCC) |
234 | 1 | return &X86::GR32RegClass; |
235 | 961 | return &X86::GR32_TCRegClass; |
236 | 961 | } |
237 | | |
238 | | const TargetRegisterClass * |
239 | 380 | X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { |
240 | 380 | if (RC == &X86::CCRRegClass380 ) { |
241 | 380 | if (Is64Bit) |
242 | 158 | return &X86::GR64RegClass; |
243 | 380 | else |
244 | 222 | return &X86::GR32RegClass; |
245 | 0 | } |
246 | 0 | return RC; |
247 | 0 | } |
248 | | |
249 | | unsigned |
250 | | X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, |
251 | 151k | MachineFunction &MF) const { |
252 | 151k | const X86FrameLowering *TFI = getFrameLowering(MF); |
253 | 151k | |
254 | 151k | unsigned FPDiff = TFI->hasFP(MF) ? 10 : 0151k ; |
255 | 151k | switch (RC->getID()) { |
256 | 145k | default: |
257 | 145k | return 0; |
258 | 1.48k | case X86::GR32RegClassID: |
259 | 1.48k | return 4 - FPDiff; |
260 | 1.48k | case X86::GR64RegClassID: |
261 | 1.48k | return 12 - FPDiff; |
262 | 1.48k | case X86::VR128RegClassID: |
263 | 1.48k | return Is64Bit ? 10748 : 4740 ; |
264 | 1.48k | case X86::VR64RegClassID: |
265 | 1.48k | return 4; |
266 | 0 | } |
267 | 0 | } |
268 | | |
/// Return the list of callee-saved registers for MF, selected by calling
/// convention and refined by subtarget features (SSE/AVX/AVX-512), Win64,
/// EH-return usage, and the swifterror attribute.
const MCPhysReg *
X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
  assert(MF && "MachineFunction required");

  const X86Subtarget &Subtarget = MF->getSubtarget<X86Subtarget>();
  const Function *F = MF->getFunction();
  bool HasSSE = Subtarget.hasSSE1();
  bool HasAVX = Subtarget.hasAVX();
  bool HasAVX512 = Subtarget.hasAVX512();
  bool CallsEHReturn = MF->callsEHReturn();

  CallingConv::ID CC = F->getCallingConv();

  // If attribute NoCallerSavedRegisters exists then we set X86_INTR calling
  // convention because it has the CSR list.
  if (MF->getFunction()->hasFnAttribute("no_caller_saved_registers"))
    CC = CallingConv::X86_INTR;

  switch (CC) {
  case CallingConv::GHC:
  case CallingConv::HiPE:
    // These conventions preserve nothing.
    return CSR_NoRegs_SaveList;
  case CallingConv::AnyReg:
    if (HasAVX)
      return CSR_64_AllRegs_AVX_SaveList;
    return CSR_64_AllRegs_SaveList;
  case CallingConv::PreserveMost:
    return CSR_64_RT_MostRegs_SaveList;
  case CallingConv::PreserveAll:
    if (HasAVX)
      return CSR_64_RT_AllRegs_AVX_SaveList;
    return CSR_64_RT_AllRegs_SaveList;
  case CallingConv::CXX_FAST_TLS:
    if (Is64Bit)
      // Split-CSR functions save part of the CSR set via copies instead of
      // spills (see getCalleeSavedRegsViaCopy).
      return MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR() ?
          CSR_64_CXX_TLS_Darwin_PE_SaveList : CSR_64_TLS_Darwin_SaveList;
    break;
  case CallingConv::Intel_OCL_BI: {
    if (HasAVX512 && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX512_SaveList;
    if (HasAVX512 && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX512_SaveList;
    if (HasAVX && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX_SaveList;
    if (HasAVX && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX_SaveList;
    if (!HasAVX && !IsWin64 && Is64Bit)
      return CSR_64_Intel_OCL_BI_SaveList;
    break;
  }
  case CallingConv::HHVM:
    return CSR_64_HHVM_SaveList;
  case CallingConv::X86_RegCall:
    if (Is64Bit) {
      if (IsWin64) {
        return (HasSSE ? CSR_Win64_RegCall_SaveList :
                         CSR_Win64_RegCall_NoSSE_SaveList);
      } else {
        return (HasSSE ? CSR_SysV64_RegCall_SaveList :
                         CSR_SysV64_RegCall_NoSSE_SaveList);
      }
    } else {
      return (HasSSE ? CSR_32_RegCall_SaveList :
                       CSR_32_RegCall_NoSSE_SaveList);
    }
  case CallingConv::Cold:
    if (Is64Bit)
      return CSR_64_MostRegs_SaveList;
    break;
  case CallingConv::Win64:
    if (!HasSSE)
      return CSR_Win64_NoSSE_SaveList;
    return CSR_Win64_SaveList;
  case CallingConv::X86_64_SysV:
    if (CallsEHReturn)
      return CSR_64EHRet_SaveList;
    return CSR_64_SaveList;
  case CallingConv::X86_INTR:
    // Interrupt handlers must preserve every register they touch; the list
    // widens with the vector feature set.
    if (Is64Bit) {
      if (HasAVX512)
        return CSR_64_AllRegs_AVX512_SaveList;
      if (HasAVX)
        return CSR_64_AllRegs_AVX_SaveList;
      if (HasSSE)
        return CSR_64_AllRegs_SaveList;
      return CSR_64_AllRegs_NoSSE_SaveList;
    } else {
      if (HasAVX512)
        return CSR_32_AllRegs_AVX512_SaveList;
      if (HasAVX)
        return CSR_32_AllRegs_AVX_SaveList;
      if (HasSSE)
        return CSR_32_AllRegs_SSE_SaveList;
      return CSR_32_AllRegs_SaveList;
    }
  default:
    break;
  }

  // Default C-family conventions.
  if (Is64Bit) {
    // swifterror functions use a dedicated CSR list so the error register is
    // not preserved across the call.
    bool IsSwiftCC = Subtarget.getTargetLowering()->supportSwiftError() &&
                     F->getAttributes().hasAttrSomewhere(Attribute::SwiftError);
    if (IsSwiftCC)
      return IsWin64 ? CSR_Win64_SwiftError_SaveList
                     : CSR_64_SwiftError_SaveList;

    if (IsWin64)
      return HasSSE ? CSR_Win64_SaveList : CSR_Win64_NoSSE_SaveList;
    if (CallsEHReturn)
      return CSR_64EHRet_SaveList;
    return CSR_64_SaveList;
  }

  return CallsEHReturn ? CSR_32EHRet_SaveList : CSR_32_SaveList;
}
384 | | |
385 | | const MCPhysReg *X86RegisterInfo::getCalleeSavedRegsViaCopy( |
386 | 67.3k | const MachineFunction *MF) const { |
387 | 67.3k | assert(MF && "Invalid MachineFunction pointer."); |
388 | 67.3k | if (MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS && |
389 | 28 | MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR()) |
390 | 20 | return CSR_64_CXX_TLS_Darwin_ViaCopy_SaveList; |
391 | 67.2k | return nullptr; |
392 | 67.2k | } |
393 | | |
/// Return the register mask preserved across a call with convention CC.
/// Mirrors the calling-convention dispatch in getCalleeSavedRegs(), but
/// yields *_RegMask tables instead of save lists.
const uint32_t *
X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
                                      CallingConv::ID CC) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  bool HasSSE = Subtarget.hasSSE1();
  bool HasAVX = Subtarget.hasAVX();
  bool HasAVX512 = Subtarget.hasAVX512();

  switch (CC) {
  case CallingConv::GHC:
  case CallingConv::HiPE:
    return CSR_NoRegs_RegMask;
  case CallingConv::AnyReg:
    if (HasAVX)
      return CSR_64_AllRegs_AVX_RegMask;
    return CSR_64_AllRegs_RegMask;
  case CallingConv::PreserveMost:
    return CSR_64_RT_MostRegs_RegMask;
  case CallingConv::PreserveAll:
    if (HasAVX)
      return CSR_64_RT_AllRegs_AVX_RegMask;
    return CSR_64_RT_AllRegs_RegMask;
  case CallingConv::CXX_FAST_TLS:
    if (Is64Bit)
      return CSR_64_TLS_Darwin_RegMask;
    break;
  case CallingConv::Intel_OCL_BI: {
    if (HasAVX512 && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX512_RegMask;
    if (HasAVX512 && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX512_RegMask;
    if (HasAVX && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX_RegMask;
    if (HasAVX && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX_RegMask;
    if (!HasAVX && !IsWin64 && Is64Bit)
      return CSR_64_Intel_OCL_BI_RegMask;
    break;
  }
  case CallingConv::HHVM:
    return CSR_64_HHVM_RegMask;
  case CallingConv::X86_RegCall:
    if (Is64Bit) {
      if (IsWin64) {
        return (HasSSE ? CSR_Win64_RegCall_RegMask :
                         CSR_Win64_RegCall_NoSSE_RegMask);
      } else {
        return (HasSSE ? CSR_SysV64_RegCall_RegMask :
                         CSR_SysV64_RegCall_NoSSE_RegMask);
      }
    } else {
      return (HasSSE ? CSR_32_RegCall_RegMask :
                       CSR_32_RegCall_NoSSE_RegMask);
    }
  case CallingConv::Cold:
    if (Is64Bit)
      return CSR_64_MostRegs_RegMask;
    break;
  case CallingConv::Win64:
    return CSR_Win64_RegMask;
  case CallingConv::X86_64_SysV:
    return CSR_64_RegMask;
  case CallingConv::X86_INTR:
    if (Is64Bit) {
      if (HasAVX512)
        return CSR_64_AllRegs_AVX512_RegMask;
      if (HasAVX)
        return CSR_64_AllRegs_AVX_RegMask;
      if (HasSSE)
        return CSR_64_AllRegs_RegMask;
      return CSR_64_AllRegs_NoSSE_RegMask;
    } else {
      if (HasAVX512)
        return CSR_32_AllRegs_AVX512_RegMask;
      if (HasAVX)
        return CSR_32_AllRegs_AVX_RegMask;
      if (HasSSE)
        return CSR_32_AllRegs_SSE_RegMask;
      return CSR_32_AllRegs_RegMask;
    }
  default:
    break;
  }

  // Unlike getCalleeSavedRegs(), we don't have MMI so we can't check
  // callsEHReturn().
  if (Is64Bit) {
    const Function *F = MF.getFunction();
    bool IsSwiftCC = Subtarget.getTargetLowering()->supportSwiftError() &&
                     F->getAttributes().hasAttrSomewhere(Attribute::SwiftError);
    if (IsSwiftCC)
      return IsWin64 ? CSR_Win64_SwiftError_RegMask : CSR_64_SwiftError_RegMask;
    return IsWin64 ? CSR_Win64_RegMask : CSR_64_RegMask;
  }

  return CSR_32_RegMask;
}
491 | | |
/// Return the mask that preserves no registers at all (used for calls that
/// clobber everything).
const uint32_t*
X86RegisterInfo::getNoPreservedMask() const {
  return CSR_NoRegs_RegMask;
}
496 | | |
/// Return the preserved-register mask for Darwin TLS access calls.
const uint32_t *X86RegisterInfo::getDarwinTLSCallPreservedMask() const {
  return CSR_64_TLS_Darwin_RegMask;
}
500 | | |
501 | 146k | BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { |
502 | 146k | BitVector Reserved(getNumRegs()); |
503 | 146k | const X86FrameLowering *TFI = getFrameLowering(MF); |
504 | 146k | |
505 | 146k | // Set the stack-pointer register and its aliases as reserved. |
506 | 734k | for (MCSubRegIterator I(X86::RSP, this, /*IncludeSelf=*/true); I.isValid(); |
507 | 587k | ++I) |
508 | 587k | Reserved.set(*I); |
509 | 146k | |
510 | 146k | // Set the instruction pointer register and its aliases as reserved. |
511 | 587k | for (MCSubRegIterator I(X86::RIP, this, /*IncludeSelf=*/true); I.isValid(); |
512 | 440k | ++I) |
513 | 440k | Reserved.set(*I); |
514 | 146k | |
515 | 146k | // Set the frame-pointer register and its aliases as reserved if needed. |
516 | 146k | if (TFI->hasFP(MF)146k ) { |
517 | 76.3k | for (MCSubRegIterator I(X86::RBP, this, /*IncludeSelf=*/true); I.isValid(); |
518 | 61.0k | ++I) |
519 | 61.0k | Reserved.set(*I); |
520 | 15.2k | } |
521 | 146k | |
522 | 146k | // Set the base-pointer register and its aliases as reserved if needed. |
523 | 146k | if (hasBasePointer(MF)146k ) { |
524 | 144 | CallingConv::ID CC = MF.getFunction()->getCallingConv(); |
525 | 144 | const uint32_t *RegMask = getCallPreservedMask(MF, CC); |
526 | 144 | if (MachineOperand::clobbersPhysReg(RegMask, getBaseRegister())) |
527 | 0 | report_fatal_error( |
528 | 0 | "Stack realignment in presence of dynamic allocas is not supported with" |
529 | 0 | "this calling convention."); |
530 | 144 | |
531 | 144 | unsigned BasePtr = getX86SubSuperRegister(getBaseRegister(), 64); |
532 | 144 | for (MCSubRegIterator I(BasePtr, this, /*IncludeSelf=*/true); |
533 | 780 | I.isValid()780 ; ++I636 ) |
534 | 636 | Reserved.set(*I); |
535 | 144 | } |
536 | 146k | |
537 | 146k | // Mark the segment registers as reserved. |
538 | 146k | Reserved.set(X86::CS); |
539 | 146k | Reserved.set(X86::SS); |
540 | 146k | Reserved.set(X86::DS); |
541 | 146k | Reserved.set(X86::ES); |
542 | 146k | Reserved.set(X86::FS); |
543 | 146k | Reserved.set(X86::GS); |
544 | 146k | |
545 | 146k | // Mark the floating point stack registers as reserved. |
546 | 1.32M | for (unsigned n = 0; n != 81.32M ; ++n1.17M ) |
547 | 1.17M | Reserved.set(X86::ST0 + n); |
548 | 146k | |
549 | 146k | // Reserve the registers that only exist in 64-bit mode. |
550 | 146k | if (!Is64Bit146k ) { |
551 | 28.8k | // These 8-bit registers are part of the x86-64 extension even though their |
552 | 28.8k | // super-registers are old 32-bits. |
553 | 28.8k | Reserved.set(X86::SIL); |
554 | 28.8k | Reserved.set(X86::DIL); |
555 | 28.8k | Reserved.set(X86::BPL); |
556 | 28.8k | Reserved.set(X86::SPL); |
557 | 28.8k | |
558 | 259k | for (unsigned n = 0; n != 8259k ; ++n230k ) { |
559 | 230k | // R8, R9, ... |
560 | 1.15M | for (MCRegAliasIterator AI(X86::R8 + n, this, true); AI.isValid()1.15M ; ++AI921k ) |
561 | 921k | Reserved.set(*AI); |
562 | 230k | |
563 | 230k | // XMM8, XMM9, ... |
564 | 921k | for (MCRegAliasIterator AI(X86::XMM8 + n, this, true); AI.isValid()921k ; ++AI691k ) |
565 | 691k | Reserved.set(*AI); |
566 | 230k | } |
567 | 28.8k | } |
568 | 146k | if (!Is64Bit || 146k !MF.getSubtarget<X86Subtarget>().hasAVX512()118k ) { |
569 | 1.87M | for (unsigned n = 16; n != 321.87M ; ++n1.76M ) { |
570 | 7.05M | for (MCRegAliasIterator AI(X86::XMM0 + n, this, true); AI.isValid()7.05M ; ++AI5.29M ) |
571 | 5.29M | Reserved.set(*AI); |
572 | 1.76M | } |
573 | 110k | } |
574 | 146k | |
575 | 146k | assert(checkAllSuperRegsMarked(Reserved, |
576 | 146k | {X86::SIL, X86::DIL, X86::BPL, X86::SPL})); |
577 | 146k | return Reserved; |
578 | 146k | } |
579 | | |
580 | 63 | void X86RegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const { |
581 | 63 | // Check if the EFLAGS register is marked as live-out. This shouldn't happen, |
582 | 63 | // because the calling convention defines the EFLAGS register as NOT |
583 | 63 | // preserved. |
584 | 63 | // |
585 | 63 | // Unfortunatelly the EFLAGS show up as live-out after branch folding. Adding |
586 | 63 | // an assert to track this and clear the register afterwards to avoid |
587 | 63 | // unnecessary crashes during release builds. |
588 | 63 | assert(!(Mask[X86::EFLAGS / 32] & (1U << (X86::EFLAGS % 32))) && |
589 | 63 | "EFLAGS are not live-out from a patchpoint."); |
590 | 63 | |
591 | 63 | // Also clean other registers that don't need preserving (IP). |
592 | 63 | for (auto Reg : {X86::EFLAGS, X86::RIP, X86::EIP, X86::IP}) |
593 | 252 | Mask[Reg / 32] &= ~(1U << (Reg % 32)); |
594 | 63 | } |
595 | | |
596 | | //===----------------------------------------------------------------------===// |
597 | | // Stack Frame Processing methods |
598 | | //===----------------------------------------------------------------------===// |
599 | | |
600 | 170k | static bool CantUseSP(const MachineFrameInfo &MFI) { |
601 | 165k | return MFI.hasVarSizedObjects() || MFI.hasOpaqueSPAdjustment(); |
602 | 170k | } |
603 | | |
604 | 467k | bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const { |
605 | 467k | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
606 | 467k | |
607 | 467k | if (!EnableBasePointer) |
608 | 66 | return false; |
609 | 467k | |
610 | 467k | // When we need stack realignment, we can't address the stack from the frame |
611 | 467k | // pointer. When we have dynamic allocas or stack-adjusting inline asm, we |
612 | 467k | // can't address variables from the stack pointer. MS inline asm can |
613 | 467k | // reference locals while also adjusting the stack pointer. When we can't |
614 | 467k | // use both the SP and the FP, we need a separate base pointer register. |
615 | 467k | bool CantUseFP = needsStackRealignment(MF); |
616 | 41.5k | return CantUseFP && CantUseSP(MFI); |
617 | 467k | } |
618 | | |
619 | 156k | bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const { |
620 | 156k | if (!TargetRegisterInfo::canRealignStack(MF)) |
621 | 0 | return false; |
622 | 156k | |
623 | 156k | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
624 | 156k | const MachineRegisterInfo *MRI = &MF.getRegInfo(); |
625 | 156k | |
626 | 156k | // Stack realignment requires a frame pointer. If we already started |
627 | 156k | // register allocation with frame pointer elimination, it is too late now. |
628 | 156k | if (!MRI->canReserveReg(FramePtr)) |
629 | 27.1k | return false; |
630 | 128k | |
631 | 128k | // If a base pointer is necessary. Check that it isn't too late to reserve |
632 | 128k | // it. |
633 | 128k | if (128k CantUseSP(MFI)128k ) |
634 | 3.78k | return MRI->canReserveReg(BasePtr); |
635 | 125k | return true; |
636 | 125k | } |
637 | | |
638 | | bool X86RegisterInfo::hasReservedSpillSlot(const MachineFunction &MF, |
639 | 0 | unsigned Reg, int &FrameIdx) const { |
640 | 0 | // Since X86 defines assignCalleeSavedSpillSlots which always return true |
641 | 0 | // this function neither used nor tested. |
642 | 0 | llvm_unreachable("Unused function on X86. Otherwise need a test case."); |
643 | 0 | } |
644 | | |
645 | | // tryOptimizeLEAtoMOV - helper function that tries to replace a LEA instruction |
646 | | // of the form 'lea (%esp), %ebx' --> 'mov %esp, %ebx'. |
647 | | // TODO: In this case we should be really trying first to entirely eliminate |
648 | | // this instruction which is a plain copy. |
// tryOptimizeLEAtoMOV - helper function that tries to replace a LEA
// instruction of the form 'lea (%esp), %ebx' --> 'mov %esp, %ebx'.
// Returns true (and erases the LEA) only when the LEA has scale 1, no index
// register, displacement 0, and no segment register — i.e. it is a plain
// register copy.
// TODO: In this case we should be really trying first to entirely eliminate
// this instruction which is a plain copy.
static bool tryOptimizeLEAtoMOV(MachineBasicBlock::iterator II) {
  MachineInstr &MI = *II;
  unsigned Opc = II->getOpcode();
  // Check if this is a LEA of the form 'lea (%esp), %ebx'.
  // Memory-operand layout: 1=base, 2=scale, 3=index, 4=disp, 5=segment.
  if ((Opc != X86::LEA32r && Opc != X86::LEA64r && Opc != X86::LEA64_32r) ||
      MI.getOperand(2).getImm() != 1 ||
      MI.getOperand(3).getReg() != X86::NoRegister ||
      MI.getOperand(4).getImm() != 0 ||
      MI.getOperand(5).getReg() != X86::NoRegister)
    return false;
  unsigned BasePtr = MI.getOperand(1).getReg();
  // In X32 mode, ensure the base-pointer is a 32-bit operand, so the LEA will
  // be replaced with a 32-bit operand MOV which will zero extend the upper
  // 32-bits of the super register.
  if (Opc == X86::LEA64_32r)
    BasePtr = getX86SubSuperRegister(BasePtr, 32);
  unsigned NewDestReg = MI.getOperand(0).getReg();
  const X86InstrInfo *TII =
      MI.getParent()->getParent()->getSubtarget<X86Subtarget>().getInstrInfo();
  // Emit the copy in front of the LEA, then delete the LEA.
  TII->copyPhysReg(*MI.getParent(), II, MI.getDebugLoc(), NewDestReg, BasePtr,
                   MI.getOperand(1).isKill());
  MI.eraseFromParent();
  return true;
}
673 | | |
/// Rewrite the frame-index operand of MI into a base register + offset pair.
/// SPAdj is the extra stack adjustment active at this point (added when the
/// chosen base register is the stack pointer).
void
X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                     int SPAdj, unsigned FIOperandNum,
                                     RegScavenger *RS) const {
  MachineInstr &MI = *II;
  MachineFunction &MF = *MI.getParent()->getParent();
  const X86FrameLowering *TFI = getFrameLowering(MF);
  int FrameIndex = MI.getOperand(FIOperandNum).getIndex();

  // Determine base register and offset.
  int FIOffset;
  unsigned BasePtr;
  if (MI.isReturn()) {
    assert((!needsStackRealignment(MF) ||
            MF.getFrameInfo().isFixedObjectIndex(FrameIndex)) &&
           "Return instruction can only reference SP relative frame objects");
    FIOffset = TFI->getFrameIndexReferenceSP(MF, FrameIndex, BasePtr, 0);
  } else {
    FIOffset = TFI->getFrameIndexReference(MF, FrameIndex, BasePtr);
  }

  // LOCAL_ESCAPE uses a single offset, with no register. It only works in
  // the simple FP case, and doesn't work with stack realignment. On 32-bit,
  // the offset is from the traditional base pointer location. On 64-bit, the
  // offset is from the SP at the end of the prologue, not the FP location.
  // This matches the behavior of llvm.frameaddress.
  unsigned Opc = MI.getOpcode();
  if (Opc == TargetOpcode::LOCAL_ESCAPE) {
    MachineOperand &FI = MI.getOperand(FIOperandNum);
    FI.ChangeToImmediate(FIOffset);
    return;
  }

  // For LEA64_32r when BasePtr is 32-bits (X32) we can use full-size 64-bit
  // register as source operand, semantic is the same and destination is
  // 32-bits. It saves one byte per lea in code since 0x67 prefix is avoided.
  // Don't change BasePtr since it is used later for stack adjustment.
  unsigned MachineBasePtr = BasePtr;
  if (Opc == X86::LEA64_32r && X86::GR32RegClass.contains(BasePtr))
    MachineBasePtr = getX86SubSuperRegister(BasePtr, 64);

  // This must be part of a four operand memory reference. Replace the
  // FrameIndex with base register. Add an offset to the offset.
  MI.getOperand(FIOperandNum).ChangeToRegister(MachineBasePtr, false);

  if (BasePtr == StackPtr)
    FIOffset += SPAdj;

  // The frame index format for stackmaps and patchpoints is different from
  // the X86 format. It only has a FI and an offset.
  if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
    assert(BasePtr == FramePtr && "Expected the FP as base register");
    int64_t Offset = MI.getOperand(FIOperandNum + 1).getImm() + FIOffset;
    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
    return;
  }

  if (MI.getOperand(FIOperandNum+3).isImm()) {
    // Offset is a 32-bit integer.
    int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm());
    int Offset = FIOffset + Imm;
    assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
           "Requesting 64-bit offset in 32-bit immediate!");
    // A zero-offset LEA may degrade into a plain register copy.
    if (Offset != 0 || !tryOptimizeLEAtoMOV(II))
      MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
  } else {
    // Offset is symbolic. This is extremely rare.
    uint64_t Offset = FIOffset +
      (uint64_t)MI.getOperand(FIOperandNum+3).getOffset();
    MI.getOperand(FIOperandNum + 3).setOffset(Offset);
  }
}
746 | | |
747 | 226k | unsigned X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const { |
748 | 226k | const X86FrameLowering *TFI = getFrameLowering(MF); |
749 | 226k | return TFI->hasFP(MF) ? FramePtr49.9k : StackPtr176k ; |
750 | 226k | } |
751 | | |
752 | | unsigned |
753 | 44 | X86RegisterInfo::getPtrSizedFrameRegister(const MachineFunction &MF) const { |
754 | 44 | const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>(); |
755 | 44 | unsigned FrameReg = getFrameRegister(MF); |
756 | 44 | if (Subtarget.isTarget64BitILP32()) |
757 | 8 | FrameReg = getX86SubSuperRegister(FrameReg, 32); |
758 | 44 | return FrameReg; |
759 | 44 | } |