/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Target/X86/X86MCInstLower.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===-- X86MCInstLower.cpp - Convert X86 MachineInstr to an MCInst --------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // This file contains code to lower X86 MachineInstrs to their corresponding |
11 | | // MCInst records. |
12 | | // |
13 | | //===----------------------------------------------------------------------===// |
14 | | |
15 | | #include "InstPrinter/X86ATTInstPrinter.h" |
16 | | #include "InstPrinter/X86InstComments.h" |
17 | | #include "MCTargetDesc/X86BaseInfo.h" |
18 | | #include "Utils/X86ShuffleDecode.h" |
19 | | #include "X86AsmPrinter.h" |
20 | | #include "X86RegisterInfo.h" |
21 | | #include "X86ShuffleDecodeConstantPool.h" |
22 | | #include "llvm/ADT/Optional.h" |
23 | | #include "llvm/ADT/SmallString.h" |
24 | | #include "llvm/ADT/iterator_range.h" |
25 | | #include "llvm/BinaryFormat/ELF.h" |
26 | | #include "llvm/CodeGen/MachineConstantPool.h" |
27 | | #include "llvm/CodeGen/MachineFunction.h" |
28 | | #include "llvm/CodeGen/MachineModuleInfoImpls.h" |
29 | | #include "llvm/CodeGen/MachineOperand.h" |
30 | | #include "llvm/CodeGen/StackMaps.h" |
31 | | #include "llvm/IR/DataLayout.h" |
32 | | #include "llvm/IR/GlobalValue.h" |
33 | | #include "llvm/IR/Mangler.h" |
34 | | #include "llvm/MC/MCAsmInfo.h" |
35 | | #include "llvm/MC/MCCodeEmitter.h" |
36 | | #include "llvm/MC/MCContext.h" |
37 | | #include "llvm/MC/MCExpr.h" |
38 | | #include "llvm/MC/MCFixup.h" |
39 | | #include "llvm/MC/MCInst.h" |
40 | | #include "llvm/MC/MCInstBuilder.h" |
41 | | #include "llvm/MC/MCSection.h" |
42 | | #include "llvm/MC/MCSectionELF.h" |
43 | | #include "llvm/MC/MCSectionMachO.h" |
44 | | #include "llvm/MC/MCStreamer.h" |
45 | | #include "llvm/MC/MCSymbol.h" |
46 | | #include "llvm/MC/MCSymbolELF.h" |
47 | | #include "llvm/Support/TargetRegistry.h" |
48 | | #include "llvm/Target/TargetLoweringObjectFile.h" |
49 | | |
50 | | using namespace llvm; |
51 | | |
52 | | namespace { |
53 | | |
54 | | /// X86MCInstLower - This class is used to lower a MachineInstr into an MCInst. |
/// The object only stores references: the MCContext, TargetMachine and
/// MCAsmInfo are all derived from the MachineFunction in the constructor, and
/// the X86AsmPrinter is the one passed to the constructor.
55 | | class X86MCInstLower { |
56 | | MCContext &Ctx; |
57 | | const MachineFunction &MF; |
58 | | const TargetMachine &TM; |
59 | | const MCAsmInfo &MAI; |
60 | | X86AsmPrinter &AsmPrinter; |
61 | | public: |
62 | | X86MCInstLower(const MachineFunction &MF, X86AsmPrinter &asmprinter); |
63 | | |
// Lower one operand of MI. Returns None for operands that produce no
// MCOperand (implicit register operands and register masks).
64 | | Optional<MCOperand> LowerMachineOperand(const MachineInstr *MI, |
65 | | const MachineOperand &MO) const; |
// Fill OutMI from MI: copy the opcode, lower every operand, then apply
// opcode-specific rewrites.
66 | | void Lower(const MachineInstr *MI, MCInst &OutMI) const; |
67 | | |
// Return the MCSymbol named by a global, external-symbol or basic-block
// operand.
68 | | MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const; |
// Wrap Sym in an MCExpr operand, honouring MO's target flags and offset.
69 | | MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const; |
70 | | |
71 | | private: |
// Fetch the MachineModuleInfoMachO object-file info from MF's MMI.
72 | | MachineModuleInfoMachO &getMachOMMI() const; |
73 | | }; |
74 | | |
75 | | } // end anonymous namespace |
76 | | |
77 | | // Emit a minimal sequence of nops spanning NumBytes bytes. |
// Forward declaration so that emitShadowPadding() below can call it; the
// definition is presumably later in this file (it is static) - confirm.
78 | | static void EmitNops(MCStreamer &OS, unsigned NumBytes, bool Is64Bit, |
79 | | const MCSubtargetInfo &STI); |
80 | | |
// Add the encoded size of Inst to the running shadow byte count.
//
// Only does work while InShadow is set: Inst is encoded into a scratch buffer
// with CodeEmitter and the number of bytes produced is added to
// CurrentShadowSize. Once CurrentShadowSize reaches RequiredShadowSize,
// InShadow is cleared so later calls return immediately.
//
// CodeEmitter is dereferenced without a null check whenever InShadow is set.
81 | | void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst, |
82 | | const MCSubtargetInfo &STI, |
83 | 845k | MCCodeEmitter *CodeEmitter) { |
84 | 845k | if (InShadow845k ) { |
// The encoded bytes are only needed for their length; Fixups is discarded.
85 | 8.02k | SmallString<256> Code; |
86 | 8.02k | SmallVector<MCFixup, 4> Fixups; |
87 | 8.02k | raw_svector_ostream VecOS(Code); |
88 | 8.02k | CodeEmitter->encodeInstruction(Inst, VecOS, Fixups, STI); |
89 | 8.02k | CurrentShadowSize += Code.size(); |
90 | 8.02k | if (CurrentShadowSize >= RequiredShadowSize) |
91 | 8.00k | InShadow = false; // The shadow is big enough. Stop counting. |
92 | 8.02k | } |
93 | 845k | } |
94 | | |
// If a shadow is still being tracked and has not yet reached
// RequiredShadowSize, stop tracking and emit nops for the missing
// RequiredShadowSize - CurrentShadowSize bytes. Otherwise does nothing.
95 | | void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding( |
96 | 166k | MCStreamer &OutStreamer, const MCSubtargetInfo &STI) { |
97 | 166k | if (InShadow && 166k CurrentShadowSize < RequiredShadowSize126 ) { |
98 | 73 | InShadow = false; |
// The 64-bit flag passed to EmitNops comes from MF's X86 subtarget.
99 | 73 | EmitNops(OutStreamer, RequiredShadowSize - CurrentShadowSize, |
100 | 73 | MF->getSubtarget<X86Subtarget>().is64Bit(), STI); |
101 | 73 | } |
102 | 166k | } |
103 | | |
// Emit Inst to OutStreamer, then hand it to SMShadowTracker.count() so its
// encoded size is counted toward any shadow currently being tracked.
104 | 815k | void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) { |
105 | 815k | OutStreamer->EmitInstruction(Inst, getSubtargetInfo(), EnablePrintSchedInfo); |
106 | 815k | SMShadowTracker.count(Inst, getSubtargetInfo(), CodeEmitter.get()); |
107 | 815k | } |
108 | | |
// Bind the lowering helper to one MachineFunction and its asm printer.
// MAI is initialised from TM, which relies on TM being declared before MAI
// in the class so that it is initialised first.
109 | | X86MCInstLower::X86MCInstLower(const MachineFunction &mf, |
110 | | X86AsmPrinter &asmprinter) |
111 | | : Ctx(mf.getContext()), MF(mf), TM(mf.getTarget()), MAI(*TM.getMCAsmInfo()), |
112 | 845k | AsmPrinter(asmprinter) {} |
113 | | |
// Return the MachineModuleInfoMachO object-file info held by MF's MMI.
114 | 1.12k | MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const { |
115 | 1.12k | return MF.getMMI().getObjFileInfo<MachineModuleInfoMachO>(); |
116 | 1.12k | } |
117 | | |
118 | | |
119 | | /// GetSymbolFromOperand - Lower an MO_GlobalAddress, MO_ExternalSymbol or |
120 | | /// MO_MachineBasicBlock operand to an MCSymbol. |
///
/// The symbol name is assembled as [private prefix][__imp_]<mangled name>
/// [$non_lazy_ptr] depending on the operand's target flags, except for basic
/// blocks, which use the block's own symbol. For the Darwin non-lazy flags a
/// GV stub entry is also registered with the MachO module info.
121 | | MCSymbol *X86MCInstLower:: |
122 | 101k | GetSymbolFromOperand(const MachineOperand &MO) const { |
123 | 101k | const DataLayout &DL = MF.getDataLayout(); |
124 | 101k | assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) && "Isn't a symbol reference"); |
125 | 101k | |
126 | 101k | MCSymbol *Sym = nullptr; |
127 | 101k | SmallString<128> Name; |
128 | 101k | StringRef Suffix; |
129 | 101k | |
// Target flags that decorate the name: dllimport adds a prefix, the Darwin
// non-lazy flags add a suffix. All other flags leave the name untouched.
130 | 101k | switch (MO.getTargetFlags()) { |
131 | 61 | case X86II::MO_DLLIMPORT: |
132 | 61 | // Handle dllimport linkage. |
133 | 61 | Name += "__imp_"; |
134 | 61 | break; |
135 | 1.12k | case X86II::MO_DARWIN_NONLAZY: |
136 | 1.12k | case X86II::MO_DARWIN_NONLAZY_PIC_BASE: |
137 | 1.12k | Suffix = "$non_lazy_ptr"; |
138 | 1.12k | break; |
139 | 101k | } |
140 | 101k | |
// A suffixed name additionally starts with the private-global prefix.
141 | 101k | if (101k !Suffix.empty()101k ) |
142 | 1.12k | Name += DL.getPrivateGlobalPrefix(); |
143 | 101k | |
144 | 101k | if (MO.isGlobal()101k ) { |
145 | 53.4k | const GlobalValue *GV = MO.getGlobal(); |
146 | 53.4k | AsmPrinter.getNameWithPrefix(Name, GV); |
147 | 101k | } else if (48.4k MO.isSymbol()48.4k ) { |
148 | 2.29k | Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL); |
149 | 48.4k | } else if (46.1k MO.isMBB()46.1k ) { |
// A basic block already has a symbol; no name is built for it and it must
// not carry a suffix.
150 | 46.1k | assert(Suffix.empty()); |
151 | 46.1k | Sym = MO.getMBB()->getSymbol(); |
152 | 46.1k | } |
153 | 101k | |
154 | 101k | Name += Suffix; |
// Only look the name up in the context when no symbol was taken from an MBB.
155 | 101k | if (!Sym) |
156 | 55.7k | Sym = Ctx.getOrCreateSymbol(Name); |
157 | 101k | |
158 | 101k | // If the target flags on the operand changes the name of the symbol, do that |
159 | 101k | // before we return the symbol. |
160 | 101k | switch (MO.getTargetFlags()) { |
161 | 100k | default: break; |
162 | 1.12k | case X86II::MO_DARWIN_NONLAZY: |
163 | 1.12k | case X86II::MO_DARWIN_NONLAZY_PIC_BASE: { |
// Make sure the stub symbol has an entry; an empty entry is filled with the
// global's real symbol and a flag that is false only for internal linkage.
164 | 1.12k | MachineModuleInfoImpl::StubValueTy &StubSym = |
165 | 1.12k | getMachOMMI().getGVStubEntry(Sym); |
166 | 1.12k | if (!StubSym.getPointer()1.12k ) { |
167 | 388 | assert(MO.isGlobal() && "Extern symbol not handled yet"); |
168 | 388 | StubSym = |
169 | 388 | MachineModuleInfoImpl:: |
170 | 388 | StubValueTy(AsmPrinter.getSymbol(MO.getGlobal()), |
171 | 388 | !MO.getGlobal()->hasInternalLinkage()); |
172 | 388 | } |
173 | 1.12k | break; |
174 | 101k | } |
175 | 101k | } |
176 | 101k | |
177 | 101k | return Sym; |
178 | 101k | } |
179 | | |
// Build the MCOperand expression that refers to Sym on behalf of MO.
//
// MO's target flags either select a symbol-reference variant kind or, for the
// PIC-base flags, produce "Sym - <PIC base symbol>" directly. A target flag
// not listed in the switch is treated as unreachable. A non-zero operand
// offset is then added, except for jump-table and basic-block operands.
180 | | MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, |
181 | 116k | MCSymbol *Sym) const { |
182 | 116k | // FIXME: We would like an efficient form for this, so we don't have to do a |
183 | 116k | // lot of extra uniquing. |
184 | 116k | const MCExpr *Expr = nullptr; |
185 | 116k | MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None; |
186 | 116k | |
187 | 116k | switch (MO.getTargetFlags()) { |
188 | 0 | default: 0 llvm_unreachable0 ("Unknown target flag on GV operand"); |
189 | 104k | case X86II::MO_NO_FLAG: // No flag. |
190 | 104k | // These affect the name of the symbol, not any suffix. |
191 | 104k | case X86II::MO_DARWIN_NONLAZY: |
192 | 104k | case X86II::MO_DLLIMPORT: |
193 | 104k | break; |
194 | 104k | |
195 | 53 | case X86II::MO_TLVP: RefKind = MCSymbolRefExpr::VK_TLVP; break; |
196 | 0 | case X86II::MO_TLVP_PIC_BASE: |
197 | 0 | Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx); |
198 | 0 | // Subtract the pic base. |
199 | 0 | Expr = MCBinaryExpr::createSub(Expr, |
200 | 0 | MCSymbolRefExpr::create(MF.getPICBaseSymbol(), |
201 | 0 | Ctx), |
202 | 0 | Ctx); |
203 | 0 | break; |
204 | 68 | case X86II::MO_SECREL: RefKind = MCSymbolRefExpr::VK_SECREL; break; |
205 | 0 | case X86II::MO_TLSGD: RefKind = MCSymbolRefExpr::VK_TLSGD; break; |
206 | 0 | case X86II::MO_TLSLD: RefKind = MCSymbolRefExpr::VK_TLSLD; break; |
207 | 0 | case X86II::MO_TLSLDM: RefKind = MCSymbolRefExpr::VK_TLSLDM; break; |
208 | 21 | case X86II::MO_GOTTPOFF: RefKind = MCSymbolRefExpr::VK_GOTTPOFF; break; |
209 | 8 | case X86II::MO_INDNTPOFF: RefKind = MCSymbolRefExpr::VK_INDNTPOFF; break; |
210 | 26 | case X86II::MO_TPOFF: RefKind = MCSymbolRefExpr::VK_TPOFF; break; |
211 | 17 | case X86II::MO_DTPOFF: RefKind = MCSymbolRefExpr::VK_DTPOFF; break; |
212 | 24 | case X86II::MO_NTPOFF: RefKind = MCSymbolRefExpr::VK_NTPOFF; break; |
213 | 5 | case X86II::MO_GOTNTPOFF: RefKind = MCSymbolRefExpr::VK_GOTNTPOFF; break; |
214 | 3.32k | case X86II::MO_GOTPCREL: RefKind = MCSymbolRefExpr::VK_GOTPCREL; break; |
215 | 55 | case X86II::MO_GOT: RefKind = MCSymbolRefExpr::VK_GOT; break; |
216 | 76 | case X86II::MO_GOTOFF: RefKind = MCSymbolRefExpr::VK_GOTOFF; break; |
217 | 243 | case X86II::MO_PLT: RefKind = MCSymbolRefExpr::VK_PLT; break; |
218 | 4 | case X86II::MO_ABS8: RefKind = MCSymbolRefExpr::VK_X86_ABS8; break; |
219 | 8.09k | case X86II::MO_PIC_BASE_OFFSET: |
220 | 8.09k | case X86II::MO_DARWIN_NONLAZY_PIC_BASE: |
221 | 8.09k | Expr = MCSymbolRefExpr::create(Sym, Ctx); |
222 | 8.09k | // Subtract the pic base. |
223 | 8.09k | Expr = MCBinaryExpr::createSub(Expr, |
224 | 8.09k | MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), |
225 | 8.09k | Ctx); |
226 | 8.09k | if (MO.isJTI()8.09k ) { |
227 | 160 | assert(MAI.doesSetDirectiveSuppressReloc()); |
228 | 160 | // If .set directive is supported, use it to reduce the number of |
229 | 160 | // relocations the assembler will generate for differences between |
230 | 160 | // local labels. This is only safe when the symbols are in the same |
231 | 160 | // section so we are restricting it to jumptable references. |
232 | 160 | MCSymbol *Label = Ctx.createTempSymbol(); |
233 | 160 | AsmPrinter.OutStreamer->EmitAssignment(Label, Expr); |
234 | 160 | Expr = MCSymbolRefExpr::create(Label, Ctx); |
235 | 160 | } |
236 | 104k | break; |
237 | 116k | } |
238 | 116k | |
// Flags that only picked a RefKind (or nothing) get a plain symbol reference.
239 | 116k | if (116k !Expr116k ) |
240 | 108k | Expr = MCSymbolRefExpr::create(Sym, RefKind, Ctx); |
241 | 116k | |
// Apply the operand's constant offset; the offset is not read at all for
// jump-table and basic-block operands.
242 | 116k | if (!MO.isJTI() && 116k !MO.isMBB()116k && MO.getOffset()70.0k ) |
243 | 2.97k | Expr = MCBinaryExpr::createAdd(Expr, |
244 | 2.97k | MCConstantExpr::create(MO.getOffset(), Ctx), |
245 | 2.97k | Ctx); |
246 | 116k | return MCOperand::createExpr(Expr); |
247 | 116k | } |
248 | | |
249 | | |
250 | | /// \brief Simplify FOO $imm, %{al,ax,eax,rax} to FOO $imm, for instruction with |
251 | | /// a short fixed-register form. |
///
/// \param Inst   Instruction to rewrite in place. It must have a register as
///               operand 0 and an immediate or expression as its last operand,
///               with either 2 operands or 3 operands where operands 0 and 1
///               are the same register.
/// \param Opcode Opcode of the short form to switch to.
///
/// Inst is left untouched unless operand 0 is AL, AX, EAX or RAX.
252 | 12.6k | static void SimplifyShortImmForm(MCInst &Inst, unsigned Opcode) { |
253 | 12.6k | unsigned ImmOp = Inst.getNumOperands() - 1; |
254 | 12.6k | assert(Inst.getOperand(0).isReg() && |
255 | 12.6k | (Inst.getOperand(ImmOp).isImm() || Inst.getOperand(ImmOp).isExpr()) && |
256 | 12.6k | ((Inst.getNumOperands() == 3 && Inst.getOperand(1).isReg() && |
257 | 12.6k | Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) || |
258 | 12.6k | Inst.getNumOperands() == 2) && "Unexpected instruction!"); |
259 | 12.6k | |
260 | 12.6k | // Check whether the destination register can be fixed. |
261 | 12.6k | unsigned Reg = Inst.getOperand(0).getReg(); |
262 | 12.6k | if (Reg != X86::AL && 12.6k Reg != X86::AX9.96k && Reg != X86::EAX9.96k && Reg != X86::RAX8.81k ) |
263 | 8.61k | return; |
264 | 3.99k | |
265 | 3.99k | // If so, rewrite the instruction. |
// The immediate is copied out first because Inst is reset before being
// rebuilt with the new opcode and that single operand.
266 | 3.99k | MCOperand Saved = Inst.getOperand(ImmOp); |
267 | 3.99k | Inst = MCInst(); |
268 | 3.99k | Inst.setOpcode(Opcode); |
269 | 3.99k | Inst.addOperand(Saved); |
270 | 3.99k | } |
271 | | |
272 | | /// \brief If a movsx instruction has a shorter encoding for the used register |
273 | | /// simplify the instruction to use it instead. |
///
/// Only MOVSX16rr8, MOVSX32rr16 and MOVSX64rr32 are accepted; any other opcode
/// is treated as unreachable. When the destination/source pair is AX/AL,
/// EAX/AX or RAX/EAX respectively, Inst is replaced by an operand-less CBW,
/// CWDE or CDQE; otherwise it is left unchanged.
274 | 848 | static void SimplifyMOVSX(MCInst &Inst) { |
// Zero means "no shorter form applies".
275 | 848 | unsigned NewOpcode = 0; |
276 | 848 | unsigned Op0 = Inst.getOperand(0).getReg(), Op1 = Inst.getOperand(1).getReg(); |
277 | 848 | switch (Inst.getOpcode()) { |
278 | 0 | default: |
279 | 0 | llvm_unreachable("Unexpected instruction!"); |
280 | 2 | case X86::MOVSX16rr8: // movsbw %al, %ax --> cbtw |
281 | 2 | if (Op0 == X86::AX && 2 Op1 == X86::AL2 ) |
282 | 0 | NewOpcode = X86::CBW; |
283 | 2 | break; |
284 | 292 | case X86::MOVSX32rr16: // movswl %ax, %eax --> cwtl |
285 | 292 | if (Op0 == X86::EAX && 292 Op1 == X86::AX123 ) |
286 | 69 | NewOpcode = X86::CWDE; |
287 | 292 | break; |
288 | 554 | case X86::MOVSX64rr32: // movslq %eax, %rax --> cltq |
289 | 554 | if (Op0 == X86::RAX && 554 Op1 == X86::EAX258 ) |
290 | 153 | NewOpcode = X86::CDQE; |
291 | 554 | break; |
292 | 848 | } |
293 | 848 | |
294 | 848 | if (848 NewOpcode != 0848 ) { |
295 | 222 | Inst = MCInst(); |
296 | 222 | Inst.setOpcode(NewOpcode); |
297 | 222 | } |
298 | 848 | } |
299 | | |
300 | | /// \brief Simplify things like MOV32rm to MOV32o32a. |
///
/// \param Printer Asm printer; only its subtarget's is64Bit() is consulted.
/// \param Inst    Six-operand move (one register plus a five-operand memory
///                reference) that is rewritten in place when eligible.
/// \param Opcode  Opcode of the short form to switch to.
///
/// The rewrite is skipped in 64-bit mode and when the register operand is not
/// AL, AX, EAX or RAX. It is also skipped when the address counts as absolute
/// (everything except a VK_TLVP symbol reference) and has a base register, an
/// index register, or a scale other than 1. On success Inst keeps only the
/// displacement and the segment register.
301 | | static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst, |
302 | 61.7k | unsigned Opcode) { |
303 | 61.7k | // Don't make these simplifications in 64-bit mode; other assemblers don't |
304 | 61.7k | // perform them because they make the code larger. |
305 | 61.7k | if (Printer.getSubtarget().is64Bit()) |
306 | 13.8k | return; |
307 | 47.8k | |
// NOTE(review): IsStore is true when operands 0 and 1 are both registers, in
// which case the register operand is operand 0 and the address starts at
// operand 1. That looks like the register-first (load) layout rather than a
// store - confirm the name against the MOV*rm / MOV*mr operand order.
308 | 47.8k | bool IsStore = Inst.getOperand(0).isReg() && 47.8k Inst.getOperand(1).isReg()47.8k ; |
309 | 47.8k | unsigned AddrBase = IsStore; |
310 | 47.8k | unsigned RegOp = IsStore ? 030.4k : 517.4k ; |
// AddrOp is the displacement slot of the memory reference (it is the operand
// checked for isExpr()/isImm() below).
311 | 47.8k | unsigned AddrOp = AddrBase + 3; |
312 | 47.8k | assert(Inst.getNumOperands() == 6 && Inst.getOperand(RegOp).isReg() && |
313 | 47.8k | Inst.getOperand(AddrBase + X86::AddrBaseReg).isReg() && |
314 | 47.8k | Inst.getOperand(AddrBase + X86::AddrScaleAmt).isImm() && |
315 | 47.8k | Inst.getOperand(AddrBase + X86::AddrIndexReg).isReg() && |
316 | 47.8k | Inst.getOperand(AddrBase + X86::AddrSegmentReg).isReg() && |
317 | 47.8k | (Inst.getOperand(AddrOp).isExpr() || |
318 | 47.8k | Inst.getOperand(AddrOp).isImm()) && |
319 | 47.8k | "Unexpected instruction!"); |
320 | 47.8k | |
321 | 47.8k | // Check whether the destination register can be fixed. |
322 | 47.8k | unsigned Reg = Inst.getOperand(RegOp).getReg(); |
323 | 47.8k | if (Reg != X86::AL && 47.8k Reg != X86::AX47.1k && Reg != X86::EAX46.9k && Reg != X86::RAX29.0k ) |
324 | 29.0k | return; |
325 | 18.8k | |
326 | 18.8k | // Check whether this is an absolute address. |
327 | 18.8k | // FIXME: We know TLVP symbol refs aren't, but there should be a better way |
328 | 18.8k | // to do this here. |
329 | 18.8k | bool Absolute = true; |
330 | 18.8k | if (Inst.getOperand(AddrOp).isExpr()18.8k ) { |
331 | 1.57k | const MCExpr *MCE = Inst.getOperand(AddrOp).getExpr(); |
332 | 1.57k | if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(MCE)) |
333 | 587 | if (587 SRE->getKind() == MCSymbolRefExpr::VK_TLVP587 ) |
334 | 1 | Absolute = false; |
335 | 1.57k | } |
336 | 18.8k | |
// An absolute address qualifies only with no base, no index and scale 1.
337 | 18.8k | if (Absolute && |
338 | 18.8k | (Inst.getOperand(AddrBase + X86::AddrBaseReg).getReg() != 0 || |
339 | 857 | Inst.getOperand(AddrBase + X86::AddrScaleAmt).getImm() != 1 || |
340 | 851 | Inst.getOperand(AddrBase + X86::AddrIndexReg).getReg() != 0)) |
341 | 18.0k | return; |
342 | 852 | |
343 | 852 | // If so, rewrite the instruction. |
// Both operands are copied out before Inst is reset.
344 | 852 | MCOperand Saved = Inst.getOperand(AddrOp); |
345 | 852 | MCOperand Seg = Inst.getOperand(AddrBase + X86::AddrSegmentReg); |
346 | 852 | Inst = MCInst(); |
347 | 852 | Inst.setOpcode(Opcode); |
348 | 852 | Inst.addOperand(Saved); |
349 | 852 | Inst.addOperand(Seg); |
350 | 852 | } |
351 | | |
// Pick the return opcode for the subtarget: RETQ in 64-bit mode, else RETL.
352 | 231 | static unsigned getRetOpcode(const X86Subtarget &Subtarget) { |
353 | 231 | return Subtarget.is64Bit() ? X86::RETQ151 : X86::RETL80 ; |
354 | 231 | } |
355 | | |
// Lower a single MachineOperand of MI to an MCOperand.
//
// Returns None for operands that have no MCInst counterpart (implicit
// register operands and register masks). Registers and immediates map
// directly; every symbolic operand kind is resolved to an MCSymbol and passed
// through LowerSymbolOperand. An unhandled operand type prints MI to errs()
// and is treated as unreachable; MI is used for nothing else here.
356 | | Optional<MCOperand> |
357 | | X86MCInstLower::LowerMachineOperand(const MachineInstr *MI, |
358 | 3.23M | const MachineOperand &MO) const { |
359 | 3.23M | switch (MO.getType()) { |
360 | 0 | default: |
361 | 0 | MI->print(errs()); |
362 | 0 | llvm_unreachable("unknown operand type"); |
363 | 2.52M | case MachineOperand::MO_Register: |
364 | 2.52M | // Ignore all implicit register operands. |
365 | 2.52M | if (MO.isImplicit()) |
366 | 628k | return None; |
367 | 1.89M | return MCOperand::createReg(MO.getReg()); |
368 | 570k | case MachineOperand::MO_Immediate: |
369 | 570k | return MCOperand::createImm(MO.getImm()); |
370 | 101k | case MachineOperand::MO_MachineBasicBlock: |
371 | 101k | case MachineOperand::MO_GlobalAddress: |
372 | 101k | case MachineOperand::MO_ExternalSymbol: |
373 | 101k | return LowerSymbolOperand(MO, GetSymbolFromOperand(MO)); |
374 | 67 | case MachineOperand::MO_MCSymbol: |
375 | 67 | return LowerSymbolOperand(MO, MO.getMCSymbol()); |
376 | 573 | case MachineOperand::MO_JumpTableIndex: |
377 | 573 | return LowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex())); |
378 | 14.3k | case MachineOperand::MO_ConstantPoolIndex: |
379 | 14.3k | return LowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex())); |
380 | 21 | case MachineOperand::MO_BlockAddress: |
381 | 21 | return LowerSymbolOperand( |
382 | 21 | MO, AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress())); |
383 | 29.3k | case MachineOperand::MO_RegisterMask: |
384 | 29.3k | // Ignore call clobbers. |
385 | 29.3k | return None; |
386 | 0 | } |
387 | 0 | } |
388 | | |
389 | 841k | void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { |
390 | 841k | OutMI.setOpcode(MI->getOpcode()); |
391 | 841k | |
392 | 841k | for (const MachineOperand &MO : MI->operands()) |
393 | 3.23M | if (auto 3.23M MaybeMCOp3.23M = LowerMachineOperand(MI, MO)) |
394 | 2.57M | OutMI.addOperand(MaybeMCOp.getValue()); |
395 | 841k | |
396 | 841k | // Handle a few special cases to eliminate operand modifiers. |
397 | 843k | ReSimplify: |
398 | 843k | switch (OutMI.getOpcode()) { |
399 | 28.9k | case X86::LEA64_32r: |
400 | 28.9k | case X86::LEA64r: |
401 | 28.9k | case X86::LEA16r: |
402 | 28.9k | case X86::LEA32r: |
403 | 28.9k | // LEA should have a segment register, but it must be empty. |
404 | 28.9k | assert(OutMI.getNumOperands() == 1+X86::AddrNumOperands && |
405 | 28.9k | "Unexpected # of LEA operands"); |
406 | 28.9k | assert(OutMI.getOperand(1+X86::AddrSegmentReg).getReg() == 0 && |
407 | 28.9k | "LEA has segment specified!"); |
408 | 28.9k | break; |
409 | 28.9k | |
410 | 28.9k | // Commute operands to get a smaller encoding by using VEX.R instead of VEX.B |
411 | 28.9k | // if one of the registers is extended, but other isn't. |
412 | 1.48k | case X86::VMOVZPQILo2PQIrr: |
413 | 1.48k | case X86::VMOVAPDrr: |
414 | 1.48k | case X86::VMOVAPDYrr: |
415 | 1.48k | case X86::VMOVAPSrr: |
416 | 1.48k | case X86::VMOVAPSYrr: |
417 | 1.48k | case X86::VMOVDQArr: |
418 | 1.48k | case X86::VMOVDQAYrr: |
419 | 1.48k | case X86::VMOVDQUrr: |
420 | 1.48k | case X86::VMOVDQUYrr: |
421 | 1.48k | case X86::VMOVUPDrr: |
422 | 1.48k | case X86::VMOVUPDYrr: |
423 | 1.48k | case X86::VMOVUPSrr: |
424 | 1.48k | case X86::VMOVUPSYrr: { |
425 | 1.48k | if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) && |
426 | 1.48k | X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg())1.46k ) { |
427 | 4 | unsigned NewOpc; |
428 | 4 | switch (OutMI.getOpcode()) { |
429 | 0 | default: 0 llvm_unreachable0 ("Invalid opcode"); |
430 | 0 | case X86::VMOVZPQILo2PQIrr: NewOpc = X86::VMOVPQI2QIrr; break; |
431 | 0 | case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break; |
432 | 0 | case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break; |
433 | 2 | case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break; |
434 | 0 | case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break; |
435 | 2 | case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break; |
436 | 0 | case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break; |
437 | 0 | case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break; |
438 | 0 | case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break; |
439 | 0 | case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break; |
440 | 0 | case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break; |
441 | 0 | case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break; |
442 | 0 | case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break; |
443 | 4 | } |
444 | 4 | OutMI.setOpcode(NewOpc); |
445 | 4 | } |
446 | 1.48k | break; |
447 | 1.48k | } |
448 | 45 | case X86::VMOVSDrr: |
449 | 45 | case X86::VMOVSSrr: { |
450 | 45 | if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) && |
451 | 45 | X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())45 ) { |
452 | 0 | unsigned NewOpc; |
453 | 0 | switch (OutMI.getOpcode()) { |
454 | 0 | default: 0 llvm_unreachable0 ("Invalid opcode"); |
455 | 0 | case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break; |
456 | 0 | case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break; |
457 | 0 | } |
458 | 0 | OutMI.setOpcode(NewOpc); |
459 | 0 | } |
460 | 45 | break; |
461 | 45 | } |
462 | 45 | |
463 | 45 | // TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have register |
464 | 45 | // inputs modeled as normal uses instead of implicit uses. As such, truncate |
465 | 45 | // off all but the first operand (the callee). FIXME: Change isel. |
466 | 16.6k | case X86::TAILJMPr64: |
467 | 16.6k | case X86::TAILJMPr64_REX: |
468 | 16.6k | case X86::CALL64r: |
469 | 16.6k | case X86::CALL64pcrel32: { |
470 | 16.6k | unsigned Opcode = OutMI.getOpcode(); |
471 | 16.6k | MCOperand Saved = OutMI.getOperand(0); |
472 | 16.6k | OutMI = MCInst(); |
473 | 16.6k | OutMI.setOpcode(Opcode); |
474 | 16.6k | OutMI.addOperand(Saved); |
475 | 16.6k | break; |
476 | 16.6k | } |
477 | 16.6k | |
478 | 6 | case X86::EH_RETURN: |
479 | 6 | case X86::EH_RETURN64: { |
480 | 6 | OutMI = MCInst(); |
481 | 6 | OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget())); |
482 | 6 | break; |
483 | 6 | } |
484 | 6 | |
485 | 31 | case X86::CLEANUPRET: { |
486 | 31 | // Replace CLEANUPRET with the appropriate RET. |
487 | 31 | OutMI = MCInst(); |
488 | 31 | OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget())); |
489 | 31 | break; |
490 | 6 | } |
491 | 6 | |
492 | 61 | case X86::CATCHRET: { |
493 | 61 | // Replace CATCHRET with the appropriate RET. |
494 | 61 | const X86Subtarget &Subtarget = AsmPrinter.getSubtarget(); |
495 | 61 | unsigned ReturnReg = Subtarget.is64Bit() ? X86::RAX43 : X86::EAX18 ; |
496 | 61 | OutMI = MCInst(); |
497 | 61 | OutMI.setOpcode(getRetOpcode(Subtarget)); |
498 | 61 | OutMI.addOperand(MCOperand::createReg(ReturnReg)); |
499 | 61 | break; |
500 | 6 | } |
501 | 6 | |
502 | 6 | // TAILJMPd, TAILJMPd64, TailJMPd_cc - Lower to the correct jump instruction. |
503 | 0 | { unsigned Opcode; |
504 | 79 | case X86::TAILJMPr: Opcode = X86::JMP32r; goto SetTailJmpOpcode; |
505 | 1.62k | case X86::TAILJMPd: |
506 | 1.62k | case X86::TAILJMPd64: Opcode = X86::JMP_1; goto SetTailJmpOpcode; |
507 | 26 | case X86::TAILJMPd_CC: |
508 | 26 | case X86::TAILJMPd64_CC: |
509 | 26 | Opcode = X86::GetCondBranchFromCond( |
510 | 26 | static_cast<X86::CondCode>(MI->getOperand(1).getImm())); |
511 | 26 | goto SetTailJmpOpcode; |
512 | 26 | |
513 | 1.72k | SetTailJmpOpcode: |
514 | 1.72k | MCOperand Saved = OutMI.getOperand(0); |
515 | 1.72k | OutMI = MCInst(); |
516 | 1.72k | OutMI.setOpcode(Opcode); |
517 | 1.72k | OutMI.addOperand(Saved); |
518 | 1.72k | break; |
519 | 26 | } |
520 | 26 | |
521 | 2.68k | case X86::DEC16r: |
522 | 2.68k | case X86::DEC32r: |
523 | 2.68k | case X86::INC16r: |
524 | 2.68k | case X86::INC32r: |
525 | 2.68k | // If we aren't in 64-bit mode we can use the 1-byte inc/dec instructions. |
526 | 2.68k | if (!AsmPrinter.getSubtarget().is64Bit()2.68k ) { |
527 | 1.91k | unsigned Opcode; |
528 | 1.91k | switch (OutMI.getOpcode()) { |
529 | 0 | default: 0 llvm_unreachable0 ("Invalid opcode"); |
530 | 0 | case X86::DEC16r: Opcode = X86::DEC16r_alt; break; |
531 | 649 | case X86::DEC32r: Opcode = X86::DEC32r_alt; break; |
532 | 0 | case X86::INC16r: Opcode = X86::INC16r_alt; break; |
533 | 1.26k | case X86::INC32r: Opcode = X86::INC32r_alt; break; |
534 | 1.91k | } |
535 | 1.91k | OutMI.setOpcode(Opcode); |
536 | 1.91k | } |
537 | 2.68k | break; |
538 | 2.68k | |
539 | 2.68k | // These are pseudo-ops for OR to help with the OR->ADD transformation. We do |
540 | 2.68k | // this with an ugly goto in case the resultant OR uses EAX and needs the |
541 | 2.68k | // short form. |
542 | 0 | case X86::ADD16rr_DB: OutMI.setOpcode(X86::OR16rr); goto ReSimplify; |
543 | 397 | case X86::ADD32rr_DB: OutMI.setOpcode(X86::OR32rr); goto ReSimplify; |
544 | 491 | case X86::ADD64rr_DB: OutMI.setOpcode(X86::OR64rr); goto ReSimplify; |
545 | 0 | case X86::ADD16ri_DB: OutMI.setOpcode(X86::OR16ri); goto ReSimplify; |
546 | 152 | case X86::ADD32ri_DB: OutMI.setOpcode(X86::OR32ri); goto ReSimplify; |
547 | 7 | case X86::ADD64ri32_DB: OutMI.setOpcode(X86::OR64ri32); goto ReSimplify; |
548 | 0 | case X86::ADD16ri8_DB: OutMI.setOpcode(X86::OR16ri8); goto ReSimplify; |
549 | 28 | case X86::ADD32ri8_DB: OutMI.setOpcode(X86::OR32ri8); goto ReSimplify; |
550 | 3 | case X86::ADD64ri8_DB: OutMI.setOpcode(X86::OR64ri8); goto ReSimplify; |
551 | 2.68k | |
552 | 2.68k | // Atomic load and store require a separate pseudo-inst because Acquire |
553 | 2.68k | // implies mayStore and Release implies mayLoad; fix these to regular MOV |
554 | 2.68k | // instructions here |
555 | 12 | case X86::ACQUIRE_MOV8rm: OutMI.setOpcode(X86::MOV8rm); goto ReSimplify; |
556 | 53 | case X86::ACQUIRE_MOV16rm: OutMI.setOpcode(X86::MOV16rm); goto ReSimplify; |
557 | 89 | case X86::ACQUIRE_MOV32rm: OutMI.setOpcode(X86::MOV32rm); goto ReSimplify; |
558 | 23 | case X86::ACQUIRE_MOV64rm: OutMI.setOpcode(X86::MOV64rm); goto ReSimplify; |
559 | 12 | case X86::RELEASE_MOV8mr: OutMI.setOpcode(X86::MOV8mr); goto ReSimplify; |
560 | 53 | case X86::RELEASE_MOV16mr: OutMI.setOpcode(X86::MOV16mr); goto ReSimplify; |
561 | 44 | case X86::RELEASE_MOV32mr: OutMI.setOpcode(X86::MOV32mr); goto ReSimplify; |
562 | 24 | case X86::RELEASE_MOV64mr: OutMI.setOpcode(X86::MOV64mr); goto ReSimplify; |
563 | 15 | case X86::RELEASE_MOV8mi: OutMI.setOpcode(X86::MOV8mi); goto ReSimplify; |
564 | 3 | case X86::RELEASE_MOV16mi: OutMI.setOpcode(X86::MOV16mi); goto ReSimplify; |
565 | 3 | case X86::RELEASE_MOV32mi: OutMI.setOpcode(X86::MOV32mi); goto ReSimplify; |
566 | 2 | case X86::RELEASE_MOV64mi32: OutMI.setOpcode(X86::MOV64mi32); goto ReSimplify; |
567 | 7 | case X86::RELEASE_ADD8mi: OutMI.setOpcode(X86::ADD8mi); goto ReSimplify; |
568 | 3 | case X86::RELEASE_ADD8mr: OutMI.setOpcode(X86::ADD8mr); goto ReSimplify; |
569 | 5 | case X86::RELEASE_ADD32mi: OutMI.setOpcode(X86::ADD32mi); goto ReSimplify; |
570 | 3 | case X86::RELEASE_ADD32mr: OutMI.setOpcode(X86::ADD32mr); goto ReSimplify; |
571 | 4 | case X86::RELEASE_ADD64mi32: OutMI.setOpcode(X86::ADD64mi32); goto ReSimplify; |
572 | 2 | case X86::RELEASE_ADD64mr: OutMI.setOpcode(X86::ADD64mr); goto ReSimplify; |
573 | 3 | case X86::RELEASE_AND8mi: OutMI.setOpcode(X86::AND8mi); goto ReSimplify; |
574 | 3 | case X86::RELEASE_AND8mr: OutMI.setOpcode(X86::AND8mr); goto ReSimplify; |
575 | 3 | case X86::RELEASE_AND32mi: OutMI.setOpcode(X86::AND32mi); goto ReSimplify; |
576 | 3 | case X86::RELEASE_AND32mr: OutMI.setOpcode(X86::AND32mr); goto ReSimplify; |
577 | 2 | case X86::RELEASE_AND64mi32: OutMI.setOpcode(X86::AND64mi32); goto ReSimplify; |
578 | 2 | case X86::RELEASE_AND64mr: OutMI.setOpcode(X86::AND64mr); goto ReSimplify; |
579 | 3 | case X86::RELEASE_OR8mi: OutMI.setOpcode(X86::OR8mi); goto ReSimplify; |
580 | 3 | case X86::RELEASE_OR8mr: OutMI.setOpcode(X86::OR8mr); goto ReSimplify; |
581 | 3 | case X86::RELEASE_OR32mi: OutMI.setOpcode(X86::OR32mi); goto ReSimplify; |
582 | 3 | case X86::RELEASE_OR32mr: OutMI.setOpcode(X86::OR32mr); goto ReSimplify; |
583 | 2 | case X86::RELEASE_OR64mi32: OutMI.setOpcode(X86::OR64mi32); goto ReSimplify; |
584 | 2 | case X86::RELEASE_OR64mr: OutMI.setOpcode(X86::OR64mr); goto ReSimplify; |
585 | 3 | case X86::RELEASE_XOR8mi: OutMI.setOpcode(X86::XOR8mi); goto ReSimplify; |
586 | 3 | case X86::RELEASE_XOR8mr: OutMI.setOpcode(X86::XOR8mr); goto ReSimplify; |
587 | 3 | case X86::RELEASE_XOR32mi: OutMI.setOpcode(X86::XOR32mi); goto ReSimplify; |
588 | 3 | case X86::RELEASE_XOR32mr: OutMI.setOpcode(X86::XOR32mr); goto ReSimplify; |
589 | 2 | case X86::RELEASE_XOR64mi32: OutMI.setOpcode(X86::XOR64mi32); goto ReSimplify; |
590 | 2 | case X86::RELEASE_XOR64mr: OutMI.setOpcode(X86::XOR64mr); goto ReSimplify; |
591 | 4 | case X86::RELEASE_INC8m: OutMI.setOpcode(X86::INC8m); goto ReSimplify; |
592 | 0 | case X86::RELEASE_INC16m: OutMI.setOpcode(X86::INC16m); goto ReSimplify; |
593 | 2 | case X86::RELEASE_INC32m: OutMI.setOpcode(X86::INC32m); goto ReSimplify; |
594 | 1 | case X86::RELEASE_INC64m: OutMI.setOpcode(X86::INC64m); goto ReSimplify; |
595 | 2 | case X86::RELEASE_DEC8m: OutMI.setOpcode(X86::DEC8m); goto ReSimplify; |
596 | 0 | case X86::RELEASE_DEC16m: OutMI.setOpcode(X86::DEC16m); goto ReSimplify; |
597 | 2 | case X86::RELEASE_DEC32m: OutMI.setOpcode(X86::DEC32m); goto ReSimplify; |
598 | 1 | case X86::RELEASE_DEC64m: OutMI.setOpcode(X86::DEC64m); goto ReSimplify; |
599 | 2.68k | |
600 | 2.68k | // We don't currently select the correct instruction form for instructions |
601 | 2.68k | // which have a short %eax, etc. form. Handle this by custom lowering, for |
602 | 2.68k | // now. |
603 | 2.68k | // |
604 | 2.68k | // Note, we are currently not handling the following instructions: |
605 | 2.68k | // MOV64ao8, MOV64o8a |
606 | 2.68k | // XCHG16ar, XCHG32ar, XCHG64ar |
607 | 61.7k | case X86::MOV8mr_NOREX: |
608 | 61.7k | case X86::MOV8mr: |
609 | 61.7k | case X86::MOV8rm_NOREX: |
610 | 61.7k | case X86::MOV8rm: |
611 | 61.7k | case X86::MOV16mr: |
612 | 61.7k | case X86::MOV16rm: |
613 | 61.7k | case X86::MOV32mr: |
614 | 61.7k | case X86::MOV32rm: { |
615 | 61.7k | unsigned NewOpc; |
616 | 61.7k | switch (OutMI.getOpcode()) { |
617 | 0 | default: 0 llvm_unreachable0 ("Invalid opcode"); |
618 | 4.63k | case X86::MOV8mr_NOREX: |
619 | 4.63k | case X86::MOV8mr: NewOpc = X86::MOV8o32a; break; |
620 | 2.65k | case X86::MOV8rm_NOREX: |
621 | 2.65k | case X86::MOV8rm: NewOpc = X86::MOV8ao32; break; |
622 | 1.49k | case X86::MOV16mr: NewOpc = X86::MOV16o32a; break; |
623 | 141 | case X86::MOV16rm: NewOpc = X86::MOV16ao32; break; |
624 | 19.2k | case X86::MOV32mr: NewOpc = X86::MOV32o32a; break; |
625 | 33.5k | case X86::MOV32rm: NewOpc = X86::MOV32ao32; break; |
626 | 61.7k | } |
627 | 61.7k | SimplifyShortMoveForm(AsmPrinter, OutMI, NewOpc); |
628 | 61.7k | break; |
629 | 61.7k | } |
630 | 61.7k | |
631 | 12.6k | case X86::ADC8ri: 12.6k case X86::ADC16ri: 12.6k case X86::ADC32ri: 12.6k case X86::ADC64ri32: |
632 | 12.6k | case X86::ADD8ri: 12.6k case X86::ADD16ri: 12.6k case X86::ADD32ri: 12.6k case X86::ADD64ri32: |
633 | 12.6k | case X86::AND8ri: 12.6k case X86::AND16ri: 12.6k case X86::AND32ri: 12.6k case X86::AND64ri32: |
634 | 12.6k | case X86::CMP8ri: 12.6k case X86::CMP16ri: 12.6k case X86::CMP32ri: 12.6k case X86::CMP64ri32: |
635 | 12.6k | case X86::OR8ri: 12.6k case X86::OR16ri: 12.6k case X86::OR32ri: 12.6k case X86::OR64ri32: |
636 | 12.6k | case X86::SBB8ri: 12.6k case X86::SBB16ri: 12.6k case X86::SBB32ri: 12.6k case X86::SBB64ri32: |
637 | 12.6k | case X86::SUB8ri: 12.6k case X86::SUB16ri: 12.6k case X86::SUB32ri: 12.6k case X86::SUB64ri32: |
638 | 12.6k | case X86::TEST8ri:12.6k case X86::TEST16ri:12.6k case X86::TEST32ri:12.6k case X86::TEST64ri32: |
639 | 12.6k | case X86::XOR8ri: 12.6k case X86::XOR16ri: 12.6k case X86::XOR32ri: 12.6k case X86::XOR64ri32: { |
640 | 12.6k | unsigned NewOpc; |
641 | 12.6k | switch (OutMI.getOpcode()) { |
642 | 0 | default: 0 llvm_unreachable0 ("Invalid opcode"); |
643 | 5 | case X86::ADC8ri: NewOpc = X86::ADC8i8; break; |
644 | 0 | case X86::ADC16ri: NewOpc = X86::ADC16i16; break; |
645 | 1 | case X86::ADC32ri: NewOpc = X86::ADC32i32; break; |
646 | 0 | case X86::ADC64ri32: NewOpc = X86::ADC64i32; break; |
647 | 174 | case X86::ADD8ri: NewOpc = X86::ADD8i8; break; |
648 | 0 | case X86::ADD16ri: NewOpc = X86::ADD16i16; break; |
649 | 511 | case X86::ADD32ri: NewOpc = X86::ADD32i32; break; |
650 | 765 | case X86::ADD64ri32: NewOpc = X86::ADD64i32; break; |
651 | 2.55k | case X86::AND8ri: NewOpc = X86::AND8i8; break; |
652 | 6 | case X86::AND16ri: NewOpc = X86::AND16i16; break; |
653 | 2.16k | case X86::AND32ri: NewOpc = X86::AND32i32; break; |
654 | 248 | case X86::AND64ri32: NewOpc = X86::AND64i32; break; |
655 | 777 | case X86::CMP8ri: NewOpc = X86::CMP8i8; break; |
656 | 8 | case X86::CMP16ri: NewOpc = X86::CMP16i16; break; |
657 | 1.77k | case X86::CMP32ri: NewOpc = X86::CMP32i32; break; |
658 | 411 | case X86::CMP64ri32: NewOpc = X86::CMP64i32; break; |
659 | 42 | case X86::OR8ri: NewOpc = X86::OR8i8; break; |
660 | 0 | case X86::OR16ri: NewOpc = X86::OR16i16; break; |
661 | 591 | case X86::OR32ri: NewOpc = X86::OR32i32; break; |
662 | 18 | case X86::OR64ri32: NewOpc = X86::OR64i32; break; |
663 | 2 | case X86::SBB8ri: NewOpc = X86::SBB8i8; break; |
664 | 0 | case X86::SBB16ri: NewOpc = X86::SBB16i16; break; |
665 | 18 | case X86::SBB32ri: NewOpc = X86::SBB32i32; break; |
666 | 0 | case X86::SBB64ri32: NewOpc = X86::SBB64i32; break; |
667 | 3 | case X86::SUB8ri: NewOpc = X86::SUB8i8; break; |
668 | 0 | case X86::SUB16ri: NewOpc = X86::SUB16i16; break; |
669 | 215 | case X86::SUB32ri: NewOpc = X86::SUB32i32; break; |
670 | 471 | case X86::SUB64ri32: NewOpc = X86::SUB64i32; break; |
671 | 1.49k | case X86::TEST8ri: NewOpc = X86::TEST8i8; break; |
672 | 11 | case X86::TEST16ri: NewOpc = X86::TEST16i16; break; |
673 | 72 | case X86::TEST32ri: NewOpc = X86::TEST32i32; break; |
674 | 1 | case X86::TEST64ri32: NewOpc = X86::TEST64i32; break; |
675 | 71 | case X86::XOR8ri: NewOpc = X86::XOR8i8; break; |
676 | 0 | case X86::XOR16ri: NewOpc = X86::XOR16i16; break; |
677 | 196 | case X86::XOR32ri: NewOpc = X86::XOR32i32; break; |
678 | 2 | case X86::XOR64ri32: NewOpc = X86::XOR64i32; break; |
679 | 12.6k | } |
680 | 12.6k | SimplifyShortImmForm(OutMI, NewOpc); |
681 | 12.6k | break; |
682 | 12.6k | } |
683 | 12.6k | |
684 | 12.6k | // Try to shrink some forms of movsx. |
685 | 848 | case X86::MOVSX16rr8: |
686 | 848 | case X86::MOVSX32rr16: |
687 | 848 | case X86::MOVSX64rr32: |
688 | 848 | SimplifyMOVSX(OutMI); |
689 | 848 | break; |
690 | 841k | } |
691 | 841k | } |
692 | | |
693 | | void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering, |
694 | 35 | const MachineInstr &MI) { |
695 | 35 | |
696 | 35 | bool is64Bits = MI.getOpcode() == X86::TLS_addr64 || |
697 | 23 | MI.getOpcode() == X86::TLS_base_addr64; |
698 | 35 | |
699 | 35 | bool needsPadding = MI.getOpcode() == X86::TLS_addr64; |
700 | 35 | |
701 | 35 | MCContext &context = OutStreamer->getContext(); |
702 | 35 | |
703 | 35 | if (needsPadding) |
704 | 12 | EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX)); |
705 | 35 | |
706 | 35 | MCSymbolRefExpr::VariantKind SRVK; |
707 | 35 | switch (MI.getOpcode()) { |
708 | 21 | case X86::TLS_addr32: |
709 | 21 | case X86::TLS_addr64: |
710 | 21 | SRVK = MCSymbolRefExpr::VK_TLSGD; |
711 | 21 | break; |
712 | 7 | case X86::TLS_base_addr32: |
713 | 7 | SRVK = MCSymbolRefExpr::VK_TLSLDM; |
714 | 7 | break; |
715 | 7 | case X86::TLS_base_addr64: |
716 | 7 | SRVK = MCSymbolRefExpr::VK_TLSLD; |
717 | 7 | break; |
718 | 0 | default: |
719 | 0 | llvm_unreachable("unexpected opcode"); |
720 | 35 | } |
721 | 35 | |
722 | 35 | MCSymbol *sym = MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)); |
723 | 35 | const MCSymbolRefExpr *symRef = MCSymbolRefExpr::create(sym, SRVK, context); |
724 | 35 | |
725 | 35 | MCInst LEA; |
726 | 35 | if (is64Bits35 ) { |
727 | 19 | LEA.setOpcode(X86::LEA64r); |
728 | 19 | LEA.addOperand(MCOperand::createReg(X86::RDI)); // dest |
729 | 19 | LEA.addOperand(MCOperand::createReg(X86::RIP)); // base |
730 | 19 | LEA.addOperand(MCOperand::createImm(1)); // scale |
731 | 19 | LEA.addOperand(MCOperand::createReg(0)); // index |
732 | 19 | LEA.addOperand(MCOperand::createExpr(symRef)); // disp |
733 | 19 | LEA.addOperand(MCOperand::createReg(0)); // seg |
734 | 35 | } else if (16 SRVK == MCSymbolRefExpr::VK_TLSLDM16 ) { |
735 | 7 | LEA.setOpcode(X86::LEA32r); |
736 | 7 | LEA.addOperand(MCOperand::createReg(X86::EAX)); // dest |
737 | 7 | LEA.addOperand(MCOperand::createReg(X86::EBX)); // base |
738 | 7 | LEA.addOperand(MCOperand::createImm(1)); // scale |
739 | 7 | LEA.addOperand(MCOperand::createReg(0)); // index |
740 | 7 | LEA.addOperand(MCOperand::createExpr(symRef)); // disp |
741 | 7 | LEA.addOperand(MCOperand::createReg(0)); // seg |
742 | 16 | } else { |
743 | 9 | LEA.setOpcode(X86::LEA32r); |
744 | 9 | LEA.addOperand(MCOperand::createReg(X86::EAX)); // dest |
745 | 9 | LEA.addOperand(MCOperand::createReg(0)); // base |
746 | 9 | LEA.addOperand(MCOperand::createImm(1)); // scale |
747 | 9 | LEA.addOperand(MCOperand::createReg(X86::EBX)); // index |
748 | 9 | LEA.addOperand(MCOperand::createExpr(symRef)); // disp |
749 | 9 | LEA.addOperand(MCOperand::createReg(0)); // seg |
750 | 9 | } |
751 | 35 | EmitAndCountInstruction(LEA); |
752 | 35 | |
753 | 35 | if (needsPadding35 ) { |
754 | 12 | EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX)); |
755 | 12 | EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX)); |
756 | 12 | EmitAndCountInstruction(MCInstBuilder(X86::REX64_PREFIX)); |
757 | 12 | } |
758 | 35 | |
759 | 35 | StringRef name = is64Bits ? "__tls_get_addr"19 : "___tls_get_addr"16 ; |
760 | 35 | MCSymbol *tlsGetAddr = context.getOrCreateSymbol(name); |
761 | 35 | const MCSymbolRefExpr *tlsRef = |
762 | 35 | MCSymbolRefExpr::create(tlsGetAddr, |
763 | 35 | MCSymbolRefExpr::VK_PLT, |
764 | 35 | context); |
765 | 35 | |
766 | 19 | EmitAndCountInstruction(MCInstBuilder(is64Bits ? X86::CALL64pcrel32 |
767 | 16 | : X86::CALLpcrel32) |
768 | 35 | .addExpr(tlsRef)); |
769 | 35 | } |
770 | | |
771 | | /// \brief Emit the largest nop instruction smaller than or equal to \p NumBytes |
772 | | /// bytes. Return the size of nop emitted. |
773 | | static unsigned EmitNop(MCStreamer &OS, unsigned NumBytes, bool Is64Bit, |
774 | 209 | const MCSubtargetInfo &STI) { |
775 | 209 | // This works only for 64bit. For 32bit we have to do additional checking if |
776 | 209 | // the CPU supports multi-byte nops. |
777 | 209 | assert(Is64Bit && "EmitNops only supports X86-64"); |
778 | 209 | |
779 | 209 | unsigned NopSize; |
780 | 209 | unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg; |
781 | 209 | Opc = IndexReg = Displacement = SegmentReg = 0; |
782 | 209 | BaseReg = X86::RAX; |
783 | 209 | ScaleVal = 1; |
784 | 209 | switch (NumBytes) { |
785 | 209 | case 0: 0 llvm_unreachable0 ("Zero nops?"); break0 ; |
786 | 4 | case 1: NopSize = 1; Opc = X86::NOOP; break; |
787 | 48 | case 2: NopSize = 2; Opc = X86::XCHG16ar; break; |
788 | 9 | case 3: NopSize = 3; Opc = X86::NOOPL; break; |
789 | 4 | case 4: NopSize = 4; Opc = X86::NOOPL; Displacement = 8; break; |
790 | 10 | case 5: NopSize = 5; Opc = X86::NOOPL; Displacement = 8; |
791 | 10 | IndexReg = X86::RAX; break; |
792 | 4 | case 6: NopSize = 6; Opc = X86::NOOPW; Displacement = 8; |
793 | 4 | IndexReg = X86::RAX; break; |
794 | 5 | case 7: NopSize = 7; Opc = X86::NOOPL; Displacement = 512; break; |
795 | 4 | case 8: NopSize = 8; Opc = X86::NOOPL; Displacement = 512; |
796 | 4 | IndexReg = X86::RAX; break; |
797 | 33 | case 9: NopSize = 9; Opc = X86::NOOPW; Displacement = 512; |
798 | 33 | IndexReg = X86::RAX; break; |
799 | 88 | default: NopSize = 10; Opc = X86::NOOPW; Displacement = 512; |
800 | 88 | IndexReg = X86::RAX; SegmentReg = X86::CS; break; |
801 | 209 | } |
802 | 209 | |
803 | 209 | unsigned NumPrefixes = std::min(NumBytes - NopSize, 5U); |
804 | 209 | NopSize += NumPrefixes; |
805 | 455 | for (unsigned i = 0; i != NumPrefixes455 ; ++i246 ) |
806 | 246 | OS.EmitBytes("\x66"); |
807 | 209 | |
808 | 209 | switch (Opc) { |
809 | 0 | default: |
810 | 0 | llvm_unreachable("Unexpected opcode"); |
811 | 0 | break; |
812 | 4 | case X86::NOOP: |
813 | 4 | OS.EmitInstruction(MCInstBuilder(Opc), STI); |
814 | 4 | break; |
815 | 48 | case X86::XCHG16ar: |
816 | 48 | OS.EmitInstruction(MCInstBuilder(Opc).addReg(X86::AX), STI); |
817 | 48 | break; |
818 | 157 | case X86::NOOPL: |
819 | 157 | case X86::NOOPW: |
820 | 157 | OS.EmitInstruction(MCInstBuilder(Opc) |
821 | 157 | .addReg(BaseReg) |
822 | 157 | .addImm(ScaleVal) |
823 | 157 | .addReg(IndexReg) |
824 | 157 | .addImm(Displacement) |
825 | 157 | .addReg(SegmentReg), |
826 | 157 | STI); |
827 | 157 | break; |
828 | 209 | } |
829 | 209 | assert(NopSize <= NumBytes && "We overemitted?"); |
830 | 209 | return NopSize; |
831 | 209 | } |
832 | | |
833 | | /// \brief Emit the optimal amount of multi-byte nops on X86. |
834 | | static void EmitNops(MCStreamer &OS, unsigned NumBytes, bool Is64Bit, |
835 | 192 | const MCSubtargetInfo &STI) { |
836 | 192 | unsigned NopsToEmit = NumBytes; |
837 | 192 | (void)NopsToEmit; |
838 | 397 | while (NumBytes397 ) { |
839 | 205 | NumBytes -= EmitNop(OS, NumBytes, Is64Bit, STI); |
840 | 205 | assert(NopsToEmit >= NumBytes && "Emitted more than I asked for!"); |
841 | 205 | } |
842 | 192 | } |
843 | | |
844 | | void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI, |
845 | 69 | X86MCInstLower &MCIL) { |
846 | 69 | assert(Subtarget->is64Bit() && "Statepoint currently only supports X86-64"); |
847 | 69 | |
848 | 69 | StatepointOpers SOpers(&MI); |
849 | 69 | if (unsigned PatchBytes69 = SOpers.getNumPatchBytes()) { |
850 | 1 | EmitNops(*OutStreamer, PatchBytes, Subtarget->is64Bit(), |
851 | 1 | getSubtargetInfo()); |
852 | 69 | } else { |
853 | 68 | // Lower call target and choose correct opcode |
854 | 68 | const MachineOperand &CallTarget = SOpers.getCallTarget(); |
855 | 68 | MCOperand CallTargetMCOp; |
856 | 68 | unsigned CallOpcode; |
857 | 68 | switch (CallTarget.getType()) { |
858 | 57 | case MachineOperand::MO_GlobalAddress: |
859 | 57 | case MachineOperand::MO_ExternalSymbol: |
860 | 57 | CallTargetMCOp = MCIL.LowerSymbolOperand( |
861 | 57 | CallTarget, MCIL.GetSymbolFromOperand(CallTarget)); |
862 | 57 | CallOpcode = X86::CALL64pcrel32; |
863 | 57 | // Currently, we only support relative addressing with statepoints. |
864 | 57 | // Otherwise, we'll need a scratch register to hold the target |
865 | 57 | // address. You'll fail asserts during load & relocation if this |
866 | 57 | // symbol is to far away. (TODO: support non-relative addressing) |
867 | 57 | break; |
868 | 0 | case MachineOperand::MO_Immediate: |
869 | 0 | CallTargetMCOp = MCOperand::createImm(CallTarget.getImm()); |
870 | 0 | CallOpcode = X86::CALL64pcrel32; |
871 | 0 | // Currently, we only support relative addressing with statepoints. |
872 | 0 | // Otherwise, we'll need a scratch register to hold the target |
873 | 0 | // immediate. You'll fail asserts during load & relocation if this |
874 | 0 | // address is to far away. (TODO: support non-relative addressing) |
875 | 0 | break; |
876 | 11 | case MachineOperand::MO_Register: |
877 | 11 | CallTargetMCOp = MCOperand::createReg(CallTarget.getReg()); |
878 | 11 | CallOpcode = X86::CALL64r; |
879 | 11 | break; |
880 | 0 | default: |
881 | 0 | llvm_unreachable("Unsupported operand type in statepoint call target"); |
882 | 0 | break; |
883 | 68 | } |
884 | 68 | |
885 | 68 | // Emit call |
886 | 68 | MCInst CallInst; |
887 | 68 | CallInst.setOpcode(CallOpcode); |
888 | 68 | CallInst.addOperand(CallTargetMCOp); |
889 | 68 | OutStreamer->EmitInstruction(CallInst, getSubtargetInfo()); |
890 | 68 | } |
891 | 69 | |
892 | 69 | // Record our statepoint node in the same section used by STACKMAP |
893 | 69 | // and PATCHPOINT |
894 | 69 | SM.recordStatepoint(MI); |
895 | 69 | } |
896 | | |
897 | | void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI, |
898 | 23 | X86MCInstLower &MCIL) { |
899 | 23 | // FAULTING_LOAD_OP <def>, <faltinf type>, <MBB handler>, |
900 | 23 | // <opcode>, <operands> |
901 | 23 | |
902 | 23 | unsigned DefRegister = FaultingMI.getOperand(0).getReg(); |
903 | 23 | FaultMaps::FaultKind FK = |
904 | 23 | static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(1).getImm()); |
905 | 23 | MCSymbol *HandlerLabel = FaultingMI.getOperand(2).getMBB()->getSymbol(); |
906 | 23 | unsigned Opcode = FaultingMI.getOperand(3).getImm(); |
907 | 23 | unsigned OperandsBeginIdx = 4; |
908 | 23 | |
909 | 23 | assert(FK < FaultMaps::FaultKindMax && "Invalid Faulting Kind!"); |
910 | 23 | FM.recordFaultingOp(FK, HandlerLabel); |
911 | 23 | |
912 | 23 | MCInst MI; |
913 | 23 | MI.setOpcode(Opcode); |
914 | 23 | |
915 | 23 | if (DefRegister != X86::NoRegister) |
916 | 16 | MI.addOperand(MCOperand::createReg(DefRegister)); |
917 | 23 | |
918 | 23 | for (auto I = FaultingMI.operands_begin() + OperandsBeginIdx, |
919 | 23 | E = FaultingMI.operands_end(); |
920 | 158 | I != E158 ; ++I135 ) |
921 | 135 | if (auto 135 MaybeOperand135 = MCIL.LowerMachineOperand(&FaultingMI, *I)) |
922 | 125 | MI.addOperand(MaybeOperand.getValue()); |
923 | 23 | |
924 | 23 | OutStreamer->EmitInstruction(MI, getSubtargetInfo()); |
925 | 23 | } |
926 | | |
927 | | void X86AsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI, |
928 | 2 | X86MCInstLower &MCIL) { |
929 | 2 | bool Is64Bits = Subtarget->is64Bit(); |
930 | 2 | MCContext &Ctx = OutStreamer->getContext(); |
931 | 2 | MCSymbol *fentry = Ctx.getOrCreateSymbol("__fentry__"); |
932 | 2 | const MCSymbolRefExpr *Op = |
933 | 2 | MCSymbolRefExpr::create(fentry, MCSymbolRefExpr::VK_None, Ctx); |
934 | 2 | |
935 | 2 | EmitAndCountInstruction( |
936 | 2 | MCInstBuilder(Is64Bits ? X86::CALL64pcrel322 : X86::CALLpcrel320 ) |
937 | 2 | .addExpr(Op)); |
938 | 2 | } |
939 | | |
940 | | void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI, |
941 | 10 | X86MCInstLower &MCIL) { |
942 | 10 | // PATCHABLE_OP minsize, opcode, operands |
943 | 10 | |
944 | 10 | unsigned MinSize = MI.getOperand(0).getImm(); |
945 | 10 | unsigned Opcode = MI.getOperand(1).getImm(); |
946 | 10 | |
947 | 10 | MCInst MCI; |
948 | 10 | MCI.setOpcode(Opcode); |
949 | 10 | for (auto &MO : make_range(MI.operands_begin() + 2, MI.operands_end())) |
950 | 28 | if (auto 28 MaybeOperand28 = MCIL.LowerMachineOperand(&MI, MO)) |
951 | 20 | MCI.addOperand(MaybeOperand.getValue()); |
952 | 10 | |
953 | 10 | SmallString<256> Code; |
954 | 10 | SmallVector<MCFixup, 4> Fixups; |
955 | 10 | raw_svector_ostream VecOS(Code); |
956 | 10 | CodeEmitter->encodeInstruction(MCI, VecOS, Fixups, getSubtargetInfo()); |
957 | 10 | |
958 | 10 | if (Code.size() < MinSize10 ) { |
959 | 6 | if (MinSize == 2 && 6 Opcode == X86::PUSH64r6 ) { |
960 | 2 | // This is an optimization that lets us get away without emitting a nop in |
961 | 2 | // many cases. |
962 | 2 | // |
963 | 2 | // NB! In some cases the encoding for PUSH64r (e.g. PUSH64r %R9) takes two |
964 | 2 | // bytes too, so the check on MinSize is important. |
965 | 2 | MCI.setOpcode(X86::PUSH64rmr); |
966 | 6 | } else { |
967 | 4 | unsigned NopSize = EmitNop(*OutStreamer, MinSize, Subtarget->is64Bit(), |
968 | 4 | getSubtargetInfo()); |
969 | 4 | assert(NopSize == MinSize && "Could not implement MinSize!"); |
970 | 4 | (void) NopSize; |
971 | 4 | } |
972 | 6 | } |
973 | 10 | |
974 | 10 | OutStreamer->EmitInstruction(MCI, getSubtargetInfo()); |
975 | 10 | } |
976 | | |
977 | | // Lower a stackmap of the form: |
978 | | // <id>, <shadowBytes>, ... |
979 | 100 | void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) { |
980 | 100 | SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo()); |
981 | 100 | SM.recordStackMap(MI); |
982 | 100 | unsigned NumShadowBytes = MI.getOperand(1).getImm(); |
983 | 100 | SMShadowTracker.reset(NumShadowBytes); |
984 | 100 | } |
985 | | |
986 | | // Lower a patchpoint of the form: |
987 | | // [<def>], <id>, <numBytes>, <target>, <numArgs>, <cc>, ... |
988 | | void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI, |
989 | 67 | X86MCInstLower &MCIL) { |
990 | 67 | assert(Subtarget->is64Bit() && "Patchpoint currently only supports X86-64"); |
991 | 67 | |
992 | 67 | SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo()); |
993 | 67 | |
994 | 67 | SM.recordPatchPoint(MI); |
995 | 67 | |
996 | 67 | PatchPointOpers opers(&MI); |
997 | 67 | unsigned ScratchIdx = opers.getNextScratchIdx(); |
998 | 67 | unsigned EncodedBytes = 0; |
999 | 67 | const MachineOperand &CalleeMO = opers.getCallTarget(); |
1000 | 67 | |
1001 | 67 | // Check for null target. If target is non-null (i.e. is non-zero or is |
1002 | 67 | // symbolic) then emit a call. |
1003 | 67 | if (!(CalleeMO.isImm() && 67 !CalleeMO.getImm()65 )) { |
1004 | 38 | MCOperand CalleeMCOp; |
1005 | 38 | switch (CalleeMO.getType()) { |
1006 | 0 | default: |
1007 | 0 | /// FIXME: Add a verifier check for bad callee types. |
1008 | 0 | llvm_unreachable("Unrecognized callee operand type."); |
1009 | 36 | case MachineOperand::MO_Immediate: |
1010 | 36 | if (CalleeMO.getImm()) |
1011 | 36 | CalleeMCOp = MCOperand::createImm(CalleeMO.getImm()); |
1012 | 36 | break; |
1013 | 2 | case MachineOperand::MO_ExternalSymbol: |
1014 | 2 | case MachineOperand::MO_GlobalAddress: |
1015 | 2 | CalleeMCOp = |
1016 | 2 | MCIL.LowerSymbolOperand(CalleeMO, |
1017 | 2 | MCIL.GetSymbolFromOperand(CalleeMO)); |
1018 | 2 | break; |
1019 | 38 | } |
1020 | 38 | |
1021 | 38 | // Emit MOV to materialize the target address and the CALL to target. |
1022 | 38 | // This is encoded with 12-13 bytes, depending on which register is used. |
1023 | 38 | unsigned ScratchReg = MI.getOperand(ScratchIdx).getReg(); |
1024 | 38 | if (X86II::isX86_64ExtendedReg(ScratchReg)) |
1025 | 38 | EncodedBytes = 13; |
1026 | 38 | else |
1027 | 0 | EncodedBytes = 12; |
1028 | 38 | |
1029 | 38 | EmitAndCountInstruction( |
1030 | 38 | MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp)); |
1031 | 38 | EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg)); |
1032 | 38 | } |
1033 | 67 | |
1034 | 67 | // Emit padding. |
1035 | 67 | unsigned NumBytes = opers.getNumPatchBytes(); |
1036 | 67 | assert(NumBytes >= EncodedBytes && |
1037 | 67 | "Patchpoint can't request size less than the length of a call."); |
1038 | 67 | |
1039 | 67 | EmitNops(*OutStreamer, NumBytes - EncodedBytes, Subtarget->is64Bit(), |
1040 | 67 | getSubtargetInfo()); |
1041 | 67 | } |
1042 | | |
1043 | | void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI, |
1044 | 2 | X86MCInstLower &MCIL) { |
1045 | 2 | assert(Subtarget->is64Bit() && "XRay custom events only supports X86-64"); |
1046 | 2 | |
1047 | 2 | // We want to emit the following pattern, which follows the x86 calling |
1048 | 2 | // convention to prepare for the trampoline call to be patched in. |
1049 | 2 | // |
1050 | 2 | // .p2align 1, ... |
1051 | 2 | // .Lxray_event_sled_N: |
1052 | 2 | // jmp +N // jump across the instrumentation sled |
1053 | 2 | // ... // set up arguments in register |
1054 | 2 | // callq __xray_CustomEvent@plt // force dependency to symbol |
1055 | 2 | // ... |
1056 | 2 | // <jump here> |
1057 | 2 | // |
1058 | 2 | // After patching, it would look something like: |
1059 | 2 | // |
1060 | 2 | // nopw (2-byte nop) |
1061 | 2 | // ... |
1062 | 2 | // callq __xrayCustomEvent // already lowered |
1063 | 2 | // ... |
1064 | 2 | // |
1065 | 2 | // --- |
1066 | 2 | // First we emit the label and the jump. |
1067 | 2 | auto CurSled = OutContext.createTempSymbol("xray_event_sled_", true); |
1068 | 2 | OutStreamer->AddComment("# XRay Custom Event Log"); |
1069 | 2 | OutStreamer->EmitCodeAlignment(2); |
1070 | 2 | OutStreamer->EmitLabel(CurSled); |
1071 | 2 | |
1072 | 2 | // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as |
1073 | 2 | // an operand (computed as an offset from the jmp instruction). |
1074 | 2 | // FIXME: Find another less hacky way do force the relative jump. |
1075 | 2 | OutStreamer->EmitBinaryData("\xeb\x0f"); |
1076 | 2 | |
1077 | 2 | // The default C calling convention will place two arguments into %rcx and |
1078 | 2 | // %rdx -- so we only work with those. |
1079 | 2 | unsigned UsedRegs[] = {X86::RDI, X86::RSI}; |
1080 | 2 | bool UsedMask[] = {false, false}; |
1081 | 2 | |
1082 | 2 | // Then we put the operands in the %rdi and %rsi registers. We spill the |
1083 | 2 | // values in the register before we clobber them, and mark them as used in |
1084 | 2 | // UsedMask. In case the arguments are already in the correct register, we use |
1085 | 2 | // emit nops appropriately sized to keep the sled the same size in every |
1086 | 2 | // situation. |
1087 | 6 | for (unsigned I = 0; I < MI.getNumOperands()6 ; ++I4 ) |
1088 | 4 | if (auto 4 Op4 = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) { |
1089 | 4 | assert(Op->isReg() && "Only support arguments in registers"); |
1090 | 4 | if (Op->getReg() != UsedRegs[I]4 ) { |
1091 | 4 | UsedMask[I] = true; |
1092 | 4 | EmitAndCountInstruction( |
1093 | 4 | MCInstBuilder(X86::PUSH64r).addReg(UsedRegs[I])); |
1094 | 4 | EmitAndCountInstruction(MCInstBuilder(X86::MOV64rr) |
1095 | 4 | .addReg(UsedRegs[I]) |
1096 | 4 | .addReg(Op->getReg())); |
1097 | 4 | } else { |
1098 | 0 | EmitNops(*OutStreamer, 4, Subtarget->is64Bit(), getSubtargetInfo()); |
1099 | 0 | } |
1100 | 4 | } |
1101 | 2 | |
1102 | 2 | // We emit a hard dependency on the __xray_CustomEvent symbol, which is the |
1103 | 2 | // name of the trampoline to be implemented by the XRay runtime. |
1104 | 2 | auto TSym = OutContext.getOrCreateSymbol("__xray_CustomEvent"); |
1105 | 2 | MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym); |
1106 | 2 | if (isPositionIndependent()) |
1107 | 1 | TOp.setTargetFlags(X86II::MO_PLT); |
1108 | 2 | |
1109 | 2 | // Emit the call instruction. |
1110 | 2 | EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32) |
1111 | 2 | .addOperand(MCIL.LowerSymbolOperand(TOp, TSym))); |
1112 | 2 | |
1113 | 2 | // Restore caller-saved and used registers. |
1114 | 6 | for (unsigned I = sizeof UsedMask; I-- > 0;) |
1115 | 4 | if (4 UsedMask[I]4 ) |
1116 | 4 | EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(UsedRegs[I])); |
1117 | 4 | else |
1118 | 0 | EmitNops(*OutStreamer, 1, Subtarget->is64Bit(), getSubtargetInfo()); |
1119 | 2 | |
1120 | 2 | OutStreamer->AddComment("xray custom event end."); |
1121 | 2 | |
1122 | 2 | // Record the sled version. Older versions of this sled were spelled |
1123 | 2 | // differently, so we let the runtime handle the different offsets we're |
1124 | 2 | // using. |
1125 | 2 | recordSled(CurSled, MI, SledKind::CUSTOM_EVENT, 1); |
1126 | 2 | } |
1127 | | |
1128 | | void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI, |
1129 | 25 | X86MCInstLower &MCIL) { |
1130 | 25 | // We want to emit the following pattern: |
1131 | 25 | // |
1132 | 25 | // .p2align 1, ... |
1133 | 25 | // .Lxray_sled_N: |
1134 | 25 | // jmp .tmpN |
1135 | 25 | // # 9 bytes worth of noops |
1136 | 25 | // |
1137 | 25 | // We need the 9 bytes because at runtime, we'd be patching over the full 11 |
1138 | 25 | // bytes with the following pattern: |
1139 | 25 | // |
1140 | 25 | // mov %r10, <function id, 32-bit> // 6 bytes |
1141 | 25 | // call <relative offset, 32-bits> // 5 bytes |
1142 | 25 | // |
1143 | 25 | auto CurSled = OutContext.createTempSymbol("xray_sled_", true); |
1144 | 25 | OutStreamer->EmitCodeAlignment(2); |
1145 | 25 | OutStreamer->EmitLabel(CurSled); |
1146 | 25 | |
1147 | 25 | // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as |
1148 | 25 | // an operand (computed as an offset from the jmp instruction). |
1149 | 25 | // FIXME: Find another less hacky way do force the relative jump. |
1150 | 25 | OutStreamer->EmitBytes("\xeb\x09"); |
1151 | 25 | EmitNops(*OutStreamer, 9, Subtarget->is64Bit(), getSubtargetInfo()); |
1152 | 25 | recordSled(CurSled, MI, SledKind::FUNCTION_ENTER); |
1153 | 25 | } |
1154 | | |
1155 | | void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI, |
1156 | 22 | X86MCInstLower &MCIL) { |
1157 | 22 | // Since PATCHABLE_RET takes the opcode of the return statement as an |
1158 | 22 | // argument, we use that to emit the correct form of the RET that we want. |
1159 | 22 | // i.e. when we see this: |
1160 | 22 | // |
1161 | 22 | // PATCHABLE_RET X86::RET ... |
1162 | 22 | // |
1163 | 22 | // We should emit the RET followed by sleds. |
1164 | 22 | // |
1165 | 22 | // .p2align 1, ... |
1166 | 22 | // .Lxray_sled_N: |
1167 | 22 | // ret # or equivalent instruction |
1168 | 22 | // # 10 bytes worth of noops |
1169 | 22 | // |
1170 | 22 | // This just makes sure that the alignment for the next instruction is 2. |
1171 | 22 | auto CurSled = OutContext.createTempSymbol("xray_sled_", true); |
1172 | 22 | OutStreamer->EmitCodeAlignment(2); |
1173 | 22 | OutStreamer->EmitLabel(CurSled); |
1174 | 22 | unsigned OpCode = MI.getOperand(0).getImm(); |
1175 | 22 | MCInst Ret; |
1176 | 22 | Ret.setOpcode(OpCode); |
1177 | 22 | for (auto &MO : make_range(MI.operands_begin() + 1, MI.operands_end())) |
1178 | 22 | if (auto 22 MaybeOperand22 = MCIL.LowerMachineOperand(&MI, MO)) |
1179 | 22 | Ret.addOperand(MaybeOperand.getValue()); |
1180 | 22 | OutStreamer->EmitInstruction(Ret, getSubtargetInfo()); |
1181 | 22 | EmitNops(*OutStreamer, 10, Subtarget->is64Bit(), getSubtargetInfo()); |
1182 | 22 | recordSled(CurSled, MI, SledKind::FUNCTION_EXIT); |
1183 | 22 | } |
1184 | | |
1185 | 4 | void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI, X86MCInstLower &MCIL) { |
1186 | 4 | // Like PATCHABLE_RET, we have the actual instruction in the operands to this |
1187 | 4 | // instruction so we lower that particular instruction and its operands. |
1188 | 4 | // Unlike PATCHABLE_RET though, we put the sled before the JMP, much like how |
1189 | 4 | // we do it for PATCHABLE_FUNCTION_ENTER. The sled should be very similar to |
1190 | 4 | // the PATCHABLE_FUNCTION_ENTER case, followed by the lowering of the actual |
1191 | 4 | // tail call much like how we have it in PATCHABLE_RET. |
1192 | 4 | auto CurSled = OutContext.createTempSymbol("xray_sled_", true); |
1193 | 4 | OutStreamer->EmitCodeAlignment(2); |
1194 | 4 | OutStreamer->EmitLabel(CurSled); |
1195 | 4 | auto Target = OutContext.createTempSymbol(); |
1196 | 4 | |
1197 | 4 | // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as |
1198 | 4 | // an operand (computed as an offset from the jmp instruction). |
1199 | 4 | // FIXME: Find another less hacky way do force the relative jump. |
1200 | 4 | OutStreamer->EmitBytes("\xeb\x09"); |
1201 | 4 | EmitNops(*OutStreamer, 9, Subtarget->is64Bit(), getSubtargetInfo()); |
1202 | 4 | OutStreamer->EmitLabel(Target); |
1203 | 4 | recordSled(CurSled, MI, SledKind::TAIL_CALL); |
1204 | 4 | |
1205 | 4 | unsigned OpCode = MI.getOperand(0).getImm(); |
1206 | 4 | MCInst TC; |
1207 | 4 | TC.setOpcode(OpCode); |
1208 | 4 | |
1209 | 4 | // Before emitting the instruction, add a comment to indicate that this is |
1210 | 4 | // indeed a tail call. |
1211 | 4 | OutStreamer->AddComment("TAILCALL"); |
1212 | 4 | for (auto &MO : make_range(MI.operands_begin() + 1, MI.operands_end())) |
1213 | 18 | if (auto 18 MaybeOperand18 = MCIL.LowerMachineOperand(&MI, MO)) |
1214 | 4 | TC.addOperand(MaybeOperand.getValue()); |
1215 | 4 | OutStreamer->EmitInstruction(TC, getSubtargetInfo()); |
1216 | 4 | } |
1217 | | |
1218 | | // Returns instruction preceding MBBI in MachineFunction. |
1219 | | // If MBBI is the first instruction of the first basic block, returns null. |
1220 | | static MachineBasicBlock::const_iterator |
1221 | 413 | PrevCrossBBInst(MachineBasicBlock::const_iterator MBBI) { |
1222 | 413 | const MachineBasicBlock *MBB = MBBI->getParent(); |
1223 | 448 | while (MBBI == MBB->begin()448 ) { |
1224 | 35 | if (MBB == &MBB->getParent()->front()) |
1225 | 0 | return MachineBasicBlock::const_iterator(); |
1226 | 35 | MBB = MBB->getPrevNode(); |
1227 | 35 | MBBI = MBB->end(); |
1228 | 35 | } |
1229 | 413 | return --MBBI; |
1230 | 413 | } |
1231 | | |
1232 | | static const Constant *getConstantFromPool(const MachineInstr &MI, |
1233 | 17.4k | const MachineOperand &Op) { |
1234 | 17.4k | if (!Op.isCPI()) |
1235 | 8.71k | return nullptr; |
1236 | 8.77k | |
1237 | 8.77k | ArrayRef<MachineConstantPoolEntry> Constants = |
1238 | 8.77k | MI.getParent()->getParent()->getConstantPool()->getConstants(); |
1239 | 8.77k | const MachineConstantPoolEntry &ConstantEntry = |
1240 | 8.77k | Constants[Op.getIndex()]; |
1241 | 8.77k | |
1242 | 8.77k | // Bail if this is a machine constant pool entry, we won't be able to dig out |
1243 | 8.77k | // anything useful. |
1244 | 8.77k | if (ConstantEntry.isMachineConstantPoolEntry()) |
1245 | 0 | return nullptr; |
1246 | 8.77k | |
1247 | 8.77k | auto *C = dyn_cast<Constant>(ConstantEntry.Val.ConstVal); |
1248 | 8.77k | assert((!C || ConstantEntry.getType() == C->getType()) && |
1249 | 8.77k | "Expected a constant of the same type!"); |
1250 | 8.77k | return C; |
1251 | 8.77k | } |
1252 | | |
1253 | | static std::string getShuffleComment(const MachineInstr *MI, |
1254 | | unsigned SrcOp1Idx, |
1255 | | unsigned SrcOp2Idx, |
1256 | 2.01k | ArrayRef<int> Mask) { |
1257 | 2.01k | std::string Comment; |
1258 | 2.01k | |
1259 | 2.01k | // Compute the name for a register. This is really goofy because we have |
1260 | 2.01k | // multiple instruction printers that could (in theory) use different |
1261 | 2.01k | // names. Fortunately most people use the ATT style (outside of Windows) |
1262 | 2.01k | // and they actually agree on register naming here. Ultimately, this is |
1263 | 2.01k | // a comment, and so its OK if it isn't perfect. |
1264 | 6.13k | auto GetRegisterName = [](unsigned RegNum) -> StringRef { |
1265 | 6.13k | return X86ATTInstPrinter::getRegisterName(RegNum); |
1266 | 6.13k | }; |
1267 | 2.01k | |
1268 | 2.01k | const MachineOperand &DstOp = MI->getOperand(0); |
1269 | 2.01k | const MachineOperand &SrcOp1 = MI->getOperand(SrcOp1Idx); |
1270 | 2.01k | const MachineOperand &SrcOp2 = MI->getOperand(SrcOp2Idx); |
1271 | 2.01k | |
1272 | 2.01k | StringRef DstName = DstOp.isReg() ? GetRegisterName(DstOp.getReg())2.01k : "mem"0 ; |
1273 | 2.01k | StringRef Src1Name = |
1274 | 2.01k | SrcOp1.isReg() ? GetRegisterName(SrcOp1.getReg())2.01k : "mem"0 ; |
1275 | 2.01k | StringRef Src2Name = |
1276 | 2.01k | SrcOp2.isReg() ? GetRegisterName(SrcOp2.getReg())2.01k : "mem"0 ; |
1277 | 2.01k | |
1278 | 2.01k | // One source operand, fix the mask to print all elements in one span. |
1279 | 2.01k | SmallVector<int, 8> ShuffleMask(Mask.begin(), Mask.end()); |
1280 | 2.01k | if (Src1Name == Src2Name) |
1281 | 43.4k | for (int i = 0, e = ShuffleMask.size(); 1.98k i != e43.4k ; ++i41.4k ) |
1282 | 41.4k | if (41.4k ShuffleMask[i] >= e41.4k ) |
1283 | 48 | ShuffleMask[i] -= e; |
1284 | 2.01k | |
1285 | 2.01k | raw_string_ostream CS(Comment); |
1286 | 2.01k | CS << DstName; |
1287 | 2.01k | |
1288 | 2.01k | // Handle AVX512 MASK/MASXZ write mask comments. |
1289 | 2.01k | // MASK: zmmX {%kY} |
1290 | 2.01k | // MASKZ: zmmX {%kY} {z} |
1291 | 2.01k | if (SrcOp1Idx > 12.01k ) { |
1292 | 91 | assert((SrcOp1Idx == 2 || SrcOp1Idx == 3) && "Unexpected writemask"); |
1293 | 91 | |
1294 | 91 | const MachineOperand &WriteMaskOp = MI->getOperand(SrcOp1Idx - 1); |
1295 | 91 | if (WriteMaskOp.isReg()91 ) { |
1296 | 91 | CS << " {%" << GetRegisterName(WriteMaskOp.getReg()) << "}"; |
1297 | 91 | |
1298 | 91 | if (SrcOp1Idx == 291 ) { |
1299 | 46 | CS << " {z}"; |
1300 | 46 | } |
1301 | 91 | } |
1302 | 91 | } |
1303 | 2.01k | |
1304 | 2.01k | CS << " = "; |
1305 | 2.01k | |
1306 | 13.4k | for (int i = 0, e = ShuffleMask.size(); i != e13.4k ; ++i11.4k ) { |
1307 | 11.4k | if (i != 0) |
1308 | 9.42k | CS << ","; |
1309 | 11.4k | if (ShuffleMask[i] == SM_SentinelZero11.4k ) { |
1310 | 8.40k | CS << "zero"; |
1311 | 8.40k | continue; |
1312 | 8.40k | } |
1313 | 3.04k | |
1314 | 3.04k | // Otherwise, it must come from src1 or src2. Print the span of elements |
1315 | 3.04k | // that comes from this src. |
1316 | 3.04k | bool isSrc1 = ShuffleMask[i] < (int)e; |
1317 | 3.04k | CS << (isSrc1 ? Src1Name2.97k : Src2Name66 ) << '['; |
1318 | 3.04k | |
1319 | 3.04k | bool IsFirst = true; |
1320 | 36.3k | while (i != e && 36.3k ShuffleMask[i] != SM_SentinelZero34.7k && |
1321 | 33.4k | (ShuffleMask[i] < (int)e) == isSrc133.4k ) { |
1322 | 33.3k | if (!IsFirst) |
1323 | 30.2k | CS << ','; |
1324 | 33.3k | else |
1325 | 3.04k | IsFirst = false; |
1326 | 33.3k | if (ShuffleMask[i] == SM_SentinelUndef) |
1327 | 4.84k | CS << "u"; |
1328 | 33.3k | else |
1329 | 28.4k | CS << ShuffleMask[i] % (int)e; |
1330 | 33.3k | ++i; |
1331 | 33.3k | } |
1332 | 11.4k | CS << ']'; |
1333 | 11.4k | --i; // For loop increments element #. |
1334 | 11.4k | } |
1335 | 2.01k | CS.flush(); |
1336 | 2.01k | |
1337 | 2.01k | return Comment; |
1338 | 2.01k | } |
1339 | | |
1340 | 11.1k | static void printConstant(const Constant *COp, raw_ostream &CS) { |
1341 | 11.1k | if (isa<UndefValue>(COp)11.1k ) { |
1342 | 3.95k | CS << "u"; |
1343 | 11.1k | } else if (auto *7.14k CI7.14k = dyn_cast<ConstantInt>(COp)) { |
1344 | 6.20k | if (CI->getBitWidth() <= 646.20k ) { |
1345 | 6.20k | CS << CI->getZExtValue(); |
1346 | 6.20k | } else { |
1347 | 0 | // print multi-word constant as (w0,w1) |
1348 | 0 | const auto &Val = CI->getValue(); |
1349 | 0 | CS << "("; |
1350 | 0 | for (int i = 0, N = Val.getNumWords(); i < N0 ; ++i0 ) { |
1351 | 0 | if (i > 0) |
1352 | 0 | CS << ","; |
1353 | 0 | CS << Val.getRawData()[i]; |
1354 | 0 | } |
1355 | 0 | CS << ")"; |
1356 | 0 | } |
1357 | 7.14k | } else if (auto *941 CF941 = dyn_cast<ConstantFP>(COp)) { |
1358 | 941 | SmallString<32> Str; |
1359 | 941 | CF->getValueAPF().toString(Str); |
1360 | 941 | CS << Str; |
1361 | 941 | } else { |
1362 | 0 | CS << "?"; |
1363 | 0 | } |
1364 | 11.1k | } |
1365 | | |
1366 | 845k | void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { |
1367 | 845k | X86MCInstLower MCInstLowering(*MF, *this); |
1368 | 845k | const X86RegisterInfo *RI = MF->getSubtarget<X86Subtarget>().getRegisterInfo(); |
1369 | 845k | |
1370 | 845k | // Add a comment about EVEX-2-VEX compression for AVX-512 instrs that |
1371 | 845k | // are compressed from EVEX encoding to VEX encoding. |
1372 | 845k | if (TM.Options.MCOptions.ShowMCEncoding845k ) { |
1373 | 15.2k | if (MI->getAsmPrinterFlags() & AC_EVEX_2_VEX) |
1374 | 2.26k | OutStreamer->AddComment("EVEX TO VEX Compression ", false); |
1375 | 15.2k | } |
1376 | 845k | |
1377 | 845k | switch (MI->getOpcode()) { |
1378 | 0 | case TargetOpcode::DBG_VALUE: |
1379 | 0 | llvm_unreachable("Should be handled target independently"); |
1380 | 845k | |
1381 | 845k | // Emit nothing here but a comment if we can. |
1382 | 29 | case X86::Int_MemBarrier: |
1383 | 29 | OutStreamer->emitRawComment("MEMBARRIER"); |
1384 | 29 | return; |
1385 | 845k | |
1386 | 845k | |
1387 | 6 | case X86::EH_RETURN: |
1388 | 6 | case X86::EH_RETURN64: { |
1389 | 6 | // Lower these as normal, but add some comments. |
1390 | 6 | unsigned Reg = MI->getOperand(0).getReg(); |
1391 | 6 | OutStreamer->AddComment(StringRef("eh_return, addr: %") + |
1392 | 6 | X86ATTInstPrinter::getRegisterName(Reg)); |
1393 | 6 | break; |
1394 | 6 | } |
1395 | 31 | case X86::CLEANUPRET: { |
1396 | 31 | // Lower these as normal, but add some comments. |
1397 | 31 | OutStreamer->AddComment("CLEANUPRET"); |
1398 | 31 | break; |
1399 | 6 | } |
1400 | 6 | |
1401 | 61 | case X86::CATCHRET: { |
1402 | 61 | // Lower these as normal, but add some comments. |
1403 | 61 | OutStreamer->AddComment("CATCHRET"); |
1404 | 61 | break; |
1405 | 6 | } |
1406 | 6 | |
1407 | 1.88k | case X86::TAILJMPr: |
1408 | 1.88k | case X86::TAILJMPm: |
1409 | 1.88k | case X86::TAILJMPd: |
1410 | 1.88k | case X86::TAILJMPd_CC: |
1411 | 1.88k | case X86::TAILJMPr64: |
1412 | 1.88k | case X86::TAILJMPm64: |
1413 | 1.88k | case X86::TAILJMPd64: |
1414 | 1.88k | case X86::TAILJMPd64_CC: |
1415 | 1.88k | case X86::TAILJMPr64_REX: |
1416 | 1.88k | case X86::TAILJMPm64_REX: |
1417 | 1.88k | // Lower these as normal, but add some comments. |
1418 | 1.88k | OutStreamer->AddComment("TAILCALL"); |
1419 | 1.88k | break; |
1420 | 1.88k | |
1421 | 35 | case X86::TLS_addr32: |
1422 | 35 | case X86::TLS_addr64: |
1423 | 35 | case X86::TLS_base_addr32: |
1424 | 35 | case X86::TLS_base_addr64: |
1425 | 35 | return LowerTlsAddr(MCInstLowering, *MI); |
1426 | 35 | |
1427 | 1.63k | case X86::MOVPC32r: { |
1428 | 1.63k | // This is a pseudo op for a two instruction sequence with a label, which |
1429 | 1.63k | // looks like: |
1430 | 1.63k | // call "L1$pb" |
1431 | 1.63k | // "L1$pb": |
1432 | 1.63k | // popl %esi |
1433 | 1.63k | |
1434 | 1.63k | // Emit the call. |
1435 | 1.63k | MCSymbol *PICBase = MF->getPICBaseSymbol(); |
1436 | 1.63k | // FIXME: We would like an efficient form for this, so we don't have to do a |
1437 | 1.63k | // lot of extra uniquing. |
1438 | 1.63k | EmitAndCountInstruction(MCInstBuilder(X86::CALLpcrel32) |
1439 | 1.63k | .addExpr(MCSymbolRefExpr::create(PICBase, OutContext))); |
1440 | 1.63k | |
1441 | 1.63k | const X86FrameLowering* FrameLowering = |
1442 | 1.63k | MF->getSubtarget<X86Subtarget>().getFrameLowering(); |
1443 | 1.63k | bool hasFP = FrameLowering->hasFP(*MF); |
1444 | 1.63k | |
1445 | 1.63k | // TODO: This is needed only if we require precise CFA. |
1446 | 1.63k | bool HasActiveDwarfFrame = OutStreamer->getNumFrameInfos() && |
1447 | 1.40k | !OutStreamer->getDwarfFrameInfos().back().End; |
1448 | 1.63k | |
1449 | 1.63k | int stackGrowth = -RI->getSlotSize(); |
1450 | 1.63k | |
1451 | 1.63k | if (HasActiveDwarfFrame && 1.63k !hasFP1.40k ) { |
1452 | 286 | OutStreamer->EmitCFIAdjustCfaOffset(-stackGrowth); |
1453 | 286 | } |
1454 | 1.63k | |
1455 | 1.63k | // Emit the label. |
1456 | 1.63k | OutStreamer->EmitLabel(PICBase); |
1457 | 1.63k | |
1458 | 1.63k | // popl $reg |
1459 | 1.63k | EmitAndCountInstruction(MCInstBuilder(X86::POP32r) |
1460 | 1.63k | .addReg(MI->getOperand(0).getReg())); |
1461 | 1.63k | |
1462 | 1.63k | if (HasActiveDwarfFrame && 1.63k !hasFP1.40k ) { |
1463 | 286 | OutStreamer->EmitCFIAdjustCfaOffset(stackGrowth); |
1464 | 286 | } |
1465 | 1.63k | return; |
1466 | 35 | } |
1467 | 35 | |
1468 | 637 | case X86::ADD32ri: { |
1469 | 637 | // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri. |
1470 | 637 | if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS) |
1471 | 511 | break; |
1472 | 126 | |
1473 | 126 | // Okay, we have something like: |
1474 | 126 | // EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL) |
1475 | 126 | |
1476 | 126 | // For this, we want to print something like: |
1477 | 126 | // MYGLOBAL + (. - PICBASE) |
1478 | 126 | // However, we can't generate a ".", so just emit a new label here and refer |
1479 | 126 | // to it. |
1480 | 126 | MCSymbol *DotSym = OutContext.createTempSymbol(); |
1481 | 126 | OutStreamer->EmitLabel(DotSym); |
1482 | 126 | |
1483 | 126 | // Now that we have emitted the label, lower the complex operand expression. |
1484 | 126 | MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2)); |
1485 | 126 | |
1486 | 126 | const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext); |
1487 | 126 | const MCExpr *PICBase = |
1488 | 126 | MCSymbolRefExpr::create(MF->getPICBaseSymbol(), OutContext); |
1489 | 126 | DotExpr = MCBinaryExpr::createSub(DotExpr, PICBase, OutContext); |
1490 | 126 | |
1491 | 126 | DotExpr = MCBinaryExpr::createAdd(MCSymbolRefExpr::create(OpSym,OutContext), |
1492 | 126 | DotExpr, OutContext); |
1493 | 126 | |
1494 | 126 | EmitAndCountInstruction(MCInstBuilder(X86::ADD32ri) |
1495 | 126 | .addReg(MI->getOperand(0).getReg()) |
1496 | 126 | .addReg(MI->getOperand(1).getReg()) |
1497 | 126 | .addExpr(DotExpr)); |
1498 | 126 | return; |
1499 | 126 | } |
1500 | 69 | case TargetOpcode::STATEPOINT: |
1501 | 69 | return LowerSTATEPOINT(*MI, MCInstLowering); |
1502 | 126 | |
1503 | 23 | case TargetOpcode::FAULTING_OP: |
1504 | 23 | return LowerFAULTING_OP(*MI, MCInstLowering); |
1505 | 126 | |
1506 | 2 | case TargetOpcode::FENTRY_CALL: |
1507 | 2 | return LowerFENTRY_CALL(*MI, MCInstLowering); |
1508 | 126 | |
1509 | 10 | case TargetOpcode::PATCHABLE_OP: |
1510 | 10 | return LowerPATCHABLE_OP(*MI, MCInstLowering); |
1511 | 126 | |
1512 | 100 | case TargetOpcode::STACKMAP: |
1513 | 100 | return LowerSTACKMAP(*MI); |
1514 | 126 | |
1515 | 67 | case TargetOpcode::PATCHPOINT: |
1516 | 67 | return LowerPATCHPOINT(*MI, MCInstLowering); |
1517 | 126 | |
1518 | 25 | case TargetOpcode::PATCHABLE_FUNCTION_ENTER: |
1519 | 25 | return LowerPATCHABLE_FUNCTION_ENTER(*MI, MCInstLowering); |
1520 | 126 | |
1521 | 22 | case TargetOpcode::PATCHABLE_RET: |
1522 | 22 | return LowerPATCHABLE_RET(*MI, MCInstLowering); |
1523 | 126 | |
1524 | 4 | case TargetOpcode::PATCHABLE_TAIL_CALL: |
1525 | 4 | return LowerPATCHABLE_TAIL_CALL(*MI, MCInstLowering); |
1526 | 126 | |
1527 | 2 | case TargetOpcode::PATCHABLE_EVENT_CALL: |
1528 | 2 | return LowerPATCHABLE_EVENT_CALL(*MI, MCInstLowering); |
1529 | 126 | |
1530 | 120 | case X86::MORESTACK_RET: |
1531 | 120 | EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget))); |
1532 | 120 | return; |
1533 | 126 | |
1534 | 13 | case X86::MORESTACK_RET_RESTORE_R10: |
1535 | 13 | // Return, then restore R10. |
1536 | 13 | EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget))); |
1537 | 13 | EmitAndCountInstruction(MCInstBuilder(X86::MOV64rr) |
1538 | 13 | .addReg(X86::R10) |
1539 | 13 | .addReg(X86::RAX)); |
1540 | 13 | return; |
1541 | 126 | |
1542 | 379 | case X86::SEH_PushReg: |
1543 | 379 | assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?"); |
1544 | 379 | OutStreamer->EmitWinCFIPushReg(RI->getSEHRegNum(MI->getOperand(0).getImm())); |
1545 | 379 | return; |
1546 | 126 | |
1547 | 0 | case X86::SEH_SaveReg: |
1548 | 0 | assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?"); |
1549 | 0 | OutStreamer->EmitWinCFISaveReg(RI->getSEHRegNum(MI->getOperand(0).getImm()), |
1550 | 0 | MI->getOperand(1).getImm()); |
1551 | 0 | return; |
1552 | 126 | |
1553 | 141 | case X86::SEH_SaveXMM: |
1554 | 141 | assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?"); |
1555 | 141 | OutStreamer->EmitWinCFISaveXMM(RI->getSEHRegNum(MI->getOperand(0).getImm()), |
1556 | 141 | MI->getOperand(1).getImm()); |
1557 | 141 | return; |
1558 | 126 | |
1559 | 355 | case X86::SEH_StackAlloc: |
1560 | 355 | assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?"); |
1561 | 355 | OutStreamer->EmitWinCFIAllocStack(MI->getOperand(0).getImm()); |
1562 | 355 | return; |
1563 | 126 | |
1564 | 87 | case X86::SEH_SetFrame: |
1565 | 87 | assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?"); |
1566 | 87 | OutStreamer->EmitWinCFISetFrame(RI->getSEHRegNum(MI->getOperand(0).getImm()), |
1567 | 87 | MI->getOperand(1).getImm()); |
1568 | 87 | return; |
1569 | 126 | |
1570 | 0 | case X86::SEH_PushFrame: |
1571 | 0 | assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?"); |
1572 | 0 | OutStreamer->EmitWinCFIPushFrame(MI->getOperand(0).getImm()); |
1573 | 0 | return; |
1574 | 126 | |
1575 | 382 | case X86::SEH_EndPrologue: |
1576 | 382 | assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?"); |
1577 | 382 | OutStreamer->EmitWinCFIEndProlog(); |
1578 | 382 | return; |
1579 | 126 | |
1580 | 356 | case X86::SEH_Epilogue: { |
1581 | 356 | assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?"); |
1582 | 356 | MachineBasicBlock::const_iterator MBBI(MI); |
1583 | 356 | // Check if preceded by a call and emit nop if so. |
1584 | 356 | for (MBBI = PrevCrossBBInst(MBBI); |
1585 | 413 | MBBI != MachineBasicBlock::const_iterator(); |
1586 | 356 | MBBI = PrevCrossBBInst(MBBI)57 ) { |
1587 | 413 | // Conservatively assume that pseudo instructions don't emit code and keep |
1588 | 413 | // looking for a call. We may emit an unnecessary nop in some cases. |
1589 | 413 | if (!MBBI->isPseudo()413 ) { |
1590 | 356 | if (MBBI->isCall()) |
1591 | 109 | EmitAndCountInstruction(MCInstBuilder(X86::NOOP)); |
1592 | 356 | break; |
1593 | 356 | } |
1594 | 413 | } |
1595 | 356 | return; |
1596 | 126 | } |
1597 | 126 | |
1598 | 126 | // Lower PSHUFB and VPERMILP normally but add a comment if we can find |
1599 | 126 | // a constant shuffle mask. We won't be able to do this at the MC layer |
1600 | 126 | // because the mask isn't an immediate. |
1601 | 1.85k | case X86::PSHUFBrm: |
1602 | 1.85k | case X86::VPSHUFBrm: |
1603 | 1.85k | case X86::VPSHUFBYrm: |
1604 | 1.85k | case X86::VPSHUFBZ128rm: |
1605 | 1.85k | case X86::VPSHUFBZ128rmk: |
1606 | 1.85k | case X86::VPSHUFBZ128rmkz: |
1607 | 1.85k | case X86::VPSHUFBZ256rm: |
1608 | 1.85k | case X86::VPSHUFBZ256rmk: |
1609 | 1.85k | case X86::VPSHUFBZ256rmkz: |
1610 | 1.85k | case X86::VPSHUFBZrm: |
1611 | 1.85k | case X86::VPSHUFBZrmk: |
1612 | 1.85k | case X86::VPSHUFBZrmkz: { |
1613 | 1.85k | if (!OutStreamer->isVerboseAsm()) |
1614 | 0 | break; |
1615 | 1.85k | unsigned SrcIdx, MaskIdx; |
1616 | 1.85k | switch (MI->getOpcode()) { |
1617 | 0 | default: 0 llvm_unreachable0 ("Invalid opcode"); |
1618 | 1.79k | case X86::PSHUFBrm: |
1619 | 1.79k | case X86::VPSHUFBrm: |
1620 | 1.79k | case X86::VPSHUFBYrm: |
1621 | 1.79k | case X86::VPSHUFBZ128rm: |
1622 | 1.79k | case X86::VPSHUFBZ256rm: |
1623 | 1.79k | case X86::VPSHUFBZrm: |
1624 | 1.79k | SrcIdx = 1; MaskIdx = 5; break; |
1625 | 31 | case X86::VPSHUFBZ128rmkz: |
1626 | 31 | case X86::VPSHUFBZ256rmkz: |
1627 | 31 | case X86::VPSHUFBZrmkz: |
1628 | 31 | SrcIdx = 2; MaskIdx = 6; break; |
1629 | 30 | case X86::VPSHUFBZ128rmk: |
1630 | 30 | case X86::VPSHUFBZ256rmk: |
1631 | 30 | case X86::VPSHUFBZrmk: |
1632 | 30 | SrcIdx = 3; MaskIdx = 7; break; |
1633 | 1.85k | } |
1634 | 1.85k | |
1635 | 1.85k | assert(MI->getNumOperands() >= 6 && |
1636 | 1.85k | "We should always have at least 6 operands!"); |
1637 | 1.85k | |
1638 | 1.85k | const MachineOperand &MaskOp = MI->getOperand(MaskIdx); |
1639 | 1.85k | if (auto *C1.85k = getConstantFromPool(*MI, MaskOp)) { |
1640 | 1.83k | SmallVector<int, 64> Mask; |
1641 | 1.83k | DecodePSHUFBMask(C, Mask); |
1642 | 1.83k | if (!Mask.empty()) |
1643 | 1.83k | OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask), |
1644 | 1.83k | !EnablePrintSchedInfo); |
1645 | 1.83k | } |
1646 | 1.85k | break; |
1647 | 1.85k | } |
1648 | 1.85k | |
1649 | 188 | case X86::VPERMILPSrm: |
1650 | 188 | case X86::VPERMILPSYrm: |
1651 | 188 | case X86::VPERMILPSZ128rm: |
1652 | 188 | case X86::VPERMILPSZ128rmk: |
1653 | 188 | case X86::VPERMILPSZ128rmkz: |
1654 | 188 | case X86::VPERMILPSZ256rm: |
1655 | 188 | case X86::VPERMILPSZ256rmk: |
1656 | 188 | case X86::VPERMILPSZ256rmkz: |
1657 | 188 | case X86::VPERMILPSZrm: |
1658 | 188 | case X86::VPERMILPSZrmk: |
1659 | 188 | case X86::VPERMILPSZrmkz: |
1660 | 188 | case X86::VPERMILPDrm: |
1661 | 188 | case X86::VPERMILPDYrm: |
1662 | 188 | case X86::VPERMILPDZ128rm: |
1663 | 188 | case X86::VPERMILPDZ128rmk: |
1664 | 188 | case X86::VPERMILPDZ128rmkz: |
1665 | 188 | case X86::VPERMILPDZ256rm: |
1666 | 188 | case X86::VPERMILPDZ256rmk: |
1667 | 188 | case X86::VPERMILPDZ256rmkz: |
1668 | 188 | case X86::VPERMILPDZrm: |
1669 | 188 | case X86::VPERMILPDZrmk: |
1670 | 188 | case X86::VPERMILPDZrmkz: { |
1671 | 188 | if (!OutStreamer->isVerboseAsm()) |
1672 | 0 | break; |
1673 | 188 | unsigned SrcIdx, MaskIdx; |
1674 | 188 | unsigned ElSize; |
1675 | 188 | switch (MI->getOpcode()) { |
1676 | 0 | default: 0 llvm_unreachable0 ("Invalid opcode"); |
1677 | 128 | case X86::VPERMILPSrm: |
1678 | 128 | case X86::VPERMILPSYrm: |
1679 | 128 | case X86::VPERMILPSZ128rm: |
1680 | 128 | case X86::VPERMILPSZ256rm: |
1681 | 128 | case X86::VPERMILPSZrm: |
1682 | 128 | SrcIdx = 1; MaskIdx = 5; ElSize = 32; break; |
1683 | 20 | case X86::VPERMILPSZ128rmkz: |
1684 | 20 | case X86::VPERMILPSZ256rmkz: |
1685 | 20 | case X86::VPERMILPSZrmkz: |
1686 | 20 | SrcIdx = 2; MaskIdx = 6; ElSize = 32; break; |
1687 | 19 | case X86::VPERMILPSZ128rmk: |
1688 | 19 | case X86::VPERMILPSZ256rmk: |
1689 | 19 | case X86::VPERMILPSZrmk: |
1690 | 19 | SrcIdx = 3; MaskIdx = 7; ElSize = 32; break; |
1691 | 19 | case X86::VPERMILPDrm: |
1692 | 19 | case X86::VPERMILPDYrm: |
1693 | 19 | case X86::VPERMILPDZ128rm: |
1694 | 19 | case X86::VPERMILPDZ256rm: |
1695 | 19 | case X86::VPERMILPDZrm: |
1696 | 19 | SrcIdx = 1; MaskIdx = 5; ElSize = 64; break; |
1697 | 1 | case X86::VPERMILPDZ128rmkz: |
1698 | 1 | case X86::VPERMILPDZ256rmkz: |
1699 | 1 | case X86::VPERMILPDZrmkz: |
1700 | 1 | SrcIdx = 2; MaskIdx = 6; ElSize = 64; break; |
1701 | 1 | case X86::VPERMILPDZ128rmk: |
1702 | 1 | case X86::VPERMILPDZ256rmk: |
1703 | 1 | case X86::VPERMILPDZrmk: |
1704 | 1 | SrcIdx = 3; MaskIdx = 7; ElSize = 64; break; |
1705 | 188 | } |
1706 | 188 | |
1707 | 188 | assert(MI->getNumOperands() >= 6 && |
1708 | 188 | "We should always have at least 6 operands!"); |
1709 | 188 | |
1710 | 188 | const MachineOperand &MaskOp = MI->getOperand(MaskIdx); |
1711 | 188 | if (auto *C188 = getConstantFromPool(*MI, MaskOp)) { |
1712 | 143 | SmallVector<int, 16> Mask; |
1713 | 143 | DecodeVPERMILPMask(C, ElSize, Mask); |
1714 | 143 | if (!Mask.empty()) |
1715 | 143 | OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask), |
1716 | 143 | !EnablePrintSchedInfo); |
1717 | 143 | } |
1718 | 188 | break; |
1719 | 188 | } |
1720 | 188 | |
1721 | 32 | case X86::VPERMIL2PDrm: |
1722 | 32 | case X86::VPERMIL2PSrm: |
1723 | 32 | case X86::VPERMIL2PDYrm: |
1724 | 32 | case X86::VPERMIL2PSYrm: { |
1725 | 32 | if (!OutStreamer->isVerboseAsm()) |
1726 | 0 | break; |
1727 | 32 | assert(MI->getNumOperands() >= 8 && |
1728 | 32 | "We should always have at least 8 operands!"); |
1729 | 32 | |
1730 | 32 | const MachineOperand &CtrlOp = MI->getOperand(MI->getNumOperands() - 1); |
1731 | 32 | if (!CtrlOp.isImm()) |
1732 | 0 | break; |
1733 | 32 | |
1734 | 32 | unsigned ElSize; |
1735 | 32 | switch (MI->getOpcode()) { |
1736 | 0 | default: 0 llvm_unreachable0 ("Invalid opcode"); |
1737 | 18 | case X86::VPERMIL2PSrm: 18 case X86::VPERMIL2PSYrm: ElSize = 32; break18 ; |
1738 | 14 | case X86::VPERMIL2PDrm: 14 case X86::VPERMIL2PDYrm: ElSize = 64; break14 ; |
1739 | 32 | } |
1740 | 32 | |
1741 | 32 | const MachineOperand &MaskOp = MI->getOperand(6); |
1742 | 32 | if (auto *C32 = getConstantFromPool(*MI, MaskOp)) { |
1743 | 24 | SmallVector<int, 16> Mask; |
1744 | 24 | DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Mask); |
1745 | 24 | if (!Mask.empty()) |
1746 | 24 | OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask), |
1747 | 24 | !EnablePrintSchedInfo); |
1748 | 24 | } |
1749 | 32 | break; |
1750 | 32 | } |
1751 | 32 | |
1752 | 34 | case X86::VPPERMrrm: { |
1753 | 34 | if (!OutStreamer->isVerboseAsm()) |
1754 | 0 | break; |
1755 | 34 | assert(MI->getNumOperands() >= 7 && |
1756 | 34 | "We should always have at least 7 operands!"); |
1757 | 34 | |
1758 | 34 | const MachineOperand &MaskOp = MI->getOperand(6); |
1759 | 34 | if (auto *C34 = getConstantFromPool(*MI, MaskOp)) { |
1760 | 32 | SmallVector<int, 16> Mask; |
1761 | 32 | DecodeVPPERMMask(C, Mask); |
1762 | 32 | if (!Mask.empty()) |
1763 | 14 | OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask), |
1764 | 14 | !EnablePrintSchedInfo); |
1765 | 32 | } |
1766 | 34 | break; |
1767 | 34 | } |
1768 | 34 | |
1769 | 34 | #define MOV_CASE(Prefix, Suffix) \ |
1770 | 50.8k | case X86::Prefix##MOVAPD##Suffix##rm: \ |
1771 | 50.8k | case X86::Prefix##MOVAPS##Suffix##rm: \ |
1772 | 50.8k | case X86::Prefix##MOVUPD##Suffix##rm: \ |
1773 | 50.8k | case X86::Prefix##MOVUPS##Suffix##rm: \ |
1774 | 50.8k | case X86::Prefix##MOVDQA##Suffix##rm: \ |
1775 | 50.8k | case X86::Prefix##MOVDQU##Suffix##rm: |
1776 | 34 | |
1777 | 34 | #define MOV_AVX512_CASE(Suffix) \ |
1778 | 50.8k | case X86::VMOVDQA64##Suffix##rm: \ |
1779 | 50.8k | case X86::VMOVDQA32##Suffix##rm: \ |
1780 | 50.8k | case X86::VMOVDQU64##Suffix##rm: \ |
1781 | 50.8k | case X86::VMOVDQU32##Suffix##rm: \ |
1782 | 50.8k | case X86::VMOVDQU16##Suffix##rm: \ |
1783 | 50.8k | case X86::VMOVDQU8##Suffix##rm: \ |
1784 | 50.8k | case X86::VMOVAPS##Suffix##rm: \ |
1785 | 50.8k | case X86::VMOVAPD##Suffix##rm: \ |
1786 | 50.8k | case X86::VMOVUPS##Suffix##rm: \ |
1787 | 50.8k | case X86::VMOVUPD##Suffix##rm: |
1788 | 34 | |
1789 | 34 | #define CASE_ALL_MOV_RM() \ |
1790 | 16.9k | MOV_CASE16.9k (, ) /* SSE */ \ |
1791 | 16.9k | MOV_CASE16.9k (V, ) /* AVX-128 */ \ |
1792 | 16.9k | MOV_CASE16.9k (V, Y) /* AVX-256 */ \ |
1793 | 16.9k | MOV_AVX512_CASE16.9k (Z) \ |
1794 | 16.9k | MOV_AVX512_CASE16.9k (Z256) \ |
1795 | 16.9k | MOV_AVX512_CASE(Z128) |
1796 | 34 | |
1797 | 34 | // For loads from a constant pool to a vector register, print the constant |
1798 | 34 | // loaded. |
1799 | 34 | CASE_ALL_MOV_RM34 () |
1800 | 16.9k | case X86::VBROADCASTF128: |
1801 | 16.9k | case X86::VBROADCASTI128: |
1802 | 16.9k | case X86::VBROADCASTF32X4Z256rm: |
1803 | 16.9k | case X86::VBROADCASTF32X4rm: |
1804 | 16.9k | case X86::VBROADCASTF32X8rm: |
1805 | 16.9k | case X86::VBROADCASTF64X2Z128rm: |
1806 | 16.9k | case X86::VBROADCASTF64X2rm: |
1807 | 16.9k | case X86::VBROADCASTF64X4rm: |
1808 | 16.9k | case X86::VBROADCASTI32X4Z256rm: |
1809 | 16.9k | case X86::VBROADCASTI32X4rm: |
1810 | 16.9k | case X86::VBROADCASTI32X8rm: |
1811 | 16.9k | case X86::VBROADCASTI64X2Z128rm: |
1812 | 16.9k | case X86::VBROADCASTI64X2rm: |
1813 | 16.9k | case X86::VBROADCASTI64X4rm: |
1814 | 16.9k | if (!OutStreamer->isVerboseAsm()) |
1815 | 2.76k | break; |
1816 | 14.1k | if (14.1k MI->getNumOperands() <= 414.1k ) |
1817 | 0 | break; |
1818 | 14.1k | if (auto *14.1k C14.1k = getConstantFromPool(*MI, MI->getOperand(4))) { |
1819 | 6.19k | int NumLanes = 1; |
1820 | 6.19k | // Override NumLanes for the broadcast instructions. |
1821 | 6.19k | switch (MI->getOpcode()) { |
1822 | 25 | case X86::VBROADCASTF128: NumLanes = 2; break; |
1823 | 38 | case X86::VBROADCASTI128: NumLanes = 2; break; |
1824 | 0 | case X86::VBROADCASTF32X4Z256rm: NumLanes = 2; break; |
1825 | 8 | case X86::VBROADCASTF32X4rm: NumLanes = 4; break; |
1826 | 0 | case X86::VBROADCASTF32X8rm: NumLanes = 2; break; |
1827 | 0 | case X86::VBROADCASTF64X2Z128rm: NumLanes = 2; break; |
1828 | 0 | case X86::VBROADCASTF64X2rm: NumLanes = 4; break; |
1829 | 8 | case X86::VBROADCASTF64X4rm: NumLanes = 2; break; |
1830 | 0 | case X86::VBROADCASTI32X4Z256rm: NumLanes = 2; break; |
1831 | 10 | case X86::VBROADCASTI32X4rm: NumLanes = 4; break; |
1832 | 0 | case X86::VBROADCASTI32X8rm: NumLanes = 2; break; |
1833 | 0 | case X86::VBROADCASTI64X2Z128rm: NumLanes = 2; break; |
1834 | 0 | case X86::VBROADCASTI64X2rm: NumLanes = 4; break; |
1835 | 8 | case X86::VBROADCASTI64X4rm: NumLanes = 2; break; |
1836 | 6.19k | } |
1837 | 6.19k | |
1838 | 6.19k | std::string Comment; |
1839 | 6.19k | raw_string_ostream CS(Comment); |
1840 | 6.19k | const MachineOperand &DstOp = MI->getOperand(0); |
1841 | 6.19k | CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = "; |
1842 | 6.19k | if (auto *CDS6.19k = dyn_cast<ConstantDataSequential>(C)) { |
1843 | 5.65k | CS << "["; |
1844 | 11.4k | for (int l = 0; l != NumLanes11.4k ; ++l5.78k ) { |
1845 | 82.9k | for (int i = 0, NumElements = CDS->getNumElements(); i < NumElements82.9k ; ++i77.1k ) { |
1846 | 77.1k | if (i != 0 || 77.1k l != 05.78k ) |
1847 | 71.4k | CS << ","; |
1848 | 77.1k | if (CDS->getElementType()->isIntegerTy()) |
1849 | 75.3k | CS << CDS->getElementAsInteger(i); |
1850 | 1.80k | else if (1.80k CDS->getElementType()->isFloatTy()1.80k ) |
1851 | 1.33k | CS << CDS->getElementAsFloat(i); |
1852 | 468 | else if (468 CDS->getElementType()->isDoubleTy()468 ) |
1853 | 468 | CS << CDS->getElementAsDouble(i); |
1854 | 468 | else |
1855 | 0 | CS << "?"; |
1856 | 77.1k | } |
1857 | 5.78k | } |
1858 | 5.65k | CS << "]"; |
1859 | 5.65k | OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo); |
1860 | 6.19k | } else if (auto *538 CV538 = dyn_cast<ConstantVector>(C)) { |
1861 | 525 | CS << "<"; |
1862 | 1.05k | for (int l = 0; l != NumLanes1.05k ; ++l525 ) { |
1863 | 8.30k | for (int i = 0, NumOperands = CV->getNumOperands(); i < NumOperands8.30k ; ++i7.78k ) { |
1864 | 7.78k | if (i != 0 || 7.78k l != 0525 ) |
1865 | 7.25k | CS << ","; |
1866 | 7.78k | printConstant(CV->getOperand(i), CS); |
1867 | 7.78k | } |
1868 | 525 | } |
1869 | 538 | CS << ">"; |
1870 | 538 | OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo); |
1871 | 538 | } |
1872 | 6.19k | } |
1873 | 14.1k | break; |
1874 | 1.23k | case X86::VBROADCASTSSrm: |
1875 | 1.23k | case X86::VBROADCASTSSYrm: |
1876 | 1.23k | case X86::VBROADCASTSSZ128m: |
1877 | 1.23k | case X86::VBROADCASTSSZ256m: |
1878 | 1.23k | case X86::VBROADCASTSSZm: |
1879 | 1.23k | case X86::VBROADCASTSDYrm: |
1880 | 1.23k | case X86::VBROADCASTSDZ256m: |
1881 | 1.23k | case X86::VBROADCASTSDZm: |
1882 | 1.23k | case X86::VPBROADCASTBrm: |
1883 | 1.23k | case X86::VPBROADCASTBYrm: |
1884 | 1.23k | case X86::VPBROADCASTBZ128m: |
1885 | 1.23k | case X86::VPBROADCASTBZ256m: |
1886 | 1.23k | case X86::VPBROADCASTBZm: |
1887 | 1.23k | case X86::VPBROADCASTDrm: |
1888 | 1.23k | case X86::VPBROADCASTDYrm: |
1889 | 1.23k | case X86::VPBROADCASTDZ128m: |
1890 | 1.23k | case X86::VPBROADCASTDZ256m: |
1891 | 1.23k | case X86::VPBROADCASTDZm: |
1892 | 1.23k | case X86::VPBROADCASTQrm: |
1893 | 1.23k | case X86::VPBROADCASTQYrm: |
1894 | 1.23k | case X86::VPBROADCASTQZ128m: |
1895 | 1.23k | case X86::VPBROADCASTQZ256m: |
1896 | 1.23k | case X86::VPBROADCASTQZm: |
1897 | 1.23k | case X86::VPBROADCASTWrm: |
1898 | 1.23k | case X86::VPBROADCASTWYrm: |
1899 | 1.23k | case X86::VPBROADCASTWZ128m: |
1900 | 1.23k | case X86::VPBROADCASTWZ256m: |
1901 | 1.23k | case X86::VPBROADCASTWZm: |
1902 | 1.23k | if (!OutStreamer->isVerboseAsm()) |
1903 | 45 | break; |
1904 | 1.19k | if (1.19k MI->getNumOperands() <= 41.19k ) |
1905 | 0 | break; |
1906 | 1.19k | if (auto *1.19k C1.19k = getConstantFromPool(*MI, MI->getOperand(4))) { |
1907 | 544 | int NumElts; |
1908 | 544 | switch (MI->getOpcode()) { |
1909 | 0 | default: 0 llvm_unreachable0 ("Invalid opcode"); |
1910 | 94 | case X86::VBROADCASTSSrm: NumElts = 4; break; |
1911 | 49 | case X86::VBROADCASTSSYrm: NumElts = 8; break; |
1912 | 0 | case X86::VBROADCASTSSZ128m: NumElts = 4; break; |
1913 | 0 | case X86::VBROADCASTSSZ256m: NumElts = 8; break; |
1914 | 5 | case X86::VBROADCASTSSZm: NumElts = 16; break; |
1915 | 29 | case X86::VBROADCASTSDYrm: NumElts = 4; break; |
1916 | 0 | case X86::VBROADCASTSDZ256m: NumElts = 4; break; |
1917 | 7 | case X86::VBROADCASTSDZm: NumElts = 8; break; |
1918 | 1 | case X86::VPBROADCASTBrm: NumElts = 16; break; |
1919 | 1 | case X86::VPBROADCASTBYrm: NumElts = 32; break; |
1920 | 0 | case X86::VPBROADCASTBZ128m: NumElts = 16; break; |
1921 | 0 | case X86::VPBROADCASTBZ256m: NumElts = 32; break; |
1922 | 0 | case X86::VPBROADCASTBZm: NumElts = 64; break; |
1923 | 104 | case X86::VPBROADCASTDrm: NumElts = 4; break; |
1924 | 61 | case X86::VPBROADCASTDYrm: NumElts = 8; break; |
1925 | 0 | case X86::VPBROADCASTDZ128m: NumElts = 4; break; |
1926 | 0 | case X86::VPBROADCASTDZ256m: NumElts = 8; break; |
1927 | 20 | case X86::VPBROADCASTDZm: NumElts = 16; break; |
1928 | 16 | case X86::VPBROADCASTQrm: NumElts = 2; break; |
1929 | 109 | case X86::VPBROADCASTQYrm: NumElts = 4; break; |
1930 | 0 | case X86::VPBROADCASTQZ128m: NumElts = 2; break; |
1931 | 0 | case X86::VPBROADCASTQZ256m: NumElts = 4; break; |
1932 | 23 | case X86::VPBROADCASTQZm: NumElts = 8; break; |
1933 | 7 | case X86::VPBROADCASTWrm: NumElts = 8; break; |
1934 | 16 | case X86::VPBROADCASTWYrm: NumElts = 16; break; |
1935 | 0 | case X86::VPBROADCASTWZ128m: NumElts = 8; break; |
1936 | 0 | case X86::VPBROADCASTWZ256m: NumElts = 16; break; |
1937 | 2 | case X86::VPBROADCASTWZm: NumElts = 32; break; |
1938 | 544 | } |
1939 | 544 | |
1940 | 544 | std::string Comment; |
1941 | 544 | raw_string_ostream CS(Comment); |
1942 | 544 | const MachineOperand &DstOp = MI->getOperand(0); |
1943 | 544 | CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = "; |
1944 | 544 | CS << "["; |
1945 | 3.86k | for (int i = 0; i != NumElts3.86k ; ++i3.32k ) { |
1946 | 3.32k | if (i != 0) |
1947 | 2.77k | CS << ","; |
1948 | 3.32k | printConstant(C, CS); |
1949 | 3.32k | } |
1950 | 6 | CS << "]"; |
1951 | 6 | OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo); |
1952 | 6 | } |
1953 | 845k | } |
1954 | 845k | |
1955 | 841k | MCInst TmpInst; |
1956 | 841k | MCInstLowering.Lower(MI, TmpInst); |
1957 | 841k | |
1958 | 841k | // Stackmap shadows cannot include branch targets, so we can count the bytes |
1959 | 841k | // in a call towards the shadow, but must ensure that the no thread returns |
1960 | 841k | // in to the stackmap shadow. The only way to achieve this is if the call |
1961 | 841k | // is at the end of the shadow. |
1962 | 841k | if (MI->isCall()841k ) { |
1963 | 29.5k | // Count then size of the call towards the shadow |
1964 | 29.5k | SMShadowTracker.count(TmpInst, getSubtargetInfo(), CodeEmitter.get()); |
1965 | 29.5k | // Then flush the shadow so that we fill with nops before the call, not |
1966 | 29.5k | // after it. |
1967 | 29.5k | SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo()); |
1968 | 29.5k | // Then emit the call |
1969 | 29.5k | OutStreamer->EmitInstruction(TmpInst, getSubtargetInfo()); |
1970 | 29.5k | return; |
1971 | 29.5k | } |
1972 | 812k | |
1973 | 812k | EmitAndCountInstruction(TmpInst); |
1974 | 812k | } |