Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Target/X86/X86MCInstLower.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- X86MCInstLower.cpp - Convert X86 MachineInstr to an MCInst --------===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// This file contains code to lower X86 MachineInstrs to their corresponding
11
// MCInst records.
12
//
13
//===----------------------------------------------------------------------===//
14
15
#include "InstPrinter/X86ATTInstPrinter.h"
16
#include "InstPrinter/X86InstComments.h"
17
#include "MCTargetDesc/X86BaseInfo.h"
18
#include "Utils/X86ShuffleDecode.h"
19
#include "X86AsmPrinter.h"
20
#include "X86RegisterInfo.h"
21
#include "X86ShuffleDecodeConstantPool.h"
22
#include "llvm/ADT/Optional.h"
23
#include "llvm/ADT/SmallString.h"
24
#include "llvm/ADT/iterator_range.h"
25
#include "llvm/BinaryFormat/ELF.h"
26
#include "llvm/CodeGen/MachineConstantPool.h"
27
#include "llvm/CodeGen/MachineFunction.h"
28
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
29
#include "llvm/CodeGen/MachineOperand.h"
30
#include "llvm/CodeGen/StackMaps.h"
31
#include "llvm/IR/DataLayout.h"
32
#include "llvm/IR/GlobalValue.h"
33
#include "llvm/IR/Mangler.h"
34
#include "llvm/MC/MCAsmInfo.h"
35
#include "llvm/MC/MCCodeEmitter.h"
36
#include "llvm/MC/MCContext.h"
37
#include "llvm/MC/MCExpr.h"
38
#include "llvm/MC/MCFixup.h"
39
#include "llvm/MC/MCInst.h"
40
#include "llvm/MC/MCInstBuilder.h"
41
#include "llvm/MC/MCSection.h"
42
#include "llvm/MC/MCSectionELF.h"
43
#include "llvm/MC/MCSectionMachO.h"
44
#include "llvm/MC/MCStreamer.h"
45
#include "llvm/MC/MCSymbol.h"
46
#include "llvm/MC/MCSymbolELF.h"
47
#include "llvm/Support/TargetRegistry.h"
48
#include "llvm/Target/TargetLoweringObjectFile.h"
49
50
using namespace llvm;
51
52
namespace {
53
54
/// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst.
55
class X86MCInstLower {
56
  MCContext &Ctx;
57
  const MachineFunction &MF;
58
  const TargetMachine &TM;
59
  const MCAsmInfo &MAI;
60
  X86AsmPrinter &AsmPrinter;
61
public:
62
  X86MCInstLower(const MachineFunction &MF, X86AsmPrinter &asmprinter);
63
64
  Optional<MCOperand> LowerMachineOperand(const MachineInstr *MI,
65
                                          const MachineOperand &MO) const;
66
  void Lower(const MachineInstr *MI, MCInst &OutMI) const;
67
68
  MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const;
69
  MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
70
71
private:
72
  MachineModuleInfoMachO &getMachOMMI() const;
73
};
74
75
} // end anonymous namespace
76
77
// Emit a minimal sequence of nops spanning NumBytes bytes.
78
static void EmitNops(MCStreamer &OS, unsigned NumBytes, bool Is64Bit,
79
                     const MCSubtargetInfo &STI);
80
81
void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst,
82
                                                 const MCSubtargetInfo &STI,
83
845k
                                                 MCCodeEmitter *CodeEmitter) {
84
845k
  if (
InShadow845k
) {
85
8.02k
    SmallString<256> Code;
86
8.02k
    SmallVector<MCFixup, 4> Fixups;
87
8.02k
    raw_svector_ostream VecOS(Code);
88
8.02k
    CodeEmitter->encodeInstruction(Inst, VecOS, Fixups, STI);
89
8.02k
    CurrentShadowSize += Code.size();
90
8.02k
    if (CurrentShadowSize >= RequiredShadowSize)
91
8.00k
      InShadow = false; // The shadow is big enough. Stop counting.
92
8.02k
  }
93
845k
}
94
95
void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding(
96
166k
    MCStreamer &OutStreamer, const MCSubtargetInfo &STI) {
97
166k
  if (
InShadow && 166k
CurrentShadowSize < RequiredShadowSize126
) {
98
73
    InShadow = false;
99
73
    EmitNops(OutStreamer, RequiredShadowSize - CurrentShadowSize,
100
73
             MF->getSubtarget<X86Subtarget>().is64Bit(), STI);
101
73
  }
102
166k
}
103
104
815k
void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) {
105
815k
  OutStreamer->EmitInstruction(Inst, getSubtargetInfo(), EnablePrintSchedInfo);
106
815k
  SMShadowTracker.count(Inst, getSubtargetInfo(), CodeEmitter.get());
107
815k
}
108
109
X86MCInstLower::X86MCInstLower(const MachineFunction &mf,
110
                               X86AsmPrinter &asmprinter)
111
    : Ctx(mf.getContext()), MF(mf), TM(mf.getTarget()), MAI(*TM.getMCAsmInfo()),
112
845k
      AsmPrinter(asmprinter) {}
113
114
1.12k
MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
115
1.12k
  return MF.getMMI().getObjFileInfo<MachineModuleInfoMachO>();
116
1.12k
}
117
118
119
/// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol
120
/// operand to an MCSymbol.
121
MCSymbol *X86MCInstLower::
122
101k
GetSymbolFromOperand(const MachineOperand &MO) const {
123
101k
  const DataLayout &DL = MF.getDataLayout();
124
101k
  assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) && "Isn't a symbol reference");
125
101k
126
101k
  MCSymbol *Sym = nullptr;
127
101k
  SmallString<128> Name;
128
101k
  StringRef Suffix;
129
101k
130
101k
  switch (MO.getTargetFlags()) {
131
61
  case X86II::MO_DLLIMPORT:
132
61
    // Handle dllimport linkage.
133
61
    Name += "__imp_";
134
61
    break;
135
1.12k
  case X86II::MO_DARWIN_NONLAZY:
136
1.12k
  case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
137
1.12k
    Suffix = "$non_lazy_ptr";
138
1.12k
    break;
139
101k
  }
140
101k
141
101k
  
if (101k
!Suffix.empty()101k
)
142
1.12k
    Name += DL.getPrivateGlobalPrefix();
143
101k
144
101k
  if (
MO.isGlobal()101k
) {
145
53.4k
    const GlobalValue *GV = MO.getGlobal();
146
53.4k
    AsmPrinter.getNameWithPrefix(Name, GV);
147
101k
  } else 
if (48.4k
MO.isSymbol()48.4k
) {
148
2.29k
    Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL);
149
48.4k
  } else 
if (46.1k
MO.isMBB()46.1k
) {
150
46.1k
    assert(Suffix.empty());
151
46.1k
    Sym = MO.getMBB()->getSymbol();
152
46.1k
  }
153
101k
154
101k
  Name += Suffix;
155
101k
  if (!Sym)
156
55.7k
    Sym = Ctx.getOrCreateSymbol(Name);
157
101k
158
101k
  // If the target flags on the operand changes the name of the symbol, do that
159
101k
  // before we return the symbol.
160
101k
  switch (MO.getTargetFlags()) {
161
100k
  default: break;
162
1.12k
  case X86II::MO_DARWIN_NONLAZY:
163
1.12k
  case X86II::MO_DARWIN_NONLAZY_PIC_BASE: {
164
1.12k
    MachineModuleInfoImpl::StubValueTy &StubSym =
165
1.12k
      getMachOMMI().getGVStubEntry(Sym);
166
1.12k
    if (
!StubSym.getPointer()1.12k
) {
167
388
      assert(MO.isGlobal() && "Extern symbol not handled yet");
168
388
      StubSym =
169
388
        MachineModuleInfoImpl::
170
388
        StubValueTy(AsmPrinter.getSymbol(MO.getGlobal()),
171
388
                    !MO.getGlobal()->hasInternalLinkage());
172
388
    }
173
1.12k
    break;
174
101k
  }
175
101k
  }
176
101k
177
101k
  return Sym;
178
101k
}
179
180
MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
181
116k
                                             MCSymbol *Sym) const {
182
116k
  // FIXME: We would like an efficient form for this, so we don't have to do a
183
116k
  // lot of extra uniquing.
184
116k
  const MCExpr *Expr = nullptr;
185
116k
  MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;
186
116k
187
116k
  switch (MO.getTargetFlags()) {
188
0
  
default: 0
llvm_unreachable0
("Unknown target flag on GV operand");
189
104k
  case X86II::MO_NO_FLAG:    // No flag.
190
104k
  // These affect the name of the symbol, not any suffix.
191
104k
  case X86II::MO_DARWIN_NONLAZY:
192
104k
  case X86II::MO_DLLIMPORT:
193
104k
    break;
194
104k
195
53
  case X86II::MO_TLVP:      RefKind = MCSymbolRefExpr::VK_TLVP; break;
196
0
  case X86II::MO_TLVP_PIC_BASE:
197
0
    Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
198
0
    // Subtract the pic base.
199
0
    Expr = MCBinaryExpr::createSub(Expr,
200
0
                                  MCSymbolRefExpr::create(MF.getPICBaseSymbol(),
201
0
                                                           Ctx),
202
0
                                   Ctx);
203
0
    break;
204
68
  case X86II::MO_SECREL:    RefKind = MCSymbolRefExpr::VK_SECREL; break;
205
0
  case X86II::MO_TLSGD:     RefKind = MCSymbolRefExpr::VK_TLSGD; break;
206
0
  case X86II::MO_TLSLD:     RefKind = MCSymbolRefExpr::VK_TLSLD; break;
207
0
  case X86II::MO_TLSLDM:    RefKind = MCSymbolRefExpr::VK_TLSLDM; break;
208
21
  case X86II::MO_GOTTPOFF:  RefKind = MCSymbolRefExpr::VK_GOTTPOFF; break;
209
8
  case X86II::MO_INDNTPOFF: RefKind = MCSymbolRefExpr::VK_INDNTPOFF; break;
210
26
  case X86II::MO_TPOFF:     RefKind = MCSymbolRefExpr::VK_TPOFF; break;
211
17
  case X86II::MO_DTPOFF:    RefKind = MCSymbolRefExpr::VK_DTPOFF; break;
212
24
  case X86II::MO_NTPOFF:    RefKind = MCSymbolRefExpr::VK_NTPOFF; break;
213
5
  case X86II::MO_GOTNTPOFF: RefKind = MCSymbolRefExpr::VK_GOTNTPOFF; break;
214
3.32k
  case X86II::MO_GOTPCREL:  RefKind = MCSymbolRefExpr::VK_GOTPCREL; break;
215
55
  case X86II::MO_GOT:       RefKind = MCSymbolRefExpr::VK_GOT; break;
216
76
  case X86II::MO_GOTOFF:    RefKind = MCSymbolRefExpr::VK_GOTOFF; break;
217
243
  case X86II::MO_PLT:       RefKind = MCSymbolRefExpr::VK_PLT; break;
218
4
  case X86II::MO_ABS8:      RefKind = MCSymbolRefExpr::VK_X86_ABS8; break;
219
8.09k
  case X86II::MO_PIC_BASE_OFFSET:
220
8.09k
  case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
221
8.09k
    Expr = MCSymbolRefExpr::create(Sym, Ctx);
222
8.09k
    // Subtract the pic base.
223
8.09k
    Expr = MCBinaryExpr::createSub(Expr,
224
8.09k
                            MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx),
225
8.09k
                                   Ctx);
226
8.09k
    if (
MO.isJTI()8.09k
) {
227
160
      assert(MAI.doesSetDirectiveSuppressReloc());
228
160
      // If .set directive is supported, use it to reduce the number of
229
160
      // relocations the assembler will generate for differences between
230
160
      // local labels. This is only safe when the symbols are in the same
231
160
      // section so we are restricting it to jumptable references.
232
160
      MCSymbol *Label = Ctx.createTempSymbol();
233
160
      AsmPrinter.OutStreamer->EmitAssignment(Label, Expr);
234
160
      Expr = MCSymbolRefExpr::create(Label, Ctx);
235
160
    }
236
104k
    break;
237
116k
  }
238
116k
239
116k
  
if (116k
!Expr116k
)
240
108k
    Expr = MCSymbolRefExpr::create(Sym, RefKind, Ctx);
241
116k
242
116k
  if (
!MO.isJTI() && 116k
!MO.isMBB()116k
&&
MO.getOffset()70.0k
)
243
2.97k
    Expr = MCBinaryExpr::createAdd(Expr,
244
2.97k
                                   MCConstantExpr::create(MO.getOffset(), Ctx),
245
2.97k
                                   Ctx);
246
116k
  return MCOperand::createExpr(Expr);
247
116k
}
248
249
250
/// \brief Simplify FOO $imm, %{al,ax,eax,rax} to FOO $imm, for instruction with
251
/// a short fixed-register form.
252
12.6k
static void SimplifyShortImmForm(MCInst &Inst, unsigned Opcode) {
253
12.6k
  unsigned ImmOp = Inst.getNumOperands() - 1;
254
12.6k
  assert(Inst.getOperand(0).isReg() &&
255
12.6k
         (Inst.getOperand(ImmOp).isImm() || Inst.getOperand(ImmOp).isExpr()) &&
256
12.6k
         ((Inst.getNumOperands() == 3 && Inst.getOperand(1).isReg() &&
257
12.6k
           Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) ||
258
12.6k
          Inst.getNumOperands() == 2) && "Unexpected instruction!");
259
12.6k
260
12.6k
  // Check whether the destination register can be fixed.
261
12.6k
  unsigned Reg = Inst.getOperand(0).getReg();
262
12.6k
  if (
Reg != X86::AL && 12.6k
Reg != X86::AX9.96k
&&
Reg != X86::EAX9.96k
&&
Reg != X86::RAX8.81k
)
263
8.61k
    return;
264
3.99k
265
3.99k
  // If so, rewrite the instruction.
266
3.99k
  MCOperand Saved = Inst.getOperand(ImmOp);
267
3.99k
  Inst = MCInst();
268
3.99k
  Inst.setOpcode(Opcode);
269
3.99k
  Inst.addOperand(Saved);
270
3.99k
}
271
272
/// \brief If a movsx instruction has a shorter encoding for the used register
273
/// simplify the instruction to use it instead.
274
848
static void SimplifyMOVSX(MCInst &Inst) {
275
848
  unsigned NewOpcode = 0;
276
848
  unsigned Op0 = Inst.getOperand(0).getReg(), Op1 = Inst.getOperand(1).getReg();
277
848
  switch (Inst.getOpcode()) {
278
0
  default:
279
0
    llvm_unreachable("Unexpected instruction!");
280
2
  case X86::MOVSX16rr8:  // movsbw %al, %ax   --> cbtw
281
2
    if (
Op0 == X86::AX && 2
Op1 == X86::AL2
)
282
0
      NewOpcode = X86::CBW;
283
2
    break;
284
292
  case X86::MOVSX32rr16: // movswl %ax, %eax  --> cwtl
285
292
    if (
Op0 == X86::EAX && 292
Op1 == X86::AX123
)
286
69
      NewOpcode = X86::CWDE;
287
292
    break;
288
554
  case X86::MOVSX64rr32: // movslq %eax, %rax --> cltq
289
554
    if (
Op0 == X86::RAX && 554
Op1 == X86::EAX258
)
290
153
      NewOpcode = X86::CDQE;
291
554
    break;
292
848
  }
293
848
294
848
  
if (848
NewOpcode != 0848
) {
295
222
    Inst = MCInst();
296
222
    Inst.setOpcode(NewOpcode);
297
222
  }
298
848
}
299
300
/// \brief Simplify things like MOV32rm to MOV32o32a.
301
static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst,
302
61.7k
                                  unsigned Opcode) {
303
61.7k
  // Don't make these simplifications in 64-bit mode; other assemblers don't
304
61.7k
  // perform them because they make the code larger.
305
61.7k
  if (Printer.getSubtarget().is64Bit())
306
13.8k
    return;
307
47.8k
308
47.8k
  
bool IsStore = Inst.getOperand(0).isReg() && 47.8k
Inst.getOperand(1).isReg()47.8k
;
309
47.8k
  unsigned AddrBase = IsStore;
310
47.8k
  unsigned RegOp = IsStore ? 
030.4k
:
517.4k
;
311
47.8k
  unsigned AddrOp = AddrBase + 3;
312
47.8k
  assert(Inst.getNumOperands() == 6 && Inst.getOperand(RegOp).isReg() &&
313
47.8k
         Inst.getOperand(AddrBase + X86::AddrBaseReg).isReg() &&
314
47.8k
         Inst.getOperand(AddrBase + X86::AddrScaleAmt).isImm() &&
315
47.8k
         Inst.getOperand(AddrBase + X86::AddrIndexReg).isReg() &&
316
47.8k
         Inst.getOperand(AddrBase + X86::AddrSegmentReg).isReg() &&
317
47.8k
         (Inst.getOperand(AddrOp).isExpr() ||
318
47.8k
          Inst.getOperand(AddrOp).isImm()) &&
319
47.8k
         "Unexpected instruction!");
320
47.8k
321
47.8k
  // Check whether the destination register can be fixed.
322
47.8k
  unsigned Reg = Inst.getOperand(RegOp).getReg();
323
47.8k
  if (
Reg != X86::AL && 47.8k
Reg != X86::AX47.1k
&&
Reg != X86::EAX46.9k
&&
Reg != X86::RAX29.0k
)
324
29.0k
    return;
325
18.8k
326
18.8k
  // Check whether this is an absolute address.
327
18.8k
  // FIXME: We know TLVP symbol refs aren't, but there should be a better way
328
18.8k
  // to do this here.
329
18.8k
  bool Absolute = true;
330
18.8k
  if (
Inst.getOperand(AddrOp).isExpr()18.8k
) {
331
1.57k
    const MCExpr *MCE = Inst.getOperand(AddrOp).getExpr();
332
1.57k
    if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(MCE))
333
587
      
if (587
SRE->getKind() == MCSymbolRefExpr::VK_TLVP587
)
334
1
        Absolute = false;
335
1.57k
  }
336
18.8k
337
18.8k
  if (Absolute &&
338
18.8k
      (Inst.getOperand(AddrBase + X86::AddrBaseReg).getReg() != 0 ||
339
857
       Inst.getOperand(AddrBase + X86::AddrScaleAmt).getImm() != 1 ||
340
851
       Inst.getOperand(AddrBase + X86::AddrIndexReg).getReg() != 0))
341
18.0k
    return;
342
852
343
852
  // If so, rewrite the instruction.
344
852
  MCOperand Saved = Inst.getOperand(AddrOp);
345
852
  MCOperand Seg = Inst.getOperand(AddrBase + X86::AddrSegmentReg);
346
852
  Inst = MCInst();
347
852
  Inst.setOpcode(Opcode);
348
852
  Inst.addOperand(Saved);
349
852
  Inst.addOperand(Seg);
350
852
}
351
352
231
static unsigned getRetOpcode(const X86Subtarget &Subtarget) {
353
231
  return Subtarget.is64Bit() ? 
X86::RETQ151
:
X86::RETL80
;
354
231
}
355
356
Optional<MCOperand>
357
X86MCInstLower::LowerMachineOperand(const MachineInstr *MI,
358
3.23M
                                    const MachineOperand &MO) const {
359
3.23M
  switch (MO.getType()) {
360
0
  default:
361
0
    MI->print(errs());
362
0
    llvm_unreachable("unknown operand type");
363
2.52M
  case MachineOperand::MO_Register:
364
2.52M
    // Ignore all implicit register operands.
365
2.52M
    if (MO.isImplicit())
366
628k
      return None;
367
1.89M
    return MCOperand::createReg(MO.getReg());
368
570k
  case MachineOperand::MO_Immediate:
369
570k
    return MCOperand::createImm(MO.getImm());
370
101k
  case MachineOperand::MO_MachineBasicBlock:
371
101k
  case MachineOperand::MO_GlobalAddress:
372
101k
  case MachineOperand::MO_ExternalSymbol:
373
101k
    return LowerSymbolOperand(MO, GetSymbolFromOperand(MO));
374
67
  case MachineOperand::MO_MCSymbol:
375
67
    return LowerSymbolOperand(MO, MO.getMCSymbol());
376
573
  case MachineOperand::MO_JumpTableIndex:
377
573
    return LowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex()));
378
14.3k
  case MachineOperand::MO_ConstantPoolIndex:
379
14.3k
    return LowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex()));
380
21
  case MachineOperand::MO_BlockAddress:
381
21
    return LowerSymbolOperand(
382
21
        MO, AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()));
383
29.3k
  case MachineOperand::MO_RegisterMask:
384
29.3k
    // Ignore call clobbers.
385
29.3k
    return None;
386
0
  }
387
0
}
388
389
841k
void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
390
841k
  OutMI.setOpcode(MI->getOpcode());
391
841k
392
841k
  for (const MachineOperand &MO : MI->operands())
393
3.23M
    
if (auto 3.23M
MaybeMCOp3.23M
= LowerMachineOperand(MI, MO))
394
2.57M
      OutMI.addOperand(MaybeMCOp.getValue());
395
841k
396
841k
  // Handle a few special cases to eliminate operand modifiers.
397
843k
ReSimplify:
398
843k
  switch (OutMI.getOpcode()) {
399
28.9k
  case X86::LEA64_32r:
400
28.9k
  case X86::LEA64r:
401
28.9k
  case X86::LEA16r:
402
28.9k
  case X86::LEA32r:
403
28.9k
    // LEA should have a segment register, but it must be empty.
404
28.9k
    assert(OutMI.getNumOperands() == 1+X86::AddrNumOperands &&
405
28.9k
           "Unexpected # of LEA operands");
406
28.9k
    assert(OutMI.getOperand(1+X86::AddrSegmentReg).getReg() == 0 &&
407
28.9k
           "LEA has segment specified!");
408
28.9k
    break;
409
28.9k
410
28.9k
  // Commute operands to get a smaller encoding by using VEX.R instead of VEX.B
411
28.9k
  // if one of the registers is extended, but other isn't.
412
1.48k
  case X86::VMOVZPQILo2PQIrr:
413
1.48k
  case X86::VMOVAPDrr:
414
1.48k
  case X86::VMOVAPDYrr:
415
1.48k
  case X86::VMOVAPSrr:
416
1.48k
  case X86::VMOVAPSYrr:
417
1.48k
  case X86::VMOVDQArr:
418
1.48k
  case X86::VMOVDQAYrr:
419
1.48k
  case X86::VMOVDQUrr:
420
1.48k
  case X86::VMOVDQUYrr:
421
1.48k
  case X86::VMOVUPDrr:
422
1.48k
  case X86::VMOVUPDYrr:
423
1.48k
  case X86::VMOVUPSrr:
424
1.48k
  case X86::VMOVUPSYrr: {
425
1.48k
    if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
426
1.48k
        
X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg())1.46k
) {
427
4
      unsigned NewOpc;
428
4
      switch (OutMI.getOpcode()) {
429
0
      
default: 0
llvm_unreachable0
("Invalid opcode");
430
0
      case X86::VMOVZPQILo2PQIrr: NewOpc = X86::VMOVPQI2QIrr;   break;
431
0
      case X86::VMOVAPDrr:        NewOpc = X86::VMOVAPDrr_REV;  break;
432
0
      case X86::VMOVAPDYrr:       NewOpc = X86::VMOVAPDYrr_REV; break;
433
2
      case X86::VMOVAPSrr:        NewOpc = X86::VMOVAPSrr_REV;  break;
434
0
      case X86::VMOVAPSYrr:       NewOpc = X86::VMOVAPSYrr_REV; break;
435
2
      case X86::VMOVDQArr:        NewOpc = X86::VMOVDQArr_REV;  break;
436
0
      case X86::VMOVDQAYrr:       NewOpc = X86::VMOVDQAYrr_REV; break;
437
0
      case X86::VMOVDQUrr:        NewOpc = X86::VMOVDQUrr_REV;  break;
438
0
      case X86::VMOVDQUYrr:       NewOpc = X86::VMOVDQUYrr_REV; break;
439
0
      case X86::VMOVUPDrr:        NewOpc = X86::VMOVUPDrr_REV;  break;
440
0
      case X86::VMOVUPDYrr:       NewOpc = X86::VMOVUPDYrr_REV; break;
441
0
      case X86::VMOVUPSrr:        NewOpc = X86::VMOVUPSrr_REV;  break;
442
0
      case X86::VMOVUPSYrr:       NewOpc = X86::VMOVUPSYrr_REV; break;
443
4
      }
444
4
      OutMI.setOpcode(NewOpc);
445
4
    }
446
1.48k
    break;
447
1.48k
  }
448
45
  case X86::VMOVSDrr:
449
45
  case X86::VMOVSSrr: {
450
45
    if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
451
45
        
X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())45
) {
452
0
      unsigned NewOpc;
453
0
      switch (OutMI.getOpcode()) {
454
0
      
default: 0
llvm_unreachable0
("Invalid opcode");
455
0
      case X86::VMOVSDrr:   NewOpc = X86::VMOVSDrr_REV;   break;
456
0
      case X86::VMOVSSrr:   NewOpc = X86::VMOVSSrr_REV;   break;
457
0
      }
458
0
      OutMI.setOpcode(NewOpc);
459
0
    }
460
45
    break;
461
45
  }
462
45
463
45
  // TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have register
464
45
  // inputs modeled as normal uses instead of implicit uses.  As such, truncate
465
45
  // off all but the first operand (the callee).  FIXME: Change isel.
466
16.6k
  case X86::TAILJMPr64:
467
16.6k
  case X86::TAILJMPr64_REX:
468
16.6k
  case X86::CALL64r:
469
16.6k
  case X86::CALL64pcrel32: {
470
16.6k
    unsigned Opcode = OutMI.getOpcode();
471
16.6k
    MCOperand Saved = OutMI.getOperand(0);
472
16.6k
    OutMI = MCInst();
473
16.6k
    OutMI.setOpcode(Opcode);
474
16.6k
    OutMI.addOperand(Saved);
475
16.6k
    break;
476
16.6k
  }
477
16.6k
478
6
  case X86::EH_RETURN:
479
6
  case X86::EH_RETURN64: {
480
6
    OutMI = MCInst();
481
6
    OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
482
6
    break;
483
6
  }
484
6
485
31
  case X86::CLEANUPRET: {
486
31
    // Replace CATCHRET with the appropriate RET.
487
31
    OutMI = MCInst();
488
31
    OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
489
31
    break;
490
6
  }
491
6
492
61
  case X86::CATCHRET: {
493
61
    // Replace CATCHRET with the appropriate RET.
494
61
    const X86Subtarget &Subtarget = AsmPrinter.getSubtarget();
495
61
    unsigned ReturnReg = Subtarget.is64Bit() ? 
X86::RAX43
:
X86::EAX18
;
496
61
    OutMI = MCInst();
497
61
    OutMI.setOpcode(getRetOpcode(Subtarget));
498
61
    OutMI.addOperand(MCOperand::createReg(ReturnReg));
499
61
    break;
500
6
  }
501
6
502
6
  // TAILJMPd, TAILJMPd64, TailJMPd_cc - Lower to the correct jump instruction.
503
0
  { unsigned Opcode;
504
79
  case X86::TAILJMPr:   Opcode = X86::JMP32r; goto SetTailJmpOpcode;
505
1.62k
  case X86::TAILJMPd:
506
1.62k
  case X86::TAILJMPd64: Opcode = X86::JMP_1;  goto SetTailJmpOpcode;
507
26
  case X86::TAILJMPd_CC:
508
26
  case X86::TAILJMPd64_CC:
509
26
    Opcode = X86::GetCondBranchFromCond(
510
26
        static_cast<X86::CondCode>(MI->getOperand(1).getImm()));
511
26
    goto SetTailJmpOpcode;
512
26
513
1.72k
  SetTailJmpOpcode:
514
1.72k
    MCOperand Saved = OutMI.getOperand(0);
515
1.72k
    OutMI = MCInst();
516
1.72k
    OutMI.setOpcode(Opcode);
517
1.72k
    OutMI.addOperand(Saved);
518
1.72k
    break;
519
26
  }
520
26
521
2.68k
  case X86::DEC16r:
522
2.68k
  case X86::DEC32r:
523
2.68k
  case X86::INC16r:
524
2.68k
  case X86::INC32r:
525
2.68k
    // If we aren't in 64-bit mode we can use the 1-byte inc/dec instructions.
526
2.68k
    if (
!AsmPrinter.getSubtarget().is64Bit()2.68k
) {
527
1.91k
      unsigned Opcode;
528
1.91k
      switch (OutMI.getOpcode()) {
529
0
      
default: 0
llvm_unreachable0
("Invalid opcode");
530
0
      case X86::DEC16r: Opcode = X86::DEC16r_alt; break;
531
649
      case X86::DEC32r: Opcode = X86::DEC32r_alt; break;
532
0
      case X86::INC16r: Opcode = X86::INC16r_alt; break;
533
1.26k
      case X86::INC32r: Opcode = X86::INC32r_alt; break;
534
1.91k
      }
535
1.91k
      OutMI.setOpcode(Opcode);
536
1.91k
    }
537
2.68k
    break;
538
2.68k
539
2.68k
  // These are pseudo-ops for OR to help with the OR->ADD transformation.  We do
540
2.68k
  // this with an ugly goto in case the resultant OR uses EAX and needs the
541
2.68k
  // short form.
542
0
  case X86::ADD16rr_DB:   OutMI.setOpcode(X86::OR16rr); goto ReSimplify;
543
397
  case X86::ADD32rr_DB:   OutMI.setOpcode(X86::OR32rr); goto ReSimplify;
544
491
  case X86::ADD64rr_DB:   OutMI.setOpcode(X86::OR64rr); goto ReSimplify;
545
0
  case X86::ADD16ri_DB:   OutMI.setOpcode(X86::OR16ri); goto ReSimplify;
546
152
  case X86::ADD32ri_DB:   OutMI.setOpcode(X86::OR32ri); goto ReSimplify;
547
7
  case X86::ADD64ri32_DB: OutMI.setOpcode(X86::OR64ri32); goto ReSimplify;
548
0
  case X86::ADD16ri8_DB:  OutMI.setOpcode(X86::OR16ri8); goto ReSimplify;
549
28
  case X86::ADD32ri8_DB:  OutMI.setOpcode(X86::OR32ri8); goto ReSimplify;
550
3
  case X86::ADD64ri8_DB:  OutMI.setOpcode(X86::OR64ri8); goto ReSimplify;
551
2.68k
552
2.68k
  // Atomic load and store require a separate pseudo-inst because Acquire
553
2.68k
  // implies mayStore and Release implies mayLoad; fix these to regular MOV
554
2.68k
  // instructions here
555
12
  case X86::ACQUIRE_MOV8rm:    OutMI.setOpcode(X86::MOV8rm); goto ReSimplify;
556
53
  case X86::ACQUIRE_MOV16rm:   OutMI.setOpcode(X86::MOV16rm); goto ReSimplify;
557
89
  case X86::ACQUIRE_MOV32rm:   OutMI.setOpcode(X86::MOV32rm); goto ReSimplify;
558
23
  case X86::ACQUIRE_MOV64rm:   OutMI.setOpcode(X86::MOV64rm); goto ReSimplify;
559
12
  case X86::RELEASE_MOV8mr:    OutMI.setOpcode(X86::MOV8mr); goto ReSimplify;
560
53
  case X86::RELEASE_MOV16mr:   OutMI.setOpcode(X86::MOV16mr); goto ReSimplify;
561
44
  case X86::RELEASE_MOV32mr:   OutMI.setOpcode(X86::MOV32mr); goto ReSimplify;
562
24
  case X86::RELEASE_MOV64mr:   OutMI.setOpcode(X86::MOV64mr); goto ReSimplify;
563
15
  case X86::RELEASE_MOV8mi:    OutMI.setOpcode(X86::MOV8mi); goto ReSimplify;
564
3
  case X86::RELEASE_MOV16mi:   OutMI.setOpcode(X86::MOV16mi); goto ReSimplify;
565
3
  case X86::RELEASE_MOV32mi:   OutMI.setOpcode(X86::MOV32mi); goto ReSimplify;
566
2
  case X86::RELEASE_MOV64mi32: OutMI.setOpcode(X86::MOV64mi32); goto ReSimplify;
567
7
  case X86::RELEASE_ADD8mi:    OutMI.setOpcode(X86::ADD8mi); goto ReSimplify;
568
3
  case X86::RELEASE_ADD8mr:    OutMI.setOpcode(X86::ADD8mr); goto ReSimplify;
569
5
  case X86::RELEASE_ADD32mi:   OutMI.setOpcode(X86::ADD32mi); goto ReSimplify;
570
3
  case X86::RELEASE_ADD32mr:   OutMI.setOpcode(X86::ADD32mr); goto ReSimplify;
571
4
  case X86::RELEASE_ADD64mi32: OutMI.setOpcode(X86::ADD64mi32); goto ReSimplify;
572
2
  case X86::RELEASE_ADD64mr:   OutMI.setOpcode(X86::ADD64mr); goto ReSimplify;
573
3
  case X86::RELEASE_AND8mi:    OutMI.setOpcode(X86::AND8mi); goto ReSimplify;
574
3
  case X86::RELEASE_AND8mr:    OutMI.setOpcode(X86::AND8mr); goto ReSimplify;
575
3
  case X86::RELEASE_AND32mi:   OutMI.setOpcode(X86::AND32mi); goto ReSimplify;
576
3
  case X86::RELEASE_AND32mr:   OutMI.setOpcode(X86::AND32mr); goto ReSimplify;
577
2
  case X86::RELEASE_AND64mi32: OutMI.setOpcode(X86::AND64mi32); goto ReSimplify;
578
2
  case X86::RELEASE_AND64mr:   OutMI.setOpcode(X86::AND64mr); goto ReSimplify;
579
3
  case X86::RELEASE_OR8mi:     OutMI.setOpcode(X86::OR8mi); goto ReSimplify;
580
3
  case X86::RELEASE_OR8mr:     OutMI.setOpcode(X86::OR8mr); goto ReSimplify;
581
3
  case X86::RELEASE_OR32mi:    OutMI.setOpcode(X86::OR32mi); goto ReSimplify;
582
3
  case X86::RELEASE_OR32mr:    OutMI.setOpcode(X86::OR32mr); goto ReSimplify;
583
2
  case X86::RELEASE_OR64mi32:  OutMI.setOpcode(X86::OR64mi32); goto ReSimplify;
584
2
  case X86::RELEASE_OR64mr:    OutMI.setOpcode(X86::OR64mr); goto ReSimplify;
585
3
  case X86::RELEASE_XOR8mi:    OutMI.setOpcode(X86::XOR8mi); goto ReSimplify;
586
3
  case X86::RELEASE_XOR8mr:    OutMI.setOpcode(X86::XOR8mr); goto ReSimplify;
587
3
  case X86::RELEASE_XOR32mi:   OutMI.setOpcode(X86::XOR32mi); goto ReSimplify;
588
3
  case X86::RELEASE_XOR32mr:   OutMI.setOpcode(X86::XOR32mr); goto ReSimplify;
589
2
  case X86::RELEASE_XOR64mi32: OutMI.setOpcode(X86::XOR64mi32); goto ReSimplify;
590
2
  case X86::RELEASE_XOR64mr:   OutMI.setOpcode(X86::XOR64mr); goto ReSimplify;
591
4
  case X86::RELEASE_INC8m:     OutMI.setOpcode(X86::INC8m); goto ReSimplify;
592
0
  case X86::RELEASE_INC16m:    OutMI.setOpcode(X86::INC16m); goto ReSimplify;
593
2
  case X86::RELEASE_INC32m:    OutMI.setOpcode(X86::INC32m); goto ReSimplify;
594
1
  case X86::RELEASE_INC64m:    OutMI.setOpcode(X86::INC64m); goto ReSimplify;
595
2
  case X86::RELEASE_DEC8m:     OutMI.setOpcode(X86::DEC8m); goto ReSimplify;
596
0
  case X86::RELEASE_DEC16m:    OutMI.setOpcode(X86::DEC16m); goto ReSimplify;
597
2
  case X86::RELEASE_DEC32m:    OutMI.setOpcode(X86::DEC32m); goto ReSimplify;
598
1
  case X86::RELEASE_DEC64m:    OutMI.setOpcode(X86::DEC64m); goto ReSimplify;
599
2.68k
600
2.68k
  // We don't currently select the correct instruction form for instructions
601
2.68k
  // which have a short %eax, etc. form. Handle this by custom lowering, for
602
2.68k
  // now.
603
2.68k
  //
604
2.68k
  // Note, we are currently not handling the following instructions:
605
2.68k
  // MOV64ao8, MOV64o8a
606
2.68k
  // XCHG16ar, XCHG32ar, XCHG64ar
607
61.7k
  case X86::MOV8mr_NOREX:
608
61.7k
  case X86::MOV8mr:
609
61.7k
  case X86::MOV8rm_NOREX:
610
61.7k
  case X86::MOV8rm:
611
61.7k
  case X86::MOV16mr:
612
61.7k
  case X86::MOV16rm:
613
61.7k
  case X86::MOV32mr:
614
61.7k
  case X86::MOV32rm: {
615
61.7k
    unsigned NewOpc;
616
61.7k
    switch (OutMI.getOpcode()) {
617
0
    
default: 0
llvm_unreachable0
("Invalid opcode");
618
4.63k
    case X86::MOV8mr_NOREX:
619
4.63k
    case X86::MOV8mr:     NewOpc = X86::MOV8o32a; break;
620
2.65k
    case X86::MOV8rm_NOREX:
621
2.65k
    case X86::MOV8rm:     NewOpc = X86::MOV8ao32; break;
622
1.49k
    case X86::MOV16mr:    NewOpc = X86::MOV16o32a; break;
623
141
    case X86::MOV16rm:    NewOpc = X86::MOV16ao32; break;
624
19.2k
    case X86::MOV32mr:    NewOpc = X86::MOV32o32a; break;
625
33.5k
    case X86::MOV32rm:    NewOpc = X86::MOV32ao32; break;
626
61.7k
    }
627
61.7k
    SimplifyShortMoveForm(AsmPrinter, OutMI, NewOpc);
628
61.7k
    break;
629
61.7k
  }
630
61.7k
631
12.6k
  
case X86::ADC8ri: 12.6k
case X86::ADC16ri: 12.6k
case X86::ADC32ri: 12.6k
case X86::ADC64ri32:
632
12.6k
  
case X86::ADD8ri: 12.6k
case X86::ADD16ri: 12.6k
case X86::ADD32ri: 12.6k
case X86::ADD64ri32:
633
12.6k
  
case X86::AND8ri: 12.6k
case X86::AND16ri: 12.6k
case X86::AND32ri: 12.6k
case X86::AND64ri32:
634
12.6k
  
case X86::CMP8ri: 12.6k
case X86::CMP16ri: 12.6k
case X86::CMP32ri: 12.6k
case X86::CMP64ri32:
635
12.6k
  
case X86::OR8ri: 12.6k
case X86::OR16ri: 12.6k
case X86::OR32ri: 12.6k
case X86::OR64ri32:
636
12.6k
  
case X86::SBB8ri: 12.6k
case X86::SBB16ri: 12.6k
case X86::SBB32ri: 12.6k
case X86::SBB64ri32:
637
12.6k
  
case X86::SUB8ri: 12.6k
case X86::SUB16ri: 12.6k
case X86::SUB32ri: 12.6k
case X86::SUB64ri32:
638
12.6k
  
case X86::TEST8ri:12.6k
case X86::TEST16ri:12.6k
case X86::TEST32ri:12.6k
case X86::TEST64ri32:
639
12.6k
  
case X86::XOR8ri: 12.6k
case X86::XOR16ri: 12.6k
case X86::XOR32ri: 12.6k
case X86::XOR64ri32: {
640
12.6k
    unsigned NewOpc;
641
12.6k
    switch (OutMI.getOpcode()) {
642
0
    
default: 0
llvm_unreachable0
("Invalid opcode");
643
5
    case X86::ADC8ri:     NewOpc = X86::ADC8i8;    break;
644
0
    case X86::ADC16ri:    NewOpc = X86::ADC16i16;  break;
645
1
    case X86::ADC32ri:    NewOpc = X86::ADC32i32;  break;
646
0
    case X86::ADC64ri32:  NewOpc = X86::ADC64i32;  break;
647
174
    case X86::ADD8ri:     NewOpc = X86::ADD8i8;    break;
648
0
    case X86::ADD16ri:    NewOpc = X86::ADD16i16;  break;
649
511
    case X86::ADD32ri:    NewOpc = X86::ADD32i32;  break;
650
765
    case X86::ADD64ri32:  NewOpc = X86::ADD64i32;  break;
651
2.55k
    case X86::AND8ri:     NewOpc = X86::AND8i8;    break;
652
6
    case X86::AND16ri:    NewOpc = X86::AND16i16;  break;
653
2.16k
    case X86::AND32ri:    NewOpc = X86::AND32i32;  break;
654
248
    case X86::AND64ri32:  NewOpc = X86::AND64i32;  break;
655
777
    case X86::CMP8ri:     NewOpc = X86::CMP8i8;    break;
656
8
    case X86::CMP16ri:    NewOpc = X86::CMP16i16;  break;
657
1.77k
    case X86::CMP32ri:    NewOpc = X86::CMP32i32;  break;
658
411
    case X86::CMP64ri32:  NewOpc = X86::CMP64i32;  break;
659
42
    case X86::OR8ri:      NewOpc = X86::OR8i8;     break;
660
0
    case X86::OR16ri:     NewOpc = X86::OR16i16;   break;
661
591
    case X86::OR32ri:     NewOpc = X86::OR32i32;   break;
662
18
    case X86::OR64ri32:   NewOpc = X86::OR64i32;   break;
663
2
    case X86::SBB8ri:     NewOpc = X86::SBB8i8;    break;
664
0
    case X86::SBB16ri:    NewOpc = X86::SBB16i16;  break;
665
18
    case X86::SBB32ri:    NewOpc = X86::SBB32i32;  break;
666
0
    case X86::SBB64ri32:  NewOpc = X86::SBB64i32;  break;
667
3
    case X86::SUB8ri:     NewOpc = X86::SUB8i8;    break;
668
0
    case X86::SUB16ri:    NewOpc = X86::SUB16i16;  break;
669
215
    case X86::SUB32ri:    NewOpc = X86::SUB32i32;  break;
670
471
    case X86::SUB64ri32:  NewOpc = X86::SUB64i32;  break;
671
1.49k
    case X86::TEST8ri:    NewOpc = X86::TEST8i8;   break;
672
11
    case X86::TEST16ri:   NewOpc = X86::TEST16i16; break;
673
72
    case X86::TEST32ri:   NewOpc = X86::TEST32i32; break;
674
1
    case X86::TEST64ri32: NewOpc = X86::TEST64i32; break;
675
71
    case X86::XOR8ri:     NewOpc = X86::XOR8i8;    break;
676
0
    case X86::XOR16ri:    NewOpc = X86::XOR16i16;  break;
677
196
    case X86::XOR32ri:    NewOpc = X86::XOR32i32;  break;
678
2
    case X86::XOR64ri32:  NewOpc = X86::XOR64i32;  break;
679
12.6k
    }
680
12.6k
    SimplifyShortImmForm(OutMI, NewOpc);
681
12.6k
    break;
682
12.6k
  }
683
12.6k
684
12.6k
  // Try to shrink some forms of movsx.
685
848
  case X86::MOVSX16rr8:
686
848
  case X86::MOVSX32rr16:
687
848
  case X86::MOVSX64rr32:
688
848
    SimplifyMOVSX(OutMI);
689
848
    break;
690
841k
  }
691
841k
}
692
693
/// Lower the TLS_addr32/64 and TLS_base_addr32/64 pseudo instructions into the
/// canonical General Dynamic / Local Dynamic TLS sequences (LEA of the GOT
/// expression followed by a call to __tls_get_addr).  The padding prefixes on
/// the 64-bit GD form keep the sequence at the fixed size the linker expects
/// when relaxing TLS accesses.
void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
                                 const MachineInstr &MI) {

  bool is64Bits = MI.getOpcode() == X86::TLS_addr64 ||
                  MI.getOpcode() == X86::TLS_base_addr64;

  // Only the 64-bit General Dynamic form is padded with data16 prefixes.
  bool needsPadding = MI.getOpcode() == X86::TLS_addr64;

  MCContext &context = OutStreamer->getContext();

  if (needsPadding)
    EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));

  // Pick the relocation variant matching the TLS model of this pseudo.
  MCSymbolRefExpr::VariantKind SRVK;
  switch (MI.getOpcode()) {
    case X86::TLS_addr32:
    case X86::TLS_addr64:
      SRVK = MCSymbolRefExpr::VK_TLSGD;
      break;
    case X86::TLS_base_addr32:
      SRVK = MCSymbolRefExpr::VK_TLSLDM;
      break;
    case X86::TLS_base_addr64:
      SRVK = MCSymbolRefExpr::VK_TLSLD;
      break;
    default:
      llvm_unreachable("unexpected opcode");
  }

  MCSymbol *sym = MCInstLowering.GetSymbolFromOperand(MI.getOperand(3));
  const MCSymbolRefExpr *symRef = MCSymbolRefExpr::create(sym, SRVK, context);

  // Build the address-materializing LEA.  The operand layout differs between
  // the 64-bit (RIP-relative into %rdi) and 32-bit (EBX-based into %eax)
  // sequences, and between the 32-bit LD (EBX as base) and GD (EBX as index)
  // forms.
  MCInst LEA;
  if (is64Bits) {
    LEA.setOpcode(X86::LEA64r);
    LEA.addOperand(MCOperand::createReg(X86::RDI)); // dest
    LEA.addOperand(MCOperand::createReg(X86::RIP)); // base
    LEA.addOperand(MCOperand::createImm(1));        // scale
    LEA.addOperand(MCOperand::createReg(0));        // index
    LEA.addOperand(MCOperand::createExpr(symRef));  // disp
    LEA.addOperand(MCOperand::createReg(0));        // seg
  } else if (SRVK == MCSymbolRefExpr::VK_TLSLDM) {
    LEA.setOpcode(X86::LEA32r);
    LEA.addOperand(MCOperand::createReg(X86::EAX)); // dest
    LEA.addOperand(MCOperand::createReg(X86::EBX)); // base
    LEA.addOperand(MCOperand::createImm(1));        // scale
    LEA.addOperand(MCOperand::createReg(0));        // index
    LEA.addOperand(MCOperand::createExpr(symRef));  // disp
    LEA.addOperand(MCOperand::createReg(0));        // seg
  } else {
    LEA.setOpcode(X86::LEA32r);
    LEA.addOperand(MCOperand::createReg(X86::EAX)); // dest
    LEA.addOperand(MCOperand::createReg(0));        // base
    LEA.addOperand(MCOperand::createImm(1));        // scale
    LEA.addOperand(MCOperand::createReg(X86::EBX)); // index
    LEA.addOperand(MCOperand::createExpr(symRef));  // disp
    LEA.addOperand(MCOperand::createReg(0));        // seg
  }
  EmitAndCountInstruction(LEA);

  // More size padding before the call for the 64-bit GD form.
  if (needsPadding) {
    EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
    EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
    EmitAndCountInstruction(MCInstBuilder(X86::REX64_PREFIX));
  }

  // Note the extra leading underscore on the 32-bit symbol name.
  StringRef name = is64Bits ? "__tls_get_addr" : "___tls_get_addr";
  MCSymbol *tlsGetAddr = context.getOrCreateSymbol(name);
  const MCSymbolRefExpr *tlsRef =
    MCSymbolRefExpr::create(tlsGetAddr,
                            MCSymbolRefExpr::VK_PLT,
                            context);

  EmitAndCountInstruction(MCInstBuilder(is64Bits ? X86::CALL64pcrel32
                                                 : X86::CALLpcrel32)
                            .addExpr(tlsRef));
}
770
771
/// \brief Emit the largest nop instruction smaller than or equal to \p NumBytes
/// bytes.  Return the size of nop emitted.
///
/// Selects a single multi-byte nop encoding (NOOP/XCHG16ar/NOOPL/NOOPW with
/// varying addressing-mode operands) for up to 10 bytes, then pads with up to
/// five 0x66 operand-size prefixes for larger requests (max 15 bytes per
/// instruction).
static unsigned EmitNop(MCStreamer &OS, unsigned NumBytes, bool Is64Bit,
                        const MCSubtargetInfo &STI) {
  // This works only for 64bit. For 32bit we have to do additional checking if
  // the CPU supports multi-byte nops.
  assert(Is64Bit && "EmitNops only supports X86-64");

  unsigned NopSize;
  unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg;
  Opc = IndexReg = Displacement = SegmentReg = 0;
  BaseReg = X86::RAX;
  ScaleVal = 1;
  // Each case picks the opcode/operands whose encoding is exactly NopSize
  // bytes long.
  switch (NumBytes) {
  case 0: llvm_unreachable("Zero nops?"); break;
  case  1: NopSize = 1; Opc = X86::NOOP; break;
  case  2: NopSize = 2; Opc = X86::XCHG16ar; break;
  case  3: NopSize = 3; Opc = X86::NOOPL; break;
  case  4: NopSize = 4; Opc = X86::NOOPL; Displacement = 8; break;
  case  5: NopSize = 5; Opc = X86::NOOPL; Displacement = 8;
           IndexReg = X86::RAX; break;
  case  6: NopSize = 6; Opc = X86::NOOPW; Displacement = 8;
           IndexReg = X86::RAX; break;
  case  7: NopSize = 7; Opc = X86::NOOPL; Displacement = 512; break;
  case  8: NopSize = 8; Opc = X86::NOOPL; Displacement = 512;
           IndexReg = X86::RAX; break;
  case  9: NopSize = 9; Opc = X86::NOOPW; Displacement = 512;
           IndexReg = X86::RAX; break;
  default: NopSize = 10; Opc = X86::NOOPW; Displacement = 512;
           IndexReg = X86::RAX; SegmentReg = X86::CS; break;
  }

  // Grow the 10-byte form with 0x66 prefixes (capped at 5, i.e. 15 bytes
  // total); any remainder is handled by the caller looping.
  unsigned NumPrefixes = std::min(NumBytes - NopSize, 5U);
  NopSize += NumPrefixes;
  for (unsigned i = 0; i != NumPrefixes; ++i)
    OS.EmitBytes("\x66");

  switch (Opc) {
  default:
    llvm_unreachable("Unexpected opcode");
    break;
  case X86::NOOP:
    OS.EmitInstruction(MCInstBuilder(Opc), STI);
    break;
  case X86::XCHG16ar:
    OS.EmitInstruction(MCInstBuilder(Opc).addReg(X86::AX), STI);
    break;
  case X86::NOOPL:
  case X86::NOOPW:
    // Memory-form nop: base + scale*index + disp, optional segment override.
    OS.EmitInstruction(MCInstBuilder(Opc)
                           .addReg(BaseReg)
                           .addImm(ScaleVal)
                           .addReg(IndexReg)
                           .addImm(Displacement)
                           .addReg(SegmentReg),
                       STI);
    break;
  }
  assert(NopSize <= NumBytes && "We overemitted?");
  return NopSize;
}
832
833
/// \brief Emit the optimal amount of multi-byte nops on X86.
834
static void EmitNops(MCStreamer &OS, unsigned NumBytes, bool Is64Bit,
835
192
                     const MCSubtargetInfo &STI) {
836
192
  unsigned NopsToEmit = NumBytes;
837
192
  (void)NopsToEmit;
838
397
  while (
NumBytes397
) {
839
205
    NumBytes -= EmitNop(OS, NumBytes, Is64Bit, STI);
840
205
    assert(NopsToEmit >= NumBytes && "Emitted more than I asked for!");
841
205
  }
842
192
}
843
844
/// Lower a STATEPOINT pseudo: either emit the requested number of patchable
/// nop bytes, or lower the embedded call target to a real CALL, then record
/// the statepoint in the stackmap section.
void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
                                    X86MCInstLower &MCIL) {
  assert(Subtarget->is64Bit() && "Statepoint currently only supports X86-64");

  StatepointOpers SOpers(&MI);
  if (unsigned PatchBytes = SOpers.getNumPatchBytes()) {
    // A nonzero patch-byte count means "emit nops only"; the runtime will
    // patch a call in later.
    EmitNops(*OutStreamer, PatchBytes, Subtarget->is64Bit(),
             getSubtargetInfo());
  } else {
    // Lower call target and choose correct opcode
    const MachineOperand &CallTarget = SOpers.getCallTarget();
    MCOperand CallTargetMCOp;
    unsigned CallOpcode;
    switch (CallTarget.getType()) {
    case MachineOperand::MO_GlobalAddress:
    case MachineOperand::MO_ExternalSymbol:
      CallTargetMCOp = MCIL.LowerSymbolOperand(
          CallTarget, MCIL.GetSymbolFromOperand(CallTarget));
      CallOpcode = X86::CALL64pcrel32;
      // Currently, we only support relative addressing with statepoints.
      // Otherwise, we'll need a scratch register to hold the target
      // address.  You'll fail asserts during load & relocation if this
      // symbol is to far away. (TODO: support non-relative addressing)
      break;
    case MachineOperand::MO_Immediate:
      CallTargetMCOp = MCOperand::createImm(CallTarget.getImm());
      CallOpcode = X86::CALL64pcrel32;
      // Currently, we only support relative addressing with statepoints.
      // Otherwise, we'll need a scratch register to hold the target
      // immediate.  You'll fail asserts during load & relocation if this
      // address is to far away. (TODO: support non-relative addressing)
      break;
    case MachineOperand::MO_Register:
      CallTargetMCOp = MCOperand::createReg(CallTarget.getReg());
      CallOpcode = X86::CALL64r;
      break;
    default:
      llvm_unreachable("Unsupported operand type in statepoint call target");
      break;
    }

    // Emit call
    MCInst CallInst;
    CallInst.setOpcode(CallOpcode);
    CallInst.addOperand(CallTargetMCOp);
    OutStreamer->EmitInstruction(CallInst, getSubtargetInfo());
  }

  // Record our statepoint node in the same section used by STACKMAP
  // and PATCHPOINT
  SM.recordStatepoint(MI);
}
896
897
/// Lower a FAULTING_OP pseudo: record the fault kind and handler label in the
/// fault map, then emit the wrapped instruction (whose opcode and operands are
/// carried as operands of the pseudo).
void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI,
                                     X86MCInstLower &MCIL) {
  // FAULTING_LOAD_OP <def>, <faulting kind>, <MBB handler>,
  //                  <opcode>, <operands>

  unsigned DefRegister = FaultingMI.getOperand(0).getReg();
  FaultMaps::FaultKind FK =
      static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(1).getImm());
  MCSymbol *HandlerLabel = FaultingMI.getOperand(2).getMBB()->getSymbol();
  unsigned Opcode = FaultingMI.getOperand(3).getImm();
  // Operands of the real instruction start after the four bookkeeping ones.
  unsigned OperandsBeginIdx = 4;

  assert(FK < FaultMaps::FaultKindMax && "Invalid Faulting Kind!");
  FM.recordFaultingOp(FK, HandlerLabel);

  MCInst MI;
  MI.setOpcode(Opcode);

  // The def may be absent (X86::NoRegister) for store-like operations.
  if (DefRegister != X86::NoRegister)
    MI.addOperand(MCOperand::createReg(DefRegister));

  for (auto I = FaultingMI.operands_begin() + OperandsBeginIdx,
            E = FaultingMI.operands_end();
       I != E; ++I)
    // LowerMachineOperand returns None for operands with no MC equivalent
    // (e.g. regmasks), which are simply dropped.
    if (auto MaybeOperand = MCIL.LowerMachineOperand(&FaultingMI, *I))
      MI.addOperand(MaybeOperand.getValue());

  OutStreamer->EmitInstruction(MI, getSubtargetInfo());
}
926
927
void X86AsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI,
928
2
                                     X86MCInstLower &MCIL) {
929
2
  bool Is64Bits = Subtarget->is64Bit();
930
2
  MCContext &Ctx = OutStreamer->getContext();
931
2
  MCSymbol *fentry = Ctx.getOrCreateSymbol("__fentry__");
932
2
  const MCSymbolRefExpr *Op =
933
2
      MCSymbolRefExpr::create(fentry, MCSymbolRefExpr::VK_None, Ctx);
934
2
935
2
  EmitAndCountInstruction(
936
2
      MCInstBuilder(Is64Bits ? 
X86::CALL64pcrel322
:
X86::CALLpcrel320
)
937
2
          .addExpr(Op));
938
2
}
939
940
/// Lower a PATCHABLE_OP pseudo: emit the wrapped instruction, padding it (or
/// swapping it for an equivalent longer encoding) so that it occupies at
/// least MinSize bytes and can be safely patched at runtime.
void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
                                      X86MCInstLower &MCIL) {
  // PATCHABLE_OP minsize, opcode, operands

  unsigned MinSize = MI.getOperand(0).getImm();
  unsigned Opcode = MI.getOperand(1).getImm();

  MCInst MCI;
  MCI.setOpcode(Opcode);
  for (auto &MO : make_range(MI.operands_begin() + 2, MI.operands_end()))
    if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
      MCI.addOperand(MaybeOperand.getValue());

  // Trial-encode the instruction to learn its exact byte length.
  SmallString<256> Code;
  SmallVector<MCFixup, 4> Fixups;
  raw_svector_ostream VecOS(Code);
  CodeEmitter->encodeInstruction(MCI, VecOS, Fixups, getSubtargetInfo());

  if (Code.size() < MinSize) {
    if (MinSize == 2 && Opcode == X86::PUSH64r) {
      // This is an optimization that lets us get away without emitting a nop in
      // many cases.
      //
      // NB! In some cases the encoding for PUSH64r (e.g. PUSH64r %R9) takes two
      // bytes too, so the check on MinSize is important.
      MCI.setOpcode(X86::PUSH64rmr);
    } else {
      unsigned NopSize = EmitNop(*OutStreamer, MinSize, Subtarget->is64Bit(),
                                 getSubtargetInfo());
      assert(NopSize == MinSize && "Could not implement MinSize!");
      (void) NopSize;
    }
  }

  OutStreamer->EmitInstruction(MCI, getSubtargetInfo());
}
976
977
// Lower a stackmap of the form:
// <id>, <shadowBytes>, ...
//
// Emits any pending shadow padding first, records the stackmap entry, and
// then arms the shadow tracker with this stackmap's shadow-byte requirement
// (satisfied by subsequently emitted instructions or trailing nops).
void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
  SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
  SM.recordStackMap(MI);
  unsigned NumShadowBytes = MI.getOperand(1).getImm();
  SMShadowTracker.reset(NumShadowBytes);
}
985
986
// Lower a patchpoint of the form:
// [<def>], <id>, <numBytes>, <target>, <numArgs>, <cc>, ...
//
// Emits an optional MOV64ri+CALL64r to the target (when the target is
// non-null), then pads with nops so the whole patchpoint occupies exactly
// <numBytes> bytes, and records it in the stackmap section.
void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
                                    X86MCInstLower &MCIL) {
  assert(Subtarget->is64Bit() && "Patchpoint currently only supports X86-64");

  SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());

  SM.recordPatchPoint(MI);

  PatchPointOpers opers(&MI);
  unsigned ScratchIdx = opers.getNextScratchIdx();
  unsigned EncodedBytes = 0;
  const MachineOperand &CalleeMO = opers.getCallTarget();

  // Check for null target. If target is non-null (i.e. is non-zero or is
  // symbolic) then emit a call.
  if (!(CalleeMO.isImm() && !CalleeMO.getImm())) {
    MCOperand CalleeMCOp;
    switch (CalleeMO.getType()) {
    default:
      /// FIXME: Add a verifier check for bad callee types.
      llvm_unreachable("Unrecognized callee operand type.");
    case MachineOperand::MO_Immediate:
      if (CalleeMO.getImm())
        CalleeMCOp = MCOperand::createImm(CalleeMO.getImm());
      break;
    case MachineOperand::MO_ExternalSymbol:
    case MachineOperand::MO_GlobalAddress:
      CalleeMCOp =
        MCIL.LowerSymbolOperand(CalleeMO,
                                MCIL.GetSymbolFromOperand(CalleeMO));
      break;
    }

    // Emit MOV to materialize the target address and the CALL to target.
    // This is encoded with 12-13 bytes, depending on which register is used.
    unsigned ScratchReg = MI.getOperand(ScratchIdx).getReg();
    if (X86II::isX86_64ExtendedReg(ScratchReg))
      EncodedBytes = 13;  // extended regs (r8-r15) need a REX.B prefix byte
    else
      EncodedBytes = 12;

    EmitAndCountInstruction(
        MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp));
    EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg));
  }

  // Emit padding.
  unsigned NumBytes = opers.getNumPatchBytes();
  assert(NumBytes >= EncodedBytes &&
         "Patchpoint can't request size less than the length of a call.");

  EmitNops(*OutStreamer, NumBytes - EncodedBytes, Subtarget->is64Bit(),
           getSubtargetInfo());
}
1042
1043
/// Lower a PATCHABLE_EVENT_CALL pseudo into an XRay custom-event sled: a
/// two-byte jump over a fixed-size region that marshals the two event
/// arguments into %rdi/%rsi and calls __xray_CustomEvent.
void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
                                              X86MCInstLower &MCIL) {
  assert(Subtarget->is64Bit() && "XRay custom events only supports X86-64");

  // We want to emit the following pattern, which follows the x86 calling
  // convention to prepare for the trampoline call to be patched in.
  //
  //   .p2align 1, ...
  // .Lxray_event_sled_N:
  //   jmp +N                        // jump across the instrumentation sled
  //   ...                           // set up arguments in register
  //   callq __xray_CustomEvent@plt  // force dependency to symbol
  //   ...
  //   <jump here>
  //
  // After patching, it would look something like:
  //
  //   nopw (2-byte nop)
  //   ...
  //   callq __xrayCustomEvent  // already lowered
  //   ...
  //
  // ---
  // First we emit the label and the jump.
  auto CurSled = OutContext.createTempSymbol("xray_event_sled_", true);
  OutStreamer->AddComment("# XRay Custom Event Log");
  OutStreamer->EmitCodeAlignment(2);
  OutStreamer->EmitLabel(CurSled);

  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
  // an operand (computed as an offset from the jmp instruction).
  // FIXME: Find another less hacky way do force the relative jump.
  OutStreamer->EmitBinaryData("\xeb\x0f");

  // The sled passes the two event arguments in %rdi and %rsi (the registers
  // named in UsedRegs below) -- so we only work with those.
  unsigned UsedRegs[] = {X86::RDI, X86::RSI};
  bool UsedMask[] = {false, false};

  // Then we put the operands in the %rdi and %rsi registers. We spill the
  // values in the register before we clobber them, and mark them as used in
  // UsedMask. In case the arguments are already in the correct register, we use
  // emit nops appropriately sized to keep the sled the same size in every
  // situation.
  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
    if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
      assert(Op->isReg() && "Only support arguments in registers");
      if (Op->getReg() != UsedRegs[I]) {
        UsedMask[I] = true;
        EmitAndCountInstruction(
            MCInstBuilder(X86::PUSH64r).addReg(UsedRegs[I]));
        EmitAndCountInstruction(MCInstBuilder(X86::MOV64rr)
                                    .addReg(UsedRegs[I])
                                    .addReg(Op->getReg()));
      } else {
        // Already in place: emit size-matching nops so the sled length is
        // constant regardless of which branch was taken.
        EmitNops(*OutStreamer, 4, Subtarget->is64Bit(), getSubtargetInfo());
      }
    }

  // We emit a hard dependency on the __xray_CustomEvent symbol, which is the
  // name of the trampoline to be implemented by the XRay runtime.
  auto TSym = OutContext.getOrCreateSymbol("__xray_CustomEvent");
  MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
  if (isPositionIndependent())
    TOp.setTargetFlags(X86II::MO_PLT);

  // Emit the call instruction.
  EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
                              .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));

  // Restore caller-saved and used registers.
  for (unsigned I = sizeof UsedMask; I-- > 0;)
    if (UsedMask[I])
      EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(UsedRegs[I]));
    else
      // Pad with a one-byte nop to keep the sled size fixed.
      EmitNops(*OutStreamer, 1, Subtarget->is64Bit(), getSubtargetInfo());

  OutStreamer->AddComment("xray custom event end.");

  // Record the sled version. Older versions of this sled were spelled
  // differently, so we let the runtime handle the different offsets we're
  // using.
  recordSled(CurSled, MI, SledKind::CUSTOM_EVENT, 1);
}
1127
1128
/// Lower a PATCHABLE_FUNCTION_ENTER pseudo into an XRay entry sled: a 2-byte
/// jump followed by 9 bytes of nops that the runtime can overwrite with a
/// mov+call pair.
void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
                                                  X86MCInstLower &MCIL) {
  // We want to emit the following pattern:
  //
  //   .p2align 1, ...
  // .Lxray_sled_N:
  //   jmp .tmpN
  //   # 9 bytes worth of noops
  //
  // We need the 9 bytes because at runtime, we'd be patching over the full 11
  // bytes with the following pattern:
  //
  //   mov %r10, <function id, 32-bit>   // 6 bytes
  //   call <relative offset, 32-bits>   // 5 bytes
  //
  auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
  OutStreamer->EmitCodeAlignment(2);
  OutStreamer->EmitLabel(CurSled);

  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
  // an operand (computed as an offset from the jmp instruction).
  // FIXME: Find another less hacky way do force the relative jump.
  OutStreamer->EmitBytes("\xeb\x09");
  EmitNops(*OutStreamer, 9, Subtarget->is64Bit(), getSubtargetInfo());
  recordSled(CurSled, MI, SledKind::FUNCTION_ENTER);
}
1154
1155
/// Lower a PATCHABLE_RET pseudo: emit the wrapped return instruction followed
/// by a 10-byte nop sled, and record it as an XRay FUNCTION_EXIT sled.
void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,
                                       X86MCInstLower &MCIL) {
  // Since PATCHABLE_RET takes the opcode of the return statement as an
  // argument, we use that to emit the correct form of the RET that we want.
  // i.e. when we see this:
  //
  //   PATCHABLE_RET X86::RET ...
  //
  // We should emit the RET followed by sleds.
  //
  //   .p2align 1, ...
  // .Lxray_sled_N:
  //   ret  # or equivalent instruction
  //   # 10 bytes worth of noops
  //
  // This just makes sure that the alignment for the next instruction is 2.
  auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
  OutStreamer->EmitCodeAlignment(2);
  OutStreamer->EmitLabel(CurSled);
  // Operand 0 carries the real return opcode; the rest are its operands.
  unsigned OpCode = MI.getOperand(0).getImm();
  MCInst Ret;
  Ret.setOpcode(OpCode);
  for (auto &MO : make_range(MI.operands_begin() + 1, MI.operands_end()))
    if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
      Ret.addOperand(MaybeOperand.getValue());
  OutStreamer->EmitInstruction(Ret, getSubtargetInfo());
  EmitNops(*OutStreamer, 10, Subtarget->is64Bit(), getSubtargetInfo());
  recordSled(CurSled, MI, SledKind::FUNCTION_EXIT);
}
1184
1185
4
/// Lower a PATCHABLE_TAIL_CALL pseudo: emit an XRay sled (jump + 9-byte nop
/// pad) *before* the tail-call jump, then emit the wrapped tail call itself.
void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI, X86MCInstLower &MCIL) {
  // Like PATCHABLE_RET, we have the actual instruction in the operands to this
  // instruction so we lower that particular instruction and its operands.
  // Unlike PATCHABLE_RET though, we put the sled before the JMP, much like how
  // we do it for PATCHABLE_FUNCTION_ENTER. The sled should be very similar to
  // the PATCHABLE_FUNCTION_ENTER case, followed by the lowering of the actual
  // tail call much like how we have it in PATCHABLE_RET.
  auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
  OutStreamer->EmitCodeAlignment(2);
  OutStreamer->EmitLabel(CurSled);
  auto Target = OutContext.createTempSymbol();

  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
  // an operand (computed as an offset from the jmp instruction).
  // FIXME: Find another less hacky way do force the relative jump.
  OutStreamer->EmitBytes("\xeb\x09");
  EmitNops(*OutStreamer, 9, Subtarget->is64Bit(), getSubtargetInfo());
  OutStreamer->EmitLabel(Target);
  recordSled(CurSled, MI, SledKind::TAIL_CALL);

  // Operand 0 carries the real jump opcode; the rest are its operands.
  unsigned OpCode = MI.getOperand(0).getImm();
  MCInst TC;
  TC.setOpcode(OpCode);

  // Before emitting the instruction, add a comment to indicate that this is
  // indeed a tail call.
  OutStreamer->AddComment("TAILCALL");
  for (auto &MO : make_range(MI.operands_begin() + 1, MI.operands_end()))
    if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
      TC.addOperand(MaybeOperand.getValue());
  OutStreamer->EmitInstruction(TC, getSubtargetInfo());
}
1217
1218
// Returns instruction preceding MBBI in MachineFunction.
1219
// If MBBI is the first instruction of the first basic block, returns null.
1220
static MachineBasicBlock::const_iterator
1221
413
PrevCrossBBInst(MachineBasicBlock::const_iterator MBBI) {
1222
413
  const MachineBasicBlock *MBB = MBBI->getParent();
1223
448
  while (
MBBI == MBB->begin()448
) {
1224
35
    if (MBB == &MBB->getParent()->front())
1225
0
      return MachineBasicBlock::const_iterator();
1226
35
    MBB = MBB->getPrevNode();
1227
35
    MBBI = MBB->end();
1228
35
  }
1229
413
  return --MBBI;
1230
413
}
1231
1232
/// If \p Op is a constant-pool index operand, return the IR Constant it
/// refers to; return nullptr for non-CPI operands or target-specific
/// (machine) constant-pool entries that carry no IR constant.
static const Constant *getConstantFromPool(const MachineInstr &MI,
                                           const MachineOperand &Op) {
  if (!Op.isCPI())
    return nullptr;

  ArrayRef<MachineConstantPoolEntry> Constants =
      MI.getParent()->getParent()->getConstantPool()->getConstants();
  const MachineConstantPoolEntry &ConstantEntry =
      Constants[Op.getIndex()];

  // Bail if this is a machine constant pool entry, we won't be able to dig out
  // anything useful.
  if (ConstantEntry.isMachineConstantPoolEntry())
    return nullptr;

  auto *C = dyn_cast<Constant>(ConstantEntry.Val.ConstVal);
  assert((!C || ConstantEntry.getType() == C->getType()) &&
         "Expected a constant of the same type!");
  return C;
}
1252
1253
/// Build a human-readable asm comment describing a shuffle instruction's
/// effect, e.g. "xmm0 = xmm1[0,1],zero,zero".  \p Mask uses the common shuffle
/// decode convention: indices < e select from src1, indices >= e from src2,
/// and SM_SentinelZero / SM_SentinelUndef mark zeroed / undef lanes.
static std::string getShuffleComment(const MachineInstr *MI,
                                     unsigned SrcOp1Idx,
                                     unsigned SrcOp2Idx,
                                     ArrayRef<int> Mask) {
  std::string Comment;

  // Compute the name for a register. This is really goofy because we have
  // multiple instruction printers that could (in theory) use different
  // names. Fortunately most people use the ATT style (outside of Windows)
  // and they actually agree on register naming here. Ultimately, this is
  // a comment, and so its OK if it isn't perfect.
  auto GetRegisterName = [](unsigned RegNum) -> StringRef {
    return X86ATTInstPrinter::getRegisterName(RegNum);
  };

  const MachineOperand &DstOp = MI->getOperand(0);
  const MachineOperand &SrcOp1 = MI->getOperand(SrcOp1Idx);
  const MachineOperand &SrcOp2 = MI->getOperand(SrcOp2Idx);

  StringRef DstName = DstOp.isReg() ? GetRegisterName(DstOp.getReg()) : "mem";
  StringRef Src1Name =
      SrcOp1.isReg() ? GetRegisterName(SrcOp1.getReg()) : "mem";
  StringRef Src2Name =
      SrcOp2.isReg() ? GetRegisterName(SrcOp2.getReg()) : "mem";

  // One source operand, fix the mask to print all elements in one span.
  SmallVector<int, 8> ShuffleMask(Mask.begin(), Mask.end());
  if (Src1Name == Src2Name)
    for (int i = 0, e = ShuffleMask.size(); i != e; ++i)
      if (ShuffleMask[i] >= e)
        ShuffleMask[i] -= e;

  raw_string_ostream CS(Comment);
  CS << DstName;

  // Handle AVX512 MASK/MASKZ write mask comments.
  // MASK: zmmX {%kY}
  // MASKZ: zmmX {%kY} {z}
  if (SrcOp1Idx > 1) {
    assert((SrcOp1Idx == 2 || SrcOp1Idx == 3) && "Unexpected writemask");

    const MachineOperand &WriteMaskOp = MI->getOperand(SrcOp1Idx - 1);
    if (WriteMaskOp.isReg()) {
      CS << " {%" << GetRegisterName(WriteMaskOp.getReg()) << "}";

      if (SrcOp1Idx == 2) {
        CS << " {z}";
      }
    }
  }

  CS << " = ";

  for (int i = 0, e = ShuffleMask.size(); i != e; ++i) {
    if (i != 0)
      CS << ",";
    if (ShuffleMask[i] == SM_SentinelZero) {
      CS << "zero";
      continue;
    }

    // Otherwise, it must come from src1 or src2.  Print the span of elements
    // that comes from this src.
    bool isSrc1 = ShuffleMask[i] < (int)e;
    CS << (isSrc1 ? Src1Name : Src2Name) << '[';

    bool IsFirst = true;
    // Greedily consume consecutive lanes drawn from the same source so they
    // print as one bracketed span.
    while (i != e && ShuffleMask[i] != SM_SentinelZero &&
           (ShuffleMask[i] < (int)e) == isSrc1) {
      if (!IsFirst)
        CS << ',';
      else
        IsFirst = false;
      if (ShuffleMask[i] == SM_SentinelUndef)
        CS << "u";
      else
        CS << ShuffleMask[i] % (int)e;
      ++i;
    }
    CS << ']';
    --i; // For loop increments element #.
  }
  CS.flush();

  return Comment;
}
1339
1340
11.1k
static void printConstant(const Constant *COp, raw_ostream &CS) {
1341
11.1k
  if (
isa<UndefValue>(COp)11.1k
) {
1342
3.95k
    CS << "u";
1343
11.1k
  } else 
if (auto *7.14k
CI7.14k
= dyn_cast<ConstantInt>(COp)) {
1344
6.20k
    if (
CI->getBitWidth() <= 646.20k
) {
1345
6.20k
      CS << CI->getZExtValue();
1346
6.20k
    } else {
1347
0
      // print multi-word constant as (w0,w1)
1348
0
      const auto &Val = CI->getValue();
1349
0
      CS << "(";
1350
0
      for (int i = 0, N = Val.getNumWords(); 
i < N0
;
++i0
) {
1351
0
        if (i > 0)
1352
0
          CS << ",";
1353
0
        CS << Val.getRawData()[i];
1354
0
      }
1355
0
      CS << ")";
1356
0
    }
1357
7.14k
  } else 
if (auto *941
CF941
= dyn_cast<ConstantFP>(COp)) {
1358
941
    SmallString<32> Str;
1359
941
    CF->getValueAPF().toString(Str);
1360
941
    CS << Str;
1361
941
  } else {
1362
0
    CS << "?";
1363
0
  }
1364
11.1k
}
1365
1366
845k
/// Lower a MachineInstr to one or more MCInsts and emit them, handling X86
/// pseudo instructions specially: some pseudos expand to instruction
/// sequences or Win64/DWARF CFI directives and return early; shuffle and
/// constant-pool load instructions get a verbose-asm comment describing the
/// decoded mask or constant before falling through to the common lowering
/// at the bottom of the switch.
void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
  X86MCInstLower MCInstLowering(*MF, *this);
  const X86RegisterInfo *RI = MF->getSubtarget<X86Subtarget>().getRegisterInfo();

  // Add a comment about EVEX-2-VEX compression for AVX-512 instrs that
  // are compressed from EVEX encoding to VEX encoding.
  if (TM.Options.MCOptions.ShowMCEncoding) {
    if (MI->getAsmPrinterFlags() & AC_EVEX_2_VEX)
      OutStreamer->AddComment("EVEX TO VEX Compression ", false);
  }

  switch (MI->getOpcode()) {
  case TargetOpcode::DBG_VALUE:
    llvm_unreachable("Should be handled target independently");

  // Emit nothing here but a comment if we can.
  case X86::Int_MemBarrier:
    OutStreamer->emitRawComment("MEMBARRIER");
    return;

  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    // Lower these as normal, but add some comments.
    unsigned Reg = MI->getOperand(0).getReg();
    OutStreamer->AddComment(StringRef("eh_return, addr: %") +
                            X86ATTInstPrinter::getRegisterName(Reg));
    break;
  }
  case X86::CLEANUPRET: {
    // Lower these as normal, but add some comments.
    OutStreamer->AddComment("CLEANUPRET");
    break;
  }

  case X86::CATCHRET: {
    // Lower these as normal, but add some comments.
    OutStreamer->AddComment("CATCHRET");
    break;
  }

  case X86::TAILJMPr:
  case X86::TAILJMPm:
  case X86::TAILJMPd:
  case X86::TAILJMPd_CC:
  case X86::TAILJMPr64:
  case X86::TAILJMPm64:
  case X86::TAILJMPd64:
  case X86::TAILJMPd64_CC:
  case X86::TAILJMPr64_REX:
  case X86::TAILJMPm64_REX:
    // Lower these as normal, but add some comments.
    OutStreamer->AddComment("TAILCALL");
    break;

  case X86::TLS_addr32:
  case X86::TLS_addr64:
  case X86::TLS_base_addr32:
  case X86::TLS_base_addr64:
    return LowerTlsAddr(MCInstLowering, *MI);

  case X86::MOVPC32r: {
    // This is a pseudo op for a two instruction sequence with a label, which
    // looks like:
    //     call "L1$pb"
    // "L1$pb":
    //     popl %esi

    // Emit the call.
    MCSymbol *PICBase = MF->getPICBaseSymbol();
    // FIXME: We would like an efficient form for this, so we don't have to do a
    // lot of extra uniquing.
    EmitAndCountInstruction(MCInstBuilder(X86::CALLpcrel32)
      .addExpr(MCSymbolRefExpr::create(PICBase, OutContext)));

    const X86FrameLowering* FrameLowering =
        MF->getSubtarget<X86Subtarget>().getFrameLowering();
    bool hasFP = FrameLowering->hasFP(*MF);

    // TODO: This is needed only if we require precise CFA.
    bool HasActiveDwarfFrame = OutStreamer->getNumFrameInfos() &&
                               !OutStreamer->getDwarfFrameInfos().back().End;

    int stackGrowth = -RI->getSlotSize();

    // The call above pushed a return address; if there is an open DWARF frame
    // and no frame pointer, compensate the CFA offset for that push.
    if (HasActiveDwarfFrame && !hasFP) {
      OutStreamer->EmitCFIAdjustCfaOffset(-stackGrowth);
    }

    // Emit the label.
    OutStreamer->EmitLabel(PICBase);

    // popl $reg
    EmitAndCountInstruction(MCInstBuilder(X86::POP32r)
                            .addReg(MI->getOperand(0).getReg()));

    // The pop restored the stack pointer; undo the CFA adjustment.
    if (HasActiveDwarfFrame && !hasFP) {
      OutStreamer->EmitCFIAdjustCfaOffset(stackGrowth);
    }
    return;
  }

  case X86::ADD32ri: {
    // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri.
    if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS)
      break;

    // Okay, we have something like:
    //  EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL)

    // For this, we want to print something like:
    //   MYGLOBAL + (. - PICBASE)
    // However, we can't generate a ".", so just emit a new label here and refer
    // to it.
    MCSymbol *DotSym = OutContext.createTempSymbol();
    OutStreamer->EmitLabel(DotSym);

    // Now that we have emitted the label, lower the complex operand expression.
    MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2));

    const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext);
    const MCExpr *PICBase =
      MCSymbolRefExpr::create(MF->getPICBaseSymbol(), OutContext);
    DotExpr = MCBinaryExpr::createSub(DotExpr, PICBase, OutContext);

    DotExpr = MCBinaryExpr::createAdd(MCSymbolRefExpr::create(OpSym,OutContext),
                                      DotExpr, OutContext);

    EmitAndCountInstruction(MCInstBuilder(X86::ADD32ri)
      .addReg(MI->getOperand(0).getReg())
      .addReg(MI->getOperand(1).getReg())
      .addExpr(DotExpr));
    return;
  }
  case TargetOpcode::STATEPOINT:
    return LowerSTATEPOINT(*MI, MCInstLowering);

  case TargetOpcode::FAULTING_OP:
    return LowerFAULTING_OP(*MI, MCInstLowering);

  case TargetOpcode::FENTRY_CALL:
    return LowerFENTRY_CALL(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_OP:
    return LowerPATCHABLE_OP(*MI, MCInstLowering);

  case TargetOpcode::STACKMAP:
    return LowerSTACKMAP(*MI);

  case TargetOpcode::PATCHPOINT:
    return LowerPATCHPOINT(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
    return LowerPATCHABLE_FUNCTION_ENTER(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_RET:
    return LowerPATCHABLE_RET(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_TAIL_CALL:
    return LowerPATCHABLE_TAIL_CALL(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_EVENT_CALL:
    return LowerPATCHABLE_EVENT_CALL(*MI, MCInstLowering);

  case X86::MORESTACK_RET:
    EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
    return;

  case X86::MORESTACK_RET_RESTORE_R10:
    // Return, then restore R10.
    EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
    EmitAndCountInstruction(MCInstBuilder(X86::MOV64rr)
                            .addReg(X86::R10)
                            .addReg(X86::RAX));
    return;

  // The SEH_* pseudos translate directly into Win64 CFI directives.
  case X86::SEH_PushReg:
    assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
    OutStreamer->EmitWinCFIPushReg(RI->getSEHRegNum(MI->getOperand(0).getImm()));
    return;

  case X86::SEH_SaveReg:
    assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
    OutStreamer->EmitWinCFISaveReg(RI->getSEHRegNum(MI->getOperand(0).getImm()),
                                   MI->getOperand(1).getImm());
    return;

  case X86::SEH_SaveXMM:
    assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
    OutStreamer->EmitWinCFISaveXMM(RI->getSEHRegNum(MI->getOperand(0).getImm()),
                                   MI->getOperand(1).getImm());
    return;

  case X86::SEH_StackAlloc:
    assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
    OutStreamer->EmitWinCFIAllocStack(MI->getOperand(0).getImm());
    return;

  case X86::SEH_SetFrame:
    assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
    OutStreamer->EmitWinCFISetFrame(RI->getSEHRegNum(MI->getOperand(0).getImm()),
                                    MI->getOperand(1).getImm());
    return;

  case X86::SEH_PushFrame:
    assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
    OutStreamer->EmitWinCFIPushFrame(MI->getOperand(0).getImm());
    return;

  case X86::SEH_EndPrologue:
    assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
    OutStreamer->EmitWinCFIEndProlog();
    return;

  case X86::SEH_Epilogue: {
    assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
    MachineBasicBlock::const_iterator MBBI(MI);
    // Check if preceded by a call and emit nop if so.
    for (MBBI = PrevCrossBBInst(MBBI);
         MBBI != MachineBasicBlock::const_iterator();
         MBBI = PrevCrossBBInst(MBBI)) {
      // Conservatively assume that pseudo instructions don't emit code and keep
      // looking for a call. We may emit an unnecessary nop in some cases.
      if (!MBBI->isPseudo()) {
        if (MBBI->isCall())
          EmitAndCountInstruction(MCInstBuilder(X86::NOOP));
        break;
      }
    }
    return;
  }

  // Lower PSHUFB and VPERMILP normally but add a comment if we can find
  // a constant shuffle mask. We won't be able to do this at the MC layer
  // because the mask isn't an immediate.
  case X86::PSHUFBrm:
  case X86::VPSHUFBrm:
  case X86::VPSHUFBYrm:
  case X86::VPSHUFBZ128rm:
  case X86::VPSHUFBZ128rmk:
  case X86::VPSHUFBZ128rmkz:
  case X86::VPSHUFBZ256rm:
  case X86::VPSHUFBZ256rmk:
  case X86::VPSHUFBZ256rmkz:
  case X86::VPSHUFBZrm:
  case X86::VPSHUFBZrmk:
  case X86::VPSHUFBZrmkz: {
    if (!OutStreamer->isVerboseAsm())
      break;
    // Masked (k/kz) forms carry extra operands, shifting the source and
    // constant-pool mask operand indices.
    unsigned SrcIdx, MaskIdx;
    switch (MI->getOpcode()) {
    default: llvm_unreachable("Invalid opcode");
    case X86::PSHUFBrm:
    case X86::VPSHUFBrm:
    case X86::VPSHUFBYrm:
    case X86::VPSHUFBZ128rm:
    case X86::VPSHUFBZ256rm:
    case X86::VPSHUFBZrm:
      SrcIdx = 1; MaskIdx = 5; break;
    case X86::VPSHUFBZ128rmkz:
    case X86::VPSHUFBZ256rmkz:
    case X86::VPSHUFBZrmkz:
      SrcIdx = 2; MaskIdx = 6; break;
    case X86::VPSHUFBZ128rmk:
    case X86::VPSHUFBZ256rmk:
    case X86::VPSHUFBZrmk:
      SrcIdx = 3; MaskIdx = 7; break;
    }

    assert(MI->getNumOperands() >= 6 &&
           "We should always have at least 6 operands!");

    const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
    if (auto *C = getConstantFromPool(*MI, MaskOp)) {
      SmallVector<int, 64> Mask;
      DecodePSHUFBMask(C, Mask);
      if (!Mask.empty())
        OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask),
                                !EnablePrintSchedInfo);
    }
    break;
  }

  case X86::VPERMILPSrm:
  case X86::VPERMILPSYrm:
  case X86::VPERMILPSZ128rm:
  case X86::VPERMILPSZ128rmk:
  case X86::VPERMILPSZ128rmkz:
  case X86::VPERMILPSZ256rm:
  case X86::VPERMILPSZ256rmk:
  case X86::VPERMILPSZ256rmkz:
  case X86::VPERMILPSZrm:
  case X86::VPERMILPSZrmk:
  case X86::VPERMILPSZrmkz:
  case X86::VPERMILPDrm:
  case X86::VPERMILPDYrm:
  case X86::VPERMILPDZ128rm:
  case X86::VPERMILPDZ128rmk:
  case X86::VPERMILPDZ128rmkz:
  case X86::VPERMILPDZ256rm:
  case X86::VPERMILPDZ256rmk:
  case X86::VPERMILPDZ256rmkz:
  case X86::VPERMILPDZrm:
  case X86::VPERMILPDZrmk:
  case X86::VPERMILPDZrmkz: {
    if (!OutStreamer->isVerboseAsm())
      break;
    // As above, masked forms shift the operand indices; PS vs PD selects the
    // 32- vs 64-bit element size for mask decoding.
    unsigned SrcIdx, MaskIdx;
    unsigned ElSize;
    switch (MI->getOpcode()) {
    default: llvm_unreachable("Invalid opcode");
    case X86::VPERMILPSrm:
    case X86::VPERMILPSYrm:
    case X86::VPERMILPSZ128rm:
    case X86::VPERMILPSZ256rm:
    case X86::VPERMILPSZrm:
      SrcIdx = 1; MaskIdx = 5; ElSize = 32; break;
    case X86::VPERMILPSZ128rmkz:
    case X86::VPERMILPSZ256rmkz:
    case X86::VPERMILPSZrmkz:
      SrcIdx = 2; MaskIdx = 6; ElSize = 32; break;
    case X86::VPERMILPSZ128rmk:
    case X86::VPERMILPSZ256rmk:
    case X86::VPERMILPSZrmk:
      SrcIdx = 3; MaskIdx = 7; ElSize = 32; break;
    case X86::VPERMILPDrm:
    case X86::VPERMILPDYrm:
    case X86::VPERMILPDZ128rm:
    case X86::VPERMILPDZ256rm:
    case X86::VPERMILPDZrm:
      SrcIdx = 1; MaskIdx = 5; ElSize = 64; break;
    case X86::VPERMILPDZ128rmkz:
    case X86::VPERMILPDZ256rmkz:
    case X86::VPERMILPDZrmkz:
      SrcIdx = 2; MaskIdx = 6; ElSize = 64; break;
    case X86::VPERMILPDZ128rmk:
    case X86::VPERMILPDZ256rmk:
    case X86::VPERMILPDZrmk:
      SrcIdx = 3; MaskIdx = 7; ElSize = 64; break;
    }

    assert(MI->getNumOperands() >= 6 &&
           "We should always have at least 6 operands!");

    const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
    if (auto *C = getConstantFromPool(*MI, MaskOp)) {
      SmallVector<int, 16> Mask;
      DecodeVPERMILPMask(C, ElSize, Mask);
      if (!Mask.empty())
        OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask),
                                !EnablePrintSchedInfo);
    }
    break;
  }

  case X86::VPERMIL2PDrm:
  case X86::VPERMIL2PSrm:
  case X86::VPERMIL2PDYrm:
  case X86::VPERMIL2PSYrm: {
    if (!OutStreamer->isVerboseAsm())
      break;
    assert(MI->getNumOperands() >= 8 &&
           "We should always have at least 8 operands!");

    // The last operand is the immediate control byte for VPERMIL2.
    const MachineOperand &CtrlOp = MI->getOperand(MI->getNumOperands() - 1);
    if (!CtrlOp.isImm())
      break;

    unsigned ElSize;
    switch (MI->getOpcode()) {
    default: llvm_unreachable("Invalid opcode");
    case X86::VPERMIL2PSrm: case X86::VPERMIL2PSYrm: ElSize = 32; break;
    case X86::VPERMIL2PDrm: case X86::VPERMIL2PDYrm: ElSize = 64; break;
    }

    const MachineOperand &MaskOp = MI->getOperand(6);
    if (auto *C = getConstantFromPool(*MI, MaskOp)) {
      SmallVector<int, 16> Mask;
      DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Mask);
      if (!Mask.empty())
        OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask),
                                !EnablePrintSchedInfo);
    }
    break;
  }

  case X86::VPPERMrrm: {
    if (!OutStreamer->isVerboseAsm())
      break;
    assert(MI->getNumOperands() >= 7 &&
           "We should always have at least 7 operands!");

    const MachineOperand &MaskOp = MI->getOperand(6);
    if (auto *C = getConstantFromPool(*MI, MaskOp)) {
      SmallVector<int, 16> Mask;
      DecodeVPPERMMask(C, Mask);
      if (!Mask.empty())
        OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask),
                                !EnablePrintSchedInfo);
    }
    break;
  }

// Case-label macros covering every full-width vector load variant; each
// expands to a run of `case` labels only (no body), so they can be stacked
// with the broadcast cases below.
#define MOV_CASE(Prefix, Suffix)        \
  case X86::Prefix##MOVAPD##Suffix##rm: \
  case X86::Prefix##MOVAPS##Suffix##rm: \
  case X86::Prefix##MOVUPD##Suffix##rm: \
  case X86::Prefix##MOVUPS##Suffix##rm: \
  case X86::Prefix##MOVDQA##Suffix##rm: \
  case X86::Prefix##MOVDQU##Suffix##rm:

#define MOV_AVX512_CASE(Suffix)         \
  case X86::VMOVDQA64##Suffix##rm:      \
  case X86::VMOVDQA32##Suffix##rm:      \
  case X86::VMOVDQU64##Suffix##rm:      \
  case X86::VMOVDQU32##Suffix##rm:      \
  case X86::VMOVDQU16##Suffix##rm:      \
  case X86::VMOVDQU8##Suffix##rm:       \
  case X86::VMOVAPS##Suffix##rm:        \
  case X86::VMOVAPD##Suffix##rm:        \
  case X86::VMOVUPS##Suffix##rm:        \
  case X86::VMOVUPD##Suffix##rm:

#define CASE_ALL_MOV_RM()               \
  MOV_CASE(, )   /* SSE */              \
  MOV_CASE(V, )  /* AVX-128 */          \
  MOV_CASE(V, Y) /* AVX-256 */          \
  MOV_AVX512_CASE(Z)                    \
  MOV_AVX512_CASE(Z256)                 \
  MOV_AVX512_CASE(Z128)

  // For loads from a constant pool to a vector register, print the constant
  // loaded.
  CASE_ALL_MOV_RM()
  case X86::VBROADCASTF128:
  case X86::VBROADCASTI128:
  case X86::VBROADCASTF32X4Z256rm:
  case X86::VBROADCASTF32X4rm:
  case X86::VBROADCASTF32X8rm:
  case X86::VBROADCASTF64X2Z128rm:
  case X86::VBROADCASTF64X2rm:
  case X86::VBROADCASTF64X4rm:
  case X86::VBROADCASTI32X4Z256rm:
  case X86::VBROADCASTI32X4rm:
  case X86::VBROADCASTI32X8rm:
  case X86::VBROADCASTI64X2Z128rm:
  case X86::VBROADCASTI64X2rm:
  case X86::VBROADCASTI64X4rm:
    if (!OutStreamer->isVerboseAsm())
      break;
    if (MI->getNumOperands() <= 4)
      break;
    if (auto *C = getConstantFromPool(*MI, MI->getOperand(4))) {
      int NumLanes = 1;
      // Override NumLanes for the broadcast instructions.
      switch (MI->getOpcode()) {
      case X86::VBROADCASTF128:         NumLanes = 2;  break;
      case X86::VBROADCASTI128:         NumLanes = 2;  break;
      case X86::VBROADCASTF32X4Z256rm:  NumLanes = 2;  break;
      case X86::VBROADCASTF32X4rm:      NumLanes = 4;  break;
      case X86::VBROADCASTF32X8rm:      NumLanes = 2;  break;
      case X86::VBROADCASTF64X2Z128rm:  NumLanes = 2;  break;
      case X86::VBROADCASTF64X2rm:      NumLanes = 4;  break;
      case X86::VBROADCASTF64X4rm:      NumLanes = 2;  break;
      case X86::VBROADCASTI32X4Z256rm:  NumLanes = 2;  break;
      case X86::VBROADCASTI32X4rm:      NumLanes = 4;  break;
      case X86::VBROADCASTI32X8rm:      NumLanes = 2;  break;
      case X86::VBROADCASTI64X2Z128rm:  NumLanes = 2;  break;
      case X86::VBROADCASTI64X2rm:      NumLanes = 4;  break;
      case X86::VBROADCASTI64X4rm:      NumLanes = 2;  break;
      }

      // Build a comment of the form "reg = [e0,e1,...]" (or <...> for a
      // ConstantVector), repeating the constant once per broadcast lane.
      std::string Comment;
      raw_string_ostream CS(Comment);
      const MachineOperand &DstOp = MI->getOperand(0);
      CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
      if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
        CS << "[";
        for (int l = 0; l != NumLanes; ++l) {
          for (int i = 0, NumElements = CDS->getNumElements(); i < NumElements; ++i) {
            if (i != 0 || l != 0)
              CS << ",";
            if (CDS->getElementType()->isIntegerTy())
              CS << CDS->getElementAsInteger(i);
            else if (CDS->getElementType()->isFloatTy())
              CS << CDS->getElementAsFloat(i);
            else if (CDS->getElementType()->isDoubleTy())
              CS << CDS->getElementAsDouble(i);
            else
              CS << "?";
          }
        }
        CS << "]";
        OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo);
      } else if (auto *CV = dyn_cast<ConstantVector>(C)) {
        CS << "<";
        for (int l = 0; l != NumLanes; ++l) {
          for (int i = 0, NumOperands = CV->getNumOperands(); i < NumOperands; ++i) {
            if (i != 0 || l != 0)
              CS << ",";
            printConstant(CV->getOperand(i), CS);
          }
        }
        CS << ">";
        OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo);
      }
    }
    break;
  case X86::VBROADCASTSSrm:
  case X86::VBROADCASTSSYrm:
  case X86::VBROADCASTSSZ128m:
  case X86::VBROADCASTSSZ256m:
  case X86::VBROADCASTSSZm:
  case X86::VBROADCASTSDYrm:
  case X86::VBROADCASTSDZ256m:
  case X86::VBROADCASTSDZm:
  case X86::VPBROADCASTBrm:
  case X86::VPBROADCASTBYrm:
  case X86::VPBROADCASTBZ128m:
  case X86::VPBROADCASTBZ256m:
  case X86::VPBROADCASTBZm:
  case X86::VPBROADCASTDrm:
  case X86::VPBROADCASTDYrm:
  case X86::VPBROADCASTDZ128m:
  case X86::VPBROADCASTDZ256m:
  case X86::VPBROADCASTDZm:
  case X86::VPBROADCASTQrm:
  case X86::VPBROADCASTQYrm:
  case X86::VPBROADCASTQZ128m:
  case X86::VPBROADCASTQZ256m:
  case X86::VPBROADCASTQZm:
  case X86::VPBROADCASTWrm:
  case X86::VPBROADCASTWYrm:
  case X86::VPBROADCASTWZ128m:
  case X86::VPBROADCASTWZ256m:
  case X86::VPBROADCASTWZm:
    if (!OutStreamer->isVerboseAsm())
      break;
    if (MI->getNumOperands() <= 4)
      break;
    if (auto *C = getConstantFromPool(*MI, MI->getOperand(4))) {
      // Number of scalar element repetitions in the destination register.
      int NumElts;
      switch (MI->getOpcode()) {
      default: llvm_unreachable("Invalid opcode");
      case X86::VBROADCASTSSrm:    NumElts = 4;  break;
      case X86::VBROADCASTSSYrm:   NumElts = 8;  break;
      case X86::VBROADCASTSSZ128m: NumElts = 4;  break;
      case X86::VBROADCASTSSZ256m: NumElts = 8;  break;
      case X86::VBROADCASTSSZm:    NumElts = 16; break;
      case X86::VBROADCASTSDYrm:   NumElts = 4;  break;
      case X86::VBROADCASTSDZ256m: NumElts = 4;  break;
      case X86::VBROADCASTSDZm:    NumElts = 8;  break;
      case X86::VPBROADCASTBrm:    NumElts = 16; break;
      case X86::VPBROADCASTBYrm:   NumElts = 32; break;
      case X86::VPBROADCASTBZ128m: NumElts = 16; break;
      case X86::VPBROADCASTBZ256m: NumElts = 32; break;
      case X86::VPBROADCASTBZm:    NumElts = 64; break;
      case X86::VPBROADCASTDrm:    NumElts = 4;  break;
      case X86::VPBROADCASTDYrm:   NumElts = 8;  break;
      case X86::VPBROADCASTDZ128m: NumElts = 4;  break;
      case X86::VPBROADCASTDZ256m: NumElts = 8;  break;
      case X86::VPBROADCASTDZm:    NumElts = 16; break;
      case X86::VPBROADCASTQrm:    NumElts = 2;  break;
      case X86::VPBROADCASTQYrm:   NumElts = 4;  break;
      case X86::VPBROADCASTQZ128m: NumElts = 2;  break;
      case X86::VPBROADCASTQZ256m: NumElts = 4;  break;
      case X86::VPBROADCASTQZm:    NumElts = 8;  break;
      case X86::VPBROADCASTWrm:    NumElts = 8;  break;
      case X86::VPBROADCASTWYrm:   NumElts = 16; break;
      case X86::VPBROADCASTWZ128m: NumElts = 8;  break;
      case X86::VPBROADCASTWZ256m: NumElts = 16; break;
      case X86::VPBROADCASTWZm:    NumElts = 32; break;
      }

      // Build "reg = [c,c,...]" repeating the broadcast scalar NumElts times.
      std::string Comment;
      raw_string_ostream CS(Comment);
      const MachineOperand &DstOp = MI->getOperand(0);
      CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
      CS << "[";
      for (int i = 0; i != NumElts; ++i) {
        if (i != 0)
          CS << ",";
        printConstant(C, CS);
      }
      CS << "]";
      OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo);
    }
    // Last case of the switch: falls through to the common lowering below.
  }

  MCInst TmpInst;
  MCInstLowering.Lower(MI, TmpInst);

  // Stackmap shadows cannot include branch targets, so we can count the bytes
  // in a call towards the shadow, but must ensure that no thread returns
  // into the stackmap shadow.  The only way to achieve this is if the call
  // is at the end of the shadow.
  if (MI->isCall()) {
    // Count the size of the call towards the shadow.
    SMShadowTracker.count(TmpInst, getSubtargetInfo(), CodeEmitter.get());
    // Then flush the shadow so that we fill with nops before the call, not
    // after it.
    SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
    // Then emit the call
    OutStreamer->EmitInstruction(TmpInst, getSubtargetInfo());
    return;
  }

  EmitAndCountInstruction(TmpInst);
}