Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/X86/X86MCInstLower.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- X86MCInstLower.cpp - Convert X86 MachineInstr to an MCInst --------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file contains code to lower X86 MachineInstrs to their corresponding
10
// MCInst records.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "MCTargetDesc/X86ATTInstPrinter.h"
15
#include "MCTargetDesc/X86BaseInfo.h"
16
#include "MCTargetDesc/X86InstComments.h"
17
#include "MCTargetDesc/X86TargetStreamer.h"
18
#include "Utils/X86ShuffleDecode.h"
19
#include "X86AsmPrinter.h"
20
#include "X86RegisterInfo.h"
21
#include "X86ShuffleDecodeConstantPool.h"
22
#include "llvm/ADT/Optional.h"
23
#include "llvm/ADT/SmallString.h"
24
#include "llvm/ADT/iterator_range.h"
25
#include "llvm/CodeGen/MachineConstantPool.h"
26
#include "llvm/CodeGen/MachineFunction.h"
27
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
28
#include "llvm/CodeGen/MachineOperand.h"
29
#include "llvm/CodeGen/StackMaps.h"
30
#include "llvm/IR/DataLayout.h"
31
#include "llvm/IR/GlobalValue.h"
32
#include "llvm/IR/Mangler.h"
33
#include "llvm/MC/MCAsmInfo.h"
34
#include "llvm/MC/MCCodeEmitter.h"
35
#include "llvm/MC/MCContext.h"
36
#include "llvm/MC/MCExpr.h"
37
#include "llvm/MC/MCFixup.h"
38
#include "llvm/MC/MCInst.h"
39
#include "llvm/MC/MCInstBuilder.h"
40
#include "llvm/MC/MCSection.h"
41
#include "llvm/MC/MCSectionELF.h"
42
#include "llvm/MC/MCStreamer.h"
43
#include "llvm/MC/MCSymbol.h"
44
#include "llvm/MC/MCSymbolELF.h"
45
#include "llvm/Target/TargetLoweringObjectFile.h"
46
47
using namespace llvm;
48
49
namespace {
50
51
/// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst.
52
class X86MCInstLower {
53
  MCContext &Ctx;
54
  const MachineFunction &MF;
55
  const TargetMachine &TM;
56
  const MCAsmInfo &MAI;
57
  X86AsmPrinter &AsmPrinter;
58
59
public:
60
  X86MCInstLower(const MachineFunction &MF, X86AsmPrinter &asmprinter);
61
62
  Optional<MCOperand> LowerMachineOperand(const MachineInstr *MI,
63
                                          const MachineOperand &MO) const;
64
  void Lower(const MachineInstr *MI, MCInst &OutMI) const;
65
66
  MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const;
67
  MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
68
69
private:
70
  MachineModuleInfoMachO &getMachOMMI() const;
71
};
72
73
} // end anonymous namespace
74
75
// Emit a minimal sequence of nops spanning NumBytes bytes.
76
static void EmitNops(MCStreamer &OS, unsigned NumBytes, bool Is64Bit,
77
                     const MCSubtargetInfo &STI);
78
79
void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst,
80
                                                 const MCSubtargetInfo &STI,
81
2.41M
                                                 MCCodeEmitter *CodeEmitter) {
82
2.41M
  if (InShadow) {
83
11.0k
    SmallString<256> Code;
84
11.0k
    SmallVector<MCFixup, 4> Fixups;
85
11.0k
    raw_svector_ostream VecOS(Code);
86
11.0k
    CodeEmitter->encodeInstruction(Inst, VecOS, Fixups, STI);
87
11.0k
    CurrentShadowSize += Code.size();
88
11.0k
    if (CurrentShadowSize >= RequiredShadowSize)
89
10.9k
      InShadow = false; // The shadow is big enough. Stop counting.
90
11.0k
  }
91
2.41M
}
92
93
void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding(
94
530k
    MCStreamer &OutStreamer, const MCSubtargetInfo &STI) {
95
530k
  if (InShadow && 
CurrentShadowSize < RequiredShadowSize140
) {
96
73
    InShadow = false;
97
73
    EmitNops(OutStreamer, RequiredShadowSize - CurrentShadowSize,
98
73
             MF->getSubtarget<X86Subtarget>().is64Bit(), STI);
99
73
  }
100
530k
}
101
102
2.27M
void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) {
103
2.27M
  OutStreamer->EmitInstruction(Inst, getSubtargetInfo());
104
2.27M
  SMShadowTracker.count(Inst, getSubtargetInfo(), CodeEmitter.get());
105
2.27M
}
106
107
X86MCInstLower::X86MCInstLower(const MachineFunction &mf,
108
                               X86AsmPrinter &asmprinter)
109
    : Ctx(mf.getContext()), MF(mf), TM(mf.getTarget()), MAI(*TM.getMCAsmInfo()),
110
2.41M
      AsmPrinter(asmprinter) {}
111
112
3.00k
MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
113
3.00k
  return MF.getMMI().getObjFileInfo<MachineModuleInfoMachO>();
114
3.00k
}
115
116
/// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol
117
/// operand to an MCSymbol.
118
415k
MCSymbol *X86MCInstLower::GetSymbolFromOperand(const MachineOperand &MO) const {
119
415k
  const DataLayout &DL = MF.getDataLayout();
120
415k
  assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) &&
121
415k
         "Isn't a symbol reference");
122
415k
123
415k
  MCSymbol *Sym = nullptr;
124
415k
  SmallString<128> Name;
125
415k
  StringRef Suffix;
126
415k
127
415k
  switch (MO.getTargetFlags()) {
128
415k
  case X86II::MO_DLLIMPORT:
129
64
    // Handle dllimport linkage.
130
64
    Name += "__imp_";
131
64
    break;
132
415k
  case X86II::MO_COFFSTUB:
133
71
    Name += ".refptr.";
134
71
    break;
135
415k
  case X86II::MO_DARWIN_NONLAZY:
136
3.00k
  case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
137
3.00k
    Suffix = "$non_lazy_ptr";
138
3.00k
    break;
139
415k
  }
140
415k
141
415k
  if (!Suffix.empty())
142
3.00k
    Name += DL.getPrivateGlobalPrefix();
143
415k
144
415k
  if (MO.isGlobal()) {
145
218k
    const GlobalValue *GV = MO.getGlobal();
146
218k
    AsmPrinter.getNameWithPrefix(Name, GV);
147
218k
  } else 
if (197k
MO.isSymbol()197k
) {
148
3.52k
    Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL);
149
194k
  } else 
if (194k
MO.isMBB()194k
) {
150
194k
    assert(Suffix.empty());
151
194k
    Sym = MO.getMBB()->getSymbol();
152
194k
  }
153
415k
154
415k
  Name += Suffix;
155
415k
  if (!Sym)
156
221k
    Sym = Ctx.getOrCreateSymbol(Name);
157
415k
158
415k
  // If the target flags on the operand changes the name of the symbol, do that
159
415k
  // before we return the symbol.
160
415k
  switch (MO.getTargetFlags()) {
161
415k
  default:
162
412k
    break;
163
415k
  case X86II::MO_COFFSTUB: {
164
71
    MachineModuleInfoCOFF &MMICOFF =
165
71
        MF.getMMI().getObjFileInfo<MachineModuleInfoCOFF>();
166
71
    MachineModuleInfoImpl::StubValueTy &StubSym = MMICOFF.getGVStubEntry(Sym);
167
71
    if (!StubSym.getPointer()) {
168
12
      assert(MO.isGlobal() && "Extern symbol not handled yet");
169
12
      StubSym = MachineModuleInfoImpl::StubValueTy(
170
12
          AsmPrinter.getSymbol(MO.getGlobal()), true);
171
12
    }
172
71
    break;
173
415k
  }
174
415k
  case X86II::MO_DARWIN_NONLAZY:
175
3.00k
  case X86II::MO_DARWIN_NONLAZY_PIC_BASE: {
176
3.00k
    MachineModuleInfoImpl::StubValueTy &StubSym =
177
3.00k
        getMachOMMI().getGVStubEntry(Sym);
178
3.00k
    if (!StubSym.getPointer()) {
179
605
      assert(MO.isGlobal() && "Extern symbol not handled yet");
180
605
      StubSym = MachineModuleInfoImpl::StubValueTy(
181
605
          AsmPrinter.getSymbol(MO.getGlobal()),
182
605
          !MO.getGlobal()->hasInternalLinkage());
183
605
    }
184
3.00k
    break;
185
415k
  }
186
415k
  }
187
415k
188
415k
  return Sym;
189
415k
}
190
191
MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
192
447k
                                             MCSymbol *Sym) const {
193
447k
  // FIXME: We would like an efficient form for this, so we don't have to do a
194
447k
  // lot of extra uniquing.
195
447k
  const MCExpr *Expr = nullptr;
196
447k
  MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;
197
447k
198
447k
  switch (MO.getTargetFlags()) {
199
447k
  default:
200
0
    llvm_unreachable("Unknown target flag on GV operand");
201
447k
  case X86II::MO_NO_FLAG: // No flag.
202
416k
  // These affect the name of the symbol, not any suffix.
203
416k
  case X86II::MO_DARWIN_NONLAZY:
204
416k
  case X86II::MO_DLLIMPORT:
205
416k
  case X86II::MO_COFFSTUB:
206
416k
    break;
207
416k
208
416k
  case X86II::MO_TLVP:
209
355
    RefKind = MCSymbolRefExpr::VK_TLVP;
210
355
    break;
211
416k
  case X86II::MO_TLVP_PIC_BASE:
212
0
    Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
213
0
    // Subtract the pic base.
214
0
    Expr = MCBinaryExpr::createSub(
215
0
        Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx);
216
0
    break;
217
416k
  case X86II::MO_SECREL:
218
77
    RefKind = MCSymbolRefExpr::VK_SECREL;
219
77
    break;
220
416k
  case X86II::MO_TLSGD:
221
0
    RefKind = MCSymbolRefExpr::VK_TLSGD;
222
0
    break;
223
416k
  case X86II::MO_TLSLD:
224
0
    RefKind = MCSymbolRefExpr::VK_TLSLD;
225
0
    break;
226
416k
  case X86II::MO_TLSLDM:
227
0
    RefKind = MCSymbolRefExpr::VK_TLSLDM;
228
0
    break;
229
416k
  case X86II::MO_GOTTPOFF:
230
40
    RefKind = MCSymbolRefExpr::VK_GOTTPOFF;
231
40
    break;
232
416k
  case X86II::MO_INDNTPOFF:
233
14
    RefKind = MCSymbolRefExpr::VK_INDNTPOFF;
234
14
    break;
235
416k
  case X86II::MO_TPOFF:
236
67
    RefKind = MCSymbolRefExpr::VK_TPOFF;
237
67
    break;
238
416k
  case X86II::MO_DTPOFF:
239
26
    RefKind = MCSymbolRefExpr::VK_DTPOFF;
240
26
    break;
241
416k
  case X86II::MO_NTPOFF:
242
54
    RefKind = MCSymbolRefExpr::VK_NTPOFF;
243
54
    break;
244
416k
  case X86II::MO_GOTNTPOFF:
245
7
    RefKind = MCSymbolRefExpr::VK_GOTNTPOFF;
246
7
    break;
247
416k
  case X86II::MO_GOTPCREL:
248
17.8k
    RefKind = MCSymbolRefExpr::VK_GOTPCREL;
249
17.8k
    break;
250
416k
  case X86II::MO_GOT:
251
252
    RefKind = MCSymbolRefExpr::VK_GOT;
252
252
    break;
253
416k
  case X86II::MO_GOTOFF:
254
169
    RefKind = MCSymbolRefExpr::VK_GOTOFF;
255
169
    break;
256
416k
  case X86II::MO_PLT:
257
395
    RefKind = MCSymbolRefExpr::VK_PLT;
258
395
    break;
259
416k
  case X86II::MO_ABS8:
260
9
    RefKind = MCSymbolRefExpr::VK_X86_ABS8;
261
9
    break;
262
416k
  case X86II::MO_PIC_BASE_OFFSET:
263
12.0k
  case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
264
12.0k
    Expr = MCSymbolRefExpr::create(Sym, Ctx);
265
12.0k
    // Subtract the pic base.
266
12.0k
    Expr = MCBinaryExpr::createSub(
267
12.0k
        Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx);
268
12.0k
    if (MO.isJTI()) {
269
54
      assert(MAI.doesSetDirectiveSuppressReloc());
270
54
      // If .set directive is supported, use it to reduce the number of
271
54
      // relocations the assembler will generate for differences between
272
54
      // local labels. This is only safe when the symbols are in the same
273
54
      // section so we are restricting it to jumptable references.
274
54
      MCSymbol *Label = Ctx.createTempSymbol();
275
54
      AsmPrinter.OutStreamer->EmitAssignment(Label, Expr);
276
54
      Expr = MCSymbolRefExpr::create(Label, Ctx);
277
54
    }
278
12.0k
    break;
279
447k
  }
280
447k
281
447k
  if (!Expr)
282
435k
    Expr = MCSymbolRefExpr::create(Sym, RefKind, Ctx);
283
447k
284
447k
  if (!MO.isJTI() && 
!MO.isMBB()447k
&&
MO.getOffset()252k
)
285
6.35k
    Expr = MCBinaryExpr::createAdd(
286
6.35k
        Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
287
447k
  return MCOperand::createExpr(Expr);
288
447k
}
289
290
/// Simplify FOO $imm, %{al,ax,eax,rax} to FOO $imm, for instruction with
291
/// a short fixed-register form.
292
33.2k
static void SimplifyShortImmForm(MCInst &Inst, unsigned Opcode) {
293
33.2k
  unsigned ImmOp = Inst.getNumOperands() - 1;
294
33.2k
  assert(Inst.getOperand(0).isReg() &&
295
33.2k
         (Inst.getOperand(ImmOp).isImm() || Inst.getOperand(ImmOp).isExpr()) &&
296
33.2k
         ((Inst.getNumOperands() == 3 && Inst.getOperand(1).isReg() &&
297
33.2k
           Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) ||
298
33.2k
          Inst.getNumOperands() == 2) &&
299
33.2k
         "Unexpected instruction!");
300
33.2k
301
33.2k
  // Check whether the destination register can be fixed.
302
33.2k
  unsigned Reg = Inst.getOperand(0).getReg();
303
33.2k
  if (Reg != X86::AL && 
Reg != X86::AX25.4k
&&
Reg != X86::EAX25.4k
&&
Reg != X86::RAX22.7k
)
304
22.0k
    return;
305
11.1k
306
11.1k
  // If so, rewrite the instruction.
307
11.1k
  MCOperand Saved = Inst.getOperand(ImmOp);
308
11.1k
  Inst = MCInst();
309
11.1k
  Inst.setOpcode(Opcode);
310
11.1k
  Inst.addOperand(Saved);
311
11.1k
}
312
313
/// If a movsx instruction has a shorter encoding for the used register
314
/// simplify the instruction to use it instead.
315
1.63k
static void SimplifyMOVSX(MCInst &Inst) {
316
1.63k
  unsigned NewOpcode = 0;
317
1.63k
  unsigned Op0 = Inst.getOperand(0).getReg(), Op1 = Inst.getOperand(1).getReg();
318
1.63k
  switch (Inst.getOpcode()) {
319
1.63k
  default:
320
0
    llvm_unreachable("Unexpected instruction!");
321
1.63k
  case X86::MOVSX16rr8: // movsbw %al, %ax   --> cbtw
322
2
    if (Op0 == X86::AX && Op1 == X86::AL)
323
0
      NewOpcode = X86::CBW;
324
2
    break;
325
1.63k
  case X86::MOVSX32rr16: // movswl %ax, %eax  --> cwtl
326
405
    if (Op0 == X86::EAX && 
Op1 == X86::AX174
)
327
86
      NewOpcode = X86::CWDE;
328
405
    break;
329
1.63k
  case X86::MOVSX64rr32: // movslq %eax, %rax --> cltq
330
1.23k
    if (Op0 == X86::RAX && 
Op1 == X86::EAX574
)
331
273
      NewOpcode = X86::CDQE;
332
1.23k
    break;
333
1.63k
  }
334
1.63k
335
1.63k
  if (NewOpcode != 0) {
336
359
    Inst = MCInst();
337
359
    Inst.setOpcode(NewOpcode);
338
359
  }
339
1.63k
}
340
341
/// Simplify things like MOV32rm to MOV32o32a.
342
static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst,
343
103k
                                  unsigned Opcode) {
344
103k
  // Don't make these simplifications in 64-bit mode; other assemblers don't
345
103k
  // perform them because they make the code larger.
346
103k
  if (Printer.getSubtarget().is64Bit())
347
33.2k
    return;
348
70.1k
349
70.1k
  bool IsStore = Inst.getOperand(0).isReg() && Inst.getOperand(1).isReg();
350
70.1k
  unsigned AddrBase = IsStore;
351
70.1k
  unsigned RegOp = IsStore ? 
045.5k
:
524.6k
;
352
70.1k
  unsigned AddrOp = AddrBase + 3;
353
70.1k
  assert(
354
70.1k
      Inst.getNumOperands() == 6 && Inst.getOperand(RegOp).isReg() &&
355
70.1k
      Inst.getOperand(AddrBase + X86::AddrBaseReg).isReg() &&
356
70.1k
      Inst.getOperand(AddrBase + X86::AddrScaleAmt).isImm() &&
357
70.1k
      Inst.getOperand(AddrBase + X86::AddrIndexReg).isReg() &&
358
70.1k
      Inst.getOperand(AddrBase + X86::AddrSegmentReg).isReg() &&
359
70.1k
      (Inst.getOperand(AddrOp).isExpr() || Inst.getOperand(AddrOp).isImm()) &&
360
70.1k
      "Unexpected instruction!");
361
70.1k
362
70.1k
  // Check whether the destination register can be fixed.
363
70.1k
  unsigned Reg = Inst.getOperand(RegOp).getReg();
364
70.1k
  if (Reg != X86::AL && 
Reg != X86::AX68.3k
&&
Reg != X86::EAX67.7k
&&
Reg != X86::RAX40.3k
)
365
40.3k
    return;
366
29.8k
367
29.8k
  // Check whether this is an absolute address.
368
29.8k
  // FIXME: We know TLVP symbol refs aren't, but there should be a better way
369
29.8k
  // to do this here.
370
29.8k
  bool Absolute = true;
371
29.8k
  if (Inst.getOperand(AddrOp).isExpr()) {
372
2.64k
    const MCExpr *MCE = Inst.getOperand(AddrOp).getExpr();
373
2.64k
    if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(MCE))
374
802
      if (SRE->getKind() == MCSymbolRefExpr::VK_TLVP)
375
1
        Absolute = false;
376
2.64k
  }
377
29.8k
378
29.8k
  if (Absolute &&
379
29.8k
      
(29.8k
Inst.getOperand(AddrBase + X86::AddrBaseReg).getReg() != 029.8k
||
380
29.8k
       
Inst.getOperand(AddrBase + X86::AddrScaleAmt).getImm() != 1910
||
381
29.8k
       
Inst.getOperand(AddrBase + X86::AddrIndexReg).getReg() != 0902
))
382
28.9k
    return;
383
903
384
903
  // If so, rewrite the instruction.
385
903
  MCOperand Saved = Inst.getOperand(AddrOp);
386
903
  MCOperand Seg = Inst.getOperand(AddrBase + X86::AddrSegmentReg);
387
903
  Inst = MCInst();
388
903
  Inst.setOpcode(Opcode);
389
903
  Inst.addOperand(Saved);
390
903
  Inst.addOperand(Seg);
391
903
}
392
393
277
static unsigned getRetOpcode(const X86Subtarget &Subtarget) {
394
277
  return Subtarget.is64Bit() ? 
X86::RETQ178
:
X86::RETL99
;
395
277
}
396
397
Optional<MCOperand>
398
X86MCInstLower::LowerMachineOperand(const MachineInstr *MI,
399
9.90M
                                    const MachineOperand &MO) const {
400
9.90M
  switch (MO.getType()) {
401
9.90M
  default:
402
0
    MI->print(errs());
403
0
    llvm_unreachable("unknown operand type");
404
9.90M
  case MachineOperand::MO_Register:
405
7.57M
    // Ignore all implicit register operands.
406
7.57M
    if (MO.isImplicit())
407
2.41M
      return None;
408
5.15M
    return MCOperand::createReg(MO.getReg());
409
5.15M
  case MachineOperand::MO_Immediate:
410
1.74M
    return MCOperand::createImm(MO.getImm());
411
5.15M
  case MachineOperand::MO_MachineBasicBlock:
412
415k
  case MachineOperand::MO_GlobalAddress:
413
415k
  case MachineOperand::MO_ExternalSymbol:
414
415k
    return LowerSymbolOperand(MO, GetSymbolFromOperand(MO));
415
415k
  case MachineOperand::MO_MCSymbol:
416
179
    return LowerSymbolOperand(MO, MO.getMCSymbol());
417
415k
  case MachineOperand::MO_JumpTableIndex:
418
344
    return LowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex()));
419
415k
  case MachineOperand::MO_ConstantPoolIndex:
420
31.2k
    return LowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex()));
421
415k
  case MachineOperand::MO_BlockAddress:
422
26
    return LowerSymbolOperand(
423
26
        MO, AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()));
424
415k
  case MachineOperand::MO_RegisterMask:
425
140k
    // Ignore call clobbers.
426
140k
    return None;
427
9.90M
  }
428
9.90M
}
429
430
2.40M
void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
431
2.40M
  OutMI.setOpcode(MI->getOpcode());
432
2.40M
433
2.40M
  for (const MachineOperand &MO : MI->operands())
434
9.90M
    if (auto MaybeMCOp = LowerMachineOperand(MI, MO))
435
7.34M
      OutMI.addOperand(MaybeMCOp.getValue());
436
2.40M
437
2.40M
  // Handle a few special cases to eliminate operand modifiers.
438
2.40M
  switch (OutMI.getOpcode()) {
439
2.40M
  case X86::LEA64_32r:
440
106k
  case X86::LEA64r:
441
106k
  case X86::LEA16r:
442
106k
  case X86::LEA32r:
443
106k
    // LEA should have a segment register, but it must be empty.
444
106k
    assert(OutMI.getNumOperands() == 1 + X86::AddrNumOperands &&
445
106k
           "Unexpected # of LEA operands");
446
106k
    assert(OutMI.getOperand(1 + X86::AddrSegmentReg).getReg() == 0 &&
447
106k
           "LEA has segment specified!");
448
106k
    break;
449
106k
450
106k
  // Commute operands to get a smaller encoding by using VEX.R instead of VEX.B
451
106k
  // if one of the registers is extended, but other isn't.
452
106k
  case X86::VMOVZPQILo2PQIrr:
453
3.32k
  case X86::VMOVAPDrr:
454
3.32k
  case X86::VMOVAPDYrr:
455
3.32k
  case X86::VMOVAPSrr:
456
3.32k
  case X86::VMOVAPSYrr:
457
3.32k
  case X86::VMOVDQArr:
458
3.32k
  case X86::VMOVDQAYrr:
459
3.32k
  case X86::VMOVDQUrr:
460
3.32k
  case X86::VMOVDQUYrr:
461
3.32k
  case X86::VMOVUPDrr:
462
3.32k
  case X86::VMOVUPDYrr:
463
3.32k
  case X86::VMOVUPSrr:
464
3.32k
  case X86::VMOVUPSYrr: {
465
3.32k
    if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
466
3.32k
        
X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg())3.28k
) {
467
13
      unsigned NewOpc;
468
13
      switch (OutMI.getOpcode()) {
469
13
      
default: 0
llvm_unreachable0
("Invalid opcode");
470
13
      
case X86::VMOVZPQILo2PQIrr: NewOpc = X86::VMOVPQI2QIrr; break0
;
471
13
      
case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break0
;
472
13
      
case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break0
;
473
13
      
case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break4
;
474
13
      
case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break1
;
475
13
      
case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break4
;
476
13
      
case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break4
;
477
13
      
case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break0
;
478
13
      
case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break0
;
479
13
      
case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break0
;
480
13
      
case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break0
;
481
13
      
case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break0
;
482
13
      
case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break0
;
483
13
      }
484
13
      OutMI.setOpcode(NewOpc);
485
13
    }
486
3.32k
    break;
487
3.32k
  }
488
3.32k
  case X86::VMOVSDrr:
489
18
  case X86::VMOVSSrr: {
490
18
    if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
491
18
        X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) {
492
0
      unsigned NewOpc;
493
0
      switch (OutMI.getOpcode()) {
494
0
      default: llvm_unreachable("Invalid opcode");
495
0
      case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
496
0
      case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
497
0
      }
498
0
      OutMI.setOpcode(NewOpc);
499
0
    }
500
18
    break;
501
18
  }
502
18
503
18
  // TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have register
504
18
  // inputs modeled as normal uses instead of implicit uses.  As such, truncate
505
18
  // off all but the first operand (the callee).  FIXME: Change isel.
506
113k
  case X86::TAILJMPr64:
507
113k
  case X86::TAILJMPr64_REX:
508
113k
  case X86::CALL64r:
509
113k
  case X86::CALL64pcrel32: {
510
113k
    unsigned Opcode = OutMI.getOpcode();
511
113k
    MCOperand Saved = OutMI.getOperand(0);
512
113k
    OutMI = MCInst();
513
113k
    OutMI.setOpcode(Opcode);
514
113k
    OutMI.addOperand(Saved);
515
113k
    break;
516
113k
  }
517
113k
518
113k
  case X86::EH_RETURN:
519
6
  case X86::EH_RETURN64: {
520
6
    OutMI = MCInst();
521
6
    OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
522
6
    break;
523
6
  }
524
6
525
33
  case X86::CLEANUPRET: {
526
33
    // Replace CLEANUPRET with the appropriate RET.
527
33
    OutMI = MCInst();
528
33
    OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
529
33
    break;
530
6
  }
531
6
532
63
  case X86::CATCHRET: {
533
63
    // Replace CATCHRET with the appropriate RET.
534
63
    const X86Subtarget &Subtarget = AsmPrinter.getSubtarget();
535
63
    unsigned ReturnReg = Subtarget.is64Bit() ? 
X86::RAX43
:
X86::EAX20
;
536
63
    OutMI = MCInst();
537
63
    OutMI.setOpcode(getRetOpcode(Subtarget));
538
63
    OutMI.addOperand(MCOperand::createReg(ReturnReg));
539
63
    break;
540
6
  }
541
6
542
6
    // TAILJMPd, TAILJMPd64, TailJMPd_cc - Lower to the correct jump
543
6
    // instruction.
544
6
    {
545
0
      unsigned Opcode;
546
68
    case X86::TAILJMPr:
547
68
      Opcode = X86::JMP32r;
548
68
      goto SetTailJmpOpcode;
549
6.98k
    case X86::TAILJMPd:
550
6.98k
    case X86::TAILJMPd64:
551
6.98k
      Opcode = X86::JMP_1;
552
6.98k
      goto SetTailJmpOpcode;
553
7.05k
554
7.05k
    SetTailJmpOpcode:
555
7.05k
      MCOperand Saved = OutMI.getOperand(0);
556
7.05k
      OutMI = MCInst();
557
7.05k
      OutMI.setOpcode(Opcode);
558
7.05k
      OutMI.addOperand(Saved);
559
7.05k
      break;
560
6.98k
    }
561
6.98k
562
6.98k
  case X86::TAILJMPd_CC:
563
28
  case X86::TAILJMPd64_CC: {
564
28
    MCOperand Saved = OutMI.getOperand(0);
565
28
    MCOperand Saved2 = OutMI.getOperand(1);
566
28
    OutMI = MCInst();
567
28
    OutMI.setOpcode(X86::JCC_1);
568
28
    OutMI.addOperand(Saved);
569
28
    OutMI.addOperand(Saved2);
570
28
    break;
571
28
  }
572
28
573
3.94k
  case X86::DEC16r:
574
3.94k
  case X86::DEC32r:
575
3.94k
  case X86::INC16r:
576
3.94k
  case X86::INC32r:
577
3.94k
    // If we aren't in 64-bit mode we can use the 1-byte inc/dec instructions.
578
3.94k
    if (!AsmPrinter.getSubtarget().is64Bit()) {
579
2.02k
      unsigned Opcode;
580
2.02k
      switch (OutMI.getOpcode()) {
581
2.02k
      
default: 0
llvm_unreachable0
("Invalid opcode");
582
2.02k
      
case X86::DEC16r: Opcode = X86::DEC16r_alt; break0
;
583
2.02k
      
case X86::DEC32r: Opcode = X86::DEC32r_alt; break624
;
584
2.02k
      
case X86::INC16r: Opcode = X86::INC16r_alt; break0
;
585
2.02k
      
case X86::INC32r: Opcode = X86::INC32r_alt; break1.40k
;
586
2.02k
      }
587
2.02k
      OutMI.setOpcode(Opcode);
588
2.02k
    }
589
3.94k
    break;
590
3.94k
591
3.94k
  // We don't currently select the correct instruction form for instructions
592
3.94k
  // which have a short %eax, etc. form. Handle this by custom lowering, for
593
3.94k
  // now.
594
3.94k
  //
595
3.94k
  // Note, we are currently not handling the following instructions:
596
3.94k
  // MOV64ao8, MOV64o8a
597
3.94k
  // XCHG16ar, XCHG32ar, XCHG64ar
598
103k
  case X86::MOV8mr_NOREX:
599
103k
  case X86::MOV8mr:
600
103k
  case X86::MOV8rm_NOREX:
601
103k
  case X86::MOV8rm:
602
103k
  case X86::MOV16mr:
603
103k
  case X86::MOV16rm:
604
103k
  case X86::MOV32mr:
605
103k
  case X86::MOV32rm: {
606
103k
    unsigned NewOpc;
607
103k
    switch (OutMI.getOpcode()) {
608
103k
    
default: 0
llvm_unreachable0
("Invalid opcode");
609
103k
    case X86::MOV8mr_NOREX:
610
5.32k
    case X86::MOV8mr:  NewOpc = X86::MOV8o32a; break;
611
6.16k
    case X86::MOV8rm_NOREX:
612
6.16k
    case X86::MOV8rm:  NewOpc = X86::MOV8ao32; break;
613
6.16k
    
case X86::MOV16mr: NewOpc = X86::MOV16o32a; break3.12k
;
614
6.16k
    
case X86::MOV16rm: NewOpc = X86::MOV16ao32; break90
;
615
32.5k
    case X86::MOV32mr: NewOpc = X86::MOV32o32a; break;
616
56.2k
    case X86::MOV32rm: NewOpc = X86::MOV32ao32; break;
617
103k
    }
618
103k
    SimplifyShortMoveForm(AsmPrinter, OutMI, NewOpc);
619
103k
    break;
620
103k
  }
621
103k
622
103k
  
case X86::ADC8ri: 33.2k
case X86::ADC16ri: 33.2k
case X86::ADC32ri: 33.2k
case X86::ADC64ri32:
623
33.2k
  case X86::ADD8ri: case X86::ADD16ri: case X86::ADD32ri: case X86::ADD64ri32:
624
33.2k
  case X86::AND8ri: case X86::AND16ri: case X86::AND32ri: case X86::AND64ri32:
625
33.2k
  case X86::CMP8ri: case X86::CMP16ri: case X86::CMP32ri: case X86::CMP64ri32:
626
33.2k
  case X86::OR8ri:  case X86::OR16ri:  case X86::OR32ri:  case X86::OR64ri32:
627
33.2k
  case X86::SBB8ri: case X86::SBB16ri: case X86::SBB32ri: case X86::SBB64ri32:
628
33.2k
  case X86::SUB8ri: case X86::SUB16ri: case X86::SUB32ri: case X86::SUB64ri32:
629
33.2k
  case X86::TEST8ri:case X86::TEST16ri:case X86::TEST32ri:case X86::TEST64ri32:
630
33.2k
  case X86::XOR8ri: case X86::XOR16ri: case X86::XOR32ri: case X86::XOR64ri32: {
631
33.2k
    unsigned NewOpc;
632
33.2k
    switch (OutMI.getOpcode()) {
633
33.2k
    
default: 0
llvm_unreachable0
("Invalid opcode");
634
33.2k
    
case X86::ADC8ri: NewOpc = X86::ADC8i8; break5
;
635
33.2k
    
case X86::ADC16ri: NewOpc = X86::ADC16i16; break0
;
636
33.2k
    
case X86::ADC32ri: NewOpc = X86::ADC32i32; break13
;
637
33.2k
    
case X86::ADC64ri32: NewOpc = X86::ADC64i32; break0
;
638
33.2k
    
case X86::ADD8ri: NewOpc = X86::ADD8i8; break571
;
639
33.2k
    
case X86::ADD16ri: NewOpc = X86::ADD16i16; break1
;
640
33.2k
    
case X86::ADD32ri: NewOpc = X86::ADD32i32; break1.32k
;
641
33.2k
    
case X86::ADD64ri32: NewOpc = X86::ADD64i32; break3.68k
;
642
33.2k
    
case X86::AND8ri: NewOpc = X86::AND8i8; break2.58k
;
643
33.2k
    
case X86::AND16ri: NewOpc = X86::AND16i16; break6
;
644
33.2k
    
case X86::AND32ri: NewOpc = X86::AND32i32; break4.02k
;
645
33.2k
    
case X86::AND64ri32: NewOpc = X86::AND64i32; break369
;
646
33.2k
    
case X86::CMP8ri: NewOpc = X86::CMP8i8; break1.98k
;
647
33.2k
    
case X86::CMP16ri: NewOpc = X86::CMP16i16; break8
;
648
33.2k
    
case X86::CMP32ri: NewOpc = X86::CMP32i32; break2.15k
;
649
33.2k
    
case X86::CMP64ri32: NewOpc = X86::CMP64i32; break1.74k
;
650
33.2k
    
case X86::OR8ri: NewOpc = X86::OR8i8; break57
;
651
33.2k
    
case X86::OR16ri: NewOpc = X86::OR16i16; break0
;
652
33.2k
    
case X86::OR32ri: NewOpc = X86::OR32i32; break1.26k
;
653
33.2k
    
case X86::OR64ri32: NewOpc = X86::OR64i32; break42
;
654
33.2k
    
case X86::SBB8ri: NewOpc = X86::SBB8i8; break2
;
655
33.2k
    
case X86::SBB16ri: NewOpc = X86::SBB16i16; break0
;
656
33.2k
    
case X86::SBB32ri: NewOpc = X86::SBB32i32; break12
;
657
33.2k
    
case X86::SBB64ri32: NewOpc = X86::SBB64i32; break0
;
658
33.2k
    
case X86::SUB8ri: NewOpc = X86::SUB8i8; break10
;
659
33.2k
    
case X86::SUB16ri: NewOpc = X86::SUB16i16; break0
;
660
33.2k
    
case X86::SUB32ri: NewOpc = X86::SUB32i32; break568
;
661
33.2k
    
case X86::SUB64ri32: NewOpc = X86::SUB64i32; break2.06k
;
662
33.2k
    
case X86::TEST8ri: NewOpc = X86::TEST8i8; break8.85k
;
663
33.2k
    
case X86::TEST16ri: NewOpc = X86::TEST16i16; break18
;
664
33.2k
    
case X86::TEST32ri: NewOpc = X86::TEST32i32; break815
;
665
33.2k
    
case X86::TEST64ri32: NewOpc = X86::TEST64i32; break19
;
666
33.2k
    
case X86::XOR8ri: NewOpc = X86::XOR8i8; break650
;
667
33.2k
    
case X86::XOR16ri: NewOpc = X86::XOR16i16; break0
;
668
33.2k
    
case X86::XOR32ri: NewOpc = X86::XOR32i32; break358
;
669
33.2k
    
case X86::XOR64ri32: NewOpc = X86::XOR64i32; break14
;
670
33.2k
    }
671
33.2k
    SimplifyShortImmForm(OutMI, NewOpc);
672
33.2k
    break;
673
33.2k
  }
674
33.2k
675
33.2k
  // Try to shrink some forms of movsx.
676
33.2k
  case X86::MOVSX16rr8:
677
1.63k
  case X86::MOVSX32rr16:
678
1.63k
  case X86::MOVSX64rr32:
679
1.63k
    SimplifyMOVSX(OutMI);
680
1.63k
    break;
681
2.40M
  }
682
2.40M
}
683
684
void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
685
58
                                 const MachineInstr &MI) {
686
58
  bool Is64Bits = MI.getOpcode() == X86::TLS_addr64 ||
687
58
                  
MI.getOpcode() == X86::TLS_base_addr6440
;
688
58
  MCContext &Ctx = OutStreamer->getContext();
689
58
690
58
  MCSymbolRefExpr::VariantKind SRVK;
691
58
  switch (MI.getOpcode()) {
692
58
  case X86::TLS_addr32:
693
37
  case X86::TLS_addr64:
694
37
    SRVK = MCSymbolRefExpr::VK_TLSGD;
695
37
    break;
696
37
  case X86::TLS_base_addr32:
697
11
    SRVK = MCSymbolRefExpr::VK_TLSLDM;
698
11
    break;
699
37
  case X86::TLS_base_addr64:
700
10
    SRVK = MCSymbolRefExpr::VK_TLSLD;
701
10
    break;
702
37
  default:
703
0
    llvm_unreachable("unexpected opcode");
704
58
  }
705
58
706
58
  const MCSymbolRefExpr *Sym = MCSymbolRefExpr::create(
707
58
      MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)), SRVK, Ctx);
708
58
709
58
  // As of binutils 2.32, ld has a bogus TLS relaxation error when the GD/LD
710
58
  // code sequence using R_X86_64_GOTPCREL (instead of R_X86_64_GOTPCRELX) is
711
58
  // attempted to be relaxed to IE/LE (binutils PR24784). Work around the bug by
712
58
  // only using GOT when GOTPCRELX is enabled.
713
58
  // TODO Delete the workaround when GOTPCRELX becomes commonplace.
714
58
  bool UseGot = MMI->getModule()->getRtLibUseGOT() &&
715
58
                
Ctx.getAsmInfo()->canRelaxRelocations()8
;
716
58
717
58
  if (Is64Bits) {
718
28
    bool NeedsPadding = SRVK == MCSymbolRefExpr::VK_TLSGD;
719
28
    if (NeedsPadding)
720
18
      EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
721
28
    EmitAndCountInstruction(MCInstBuilder(X86::LEA64r)
722
28
                                .addReg(X86::RDI)
723
28
                                .addReg(X86::RIP)
724
28
                                .addImm(1)
725
28
                                .addReg(0)
726
28
                                .addExpr(Sym)
727
28
                                .addReg(0));
728
28
    const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("__tls_get_addr");
729
28
    if (NeedsPadding) {
730
18
      if (!UseGot)
731
17
        EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
732
18
      EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
733
18
      EmitAndCountInstruction(MCInstBuilder(X86::REX64_PREFIX));
734
18
    }
735
28
    if (UseGot) {
736
2
      const MCExpr *Expr = MCSymbolRefExpr::create(
737
2
          TlsGetAddr, MCSymbolRefExpr::VK_GOTPCREL, Ctx);
738
2
      EmitAndCountInstruction(MCInstBuilder(X86::CALL64m)
739
2
                                  .addReg(X86::RIP)
740
2
                                  .addImm(1)
741
2
                                  .addReg(0)
742
2
                                  .addExpr(Expr)
743
2
                                  .addReg(0));
744
26
    } else {
745
26
      EmitAndCountInstruction(
746
26
          MCInstBuilder(X86::CALL64pcrel32)
747
26
              .addExpr(MCSymbolRefExpr::create(TlsGetAddr,
748
26
                                               MCSymbolRefExpr::VK_PLT, Ctx)));
749
26
    }
750
30
  } else {
751
30
    if (SRVK == MCSymbolRefExpr::VK_TLSGD && 
!UseGot19
) {
752
18
      EmitAndCountInstruction(MCInstBuilder(X86::LEA32r)
753
18
                                  .addReg(X86::EAX)
754
18
                                  .addReg(0)
755
18
                                  .addImm(1)
756
18
                                  .addReg(X86::EBX)
757
18
                                  .addExpr(Sym)
758
18
                                  .addReg(0));
759
18
    } else {
760
12
      EmitAndCountInstruction(MCInstBuilder(X86::LEA32r)
761
12
                                  .addReg(X86::EAX)
762
12
                                  .addReg(X86::EBX)
763
12
                                  .addImm(1)
764
12
                                  .addReg(0)
765
12
                                  .addExpr(Sym)
766
12
                                  .addReg(0));
767
12
    }
768
30
769
30
    const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("___tls_get_addr");
770
30
    if (UseGot) {
771
2
      const MCExpr *Expr =
772
2
          MCSymbolRefExpr::create(TlsGetAddr, MCSymbolRefExpr::VK_GOT, Ctx);
773
2
      EmitAndCountInstruction(MCInstBuilder(X86::CALL32m)
774
2
                                  .addReg(X86::EBX)
775
2
                                  .addImm(1)
776
2
                                  .addReg(0)
777
2
                                  .addExpr(Expr)
778
2
                                  .addReg(0));
779
28
    } else {
780
28
      EmitAndCountInstruction(
781
28
          MCInstBuilder(X86::CALLpcrel32)
782
28
              .addExpr(MCSymbolRefExpr::create(TlsGetAddr,
783
28
                                               MCSymbolRefExpr::VK_PLT, Ctx)));
784
28
    }
785
30
  }
786
58
}
787
788
/// Emit the largest nop instruction smaller than or equal to \p NumBytes
789
/// bytes.  Return the size of nop emitted.
790
static unsigned EmitNop(MCStreamer &OS, unsigned NumBytes, bool Is64Bit,
791
216
                        const MCSubtargetInfo &STI) {
792
216
  // This works only for 64bit. For 32bit we have to do additional checking if
793
216
  // the CPU supports multi-byte nops.
794
216
  assert(Is64Bit && "EmitNops only supports X86-64");
795
216
796
216
  unsigned NopSize;
797
216
  unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg;
798
216
  IndexReg = Displacement = SegmentReg = 0;
799
216
  BaseReg = X86::RAX;
800
216
  ScaleVal = 1;
801
216
  switch (NumBytes) {
802
216
  case 0:
803
0
    llvm_unreachable("Zero nops?");
804
216
    
break0
;
805
216
  case 1:
806
4
    NopSize = 1;
807
4
    Opc = X86::NOOP;
808
4
    break;
809
216
  case 2:
810
50
    NopSize = 2;
811
50
    Opc = X86::XCHG16ar;
812
50
    break;
813
216
  case 3:
814
9
    NopSize = 3;
815
9
    Opc = X86::NOOPL;
816
9
    break;
817
216
  case 4:
818
4
    NopSize = 4;
819
4
    Opc = X86::NOOPL;
820
4
    Displacement = 8;
821
4
    break;
822
216
  case 5:
823
10
    NopSize = 5;
824
10
    Opc = X86::NOOPL;
825
10
    Displacement = 8;
826
10
    IndexReg = X86::RAX;
827
10
    break;
828
216
  case 6:
829
4
    NopSize = 6;
830
4
    Opc = X86::NOOPW;
831
4
    Displacement = 8;
832
4
    IndexReg = X86::RAX;
833
4
    break;
834
216
  case 7:
835
5
    NopSize = 7;
836
5
    Opc = X86::NOOPL;
837
5
    Displacement = 512;
838
5
    break;
839
216
  case 8:
840
4
    NopSize = 8;
841
4
    Opc = X86::NOOPL;
842
4
    Displacement = 512;
843
4
    IndexReg = X86::RAX;
844
4
    break;
845
216
  case 9:
846
35
    NopSize = 9;
847
35
    Opc = X86::NOOPW;
848
35
    Displacement = 512;
849
35
    IndexReg = X86::RAX;
850
35
    break;
851
216
  default:
852
91
    NopSize = 10;
853
91
    Opc = X86::NOOPW;
854
91
    Displacement = 512;
855
91
    IndexReg = X86::RAX;
856
91
    SegmentReg = X86::CS;
857
91
    break;
858
216
  }
859
216
860
216
  unsigned NumPrefixes = std::min(NumBytes - NopSize, 5U);
861
216
  NopSize += NumPrefixes;
862
467
  for (unsigned i = 0; i != NumPrefixes; 
++i251
)
863
251
    OS.EmitBytes("\x66");
864
216
865
216
  switch (Opc) {
866
216
  
default: 0
llvm_unreachable0
("Unexpected opcode");
867
216
  case X86::NOOP:
868
4
    OS.EmitInstruction(MCInstBuilder(Opc), STI);
869
4
    break;
870
216
  case X86::XCHG16ar:
871
50
    OS.EmitInstruction(MCInstBuilder(Opc).addReg(X86::AX).addReg(X86::AX), STI);
872
50
    break;
873
216
  case X86::NOOPL:
874
162
  case X86::NOOPW:
875
162
    OS.EmitInstruction(MCInstBuilder(Opc)
876
162
                           .addReg(BaseReg)
877
162
                           .addImm(ScaleVal)
878
162
                           .addReg(IndexReg)
879
162
                           .addImm(Displacement)
880
162
                           .addReg(SegmentReg),
881
162
                       STI);
882
162
    break;
883
216
  }
884
216
  assert(NopSize <= NumBytes && "We overemitted?");
885
216
  return NopSize;
886
216
}
887
888
/// Emit the optimal amount of multi-byte nops on X86.
889
static void EmitNops(MCStreamer &OS, unsigned NumBytes, bool Is64Bit,
890
199
                     const MCSubtargetInfo &STI) {
891
199
  unsigned NopsToEmit = NumBytes;
892
199
  (void)NopsToEmit;
893
411
  while (NumBytes) {
894
212
    NumBytes -= EmitNop(OS, NumBytes, Is64Bit, STI);
895
212
    assert(NopsToEmit >= NumBytes && "Emitted more than I asked for!");
896
212
  }
897
199
}
898
899
void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
900
88
                                    X86MCInstLower &MCIL) {
901
88
  assert(Subtarget->is64Bit() && "Statepoint currently only supports X86-64");
902
88
903
88
  StatepointOpers SOpers(&MI);
904
88
  if (unsigned PatchBytes = SOpers.getNumPatchBytes()) {
905
1
    EmitNops(*OutStreamer, PatchBytes, Subtarget->is64Bit(),
906
1
             getSubtargetInfo());
907
87
  } else {
908
87
    // Lower call target and choose correct opcode
909
87
    const MachineOperand &CallTarget = SOpers.getCallTarget();
910
87
    MCOperand CallTargetMCOp;
911
87
    unsigned CallOpcode;
912
87
    switch (CallTarget.getType()) {
913
87
    case MachineOperand::MO_GlobalAddress:
914
76
    case MachineOperand::MO_ExternalSymbol:
915
76
      CallTargetMCOp = MCIL.LowerSymbolOperand(
916
76
          CallTarget, MCIL.GetSymbolFromOperand(CallTarget));
917
76
      CallOpcode = X86::CALL64pcrel32;
918
76
      // Currently, we only support relative addressing with statepoints.
919
76
      // Otherwise, we'll need a scratch register to hold the target
920
76
      // address.  You'll fail asserts during load & relocation if this
921
76
      // symbol is too far away. (TODO: support non-relative addressing)
922
76
      break;
923
76
    case MachineOperand::MO_Immediate:
924
0
      CallTargetMCOp = MCOperand::createImm(CallTarget.getImm());
925
0
      CallOpcode = X86::CALL64pcrel32;
926
0
      // Currently, we only support relative addressing with statepoints.
927
0
      // Otherwise, we'll need a scratch register to hold the target
928
0
      // immediate.  You'll fail asserts during load & relocation if this
929
0
      // address is too far away. (TODO: support non-relative addressing)
930
0
      break;
931
76
    case MachineOperand::MO_Register:
932
11
      // FIXME: Add retpoline support and remove this.
933
11
      if (Subtarget->useRetpolineIndirectCalls())
934
0
        report_fatal_error("Lowering register statepoints with retpoline not "
935
0
                           "yet implemented.");
936
11
      CallTargetMCOp = MCOperand::createReg(CallTarget.getReg());
937
11
      CallOpcode = X86::CALL64r;
938
11
      break;
939
11
    default:
940
0
      llvm_unreachable("Unsupported operand type in statepoint call target");
941
11
      
break0
;
942
87
    }
943
87
944
87
    // Emit call
945
87
    MCInst CallInst;
946
87
    CallInst.setOpcode(CallOpcode);
947
87
    CallInst.addOperand(CallTargetMCOp);
948
87
    OutStreamer->EmitInstruction(CallInst, getSubtargetInfo());
949
87
  }
950
88
951
88
  // Record our statepoint node in the same section used by STACKMAP
952
88
  // and PATCHPOINT
953
88
  SM.recordStatepoint(MI);
954
88
}
955
956
void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI,
957
25
                                     X86MCInstLower &MCIL) {
958
25
  // FAULTING_LOAD_OP <def>, <fault kind>, <MBB handler>,
959
25
  //                  <opcode>, <operands>
960
25
961
25
  unsigned DefRegister = FaultingMI.getOperand(0).getReg();
962
25
  FaultMaps::FaultKind FK =
963
25
      static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(1).getImm());
964
25
  MCSymbol *HandlerLabel = FaultingMI.getOperand(2).getMBB()->getSymbol();
965
25
  unsigned Opcode = FaultingMI.getOperand(3).getImm();
966
25
  unsigned OperandsBeginIdx = 4;
967
25
968
25
  assert(FK < FaultMaps::FaultKindMax && "Invalid Faulting Kind!");
969
25
  FM.recordFaultingOp(FK, HandlerLabel);
970
25
971
25
  MCInst MI;
972
25
  MI.setOpcode(Opcode);
973
25
974
25
  if (DefRegister != X86::NoRegister)
975
18
    MI.addOperand(MCOperand::createReg(DefRegister));
976
25
977
25
  for (auto I = FaultingMI.operands_begin() + OperandsBeginIdx,
978
25
            E = FaultingMI.operands_end();
979
164
       I != E; 
++I139
)
980
139
    if (auto MaybeOperand = MCIL.LowerMachineOperand(&FaultingMI, *I))
981
134
      MI.addOperand(MaybeOperand.getValue());
982
25
983
25
  OutStreamer->AddComment("on-fault: " + HandlerLabel->getName());
984
25
  OutStreamer->EmitInstruction(MI, getSubtargetInfo());
985
25
}
986
987
void X86AsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI,
988
2
                                     X86MCInstLower &MCIL) {
989
2
  bool Is64Bits = Subtarget->is64Bit();
990
2
  MCContext &Ctx = OutStreamer->getContext();
991
2
  MCSymbol *fentry = Ctx.getOrCreateSymbol("__fentry__");
992
2
  const MCSymbolRefExpr *Op =
993
2
      MCSymbolRefExpr::create(fentry, MCSymbolRefExpr::VK_None, Ctx);
994
2
995
2
  EmitAndCountInstruction(
996
2
      MCInstBuilder(Is64Bits ? X86::CALL64pcrel32 : 
X86::CALLpcrel320
)
997
2
          .addExpr(Op));
998
2
}
999
1000
void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
1001
10
                                      X86MCInstLower &MCIL) {
1002
10
  // PATCHABLE_OP minsize, opcode, operands
1003
10
1004
10
  unsigned MinSize = MI.getOperand(0).getImm();
1005
10
  unsigned Opcode = MI.getOperand(1).getImm();
1006
10
1007
10
  MCInst MCI;
1008
10
  MCI.setOpcode(Opcode);
1009
10
  for (auto &MO : make_range(MI.operands_begin() + 2, MI.operands_end()))
1010
28
    if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
1011
20
      MCI.addOperand(MaybeOperand.getValue());
1012
10
1013
10
  SmallString<256> Code;
1014
10
  SmallVector<MCFixup, 4> Fixups;
1015
10
  raw_svector_ostream VecOS(Code);
1016
10
  CodeEmitter->encodeInstruction(MCI, VecOS, Fixups, getSubtargetInfo());
1017
10
1018
10
  if (Code.size() < MinSize) {
1019
6
    if (MinSize == 2 && Opcode == X86::PUSH64r) {
1020
2
      // This is an optimization that lets us get away without emitting a nop in
1021
2
      // many cases.
1022
2
      //
1023
2
      // NB! In some cases the encoding for PUSH64r (e.g. PUSH64r %r9) takes two
1024
2
      // bytes too, so the check on MinSize is important.
1025
2
      MCI.setOpcode(X86::PUSH64rmr);
1026
4
    } else {
1027
4
      unsigned NopSize = EmitNop(*OutStreamer, MinSize, Subtarget->is64Bit(),
1028
4
                                 getSubtargetInfo());
1029
4
      assert(NopSize == MinSize && "Could not implement MinSize!");
1030
4
      (void)NopSize;
1031
4
    }
1032
6
  }
1033
10
1034
10
  OutStreamer->EmitInstruction(MCI, getSubtargetInfo());
1035
10
}
1036
1037
// Lower a stackmap of the form:
1038
// <id>, <shadowBytes>, ...
1039
100
void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
1040
100
  SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
1041
100
  SM.recordStackMap(MI);
1042
100
  unsigned NumShadowBytes = MI.getOperand(1).getImm();
1043
100
  SMShadowTracker.reset(NumShadowBytes);
1044
100
}
1045
1046
// Lower a patchpoint of the form:
1047
// [<def>], <id>, <numBytes>, <target>, <numArgs>, <cc>, ...
1048
void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
1049
70
                                    X86MCInstLower &MCIL) {
1050
70
  assert(Subtarget->is64Bit() && "Patchpoint currently only supports X86-64");
1051
70
1052
70
  SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
1053
70
1054
70
  SM.recordPatchPoint(MI);
1055
70
1056
70
  PatchPointOpers opers(&MI);
1057
70
  unsigned ScratchIdx = opers.getNextScratchIdx();
1058
70
  unsigned EncodedBytes = 0;
1059
70
  const MachineOperand &CalleeMO = opers.getCallTarget();
1060
70
1061
70
  // Check for null target. If target is non-null (i.e. is non-zero or is
1062
70
  // symbolic) then emit a call.
1063
70
  if (!(CalleeMO.isImm() && 
!CalleeMO.getImm()66
)) {
1064
40
    MCOperand CalleeMCOp;
1065
40
    switch (CalleeMO.getType()) {
1066
40
    default:
1067
0
      /// FIXME: Add a verifier check for bad callee types.
1068
0
      llvm_unreachable("Unrecognized callee operand type.");
1069
40
    case MachineOperand::MO_Immediate:
1070
36
      if (CalleeMO.getImm())
1071
36
        CalleeMCOp = MCOperand::createImm(CalleeMO.getImm());
1072
36
      break;
1073
40
    case MachineOperand::MO_ExternalSymbol:
1074
4
    case MachineOperand::MO_GlobalAddress:
1075
4
      CalleeMCOp = MCIL.LowerSymbolOperand(CalleeMO,
1076
4
                                           MCIL.GetSymbolFromOperand(CalleeMO));
1077
4
      break;
1078
40
    }
1079
40
1080
40
    // Emit MOV to materialize the target address and the CALL to target.
1081
40
    // This is encoded with 12-13 bytes, depending on which register is used.
1082
40
    unsigned ScratchReg = MI.getOperand(ScratchIdx).getReg();
1083
40
    if (X86II::isX86_64ExtendedReg(ScratchReg))
1084
40
      EncodedBytes = 13;
1085
0
    else
1086
0
      EncodedBytes = 12;
1087
40
1088
40
    EmitAndCountInstruction(
1089
40
        MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp));
1090
40
    // FIXME: Add retpoline support and remove this.
1091
40
    if (Subtarget->useRetpolineIndirectCalls())
1092
0
      report_fatal_error(
1093
0
          "Lowering patchpoint with retpoline not yet implemented.");
1094
40
    EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg));
1095
40
  }
1096
70
1097
70
  // Emit padding.
1098
70
  unsigned NumBytes = opers.getNumPatchBytes();
1099
70
  assert(NumBytes >= EncodedBytes &&
1100
70
         "Patchpoint can't request size less than the length of a call.");
1101
70
1102
70
  EmitNops(*OutStreamer, NumBytes - EncodedBytes, Subtarget->is64Bit(),
1103
70
           getSubtargetInfo());
1104
70
}
1105
1106
void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
1107
2
                                              X86MCInstLower &MCIL) {
1108
2
  assert(Subtarget->is64Bit() && "XRay custom events only supports X86-64");
1109
2
1110
2
  // We want to emit the following pattern, which follows the x86 calling
1111
2
  // convention to prepare for the trampoline call to be patched in.
1112
2
  //
1113
2
  //   .p2align 1, ...
1114
2
  // .Lxray_event_sled_N:
1115
2
  //   jmp +N                        // jump across the instrumentation sled
1116
2
  //   ...                           // set up arguments in register
1117
2
  //   callq __xray_CustomEvent@plt  // force dependency to symbol
1118
2
  //   ...
1119
2
  //   <jump here>
1120
2
  //
1121
2
  // After patching, it would look something like:
1122
2
  //
1123
2
  //   nopw (2-byte nop)
1124
2
  //   ...
1125
2
  //   callq __xray_CustomEvent  // already lowered
1126
2
  //   ...
1127
2
  //
1128
2
  // ---
1129
2
  // First we emit the label and the jump.
1130
2
  auto CurSled = OutContext.createTempSymbol("xray_event_sled_", true);
1131
2
  OutStreamer->AddComment("# XRay Custom Event Log");
1132
2
  OutStreamer->EmitCodeAlignment(2);
1133
2
  OutStreamer->EmitLabel(CurSled);
1134
2
1135
2
  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1136
2
  // an operand (computed as an offset from the jmp instruction).
1137
2
  // FIXME: Find another less hacky way to force the relative jump.
1138
2
  OutStreamer->EmitBinaryData("\xeb\x0f");
1139
2
1140
2
  // The default C calling convention will place two arguments into %rcx and
1141
2
  // %rdx -- so we only work with those.
1142
2
  unsigned DestRegs[] = {X86::RDI, X86::RSI};
1143
2
  bool UsedMask[] = {false, false};
1144
2
  // Filled out in loop.
1145
2
  unsigned SrcRegs[] = {0, 0};
1146
2
1147
2
  // Then we put the operands in the %rdi and %rsi registers. We spill the
1148
2
  // values in the register before we clobber them, and mark them as used in
1149
2
  // UsedMask. In case the arguments are already in the correct register, we
1150
2
  // emit nops appropriately sized to keep the sled the same size in every
1151
2
  // situation.
1152
6
  for (unsigned I = 0; I < MI.getNumOperands(); 
++I4
)
1153
4
    if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
1154
4
      assert(Op->isReg() && "Only support arguments in registers");
1155
4
      SrcRegs[I] = Op->getReg();
1156
4
      if (SrcRegs[I] != DestRegs[I]) {
1157
4
        UsedMask[I] = true;
1158
4
        EmitAndCountInstruction(
1159
4
            MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
1160
4
      } else {
1161
0
        EmitNops(*OutStreamer, 4, Subtarget->is64Bit(), getSubtargetInfo());
1162
0
      }
1163
4
    }
1164
2
1165
2
  // Now that the register values are stashed, mov arguments into place.
1166
6
  for (unsigned I = 0; I < MI.getNumOperands(); 
++I4
)
1167
4
    if (SrcRegs[I] != DestRegs[I])
1168
4
      EmitAndCountInstruction(
1169
4
          MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I]));
1170
2
1171
2
  // We emit a hard dependency on the __xray_CustomEvent symbol, which is the
1172
2
  // name of the trampoline to be implemented by the XRay runtime.
1173
2
  auto TSym = OutContext.getOrCreateSymbol("__xray_CustomEvent");
1174
2
  MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
1175
2
  if (isPositionIndependent())
1176
1
    TOp.setTargetFlags(X86II::MO_PLT);
1177
2
1178
2
  // Emit the call instruction.
1179
2
  EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
1180
2
                              .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));
1181
2
1182
2
  // Restore caller-saved and used registers.
1183
6
  for (unsigned I = sizeof UsedMask; I-- > 0;)
1184
4
    if (UsedMask[I])
1185
4
      EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
1186
0
    else
1187
0
      EmitNops(*OutStreamer, 1, Subtarget->is64Bit(), getSubtargetInfo());
1188
2
1189
2
  OutStreamer->AddComment("xray custom event end.");
1190
2
1191
2
  // Record the sled version. Older versions of this sled were spelled
1192
2
  // differently, so we let the runtime handle the different offsets we're
1193
2
  // using.
1194
2
  recordSled(CurSled, MI, SledKind::CUSTOM_EVENT, 1);
1195
2
}
1196
1197
void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
1198
2
                                                    X86MCInstLower &MCIL) {
1199
2
  assert(Subtarget->is64Bit() && "XRay typed events only supports X86-64");
1200
2
1201
2
  // We want to emit the following pattern, which follows the x86 calling
1202
2
  // convention to prepare for the trampoline call to be patched in.
1203
2
  //
1204
2
  //   .p2align 1, ...
1205
2
  // .Lxray_event_sled_N:
1206
2
  //   jmp +N                        // jump across the instrumentation sled
1207
2
  //   ...                           // set up arguments in register
1208
2
  //   callq __xray_TypedEvent@plt  // force dependency to symbol
1209
2
  //   ...
1210
2
  //   <jump here>
1211
2
  //
1212
2
  // After patching, it would look something like:
1213
2
  //
1214
2
  //   nopw (2-byte nop)
1215
2
  //   ...
1216
2
  //   callq __xray_TypedEvent  // already lowered
1217
2
  //   ...
1218
2
  //
1219
2
  // ---
1220
2
  // First we emit the label and the jump.
1221
2
  auto CurSled = OutContext.createTempSymbol("xray_typed_event_sled_", true);
1222
2
  OutStreamer->AddComment("# XRay Typed Event Log");
1223
2
  OutStreamer->EmitCodeAlignment(2);
1224
2
  OutStreamer->EmitLabel(CurSled);
1225
2
1226
2
  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1227
2
  // an operand (computed as an offset from the jmp instruction).
1228
2
  // FIXME: Find another less hacky way to force the relative jump.
1229
2
  OutStreamer->EmitBinaryData("\xeb\x14");
1230
2
1231
2
  // An x86-64 convention may place three arguments into %rcx, %rdx, and R8,
1232
2
  // so we'll work with those. Or we may be called via SystemV, in which case
1233
2
  // we don't have to do any translation.
1234
2
  unsigned DestRegs[] = {X86::RDI, X86::RSI, X86::RDX};
1235
2
  bool UsedMask[] = {false, false, false};
1236
2
1237
2
  // Will fill out src regs in the loop.
1238
2
  unsigned SrcRegs[] = {0, 0, 0};
1239
2
1240
2
  // Then we put the operands in the SystemV registers. We spill the values in
1241
2
  // the registers before we clobber them, and mark them as used in UsedMask.
1242
2
  // In case the arguments are already in the correct register, we emit nops
1243
2
  // appropriately sized to keep the sled the same size in every situation.
1244
8
  for (unsigned I = 0; I < MI.getNumOperands(); 
++I6
)
1245
6
    if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
1246
6
      // TODO: Is register only support adequate?
1247
6
      assert(Op->isReg() && "Only supports arguments in registers");
1248
6
      SrcRegs[I] = Op->getReg();
1249
6
      if (SrcRegs[I] != DestRegs[I]) {
1250
6
        UsedMask[I] = true;
1251
6
        EmitAndCountInstruction(
1252
6
            MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
1253
6
      } else {
1254
0
        EmitNops(*OutStreamer, 4, Subtarget->is64Bit(), getSubtargetInfo());
1255
0
      }
1256
6
    }
1257
2
1258
2
  // In the above loop we only stash all of the destination registers or emit
1259
2
  // nops if the arguments are already in the right place. Doing the actual
1260
2
  // moving is postponed until after all the registers are stashed so nothing
1261
2
  // is clobbered. We've already added nops to account for the size of mov and
1262
2
  // push if the register is in the right place, so we only have to worry about
1263
2
  // emitting movs.
1264
8
  for (unsigned I = 0; I < MI.getNumOperands(); 
++I6
)
1265
6
    if (UsedMask[I])
1266
6
      EmitAndCountInstruction(
1267
6
          MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I]));
1268
2
1269
2
  // We emit a hard dependency on the __xray_TypedEvent symbol, which is the
1270
2
  // name of the trampoline to be implemented by the XRay runtime.
1271
2
  auto TSym = OutContext.getOrCreateSymbol("__xray_TypedEvent");
1272
2
  MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
1273
2
  if (isPositionIndependent())
1274
1
    TOp.setTargetFlags(X86II::MO_PLT);
1275
2
1276
2
  // Emit the call instruction.
1277
2
  EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
1278
2
                              .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));
1279
2
1280
2
  // Restore caller-saved and used registers.
1281
8
  for (unsigned I = sizeof UsedMask; I-- > 0;)
1282
6
    if (UsedMask[I])
1283
6
      EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
1284
0
    else
1285
0
      EmitNops(*OutStreamer, 1, Subtarget->is64Bit(), getSubtargetInfo());
1286
2
1287
2
  OutStreamer->AddComment("xray typed event end.");
1288
2
1289
2
  // Record the sled version.
1290
2
  recordSled(CurSled, MI, SledKind::TYPED_EVENT, 0);
1291
2
}
1292
1293
void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
1294
27
                                                  X86MCInstLower &MCIL) {
1295
27
  // We want to emit the following pattern:
1296
27
  //
1297
27
  //   .p2align 1, ...
1298
27
  // .Lxray_sled_N:
1299
27
  //   jmp .tmpN
1300
27
  //   # 9 bytes worth of noops
1301
27
  //
1302
27
  // We need the 9 bytes because at runtime, we'd be patching over the full 11
1303
27
  // bytes with the following pattern:
1304
27
  //
1305
27
  //   mov %r10, <function id, 32-bit>   // 6 bytes
1306
27
  //   call <relative offset, 32-bits>   // 5 bytes
1307
27
  //
1308
27
  auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
1309
27
  OutStreamer->EmitCodeAlignment(2);
1310
27
  OutStreamer->EmitLabel(CurSled);
1311
27
1312
27
  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1313
27
  // an operand (computed as an offset from the jmp instruction).
1314
27
  // FIXME: Find another less hacky way to force the relative jump.
1315
27
  OutStreamer->EmitBytes("\xeb\x09");
1316
27
  EmitNops(*OutStreamer, 9, Subtarget->is64Bit(), getSubtargetInfo());
1317
27
  recordSled(CurSled, MI, SledKind::FUNCTION_ENTER);
1318
27
}
1319
1320
void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,
1321
24
                                       X86MCInstLower &MCIL) {
1322
24
  // Since PATCHABLE_RET takes the opcode of the return statement as an
1323
24
  // argument, we use that to emit the correct form of the RET that we want.
1324
24
  // i.e. when we see this:
1325
24
  //
1326
24
  //   PATCHABLE_RET X86::RET ...
1327
24
  //
1328
24
  // We should emit the RET followed by sleds.
1329
24
  //
1330
24
  //   .p2align 1, ...
1331
24
  // .Lxray_sled_N:
1332
24
  //   ret  # or equivalent instruction
1333
24
  //   # 10 bytes worth of noops
1334
24
  //
1335
24
  // This just makes sure that the alignment for the next instruction is 2.
1336
24
  auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
1337
24
  OutStreamer->EmitCodeAlignment(2);
1338
24
  OutStreamer->EmitLabel(CurSled);
1339
24
  unsigned OpCode = MI.getOperand(0).getImm();
1340
24
  MCInst Ret;
1341
24
  Ret.setOpcode(OpCode);
1342
24
  for (auto &MO : make_range(MI.operands_begin() + 1, MI.operands_end()))
1343
24
    if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
1344
24
      Ret.addOperand(MaybeOperand.getValue());
1345
24
  OutStreamer->EmitInstruction(Ret, getSubtargetInfo());
1346
24
  EmitNops(*OutStreamer, 10, Subtarget->is64Bit(), getSubtargetInfo());
1347
24
  recordSled(CurSled, MI, SledKind::FUNCTION_EXIT);
1348
24
}
1349
1350
void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI,
1351
4
                                             X86MCInstLower &MCIL) {
1352
4
  // Like PATCHABLE_RET, we have the actual instruction in the operands to this
1353
4
  // instruction so we lower that particular instruction and its operands.
1354
4
  // Unlike PATCHABLE_RET though, we put the sled before the JMP, much like how
1355
4
  // we do it for PATCHABLE_FUNCTION_ENTER. The sled should be very similar to
1356
4
  // the PATCHABLE_FUNCTION_ENTER case, followed by the lowering of the actual
1357
4
  // tail call much like how we have it in PATCHABLE_RET.
1358
4
  auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
1359
4
  OutStreamer->EmitCodeAlignment(2);
1360
4
  OutStreamer->EmitLabel(CurSled);
1361
4
  auto Target = OutContext.createTempSymbol();
1362
4
1363
4
  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1364
4
  // an operand (computed as an offset from the jmp instruction).
1365
4
  // FIXME: Find another less hacky way to force the relative jump.
1366
4
  OutStreamer->EmitBytes("\xeb\x09");
1367
4
  EmitNops(*OutStreamer, 9, Subtarget->is64Bit(), getSubtargetInfo());
1368
4
  OutStreamer->EmitLabel(Target);
1369
4
  recordSled(CurSled, MI, SledKind::TAIL_CALL);
1370
4
1371
4
  unsigned OpCode = MI.getOperand(0).getImm();
1372
4
  MCInst TC;
1373
4
  TC.setOpcode(OpCode);
1374
4
1375
4
  // Before emitting the instruction, add a comment to indicate that this is
1376
4
  // indeed a tail call.
1377
4
  OutStreamer->AddComment("TAILCALL");
1378
4
  for (auto &MO : make_range(MI.operands_begin() + 1, MI.operands_end()))
1379
26
    if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
1380
4
      TC.addOperand(MaybeOperand.getValue());
1381
4
  OutStreamer->EmitInstruction(TC, getSubtargetInfo());
1382
4
}
1383
1384
// Returns instruction preceding MBBI in MachineFunction.
1385
// If MBBI is the first instruction of the first basic block, returns null.
1386
static MachineBasicBlock::const_iterator
1387
703
PrevCrossBBInst(MachineBasicBlock::const_iterator MBBI) {
1388
703
  const MachineBasicBlock *MBB = MBBI->getParent();
1389
796
  while (MBBI == MBB->begin()) {
1390
93
    if (MBB == &MBB->getParent()->front())
1391
0
      return MachineBasicBlock::const_iterator();
1392
93
    MBB = MBB->getPrevNode();
1393
93
    MBBI = MBB->end();
1394
93
  }
1395
703
  --MBBI;
1396
703
  return MBBI;
1397
703
}
1398
1399
static const Constant *getConstantFromPool(const MachineInstr &MI,
1400
29.9k
                                           const MachineOperand &Op) {
1401
29.9k
  if (!Op.isCPI() || 
Op.getOffset() != 016.3k
)
1402
13.6k
    return nullptr;
1403
16.3k
1404
16.3k
  ArrayRef<MachineConstantPoolEntry> Constants =
1405
16.3k
      MI.getParent()->getParent()->getConstantPool()->getConstants();
1406
16.3k
  const MachineConstantPoolEntry &ConstantEntry = Constants[Op.getIndex()];
1407
16.3k
1408
16.3k
  // Bail if this is a machine constant pool entry, we won't be able to dig out
1409
16.3k
  // anything useful.
1410
16.3k
  if (ConstantEntry.isMachineConstantPoolEntry())
1411
0
    return nullptr;
1412
16.3k
1413
16.3k
  const Constant *C = ConstantEntry.Val.ConstVal;
1414
16.3k
  assert((!C || ConstantEntry.getType() == C->getType()) &&
1415
16.3k
         "Expected a constant of the same type!");
1416
16.3k
  return C;
1417
16.3k
}
1418
1419
static std::string getShuffleComment(const MachineInstr *MI, unsigned SrcOp1Idx,
1420
3.01k
                                     unsigned SrcOp2Idx, ArrayRef<int> Mask) {
1421
3.01k
  std::string Comment;
1422
3.01k
1423
3.01k
  // Compute the name for a register. This is really goofy because we have
1424
3.01k
  // multiple instruction printers that could (in theory) use different
1425
3.01k
  // names. Fortunately most people use the ATT style (outside of Windows)
1426
3.01k
  // and they actually agree on register naming here. Ultimately, this is
1427
3.01k
  // a comment, and so its OK if it isn't perfect.
1428
9.14k
  auto GetRegisterName = [](unsigned RegNum) -> StringRef {
1429
9.14k
    return X86ATTInstPrinter::getRegisterName(RegNum);
1430
9.14k
  };
1431
3.01k
1432
3.01k
  const MachineOperand &DstOp = MI->getOperand(0);
1433
3.01k
  const MachineOperand &SrcOp1 = MI->getOperand(SrcOp1Idx);
1434
3.01k
  const MachineOperand &SrcOp2 = MI->getOperand(SrcOp2Idx);
1435
3.01k
1436
3.01k
  StringRef DstName = DstOp.isReg() ? GetRegisterName(DstOp.getReg()) : 
"mem"0
;
1437
3.01k
  StringRef Src1Name =
1438
3.01k
      SrcOp1.isReg() ? GetRegisterName(SrcOp1.getReg()) : 
"mem"0
;
1439
3.01k
  StringRef Src2Name =
1440
3.01k
      SrcOp2.isReg() ? GetRegisterName(SrcOp2.getReg()) : 
"mem"0
;
1441
3.01k
1442
3.01k
  // One source operand, fix the mask to print all elements in one span.
1443
3.01k
  SmallVector<int, 8> ShuffleMask(Mask.begin(), Mask.end());
1444
3.01k
  if (Src1Name == Src2Name)
1445
64.4k
    
for (int i = 0, e = ShuffleMask.size(); 2.95k
i != e;
++i61.5k
)
1446
61.5k
      if (ShuffleMask[i] >= e)
1447
48
        ShuffleMask[i] -= e;
1448
3.01k
1449
3.01k
  raw_string_ostream CS(Comment);
1450
3.01k
  CS << DstName;
1451
3.01k
1452
3.01k
  // Handle AVX512 MASK/MASXZ write mask comments.
1453
3.01k
  // MASK: zmmX {%kY}
1454
3.01k
  // MASKZ: zmmX {%kY} {z}
1455
3.01k
  if (SrcOp1Idx > 1) {
1456
98
    assert((SrcOp1Idx == 2 || SrcOp1Idx == 3) && "Unexpected writemask");
1457
98
1458
98
    const MachineOperand &WriteMaskOp = MI->getOperand(SrcOp1Idx - 1);
1459
98
    if (WriteMaskOp.isReg()) {
1460
98
      CS << " {%" << GetRegisterName(WriteMaskOp.getReg()) << "}";
1461
98
1462
98
      if (SrcOp1Idx == 2) {
1463
48
        CS << " {z}";
1464
48
      }
1465
98
    }
1466
98
  }
1467
3.01k
1468
3.01k
  CS << " = ";
1469
3.01k
1470
13.5k
  for (int i = 0, e = ShuffleMask.size(); i != e; 
++i10.5k
) {
1471
10.5k
    if (i != 0)
1472
7.52k
      CS << ",";
1473
10.5k
    if (ShuffleMask[i] == SM_SentinelZero) {
1474
5.94k
      CS << "zero";
1475
5.94k
      continue;
1476
5.94k
    }
1477
4.59k
1478
4.59k
    // Otherwise, it must come from src1 or src2.  Print the span of elements
1479
4.59k
    // that comes from this src.
1480
4.59k
    bool isSrc1 = ShuffleMask[i] < (int)e;
1481
4.59k
    CS << (isSrc1 ? 
Src1Name4.48k
:
Src2Name116
) << '[';
1482
4.59k
1483
4.59k
    bool IsFirst = true;
1484
60.8k
    while (i != e && 
ShuffleMask[i] != SM_SentinelZero58.2k
&&
1485
60.8k
           
(ShuffleMask[i] < (int)e) == isSrc156.4k
) {
1486
56.2k
      if (!IsFirst)
1487
51.6k
        CS << ',';
1488
4.59k
      else
1489
4.59k
        IsFirst = false;
1490
56.2k
      if (ShuffleMask[i] == SM_SentinelUndef)
1491
7.87k
        CS << "u";
1492
48.4k
      else
1493
48.4k
        CS << ShuffleMask[i] % (int)e;
1494
56.2k
      ++i;
1495
56.2k
    }
1496
4.59k
    CS << ']';
1497
4.59k
    --i; // For loop increments element #.
1498
4.59k
  }
1499
3.01k
  CS.flush();
1500
3.01k
1501
3.01k
  return Comment;
1502
3.01k
}
1503
1504
124k
static void printConstant(const APInt &Val, raw_ostream &CS) {
1505
124k
  if (Val.getBitWidth() <= 64) {
1506
124k
    CS << Val.getZExtValue();
1507
124k
  } else {
1508
0
    // print multi-word constant as (w0,w1)
1509
0
    CS << "(";
1510
0
    for (int i = 0, N = Val.getNumWords(); i < N; ++i) {
1511
0
      if (i > 0)
1512
0
        CS << ",";
1513
0
      CS << Val.getRawData()[i];
1514
0
    }
1515
0
    CS << ")";
1516
0
  }
1517
124k
}
1518
1519
4.63k
static void printConstant(const APFloat &Flt, raw_ostream &CS) {
1520
4.63k
  SmallString<32> Str;
1521
4.63k
  // Force scientific notation to distinquish from integers.
1522
4.63k
  Flt.toString(Str, 0, 0);
1523
4.63k
  CS << Str;
1524
4.63k
}
1525
1526
21.5k
static void printConstant(const Constant *COp, raw_ostream &CS) {
1527
21.5k
  if (isa<UndefValue>(COp)) {
1528
6.53k
    CS << "u";
1529
14.9k
  } else if (auto *CI = dyn_cast<ConstantInt>(COp)) {
1530
12.9k
    printConstant(CI->getValue(), CS);
1531
12.9k
  } else 
if (auto *2.04k
CF2.04k
= dyn_cast<ConstantFP>(COp)) {
1532
2.04k
    printConstant(CF->getValueAPF(), CS);
1533
2.04k
  } else {
1534
0
    CS << "?";
1535
0
  }
1536
21.5k
}
1537
1538
2.38k
void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) {
1539
2.38k
  assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
1540
2.38k
  assert(getSubtarget().isOSWindows() && "SEH_ instruction Windows only");
1541
2.38k
  const X86RegisterInfo *RI =
1542
2.38k
      MF->getSubtarget<X86Subtarget>().getRegisterInfo();
1543
2.38k
1544
2.38k
  // Use the .cv_fpo directives if we're emitting CodeView on 32-bit x86.
1545
2.38k
  if (EmitFPOData) {
1546
356
    X86TargetStreamer *XTS =
1547
356
        static_cast<X86TargetStreamer *>(OutStreamer->getTargetStreamer());
1548
356
    switch (MI->getOpcode()) {
1549
356
    case X86::SEH_PushReg:
1550
120
      XTS->emitFPOPushReg(MI->getOperand(0).getImm());
1551
120
      break;
1552
356
    case X86::SEH_StackAlloc:
1553
72
      XTS->emitFPOStackAlloc(MI->getOperand(0).getImm());
1554
72
      break;
1555
356
    case X86::SEH_StackAlign:
1556
7
      XTS->emitFPOStackAlign(MI->getOperand(0).getImm());
1557
7
      break;
1558
356
    case X86::SEH_SetFrame:
1559
60
      assert(MI->getOperand(1).getImm() == 0 &&
1560
60
             ".cv_fpo_setframe takes no offset");
1561
60
      XTS->emitFPOSetFrame(MI->getOperand(0).getImm());
1562
60
      break;
1563
356
    case X86::SEH_EndPrologue:
1564
97
      XTS->emitFPOEndPrologue();
1565
97
      break;
1566
356
    case X86::SEH_SaveReg:
1567
0
    case X86::SEH_SaveXMM:
1568
0
    case X86::SEH_PushFrame:
1569
0
      llvm_unreachable("SEH_ directive incompatible with FPO");
1570
0
      break;
1571
0
    default:
1572
0
      llvm_unreachable("expected SEH_ instruction");
1573
356
    }
1574
356
    return;
1575
356
  }
1576
2.02k
1577
2.02k
  // Otherwise, use the .seh_ directives for all other Windows platforms.
1578
2.02k
  switch (MI->getOpcode()) {
1579
2.02k
  case X86::SEH_PushReg:
1580
463
    OutStreamer->EmitWinCFIPushReg(
1581
463
        RI->getSEHRegNum(MI->getOperand(0).getImm()));
1582
463
    break;
1583
2.02k
1584
2.02k
  case X86::SEH_SaveReg:
1585
0
    OutStreamer->EmitWinCFISaveReg(RI->getSEHRegNum(MI->getOperand(0).getImm()),
1586
0
                                   MI->getOperand(1).getImm());
1587
0
    break;
1588
2.02k
1589
2.02k
  case X86::SEH_SaveXMM:
1590
190
    OutStreamer->EmitWinCFISaveXMM(RI->getSEHRegNum(MI->getOperand(0).getImm()),
1591
190
                                   MI->getOperand(1).getImm());
1592
190
    break;
1593
2.02k
1594
2.02k
  case X86::SEH_StackAlloc:
1595
626
    OutStreamer->EmitWinCFIAllocStack(MI->getOperand(0).getImm());
1596
626
    break;
1597
2.02k
1598
2.02k
  case X86::SEH_SetFrame:
1599
95
    OutStreamer->EmitWinCFISetFrame(
1600
95
        RI->getSEHRegNum(MI->getOperand(0).getImm()),
1601
95
        MI->getOperand(1).getImm());
1602
95
    break;
1603
2.02k
1604
2.02k
  case X86::SEH_PushFrame:
1605
0
    OutStreamer->EmitWinCFIPushFrame(MI->getOperand(0).getImm());
1606
0
    break;
1607
2.02k
1608
2.02k
  case X86::SEH_EndPrologue:
1609
653
    OutStreamer->EmitWinCFIEndProlog();
1610
653
    break;
1611
2.02k
1612
2.02k
  default:
1613
0
    llvm_unreachable("expected SEH_ instruction");
1614
2.02k
  }
1615
2.02k
}
1616
1617
3.04k
static unsigned getRegisterWidth(const MCOperandInfo &Info) {
1618
3.04k
  if (Info.RegClass == X86::VR128RegClassID ||
1619
3.04k
      
Info.RegClass == X86::VR128XRegClassID1.10k
)
1620
1.95k
    return 128;
1621
1.09k
  if (Info.RegClass == X86::VR256RegClassID ||
1622
1.09k
      
Info.RegClass == X86::VR256XRegClassID117
)
1623
1.01k
    return 256;
1624
79
  if (Info.RegClass == X86::VR512RegClassID)
1625
79
    return 512;
1626
0
  llvm_unreachable("Unknown register class!");
1627
0
}
1628
1629
2.41M
void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
1630
2.41M
  X86MCInstLower MCInstLowering(*MF, *this);
1631
2.41M
  const X86RegisterInfo *RI =
1632
2.41M
      MF->getSubtarget<X86Subtarget>().getRegisterInfo();
1633
2.41M
1634
2.41M
  // Add a comment about EVEX-2-VEX compression for AVX-512 instrs that
1635
2.41M
  // are compressed from EVEX encoding to VEX encoding.
1636
2.41M
  if (TM.Options.MCOptions.ShowMCEncoding) {
1637
53.8k
    if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_VEX)
1638
6.39k
      OutStreamer->AddComment("EVEX TO VEX Compression ", false);
1639
53.8k
  }
1640
2.41M
1641
2.41M
  switch (MI->getOpcode()) {
1642
2.41M
  case TargetOpcode::DBG_VALUE:
1643
0
    llvm_unreachable("Should be handled target independently");
1644
2.41M
1645
2.41M
  // Emit nothing here but a comment if we can.
1646
2.41M
  case X86::Int_MemBarrier:
1647
37
    OutStreamer->emitRawComment("MEMBARRIER");
1648
37
    return;
1649
2.41M
1650
2.41M
  case X86::EH_RETURN:
1651
6
  case X86::EH_RETURN64: {
1652
6
    // Lower these as normal, but add some comments.
1653
6
    unsigned Reg = MI->getOperand(0).getReg();
1654
6
    OutStreamer->AddComment(StringRef("eh_return, addr: %") +
1655
6
                            X86ATTInstPrinter::getRegisterName(Reg));
1656
6
    break;
1657
6
  }
1658
33
  case X86::CLEANUPRET: {
1659
33
    // Lower these as normal, but add some comments.
1660
33
    OutStreamer->AddComment("CLEANUPRET");
1661
33
    break;
1662
6
  }
1663
6
1664
63
  case X86::CATCHRET: {
1665
63
    // Lower these as normal, but add some comments.
1666
63
    OutStreamer->AddComment("CATCHRET");
1667
63
    break;
1668
6
  }
1669
6
1670
7.46k
  case X86::TAILJMPr:
1671
7.46k
  case X86::TAILJMPm:
1672
7.46k
  case X86::TAILJMPd:
1673
7.46k
  case X86::TAILJMPd_CC:
1674
7.46k
  case X86::TAILJMPr64:
1675
7.46k
  case X86::TAILJMPm64:
1676
7.46k
  case X86::TAILJMPd64:
1677
7.46k
  case X86::TAILJMPd64_CC:
1678
7.46k
  case X86::TAILJMPr64_REX:
1679
7.46k
  case X86::TAILJMPm64_REX:
1680
7.46k
    // Lower these as normal, but add some comments.
1681
7.46k
    OutStreamer->AddComment("TAILCALL");
1682
7.46k
    break;
1683
7.46k
1684
7.46k
  case X86::TLS_addr32:
1685
58
  case X86::TLS_addr64:
1686
58
  case X86::TLS_base_addr32:
1687
58
  case X86::TLS_base_addr64:
1688
58
    return LowerTlsAddr(MCInstLowering, *MI);
1689
58
1690
58
  // Loading/storing mask pairs requires two kmov operations. The second one of these
1691
58
  // needs a 2 byte displacement relative to the specified address (with 32 bit spill
1692
58
  // size). The pairs of 1bit masks up to 16 bit masks all use the same spill size,
1693
58
  // they all are stored using MASKPAIR16STORE, loaded using MASKPAIR16LOAD.
1694
58
  //
1695
58
  // The displacement value might wrap around in theory, thus the asserts in both
1696
58
  // cases.
1697
58
  case X86::MASKPAIR16LOAD: {
1698
10
    int64_t Disp = MI->getOperand(1 + X86::AddrDisp).getImm();
1699
10
    assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement");
1700
10
    const X86RegisterInfo *RI =
1701
10
      MF->getSubtarget<X86Subtarget>().getRegisterInfo();
1702
10
    unsigned Reg = MI->getOperand(0).getReg();
1703
10
    unsigned Reg0 = RI->getSubReg(Reg, X86::sub_mask_0);
1704
10
    unsigned Reg1 = RI->getSubReg(Reg, X86::sub_mask_1);
1705
10
1706
10
    // Load the first mask register
1707
10
    MCInstBuilder MIB = MCInstBuilder(X86::KMOVWkm);
1708
10
    MIB.addReg(Reg0);
1709
60
    for (int i = 0; i < X86::AddrNumOperands; 
++i50
) {
1710
50
      auto Op = MCInstLowering.LowerMachineOperand(MI, MI->getOperand(1 + i));
1711
50
      MIB.addOperand(Op.getValue());
1712
50
    }
1713
10
    EmitAndCountInstruction(MIB);
1714
10
1715
10
    // Load the second mask register of the pair
1716
10
    MIB = MCInstBuilder(X86::KMOVWkm);
1717
10
    MIB.addReg(Reg1);
1718
60
    for (int i = 0; i < X86::AddrNumOperands; 
++i50
) {
1719
50
      if (i == X86::AddrDisp) {
1720
10
        MIB.addImm(Disp + 2);
1721
40
      } else {
1722
40
        auto Op = MCInstLowering.LowerMachineOperand(MI, MI->getOperand(1 + i));
1723
40
        MIB.addOperand(Op.getValue());
1724
40
      }
1725
50
    }
1726
10
    EmitAndCountInstruction(MIB);
1727
10
    return;
1728
58
  }
1729
58
1730
58
  case X86::MASKPAIR16STORE: {
1731
10
    int64_t Disp = MI->getOperand(X86::AddrDisp).getImm();
1732
10
    assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement");
1733
10
    const X86RegisterInfo *RI =
1734
10
      MF->getSubtarget<X86Subtarget>().getRegisterInfo();
1735
10
    unsigned Reg = MI->getOperand(X86::AddrNumOperands).getReg();
1736
10
    unsigned Reg0 = RI->getSubReg(Reg, X86::sub_mask_0);
1737
10
    unsigned Reg1 = RI->getSubReg(Reg, X86::sub_mask_1);
1738
10
1739
10
    // Store the first mask register
1740
10
    MCInstBuilder MIB = MCInstBuilder(X86::KMOVWmk);
1741
60
    for (int i = 0; i < X86::AddrNumOperands; 
++i50
)
1742
50
      MIB.addOperand(MCInstLowering.LowerMachineOperand(MI, MI->getOperand(i)).getValue());
1743
10
    MIB.addReg(Reg0);
1744
10
    EmitAndCountInstruction(MIB);
1745
10
1746
10
    // Store the second mask register of the pair
1747
10
    MIB = MCInstBuilder(X86::KMOVWmk);
1748
60
    for (int i = 0; i < X86::AddrNumOperands; 
++i50
) {
1749
50
      if (i == X86::AddrDisp) {
1750
10
        MIB.addImm(Disp + 2);
1751
40
      } else {
1752
40
        auto Op = MCInstLowering.LowerMachineOperand(MI, MI->getOperand(0 + i));
1753
40
        MIB.addOperand(Op.getValue());
1754
40
      }
1755
50
    }
1756
10
    MIB.addReg(Reg1);
1757
10
    EmitAndCountInstruction(MIB);
1758
10
    return;
1759
58
  }
1760
58
1761
2.45k
  case X86::MOVPC32r: {
1762
2.45k
    // This is a pseudo op for a two instruction sequence with a label, which
1763
2.45k
    // looks like:
1764
2.45k
    //     call "L1$pb"
1765
2.45k
    // "L1$pb":
1766
2.45k
    //     popl %esi
1767
2.45k
1768
2.45k
    // Emit the call.
1769
2.45k
    MCSymbol *PICBase = MF->getPICBaseSymbol();
1770
2.45k
    // FIXME: We would like an efficient form for this, so we don't have to do a
1771
2.45k
    // lot of extra uniquing.
1772
2.45k
    EmitAndCountInstruction(
1773
2.45k
        MCInstBuilder(X86::CALLpcrel32)
1774
2.45k
            .addExpr(MCSymbolRefExpr::create(PICBase, OutContext)));
1775
2.45k
1776
2.45k
    const X86FrameLowering *FrameLowering =
1777
2.45k
        MF->getSubtarget<X86Subtarget>().getFrameLowering();
1778
2.45k
    bool hasFP = FrameLowering->hasFP(*MF);
1779
2.45k
1780
2.45k
    // TODO: This is needed only if we require precise CFA.
1781
2.45k
    bool HasActiveDwarfFrame = OutStreamer->getNumFrameInfos() &&
1782
2.45k
                               
!OutStreamer->getDwarfFrameInfos().back().End2.08k
;
1783
2.45k
1784
2.45k
    int stackGrowth = -RI->getSlotSize();
1785
2.45k
1786
2.45k
    if (HasActiveDwarfFrame && 
!hasFP2.07k
) {
1787
239
      OutStreamer->EmitCFIAdjustCfaOffset(-stackGrowth);
1788
239
    }
1789
2.45k
1790
2.45k
    // Emit the label.
1791
2.45k
    OutStreamer->EmitLabel(PICBase);
1792
2.45k
1793
2.45k
    // popl $reg
1794
2.45k
    EmitAndCountInstruction(
1795
2.45k
        MCInstBuilder(X86::POP32r).addReg(MI->getOperand(0).getReg()));
1796
2.45k
1797
2.45k
    if (HasActiveDwarfFrame && 
!hasFP2.07k
) {
1798
239
      OutStreamer->EmitCFIAdjustCfaOffset(stackGrowth);
1799
239
    }
1800
2.45k
    return;
1801
58
  }
1802
58
1803
1.68k
  case X86::ADD32ri: {
1804
1.68k
    // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri.
1805
1.68k
    if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS)
1806
1.32k
      break;
1807
358
1808
358
    // Okay, we have something like:
1809
358
    //  EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL)
1810
358
1811
358
    // For this, we want to print something like:
1812
358
    //   MYGLOBAL + (. - PICBASE)
1813
358
    // However, we can't generate a ".", so just emit a new label here and refer
1814
358
    // to it.
1815
358
    MCSymbol *DotSym = OutContext.createTempSymbol();
1816
358
    OutStreamer->EmitLabel(DotSym);
1817
358
1818
358
    // Now that we have emitted the label, lower the complex operand expression.
1819
358
    MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2));
1820
358
1821
358
    const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext);
1822
358
    const MCExpr *PICBase =
1823
358
        MCSymbolRefExpr::create(MF->getPICBaseSymbol(), OutContext);
1824
358
    DotExpr = MCBinaryExpr::createSub(DotExpr, PICBase, OutContext);
1825
358
1826
358
    DotExpr = MCBinaryExpr::createAdd(
1827
358
        MCSymbolRefExpr::create(OpSym, OutContext), DotExpr, OutContext);
1828
358
1829
358
    EmitAndCountInstruction(MCInstBuilder(X86::ADD32ri)
1830
358
                                .addReg(MI->getOperand(0).getReg())
1831
358
                                .addReg(MI->getOperand(1).getReg())
1832
358
                                .addExpr(DotExpr));
1833
358
    return;
1834
358
  }
1835
358
  case TargetOpcode::STATEPOINT:
1836
88
    return LowerSTATEPOINT(*MI, MCInstLowering);
1837
358
1838
358
  case TargetOpcode::FAULTING_OP:
1839
25
    return LowerFAULTING_OP(*MI, MCInstLowering);
1840
358
1841
358
  case TargetOpcode::FENTRY_CALL:
1842
2
    return LowerFENTRY_CALL(*MI, MCInstLowering);
1843
358
1844
358
  case TargetOpcode::PATCHABLE_OP:
1845
10
    return LowerPATCHABLE_OP(*MI, MCInstLowering);
1846
358
1847
358
  case TargetOpcode::STACKMAP:
1848
100
    return LowerSTACKMAP(*MI);
1849
358
1850
358
  case TargetOpcode::PATCHPOINT:
1851
70
    return LowerPATCHPOINT(*MI, MCInstLowering);
1852
358
1853
358
  case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
1854
27
    return LowerPATCHABLE_FUNCTION_ENTER(*MI, MCInstLowering);
1855
358
1856
358
  case TargetOpcode::PATCHABLE_RET:
1857
24
    return LowerPATCHABLE_RET(*MI, MCInstLowering);
1858
358
1859
358
  case TargetOpcode::PATCHABLE_TAIL_CALL:
1860
4
    return LowerPATCHABLE_TAIL_CALL(*MI, MCInstLowering);
1861
358
1862
358
  case TargetOpcode::PATCHABLE_EVENT_CALL:
1863
2
    return LowerPATCHABLE_EVENT_CALL(*MI, MCInstLowering);
1864
358
1865
358
  case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
1866
2
    return LowerPATCHABLE_TYPED_EVENT_CALL(*MI, MCInstLowering);
1867
358
1868
358
  case X86::MORESTACK_RET:
1869
162
    EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
1870
162
    return;
1871
358
1872
358
  case X86::MORESTACK_RET_RESTORE_R10:
1873
13
    // Return, then restore R10.
1874
13
    EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
1875
13
    EmitAndCountInstruction(
1876
13
        MCInstBuilder(X86::MOV64rr).addReg(X86::R10).addReg(X86::RAX));
1877
13
    return;
1878
358
1879
2.38k
  case X86::SEH_PushReg:
1880
2.38k
  case X86::SEH_SaveReg:
1881
2.38k
  case X86::SEH_SaveXMM:
1882
2.38k
  case X86::SEH_StackAlloc:
1883
2.38k
  case X86::SEH_StackAlign:
1884
2.38k
  case X86::SEH_SetFrame:
1885
2.38k
  case X86::SEH_PushFrame:
1886
2.38k
  case X86::SEH_EndPrologue:
1887
2.38k
    EmitSEHInstruction(MI);
1888
2.38k
    return;
1889
2.38k
1890
2.38k
  case X86::SEH_Epilogue: {
1891
626
    assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
1892
626
    MachineBasicBlock::const_iterator MBBI(MI);
1893
626
    // Check if preceded by a call and emit nop if so.
1894
626
    for (MBBI = PrevCrossBBInst(MBBI);
1895
703
         MBBI != MachineBasicBlock::const_iterator();
1896
703
         
MBBI = PrevCrossBBInst(MBBI)77
) {
1897
703
      // Conservatively assume that pseudo instructions don't emit code and keep
1898
703
      // looking for a call. We may emit an unnecessary nop in some cases.
1899
703
      if (!MBBI->isPseudo()) {
1900
626
        if (MBBI->isCall())
1901
175
          EmitAndCountInstruction(MCInstBuilder(X86::NOOP));
1902
626
        break;
1903
626
      }
1904
703
    }
1905
626
    return;
1906
2.38k
  }
1907
2.38k
1908
2.38k
  // Lower PSHUFB and VPERMILP normally but add a comment if we can find
1909
2.38k
  // a constant shuffle mask. We won't be able to do this at the MC layer
1910
2.38k
  // because the mask isn't an immediate.
1911
2.77k
  case X86::PSHUFBrm:
1912
2.77k
  case X86::VPSHUFBrm:
1913
2.77k
  case X86::VPSHUFBYrm:
1914
2.77k
  case X86::VPSHUFBZ128rm:
1915
2.77k
  case X86::VPSHUFBZ128rmk:
1916
2.77k
  case X86::VPSHUFBZ128rmkz:
1917
2.77k
  case X86::VPSHUFBZ256rm:
1918
2.77k
  case X86::VPSHUFBZ256rmk:
1919
2.77k
  case X86::VPSHUFBZ256rmkz:
1920
2.77k
  case X86::VPSHUFBZrm:
1921
2.77k
  case X86::VPSHUFBZrmk:
1922
2.77k
  case X86::VPSHUFBZrmkz: {
1923
2.77k
    if (!OutStreamer->isVerboseAsm())
1924
3
      break;
1925
2.77k
    unsigned SrcIdx, MaskIdx;
1926
2.77k
    switch (MI->getOpcode()) {
1927
2.77k
    
default: 0
llvm_unreachable0
("Invalid opcode");
1928
2.77k
    case X86::PSHUFBrm:
1929
2.70k
    case X86::VPSHUFBrm:
1930
2.70k
    case X86::VPSHUFBYrm:
1931
2.70k
    case X86::VPSHUFBZ128rm:
1932
2.70k
    case X86::VPSHUFBZ256rm:
1933
2.70k
    case X86::VPSHUFBZrm:
1934
2.70k
      SrcIdx = 1; MaskIdx = 5; break;
1935
2.70k
    case X86::VPSHUFBZ128rmkz:
1936
31
    case X86::VPSHUFBZ256rmkz:
1937
31
    case X86::VPSHUFBZrmkz:
1938
31
      SrcIdx = 2; MaskIdx = 6; break;
1939
33
    case X86::VPSHUFBZ128rmk:
1940
33
    case X86::VPSHUFBZ256rmk:
1941
33
    case X86::VPSHUFBZrmk:
1942
33
      SrcIdx = 3; MaskIdx = 7; break;
1943
2.77k
    }
1944
2.77k
1945
2.77k
    assert(MI->getNumOperands() >= 6 &&
1946
2.77k
           "We should always have at least 6 operands!");
1947
2.77k
1948
2.77k
    const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
1949
2.77k
    if (auto *C = getConstantFromPool(*MI, MaskOp)) {
1950
2.75k
      unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
1951
2.75k
      SmallVector<int, 64> Mask;
1952
2.75k
      DecodePSHUFBMask(C, Width, Mask);
1953
2.75k
      if (!Mask.empty())
1954
2.75k
        OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
1955
2.75k
    }
1956
2.77k
    break;
1957
2.77k
  }
1958
2.77k
1959
2.77k
  case X86::VPERMILPSrm:
1960
210
  case X86::VPERMILPSYrm:
1961
210
  case X86::VPERMILPSZ128rm:
1962
210
  case X86::VPERMILPSZ128rmk:
1963
210
  case X86::VPERMILPSZ128rmkz:
1964
210
  case X86::VPERMILPSZ256rm:
1965
210
  case X86::VPERMILPSZ256rmk:
1966
210
  case X86::VPERMILPSZ256rmkz:
1967
210
  case X86::VPERMILPSZrm:
1968
210
  case X86::VPERMILPSZrmk:
1969
210
  case X86::VPERMILPSZrmkz:
1970
210
  case X86::VPERMILPDrm:
1971
210
  case X86::VPERMILPDYrm:
1972
210
  case X86::VPERMILPDZ128rm:
1973
210
  case X86::VPERMILPDZ128rmk:
1974
210
  case X86::VPERMILPDZ128rmkz:
1975
210
  case X86::VPERMILPDZ256rm:
1976
210
  case X86::VPERMILPDZ256rmk:
1977
210
  case X86::VPERMILPDZ256rmkz:
1978
210
  case X86::VPERMILPDZrm:
1979
210
  case X86::VPERMILPDZrmk:
1980
210
  case X86::VPERMILPDZrmkz: {
1981
210
    if (!OutStreamer->isVerboseAsm())
1982
0
      break;
1983
210
    unsigned SrcIdx, MaskIdx;
1984
210
    unsigned ElSize;
1985
210
    switch (MI->getOpcode()) {
1986
210
    
default: 0
llvm_unreachable0
("Invalid opcode");
1987
210
    case X86::VPERMILPSrm:
1988
160
    case X86::VPERMILPSYrm:
1989
160
    case X86::VPERMILPSZ128rm:
1990
160
    case X86::VPERMILPSZ256rm:
1991
160
    case X86::VPERMILPSZrm:
1992
160
      SrcIdx = 1; MaskIdx = 5; ElSize = 32; break;
1993
160
    case X86::VPERMILPSZ128rmkz:
1994
22
    case X86::VPERMILPSZ256rmkz:
1995
22
    case X86::VPERMILPSZrmkz:
1996
22
      SrcIdx = 2; MaskIdx = 6; ElSize = 32; break;
1997
22
    case X86::VPERMILPSZ128rmk:
1998
21
    case X86::VPERMILPSZ256rmk:
1999
21
    case X86::VPERMILPSZrmk:
2000
21
      SrcIdx = 3; MaskIdx = 7; ElSize = 32; break;
2001
21
    case X86::VPERMILPDrm:
2002
5
    case X86::VPERMILPDYrm:
2003
5
    case X86::VPERMILPDZ128rm:
2004
5
    case X86::VPERMILPDZ256rm:
2005
5
    case X86::VPERMILPDZrm:
2006
5
      SrcIdx = 1; MaskIdx = 5; ElSize = 64; break;
2007
5
    case X86::VPERMILPDZ128rmkz:
2008
1
    case X86::VPERMILPDZ256rmkz:
2009
1
    case X86::VPERMILPDZrmkz:
2010
1
      SrcIdx = 2; MaskIdx = 6; ElSize = 64; break;
2011
1
    case X86::VPERMILPDZ128rmk:
2012
1
    case X86::VPERMILPDZ256rmk:
2013
1
    case X86::VPERMILPDZrmk:
2014
1
      SrcIdx = 3; MaskIdx = 7; ElSize = 64; break;
2015
210
    }
2016
210
2017
210
    assert(MI->getNumOperands() >= 6 &&
2018
210
           "We should always have at least 6 operands!");
2019
210
2020
210
    const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
2021
210
    if (auto *C = getConstantFromPool(*MI, MaskOp)) {
2022
191
      unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
2023
191
      SmallVector<int, 16> Mask;
2024
191
      DecodeVPERMILPMask(C, ElSize, Width, Mask);
2025
191
      if (!Mask.empty())
2026
191
        OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
2027
191
    }
2028
210
    break;
2029
210
  }
2030
210
2031
210
  case X86::VPERMIL2PDrm:
2032
35
  case X86::VPERMIL2PSrm:
2033
35
  case X86::VPERMIL2PDYrm:
2034
35
  case X86::VPERMIL2PSYrm: {
2035
35
    if (!OutStreamer->isVerboseAsm())
2036
0
      break;
2037
35
    assert(MI->getNumOperands() >= 8 &&
2038
35
           "We should always have at least 8 operands!");
2039
35
2040
35
    const MachineOperand &CtrlOp = MI->getOperand(MI->getNumOperands() - 1);
2041
35
    if (!CtrlOp.isImm())
2042
0
      break;
2043
35
2044
35
    unsigned ElSize;
2045
35
    switch (MI->getOpcode()) {
2046
35
    
default: 0
llvm_unreachable0
("Invalid opcode");
2047
35
    
case X86::VPERMIL2PSrm: 19
case X86::VPERMIL2PSYrm: ElSize = 32; break19
;
2048
19
    
case X86::VPERMIL2PDrm: 16
case X86::VPERMIL2PDYrm: ElSize = 64; break16
;
2049
35
    }
2050
35
2051
35
    const MachineOperand &MaskOp = MI->getOperand(6);
2052
35
    if (auto *C = getConstantFromPool(*MI, MaskOp)) {
2053
27
      unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
2054
27
      SmallVector<int, 16> Mask;
2055
27
      DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Width, Mask);
2056
27
      if (!Mask.empty())
2057
27
        OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask));
2058
27
    }
2059
35
    break;
2060
35
  }
2061
35
2062
70
  case X86::VPPERMrrm: {
2063
70
    if (!OutStreamer->isVerboseAsm())
2064
0
      break;
2065
70
    assert(MI->getNumOperands() >= 7 &&
2066
70
           "We should always have at least 7 operands!");
2067
70
2068
70
    const MachineOperand &MaskOp = MI->getOperand(6);
2069
70
    if (auto *C = getConstantFromPool(*MI, MaskOp)) {
2070
68
      unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
2071
68
      SmallVector<int, 16> Mask;
2072
68
      DecodeVPPERMMask(C, Width, Mask);
2073
68
      if (!Mask.empty())
2074
42
        OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask));
2075
68
    }
2076
70
    break;
2077
70
  }
2078
70
2079
250
  case X86::MMX_MOVQ64rm: {
2080
250
    if (!OutStreamer->isVerboseAsm())
2081
4
      break;
2082
246
    if (MI->getNumOperands() <= 4)
2083
0
      break;
2084
246
    if (auto *C = getConstantFromPool(*MI, MI->getOperand(4))) {
2085
4
      std::string Comment;
2086
4
      raw_string_ostream CS(Comment);
2087
4
      const MachineOperand &DstOp = MI->getOperand(0);
2088
4
      CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
2089
4
      if (auto *CF = dyn_cast<ConstantFP>(C)) {
2090
4
        CS << "0x" << CF->getValueAPF().bitcastToAPInt().toString(16, false);
2091
4
        OutStreamer->AddComment(CS.str());
2092
4
      }
2093
4
    }
2094
246
    break;
2095
246
  }
2096
246
2097
246
#define MOV_CASE(Prefix, Suffix)                                               \
2098
98.0k
  case X86::Prefix##MOVAPD##Suffix##rm:                                        \
2099
98.0k
  case X86::Prefix##MOVAPS##Suffix##rm:                                        \
2100
98.0k
  case X86::Prefix##MOVUPD##Suffix##rm:                                        \
2101
98.0k
  case X86::Prefix##MOVUPS##Suffix##rm:                                        \
2102
98.0k
  case X86::Prefix##MOVDQA##Suffix##rm:                                        \
2103
98.0k
  case X86::Prefix##MOVDQU##Suffix##rm:
2104
246
2105
246
#define MOV_AVX512_CASE(Suffix)                                                \
2106
98.0k
  case X86::VMOVDQA64##Suffix##rm:                                             \
2107
98.0k
  case X86::VMOVDQA32##Suffix##rm:                                             \
2108
98.0k
  case X86::VMOVDQU64##Suffix##rm:                                             \
2109
98.0k
  case X86::VMOVDQU32##Suffix##rm:                                             \
2110
98.0k
  case X86::VMOVDQU16##Suffix##rm:                                             \
2111
98.0k
  case X86::VMOVDQU8##Suffix##rm:                                              \
2112
98.0k
  case X86::VMOVAPS##Suffix##rm:                                               \
2113
98.0k
  case X86::VMOVAPD##Suffix##rm:                                               \
2114
98.0k
  case X86::VMOVUPS##Suffix##rm:                                               \
2115
98.0k
  case X86::VMOVUPD##Suffix##rm:
2116
246
2117
246
#define CASE_ALL_MOV_RM()                                                      \
2118
32.6k
  MOV_CASE(, )   /* SSE */                                                     \
2119
32.6k
  MOV_CASE(V, )  /* AVX-128 */                                                 \
2120
32.6k
  MOV_CASE(V, Y) /* AVX-256 */                                                 \
2121
32.6k
  MOV_AVX512_CASE(Z)                                                           \
2122
32.6k
  MOV_AVX512_CASE(Z256)                                                        \
2123
32.6k
  MOV_AVX512_CASE(Z128)
2124
246
2125
246
    // For loads from a constant pool to a vector register, print the constant
2126
246
    // loaded.
2127
1.56M
    
CASE_ALL_MOV_RM246
()
2128
1.56M
  case X86::VBROADCASTF128:
2129
32.6k
  case X86::VBROADCASTI128:
2130
32.6k
  case X86::VBROADCASTF32X4Z256rm:
2131
32.6k
  case X86::VBROADCASTF32X4rm:
2132
32.6k
  case X86::VBROADCASTF32X8rm:
2133
32.6k
  case X86::VBROADCASTF64X2Z128rm:
2134
32.6k
  case X86::VBROADCASTF64X2rm:
2135
32.6k
  case X86::VBROADCASTF64X4rm:
2136
32.6k
  case X86::VBROADCASTI32X4Z256rm:
2137
32.6k
  case X86::VBROADCASTI32X4rm:
2138
32.6k
  case X86::VBROADCASTI32X8rm:
2139
32.6k
  case X86::VBROADCASTI64X2Z128rm:
2140
32.6k
  case X86::VBROADCASTI64X2rm:
2141
32.6k
  case X86::VBROADCASTI64X4rm:
2142
32.6k
    if (!OutStreamer->isVerboseAsm())
2143
8.84k
      break;
2144
23.8k
    if (MI->getNumOperands() <= 4)
2145
0
      break;
2146
23.8k
    if (auto *C = getConstantFromPool(*MI, MI->getOperand(4))) {
2147
11.5k
      int NumLanes = 1;
2148
11.5k
      // Override NumLanes for the broadcast instructions.
2149
11.5k
      switch (MI->getOpcode()) {
2150
11.5k
      
case X86::VBROADCASTF128: NumLanes = 2; break53
;
2151
11.5k
      
case X86::VBROADCASTI128: NumLanes = 2; break142
;
2152
11.5k
      
case X86::VBROADCASTF32X4Z256rm: NumLanes = 2; break0
;
2153
11.5k
      
case X86::VBROADCASTF32X4rm: NumLanes = 4; break8
;
2154
11.5k
      
case X86::VBROADCASTF32X8rm: NumLanes = 2; break0
;
2155
11.5k
      
case X86::VBROADCASTF64X2Z128rm: NumLanes = 2; break0
;
2156
11.5k
      
case X86::VBROADCASTF64X2rm: NumLanes = 4; break0
;
2157
11.5k
      
case X86::VBROADCASTF64X4rm: NumLanes = 2; break8
;
2158
11.5k
      
case X86::VBROADCASTI32X4Z256rm: NumLanes = 2; break0
;
2159
11.5k
      
case X86::VBROADCASTI32X4rm: NumLanes = 4; break41
;
2160
11.5k
      
case X86::VBROADCASTI32X8rm: NumLanes = 2; break0
;
2161
11.5k
      
case X86::VBROADCASTI64X2Z128rm: NumLanes = 2; break0
;
2162
11.5k
      
case X86::VBROADCASTI64X2rm: NumLanes = 4; break0
;
2163
11.5k
      
case X86::VBROADCASTI64X4rm: NumLanes = 2; break44
;
2164
11.5k
      }
2165
11.5k
2166
11.5k
      std::string Comment;
2167
11.5k
      raw_string_ostream CS(Comment);
2168
11.5k
      const MachineOperand &DstOp = MI->getOperand(0);
2169
11.5k
      CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
2170
11.5k
      if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
2171
10.5k
        CS << "[";
2172
21.4k
        for (int l = 0; l != NumLanes; 
++l10.9k
) {
2173
124k
          for (int i = 0, NumElements = CDS->getNumElements(); i < NumElements;
2174
113k
               ++i) {
2175
113k
            if (i != 0 || 
l != 010.9k
)
2176
103k
              CS << ",";
2177
113k
            if (CDS->getElementType()->isIntegerTy())
2178
111k
              printConstant(CDS->getElementAsAPInt(i), CS);
2179
2.59k
            else if (CDS->getElementType()->isHalfTy() ||
2180
2.59k
                     CDS->getElementType()->isFloatTy() ||
2181
2.59k
                     
CDS->getElementType()->isDoubleTy()674
)
2182
2.59k
              printConstant(CDS->getElementAsAPFloat(i), CS);
2183
0
            else
2184
0
              CS << "?";
2185
113k
          }
2186
10.9k
        }
2187
10.5k
        CS << "]";
2188
10.5k
        OutStreamer->AddComment(CS.str());
2189
10.5k
      } else 
if (auto *983
CV983
= dyn_cast<ConstantVector>(C)) {
2190
956
        CS << "<";
2191
1.91k
        for (int l = 0; l != NumLanes; 
++l956
) {
2192
12.3k
          for (int i = 0, NumOperands = CV->getNumOperands(); i < NumOperands;
2193
11.4k
               ++i) {
2194
11.4k
            if (i != 0 || 
l != 0956
)
2195
10.4k
              CS << ",";
2196
11.4k
            printConstant(CV->getOperand(i), CS);
2197
11.4k
          }
2198
956
        }
2199
956
        CS << ">";
2200
956
        OutStreamer->AddComment(CS.str());
2201
956
      }
2202
11.5k
    }
2203
23.8k
    break;
2204
23.8k
  case X86::MOVDDUPrm:
2205
2.92k
  case X86::VMOVDDUPrm:
2206
2.92k
  case X86::VMOVDDUPZ128rm:
2207
2.92k
  case X86::VBROADCASTSSrm:
2208
2.92k
  case X86::VBROADCASTSSYrm:
2209
2.92k
  case X86::VBROADCASTSSZ128m:
2210
2.92k
  case X86::VBROADCASTSSZ256m:
2211
2.92k
  case X86::VBROADCASTSSZm:
2212
2.92k
  case X86::VBROADCASTSDYrm:
2213
2.92k
  case X86::VBROADCASTSDZ256m:
2214
2.92k
  case X86::VBROADCASTSDZm:
2215
2.92k
  case X86::VPBROADCASTBrm:
2216
2.92k
  case X86::VPBROADCASTBYrm:
2217
2.92k
  case X86::VPBROADCASTBZ128m:
2218
2.92k
  case X86::VPBROADCASTBZ256m:
2219
2.92k
  case X86::VPBROADCASTBZm:
2220
2.92k
  case X86::VPBROADCASTDrm:
2221
2.92k
  case X86::VPBROADCASTDYrm:
2222
2.92k
  case X86::VPBROADCASTDZ128m:
2223
2.92k
  case X86::VPBROADCASTDZ256m:
2224
2.92k
  case X86::VPBROADCASTDZm:
2225
2.92k
  case X86::VPBROADCASTQrm:
2226
2.92k
  case X86::VPBROADCASTQYrm:
2227
2.92k
  case X86::VPBROADCASTQZ128m:
2228
2.92k
  case X86::VPBROADCASTQZ256m:
2229
2.92k
  case X86::VPBROADCASTQZm:
2230
2.92k
  case X86::VPBROADCASTWrm:
2231
2.92k
  case X86::VPBROADCASTWYrm:
2232
2.92k
  case X86::VPBROADCASTWZ128m:
2233
2.92k
  case X86::VPBROADCASTWZ256m:
2234
2.92k
  case X86::VPBROADCASTWZm:
2235
2.92k
    if (!OutStreamer->isVerboseAsm())
2236
126
      break;
2237
2.79k
    if (MI->getNumOperands() <= 4)
2238
0
      break;
2239
2.79k
    if (auto *C = getConstantFromPool(*MI, MI->getOperand(4))) {
2240
1.78k
      int NumElts;
2241
1.78k
      switch (MI->getOpcode()) {
2242
1.78k
      
default: 0
llvm_unreachable0
("Invalid opcode");
2243
1.78k
      
case X86::MOVDDUPrm: NumElts = 2; break0
;
2244
1.78k
      
case X86::VMOVDDUPrm: NumElts = 2; break46
;
2245
1.78k
      
case X86::VMOVDDUPZ128rm: NumElts = 2; break0
;
2246
1.78k
      
case X86::VBROADCASTSSrm: NumElts = 4; break158
;
2247
1.78k
      
case X86::VBROADCASTSSYrm: NumElts = 8; break83
;
2248
1.78k
      
case X86::VBROADCASTSSZ128m: NumElts = 4; break0
;
2249
1.78k
      
case X86::VBROADCASTSSZ256m: NumElts = 8; break0
;
2250
1.78k
      
case X86::VBROADCASTSSZm: NumElts = 16; break18
;
2251
1.78k
      
case X86::VBROADCASTSDYrm: NumElts = 4; break98
;
2252
1.78k
      
case X86::VBROADCASTSDZ256m: NumElts = 4; break0
;
2253
1.78k
      
case X86::VBROADCASTSDZm: NumElts = 8; break23
;
2254
1.78k
      
case X86::VPBROADCASTBrm: NumElts = 16; break1
;
2255
1.78k
      
case X86::VPBROADCASTBYrm: NumElts = 32; break1
;
2256
1.78k
      
case X86::VPBROADCASTBZ128m: NumElts = 16; break0
;
2257
1.78k
      
case X86::VPBROADCASTBZ256m: NumElts = 32; break0
;
2258
1.78k
      
case X86::VPBROADCASTBZm: NumElts = 64; break0
;
2259
1.78k
      
case X86::VPBROADCASTDrm: NumElts = 4; break444
;
2260
1.78k
      
case X86::VPBROADCASTDYrm: NumElts = 8; break273
;
2261
1.78k
      
case X86::VPBROADCASTDZ128m: NumElts = 4; break0
;
2262
1.78k
      
case X86::VPBROADCASTDZ256m: NumElts = 8; break0
;
2263
1.78k
      
case X86::VPBROADCASTDZm: NumElts = 16; break45
;
2264
1.78k
      
case X86::VPBROADCASTQrm: NumElts = 2; break46
;
2265
1.78k
      
case X86::VPBROADCASTQYrm: NumElts = 4; break389
;
2266
1.78k
      
case X86::VPBROADCASTQZ128m: NumElts = 2; break0
;
2267
1.78k
      
case X86::VPBROADCASTQZ256m: NumElts = 4; break0
;
2268
1.78k
      
case X86::VPBROADCASTQZm: NumElts = 8; break116
;
2269
1.78k
      
case X86::VPBROADCASTWrm: NumElts = 8; break27
;
2270
1.78k
      
case X86::VPBROADCASTWYrm: NumElts = 16; break16
;
2271
1.78k
      
case X86::VPBROADCASTWZ128m: NumElts = 8; break0
;
2272
1.78k
      
case X86::VPBROADCASTWZ256m: NumElts = 16; break0
;
2273
1.78k
      
case X86::VPBROADCASTWZm: NumElts = 32; break2
;
2274
1.78k
      }
2275
1.78k
2276
1.78k
      std::string Comment;
2277
1.78k
      raw_string_ostream CS(Comment);
2278
1.78k
      const MachineOperand &DstOp = MI->getOperand(0);
2279
1.78k
      CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
2280
1.78k
      CS << "[";
2281
11.8k
      for (int i = 0; i != NumElts; 
++i10.0k
) {
2282
10.0k
        if (i != 0)
2283
8.30k
          CS << ",";
2284
10.0k
        printConstant(C, CS);
2285
10.0k
      }
2286
1.78k
      CS << "]";
2287
1.78k
      OutStreamer->AddComment(CS.str());
2288
1.78k
    }
2289
2.41M
  }
2290
2.41M
2291
2.41M
  MCInst TmpInst;
2292
2.40M
  MCInstLowering.Lower(MI, TmpInst);
2293
2.40M
2294
2.40M
  // Stackmap shadows cannot include branch targets, so we can count the bytes
2295
2.40M
  // in a call towards the shadow, but must ensure that no thread returns
2296
2.40M
  // into the stackmap shadow.  The only way to achieve this is if the call
2297
2.40M
  // is at the end of the shadow.
2298
2.40M
  if (MI->isCall()) {
2299
141k
    // Count the size of the call towards the shadow
2300
141k
    SMShadowTracker.count(TmpInst, getSubtargetInfo(), CodeEmitter.get());
2301
141k
    // Then flush the shadow so that we fill with nops before the call, not
2302
141k
    // after it.
2303
141k
    SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
2304
141k
    // Then emit the call
2305
141k
    OutStreamer->EmitInstruction(TmpInst, getSubtargetInfo());
2306
141k
    return;
2307
141k
  }
2308
2.26M
2309
2.26M
  EmitAndCountInstruction(TmpInst);
2310
2.26M
}