Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Target/AArch64/AArch64FastISel.cpp
Line
Count
Source
1
//===- AArch64FastISel.cpp - AArch64 FastISel implementation --------------===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// This file defines the AArch64-specific support for the FastISel class. Some
11
// of the target-specific code is generated by tablegen in the file
12
// AArch64GenFastISel.inc, which is #included here.
13
//
14
//===----------------------------------------------------------------------===//
15
16
#include "AArch64.h"
17
#include "AArch64CallingConvention.h"
18
#include "AArch64RegisterInfo.h"
19
#include "AArch64Subtarget.h"
20
#include "MCTargetDesc/AArch64AddressingModes.h"
21
#include "Utils/AArch64BaseInfo.h"
22
#include "llvm/ADT/APFloat.h"
23
#include "llvm/ADT/APInt.h"
24
#include "llvm/ADT/DenseMap.h"
25
#include "llvm/ADT/SmallVector.h"
26
#include "llvm/Analysis/BranchProbabilityInfo.h"
27
#include "llvm/CodeGen/CallingConvLower.h"
28
#include "llvm/CodeGen/FastISel.h"
29
#include "llvm/CodeGen/FunctionLoweringInfo.h"
30
#include "llvm/CodeGen/ISDOpcodes.h"
31
#include "llvm/CodeGen/MachineBasicBlock.h"
32
#include "llvm/CodeGen/MachineConstantPool.h"
33
#include "llvm/CodeGen/MachineFrameInfo.h"
34
#include "llvm/CodeGen/MachineInstr.h"
35
#include "llvm/CodeGen/MachineInstrBuilder.h"
36
#include "llvm/CodeGen/MachineMemOperand.h"
37
#include "llvm/CodeGen/MachineRegisterInfo.h"
38
#include "llvm/CodeGen/MachineValueType.h"
39
#include "llvm/CodeGen/RuntimeLibcalls.h"
40
#include "llvm/CodeGen/ValueTypes.h"
41
#include "llvm/IR/Argument.h"
42
#include "llvm/IR/Attributes.h"
43
#include "llvm/IR/BasicBlock.h"
44
#include "llvm/IR/CallingConv.h"
45
#include "llvm/IR/Constant.h"
46
#include "llvm/IR/Constants.h"
47
#include "llvm/IR/DataLayout.h"
48
#include "llvm/IR/DerivedTypes.h"
49
#include "llvm/IR/Function.h"
50
#include "llvm/IR/GetElementPtrTypeIterator.h"
51
#include "llvm/IR/GlobalValue.h"
52
#include "llvm/IR/InstrTypes.h"
53
#include "llvm/IR/Instruction.h"
54
#include "llvm/IR/Instructions.h"
55
#include "llvm/IR/IntrinsicInst.h"
56
#include "llvm/IR/Intrinsics.h"
57
#include "llvm/IR/Operator.h"
58
#include "llvm/IR/Type.h"
59
#include "llvm/IR/User.h"
60
#include "llvm/IR/Value.h"
61
#include "llvm/MC/MCInstrDesc.h"
62
#include "llvm/MC/MCRegisterInfo.h"
63
#include "llvm/MC/MCSymbol.h"
64
#include "llvm/Support/AtomicOrdering.h"
65
#include "llvm/Support/Casting.h"
66
#include "llvm/Support/CodeGen.h"
67
#include "llvm/Support/Compiler.h"
68
#include "llvm/Support/ErrorHandling.h"
69
#include "llvm/Support/MathExtras.h"
70
#include <algorithm>
71
#include <cassert>
72
#include <cstdint>
73
#include <iterator>
74
#include <utility>
75
76
using namespace llvm;
77
78
namespace {
79
80
class AArch64FastISel final : public FastISel {
81
  class Address {
82
  public:
83
    using BaseKind = enum {
84
      RegBase,
85
      FrameIndexBase
86
    };
87
88
  private:
89
    BaseKind Kind = RegBase;
90
    AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
91
    union {
92
      unsigned Reg;
93
      int FI;
94
    } Base;
95
    unsigned OffsetReg = 0;
96
    unsigned Shift = 0;
97
    int64_t Offset = 0;
98
    const GlobalValue *GV = nullptr;
99
100
  public:
101
1.01k
    Address() { Base.Reg = 0; }
102
103
271
    void setKind(BaseKind K) { Kind = K; }
104
0
    BaseKind getKind() const { return Kind; }
105
85
    void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
106
1.56k
    AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
107
2.06k
    bool isRegBase() const { return Kind == RegBase; }
108
812
    bool isFIBase() const { return Kind == FrameIndexBase; }
109
110
1.14k
    void setReg(unsigned Reg) {
111
1.14k
      assert(isRegBase() && "Invalid base register access!");
112
1.14k
      Base.Reg = Reg;
113
1.14k
    }
114
115
2.24k
    unsigned getReg() const {
116
2.24k
      assert(isRegBase() && "Invalid base register access!");
117
2.24k
      return Base.Reg;
118
2.24k
    }
119
120
566
    void setOffsetReg(unsigned Reg) {
121
566
      OffsetReg = Reg;
122
566
    }
123
124
2.77k
    unsigned getOffsetReg() const {
125
2.77k
      return OffsetReg;
126
2.77k
    }
127
128
236
    void setFI(unsigned FI) {
129
236
      assert(isFIBase() && "Invalid base frame index  access!");
130
236
      Base.FI = FI;
131
236
    }
132
133
244
    unsigned getFI() const {
134
244
      assert(isFIBase() && "Invalid base frame index access!");
135
244
      return Base.FI;
136
244
    }
137
138
222
    void setOffset(int64_t O) { Offset = O; }
139
4.20k
    int64_t getOffset() { return Offset; }
140
64
    void setShift(unsigned S) { Shift = S; }
141
91
    unsigned getShift() { return Shift; }
142
143
91
    void setGlobalValue(const GlobalValue *G) { GV = G; }
144
200
    const GlobalValue *getGlobalValue() { return GV; }
145
  };
146
147
  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
148
  /// make the right decision when generating code for different targets.
149
  const AArch64Subtarget *Subtarget;
150
  LLVMContext *Context;
151
152
  bool fastLowerArguments() override;
153
  bool fastLowerCall(CallLoweringInfo &CLI) override;
154
  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
155
156
private:
157
  // Selection routines.
158
  bool selectAddSub(const Instruction *I);
159
  bool selectLogicalOp(const Instruction *I);
160
  bool selectLoad(const Instruction *I);
161
  bool selectStore(const Instruction *I);
162
  bool selectBranch(const Instruction *I);
163
  bool selectIndirectBr(const Instruction *I);
164
  bool selectCmp(const Instruction *I);
165
  bool selectSelect(const Instruction *I);
166
  bool selectFPExt(const Instruction *I);
167
  bool selectFPTrunc(const Instruction *I);
168
  bool selectFPToInt(const Instruction *I, bool Signed);
169
  bool selectIntToFP(const Instruction *I, bool Signed);
170
  bool selectRem(const Instruction *I, unsigned ISDOpcode);
171
  bool selectRet(const Instruction *I);
172
  bool selectTrunc(const Instruction *I);
173
  bool selectIntExt(const Instruction *I);
174
  bool selectMul(const Instruction *I);
175
  bool selectShift(const Instruction *I);
176
  bool selectBitCast(const Instruction *I);
177
  bool selectFRem(const Instruction *I);
178
  bool selectSDiv(const Instruction *I);
179
  bool selectGetElementPtr(const Instruction *I);
180
  bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
181
182
  // Utility helper routines.
183
  bool isTypeLegal(Type *Ty, MVT &VT);
184
  bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
185
  bool isValueAvailable(const Value *V) const;
186
  bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
187
  bool computeCallAddress(const Value *V, Address &Addr);
188
  bool simplifyAddress(Address &Addr, MVT VT);
189
  void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
190
                            MachineMemOperand::Flags Flags,
191
                            unsigned ScaleFactor, MachineMemOperand *MMO);
192
  bool isMemCpySmall(uint64_t Len, unsigned Alignment);
193
  bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
194
                          unsigned Alignment);
195
  bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
196
                         const Value *Cond);
197
  bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
198
  bool optimizeSelect(const SelectInst *SI);
199
  std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx);
200
201
  // Emit helper routines.
202
  unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
203
                      const Value *RHS, bool SetFlags = false,
204
                      bool WantResult = true,  bool IsZExt = false);
205
  unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
206
                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
207
                         bool SetFlags = false, bool WantResult = true);
208
  unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
209
                         bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
210
                         bool WantResult = true);
211
  unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
212
                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
213
                         AArch64_AM::ShiftExtendType ShiftType,
214
                         uint64_t ShiftImm, bool SetFlags = false,
215
                         bool WantResult = true);
216
  unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
217
                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
218
                          AArch64_AM::ShiftExtendType ExtType,
219
                          uint64_t ShiftImm, bool SetFlags = false,
220
                         bool WantResult = true);
221
222
  // Emit functions.
223
  bool emitCompareAndBranch(const BranchInst *BI);
224
  bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
225
  bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
226
  bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
227
  bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
228
  unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
229
                    MachineMemOperand *MMO = nullptr);
230
  bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
231
                 MachineMemOperand *MMO = nullptr);
232
  bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
233
                        MachineMemOperand *MMO = nullptr);
234
  unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
235
  unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
236
  unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
237
                   bool SetFlags = false, bool WantResult = true,
238
                   bool IsZExt = false);
239
  unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm);
240
  unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
241
                   bool SetFlags = false, bool WantResult = true,
242
                   bool IsZExt = false);
243
  unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
244
                       unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
245
  unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
246
                       unsigned RHSReg, bool RHSIsKill,
247
                       AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
248
                       bool WantResult = true);
249
  unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
250
                         const Value *RHS);
251
  unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
252
                            bool LHSIsKill, uint64_t Imm);
253
  unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
254
                            bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
255
                            uint64_t ShiftImm);
256
  unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
257
  unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
258
                      unsigned Op1, bool Op1IsKill);
259
  unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
260
                        unsigned Op1, bool Op1IsKill);
261
  unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
262
                        unsigned Op1, bool Op1IsKill);
263
  unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
264
                      unsigned Op1Reg, bool Op1IsKill);
265
  unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
266
                      uint64_t Imm, bool IsZExt = true);
267
  unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
268
                      unsigned Op1Reg, bool Op1IsKill);
269
  unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
270
                      uint64_t Imm, bool IsZExt = true);
271
  unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
272
                      unsigned Op1Reg, bool Op1IsKill);
273
  unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
274
                      uint64_t Imm, bool IsZExt = false);
275
276
  unsigned materializeInt(const ConstantInt *CI, MVT VT);
277
  unsigned materializeFP(const ConstantFP *CFP, MVT VT);
278
  unsigned materializeGV(const GlobalValue *GV);
279
280
  // Call handling routines.
281
private:
282
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
283
  bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
284
                       unsigned &NumBytes);
285
  bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
286
287
public:
288
  // Backend specific FastISel code.
289
  unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
290
  unsigned fastMaterializeConstant(const Constant *C) override;
291
  unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
292
293
  explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
294
                           const TargetLibraryInfo *LibInfo)
295
1.23k
      : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
296
1.23k
    Subtarget =
297
1.23k
        &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
298
1.23k
    Context = &FuncInfo.Fn->getContext();
299
1.23k
  }
300
301
  bool fastSelectInstruction(const Instruction *I) override;
302
303
#include "AArch64GenFastISel.inc"
304
};
305
306
} // end anonymous namespace
307
308
#include "AArch64GenCallingConv.inc"
309
310
/// \brief Check if the sign-/zero-extend will be a noop.
311
83
static bool isIntExtFree(const Instruction *I) {
312
83
  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
313
83
         "Unexpected integer extend instruction.");
314
83
  assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
315
83
         "Unexpected value type.");
316
83
  bool IsZExt = isa<ZExtInst>(I);
317
83
318
83
  if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
319
0
    if (LI->hasOneUse())
320
0
      return true;
321
83
322
83
  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
323
80
    if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
324
4
      return true;
325
79
326
79
  return false;
327
79
}
328
329
/// \brief Determine the implicit scale factor that is applied by a memory
330
/// operation for a given value type.
331
1.63k
static unsigned getImplicitScaleFactor(MVT VT) {
332
1.63k
  switch (VT.SimpleTy) {
333
216
  default:
334
216
    return 0;    // invalid
335
222
  case MVT::i1:  // fall-through
336
222
  case MVT::i8:
337
222
    return 1;
338
130
  case MVT::i16:
339
130
    return 2;
340
538
  case MVT::i32: // fall-through
341
538
  case MVT::f32:
342
538
    return 4;
343
530
  case MVT::i64: // fall-through
344
530
  case MVT::f64:
345
530
    return 8;
346
0
  }
347
0
}
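
The scale factor returned above is what the scaled-immediate load/store forms divide a byte offset by before encoding it (see simplifyAddress and addLoadStoreOperands further down). A minimal standalone sketch of that rule, using a hypothetical helper name rather than anything in this file:

#include <cstdint>

// Illustrative only: a byte offset fits the scaled unsigned-immediate
// addressing form when it is non-negative, a multiple of the element
// size, and at most 4095 elements after scaling (isUInt<12>).
static bool fitsScaledImmediate(int64_t ByteOffset, unsigned ScaleFactor) {
  if (ScaleFactor == 0 || ByteOffset < 0)
    return false;
  if (ByteOffset % ScaleFactor != 0)
    return false;
  return (ByteOffset / ScaleFactor) < (1 << 12);
}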
348
349
194
CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
350
194
  if (CC == CallingConv::WebKit_JS)
351
7
    return CC_AArch64_WebKit_JS;
352
187
  if (CC == CallingConv::GHC)
353
0
    return CC_AArch64_GHC;
354
187
  return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
355
194
}
356
357
17
unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
358
17
  assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
359
17
         "Alloca should always return a pointer.");
360
17
361
17
  // Don't handle dynamic allocas.
362
17
  if (!FuncInfo.StaticAllocaMap.count(AI))
363
0
    return 0;
364
17
365
17
  DenseMap<const AllocaInst *, int>::iterator SI =
366
17
      FuncInfo.StaticAllocaMap.find(AI);
367
17
368
17
  if (SI != FuncInfo.StaticAllocaMap.end()) {
369
17
    unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
370
17
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
371
17
            ResultReg)
372
17
        .addFrameIndex(SI->second)
373
17
        .addImm(0)
374
17
        .addImm(0);
375
17
    return ResultReg;
376
17
  }
377
0
378
0
  return 0;
379
0
}
380
381
377
unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
382
377
  if (VT > MVT::i64)
383
0
    return 0;
384
377
385
377
  if (!CI->isZero())
386
245
    return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
387
132
388
132
  // Create a copy from the zero register to materialize a "0" value.
389
132
  const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
390
114
                                                   : &AArch64::GPR32RegClass;
391
132
  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
392
377
  unsigned ResultReg = createResultReg(RC);
393
377
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
394
377
          ResultReg).addReg(ZeroReg, getKillRegState(true));
395
377
  return ResultReg;
396
377
}
397
398
47
unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
399
47
  // Positive zero (+0.0) has to be materialized with a fmov from the zero
400
47
  // register, because the immediate version of fmov cannot encode zero.
401
47
  if (CFP->isNullValue())
402
2
    return fastMaterializeFloatZero(CFP);
403
45
404
45
  if (VT != MVT::f32 && VT != MVT::f64)
405
2
    return 0;
406
43
407
43
  const APFloat Val = CFP->getValueAPF();
408
43
  bool Is64Bit = (VT == MVT::f64);
409
43
  // This checks to see if we can use FMOV instructions to materialize
410
43
  // a constant, otherwise we have to materialize via the constant pool.
411
43
  if (TLI.isFPImmLegal(Val, VT)) {
412
10
    int Imm =
413
10
        Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
414
10
    assert((Imm != -1) && "Cannot encode floating-point constant.");
415
10
    unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
416
10
    return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
417
10
  }
418
33
419
33
  // For the MachO large code model materialize the FP constant in code.
420
33
  if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
421
4
    unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
422
4
    const TargetRegisterClass *RC = Is64Bit ?
423
4
        &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
424
4
425
4
    unsigned TmpReg = createResultReg(RC);
426
4
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
427
4
        .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
428
4
429
4
    unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
430
4
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
431
4
            TII.get(TargetOpcode::COPY), ResultReg)
432
4
        .addReg(TmpReg, getKillRegState(true));
433
4
434
4
    return ResultReg;
435
4
  }
436
29
437
29
  // Materialize via constant pool.  MachineConstantPool wants an explicit
438
29
  // alignment.
439
29
  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
440
29
  if (Align == 0)
441
0
    Align = DL.getTypeAllocSize(CFP->getType());
442
29
443
29
  unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
444
29
  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
445
29
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
446
29
          ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
447
29
448
29
  unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
449
47
  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
450
47
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
451
47
      .addReg(ADRPReg)
452
47
      .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
453
47
  return ResultReg;
454
47
}
455
456
129
unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
457
129
  // We can't handle thread-local variables quickly yet.
458
129
  if (GV->isThreadLocal())
459
10
    return 0;
460
119
461
119
  // MachO still uses GOT for large code-model accesses, but ELF requires
462
119
  // movz/movk sequences, which FastISel doesn't handle yet.
463
119
  if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
464
0
    return 0;
465
119
466
119
  unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
467
119
468
119
  EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
469
119
  if (!DestEVT.isSimple())
470
0
    return 0;
471
119
472
119
  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
473
119
  unsigned ResultReg;
474
119
475
119
  if (OpFlags & AArch64II::MO_GOT) {
476
48
    // ADRP + LDRX
477
48
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
478
48
            ADRPReg)
479
48
      .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);
480
48
481
48
    ResultReg = createResultReg(&AArch64::GPR64RegClass);
482
48
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
483
48
            ResultReg)
484
48
      .addReg(ADRPReg)
485
48
      .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
486
48
                        AArch64II::MO_NC);
487
119
  } else {
488
71
    // ADRP + ADDX
489
71
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
490
71
            ADRPReg)
491
71
      .addGlobalAddress(GV, 0, AArch64II::MO_PAGE);
492
71
493
71
    ResultReg = createResultReg(&AArch64::GPR64spRegClass);
494
71
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
495
71
            ResultReg)
496
71
      .addReg(ADRPReg)
497
71
      .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
498
71
      .addImm(0);
499
71
  }
500
129
  return ResultReg;
501
129
}
502
503
592
unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
504
592
  EVT CEVT = TLI.getValueType(DL, C->getType(), true);
505
592
506
592
  // Only handle simple types.
507
592
  if (!CEVT.isSimple())
508
0
    return 0;
509
592
  MVT VT = CEVT.getSimpleVT();
510
592
511
592
  if (const auto *CI = dyn_cast<ConstantInt>(C))
512
371
    return materializeInt(CI, VT);
513
221
  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
514
47
    return materializeFP(CFP, VT);
515
174
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
516
122
    return materializeGV(GV);
517
52
518
52
  return 0;
519
52
}
520
521
2
unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
522
2
  assert(CFP->isNullValue() &&
523
2
         "Floating-point constant is not a positive zero.");
524
2
  MVT VT;
525
2
  if (!isTypeLegal(CFP->getType(), VT))
526
0
    return 0;
527
2
528
2
  if (VT != MVT::f32 && VT != MVT::f64)
529
0
    return 0;
530
2
531
2
  bool Is64Bit = (VT == MVT::f64);
532
2
  unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
533
2
  unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
534
2
  return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
535
2
}
536
537
/// \brief Check if the multiply is by a power-of-2 constant.
538
567
static bool isMulPowOf2(const Value *I) {
539
567
  if (const auto *MI = dyn_cast<MulOperator>(I)) {
540
23
    if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
541
0
      if (C->getValue().isPowerOf2())
542
0
        return true;
543
23
    if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
544
21
      if (C->getValue().isPowerOf2())
545
19
        return true;
546
548
  }
547
548
  return false;
548
548
}
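
computeAddress below uses this check to fold a power-of-two multiply of an index into a shifted register-offset addressing mode (LSL #1..#3). A small standalone sketch of the fold condition, with hypothetical names not taken from this file:

#include <cstdint>

// Illustrative only: a multiply by 2, 4, or 8 can be folded into an
// LSL #1/#2/#3 register offset when the shift matches the access size.
static bool canFoldIndexMul(uint64_t MulAmount, uint64_t AccessBytes) {
  if (MulAmount == 0 || (MulAmount & (MulAmount - 1)) != 0)
    return false;                       // not a power of two
  unsigned Shift = 0;
  while ((1ULL << Shift) < MulAmount)
    ++Shift;
  // The selector only accepts shifts of 1..3 that match the element size.
  return Shift >= 1 && Shift <= 3 && AccessBytes == (1ULL << Shift);
}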
549
550
// Computes the address to get to an object.
551
bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
552
1.29k
{
553
1.29k
  const User *U = nullptr;
554
1.29k
  unsigned Opcode = Instruction::UserOp1;
555
1.29k
  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
556
675
    // Don't walk into other basic blocks unless the object is an alloca from
557
675
    // another block, otherwise it may not have a virtual register assigned.
558
675
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
559
675
        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
560
660
      Opcode = I->getOpcode();
561
660
      U = I;
562
660
    }
563
1.29k
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
564
25
    Opcode = C->getOpcode();
565
25
    U = C;
566
25
  }
567
1.29k
568
1.29k
  if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
569
928
    if (Ty->getAddressSpace() > 255)
570
928
      // Fast instruction selection doesn't support the special
571
928
      // address spaces.
572
10
      return false;
573
1.28k
574
1.28k
  switch (Opcode) {
575
623
  default:
576
623
    break;
577
17
  case Instruction::BitCast:
578
17
    // Look through bitcasts.
579
17
    return computeAddress(U->getOperand(0), Addr, Ty);
580
1.28k
581
136
  case Instruction::IntToPtr:
582
136
    // Look past no-op inttoptrs.
583
136
    if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
584
136
        TLI.getPointerTy(DL))
585
136
      return computeAddress(U->getOperand(0), Addr, Ty);
586
0
    break;
587
0
588
6
  case Instruction::PtrToInt:
589
6
    // Look past no-op ptrtoints.
590
6
    if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
591
6
      return computeAddress(U->getOperand(0), Addr, Ty);
592
0
    break;
593
0
594
62
  case Instruction::GetElementPtr: {
595
62
    Address SavedAddr = Addr;
596
62
    uint64_t TmpOffset = Addr.getOffset();
597
62
598
62
    // Iterate through the GEP folding the constants into offsets where
599
62
    // we can.
600
62
    for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
601
158
         GTI != E; ++GTI) {
602
111
      const Value *Op = GTI.getOperand();
603
111
      if (StructType *STy = GTI.getStructTypeOrNull()) {
604
21
        const StructLayout *SL = DL.getStructLayout(STy);
605
21
        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
606
21
        TmpOffset += SL->getElementOffset(Idx);
607
111
      } else {
608
90
        uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
609
90
        while (true) {
610
90
          if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
611
75
            // Constant-offset addressing.
612
75
            TmpOffset += CI->getSExtValue() * S;
613
75
            break;
614
75
          }
615
15
          if (canFoldAddIntoGEP(U, Op)) {
616
0
            // A compatible add with a constant operand. Fold the constant.
617
0
            ConstantInt *CI =
618
0
                cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
619
0
            TmpOffset += CI->getSExtValue() * S;
620
0
            // Iterate on the other operand.
621
0
            Op = cast<AddOperator>(Op)->getOperand(0);
622
0
            continue;
623
0
          }
624
15
          // Unsupported
625
15
          goto unsupported_gep;
626
15
        }
627
90
      }
628
111
    }
629
62
630
62
    // Try to grab the base operand now.
631
47
    Addr.setOffset(TmpOffset);
632
47
    if (computeAddress(U->getOperand(0), Addr, Ty))
633
46
      return true;
634
1
635
1
    // We failed, restore everything and try the other options.
636
1
    Addr = SavedAddr;
637
1
638
16
  unsupported_gep:
639
16
    break;
640
1
  }
641
236
  case Instruction::Alloca: {
642
236
    const AllocaInst *AI = cast<AllocaInst>(Obj);
643
236
    DenseMap<const AllocaInst *, int>::iterator SI =
644
236
        FuncInfo.StaticAllocaMap.find(AI);
645
236
    if (SI != FuncInfo.StaticAllocaMap.end()) {
646
236
      Addr.setKind(Address::FrameIndexBase);
647
236
      Addr.setFI(SI->second);
648
236
      return true;
649
236
    }
650
0
    break;
651
0
  }
652
117
  case Instruction::Add: {
653
117
    // Adds of constants are common and easy enough.
654
117
    const Value *LHS = U->getOperand(0);
655
117
    const Value *RHS = U->getOperand(1);
656
117
657
117
    if (isa<ConstantInt>(LHS))
658
1
      std::swap(LHS, RHS);
659
117
660
117
    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
661
29
      Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
662
29
      return computeAddress(LHS, Addr, Ty);
663
29
    }
664
88
665
88
    Address Backup = Addr;
666
88
    if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
667
88
      return true;
668
0
    Addr = Backup;
669
0
670
0
    break;
671
0
  }
672
21
  case Instruction::Sub: {
673
21
    // Subs of constants are common and easy enough.
674
21
    const Value *LHS = U->getOperand(0);
675
21
    const Value *RHS = U->getOperand(1);
676
21
677
21
    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
678
21
      Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
679
21
      return computeAddress(LHS, Addr, Ty);
680
21
    }
681
0
    break;
682
0
  }
683
26
  case Instruction::Shl: {
684
26
    if (Addr.getOffsetReg())
685
3
      break;
686
23
687
23
    const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
688
23
    if (!CI)
689
0
      break;
690
23
691
23
    unsigned Val = CI->getZExtValue();
692
23
    if (Val < 1 || Val > 3)
693
0
      break;
694
23
695
23
    uint64_t NumBytes = 0;
696
23
    if (Ty && Ty->isSized()) {
697
23
      uint64_t NumBits = DL.getTypeSizeInBits(Ty);
698
23
      NumBytes = NumBits / 8;
699
23
      if (!isPowerOf2_64(NumBits))
700
0
        NumBytes = 0;
701
23
    }
702
23
703
23
    if (NumBytes != (1ULL << Val))
704
0
      break;
705
23
706
23
    Addr.setShift(Val);
707
23
    Addr.setExtendType(AArch64_AM::LSL);
708
23
709
23
    const Value *Src = U->getOperand(0);
710
23
    if (const auto *I = dyn_cast<Instruction>(Src)) {
711
17
      if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
712
17
        // Fold the zext or sext when it won't become a noop.
713
17
        if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
714
5
          if (!isIntExtFree(ZE) &&
715
5
              ZE->getOperand(0)->getType()->isIntegerTy(32)) {
716
4
            Addr.setExtendType(AArch64_AM::UXTW);
717
4
            Src = ZE->getOperand(0);
718
4
          }
719
17
        } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
720
8
          if (!isIntExtFree(SE) &&
721
8
              SE->getOperand(0)->getType()->isIntegerTy(32)) {
722
7
            Addr.setExtendType(AArch64_AM::SXTW);
723
7
            Src = SE->getOperand(0);
724
7
          }
725
12
        }
726
17
      }
727
17
    }
728
23
729
23
    if (const auto *AI = dyn_cast<BinaryOperator>(Src))
730
5
      if (AI->getOpcode() == Instruction::And) {
731
4
        const Value *LHS = AI->getOperand(0);
732
4
        const Value *RHS = AI->getOperand(1);
733
4
734
4
        if (const auto *C = dyn_cast<ConstantInt>(LHS))
735
0
          if (C->getValue() == 0xffffffff)
736
0
            std::swap(LHS, RHS);
737
4
738
4
        if (const auto *C = dyn_cast<ConstantInt>(RHS))
739
3
          if (C->getValue() == 0xffffffff) {
740
3
            Addr.setExtendType(AArch64_AM::UXTW);
741
3
            unsigned Reg = getRegForValue(LHS);
742
3
            if (!Reg)
743
0
              return false;
744
3
            bool RegIsKill = hasTrivialKill(LHS);
745
3
            Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
746
3
                                             AArch64::sub_32);
747
3
            Addr.setOffsetReg(Reg);
748
3
            return true;
749
3
          }
750
5
      }
751
20
752
20
    unsigned Reg = getRegForValue(Src);
753
20
    if (!Reg)
754
0
      return false;
755
20
    Addr.setOffsetReg(Reg);
756
20
    return true;
757
20
  }
758
13
  case Instruction::Mul: {
759
13
    if (Addr.getOffsetReg())
760
0
      break;
761
13
762
13
    if (!isMulPowOf2(U))
763
0
      break;
764
13
765
13
    const Value *LHS = U->getOperand(0);
766
13
    const Value *RHS = U->getOperand(1);
767
13
768
13
    // Canonicalize power-of-2 value to the RHS.
769
13
    if (const auto *C = dyn_cast<ConstantInt>(LHS))
770
0
      if (C->getValue().isPowerOf2())
771
0
        std::swap(LHS, RHS);
772
13
773
13
    assert(isa<ConstantInt>(RHS) && "Expected an ConstantInt.");
774
13
    const auto *C = cast<ConstantInt>(RHS);
775
13
    unsigned Val = C->getValue().logBase2();
776
13
    if (Val < 1 || Val > 3)
777
0
      break;
778
13
779
13
    uint64_t NumBytes = 0;
780
13
    if (Ty && Ty->isSized()) {
781
13
      uint64_t NumBits = DL.getTypeSizeInBits(Ty);
782
13
      NumBytes = NumBits / 8;
783
13
      if (!isPowerOf2_64(NumBits))
784
0
        NumBytes = 0;
785
13
    }
786
13
787
13
    if (NumBytes != (1ULL << Val))
788
0
      break;
789
13
790
13
    Addr.setShift(Val);
791
13
    Addr.setExtendType(AArch64_AM::LSL);
792
13
793
13
    const Value *Src = LHS;
794
13
    if (const auto *I = dyn_cast<Instruction>(Src)) {
795
9
      if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
796
8
        // Fold the zext or sext when it won't become a noop.
797
8
        if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
798
4
          if (!isIntExtFree(ZE) &&
799
4
              ZE->getOperand(0)->getType()->isIntegerTy(32)) {
800
3
            Addr.setExtendType(AArch64_AM::UXTW);
801
3
            Src = ZE->getOperand(0);
802
3
          }
803
8
        } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
804
4
          if (!isIntExtFree(SE) &&
805
4
              SE->getOperand(0)->getType()->isIntegerTy(32)) {
806
3
            Addr.setExtendType(AArch64_AM::SXTW);
807
3
            Src = SE->getOperand(0);
808
3
          }
809
4
        }
810
8
      }
811
9
    }
812
13
813
13
    unsigned Reg = getRegForValue(Src);
814
13
    if (!Reg)
815
0
      return false;
816
13
    Addr.setOffsetReg(Reg);
817
13
    return true;
818
13
  }
819
3
  case Instruction::And: {
820
3
    if (Addr.getOffsetReg())
821
0
      break;
822
3
823
3
    if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
824
2
      break;
825
1
826
1
    const Value *LHS = U->getOperand(0);
827
1
    const Value *RHS = U->getOperand(1);
828
1
829
1
    if (const auto *C = dyn_cast<ConstantInt>(LHS))
830
0
      if (C->getValue() == 0xffffffff)
831
0
        std::swap(LHS, RHS);
832
1
833
1
    if (const auto *C = dyn_cast<ConstantInt>(RHS))
834
1
      if (C->getValue() == 0xffffffff) {
835
1
        Addr.setShift(0);
836
1
        Addr.setExtendType(AArch64_AM::LSL);
837
1
        Addr.setExtendType(AArch64_AM::UXTW);
838
1
839
1
        unsigned Reg = getRegForValue(LHS);
840
1
        if (!Reg)
841
0
          return false;
842
1
        bool RegIsKill = hasTrivialKill(LHS);
843
1
        Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
844
1
                                         AArch64::sub_32);
845
1
        Addr.setOffsetReg(Reg);
846
1
        return true;
847
1
      }
848
0
    break;
849
0
  }
850
20
  case Instruction::SExt:
851
20
  case Instruction::ZExt: {
852
20
    if (!Addr.getReg() || Addr.getOffsetReg())
853
0
      break;
854
20
855
20
    const Value *Src = nullptr;
856
20
    // Fold the zext or sext when it won't become a noop.
857
20
    if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
858
0
      if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
859
0
        Addr.setExtendType(AArch64_AM::UXTW);
860
0
        Src = ZE->getOperand(0);
861
0
      }
862
20
    } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
863
20
      if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
864
20
        Addr.setExtendType(AArch64_AM::SXTW);
865
20
        Src = SE->getOperand(0);
866
20
      }
867
20
    }
868
20
869
20
    if (!Src)
870
0
      break;
871
20
872
20
    Addr.setShift(0);
873
20
    unsigned Reg = getRegForValue(Src);
874
20
    if (!Reg)
875
0
      return false;
876
20
    Addr.setOffsetReg(Reg);
877
20
    return true;
878
20
  }
879
644
  } // end switch
880
644
881
644
  if (Addr.isRegBase() && !Addr.getReg()) {
882
610
    unsigned Reg = getRegForValue(Obj);
883
610
    if (!Reg)
884
8
      return false;
885
602
    Addr.setReg(Reg);
886
602
    return true;
887
602
  }
888
34
889
34
  if (!Addr.getOffsetReg()) {
890
34
    unsigned Reg = getRegForValue(Obj);
891
34
    if (!Reg)
892
0
      return false;
893
34
    Addr.setOffsetReg(Reg);
894
34
    return true;
895
34
  }
896
0
897
0
  return false;
898
0
}
899
900
126
bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
901
126
  const User *U = nullptr;
902
126
  unsigned Opcode = Instruction::UserOp1;
903
126
  bool InMBB = true;
904
126
905
126
  if (const auto *I = dyn_cast<Instruction>(V)) {
906
15
    Opcode = I->getOpcode();
907
15
    U = I;
908
15
    InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
909
126
  } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
910
3
    Opcode = C->getOpcode();
911
3
    U = C;
912
3
  }
913
126
914
126
  switch (Opcode) {
915
111
  default: break;
916
2
  case Instruction::BitCast:
917
2
    // Look past bitcasts if its operand is in the same BB.
918
2
    if (InMBB)
919
2
      return computeCallAddress(U->getOperand(0), Addr);
920
0
    break;
921
13
  case Instruction::IntToPtr:
922
13
    // Look past no-op inttoptrs if its operand is in the same BB.
923
13
    if (InMBB &&
924
13
        TLI.getValueType(DL, U->getOperand(0)->getType()) ==
925
13
            TLI.getPointerTy(DL))
926
13
      return computeCallAddress(U->getOperand(0), Addr);
927
0
    break;
928
0
  case Instruction::PtrToInt:
929
0
    // Look past no-op ptrtoints if its operand is in the same BB.
930
0
    if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
931
0
      return computeCallAddress(U->getOperand(0), Addr);
932
0
    break;
933
111
  }
934
111
935
111
  
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
936
91
    Addr.setGlobalValue(GV);
937
91
    return true;
938
91
  }
939
20
940
20
  // If all else fails, try to materialize the value in a register.
941
20
  
  if (!Addr.getGlobalValue()) {
942
20
    Addr.setReg(getRegForValue(V));
943
20
    return Addr.getReg() != 0;
944
20
  }
945
0
946
0
  return false;
947
0
}
948
949
3.84k
bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
950
3.84k
  EVT evt = TLI.getValueType(DL, Ty, true);
951
3.84k
952
3.84k
  // Only handle simple types.
953
3.84k
  if (evt == MVT::Other || !evt.isSimple())
954
11
    return false;
955
3.83k
  VT = evt.getSimpleVT();
956
3.83k
957
3.83k
  // This is a legal type, but it's not something we handle in fast-isel.
958
3.83k
  if (VT == MVT::f128)
959
26
    return false;
960
3.80k
961
3.80k
  // Handle all other legal types, i.e. a register that will directly hold this
962
3.80k
  // value.
963
3.80k
  return TLI.isTypeLegal(VT);
964
3.80k
}
965
966
/// \brief Determine if the value type is supported by FastISel.
967
///
968
/// FastISel for AArch64 can handle more value types than are legal. This adds
969
/// simple value types such as i1, i8, and i16.
970
2.09k
bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
971
2.09k
  if (Ty->isVectorTy() && !IsVectorAllowed)
972
6
    return false;
973
2.08k
974
2.08k
  
  if (isTypeLegal(Ty, VT))
975
1.57k
    return true;
976
512
977
512
  // If this is a type that can be sign- or zero-extended to a basic operation
978
512
  // go ahead and accept it now.
979
512
  
  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
980
490
    return true;
981
22
982
22
  return false;
983
22
}
984
985
1.31k
bool AArch64FastISel::isValueAvailable(const Value *V) const {
986
1.31k
  if (!isa<Instruction>(V))
987
505
    return true;
988
808
989
808
  const auto *I = cast<Instruction>(V);
990
808
  return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
991
808
}
992
993
927
bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
994
927
  unsigned ScaleFactor = getImplicitScaleFactor(VT);
995
927
  if (!ScaleFactor)
996
216
    return false;
997
711
998
711
  bool ImmediateOffsetNeedsLowering = false;
999
711
  bool RegisterOffsetNeedsLowering = false;
1000
711
  int64_t Offset = Addr.getOffset();
1001
711
  if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
1002
4
    ImmediateOffsetNeedsLowering = true;
1003
707
  else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1004
108
           !isUInt<12>(Offset / ScaleFactor))
1005
9
    ImmediateOffsetNeedsLowering = true;
1006
711
1007
711
  // Cannot encode an offset register and an immediate offset in the same
1008
711
  // instruction. Fold the immediate offset into the load/store instruction and
1009
711
  // emit an additional add to take care of the offset register.
1010
711
  if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1011
5
    RegisterOffsetNeedsLowering = true;
1012
711
1013
711
  // Cannot encode zero register as base.
1014
711
  if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1015
3
    RegisterOffsetNeedsLowering = true;
1016
711
1017
711
  // If this is a stack pointer and the offset needs to be simplified then put
1018
711
  // the alloca address into a register, set the base type back to register and
1019
711
  // continue. This should almost never happen.
1020
711
  if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1021
3
  {
1022
3
    unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1023
3
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
1024
3
            ResultReg)
1025
3
      .addFrameIndex(Addr.getFI())
1026
3
      .addImm(0)
1027
3
      .addImm(0);
1028
3
    Addr.setKind(Address::RegBase);
1029
3
    Addr.setReg(ResultReg);
1030
3
  }
1031
711
1032
711
  if (RegisterOffsetNeedsLowering) {
1033
7
    unsigned ResultReg = 0;
1034
7
    if (Addr.getReg()) {
1035
4
      if (Addr.getExtendType() == AArch64_AM::SXTW ||
1036
3
          Addr.getExtendType() == AArch64_AM::UXTW   )
1037
1
        ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1038
1
                                  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
1039
1
                                  /*TODO:IsKill=*/false, Addr.getExtendType(),
1040
1
                                  Addr.getShift());
1041
4
      else
1042
3
        ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1043
3
                                  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
1044
3
                                  /*TODO:IsKill=*/false, AArch64_AM::LSL,
1045
3
                                  Addr.getShift());
1046
7
    } else {
1047
3
      if (Addr.getExtendType() == AArch64_AM::UXTW)
1048
0
        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1049
0
                               /*Op0IsKill=*/false, Addr.getShift(),
1050
0
                               /*IsZExt=*/true);
1051
3
      else if (Addr.getExtendType() == AArch64_AM::SXTW)
1052
1
        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1053
1
                               /*Op0IsKill=*/false, Addr.getShift(),
1054
1
                               /*IsZExt=*/false);
1055
3
      else
1056
2
        ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1057
2
                               /*Op0IsKill=*/false, Addr.getShift());
1058
3
    }
1059
7
    if (!ResultReg)
1060
0
      return false;
1061
7
1062
7
    Addr.setReg(ResultReg);
1063
7
    Addr.setOffsetReg(0);
1064
7
    Addr.setShift(0);
1065
7
    Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1066
7
  }
1067
711
1068
711
  // Since the offset is too large for the load/store instruction get the
1069
711
  // reg+offset into a register.
1070
711
  
  if (ImmediateOffsetNeedsLowering) {
1071
13
    unsigned ResultReg;
1072
13
    if (Addr.getReg())
1073
13
      // Try to fold the immediate into the add instruction.
1074
13
      ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset);
1075
13
    else
1076
0
      ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1077
13
1078
13
    if (!ResultReg)
1079
2
      return false;
1080
11
    Addr.setReg(ResultReg);
1081
11
    Addr.setOffset(0);
1082
11
  }
1083
709
  return true;
1084
927
}
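
The two immediate forms simplifyAddress tests for above are the signed 9-bit unscaled offset and the unsigned 12-bit scaled offset; anything else is first folded into the base register. A compact sketch of that decision, using a hypothetical helper name:

#include <cstdint>

// Illustrative only: an offset is directly encodable if it fits the
// signed 9-bit unscaled form (isInt<9>) or, when non-negative and
// aligned, the unsigned 12-bit scaled form (isUInt<12> of Offset/Scale).
static bool offsetIsEncodable(int64_t Offset, unsigned ScaleFactor) {
  if (ScaleFactor == 0)
    return false;
  bool FitsUnscaled = Offset >= -256 && Offset <= 255;
  bool Aligned = Offset >= 0 && (Offset % ScaleFactor) == 0;
  bool FitsScaled = Aligned && (Offset / ScaleFactor) < 4096;
  return FitsUnscaled || FitsScaled;
}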
1085
1086
void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1087
                                           const MachineInstrBuilder &MIB,
1088
                                           MachineMemOperand::Flags Flags,
1089
                                           unsigned ScaleFactor,
1090
709
                                           MachineMemOperand *MMO) {
1091
709
  int64_t Offset = Addr.getOffset() / ScaleFactor;
1092
709
  // Frame base works a bit differently. Handle it separately.
1093
709
  if (Addr.isFIBase()) {
1094
241
    int FI = Addr.getFI();
1095
241
    // FIXME: We shouldn't be using getObjectSize/getObjectAlignment.  The size
1096
241
    // and alignment should be based on the VT.
1097
241
    MMO = FuncInfo.MF->getMachineMemOperand(
1098
241
        MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1099
241
        MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
1100
241
    // Now add the rest of the operands.
1101
241
    MIB.addFrameIndex(FI).addImm(Offset);
1102
709
  } else {
1103
468
    assert(Addr.isRegBase() && "Unexpected address kind.");
1104
468
    const MCInstrDesc &II = MIB->getDesc();
1105
468
    unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1106
468
    Addr.setReg(
1107
468
      constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1108
468
    Addr.setOffsetReg(
1109
468
      constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1110
468
    if (Addr.getOffsetReg()) {
1111
84
      assert(Addr.getOffset() == 0 && "Unexpected offset");
1112
84
      bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1113
56
                      Addr.getExtendType() == AArch64_AM::SXTX;
1114
84
      MIB.addReg(Addr.getReg());
1115
84
      MIB.addReg(Addr.getOffsetReg());
1116
84
      MIB.addImm(IsSigned);
1117
84
      MIB.addImm(Addr.getShift() != 0);
1118
84
    } else
1119
384
      MIB.addReg(Addr.getReg()).addImm(Offset);
1120
468
  }
1121
709
1122
709
  if (MMO)
1123
649
    MIB.addMemOperand(MMO);
1124
709
}
1125
1126
unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1127
                                     const Value *RHS, bool SetFlags,
1128
302
                                     bool WantResult,  bool IsZExt) {
1129
302
  AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1130
302
  bool NeedExtend = false;
1131
302
  switch (RetVT.SimpleTy) {
1132
0
  default:
1133
0
    return 0;
1134
2
  case MVT::i1:
1135
2
    NeedExtend = true;
1136
2
    break;
1137
4
  case MVT::i8:
1138
4
    NeedExtend = true;
1139
4
    ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1140
4
    break;
1141
5
  case MVT::i16:
1142
5
    NeedExtend = true;
1143
5
    ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1144
5
    break;
1145
291
  case MVT::i32:  // fall-through
1146
291
  case MVT::i64:
1147
291
    break;
1148
302
  }
1149
302
  MVT SrcVT = RetVT;
1150
302
  RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1151
302
1152
302
  // Canonicalize immediates to the RHS first.
1153
302
  if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1154
0
    std::swap(LHS, RHS);
1155
302
1156
302
  // Canonicalize mul by power of 2 to the RHS.
1157
302
  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1158
181
    if (isMulPowOf2(LHS))
1159
0
      std::swap(LHS, RHS);
1160
302
1161
302
  // Canonicalize shift immediate to the RHS.
1162
302
  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1163
181
    if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1164
79
      if (isa<ConstantInt>(SI->getOperand(1)))
1165
4
        if (SI->getOpcode() == Instruction::Shl ||
1166
4
            SI->getOpcode() == Instruction::LShr ||
1167
3
            SI->getOpcode() == Instruction::AShr)
1168
1
          std::swap(LHS, RHS);
1169
302
1170
302
  unsigned LHSReg = getRegForValue(LHS);
1171
302
  if (!LHSReg)
1172
0
    return 0;
1173
302
  bool LHSIsKill = hasTrivialKill(LHS);
1174
302
1175
302
  if (NeedExtend)
1176
11
    LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1177
302
1178
302
  unsigned ResultReg = 0;
1179
302
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1180
43
    uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1181
43
    if (C->isNegative())
1182
6
      ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
1183
6
                                SetFlags, WantResult);
1184
43
    else
1185
37
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
1186
37
                                WantResult);
1187
302
  } else if (const auto *C = dyn_cast<Constant>(RHS))
1188
2
    if (C->isNullValue())
1189
2
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags,
1190
2
                                WantResult);
1191
302
1192
302
  if (ResultReg)
1193
42
    return ResultReg;
1194
260
1195
260
  // Only extend the RHS within the instruction if there is a valid extend type.
1196
260
  
if (260
ExtendType != AArch64_AM::InvalidShiftExtend && 260
RHS->hasOneUse()6
&&
1197
260
      
isValueAvailable(RHS)6
) {
1198
6
    if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
1199
0
      
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
1200
0
        if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
1201
0
          unsigned RHSReg = getRegForValue(SI->getOperand(0));
1202
0
          if (!RHSReg)
1203
0
            return 0;
1204
0
          bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1205
0
          return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1206
0
                               RHSIsKill, ExtendType, C->getZExtValue(),
1207
0
                               SetFlags, WantResult);
1208
0
        }
1209
6
    unsigned RHSReg = getRegForValue(RHS);
1210
6
    if (!RHSReg)
1211
0
      return 0;
1212
6
    bool RHSIsKill = hasTrivialKill(RHS);
1213
6
    return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1214
6
                         ExtendType, 0, SetFlags, WantResult);
1215
6
  }
1216
254
1217
254
  // Check if the mul can be folded into the instruction.
1218
254
  
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1219
213
    if (isMulPowOf2(RHS)) {
1220
0
      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1221
0
      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1222
0
1223
0
      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1224
0
        
        if (C->getValue().isPowerOf2())
1225
0
          std::swap(MulLHS, MulRHS);
1226
0
1227
0
      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1228
0
      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1229
0
      unsigned RHSReg = getRegForValue(MulLHS);
1230
0
      if (!RHSReg)
1231
0
        return 0;
1232
0
      bool RHSIsKill = hasTrivialKill(MulLHS);
1233
0
      ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1234
0
                                RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags,
1235
0
                                WantResult);
1236
0
      if (ResultReg)
1237
0
        return ResultReg;
1238
254
    }
1239
213
  }
1240
254
1241
254
  // Check if the shift can be folded into the instruction.
1242
254
  
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1243
213
    if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1244
52
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1245
12
        AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1246
12
        switch (SI->getOpcode()) {
1247
0
        default: break;
1248
3
        case Instruction::Shl:  ShiftType = AArch64_AM::LSL; break;
1249
1
        case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1250
8
        case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1251
12
        }
1252
12
        uint64_t ShiftVal = C->getZExtValue();
1253
12
        if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1254
12
          unsigned RHSReg = getRegForValue(SI->getOperand(0));
1255
12
          if (!RHSReg)
1256
0
            return 0;
1257
12
          bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1258
12
          ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1259
12
                                    RHSIsKill, ShiftType, ShiftVal, SetFlags,
1260
12
                                    WantResult);
1261
12
          if (ResultReg)
1262
10
            return ResultReg;
1263
244
        }
1264
12
      }
1265
52
    }
1266
213
  }
1267
244
1268
244
  unsigned RHSReg = getRegForValue(RHS);
1269
244
  if (!RHSReg)
1270
0
    return 0;
1271
244
  bool RHSIsKill = hasTrivialKill(RHS);
1272
244
1273
244
  if (NeedExtend)
1274
1
    RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1275
302
1276
302
  return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1277
302
                       SetFlags, WantResult);
1278
302
}
1279
1280
unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1281
                                        bool LHSIsKill, unsigned RHSReg,
1282
                                        bool RHSIsKill, bool SetFlags,
1283
256
                                        bool WantResult) {
1284
256
  assert(LHSReg && RHSReg && "Invalid register number.");
1285
256
1286
256
  if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1287
256
      RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1288
2
    return 0;
1289
254
1290
254
  
  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1291
0
    return 0;
1292
254
1293
254
  static const unsigned OpcTable[2][2][2] = {
1294
254
    { { AArch64::SUBWrr,  AArch64::SUBXrr  },
1295
254
      { AArch64::ADDWrr,  AArch64::ADDXrr  }  },
1296
254
    { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1297
254
      { AArch64::ADDSWrr, AArch64::ADDSXrr }  }
1298
254
  };
1299
254
  bool Is64Bit = RetVT == MVT::i64;
1300
254
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1301
254
  const TargetRegisterClass *RC =
1302
254
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1303
254
  unsigned ResultReg;
1304
254
  if (WantResult)
1305
208
    ResultReg = createResultReg(RC);
1306
254
  else
1307
46
    
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1308
256
1309
256
  const MCInstrDesc &II = TII.get(Opc);
1310
256
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1311
256
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1312
256
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1313
256
      .addReg(LHSReg, getKillRegState(LHSIsKill))
1314
256
      .addReg(RHSReg, getKillRegState(RHSIsKill));
1315
256
  return ResultReg;
1316
256
}
1317
1318
unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1319
                                        bool LHSIsKill, uint64_t Imm,
1320
93
                                        bool SetFlags, bool WantResult) {
1321
93
  assert(LHSReg && "Invalid register number.");
1322
93
1323
93
  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1324
0
    return 0;
1325
93
1326
93
  unsigned ShiftImm;
1327
93
  if (isUInt<12>(Imm))
1328
78
    ShiftImm = 0;
1329
15
  else if ((Imm & 0xfff000) == Imm) {
1330
4
    ShiftImm = 12;
1331
4
    Imm >>= 12;
1332
4
  } else
1333
11
    return 0;
1334
82
1335
82
  static const unsigned OpcTable[2][2][2] = {
1336
82
    { { AArch64::SUBWri,  AArch64::SUBXri  },
1337
82
      { AArch64::ADDWri,  AArch64::ADDXri  }  },
1338
82
    { { AArch64::SUBSWri, AArch64::SUBSXri },
1339
82
      { AArch64::ADDSWri, AArch64::ADDSXri }  }
1340
82
  };
1341
82
  bool Is64Bit = RetVT == MVT::i64;
1342
82
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1343
82
  const TargetRegisterClass *RC;
1344
82
  if (SetFlags)
1345
33
    RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1346
82
  else
1347
49
    RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1348
82
  unsigned ResultReg;
1349
82
  if (WantResult)
1350
54
    ResultReg = createResultReg(RC);
1351
82
  else
1352
28
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1353
93
1354
93
  const MCInstrDesc &II = TII.get(Opc);
1355
93
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1356
93
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1357
93
      .addReg(LHSReg, getKillRegState(LHSIsKill))
1358
93
      .addImm(Imm)
1359
93
      .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1360
93
  return ResultReg;
1361
93
}
1362
1363
unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1364
                                        bool LHSIsKill, unsigned RHSReg,
1365
                                        bool RHSIsKill,
1366
                                        AArch64_AM::ShiftExtendType ShiftType,
1367
                                        uint64_t ShiftImm, bool SetFlags,
1368
28
                                        bool WantResult) {
1369
28
  assert(LHSReg && RHSReg && "Invalid register number.");
1370
28
  assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1371
28
         RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1372
28
1373
28
  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1374
0
    return 0;
1375
28
1376
28
  // Don't deal with undefined shifts.
1377
28
  if (ShiftImm >= RetVT.getSizeInBits())
1378
2
    return 0;
1379
26
1380
26
  static const unsigned OpcTable[2][2][2] = {
1381
26
    { { AArch64::SUBWrs,  AArch64::SUBXrs  },
1382
26
      { AArch64::ADDWrs,  AArch64::ADDXrs  }  },
1383
26
    { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1384
26
      { AArch64::ADDSWrs, AArch64::ADDSXrs }  }
1385
26
  };
1386
26
  bool Is64Bit = RetVT == MVT::i64;
1387
26
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1388
26
  const TargetRegisterClass *RC =
1389
26
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1390
26
  unsigned ResultReg;
1391
26
  if (WantResult)
1392
17
    ResultReg = createResultReg(RC);
1393
26
  else
1394
9
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1395
28
1396
28
  const MCInstrDesc &II = TII.get(Opc);
1397
28
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1398
28
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1399
28
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1400
28
      .addReg(LHSReg, getKillRegState(LHSIsKill))
1401
28
      .addReg(RHSReg, getKillRegState(RHSIsKill))
1402
28
      .addImm(getShifterImm(ShiftType, ShiftImm));
1403
28
  return ResultReg;
1404
28
}
1405
1406
unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1407
                                        bool LHSIsKill, unsigned RHSReg,
1408
                                        bool RHSIsKill,
1409
                                        AArch64_AM::ShiftExtendType ExtType,
1410
                                        uint64_t ShiftImm, bool SetFlags,
1411
7
                                        bool WantResult) {
1412
7
  assert(LHSReg && RHSReg && "Invalid register number.");
1413
7
  assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1414
7
         RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1415
7
1416
7
  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1417
0
    return 0;
1418
7
1419
7
  if (ShiftImm >= 4)
1420
0
    return 0;
1421
7
1422
7
  static const unsigned OpcTable[2][2][2] = {
1423
7
    { { AArch64::SUBWrx,  AArch64::SUBXrx  },
1424
7
      { AArch64::ADDWrx,  AArch64::ADDXrx  }  },
1425
7
    { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1426
7
      { AArch64::ADDSWrx, AArch64::ADDSXrx }  }
1427
7
  };
1428
7
  bool Is64Bit = RetVT == MVT::i64;
1429
7
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1430
7
  const TargetRegisterClass *RC = nullptr;
1431
7
  if (SetFlags)
1432
5
    RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1433
7
  else
1434
2
    RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1435
7
  unsigned ResultReg;
1436
7
  if (WantResult)
1437
2
    ResultReg = createResultReg(RC);
1438
7
  else
1439
5
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1440
7
1441
7
  const MCInstrDesc &II = TII.get(Opc);
1442
7
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1443
7
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1444
7
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1445
7
      .addReg(LHSReg, getKillRegState(LHSIsKill))
1446
7
      .addReg(RHSReg, getKillRegState(RHSIsKill))
1447
7
      .addImm(getArithExtendImm(ExtType, ShiftImm));
1448
7
  return ResultReg;
1449
7
}
1450
1451
111
bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1452
111
  Type *Ty = LHS->getType();
1453
111
  EVT EVT = TLI.getValueType(DL, Ty, true);
1454
111
  if (!EVT.isSimple())
1455
0
    return false;
1456
111
  MVT VT = EVT.getSimpleVT();
1457
111
1458
111
  switch (VT.SimpleTy) {
1459
0
  default:
1460
0
    return false;
1461
64
  case MVT::i1:
1462
64
  case MVT::i8:
1463
64
  case MVT::i16:
1464
64
  case MVT::i32:
1465
64
  case MVT::i64:
1466
64
    return emitICmp(VT, LHS, RHS, IsZExt);
1467
47
  case MVT::f32:
1468
47
  case MVT::f64:
1469
47
    return emitFCmp(VT, LHS, RHS);
1470
0
  }
1471
0
}
1472
1473
bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1474
64
                               bool IsZExt) {
1475
64
  return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1476
64
                 IsZExt) != 0;
1477
64
}
1478
1479
bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1480
11
                                  uint64_t Imm) {
1481
11
  return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
1482
11
                       /*SetFlags=*/true, /*WantResult=*/false) != 0;
1483
11
}
1484
1485
47
bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1486
47
  if (RetVT != MVT::f32 && RetVT != MVT::f64)
1487
0
    return false;
1488
47
1489
47
  // Check to see if the 2nd operand is a constant that we can encode directly
1490
47
  // in the compare.
1491
47
  bool UseImm = false;
1492
47
  if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1493
4
    if (CFP->isZero() && !CFP->isNegative())
1494
2
      UseImm = true;
1495
47
1496
47
  unsigned LHSReg = getRegForValue(LHS);
1497
47
  if (!LHSReg)
1498
0
    return false;
1499
47
  bool LHSIsKill = hasTrivialKill(LHS);
1500
47
1501
47
  if (UseImm) {
1502
2
    unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1503
2
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1504
2
        .addReg(LHSReg, getKillRegState(LHSIsKill));
1505
2
    return true;
1506
2
  }
1507
45
1508
45
  unsigned RHSReg = getRegForValue(RHS);
1509
45
  if (!RHSReg)
1510
0
    return false;
1511
45
  bool RHSIsKill = hasTrivialKill(RHS);
1512
45
1513
45
  unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1514
47
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1515
47
      .addReg(LHSReg, getKillRegState(LHSIsKill))
1516
47
      .addReg(RHSReg, getKillRegState(RHSIsKill));
1517
47
  return true;
1518
47
}
1519
1520
unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1521
217
                                  bool SetFlags, bool WantResult, bool IsZExt) {
1522
217
  return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1523
217
                    IsZExt);
1524
217
}
1525
1526
/// \brief This method is a wrapper to simplify add emission.
1527
///
1528
/// First try to emit an add with an immediate operand using emitAddSub_ri. If
1529
/// that fails, then try to materialize the immediate into a register and use
1530
/// emitAddSub_rr instead.
1531
unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill,
1532
37
                                      int64_t Imm) {
1533
37
  unsigned ResultReg;
1534
37
  if (Imm < 0)
1535
2
    ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm);
1536
37
  else
1537
35
    ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm);
1538
37
1539
37
  if (ResultReg)
1540
29
    return ResultReg;
1541
8
1542
8
  unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1543
8
  if (!CReg)
1544
0
    return 0;
1545
8
1546
8
  ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true);
1547
8
  return ResultReg;
1548
8
}
1549
1550
unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1551
85
                                  bool SetFlags, bool WantResult, bool IsZExt) {
1552
85
  return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1553
85
                    IsZExt);
1554
85
}
1555
1556
unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1557
                                      bool LHSIsKill, unsigned RHSReg,
1558
4
                                      bool RHSIsKill, bool WantResult) {
1559
4
  return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1560
4
                       RHSIsKill, /*SetFlags=*/true, WantResult);
1561
4
}
1562
1563
unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1564
                                      bool LHSIsKill, unsigned RHSReg,
1565
                                      bool RHSIsKill,
1566
                                      AArch64_AM::ShiftExtendType ShiftType,
1567
9
                                      uint64_t ShiftImm, bool WantResult) {
1568
9
  return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1569
9
                       RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
1570
9
                       WantResult);
1571
9
}
1572
1573
unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1574
89
                                        const Value *LHS, const Value *RHS) {
1575
89
  // Canonicalize immediates to the RHS first.
1576
89
  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1577
0
    std::swap(LHS, RHS);
1578
89
1579
89
  // Canonicalize mul by power-of-2 to the RHS.
1580
89
  if (LHS->hasOneUse() && isValueAvailable(LHS))
1581
89
    if (isMulPowOf2(LHS))
1582
0
      std::swap(LHS, RHS);
1583
89
1584
89
  // Canonicalize shift immediate to the RHS.
1585
89
  if (LHS->hasOneUse() && isValueAvailable(LHS))
1586
89
    if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1587
0
      if (isa<ConstantInt>(SI->getOperand(1)))
1588
0
        std::swap(LHS, RHS);
1589
89
1590
89
  unsigned LHSReg = getRegForValue(LHS);
1591
89
  if (!LHSReg)
1592
0
    return 0;
1593
89
  bool LHSIsKill = hasTrivialKill(LHS);
1594
89
1595
89
  unsigned ResultReg = 0;
1596
89
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1597
18
    uint64_t Imm = C->getZExtValue();
1598
18
    ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
1599
18
  }
1600
89
  if (ResultReg)
1601
18
    return ResultReg;
1602
71
1603
71
  // Check if the mul can be folded into the instruction.
1604
71
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1605
71
    if (isMulPowOf2(RHS)) {
1606
6
      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1607
6
      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1608
6
1609
6
      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1610
0
        if (C->getValue().isPowerOf2())
1611
0
          std::swap(MulLHS, MulRHS);
1612
6
1613
6
      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1614
6
      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1615
6
1616
6
      unsigned RHSReg = getRegForValue(MulLHS);
1617
6
      if (!RHSReg)
1618
0
        return 0;
1619
6
      bool RHSIsKill = hasTrivialKill(MulLHS);
1620
6
      ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1621
6
                                   RHSIsKill, ShiftVal);
1622
6
      if (ResultReg)
1623
6
        return ResultReg;
1624
65
    }
1625
71
  }
1626
65
1627
65
  // Check if the shift can be folded into the instruction.
1628
65
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1629
65
    if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1630
24
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1631
24
        uint64_t ShiftVal = C->getZExtValue();
1632
24
        unsigned RHSReg = getRegForValue(SI->getOperand(0));
1633
24
        if (!RHSReg)
1634
0
          return 0;
1635
24
        bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1636
24
        ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1637
24
                                     RHSIsKill, ShiftVal);
1638
24
        if (ResultReg)
1639
12
          return ResultReg;
1640
53
      }
1641
65
  }
1642
53
1643
53
  unsigned RHSReg = getRegForValue(RHS);
1644
53
  if (!RHSReg)
1645
0
    return 0;
1646
53
  bool RHSIsKill = hasTrivialKill(RHS);
1647
53
1648
53
  MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1649
53
  ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
1650
53
  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1651
12
    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1652
12
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1653
12
  }
1654
89
  return ResultReg;
1655
89
}
1656
1657
unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1658
                                           unsigned LHSReg, bool LHSIsKill,
1659
203
                                           uint64_t Imm) {
1660
203
  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1661
203
                "ISD nodes are not consecutive!");
1662
203
  static const unsigned OpcTable[3][2] = {
1663
203
    { AArch64::ANDWri, AArch64::ANDXri },
1664
203
    { AArch64::ORRWri, AArch64::ORRXri },
1665
203
    { AArch64::EORWri, AArch64::EORXri }
1666
203
  };
1667
203
  const TargetRegisterClass *RC;
1668
203
  unsigned Opc;
1669
203
  unsigned RegSize;
1670
203
  switch (RetVT.SimpleTy) {
1671
0
  default:
1672
0
    return 0;
1673
197
  case MVT::i1:
1674
197
  case MVT::i8:
1675
197
  case MVT::i16:
1676
197
  case MVT::i32: {
1677
197
    unsigned Idx = ISDOpc - ISD::AND;
1678
197
    Opc = OpcTable[Idx][0];
1679
197
    RC = &AArch64::GPR32spRegClass;
1680
197
    RegSize = 32;
1681
197
    break;
1682
197
  }
1683
6
  case MVT::i64:
1684
6
    Opc = OpcTable[ISDOpc - ISD::AND][1];
1685
6
    RC = &AArch64::GPR64spRegClass;
1686
6
    RegSize = 64;
1687
6
    break;
1688
203
  }
1689
203
1690
203
  if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1691
0
    return 0;
1692
203
1693
203
  unsigned ResultReg =
1694
203
      fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
1695
203
                      AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1696
203
  if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1697
4
    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1698
4
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1699
4
  }
1700
203
  return ResultReg;
1701
203
}
1702
1703
unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1704
                                           unsigned LHSReg, bool LHSIsKill,
1705
                                           unsigned RHSReg, bool RHSIsKill,
1706
30
                                           uint64_t ShiftImm) {
1707
30
  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1708
30
                "ISD nodes are not consecutive!");
1709
30
  static const unsigned OpcTable[3][2] = {
1710
30
    { AArch64::ANDWrs, AArch64::ANDXrs },
1711
30
    { AArch64::ORRWrs, AArch64::ORRXrs },
1712
30
    { AArch64::EORWrs, AArch64::EORXrs }
1713
30
  };
1714
30
1715
30
  // Don't deal with undefined shifts.
1716
30
  if (ShiftImm >= RetVT.getSizeInBits())
1717
12
    return 0;
1718
18
1719
18
  const TargetRegisterClass *RC;
1720
18
  unsigned Opc;
1721
18
  switch (RetVT.SimpleTy) {
1722
0
  default:
1723
0
    return 0;
1724
12
  case MVT::i1:
1725
12
  case MVT::i8:
1726
12
  case MVT::i16:
1727
12
  case MVT::i32:
1728
12
    Opc = OpcTable[ISDOpc - ISD::AND][0];
1729
12
    RC = &AArch64::GPR32RegClass;
1730
12
    break;
1731
6
  case MVT::i64:
1732
6
    Opc = OpcTable[ISDOpc - ISD::AND][1];
1733
6
    RC = &AArch64::GPR64RegClass;
1734
6
    break;
1735
18
  }
1736
18
  unsigned ResultReg =
1737
18
      fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1738
18
                       AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1739
18
  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1740
6
    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1741
6
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1742
6
  }
1743
30
  return ResultReg;
1744
30
}
1745
1746
unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1747
184
                                     uint64_t Imm) {
1748
184
  return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
1749
184
}
1750
1751
unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1752
422
                                   bool WantZExt, MachineMemOperand *MMO) {
1753
422
  if (!TLI.allowsMisalignedMemoryAccesses(VT))
1754
2
    return 0;
1755
420
1756
420
  // Simplify this down to something we can handle.
1757
420
  if (!simplifyAddress(Addr, VT))
1758
76
    return 0;
1759
344
1760
344
  unsigned ScaleFactor = getImplicitScaleFactor(VT);
1761
344
  if (!ScaleFactor)
1762
0
    llvm_unreachable("Unexpected value type.");
1763
344
1764
344
  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1765
344
  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1766
344
  bool UseScaled = true;
1767
344
  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1768
26
    UseScaled = false;
1769
26
    ScaleFactor = 1;
1770
26
  }
1771
344
1772
344
  static const unsigned GPOpcTable[2][8][4] = {
1773
344
    // Sign-extend.
1774
344
    { { AArch64::LDURSBWi,  AArch64::LDURSHWi,  AArch64::LDURWi,
1775
344
        AArch64::LDURXi  },
1776
344
      { AArch64::LDURSBXi,  AArch64::LDURSHXi,  AArch64::LDURSWi,
1777
344
        AArch64::LDURXi  },
1778
344
      { AArch64::LDRSBWui,  AArch64::LDRSHWui,  AArch64::LDRWui,
1779
344
        AArch64::LDRXui  },
1780
344
      { AArch64::LDRSBXui,  AArch64::LDRSHXui,  AArch64::LDRSWui,
1781
344
        AArch64::LDRXui  },
1782
344
      { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1783
344
        AArch64::LDRXroX },
1784
344
      { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1785
344
        AArch64::LDRXroX },
1786
344
      { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1787
344
        AArch64::LDRXroW },
1788
344
      { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1789
344
        AArch64::LDRXroW }
1790
344
    },
1791
344
    // Zero-extend.
1792
344
    { { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
1793
344
        AArch64::LDURXi  },
1794
344
      { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
1795
344
        AArch64::LDURXi  },
1796
344
      { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
1797
344
        AArch64::LDRXui  },
1798
344
      { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
1799
344
        AArch64::LDRXui  },
1800
344
      { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
1801
344
        AArch64::LDRXroX },
1802
344
      { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
1803
344
        AArch64::LDRXroX },
1804
344
      { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
1805
344
        AArch64::LDRXroW },
1806
344
      { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
1807
344
        AArch64::LDRXroW }
1808
344
    }
1809
344
  };
1810
344
1811
344
  static const unsigned FPOpcTable[4][2] = {
1812
344
    { AArch64::LDURSi,  AArch64::LDURDi  },
1813
344
    { AArch64::LDRSui,  AArch64::LDRDui  },
1814
344
    { AArch64::LDRSroX, AArch64::LDRDroX },
1815
344
    { AArch64::LDRSroW, AArch64::LDRDroW }
1816
344
  };
1817
344
1818
344
  unsigned Opc;
1819
344
  const TargetRegisterClass *RC;
1820
344
  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1821
194
                      Addr.getOffsetReg();
1822
344
  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1823
344
  if (Addr.getExtendType() == AArch64_AM::UXTW ||
1824
333
      Addr.getExtendType() == AArch64_AM::SXTW)
1825
39
    Idx++;
1826
344
1827
344
  bool IsRet64Bit = RetVT == MVT::i64;
1828
344
  switch (VT.SimpleTy) {
1829
0
  default:
1830
0
    llvm_unreachable("Unexpected value type.");
1831
66
  case MVT::i1: // Intentional fall-through.
1832
66
  case MVT::i8:
1833
66
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1834
13
    RC = (IsRet64Bit && !WantZExt) ?
1835
66
             &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1836
66
    break;
1837
44
  case MVT::i16:
1838
44
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1839
12
    RC = (IsRet64Bit && !WantZExt) ?
1840
44
             
&AArch64::GPR64RegClass6
:
&AArch64::GPR32RegClass38
;
1841
44
    break;
1842
112
  case MVT::i32:
1843
112
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1844
27
    RC = (IsRet64Bit && !WantZExt) ?
1845
112
             &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1846
112
    break;
1847
101
  case MVT::i64:
1848
101
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1849
101
    RC = &AArch64::GPR64RegClass;
1850
101
    break;
1851
7
  case MVT::f32:
1852
7
    Opc = FPOpcTable[Idx][0];
1853
7
    RC = &AArch64::FPR32RegClass;
1854
7
    break;
1855
14
  case MVT::f64:
1856
14
    Opc = FPOpcTable[Idx][1];
1857
14
    RC = &AArch64::FPR64RegClass;
1858
14
    break;
1859
344
  }
1860
344
1861
344
  // Create the base instruction, then add the operands.
1862
344
  unsigned ResultReg = createResultReg(RC);
1863
344
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1864
344
                                    TII.get(Opc), ResultReg);
1865
344
  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1866
344
1867
344
  // Loading an i1 requires special handling.
1868
344
  if (VT == MVT::i1) {
1869
3
    unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
1870
3
    assert(ANDReg && "Unexpected AND instruction emission failure.");
1871
3
    ResultReg = ANDReg;
1872
3
  }
1873
344
1874
344
  // For zero-extending loads to 64bit we emit a 32bit load and then convert
1875
344
  // the 32bit reg to a 64bit reg.
1876
344
  if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1877
21
    unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
1878
21
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1879
21
            TII.get(AArch64::SUBREG_TO_REG), Reg64)
1880
21
        .addImm(0)
1881
21
        .addReg(ResultReg, getKillRegState(true))
1882
21
        .addImm(AArch64::sub_32);
1883
21
    ResultReg = Reg64;
1884
21
  }
1885
422
  return ResultReg;
1886
422
}
1887
1888
274
bool AArch64FastISel::selectAddSub(const Instruction *I) {
1889
274
  MVT VT;
1890
274
  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1891
0
    return false;
1892
274
1893
274
  if (VT.isVector())
1894
71
    return selectOperator(I, I->getOpcode());
1895
203
1896
203
  unsigned ResultReg;
1897
203
  switch (I->getOpcode()) {
1898
0
  default:
1899
0
    llvm_unreachable("Unexpected instruction.");
1900
195
  case Instruction::Add:
1901
195
    ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1902
195
    break;
1903
8
  case Instruction::Sub:
1904
8
    ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1905
8
    break;
1906
203
  }
1907
203
  if (!ResultReg)
1908
0
    return false;
1909
203
1910
203
  updateValueMap(I, ResultReg);
1911
203
  return true;
1912
203
}
1913
1914
89
bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1915
89
  MVT VT;
1916
89
  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1917
0
    return false;
1918
89
1919
89
  if (VT.isVector())
1920
0
    return selectOperator(I, I->getOpcode());
1921
89
1922
89
  unsigned ResultReg;
1923
89
  switch (I->getOpcode()) {
1924
0
  default:
1925
0
    llvm_unreachable("Unexpected instruction.");
1926
50
  case Instruction::And:
1927
50
    ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1928
50
    break;
1929
20
  case Instruction::Or:
1930
20
    ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1931
20
    break;
1932
19
  case Instruction::Xor:
1933
19
    ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1934
19
    break;
1935
89
  }
1936
89
  if (!ResultReg)
1937
0
    return false;
1938
89
1939
89
  updateValueMap(I, ResultReg);
1940
89
  return true;
1941
89
}
1942
1943
408
bool AArch64FastISel::selectLoad(const Instruction *I) {
1944
408
  MVT VT;
1945
408
  // Verify we have a legal type before going any further.  Currently, we handle
1946
408
  // simple types that will directly fit in a register (i32/f32/i64/f64) or
1947
408
  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1948
408
  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1949
403
      cast<LoadInst>(I)->isAtomic())
1950
7
    return false;
1951
401
1952
401
  const Value *SV = I->getOperand(0);
1953
401
  if (TLI.supportSwiftError()) {
1954
401
    // Swifterror values can come from either a function parameter with
1955
401
    // swifterror attribute or an alloca with swifterror attribute.
1956
401
    if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1957
103
      if (Arg->hasSwiftErrorAttr())
1958
0
        return false;
1959
401
    }
1960
401
1961
401
    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1962
86
      if (Alloca->isSwiftError())
1963
5
        return false;
1964
396
    }
1965
401
  }
1966
396
1967
396
  // See if we can handle this address.
1968
396
  Address Addr;
1969
396
  if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1970
15
    return false;
1971
381
1972
381
  // Fold the following sign-/zero-extend into the load instruction.
1973
381
  bool WantZExt = true;
1974
381
  MVT RetVT = VT;
1975
381
  const Value *IntExtVal = nullptr;
1976
381
  if (I->hasOneUse()) {
1977
287
    if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1978
40
      if (isTypeSupported(ZE->getType(), RetVT))
1979
40
        IntExtVal = ZE;
1980
40
      else
1981
0
        RetVT = VT;
1982
287
    } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1983
61
      if (isTypeSupported(SE->getType(), RetVT))
1984
61
        IntExtVal = SE;
1985
61
      else
1986
0
        RetVT = VT;
1987
247
      WantZExt = false;
1988
247
    }
1989
287
  }
1990
381
1991
381
  unsigned ResultReg =
1992
381
      emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1993
381
  if (!ResultReg)
1994
78
    return false;
1995
303
1996
303
  // There are a few different cases we have to handle, because the load or the
1997
303
  // sign-/zero-extend might not be selected by FastISel if we fall-back to
1998
303
  // SelectionDAG. There is also an ordering issue when both instructions are in
1999
303
  // different basic blocks.
2000
303
  // 1.) The load instruction is selected by FastISel, but the integer extend
2001
303
  //     not. This usually happens when the integer extend is in a different
2002
303
  //     basic block and SelectionDAG took over for that basic block.
2003
303
  // 2.) The load instruction is selected before the integer extend. This only
2004
303
  //     happens when the integer extend is in a different basic block.
2005
303
  // 3.) The load instruction is selected by SelectionDAG and the integer extend
2006
303
  //     by FastISel. This happens if there are instructions between the load
2007
303
  //     and the integer extend that couldn't be selected by FastISel.
2008
303
  if (IntExtVal) {
2009
101
    // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
2010
101
    // could select it. Emit a copy to subreg if necessary. FastISel will remove
2011
101
    // it when it selects the integer extend.
2012
101
    unsigned Reg = lookUpRegForValue(IntExtVal);
2013
101
    auto *MI = MRI.getUniqueVRegDef(Reg);
2014
101
    if (!MI) {
2015
1
      if (RetVT == MVT::i64 && VT <= MVT::i32) {
2016
1
        if (WantZExt) {
2017
1
          // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2018
1
          std::prev(FuncInfo.InsertPt)->eraseFromParent();
2019
1
          ResultReg = std::prev(FuncInfo.InsertPt)->getOperand(0).getReg();
2020
1
        } else
2021
0
          ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2022
0
                                                 /*IsKill=*/true,
2023
0
                                                 AArch64::sub_32);
2024
1
      }
2025
1
      updateValueMap(I, ResultReg);
2026
1
      return true;
2027
1
    }
2028
100
2029
100
    // The integer extend has already been emitted - delete all the instructions
2030
100
    // that have been emitted by the integer extend lowering code and use the
2031
100
    // result from the load instruction directly.
2032
251
    while (MI) {
2033
151
      Reg = 0;
2034
202
      for (auto &Opnd : MI->uses()) {
2035
202
        if (Opnd.isReg()) {
2036
151
          Reg = Opnd.getReg();
2037
151
          break;
2038
151
        }
2039
151
      }
2040
151
      MI->eraseFromParent();
2041
151
      MI = nullptr;
2042
151
      if (Reg)
2043
151
        MI = MRI.getUniqueVRegDef(Reg);
2044
151
    }
2045
101
    updateValueMap(IntExtVal, ResultReg);
2046
101
    return true;
2047
101
  }
2048
202
2049
202
  updateValueMap(I, ResultReg);
2050
202
  return true;
2051
202
}
2052
2053
bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
2054
                                       unsigned AddrReg,
2055
18
                                       MachineMemOperand *MMO) {
2056
18
  unsigned Opc;
2057
18
  switch (VT.SimpleTy) {
2058
0
  default: return false;
2059
4
  case MVT::i8:  Opc = AArch64::STLRB; break;
2060
4
  case MVT::i16: Opc = AArch64::STLRH; break;
2061
6
  case MVT::i32: Opc = AArch64::STLRW; break;
2062
4
  case MVT::i64: Opc = AArch64::STLRX; break;
2063
18
  }
2064
18
2065
18
  const MCInstrDesc &II = TII.get(Opc);
2066
18
  SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2067
18
  AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2068
18
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2069
18
      .addReg(SrcReg)
2070
18
      .addReg(AddrReg)
2071
18
      .addMemOperand(MMO);
2072
18
  return true;
2073
18
}
2074
2075
bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
2076
507
                                MachineMemOperand *MMO) {
2077
507
  if (!TLI.allowsMisalignedMemoryAccesses(VT))
2078
0
    return false;
2079
507
2080
507
  // Simplify this down to something we can handle.
2081
507
  if (!simplifyAddress(Addr, VT))
2082
142
    return false;
2083
365
2084
365
  unsigned ScaleFactor = getImplicitScaleFactor(VT);
2085
365
  if (!ScaleFactor)
2086
0
    llvm_unreachable("Unexpected value type.");
2087
365
2088
365
  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2089
365
  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2090
365
  bool UseScaled = true;
2091
365
  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2092
6
    UseScaled = false;
2093
6
    ScaleFactor = 1;
2094
6
  }
2095
365
2096
365
  static const unsigned OpcTable[4][6] = {
2097
365
    { AArch64::STURBBi,  AArch64::STURHHi,  AArch64::STURWi,  AArch64::STURXi,
2098
365
      AArch64::STURSi,   AArch64::STURDi },
2099
365
    { AArch64::STRBBui,  AArch64::STRHHui,  AArch64::STRWui,  AArch64::STRXui,
2100
365
      AArch64::STRSui,   AArch64::STRDui },
2101
365
    { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2102
365
      AArch64::STRSroX,  AArch64::STRDroX },
2103
365
    { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2104
365
      AArch64::STRSroW,  AArch64::STRDroW }
2105
365
  };
2106
365
2107
365
  unsigned Opc;
2108
365
  bool VTIsi1 = false;
2109
365
  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2110
161
                      Addr.getOffsetReg();
2111
365
  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2112
365
  if (Addr.getExtendType() == AArch64_AM::UXTW ||
2113
365
      Addr.getExtendType() == AArch64_AM::SXTW)
2114
0
    Idx++;
2115
365
2116
365
  switch (VT.SimpleTy) {
2117
0
  default: llvm_unreachable("Unexpected value type.");
2118
8
  case MVT::i1: VTIsi1 = true; LLVM_FALLTHROUGH;
2119
44
  case MVT::i8:  Opc = OpcTable[Idx][0]; break;
2120
21
  case MVT::i16: Opc = OpcTable[Idx][1]; break;
2121
139
  case MVT::i32: Opc = OpcTable[Idx][2]; break;
2122
125
  case MVT::i64: Opc = OpcTable[Idx][3]; break;
2123
11
  case MVT::f32: Opc = OpcTable[Idx][4]; break;
2124
25
  case MVT::f64: Opc = OpcTable[Idx][5]; break;
2125
365
  }
2126
365
2127
365
  // Storing an i1 requires special handling.
2128
365
  if (VTIsi1 && SrcReg != AArch64::WZR) {
2129
7
    unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
2130
7
    assert(ANDReg && "Unexpected AND instruction emission failure.");
2131
7
    SrcReg = ANDReg;
2132
7
  }
2133
507
  // Create the base instruction, then add the operands.
2134
507
  const MCInstrDesc &II = TII.get(Opc);
2135
507
  SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2136
507
  MachineInstrBuilder MIB =
2137
507
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
2138
507
  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2139
507
2140
507
  return true;
2141
507
}
2142
2143
472
bool AArch64FastISel::selectStore(const Instruction *I) {
2144
472
  MVT VT;
2145
472
  const Value *Op0 = I->getOperand(0);
2146
472
  // Verify we have a legal type before going any further.  Currently, we handle
2147
472
  // simple types that will directly fit in a register (i32/f32/i64/f64) or
2148
472
  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2149
472
  if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2150
16
    return false;
2151
456
2152
456
  const Value *PtrV = I->getOperand(1);
2153
456
  if (TLI.supportSwiftError()) {
2154
456
    // Swifterror values can come from either a function parameter with
2155
456
    // swifterror attribute or an alloca with swifterror attribute.
2156
456
    if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2157
216
      if (Arg->hasSwiftErrorAttr())
2158
1
        return false;
2159
455
    }
2160
455
2161
455
    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2162
144
      if (Alloca->isSwiftError())
2163
1
        return false;
2164
454
    }
2165
456
  }
2166
454
2167
454
  // Get the value to be stored into a register. Use the zero register directly
2168
454
  // when possible to avoid an unnecessary copy and a wasted register.
2169
454
  unsigned SrcReg = 0;
2170
454
  if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2171
79
    if (CI->isZero())
2172
57
      SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2173
454
  } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2174
2
    if (CF->isZero() && !CF->isNegative()) {
2175
2
      VT = MVT::getIntegerVT(VT.getSizeInBits());
2176
2
      SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2177
2
    }
2178
375
  }
2179
454
2180
454
  if (!SrcReg)
2181
395
    SrcReg = getRegForValue(Op0);
2182
454
2183
454
  if (!SrcReg)
2184
0
    return false;
2185
454
2186
454
  auto *SI = cast<StoreInst>(I);
2187
454
2188
454
  // Try to emit a STLR for seq_cst/release.
2189
454
  if (SI->isAtomic()) {
2190
26
    AtomicOrdering Ord = SI->getOrdering();
2191
26
    // The non-atomic instructions are sufficient for relaxed stores.
2192
26
    if (isReleaseOrStronger(Ord)) {
2193
18
      // The STLR addressing mode only supports a base reg; pass that directly.
2194
18
      unsigned AddrReg = getRegForValue(PtrV);
2195
18
      return emitStoreRelease(VT, SrcReg, AddrReg,
2196
18
                              createMachineMemOperandFor(I));
2197
18
    }
2198
436
  }
2199
436
2200
436
  // See if we can handle this address.
2201
436
  Address Addr;
2202
436
  if (!computeAddress(PtrV, Addr, Op0->getType()))
2203
2
    return false;
2204
434
2205
434
  if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2206
140
    return false;
2207
294
  return true;
2208
294
}
2209
2210
109
static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2211
109
  switch (Pred) {
2212
4
  case CmpInst::FCMP_ONE:
2213
4
  case CmpInst::FCMP_UEQ:
2214
4
  default:
2215
4
    // AL is our "false" for now. The other two need more compares.
2216
4
    return AArch64CC::AL;
2217
13
  case CmpInst::ICMP_EQ:
2218
13
  case CmpInst::FCMP_OEQ:
2219
13
    return AArch64CC::EQ;
2220
9
  case CmpInst::ICMP_SGT:
2221
9
  case CmpInst::FCMP_OGT:
2222
9
    return AArch64CC::GT;
2223
8
  case CmpInst::ICMP_SGE:
2224
8
  case CmpInst::FCMP_OGE:
2225
8
    return AArch64CC::GE;
2226
6
  case CmpInst::ICMP_UGT:
2227
6
  case CmpInst::FCMP_UGT:
2228
6
    return AArch64CC::HI;
2229
3
  case CmpInst::FCMP_OLT:
2230
3
    return AArch64CC::MI;
2231
6
  case CmpInst::ICMP_ULE:
2232
6
  case CmpInst::FCMP_OLE:
2233
6
    return AArch64CC::LS;
2234
3
  case CmpInst::FCMP_ORD:
2235
3
    return AArch64CC::VC;
2236
3
  case CmpInst::FCMP_UNO:
2237
3
    return AArch64CC::VS;
2238
3
  case CmpInst::FCMP_UGE:
2239
3
    return AArch64CC::PL;
2240
10
  case CmpInst::ICMP_SLT:
2241
10
  case CmpInst::FCMP_ULT:
2242
10
    return AArch64CC::LT;
2243
10
  case CmpInst::ICMP_SLE:
2244
10
  case CmpInst::FCMP_ULE:
2245
10
    return AArch64CC::LE;
2246
18
  case CmpInst::FCMP_UNE:
2247
18
  case CmpInst::ICMP_NE:
2248
18
    return AArch64CC::NE;
2249
7
  case CmpInst::ICMP_UGE:
2250
7
    return AArch64CC::HS;
2251
6
  case CmpInst::ICMP_ULT:
2252
6
    return AArch64CC::LO;
2253
0
  }
2254
0
}
2255
2256
/// \brief Try to emit a combined compare-and-branch instruction.
2257
93
bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2258
93
  assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2259
93
  const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2260
93
  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2261
93
2262
93
  const Value *LHS = CI->getOperand(0);
2263
93
  const Value *RHS = CI->getOperand(1);
2264
93
2265
93
  MVT VT;
2266
93
  if (!isTypeSupported(LHS->getType(), VT))
2267
0
    return false;
2268
93
2269
93
  unsigned BW = VT.getSizeInBits();
2270
93
  if (BW > 64)
2271
0
    return false;
2272
93
2273
93
  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2274
93
  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2275
93
2276
93
  // Try to take advantage of fallthrough opportunities.
2277
93
  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2278
54
    std::swap(TBB, FBB);
2279
54
    Predicate = CmpInst::getInversePredicate(Predicate);
2280
54
  }
2281
93
2282
93
  int TestBit = -1;
2283
93
  bool IsCmpNE;
2284
93
  switch (Predicate) {
2285
24
  default:
2286
24
    return false;
2287
44
  case CmpInst::ICMP_EQ:
2288
44
  case CmpInst::ICMP_NE:
2289
44
    if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2290
0
      std::swap(LHS, RHS);
2291
44
2292
44
    if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2293
3
      return false;
2294
41
2295
41
    if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2296
13
      if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2297
12
        const Value *AndLHS = AI->getOperand(0);
2298
12
        const Value *AndRHS = AI->getOperand(1);
2299
12
2300
12
        if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2301
0
          if (C->getValue().isPowerOf2())
2302
0
            std::swap(AndLHS, AndRHS);
2303
12
2304
12
        if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2305
12
          if (C->getValue().isPowerOf2()) {
2306
12
            TestBit = C->getValue().logBase2();
2307
12
            LHS = AndLHS;
2308
12
          }
2309
13
      }
2310
41
2311
41
    if (VT == MVT::i1)
2312
1
      TestBit = 0;
2313
41
2314
41
    IsCmpNE = Predicate == CmpInst::ICMP_NE;
2315
41
    break;
2316
14
  case CmpInst::ICMP_SLT:
2317
14
  case CmpInst::ICMP_SGE:
2318
14
    if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2319
7
      return false;
2320
7
2321
7
    TestBit = BW - 1;
2322
7
    IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2323
7
    break;
2324
11
  case CmpInst::ICMP_SGT:
2325
11
  case CmpInst::ICMP_SLE:
2326
11
    if (!isa<ConstantInt>(RHS))
2327
2
      return false;
2328
9
2329
9
    if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2330
1
      return false;
2331
8
2332
8
    TestBit = BW - 1;
2333
8
    IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2334
8
    break;
2335
56
  } // end switch
2336
56
2337
56
  static const unsigned OpcTable[2][2][2] = {
2338
56
    { {AArch64::CBZW,  AArch64::CBZX },
2339
56
      {AArch64::CBNZW, AArch64::CBNZX} },
2340
56
    { {AArch64::TBZW,  AArch64::TBZX },
2341
56
      {AArch64::TBNZW, AArch64::TBNZX} }
2342
56
  };
2343
56
2344
56
  bool IsBitTest = TestBit != -1;
2345
56
  bool Is64Bit = BW == 64;
2346
56
  if (TestBit < 32 && TestBit >= 0)
2347
23
    Is64Bit = false;
2348
56
2349
56
  unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2350
56
  const MCInstrDesc &II = TII.get(Opc);
2351
56
2352
56
  unsigned SrcReg = getRegForValue(LHS);
2353
56
  if (!SrcReg)
2354
0
    return false;
2355
56
  bool SrcIsKill = hasTrivialKill(LHS);
2356
56
2357
56
  if (BW == 64 && !Is64Bit)
2358
2
    SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
2359
2
                                        AArch64::sub_32);
2360
56
2361
56
  if ((BW < 32) && !IsBitTest)
2362
8
    SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*IsZExt=*/true);
2363
56
2364
56
  // Emit the combined compare and branch instruction.
2365
56
  SrcReg = constrainOperandRegClass(II, SrcReg,  II.getNumDefs());
2366
56
  MachineInstrBuilder MIB =
2367
56
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
2368
56
          .addReg(SrcReg, getKillRegState(SrcIsKill));
2369
56
  if (IsBitTest)
2370
28
    MIB.addImm(TestBit);
2371
93
  MIB.addMBB(TBB);
2372
93
2373
93
  finishCondBranch(BI->getParent(), TBB, FBB);
2374
93
  return true;
2375
93
}
2376
2377
300
bool AArch64FastISel::selectBranch(const Instruction *I) {
2378
300
  const BranchInst *BI = cast<BranchInst>(I);
2379
300
  if (BI->isUnconditional()) {
2380
168
    MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2381
168
    fastEmitBranch(MSucc, BI->getDebugLoc());
2382
168
    return true;
2383
168
  }
2384
132
2385
132
  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2386
132
  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2387
132
2388
132
  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2389
94
    if (CI->hasOneUse() && isValueAvailable(CI)) {
2390
93
      // Try to optimize or fold the cmp.
2391
93
      CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2392
93
      switch (Predicate) {
2393
93
      default:
2394
93
        break;
2395
0
      case CmpInst::FCMP_FALSE:
2396
0
        fastEmitBranch(FBB, DbgLoc);
2397
0
        return true;
2398
0
      case CmpInst::FCMP_TRUE:
2399
0
        fastEmitBranch(TBB, DbgLoc);
2400
0
        return true;
2401
93
      }
2402
93
2403
93
      // Try to emit a combined compare-and-branch first.
2404
93
      if (emitCompareAndBranch(BI))
2405
56
        return true;
2406
37
2407
37
      // Try to take advantage of fallthrough opportunities.
2408
37
      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2409
10
        std::swap(TBB, FBB);
2410
10
        Predicate = CmpInst::getInversePredicate(Predicate);
2411
10
      }
2412
37
2413
37
      // Emit the cmp.
2414
37
      if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2415
0
        return false;
2416
37
2417
37
      // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2418
37
      // instruction.
2419
37
      AArch64CC::CondCode CC = getCompareCC(Predicate);
2420
37
      AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2421
37
      switch (Predicate) {
2422
35
      default:
2423
35
        break;
2424
1
      case CmpInst::FCMP_UEQ:
2425
1
        ExtraCC = AArch64CC::EQ;
2426
1
        CC = AArch64CC::VS;
2427
1
        break;
2428
1
      case CmpInst::FCMP_ONE:
2429
1
        ExtraCC = AArch64CC::MI;
2430
1
        CC = AArch64CC::GT;
2431
1
        break;
2432
37
      }
2433
0
      assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2434
37
2435
37
      // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2436
37
      if (ExtraCC != AArch64CC::AL) {
2437
2
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2438
2
            .addImm(ExtraCC)
2439
2
            .addMBB(TBB);
2440
2
      }
2441
93
2442
93
      // Emit the branch.
2443
93
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2444
93
          .addImm(CC)
2445
93
          .addMBB(TBB);
2446
93
2447
93
      finishCondBranch(BI->getParent(), TBB, FBB);
2448
93
      return true;
2449
93
    }
2450
38
  } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2451
2
    uint64_t Imm = CI->getZExtValue();
2452
2
    MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2453
2
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
2454
2
        .addMBB(Target);
2455
2
2456
2
    // Obtain the branch probability and add the target to the successor list.
2457
2
    if (FuncInfo.BPI) {
2458
0
      auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2459
0
          BI->getParent(), Target->getBasicBlock());
2460
0
      FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2461
0
    } else
2462
2
      FuncInfo.MBB->addSuccessorWithoutProb(Target);
2463
2
    return true;
2464
0
  } else {
2465
36
    AArch64CC::CondCode CC = AArch64CC::NE;
2466
36
    if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2467
14
      // Fake request the condition, otherwise the intrinsic might be completely
2468
14
      // optimized away.
2469
14
      unsigned CondReg = getRegForValue(BI->getCondition());
2470
14
      if (!CondReg)
2471
0
        return false;
2472
14
2473
14
      // Emit the branch.
2474
14
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2475
14
        .addImm(CC)
2476
14
        .addMBB(TBB);
2477
14
2478
14
      finishCondBranch(BI->getParent(), TBB, FBB);
2479
14
      return true;
2480
14
    }
2481
38
  }
2482
23
2483
23
  unsigned CondReg = getRegForValue(BI->getCondition());
2484
23
  if (CondReg == 0)
2485
0
    return false;
2486
23
  bool CondRegIsKill = hasTrivialKill(BI->getCondition());
2487
23
2488
23
  // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
2489
23
  unsigned Opcode = AArch64::TBNZW;
2490
23
  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2491
17
    std::swap(TBB, FBB);
2492
17
    Opcode = AArch64::TBZW;
2493
17
  }
2494
300
2495
300
  const MCInstrDesc &II = TII.get(Opcode);
2496
300
  unsigned ConstrainedCondReg
2497
300
    = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2498
300
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2499
300
      .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill))
2500
300
      .addImm(0)
2501
300
      .addMBB(TBB);
2502
300
2503
300
  finishCondBranch(BI->getParent(), TBB, FBB);
2504
300
  return true;
2505
300
}
2506
2507
1
bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2508
1
  const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2509
1
  unsigned AddrReg = getRegForValue(BI->getOperand(0));
2510
1
  if (AddrReg == 0)
2511
0
    return false;
2512
1
2513
1
  // Emit the indirect branch.
2514
1
  const MCInstrDesc &II = TII.get(AArch64::BR);
2515
1
  AddrReg = constrainOperandRegClass(II, AddrReg,  II.getNumDefs());
2516
1
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
2517
1
2518
1
  // Make sure the CFG is up-to-date.
2519
1
  for (auto *Succ : BI->successors())
2520
2
    FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
2521
1
2522
1
  return true;
2523
1
}
2524
2525
57
bool AArch64FastISel::selectCmp(const Instruction *I) {
2526
57
  const CmpInst *CI = cast<CmpInst>(I);
2527
57
2528
57
  // Vectors of i1 are weird: bail out.
2529
57
  if (CI->getType()->isVectorTy())
2530
6
    return false;
2531
51
2532
51
  // Try to optimize or fold the cmp.
2533
51
  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2534
51
  unsigned ResultReg = 0;
2535
51
  switch (Predicate) {
2536
49
  default:
2537
49
    break;
2538
1
  case CmpInst::FCMP_FALSE:
2539
1
    ResultReg = createResultReg(&AArch64::GPR32RegClass);
2540
1
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2541
1
            TII.get(TargetOpcode::COPY), ResultReg)
2542
1
        .addReg(AArch64::WZR, getKillRegState(true));
2543
1
    break;
2544
1
  case CmpInst::FCMP_TRUE:
2545
1
    ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2546
1
    break;
2547
51
  }
2548
51
2549
51
  if (ResultReg) {
2550
2
    updateValueMap(I, ResultReg);
2551
2
    return true;
2552
2
  }
2553
49
2554
49
  // Emit the cmp.
2555
49
  if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2556
0
    return false;
2557
49
2558
49
  ResultReg = createResultReg(&AArch64::GPR32RegClass);
2559
49
2560
49
  // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2561
49
  // condition codes are inverted, because they are used by CSINC.
2562
49
  static unsigned CondCodeTable[2][2] = {
2563
49
    { AArch64CC::NE, AArch64CC::VC },
2564
49
    { AArch64CC::PL, AArch64CC::LE }
2565
49
  };
2566
49
  unsigned *CondCodes = nullptr;
2567
49
  switch (Predicate) {
2568
47
  default:
2569
47
    break;
2570
1
  case CmpInst::FCMP_UEQ:
2571
1
    CondCodes = &CondCodeTable[0][0];
2572
1
    break;
2573
1
  case CmpInst::FCMP_ONE:
2574
1
    CondCodes = &CondCodeTable[1][0];
2575
1
    break;
2576
49
  }
2577
49
2578
49
  if (CondCodes) {
2579
2
    unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2580
2
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2581
2
            TmpReg1)
2582
2
        .addReg(AArch64::WZR, getKillRegState(true))
2583
2
        .addReg(AArch64::WZR, getKillRegState(true))
2584
2
        .addImm(CondCodes[0]);
2585
2
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2586
2
            ResultReg)
2587
2
        .addReg(TmpReg1, getKillRegState(true))
2588
2
        .addReg(AArch64::WZR, getKillRegState(true))
2589
2
        .addImm(CondCodes[1]);
2590
2
2591
2
    updateValueMap(I, ResultReg);
2592
2
    return true;
2593
2
  }
2594
47
2595
47
  // Now set a register based on the comparison.
2596
47
  AArch64CC::CondCode CC = getCompareCC(Predicate);
2597
47
  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2598
47
  AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2599
47
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2600
47
          ResultReg)
2601
47
      .addReg(AArch64::WZR, getKillRegState(true))
2602
47
      .addReg(AArch64::WZR, getKillRegState(true))
2603
47
      .addImm(invertedCC);
2604
47
2605
47
  updateValueMap(I, ResultReg);
2606
47
  return true;
2607
47
}
2608
2609
/// \brief Optimize selects of i1 if one of the operands has a 'true' or 'false'
2610
/// value.
2611
53
bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2612
53
  if (!SI->getType()->isIntegerTy(1))
2613
47
    return false;
2614
6
2615
6
  const Value *Src1Val, *Src2Val;
2616
6
  unsigned Opc = 0;
2617
6
  bool NeedExtraOp = false;
2618
6
  if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2619
3
    if (CI->isOne()) {
2620
2
      Src1Val = SI->getCondition();
2621
2
      Src2Val = SI->getFalseValue();
2622
2
      Opc = AArch64::ORRWrr;
2623
3
    } else {
2624
1
      assert(CI->isZero());
2625
1
      Src1Val = SI->getFalseValue();
2626
1
      Src2Val = SI->getCondition();
2627
1
      Opc = AArch64::BICWrr;
2628
1
    }
2629
6
  } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2630
2
    if (CI->isOne()) {
2631
1
      Src1Val = SI->getCondition();
2632
1
      Src2Val = SI->getTrueValue();
2633
1
      Opc = AArch64::ORRWrr;
2634
1
      NeedExtraOp = true;
2635
2
    } else {
2636
1
      assert(CI->isZero());
2637
1
      Src1Val = SI->getCondition();
2638
1
      Src2Val = SI->getTrueValue();
2639
1
      Opc = AArch64::ANDWrr;
2640
1
    }
2641
3
  }
2642
6
2643
6
  if (!Opc)
2644
1
    return false;
2645
5
2646
5
  unsigned Src1Reg = getRegForValue(Src1Val);
2647
5
  if (!Src1Reg)
2648
0
    return false;
2649
5
  bool Src1IsKill = hasTrivialKill(Src1Val);
2650
5
2651
5
  unsigned Src2Reg = getRegForValue(Src2Val);
2652
5
  if (!Src2Reg)
2653
0
    return false;
2654
5
  bool Src2IsKill = hasTrivialKill(Src2Val);
2655
5
2656
5
  if (NeedExtraOp) {
2657
1
    Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1);
2658
1
    Src1IsKill = true;
2659
1
  }
2660
53
  unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2661
53
                                       Src1IsKill, Src2Reg, Src2IsKill);
2662
53
  updateValueMap(SI, ResultReg);
2663
53
  return true;
2664
53
}
2665
2666
53
bool AArch64FastISel::selectSelect(const Instruction *I) {
2667
53
  assert(isa<SelectInst>(I) && "Expected a select instruction.");
2668
53
  MVT VT;
2669
53
  if (!isTypeSupported(I->getType(), VT))
2670
0
    return false;
2671
53
2672
53
  unsigned Opc;
2673
53
  const TargetRegisterClass *RC;
2674
53
  switch (VT.SimpleTy) {
2675
0
  default:
2676
0
    return false;
2677
18
  case MVT::i1:
2678
18
  case MVT::i8:
2679
18
  case MVT::i16:
2680
18
  case MVT::i32:
2681
18
    Opc = AArch64::CSELWr;
2682
18
    RC = &AArch64::GPR32RegClass;
2683
18
    break;
2684
8
  case MVT::i64:
2685
8
    Opc = AArch64::CSELXr;
2686
8
    RC = &AArch64::GPR64RegClass;
2687
8
    break;
2688
26
  case MVT::f32:
2689
26
    Opc = AArch64::FCSELSrrr;
2690
26
    RC = &AArch64::FPR32RegClass;
2691
26
    break;
2692
1
  case MVT::f64:
2693
1
    Opc = AArch64::FCSELDrrr;
2694
1
    RC = &AArch64::FPR64RegClass;
2695
1
    break;
2696
53
  }
2697
53
2698
53
  const SelectInst *SI = cast<SelectInst>(I);
2699
53
  const Value *Cond = SI->getCondition();
2700
53
  AArch64CC::CondCode CC = AArch64CC::NE;
2701
53
  AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2702
53
2703
53
  if (optimizeSelect(SI))
2704
5
    return true;
2705
48
2706
48
  // Try to pick up the flags, so we don't have to emit another compare.
2707
48
  if (foldXALUIntrinsic(CC, I, Cond)) {
2708
12
    // Fake request the condition to force emission of the XALU intrinsic.
2709
12
    unsigned CondReg = getRegForValue(Cond);
2710
12
    if (!CondReg)
2711
0
      return false;
2712
36
  } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2713
36
             isValueAvailable(Cond)) {
2714
27
    const auto *Cmp = cast<CmpInst>(Cond);
2715
27
    // Try to optimize or fold the cmp.
2716
27
    CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2717
27
    const Value *FoldSelect = nullptr;
2718
27
    switch (Predicate) {
2719
25
    default:
2720
25
      break;
2721
1
    case CmpInst::FCMP_FALSE:
2722
1
      FoldSelect = SI->getFalseValue();
2723
1
      break;
2724
1
    case CmpInst::FCMP_TRUE:
2725
1
      FoldSelect = SI->getTrueValue();
2726
1
      break;
2727
27
    }
2728
27
2729
27
    if (FoldSelect) {
2730
2
      unsigned SrcReg = getRegForValue(FoldSelect);
2731
2
      if (!SrcReg)
2732
0
        return false;
2733
2
      unsigned UseReg = lookUpRegForValue(SI);
2734
2
      if (UseReg)
2735
2
        MRI.clearKillFlags(UseReg);
2736
2
2737
2
      updateValueMap(I, SrcReg);
2738
2
      return true;
2739
2
    }
2740
25
2741
25
    // Emit the cmp.
2742
25
    if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2743
0
      return false;
2744
25
2745
25
    // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2746
25
    CC = getCompareCC(Predicate);
2747
25
    switch (Predicate) {
2748
23
    default:
2749
23
      break;
2750
1
    case CmpInst::FCMP_UEQ:
2751
1
      ExtraCC = AArch64CC::EQ;
2752
1
      CC = AArch64CC::VS;
2753
1
      break;
2754
1
    case CmpInst::FCMP_ONE:
2755
1
      ExtraCC = AArch64CC::MI;
2756
1
      CC = AArch64CC::GT;
2757
1
      break;
2758
25
    }
2759
25
    assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2760
36
  } else {
2761
9
    unsigned CondReg = getRegForValue(Cond);
2762
9
    if (!CondReg)
2763
0
      return false;
2764
9
    bool CondIsKill = hasTrivialKill(Cond);
2765
9
2766
9
    const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2767
9
    CondReg = constrainOperandRegClass(II, CondReg, 1);
2768
9
2769
9
    // Emit a TST instruction (ANDS wzr, reg, #imm).
2770
9
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
2771
9
            AArch64::WZR)
2772
9
        .addReg(CondReg, getKillRegState(CondIsKill))
2773
9
        .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2774
9
  }
2775
48
2776
46
  unsigned Src1Reg = getRegForValue(SI->getTrueValue());
2777
46
  bool Src1IsKill = hasTrivialKill(SI->getTrueValue());
2778
46
2779
46
  unsigned Src2Reg = getRegForValue(SI->getFalseValue());
2780
46
  bool Src2IsKill = hasTrivialKill(SI->getFalseValue());
2781
46
2782
46
  if (!Src1Reg || !Src2Reg)
2783
0
    return false;
2784
46
2785
46
  if (ExtraCC != AArch64CC::AL) {
2786
2
    Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2787
2
                               Src2IsKill, ExtraCC);
2788
2
    Src2IsKill = true;
2789
2
  }
2790
53
  unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2791
53
                                        Src2IsKill, CC);
2792
53
  updateValueMap(I, ResultReg);
2793
53
  return true;
2794
53
}
2795
2796
7
bool AArch64FastISel::selectFPExt(const Instruction *I) {
2797
7
  Value *V = I->getOperand(0);
2798
7
  if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2799
2
    return false;
2800
5
2801
5
  unsigned Op = getRegForValue(V);
2802
5
  if (Op == 0)
2803
0
    return false;
2804
5
2805
5
  unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
2806
5
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
2807
5
          ResultReg).addReg(Op);
2808
5
  updateValueMap(I, ResultReg);
2809
5
  return true;
2810
5
}
2811
2812
2
bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2813
2
  Value *V = I->getOperand(0);
2814
2
  if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2815
1
    return false;
2816
1
2817
1
  unsigned Op = getRegForValue(V);
2818
1
  if (Op == 0)
2819
0
    return false;
2820
1
2821
1
  unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
2822
1
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
2823
1
          ResultReg).addReg(Op);
2824
1
  updateValueMap(I, ResultReg);
2825
1
  return true;
2826
1
}
2827
2828
// FPToUI and FPToSI
2829
19
bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2830
19
  MVT DestVT;
2831
19
  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2832
1
    return false;
2833
18
2834
18
  unsigned SrcReg = getRegForValue(I->getOperand(0));
2835
18
  if (SrcReg == 0)
2836
0
    return false;
2837
18
2838
18
  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2839
18
  if (SrcVT == MVT::f128 || SrcVT == MVT::f16)
2840
4
    return false;
2841
14
2842
14
  unsigned Opc;
2843
14
  if (SrcVT == MVT::f64) {
2844
7
    if (Signed)
2845
0
      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2846
7
    else
2847
7
      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2848
14
  } else {
2849
7
    if (Signed)
2850
0
      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2851
7
    else
2852
7
      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2853
7
  }
2854
14
  unsigned ResultReg = createResultReg(
2855
14
      DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2856
19
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2857
19
      .addReg(SrcReg);
2858
19
  updateValueMap(I, ResultReg);
2859
19
  return true;
2860
19
}
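
The opcode selection in selectFPToInt above is effectively a three-way table over (source is f64, conversion is signed, destination is i64). A compact equivalent sketch (not part of this file; it assumes the surrounding file's AArch64 opcode definitions, and the helper name is illustrative):

static unsigned fpToIntOpcode(bool SrcIsF64, bool Signed, bool DestIsI64) {
  static const unsigned Table[2][2][2] = {
      {{AArch64::FCVTZUUWSr, AArch64::FCVTZUUXSr},   // f32, unsigned -> i32 / i64
       {AArch64::FCVTZSUWSr, AArch64::FCVTZSUXSr}},  // f32, signed   -> i32 / i64
      {{AArch64::FCVTZUUWDr, AArch64::FCVTZUUXDr},   // f64, unsigned -> i32 / i64
       {AArch64::FCVTZSUWDr, AArch64::FCVTZSUXDr}},  // f64, signed   -> i32 / i64
  };
  return Table[SrcIsF64][Signed][DestIsI64];
}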
2861
2862
30
bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2863
30
  MVT DestVT;
2864
30
  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2865
2
    return false;
2866
28
  // Let regular ISEL handle FP16
2867
28
  if (DestVT == MVT::f16)
2868
10
    return false;
2869
18
2870
28
  assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2871
18
         "Unexpected value type.");
2872
18
2873
18
  unsigned SrcReg = getRegForValue(I->getOperand(0));
2874
18
  if (!SrcReg)
2875
0
    return false;
2876
18
  bool SrcIsKill = hasTrivialKill(I->getOperand(0));
2877
18
2878
18
  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2879
18
2880
18
  // Handle sign-extension.
2881
18
  if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2882
6
    SrcReg =
2883
6
        emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2884
6
    if (!SrcReg)
2885
0
      return false;
2886
6
    SrcIsKill = true;
2887
6
  }
2888
18
2889
18
  unsigned Opc;
2890
18
  if (SrcVT == MVT::i64) {
2891
6
    if (Signed)
2892
0
      Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2893
6
    else
2894
6
      Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2895
18
  } else {
2896
12
    if (Signed)
2897
3
      Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2898
12
    else
2899
9
      Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2900
12
  }
2901
18
2902
18
  unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
2903
18
                                      SrcIsKill);
2904
18
  updateValueMap(I, ResultReg);
2905
18
  return true;
2906
30
}
2907
2908
1.23k
bool AArch64FastISel::fastLowerArguments() {
2909
1.23k
  if (!FuncInfo.CanLowerReturn)
2910
0
    return false;
2911
1.23k
2912
1.23k
  const Function *F = FuncInfo.Fn;
2913
1.23k
  if (F->isVarArg())
2914
1
    return false;
2915
1.23k
2916
1.23k
  CallingConv::ID CC = F->getCallingConv();
2917
1.23k
  if (CC != CallingConv::C && CC != CallingConv::Swift)
2918
6
    return false;
2919
1.22k
2920
1.22k
  // Only handle simple cases of up to 8 GPR and FPR each.
2921
1.22k
  unsigned GPRCnt = 0;
2922
1.22k
  unsigned FPRCnt = 0;
2923
2.04k
  for (auto const &Arg : F->args()) {
2924
2.04k
    if (Arg.hasAttribute(Attribute::ByVal) ||
2925
2.04k
        Arg.hasAttribute(Attribute::InReg) ||
2926
2.04k
        Arg.hasAttribute(Attribute::StructRet) ||
2927
2.04k
        Arg.hasAttribute(Attribute::SwiftSelf) ||
2928
2.03k
        Arg.hasAttribute(Attribute::SwiftError) ||
2929
2.02k
        Arg.hasAttribute(Attribute::Nest))
2930
16
      return false;
2931
2.02k
2932
2.02k
    Type *ArgTy = Arg.getType();
2933
2.02k
    if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2934
6
      return false;
2935
2.02k
2936
2.02k
    EVT ArgVT = TLI.getValueType(DL, ArgTy);
2937
2.02k
    if (!ArgVT.isSimple())
2938
0
      return false;
2939
2.02k
2940
2.02k
    MVT VT = ArgVT.getSimpleVT().SimpleTy;
2941
2.02k
    if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2942
0
      return false;
2943
2.02k
2944
2.02k
    if (VT.isVector() &&
2945
120
        (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2946
71
      return false;
2947
1.94k
2948
1.94k
    if (VT >= MVT::i1 && VT <= MVT::i64)
2949
1.62k
      ++GPRCnt;
2950
328
    else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2951
41
             VT.is128BitVector())
2952
316
      ++FPRCnt;
2953
328
    else
2954
12
      return false;
2955
1.93k
2956
1.93k
    if (GPRCnt > 8 || FPRCnt > 8)
2957
6
      return false;
2958
1.11k
  }
2959
1.11k
2960
1.11k
  static const MCPhysReg Registers[6][8] = {
2961
1.11k
    { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2962
1.11k
      AArch64::W5, AArch64::W6, AArch64::W7 },
2963
1.11k
    { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2964
1.11k
      AArch64::X5, AArch64::X6, AArch64::X7 },
2965
1.11k
    { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2966
1.11k
      AArch64::H5, AArch64::H6, AArch64::H7 },
2967
1.11k
    { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2968
1.11k
      AArch64::S5, AArch64::S6, AArch64::S7 },
2969
1.11k
    { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2970
1.11k
      AArch64::D5, AArch64::D6, AArch64::D7 },
2971
1.11k
    { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2972
1.11k
      AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2973
1.11k
  };
2974
1.11k
2975
1.11k
  unsigned GPRIdx = 0;
2976
1.11k
  unsigned FPRIdx = 0;
2977
1.86k
  for (auto const &Arg : F->args()) {
2978
1.86k
    MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2979
1.86k
    unsigned SrcReg;
2980
1.86k
    const TargetRegisterClass *RC;
2981
1.86k
    if (VT >= MVT::i1 && VT <= MVT::i32) {
2982
671
      SrcReg = Registers[0][GPRIdx++];
2983
671
      RC = &AArch64::GPR32RegClass;
2984
671
      VT = MVT::i32;
2985
1.86k
    } else if (VT == MVT::i64) {
2986
884
      SrcReg = Registers[1][GPRIdx++];
2987
884
      RC = &AArch64::GPR64RegClass;
2988
1.19k
    } else if (VT == MVT::f16) {
2989
2
      SrcReg = Registers[2][FPRIdx++];
2990
2
      RC = &AArch64::FPR16RegClass;
2991
307
    } else if (VT == MVT::f32) {
2992
193
      SrcReg = Registers[3][FPRIdx++];
2993
193
      RC = &AArch64::FPR32RegClass;
2994
305
    } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2995
83
      SrcReg = Registers[4][FPRIdx++];
2996
83
      RC = &AArch64::FPR64RegClass;
2997
112
    } else if (VT.is128BitVector()) {
2998
29
      SrcReg = Registers[5][FPRIdx++];
2999
29
      RC = &AArch64::FPR128RegClass;
3000
29
    } else
3001
0
      llvm_unreachable("Unexpected value type.");
3002
1.86k
3003
1.86k
    unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3004
1.86k
    // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3005
1.86k
    // Without this, EmitLiveInCopies may eliminate the livein if its only
3006
1.86k
    // use is a bitcast (which isn't turned into an instruction).
3007
1.86k
    unsigned ResultReg = createResultReg(RC);
3008
1.86k
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3009
1.86k
            TII.get(TargetOpcode::COPY), ResultReg)
3010
1.86k
        .addReg(DstReg, getKillRegState(true));
3011
1.86k
    updateValueMap(&Arg, ResultReg);
3012
1.86k
  }
3013
1.11k
  return true;
3014
1.23k
}
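
The argument loop above draws integer and floating-point/vector parameters from two independent counters (GPRIdx for W/X, FPRIdx for H/S/D/Q), mirroring AAPCS64. A toy standalone model of that counting (not part of this file; the prototype in the comment is a made-up example):

#include <cstdio>

int main() {
  // e.g. a hypothetical prototype  void f(int a, long b, float c, double d)
  const bool IsFP[] = {false, false, true, true};
  unsigned GPRIdx = 0, FPRIdx = 0;
  for (bool FP : IsFP)
    std::printf("arg -> %s%u\n", FP ? "FPR" : "GPR", FP ? FPRIdx++ : GPRIdx++);
  // Prints GPR0, GPR1, FPR0, FPR1, i.e. W0, X1, S0, D1 for this prototype.
  return 0;
}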
3015
3016
bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
3017
                                      SmallVectorImpl<MVT> &OutVTs,
3018
127
                                      unsigned &NumBytes) {
3019
127
  CallingConv::ID CC = CLI.CallConv;
3020
127
  SmallVector<CCValAssign, 16> ArgLocs;
3021
127
  CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
3022
127
  CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
3023
127
3024
127
  // Get a count of how many bytes are to be pushed on the stack.
3025
127
  NumBytes = CCInfo.getNextStackOffset();
3026
127
3027
127
  // Issue CALLSEQ_START
3028
127
  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3029
127
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
3030
127
    .addImm(NumBytes).addImm(0);
3031
127
3032
127
  // Process the args.
3033
1.32k
  for (CCValAssign &VA : ArgLocs) {
3034
1.32k
    const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3035
1.32k
    MVT ArgVT = OutVTs[VA.getValNo()];
3036
1.32k
3037
1.32k
    unsigned ArgReg = getRegForValue(ArgVal);
3038
1.32k
    if (!ArgReg)
3039
2
      return false;
3040
1.32k
3041
1.32k
    // Handle arg promotion: SExt, ZExt, AExt.
3042
1.32k
    switch (VA.getLocInfo()) {
3043
1.22k
    case CCValAssign::Full:
3044
1.22k
      break;
3045
15
    case CCValAssign::SExt: {
3046
15
      MVT DestVT = VA.getLocVT();
3047
15
      MVT SrcVT = ArgVT;
3048
15
      ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3049
15
      if (!ArgReg)
3050
0
        return false;
3051
15
      break;
3052
15
    }
3053
84
    case CCValAssign::AExt:
3054
84
    // Intentional fall-through.
3055
84
    case CCValAssign::ZExt: {
3056
84
      MVT DestVT = VA.getLocVT();
3057
84
      MVT SrcVT = ArgVT;
3058
84
      ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3059
84
      if (!ArgReg)
3060
0
        return false;
3061
84
      break;
3062
84
    }
3063
0
    default:
3064
0
      llvm_unreachable("Unknown arg promotion!");
3065
1.32k
    }
3066
1.32k
3067
1.32k
    // Now copy/store arg to correct locations.
3068
1.32k
    if (VA.isRegLoc() && !VA.needsCustom()) {
3069
258
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3070
258
              TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3071
258
      CLI.OutRegs.push_back(VA.getLocReg());
3072
1.32k
    } else if (VA.needsCustom()) {
3073
0
      // FIXME: Handle custom args.
3074
0
      return false;
3075
0
    } else {
3076
1.06k
      assert(VA.isMemLoc() && "Assuming store on stack.");
3077
1.06k
3078
1.06k
      // Don't emit stores for undef values.
3079
1.06k
      if (isa<UndefValue>(ArgVal))
3080
1.03k
        continue;
3081
32
3082
32
      // Need to store on the stack.
3083
32
      unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3084
32
3085
32
      unsigned BEAlign = 0;
3086
32
      if (ArgSize < 8 && !Subtarget->isLittleEndian())
3087
2
        BEAlign = 8 - ArgSize;
3088
32
3089
32
      Address Addr;
3090
32
      Addr.setKind(Address::RegBase);
3091
32
      Addr.setReg(AArch64::SP);
3092
32
      Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3093
32
3094
32
      unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
3095
32
      MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3096
32
          MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3097
32
          MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3098
32
3099
32
      if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3100
2
        return false;
3101
123
    }
3102
1.32k
  }
3103
123
  return true;
3104
123
}
3105
3106
bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
3107
123
                                 unsigned NumBytes) {
3108
123
  CallingConv::ID CC = CLI.CallConv;
3109
123
3110
123
  // Issue CALLSEQ_END
3111
123
  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3112
123
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
3113
123
    .addImm(NumBytes).addImm(0);
3114
123
3115
123
  // Now the return value.
3116
123
  if (RetVT != MVT::isVoid) {
3117
67
    SmallVector<CCValAssign, 16> RVLocs;
3118
67
    CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3119
67
    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
3120
67
3121
67
    // Only handle a single return value.
3122
67
    if (RVLocs.size() != 1)
3123
0
      return false;
3124
67
3125
67
    // Copy all of the result registers out of their specified physreg.
3126
67
    MVT CopyVT = RVLocs[0].getValVT();
3127
67
3128
67
    // TODO: Handle big-endian results
3129
67
    if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3130
10
      return false;
3131
57
3132
57
    unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
3133
57
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3134
57
            TII.get(TargetOpcode::COPY), ResultReg)
3135
57
        .addReg(RVLocs[0].getLocReg());
3136
57
    CLI.InRegs.push_back(RVLocs[0].getLocReg());
3137
57
3138
57
    CLI.ResultReg = ResultReg;
3139
57
    CLI.NumResultRegs = 1;
3140
57
  }
3141
123
3142
113
  return true;
3143
123
}
3144
3145
240
bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3146
240
  CallingConv::ID CC  = CLI.CallConv;
3147
240
  bool IsTailCall     = CLI.IsTailCall;
3148
240
  bool IsVarArg       = CLI.IsVarArg;
3149
240
  const Value *Callee = CLI.Callee;
3150
240
  MCSymbol *Symbol = CLI.Symbol;
3151
240
3152
240
  if (!Callee && !Symbol)
3153
0
    return false;
3154
240
3155
240
  // Allow SelectionDAG isel to handle tail calls.
3156
240
  if (IsTailCall)
3157
22
    return false;
3158
218
3159
218
  CodeModel::Model CM = TM.getCodeModel();
3160
218
  // Only support the small-addressing and large code models.
3161
218
  if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3162
0
    return false;
3163
218
3164
218
  // FIXME: Add large code model support for ELF.
3165
218
  if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3166
0
    return false;
3167
218
3168
218
  // Let SDISel handle vararg functions.
3169
218
  if (IsVarArg)
3170
5
    return false;
3171
213
3172
213
  // FIXME: Only handle *simple* calls for now.
3173
213
  MVT RetVT;
3174
213
  if (CLI.RetTy->isVoidTy())
3175
60
    RetVT = MVT::isVoid;
3176
153
  else if (!isTypeLegal(CLI.RetTy, RetVT))
3177
12
    return false;
3178
201
3179
201
  for (auto Flag : CLI.OutFlags)
3180
1.40k
    if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3181
1.40k
        Flag.isSwiftSelf() || Flag.isSwiftError())
3182
5
      return false;
3183
196
3184
196
  // Set up the argument vectors.
3185
196
  SmallVector<MVT, 16> OutVTs;
3186
196
  OutVTs.reserve(CLI.OutVals.size());
3187
196
3188
1.39k
  for (auto *Val : CLI.OutVals) {
3189
1.39k
    MVT VT;
3190
1.39k
    if (!isTypeLegal(Val->getType(), VT) &&
3191
108
        !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3192
9
      return false;
3193
1.38k
3194
1.38k
    // We don't handle vector parameters yet.
3195
1.38k
    if (VT.isVector() || VT.getSizeInBits() > 64)
3196
60
      return false;
3197
1.32k
3198
1.32k
    OutVTs.push_back(VT);
3199
1.32k
  }
3200
196
3201
127
  Address Addr;
3202
127
  if (Callee && !computeCallAddress(Callee, Addr))
3203
0
    return false;
3204
127
3205
127
  // Handle the arguments now that we've gotten them.
3206
127
  unsigned NumBytes;
3207
127
  if (!processCallArgs(CLI, OutVTs, NumBytes))
3208
4
    return false;
3209
123
3210
123
  // Issue the call.
3211
123
  MachineInstrBuilder MIB;
3212
123
  if (Subtarget->useSmallAddressing()) {
3213
107
    const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
3214
107
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
3215
107
    if (Symbol)
3216
15
      MIB.addSym(Symbol, 0);
3217
92
    else if (Addr.getGlobalValue())
3218
73
      MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3219
19
    else if (Addr.getReg()) {
3220
19
      unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3221
19
      MIB.addReg(Reg);
3222
19
    } else
3223
0
      return false;
3224
16
  } else {
3225
16
    unsigned CallReg = 0;
3226
16
    if (Symbol) {
3227
8
      unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3228
8
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
3229
8
              ADRPReg)
3230
8
          .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3231
8
3232
8
      CallReg = createResultReg(&AArch64::GPR64RegClass);
3233
8
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3234
8
              TII.get(AArch64::LDRXui), CallReg)
3235
8
          .addReg(ADRPReg)
3236
8
          .addSym(Symbol,
3237
8
                  AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3238
16
    } else if (Addr.getGlobalValue())
3239
7
      CallReg = materializeGV(Addr.getGlobalValue());
3240
1
    else if (Addr.getReg())
3241
1
      CallReg = Addr.getReg();
3242
16
3243
16
    if (!CallReg)
3244
0
      return false;
3245
16
3246
16
    const MCInstrDesc &II = TII.get(AArch64::BLR);
3247
16
    CallReg = constrainOperandRegClass(II, CallReg, 0);
3248
16
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
3249
16
  }
3250
123
3251
123
  // Add implicit physical register uses to the call.
3252
123
  for (auto Reg : CLI.OutRegs)
3253
241
    MIB.addReg(Reg, RegState::Implicit);
3254
123
3255
123
  // Add a register mask with the call-preserved registers.
3256
123
  // Proper defs for return values will be added by setPhysRegsDeadExcept().
3257
123
  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3258
123
3259
123
  CLI.Call = MIB;
3260
123
3261
123
  // Finish off the call including any return values.
3262
123
  return finishCall(CLI, RetVT, NumBytes);
3263
240
}
3264
3265
31
bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
3266
31
  if (Alignment)
3267
31
    return Len / Alignment <= 4;
3268
31
  else
3269
0
    return Len < 32;
3270
0
}
3271
3272
bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3273
13
                                         uint64_t Len, unsigned Alignment) {
3274
13
  // Make sure we don't bloat code by inlining very large memcpy's.
3275
13
  if (!isMemCpySmall(Len, Alignment))
3276
0
    return false;
3277
13
3278
13
  int64_t UnscaledOffset = 0;
3279
13
  Address OrigDest = Dest;
3280
13
  Address OrigSrc = Src;
3281
13
3282
54
  while (Len) {
3283
41
    MVT VT;
3284
41
    if (!Alignment || Alignment >= 8) {
3285
29
      if (Len >= 8)
3286
27
        VT = MVT::i64;
3287
2
      else if (Len >= 4)
3288
0
        VT = MVT::i32;
3289
2
      else if (Len >= 2)
3290
0
        VT = MVT::i16;
3291
2
      else {
3292
2
        VT = MVT::i8;
3293
2
      }
3294
41
    } else {
3295
12
      // Bound based on alignment.
3296
12
      if (Len >= 4 && Alignment == 4)
3297
2
        VT = MVT::i32;
3298
10
      else if (Len >= 2 && Alignment == 2)
3299
3
        VT = MVT::i16;
3300
7
      else {
3301
7
        VT = MVT::i8;
3302
7
      }
3303
12
    }
3304
41
3305
41
    unsigned ResultReg = emitLoad(VT, VT, Src);
3306
41
    if (!ResultReg)
3307
0
      return false;
3308
41
3309
41
    if (!emitStore(VT, ResultReg, Dest))
3310
0
      return false;
3311
41
3312
41
    int64_t Size = VT.getSizeInBits() / 8;
3313
41
    Len -= Size;
3314
41
    UnscaledOffset += Size;
3315
41
3316
41
    // We need to recompute the unscaled offset for each iteration.
3317
41
    Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3318
41
    Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3319
41
  }
3320
13
3321
13
  return true;
3322
13
}
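
The loop above picks the widest chunk the remaining length and alignment allow. A standalone replay of that policy for one hypothetical call (not part of this file; Len = 13, Alignment = 8 is just an example that isMemCpySmall would accept):

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t Len = 13;
  unsigned Alignment = 8;
  while (Len) {
    unsigned Bytes;
    if (!Alignment || Alignment >= 8)
      Bytes = Len >= 8 ? 8 : Len >= 4 ? 4 : Len >= 2 ? 2 : 1;
    else if (Len >= 4 && Alignment == 4)
      Bytes = 4;
    else if (Len >= 2 && Alignment == 2)
      Bytes = 2;
    else
      Bytes = 1;
    std::printf("copy %u byte(s)\n", Bytes); // 8, then 4, then 1: one i64, one i32, one i8 pair
    Len -= Bytes;
  }
  return 0;
}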
3323
3324
/// \brief Check if it is possible to fold the condition from the XALU intrinsic
3325
/// into the user. The condition code will only be updated on success.
3326
bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3327
                                        const Instruction *I,
3328
84
                                        const Value *Cond) {
3329
84
  if (!isa<ExtractValueInst>(Cond))
3330
58
    return false;
3331
26
3332
26
  const auto *EV = cast<ExtractValueInst>(Cond);
3333
26
  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3334
0
    return false;
3335
26
3336
26
  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3337
26
  MVT RetVT;
3338
26
  const Function *Callee = II->getCalledFunction();
3339
26
  Type *RetTy =
3340
26
  cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3341
26
  if (!isTypeLegal(RetTy, RetVT))
3342
0
    return false;
3343
26
3344
26
  if (RetVT != MVT::i32 && RetVT != MVT::i64)
3345
0
    return false;
3346
26
3347
26
  const Value *LHS = II->getArgOperand(0);
3348
26
  const Value *RHS = II->getArgOperand(1);
3349
26
3350
26
  // Canonicalize immediate to the RHS.
3351
26
  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3352
0
      isCommutativeIntrinsic(II))
3353
0
    std::swap(LHS, RHS);
3354
26
3355
26
  // Simplify multiplies.
3356
26
  Intrinsic::ID IID = II->getIntrinsicID();
3357
26
  switch (IID) {
3358
16
  default:
3359
16
    break;
3360
5
  case Intrinsic::smul_with_overflow:
3361
5
    if (const auto *C = dyn_cast<ConstantInt>(RHS))
3362
1
      if (C->getValue() == 2)
3363
1
        IID = Intrinsic::sadd_with_overflow;
3364
5
    break;
3365
5
  case Intrinsic::umul_with_overflow:
3366
5
    if (const auto *C = dyn_cast<ConstantInt>(RHS))
3367
1
      if (C->getValue() == 2)
3368
1
        IID = Intrinsic::uadd_with_overflow;
3369
5
    break;
3370
26
  }
3371
26
3372
26
  AArch64CC::CondCode TmpCC;
3373
26
  switch (IID) {
3374
0
  default:
3375
0
    return false;
3376
9
  case Intrinsic::sadd_with_overflow:
3377
9
  case Intrinsic::ssub_with_overflow:
3378
9
    TmpCC = AArch64CC::VS;
3379
9
    break;
3380
5
  case Intrinsic::uadd_with_overflow:
3381
5
    TmpCC = AArch64CC::HS;
3382
5
    break;
3383
4
  case Intrinsic::usub_with_overflow:
3384
4
    TmpCC = AArch64CC::LO;
3385
4
    break;
3386
8
  case Intrinsic::smul_with_overflow:
3387
8
  case Intrinsic::umul_with_overflow:
3388
8
    TmpCC = AArch64CC::NE;
3389
8
    break;
3390
26
  }
3391
26
3392
26
  // Check if both instructions are in the same basic block.
3393
26
  if (!isValueAvailable(II))
3394
0
    return false;
3395
26
3396
26
  // Make sure nothing is in the way
3397
26
  BasicBlock::const_iterator Start(I);
3398
26
  BasicBlock::const_iterator End(II);
3399
66
  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3400
40
    // We only expect extractvalue instructions between the intrinsic and the
3401
40
    // instruction to be selected.
3402
40
    if (!isa<ExtractValueInst>(Itr))
3403
0
      return false;
3404
40
3405
40
    // Check that the extractvalue operand comes from the intrinsic.
3406
40
    const auto *EVI = cast<ExtractValueInst>(Itr);
3407
40
    if (EVI->getAggregateOperand() != II)
3408
0
      return false;
3409
40
  }
3410
26
3411
26
  CC = TmpCC;
3412
26
  return true;
3413
84
}
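
The condition code that foldXALUIntrinsic hands back follows directly from the switch above. A compact sketch of that mapping (not part of this file; it assumes the surrounding file's AArch64CC and Intrinsic definitions, and the helper name is illustrative):

static AArch64CC::CondCode overflowCondCode(Intrinsic::ID IID) {
  switch (IID) {
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::ssub_with_overflow: return AArch64CC::VS; // signed overflow flag
  case Intrinsic::uadd_with_overflow: return AArch64CC::HS; // carry set
  case Intrinsic::usub_with_overflow: return AArch64CC::LO; // carry clear (borrow)
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow: return AArch64CC::NE; // high half differs
  default:                            return AArch64CC::AL; // not foldable
  }
}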
3414
3415
94
bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3416
94
  // FIXME: Handle more intrinsics.
3417
94
  switch (II->getIntrinsicID()) {
3418
7
  default: return false;
3419
2
  case Intrinsic::frameaddress: {
3420
2
    MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3421
2
    MFI.setFrameAddressIsTaken(true);
3422
2
3423
2
    const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3424
2
    unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3425
2
    unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3426
2
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3427
2
            TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3428
2
    // Recursively load frame address
3429
2
    // ldr x0, [fp]
3430
2
    // ldr x0, [x0]
3431
2
    // ldr x0, [x0]
3432
2
    // ...
3433
2
    unsigned DestReg;
3434
2
    unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3435
4
    while (Depth--) {
3436
2
      DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3437
2
                                SrcReg, /*IsKill=*/true, 0);
3438
2
      assert(DestReg && "Unexpected LDR instruction emission failure.");
3439
2
      SrcReg = DestReg;
3440
2
    }
3441
2
3442
2
    updateValueMap(II, SrcReg);
3443
2
    return true;
3444
94
  }
3445
19
  case Intrinsic::memcpy:
3446
19
  case Intrinsic::memmove: {
3447
19
    const auto *MTI = cast<MemTransferInst>(II);
3448
19
    // Don't handle volatile.
3449
19
    if (MTI->isVolatile())
3450
0
      return false;
3451
19
3452
19
    // Disable inlining for memmove before calls to ComputeAddress.  Otherwise,
3453
19
    // we would emit dead code because we don't currently handle memmoves.
3454
19
    bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3455
19
    if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3456
18
      // Small memcpy's are common enough that we want to do them without a call
3457
18
      // if possible.
3458
18
      uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3459
18
      unsigned Alignment = MTI->getAlignment();
3460
18
      if (isMemCpySmall(Len, Alignment)) {
3461
13
        Address Dest, Src;
3462
13
        if (!computeAddress(MTI->getRawDest(), Dest) ||
3463
13
            !computeAddress(MTI->getRawSource(), Src))
3464
0
          return false;
3465
13
        if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3466
13
          return true;
3467
6
      }
3468
18
    }
3469
6
3470
6
    if (!MTI->getLength()->getType()->isIntegerTy(64))
3471
0
      return false;
3472
6
3473
6
    if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3474
6
      // Fast instruction selection doesn't support the special
3475
6
      // address spaces.
3476
0
      return false;
3477
6
3478
6
    const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3479
6
    return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2);
3480
6
  }
3481
1
  case Intrinsic::memset: {
3482
1
    const MemSetInst *MSI = cast<MemSetInst>(II);
3483
1
    // Don't handle volatile.
3484
1
    if (MSI->isVolatile())
3485
0
      return false;
3486
1
3487
1
    if (!MSI->getLength()->getType()->isIntegerTy(64))
3488
0
      return false;
3489
1
3490
1
    if (MSI->getDestAddressSpace() > 255)
3491
1
      // Fast instruction selection doesn't support the special
3492
1
      // address spaces.
3493
0
      return false;
3494
1
3495
1
    return lowerCallTo(II, "memset", II->getNumArgOperands() - 2);
3496
1
  }
3497
12
  case Intrinsic::sin:
3498
12
  case Intrinsic::cos:
3499
12
  case Intrinsic::pow: {
3500
12
    MVT RetVT;
3501
12
    if (!isTypeLegal(II->getType(), RetVT))
3502
0
      return false;
3503
12
3504
12
    if (RetVT != MVT::f32 && RetVT != MVT::f64)
3505
0
      return false;
3506
12
3507
12
    static const RTLIB::Libcall LibCallTable[3][2] = {
3508
12
      { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3509
12
      { RTLIB::COS_F32, RTLIB::COS_F64 },
3510
12
      { RTLIB::POW_F32, RTLIB::POW_F64 }
3511
12
    };
3512
12
    RTLIB::Libcall LC;
3513
12
    bool Is64Bit = RetVT == MVT::f64;
3514
12
    switch (II->getIntrinsicID()) {
3515
0
    default:
3516
0
      llvm_unreachable("Unexpected intrinsic.");
3517
4
    case Intrinsic::sin:
3518
4
      LC = LibCallTable[0][Is64Bit];
3519
4
      break;
3520
4
    case Intrinsic::cos:
3521
4
      LC = LibCallTable[1][Is64Bit];
3522
4
      break;
3523
4
    case Intrinsic::pow:
3524
4
      LC = LibCallTable[2][Is64Bit];
3525
4
      break;
3526
12
    }
3527
12
3528
12
    ArgListTy Args;
3529
12
    Args.reserve(II->getNumArgOperands());
3530
12
3531
12
    // Populate the argument list.
3532
16
    for (auto &Arg : II->arg_operands()) {
3533
16
      ArgListEntry Entry;
3534
16
      Entry.Val = Arg;
3535
16
      Entry.Ty = Arg->getType();
3536
16
      Args.push_back(Entry);
3537
16
    }
3538
12
3539
12
    CallLoweringInfo CLI;
3540
12
    MCContext &Ctx = MF->getContext();
3541
12
    CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
3542
12
                  TLI.getLibcallName(LC), std::move(Args));
3543
12
    if (!lowerCallTo(CLI))
3544
0
      return false;
3545
12
    updateValueMap(II, CLI.ResultReg);
3546
12
    return true;
3547
12
  }
3548
2
  case Intrinsic::fabs: {
3549
2
    MVT VT;
3550
2
    if (!isTypeLegal(II->getType(), VT))
3551
0
      return false;
3552
2
3553
2
    unsigned Opc;
3554
2
    switch (VT.SimpleTy) {
3555
0
    default:
3556
0
      return false;
3557
1
    case MVT::f32:
3558
1
      Opc = AArch64::FABSSr;
3559
1
      break;
3560
1
    case MVT::f64:
3561
1
      Opc = AArch64::FABSDr;
3562
1
      break;
3563
2
    }
3564
2
    unsigned SrcReg = getRegForValue(II->getOperand(0));
3565
2
    if (!SrcReg)
3566
0
      return false;
3567
2
    bool SrcRegIsKill = hasTrivialKill(II->getOperand(0));
3568
2
    unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3569
2
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
3570
2
      .addReg(SrcReg, getKillRegState(SrcRegIsKill));
3571
2
    updateValueMap(II, ResultReg);
3572
2
    return true;
3573
2
  }
3574
1
  case Intrinsic::trap:
3575
1
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3576
1
        .addImm(1);
3577
1
    return true;
3578
2
3579
2
  case Intrinsic::sqrt: {
3580
2
    Type *RetTy = II->getCalledFunction()->getReturnType();
3581
2
3582
2
    MVT VT;
3583
2
    if (!isTypeLegal(RetTy, VT))
3584
0
      return false;
3585
2
3586
2
    unsigned Op0Reg = getRegForValue(II->getOperand(0));
3587
2
    if (!Op0Reg)
3588
0
      return false;
3589
2
    bool Op0IsKill = hasTrivialKill(II->getOperand(0));
3590
2
3591
2
    unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
3592
2
    if (!ResultReg)
3593
0
      return false;
3594
2
3595
2
    updateValueMap(II, ResultReg);
3596
2
    return true;
3597
2
  }
3598
48
  case Intrinsic::sadd_with_overflow:
3599
48
  case Intrinsic::uadd_with_overflow:
3600
48
  case Intrinsic::ssub_with_overflow:
3601
48
  case Intrinsic::usub_with_overflow:
3602
48
  case Intrinsic::smul_with_overflow:
3603
48
  case Intrinsic::umul_with_overflow: {
3604
48
    // This implements the basic lowering of the xalu with overflow intrinsics.
3605
48
    const Function *Callee = II->getCalledFunction();
3606
48
    auto *Ty = cast<StructType>(Callee->getReturnType());
3607
48
    Type *RetTy = Ty->getTypeAtIndex(0U);
3608
48
3609
48
    MVT VT;
3610
48
    if (!isTypeLegal(RetTy, VT))
3611
0
      return false;
3612
48
3613
48
    if (VT != MVT::i32 && VT != MVT::i64)
3614
0
      return false;
3615
48
3616
48
    const Value *LHS = II->getArgOperand(0);
3617
48
    const Value *RHS = II->getArgOperand(1);
3618
48
    // Canonicalize immediate to the RHS.
3619
48
    if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3620
0
        isCommutativeIntrinsic(II))
3621
0
      std::swap(LHS, RHS);
3622
48
3623
48
    // Simplify multiplies.
3624
48
    Intrinsic::ID IID = II->getIntrinsicID();
3625
48
    switch (IID) {
3626
31
    default:
3627
31
      break;
3628
8
    case Intrinsic::smul_with_overflow:
3629
8
      if (const auto *C = dyn_cast<ConstantInt>(RHS))
3630
2
        if (C->getValue() == 2) {
3631
2
          IID = Intrinsic::sadd_with_overflow;
3632
2
          RHS = LHS;
3633
2
        }
3634
8
      break;
3635
9
    case Intrinsic::umul_with_overflow:
3636
9
      if (const auto *C = dyn_cast<ConstantInt>(RHS))
3637
3
        if (C->getValue() == 2) {
3638
2
          IID = Intrinsic::uadd_with_overflow;
3639
2
          RHS = LHS;
3640
2
        }
3641
9
      break;
3642
48
    }
3643
48
3644
48
    unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3645
48
    AArch64CC::CondCode CC = AArch64CC::Invalid;
3646
48
    switch (IID) {
3647
0
    default: llvm_unreachable("Unexpected intrinsic!");
3648
14
    case Intrinsic::sadd_with_overflow:
3649
14
      ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3650
14
      CC = AArch64CC::VS;
3651
14
      break;
3652
8
    case Intrinsic::uadd_with_overflow:
3653
8
      ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3654
8
      CC = AArch64CC::HS;
3655
8
      break;
3656
7
    case Intrinsic::ssub_with_overflow:
3657
7
      ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3658
7
      CC = AArch64CC::VS;
3659
7
      break;
3660
6
    case Intrinsic::usub_with_overflow:
3661
6
      ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3662
6
      CC = AArch64CC::LO;
3663
6
      break;
3664
6
    case Intrinsic::smul_with_overflow: {
3665
6
      CC = AArch64CC::NE;
3666
6
      unsigned LHSReg = getRegForValue(LHS);
3667
6
      if (!LHSReg)
3668
0
        return false;
3669
6
      bool LHSIsKill = hasTrivialKill(LHS);
3670
6
3671
6
      unsigned RHSReg = getRegForValue(RHS);
3672
6
      if (!RHSReg)
3673
0
        return false;
3674
6
      bool RHSIsKill = hasTrivialKill(RHS);
3675
6
3676
6
      if (VT == MVT::i32) {
3677
3
        MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3678
3
        unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
3679
3
                                       /*IsKill=*/false, 32);
3680
3
        MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3681
3
                                            AArch64::sub_32);
3682
3
        ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
3683
3
                                              AArch64::sub_32);
3684
3
        emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3685
3
                    AArch64_AM::ASR, 31, /*WantResult=*/false);
3686
6
      } else {
3687
3
        assert(VT == MVT::i64 && "Unexpected value type.");
3688
3
        // LHSReg and RHSReg cannot be killed by this Mul, since they are
3689
3
        // reused in the next instruction.
3690
3
        MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
3691
3
                            /*IsKill=*/false);
3692
3
        unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
3693
3
                                        RHSReg, RHSIsKill);
3694
3
        emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3695
3
                    AArch64_AM::ASR, 63, /*WantResult=*/false);
3696
3
      }
3697
6
      break;
3698
6
    }
3699
7
    case Intrinsic::umul_with_overflow: {
3700
7
      CC = AArch64CC::NE;
3701
7
      unsigned LHSReg = getRegForValue(LHS);
3702
7
      if (!LHSReg)
3703
0
        return false;
3704
7
      bool LHSIsKill = hasTrivialKill(LHS);
3705
7
3706
7
      unsigned RHSReg = getRegForValue(RHS);
3707
7
      if (!RHSReg)
3708
0
        return false;
3709
7
      bool RHSIsKill = hasTrivialKill(RHS);
3710
7
3711
7
      if (VT == MVT::i32) {
3712
3
        MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3713
3
        emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
3714
3
                    /*IsKill=*/false, AArch64_AM::LSR, 32,
3715
3
                    /*WantResult=*/false);
3716
3
        MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3717
3
                                            AArch64::sub_32);
3718
7
      } else {
3719
4
        assert(VT == MVT::i64 && "Unexpected value type.");
3720
4
        // LHSReg and RHSReg cannot be killed by this Mul, since they are
3721
4
        // reused in the next instruction.
3722
4
        MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
3723
4
                            /*IsKill=*/false);
3724
4
        unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
3725
4
                                        RHSReg, RHSIsKill);
3726
4
        emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
3727
4
                    /*IsKill=*/false, /*WantResult=*/false);
3728
4
      }
3729
6
      break;
3730
6
    }
3731
48
    }
3732
48
3733
48
    if (MulReg) {
3734
13
      ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3735
13
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3736
13
              TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3737
13
    }
3738
19
3739
19
    ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3740
19
                                  AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
3741
19
                                  /*IsKill=*/true, getInvertedCondCode(CC));
3742
19
    (void)ResultReg2;
3743
19
    assert((ResultReg1 + 1) == ResultReg2 &&
3744
19
           "Nonconsecutive result registers.");
3745
19
    updateValueMap(II, ResultReg1, 2);
3746
19
    return true;
3747
19
  }
3748
0
  }
3749
0
  return false;
3750
0
}
3751
3752
1.31k
bool AArch64FastISel::selectRet(const Instruction *I) {
3753
1.31k
  const ReturnInst *Ret = cast<ReturnInst>(I);
3754
1.31k
  const Function &F = *I->getParent()->getParent();
3755
1.31k
3756
1.31k
  if (!FuncInfo.CanLowerReturn)
3757
0
    return false;
3758
1.31k
3759
1.31k
  if (F.isVarArg())
3760
1
    return false;
3761
1.31k
3762
1.31k
  if (TLI.supportSwiftError() &&
3763
1.31k
      F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3764
11
    return false;
3765
1.30k
3766
1.30k
  if (TLI.supportSplitCSR(FuncInfo.MF))
3767
3
    return false;
3768
1.30k
3769
1.30k
  // Build a list of return value registers.
3770
1.30k
  SmallVector<unsigned, 4> RetRegs;
3771
1.30k
3772
1.30k
  if (Ret->getNumOperands() > 0) {
3773
982
    CallingConv::ID CC = F.getCallingConv();
3774
982
    SmallVector<ISD::OutputArg, 4> Outs;
3775
982
    GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3776
982
3777
982
    // Analyze operands of the call, assigning locations to each operand.
3778
982
    SmallVector<CCValAssign, 16> ValLocs;
3779
982
    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3780
1
    CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
3781
981
                                                     : RetCC_AArch64_AAPCS;
3782
982
    CCInfo.AnalyzeReturn(Outs, RetCC);
3783
982
3784
982
    // Only handle a single return value for now.
3785
982
    if (ValLocs.size() != 1)
3786
10
      return false;
3787
972
3788
972
    CCValAssign &VA = ValLocs[0];
3789
972
    const Value *RV = Ret->getOperand(0);
3790
972
3791
972
    // Don't bother handling odd stuff for now.
3792
972
    if ((VA.getLocInfo() != CCValAssign::Full) &&
3793
72
        (VA.getLocInfo() != CCValAssign::BCvt))
3794
0
      return false;
3795
972
3796
972
    // Only handle register returns for now.
3797
972
    if (!VA.isRegLoc())
3798
0
      return false;
3799
972
3800
972
    unsigned Reg = getRegForValue(RV);
3801
972
    if (Reg == 0)
3802
4
      return false;
3803
968
3804
968
    unsigned SrcReg = Reg + VA.getValNo();
3805
968
    unsigned DestReg = VA.getLocReg();
3806
968
    // Avoid a cross-class copy. This is very unlikely.
3807
968
    if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3808
0
      return false;
3809
968
3810
968
    EVT RVEVT = TLI.getValueType(DL, RV->getType());
3811
968
    if (!RVEVT.isSimple())
3812
0
      return false;
3813
968
3814
968
    // Vectors (of > 1 lane) in big endian need tricky handling.
3815
968
    if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 &&
3816
92
        !Subtarget->isLittleEndian())
3817
60
      return false;
3818
908
3819
908
    MVT RVVT = RVEVT.getSimpleVT();
3820
908
    if (RVVT == MVT::f128)
3821
8
      return false;
3822
900
3823
900
    MVT DestVT = VA.getValVT();
3824
900
    // Special handling for extended integers.
3825
900
    if (RVVT != DestVT) {
3826
163
      if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3827
0
        return false;
3828
163
3829
163
      if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3830
10
        return false;
3831
153
3832
153
      bool IsZExt = Outs[0].Flags.isZExt();
3833
153
      SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3834
153
      if (SrcReg == 0)
3835
0
        return false;
3836
890
    }
3837
890
3838
890
    // Make the copy.
3839
890
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3840
890
            TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3841
890
3842
890
    // Add register to return instruction.
3843
890
    RetRegs.push_back(VA.getLocReg());
3844
890
  }
3845
1.30k
3846
1.21k
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3847
1.21k
                                    TII.get(AArch64::RET_ReallyLR));
3848
1.21k
  for (unsigned RetReg : RetRegs)
3849
890
    MIB.addReg(RetReg, RegState::Implicit);
3850
1.21k
  return true;
3851
1.31k
}
3852
3853
14
bool AArch64FastISel::selectTrunc(const Instruction *I) {
3854
14
  Type *DestTy = I->getType();
3855
14
  Value *Op = I->getOperand(0);
3856
14
  Type *SrcTy = Op->getType();
3857
14
3858
14
  EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3859
14
  EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3860
14
  if (!SrcEVT.isSimple())
3861
0
    return false;
3862
14
  if (!DestEVT.isSimple())
3863
0
    return false;
3864
14
3865
14
  MVT SrcVT = SrcEVT.getSimpleVT();
3866
14
  MVT DestVT = DestEVT.getSimpleVT();
3867
14
3868
14
  if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3869
2
      SrcVT != MVT::i8)
3870
2
    return false;
3871
12
  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3872
5
      DestVT != MVT::i1)
3873
0
    return false;
3874
12
3875
12
  unsigned SrcReg = getRegForValue(Op);
3876
12
  if (!SrcReg)
3877
0
    return false;
3878
12
  bool SrcIsKill = hasTrivialKill(Op);
3879
12
3880
12
  // If we're truncating from i64 to a smaller non-legal type then generate an
3881
12
  // AND. Otherwise, we know the high bits are undefined and a truncate only
3882
12
  // generate a COPY. We cannot mark the source register also as result
3883
12
  // register, because this can incorrectly transfer the kill flag onto the
3884
12
  // source register.
3885
12
  unsigned ResultReg;
3886
12
  if (SrcVT == MVT::i64) {
3887
6
    uint64_t Mask = 0;
3888
6
    switch (DestVT.SimpleTy) {
3889
0
    default:
3890
0
      // Trunc i64 to i32 is handled by the target-independent fast-isel.
3891
0
      return false;
3892
3
    case MVT::i1:
3893
3
      Mask = 0x1;
3894
3
      break;
3895
2
    case MVT::i8:
3896
2
      Mask = 0xff;
3897
2
      break;
3898
1
    case MVT::i16:
3899
1
      Mask = 0xffff;
3900
1
      break;
3901
6
    }
3902
6
    // Issue an extract_subreg to get the lower 32-bits.
3903
6
    unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
3904
6
                                                AArch64::sub_32);
3905
6
    // Create the AND instruction which performs the actual truncation.
3906
6
    ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
3907
6
    assert(ResultReg && "Unexpected AND instruction emission failure.");
3908
12
  } else {
3909
6
    ResultReg = createResultReg(&AArch64::GPR32RegClass);
3910
6
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3911
6
            TII.get(TargetOpcode::COPY), ResultReg)
3912
6
        .addReg(SrcReg, getKillRegState(SrcIsKill));
3913
6
  }
3914
12
3915
12
  updateValueMap(I, ResultReg);
3916
12
  return true;
3917
14
}
3918
3919
141
unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
3920
141
  assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
3921
141
          DestVT == MVT::i64) &&
3922
141
         "Unexpected value type.");
3923
141
  // Handle i8 and i16 as i32.
3924
141
  if (DestVT == MVT::i8 || DestVT == MVT::i16)
3925
3
    DestVT = MVT::i32;
3926
141
3927
141
  if (IsZExt) {
3928
132
    unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
3929
132
    assert(ResultReg && "Unexpected AND instruction emission failure.");
3930
132
    if (DestVT == MVT::i64) {
3931
0
      // We're ZExt i1 to i64.  The ANDWri Wd, Ws, #1 implicitly clears the
3932
0
      // upper 32 bits.  Emit a SUBREG_TO_REG to extend from Wd to Xd.
3933
0
      unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3934
0
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3935
0
              TII.get(AArch64::SUBREG_TO_REG), Reg64)
3936
0
          .addImm(0)
3937
0
          .addReg(ResultReg)
3938
0
          .addImm(AArch64::sub_32);
3939
0
      ResultReg = Reg64;
3940
0
    }
3941
132
    return ResultReg;
3942
0
  } else {
3943
9
    if (DestVT == MVT::i64) {
3944
0
      // FIXME: We're SExt i1 to i64.
3945
0
      return 0;
3946
0
    }
3947
9
    return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
3948
9
                            /*TODO:IsKill=*/false, 0, 0);
3949
9
  }
3950
141
}
3951
3952
unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3953
21
                                      unsigned Op1, bool Op1IsKill) {
3954
21
  unsigned Opc, ZReg;
3955
21
  switch (RetVT.SimpleTy) {
3956
0
  default: return 0;
3957
5
  case MVT::i8:
3958
5
  case MVT::i16:
3959
5
  case MVT::i32:
3960
5
    RetVT = MVT::i32;
3961
5
    Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
3962
16
  case MVT::i64:
3963
16
    Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
3964
21
  }
3965
21
3966
21
  const TargetRegisterClass *RC =
3967
21
      (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3968
21
  return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
3969
21
                          /*IsKill=*/ZReg, true);
3970
21
}
3971
3972
unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3973
3
                                        unsigned Op1, bool Op1IsKill) {
3974
3
  if (RetVT != MVT::i64)
3975
0
    return 0;
3976
3
3977
3
  return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
3978
3
                          Op0, Op0IsKill, Op1, Op1IsKill,
3979
3
                          AArch64::XZR, /*IsKill=*/true);
3980
3
}
3981
3982
unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3983
3
                                        unsigned Op1, bool Op1IsKill) {
3984
3
  if (RetVT != MVT::i64)
3985
0
    return 0;
3986
3
3987
3
  return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
3988
3
                          Op0, Op0IsKill, Op1, Op1IsKill,
3989
3
                          AArch64::XZR, /*IsKill=*/true);
3990
3
}
3991
3992
unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
3993
4
                                     unsigned Op1Reg, bool Op1IsKill) {
3994
4
  unsigned Opc = 0;
3995
4
  bool NeedTrunc = false;
3996
4
  uint64_t Mask = 0;
3997
4
  switch (RetVT.SimpleTy) {
3998
0
  default: return 0;
3999
1
  case MVT::i8:  Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff;   break;
4000
1
  case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4001
1
  case MVT::i32: Opc = AArch64::LSLVWr;                                  break;
4002
1
  case MVT::i64: Opc = AArch64::LSLVXr;                                  break;
4003
4
  }
4004
4
4005
4
  const TargetRegisterClass *RC =
4006
4
      (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4007
4
  if (NeedTrunc) {
4008
2
    Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4009
2
    Op1IsKill = true;
4010
2
  }
4011
4
  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4012
4
                                       Op1IsKill);
4013
4
  if (NeedTrunc)
4014
2
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4015
4
  return ResultReg;
4016
4
}
4017
4018
unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4019
                                     bool Op0IsKill, uint64_t Shift,
4020
55
                                     bool IsZExt) {
4021
55
  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4022
55
         "Unexpected source/return type pair.");
4023
55
  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4024
55
          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4025
55
         "Unexpected source value type.");
4026
55
  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4027
55
          RetVT == MVT::i64) && "Unexpected return value type.");
4028
55
4029
55
  bool Is64Bit = (RetVT == MVT::i64);
4030
55
  unsigned RegSize = Is64Bit ? 64 : 32;
4031
55
  unsigned DstBits = RetVT.getSizeInBits();
4032
55
  unsigned SrcBits = SrcVT.getSizeInBits();
4033
55
  const TargetRegisterClass *RC =
4034
55
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4035
55
4036
55
  // Just emit a copy for "zero" shifts.
4037
55
  if (Shift == 0) {
4038
2
    if (RetVT == SrcVT) {
4039
1
      unsigned ResultReg = createResultReg(RC);
4040
1
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4041
1
              TII.get(TargetOpcode::COPY), ResultReg)
4042
1
          .addReg(Op0, getKillRegState(Op0IsKill));
4043
1
      return ResultReg;
4044
1
    } else
4045
1
      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4046
53
  }
4047
53
4048
53
  // Don't deal with undefined shifts.
4049
53
  if (Shift >= DstBits)
4050
14
    return 0;
4051
39
4052
39
  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4053
39
  // {S|U}BFM Wd, Wn, #r, #s
4054
39
  // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4055
39
4056
39
  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4057
39
  // %2 = shl i16 %1, 4
4058
39
  // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4059
39
  // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4060
39
  // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4061
39
  // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4062
39
4063
39
  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4064
39
  // %2 = shl i16 %1, 8
4065
39
  // Wd<32+7-24,32-24> = Wn<7:0>
4066
39
  // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4067
39
  // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4068
39
  // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4069
39
4070
39
  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4071
39
  // %2 = shl i16 %1, 12
4072
39
  // Wd<32+3-20,32-20> = Wn<3:0>
4073
39
  // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4074
39
  // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4075
39
  // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4076
39
4077
39
  unsigned ImmR = RegSize - Shift;
4078
39
  // Limit the width to the length of the source type.
4079
39
  unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4080
39
  static const unsigned OpcTable[2][2] = {
4081
39
    {AArch64::SBFMWri, AArch64::SBFMXri},
4082
39
    {AArch64::UBFMWri, AArch64::UBFMXri}
4083
39
  };
4084
39
  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4085
39
  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4086
10
    unsigned TmpReg = MRI.createVirtualRegister(RC);
4087
10
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4088
10
            TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4089
10
        .addImm(0)
4090
10
        .addReg(Op0, getKillRegState(Op0IsKill))
4091
10
        .addImm(AArch64::sub_32);
4092
10
    Op0 = TmpReg;
4093
10
    Op0IsKill = true;
4094
10
  }
4095
55
  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4096
55
}
4097
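A minimal standalone C++ sketch (not part of the file above; names mirror the locals in emitLSL_ri) of the ImmR/ImmS arithmetic for the comment's example, a zero-extended i8 shifted left by 4 into a 32-bit result:

#include <algorithm>
#include <cstdint>
#include <cstdio>

int main() {
  const unsigned RegSize = 32;   // 32-bit destination register
  const unsigned SrcBits = 8;    // the value was zero-extended from i8
  const unsigned DstBits = 32;
  const uint64_t Shift = 4;

  unsigned ImmR = RegSize - Shift;                                       // 28
  unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);  // 7

  // UBFMWri with these operands is UBFM Wd, Wn, #28, #7, which assemblers
  // usually print as ubfiz Wd, Wn, #4, #8.
  std::printf("ImmR=%u ImmS=%u\n", ImmR, ImmS);
  return 0;
}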
4098
unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4099
4
                                     unsigned Op1Reg, bool Op1IsKill) {
4100
4
  unsigned Opc = 0;
4101
4
  bool NeedTrunc = false;
4102
4
  uint64_t Mask = 0;
4103
4
  switch (RetVT.SimpleTy) {
4104
0
  default: return 0;
4105
1
  case MVT::i8:  Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff;   break;
4106
1
  case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4107
1
  case MVT::i32: Opc = AArch64::LSRVWr; break;
4108
1
  case MVT::i64: Opc = AArch64::LSRVXr; break;
4109
4
  }
4110
4
4111
4
  const TargetRegisterClass *RC =
4112
4
      (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4113
4
  if (NeedTrunc) {
4114
2
    Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
4115
2
    Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4116
2
    Op0IsKill = Op1IsKill = true;
4117
2
  }
4118
4
  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4119
4
                                       Op1IsKill);
4120
4
  if (NeedTrunc)
4121
2
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4122
4
  return ResultReg;
4123
4
}
4124
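For i8/i16 the variable-shift path above masks both operands to the narrow width, shifts in 32 bits, and masks the result again. A minimal sketch of that arithmetic; the helper name is illustrative, not a FastISel API:

#include <cstdint>
#include <cstdio>

// Mirrors the NeedTrunc path: emitAnd_ri on both inputs, LSRVWr, emitAnd_ri on the result.
static uint32_t lsr_narrow(uint32_t Val, uint32_t Amt, uint32_t Mask) {
  uint32_t V = Val & Mask;  // Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, ..., Mask)
  uint32_t A = Amt & Mask;  // Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, ..., Mask)
  uint32_t R = V >> A;      // fastEmitInst_rr with LSRVWr
  return R & Mask;          // final emitAnd_ri on ResultReg
}

int main() {
  std::printf("0x%x\n", lsr_narrow(0xAB, 4, 0xFF));  // prints 0xa
  return 0;
}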
4125
unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4126
                                     bool Op0IsKill, uint64_t Shift,
4127
27
                                     bool IsZExt) {
4128
27
  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4129
27
         "Unexpected source/return type pair.");
4130
27
  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4131
27
          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4132
27
         "Unexpected source value type.");
4133
27
  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4134
27
          RetVT == MVT::i64) && "Unexpected return value type.");
4135
27
4136
27
  bool Is64Bit = (RetVT == MVT::i64);
4137
27
  unsigned RegSize = Is64Bit ? 64 : 32;
4138
27
  unsigned DstBits = RetVT.getSizeInBits();
4139
27
  unsigned SrcBits = SrcVT.getSizeInBits();
4140
27
  const TargetRegisterClass *RC =
4141
27
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4142
27
4143
27
  // Just emit a copy for "zero" shifts.
4144
27
  if (Shift == 0) {
4145
2
    if (RetVT == SrcVT) {
4146
1
      unsigned ResultReg = createResultReg(RC);
4147
1
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4148
1
              TII.get(TargetOpcode::COPY), ResultReg)
4149
1
      .addReg(Op0, getKillRegState(Op0IsKill));
4150
1
      return ResultReg;
4151
1
    } else
4152
1
      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4153
25
  }
4154
25
4155
25
  // Don't deal with undefined shifts.
4156
25
  if (Shift >= DstBits)
4157
0
    return 0;
4158
25
4159
25
  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4160
25
  // {S|U}BFM Wd, Wn, #r, #s
4161
25
  // Wd<s-r:0> = Wn<s:r> when r <= s
4162
25
4163
25
  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4164
25
  // %2 = lshr i16 %1, 4
4165
25
  // Wd<7-4:0> = Wn<7:4>
4166
25
  // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4167
25
  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4168
25
  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4169
25
4170
25
  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4171
25
  // %2 = lshr i16 %1, 8
4172
25
  // Wd<7-7,0> = Wn<7:7>
4173
25
  // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4174
25
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4175
25
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4176
25
4177
25
  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4178
25
  // %2 = lshr i16 %1, 12
4179
25
  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4180
25
  // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4181
25
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4182
25
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4183
25
4184
25
  if (Shift >= SrcBits && IsZExt)
4185
3
    return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4186
22
4187
22
  // It is not possible to fold a sign-extend into the LShr instruction. In this
4188
22
  // case emit a sign-extend.
4189
22
  if (!IsZExt) {
4190
4
    Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4191
4
    if (!Op0)
4192
0
      return 0;
4193
4
    Op0IsKill = true;
4194
4
    SrcVT = RetVT;
4195
4
    SrcBits = SrcVT.getSizeInBits();
4196
4
    IsZExt = true;
4197
4
  }
4198
22
4199
22
  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4200
22
  unsigned ImmS = SrcBits - 1;
4201
22
  static const unsigned OpcTable[2][2] = {
4202
22
    {AArch64::SBFMWri, AArch64::SBFMXri},
4203
22
    {AArch64::UBFMWri, AArch64::UBFMXri}
4204
22
  };
4205
22
  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4206
22
  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4207
0
    unsigned TmpReg = MRI.createVirtualRegister(RC);
4208
0
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4209
0
            TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4210
0
        .addImm(0)
4211
0
        .addReg(Op0, getKillRegState(Op0IsKill))
4212
0
        .addImm(AArch64::sub_32);
4213
0
    Op0 = TmpReg;
4214
0
    Op0IsKill = true;
4215
0
  }
4216
22
  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4217
27
}
4218
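The immediate-LSR fold above uses ImmR = min(SrcBits - 1, Shift) and ImmS = SrcBits - 1. A short sketch of those values for the comment's case of a zero-extended i8 value shifted right by 4:

#include <algorithm>
#include <cstdio>

int main() {
  const unsigned SrcBits = 8;  // zero-extended i8 source
  const unsigned Shift = 4;
  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);  // 4
  unsigned ImmS = SrcBits - 1;                             // 7
  // UBFM Wd, Wn, #4, #7 extracts Wn<7:4>, matching "Wd<7-4:0> = Wn<7:4>" above.
  std::printf("ImmR=%u ImmS=%u\n", ImmR, ImmS);
  return 0;
}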
4219
unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4220
4
                                     unsigned Op1Reg, bool Op1IsKill) {
4221
4
  unsigned Opc = 0;
4222
4
  bool NeedTrunc = false;
4223
4
  uint64_t Mask = 0;
4224
4
  switch (RetVT.SimpleTy) {
4225
0
  default: return 0;
4226
1
  case MVT::i8:  Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff;   break;
4227
1
  case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4228
1
  case MVT::i32: Opc = AArch64::ASRVWr;                                  break;
4229
1
  case MVT::i64: Opc = AArch64::ASRVXr;                                  break;
4230
4
  }
4231
4
4232
4
  const TargetRegisterClass *RC =
4233
4
      (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4234
4
  if (NeedTrunc) {
4235
2
    Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false);
4236
2
    Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4237
2
    Op0IsKill = Op1IsKill = true;
4238
2
  }
4239
4
  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4240
4
                                       Op1IsKill);
4241
4
  if (NeedTrunc)
4242
2
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4243
4
  return ResultReg;
4244
4
}
4245
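Unlike the LSR variant, the narrow ASR path above sign-extends the value to 32 bits (emitIntExt) and only masks the shift amount and the result. A sketch of the equivalent arithmetic for an i8 value; the right shift of a negative value below is the arithmetic shift guaranteed since C++20:

#include <cstdint>
#include <cstdio>

static uint32_t asr_i8(int8_t Val, uint32_t Amt) {
  int32_t V = Val;            // emitIntExt(..., /*IsZExt=*/false)
  uint32_t A = Amt & 0xFF;    // emitAnd_ri on the shift amount
  int32_t R = V >> A;         // ASRVWr
  return uint32_t(R) & 0xFF;  // final emitAnd_ri truncates back to 8 bits
}

int main() {
  std::printf("0x%x\n", asr_i8(int8_t(-128), 4));  // prints 0xf8 (i.e. -8 as i8)
  return 0;
}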
4246
unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4247
                                     bool Op0IsKill, uint64_t Shift,
4248
29
                                     bool IsZExt) {
4249
29
  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4250
29
         "Unexpected source/return type pair.");
4251
29
  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4252
29
          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4253
29
         "Unexpected source value type.");
4254
29
  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4255
29
          RetVT == MVT::i64) && "Unexpected return value type.");
4256
29
4257
29
  bool Is64Bit = (RetVT == MVT::i64);
4258
29
  unsigned RegSize = Is64Bit ? 64 : 32;
4259
29
  unsigned DstBits = RetVT.getSizeInBits();
4260
29
  unsigned SrcBits = SrcVT.getSizeInBits();
4261
29
  const TargetRegisterClass *RC =
4262
29
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4263
29
4264
29
  // Just emit a copy for "zero" shifts.
4265
29
  if (Shift == 0) {
4266
2
    if (RetVT == SrcVT) {
4267
1
      unsigned ResultReg = createResultReg(RC);
4268
1
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4269
1
              TII.get(TargetOpcode::COPY), ResultReg)
4270
1
      .addReg(Op0, getKillRegState(Op0IsKill));
4271
1
      return ResultReg;
4272
1
    } else
4273
1
      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4274
27
  }
4275
27
4276
27
  // Don't deal with undefined shifts.
4277
27
  if (Shift >= DstBits)
4278
0
    return 0;
4279
27
4280
27
  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4281
27
  // {S|U}BFM Wd, Wn, #r, #s
4282
27
  // Wd<s-r:0> = Wn<s:r> when r <= s
4283
27
4284
27
  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4285
27
  // %2 = ashr i16 %1, 4
4286
27
  // Wd<7-4:0> = Wn<7:4>
4287
27
  // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4288
27
  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4289
27
  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4290
27
4291
27
  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4292
27
  // %2 = ashr i16 %1, 8
4293
27
  // Wd<7-7,0> = Wn<7:7>
4294
27
  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4295
27
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4296
27
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4297
27
4298
27
  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4299
27
  // %2 = ashr i16 %1, 12
4300
27
  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4301
27
  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4302
27
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4303
27
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4304
27
4305
27
  if (Shift >= SrcBits && IsZExt)
4306
3
    return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4307
24
4308
24
  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4309
24
  unsigned ImmS = SrcBits - 1;
4310
24
  static const unsigned OpcTable[2][2] = {
4311
24
    {AArch64::SBFMWri, AArch64::SBFMXri},
4312
24
    {AArch64::UBFMWri, AArch64::UBFMXri}
4313
24
  };
4314
24
  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4315
24
  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4316
1
    unsigned TmpReg = MRI.createVirtualRegister(RC);
4317
1
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4318
1
            TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4319
1
        .addImm(0)
4320
1
        .addReg(Op0, getKillRegState(Op0IsKill))
4321
1
        .addImm(AArch64::sub_32);
4322
1
    Op0 = TmpReg;
4323
1
    Op0IsKill = true;
4324
1
  }
4325
29
  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4326
29
}
4327
4328
unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4329
443
                                     bool IsZExt) {
4330
443
  assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4331
443
4332
443
  // FastISel does not have plumbing to deal with extensions where the SrcVT or
4333
443
  // DestVT are odd things, so test to make sure that they are both types we can
4334
443
  // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4335
443
  // bail out to SelectionDAG.
4336
443
  if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4337
443
       (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4338
443
      ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
4339
443
       (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
4340
0
    return 0;
4341
443
4342
443
  unsigned Opc;
4343
443
  unsigned Imm = 0;
4344
443
4345
443
  switch (SrcVT.SimpleTy) {
4346
0
  default:
4347
0
    return 0;
4348
141
  case MVT::i1:
4349
141
    return emiti1Ext(SrcReg, DestVT, IsZExt);
4350
159
  case MVT::i8:
4351
159
    if (DestVT == MVT::i64)
4352
16
      Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4353
159
    else
4354
143
      Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4355
159
    Imm = 7;
4356
159
    break;
4357
100
  case MVT::i16:
4358
100
    if (DestVT == MVT::i64)
4359
15
      Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4360
100
    else
4361
85
      Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4362
100
    Imm = 15;
4363
100
    break;
4364
43
  case MVT::i32:
4365
43
    assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4366
43
    Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4367
43
    Imm = 31;
4368
43
    break;
4369
302
  }
4370
302
4371
302
  // Handle i8 and i16 as i32.
4372
302
  if (DestVT == MVT::i8 || DestVT == MVT::i16)
4373
3
    DestVT = MVT::i32;
4374
299
  else if (DestVT == MVT::i64) {
4375
74
    unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4376
74
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4377
74
            TII.get(AArch64::SUBREG_TO_REG), Src64)
4378
74
        .addImm(0)
4379
74
        .addReg(SrcReg)
4380
74
        .addImm(AArch64::sub_32);
4381
74
    SrcReg = Src64;
4382
74
  }
4383
302
4384
302
  const TargetRegisterClass *RC =
4385
302
      (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4386
443
  return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
4387
443
}
4388
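The opcode/immediate choice above encodes every integer extend as a {U|S}BFM that copies bits <Imm:0>, with Imm = source width - 1. A tiny illustrative sketch of that mapping (strings are for display only):

#include <cstdio>

int main() {
  struct { const char *Src; unsigned Imm; } Cases[] = {{"i8", 7}, {"i16", 15}, {"i32", 31}};
  for (const auto &C : Cases)
    // UBFM for zext, SBFM for sext; the W or X form follows the destination width.
    std::printf("ext from %s -> {U|S}BFM Rd, Rn, #0, #%u\n", C.Src, C.Imm);
  return 0;
}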
4389
1
static bool isZExtLoad(const MachineInstr *LI) {
4390
1
  switch (LI->getOpcode()) {
4391
0
  default:
4392
0
    return false;
4393
1
  case AArch64::LDURBBi:
4394
1
  case AArch64::LDURHHi:
4395
1
  case AArch64::LDURWi:
4396
1
  case AArch64::LDRBBui:
4397
1
  case AArch64::LDRHHui:
4398
1
  case AArch64::LDRWui:
4399
1
  case AArch64::LDRBBroX:
4400
1
  case AArch64::LDRHHroX:
4401
1
  case AArch64::LDRWroX:
4402
1
  case AArch64::LDRBBroW:
4403
1
  case AArch64::LDRHHroW:
4404
1
  case AArch64::LDRWroW:
4405
1
    return true;
4406
0
  }
4407
0
}
4408
4409
0
static bool isSExtLoad(const MachineInstr *LI) {
4410
0
  switch (LI->getOpcode()) {
4411
0
  default:
4412
0
    return false;
4413
0
  case AArch64::LDURSBWi:
4414
0
  case AArch64::LDURSHWi:
4415
0
  case AArch64::LDURSBXi:
4416
0
  case AArch64::LDURSHXi:
4417
0
  case AArch64::LDURSWi:
4418
0
  case AArch64::LDRSBWui:
4419
0
  case AArch64::LDRSHWui:
4420
0
  case AArch64::LDRSBXui:
4421
0
  case AArch64::LDRSHXui:
4422
0
  case AArch64::LDRSWui:
4423
0
  case AArch64::LDRSBWroX:
4424
0
  case AArch64::LDRSHWroX:
4425
0
  case AArch64::LDRSBXroX:
4426
0
  case AArch64::LDRSHXroX:
4427
0
  case AArch64::LDRSWroX:
4428
0
  case AArch64::LDRSBWroW:
4429
0
  case AArch64::LDRSHWroW:
4430
0
  case AArch64::LDRSBXroW:
4431
0
  case AArch64::LDRSHXroW:
4432
0
  case AArch64::LDRSWroW:
4433
0
    return true;
4434
0
  }
4435
0
}
4436
4437
bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4438
221
                                         MVT SrcVT) {
4439
221
  const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4440
221
  if (!LI || !LI->hasOneUse())
4441
110
    return false;
4442
111
4443
111
  // Check if the load instruction has already been selected.
4444
111
  unsigned Reg = lookUpRegForValue(LI);
4445
111
  if (!Reg)
4446
110
    return false;
4447
1
4448
1
  MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4449
1
  if (!MI)
4450
0
    return false;
4451
1
4452
1
  // Check if the correct load instruction has been emitted - SelectionDAG might
4453
1
  // have emitted a zero-extending load, but we need a sign-extending load.
4454
1
  bool IsZExt = isa<ZExtInst>(I);
4455
1
  const auto *LoadMI = MI;
4456
1
  if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4457
1
      LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4458
0
    unsigned LoadReg = MI->getOperand(1).getReg();
4459
0
    LoadMI = MRI.getUniqueVRegDef(LoadReg);
4460
0
    assert(LoadMI && "Expected valid instruction");
4461
0
  }
4462
1
  if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4463
0
    return false;
4464
1
4465
1
  // Nothing to be done.
4466
1
  if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4467
0
    updateValueMap(I, Reg);
4468
0
    return true;
4469
0
  }
4470
1
4471
1
  if (IsZExt) {
4472
1
    unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
4473
1
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4474
1
            TII.get(AArch64::SUBREG_TO_REG), Reg64)
4475
1
        .addImm(0)
4476
1
        .addReg(Reg, getKillRegState(true))
4477
1
        .addImm(AArch64::sub_32);
4478
1
    Reg = Reg64;
4479
1
  } else {
4480
0
    assert((MI->getOpcode() == TargetOpcode::COPY &&
4481
0
            MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4482
0
           "Expected copy instruction");
4483
0
    Reg = MI->getOperand(1).getReg();
4484
0
    MI->eraseFromParent();
4485
0
  }
4486
221
  updateValueMap(I, Reg);
4487
221
  return true;
4488
221
}
4489
4490
228
bool AArch64FastISel::selectIntExt(const Instruction *I) {
4491
228
  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4492
228
         "Unexpected integer extend instruction.");
4493
228
  MVT RetVT;
4494
228
  MVT SrcVT;
4495
228
  if (!isTypeSupported(I->getType(), RetVT))
4496
7
    return false;
4497
221
4498
221
  if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4499
0
    return false;
4500
221
4501
221
  // Try to optimize already sign-/zero-extended values from load instructions.
4502
221
  if (optimizeIntExtLoad(I, RetVT, SrcVT))
4503
1
    return true;
4504
220
4505
220
  unsigned SrcReg = getRegForValue(I->getOperand(0));
4506
220
  if (!SrcReg)
4507
0
    return false;
4508
220
  bool SrcIsKill = hasTrivialKill(I->getOperand(0));
4509
220
4510
220
  // Try to optimize already sign-/zero-extended values from function arguments.
4511
220
  bool IsZExt = isa<ZExtInst>(I);
4512
220
  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4513
78
    if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4514
66
      if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4515
13
        unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
4516
13
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4517
13
                TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4518
13
            .addImm(0)
4519
13
            .addReg(SrcReg, getKillRegState(SrcIsKill))
4520
13
            .addImm(AArch64::sub_32);
4521
13
        SrcReg = ResultReg;
4522
13
      }
4523
66
      // Conservatively clear all kill flags from all uses, because we are
4524
66
      // replacing a sign-/zero-extend instruction at IR level with a nop at MI
4525
66
      // level. The result of the instruction at IR level might have been
4526
66
      // trivially dead, which is now no longer true.
4527
66
      unsigned UseReg = lookUpRegForValue(I);
4528
66
      if (UseReg)
4529
66
        MRI.clearKillFlags(UseReg);
4530
66
4531
66
      updateValueMap(I, SrcReg);
4532
66
      return true;
4533
66
    }
4534
154
  }
4535
154
4536
154
  unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4537
154
  if (!ResultReg)
4538
0
    return false;
4539
154
4540
154
  updateValueMap(I, ResultReg);
4541
154
  return true;
4542
154
}
4543
4544
8
bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4545
8
  EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4546
8
  if (!DestEVT.isSimple())
4547
0
    return false;
4548
8
4549
8
  MVT DestVT = DestEVT.getSimpleVT();
4550
8
  if (DestVT != MVT::i64 && DestVT != MVT::i32)
4551
0
    return false;
4552
8
4553
8
  unsigned DivOpc;
4554
8
  bool Is64bit = (DestVT == MVT::i64);
4555
8
  switch (ISDOpcode) {
4556
0
  default:
4557
0
    return false;
4558
4
  case ISD::SREM:
4559
4
    DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4560
4
    break;
4561
4
  case ISD::UREM:
4562
4
    DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4563
4
    break;
4564
8
  }
4565
8
  unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4566
8
  unsigned Src0Reg = getRegForValue(I->getOperand(0));
4567
8
  if (!Src0Reg)
4568
0
    return false;
4569
8
  bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4570
8
4571
8
  unsigned Src1Reg = getRegForValue(I->getOperand(1));
4572
8
  if (!Src1Reg)
4573
0
    return false;
4574
8
  bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4575
8
4576
8
  const TargetRegisterClass *RC =
4577
8
      (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4578
8
  unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
4579
8
                                     Src1Reg, /*IsKill=*/false);
4580
8
  assert(QuotReg && "Unexpected DIV instruction emission failure.");
4581
8
  // The remainder is computed as numerator - (quotient * denominator) using the
4582
8
  // MSUB instruction.
4583
8
  unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
4584
8
                                        Src1Reg, Src1IsKill, Src0Reg,
4585
8
                                        Src0IsKill);
4586
8
  updateValueMap(I, ResultReg);
4587
8
  return true;
4588
8
}
4589
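The DIV+MSUB expansion above computes rem = numerator - (numerator / denominator) * denominator. A plain-arithmetic sketch of the signed i64 case (not the emitted machine code itself):

#include <cstdint>
#include <cstdio>

int main() {
  int64_t N = -7, D = 3;
  int64_t Quot = N / D;        // SDIVXr: rounds toward zero, like sdiv
  int64_t Rem = N - Quot * D;  // MSUBXrrr: Rem = N - Quot * D
  std::printf("%lld\n", static_cast<long long>(Rem));  // prints -1
  return 0;
}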
4590
10
bool AArch64FastISel::selectMul(const Instruction *I) {
4591
10
  MVT VT;
4592
10
  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4593
0
    return false;
4594
10
4595
10
  if (VT.isVector())
4596
0
    return selectBinaryOp(I, ISD::MUL);
4597
10
4598
10
  const Value *Src0 = I->getOperand(0);
4599
10
  const Value *Src1 = I->getOperand(1);
4600
10
  if (const auto *C = dyn_cast<ConstantInt>(Src0))
4601
0
    if (C->getValue().isPowerOf2())
4602
0
      std::swap(Src0, Src1);
4603
10
4604
10
  // Try to simplify to a shift instruction.
4605
10
  if (const auto *C = dyn_cast<ConstantInt>(Src1))
4606
4
    if (C->getValue().isPowerOf2()) {
4607
2
      uint64_t ShiftVal = C->getValue().logBase2();
4608
2
      MVT SrcVT = VT;
4609
2
      bool IsZExt = true;
4610
2
      if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4611
0
        if (!isIntExtFree(ZExt)) {
4612
0
          MVT VT;
4613
0
          if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4614
0
            SrcVT = VT;
4615
0
            IsZExt = true;
4616
0
            Src0 = ZExt->getOperand(0);
4617
0
          }
4618
0
        }
4619
2
      } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4620
0
        if (!isIntExtFree(SExt)) {
4621
0
          MVT VT;
4622
0
          if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4623
0
            SrcVT = VT;
4624
0
            IsZExt = false;
4625
0
            Src0 = SExt->getOperand(0);
4626
0
          }
4627
0
        }
4628
2
      }
4629
2
4630
2
      unsigned Src0Reg = getRegForValue(Src0);
4631
2
      if (!Src0Reg)
4632
0
        return false;
4633
2
      bool Src0IsKill = hasTrivialKill(Src0);
4634
2
4635
2
      unsigned ResultReg =
4636
2
          emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt);
4637
2
4638
2
      if (ResultReg) {
4639
2
        updateValueMap(I, ResultReg);
4640
2
        return true;
4641
2
      }
4642
8
    }
4643
8
4644
8
  unsigned Src0Reg = getRegForValue(I->getOperand(0));
4645
8
  if (!Src0Reg)
4646
0
    return false;
4647
8
  bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4648
8
4649
8
  unsigned Src1Reg = getRegForValue(I->getOperand(1));
4650
8
  if (!Src1Reg)
4651
0
    return false;
4652
8
  bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4653
8
4654
8
  unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
4655
8
4656
8
  if (!ResultReg)
4657
0
    return false;
4658
8
4659
8
  updateValueMap(I, ResultReg);
4660
8
  return true;
4661
8
}
4662
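The strength reduction above rewrites a multiply by a power-of-two constant as a left shift by logBase2 of the constant (the emitLSL_ri path). A small sketch of the equivalence; for a power of two, logBase2 equals the count of trailing zeros:

#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t C = 8;  // power-of-two constant operand
  unsigned ShiftVal = 0;
  while (!((C >> ShiftVal) & 1)) ++ShiftVal;  // logBase2(8) == 3
  const uint64_t X = 5;
  std::printf("%llu %llu\n",
              static_cast<unsigned long long>(X * C),
              static_cast<unsigned long long>(X << ShiftVal));  // both print 40
  return 0;
}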
4663
105
bool AArch64FastISel::selectShift(const Instruction *I) {
4664
105
  MVT RetVT;
4665
105
  if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4666
0
    return false;
4667
105
4668
105
  if (RetVT.isVector())
4669
0
    return selectOperator(I, I->getOpcode());
4670
105
4671
105
  if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4672
93
    unsigned ResultReg = 0;
4673
93
    uint64_t ShiftVal = C->getZExtValue();
4674
93
    MVT SrcVT = RetVT;
4675
93
    bool IsZExt = I->getOpcode() != Instruction::AShr;
4676
93
    const Value *Op0 = I->getOperand(0);
4677
93
    if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4678
22
      if (!isIntExtFree(ZExt)) {
4679
22
        MVT TmpVT;
4680
22
        if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4681
22
          SrcVT = TmpVT;
4682
22
          IsZExt = true;
4683
22
          Op0 = ZExt->getOperand(0);
4684
22
        }
4685
22
      }
4686
93
    } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4687
20
      if (!isIntExtFree(SExt)) {
4688
20
        MVT TmpVT;
4689
20
        if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4690
19
          SrcVT = TmpVT;
4691
19
          IsZExt = false;
4692
19
          Op0 = SExt->getOperand(0);
4693
19
        }
4694
20
      }
4695
71
    }
4696
93
4697
93
    unsigned Op0Reg = getRegForValue(Op0);
4698
93
    if (!Op0Reg)
4699
0
      return false;
4700
93
    bool Op0IsKill = hasTrivialKill(Op0);
4701
93
4702
93
    switch (I->getOpcode()) {
4703
0
    default: llvm_unreachable("Unexpected instruction.");
4704
50
    case Instruction::Shl:
4705
50
      ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4706
50
      break;
4707
19
    case Instruction::AShr:
4708
19
      ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4709
19
      break;
4710
24
    case Instruction::LShr:
4711
24
      ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4712
24
      break;
4713
93
    }
4714
93
    if (!ResultReg)
4715
14
      return false;
4716
79
4717
79
    updateValueMap(I, ResultReg);
4718
79
    return true;
4719
79
  }
4720
12
4721
12
  unsigned Op0Reg = getRegForValue(I->getOperand(0));
4722
12
  if (!Op0Reg)
4723
0
    return false;
4724
12
  bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4725
12
4726
12
  unsigned Op1Reg = getRegForValue(I->getOperand(1));
4727
12
  if (!Op1Reg)
4728
0
    return false;
4729
12
  bool Op1IsKill = hasTrivialKill(I->getOperand(1));
4730
12
4731
12
  unsigned ResultReg = 0;
4732
12
  switch (I->getOpcode()) {
4733
0
  default: llvm_unreachable("Unexpected instruction.");
4734
4
  case Instruction::Shl:
4735
4
    ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4736
4
    break;
4737
4
  case Instruction::AShr:
4738
4
    ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4739
4
    break;
4740
4
  case Instruction::LShr:
4741
4
    ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4742
4
    break;
4743
12
  }
4744
12
4745
12
  if (!ResultReg)
4746
0
    return false;
4747
12
4748
12
  updateValueMap(I, ResultReg);
4749
12
  return true;
4750
12
}
4751
4752
22
bool AArch64FastISel::selectBitCast(const Instruction *I) {
4753
22
  MVT RetVT, SrcVT;
4754
22
4755
22
  if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4756
0
    return false;
4757
22
  if (!isTypeLegal(I->getType(), RetVT))
4758
0
    return false;
4759
22
4760
22
  unsigned Opc;
4761
22
  if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4762
1
    Opc = AArch64::FMOVWSr;
4763
21
  else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4764
3
    Opc = AArch64::FMOVXDr;
4765
18
  else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4766
1
    Opc = AArch64::FMOVSWr;
4767
17
  else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4768
3
    Opc = AArch64::FMOVDXr;
4769
17
  else
4770
14
    return false;
4771
8
4772
8
  const TargetRegisterClass *RC = nullptr;
4773
8
  switch (RetVT.SimpleTy) {
4774
0
  default: llvm_unreachable("Unexpected value type.");
4775
1
  case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4776
3
  case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4777
1
  case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4778
3
  case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4779
8
  }
4780
8
  unsigned Op0Reg = getRegForValue(I->getOperand(0));
4781
8
  if (!Op0Reg)
4782
0
    return false;
4783
8
  bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4784
8
  unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);
4785
8
4786
8
  if (!ResultReg)
4787
0
    return false;
4788
8
4789
8
  updateValueMap(I, ResultReg);
4790
8
  return true;
4791
8
}
4792
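The FMOV opcodes chosen above move raw bits between the integer and FP register files without any conversion. A portable C++ sketch of the f64 -> i64 direction, with memcpy standing in for FMOVDXr:

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  double D = 1.0;
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof Bits);  // bit-preserving move, like FMOVDXr
  std::printf("0x%llx\n", static_cast<unsigned long long>(Bits));  // 0x3ff0000000000000
  return 0;
}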
4793
4
bool AArch64FastISel::selectFRem(const Instruction *I) {
4794
4
  MVT RetVT;
4795
4
  if (!isTypeLegal(I->getType(), RetVT))
4796
0
    return false;
4797
4
4798
4
  RTLIB::Libcall LC;
4799
4
  switch (RetVT.SimpleTy) {
4800
0
  default:
4801
0
    return false;
4802
2
  case MVT::f32:
4803
2
    LC = RTLIB::REM_F32;
4804
2
    break;
4805
2
  case MVT::f64:
4806
2
    LC = RTLIB::REM_F64;
4807
2
    break;
4808
4
  }
4809
4
4810
4
  ArgListTy Args;
4811
4
  Args.reserve(I->getNumOperands());
4812
4
4813
4
  // Populate the argument list.
4814
8
  for (auto &Arg : I->operands()) {
4815
8
    ArgListEntry Entry;
4816
8
    Entry.Val = Arg;
4817
8
    Entry.Ty = Arg->getType();
4818
8
    Args.push_back(Entry);
4819
8
  }
4820
4
4821
4
  CallLoweringInfo CLI;
4822
4
  MCContext &Ctx = MF->getContext();
4823
4
  CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4824
4
                TLI.getLibcallName(LC), std::move(Args));
4825
4
  if (!lowerCallTo(CLI))
4826
0
    return false;
4827
4
  updateValueMap(I, CLI.ResultReg);
4828
4
  return true;
4829
4
}
4830
4831
14
bool AArch64FastISel::selectSDiv(const Instruction *I) {
4832
14
  MVT VT;
4833
14
  if (!isTypeLegal(I->getType(), VT))
4834
0
    return false;
4835
14
4836
14
  if (!isa<ConstantInt>(I->getOperand(1)))
4837
0
    return selectBinaryOp(I, ISD::SDIV);
4838
14
4839
14
  const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4840
14
  if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4841
14
      !(C.isPowerOf2() || (-C).isPowerOf2()))
4842
0
    return selectBinaryOp(I, ISD::SDIV);
4843
14
4844
14
  unsigned Lg2 = C.countTrailingZeros();
4845
14
  unsigned Src0Reg = getRegForValue(I->getOperand(0));
4846
14
  if (!Src0Reg)
4847
0
    return false;
4848
14
  bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4849
14
4850
14
  if (cast<BinaryOperator>(I)->isExact()) {
4851
3
    unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
4852
3
    if (!ResultReg)
4853
0
      return false;
4854
3
    updateValueMap(I, ResultReg);
4855
3
    return true;
4856
3
  }
4857
11
4858
11
  int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4859
11
  unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne);
4860
11
  if (!AddReg)
4861
0
    return false;
4862
11
4863
11
  // (Src0 < 0) ? Pow2 - 1 : 0;
4864
11
  if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
4865
0
    return false;
4866
11
4867
11
  unsigned SelectOpc;
4868
11
  const TargetRegisterClass *RC;
4869
11
  if (VT == MVT::i64) {
4870
6
    SelectOpc = AArch64::CSELXr;
4871
6
    RC = &AArch64::GPR64RegClass;
4872
11
  } else {
4873
5
    SelectOpc = AArch64::CSELWr;
4874
5
    RC = &AArch64::GPR32RegClass;
4875
5
  }
4876
11
  unsigned SelectReg =
4877
11
      fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
4878
11
                       Src0IsKill, AArch64CC::LT);
4879
11
  if (!SelectReg)
4880
0
    return false;
4881
11
4882
11
  // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4883
11
  // negate the result.
4884
11
  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4885
11
  unsigned ResultReg;
4886
11
  if (C.isNegative())
4887
4
    ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
4888
4
                              SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2);
4889
11
  else
4890
7
    ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);
4891
11
4892
11
  if (!ResultReg)
4893
0
    return false;
4894
11
4895
11
  updateValueMap(I, ResultReg);
4896
11
  return true;
4897
11
}
4898
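The non-exact sdiv-by-power-of-two sequence above adds 2^Lg2 - 1 when the numerator is negative (the CSEL), arithmetic-shifts right by Lg2, and finally negates if the divisor itself was negative. An arithmetic sketch with assumed i32 inputs; the conditional expressions model the CSEL and the SUB from the zero register:

#include <cstdint>
#include <cstdio>

static int32_t sdiv_pow2(int32_t N, int32_t D) {
  uint32_t AbsD = D < 0 ? uint32_t(-int64_t(D)) : uint32_t(D);
  unsigned Lg2 = 0;
  while (!((AbsD >> Lg2) & 1)) ++Lg2;       // C.countTrailingZeros()
  int32_t Bias = int32_t((1u << Lg2) - 1);  // emitAdd_ri_ operand (Pow2 - 1)
  int32_t Adj = N < 0 ? N + Bias : N;       // CSEL keyed on the compare with zero
  int32_t Res = Adj >> Lg2;                 // emitASR_ri
  return D < 0 ? -Res : Res;                // emitAddSub_rs from XZR/WZR when negative
}

int main() {
  std::printf("%d %d\n", sdiv_pow2(-7, 4), -7 / 4);  // both print -1
  return 0;
}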
4899
/// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4900
/// have to duplicate it for AArch64, because otherwise we would fail during the
4901
/// sign-extend emission.
4902
17
std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4903
17
  unsigned IdxN = getRegForValue(Idx);
4904
17
  if (IdxN == 0)
4905
17
    // Unhandled operand. Halt "fast" selection and bail.
4906
0
    return std::pair<unsigned, bool>(0, false);
4907
17
4908
17
  bool IdxNIsKill = hasTrivialKill(Idx);
4909
17
4910
17
  // If the index is smaller or larger than intptr_t, truncate or extend it.
4911
17
  MVT PtrVT = TLI.getPointerTy(DL);
4912
17
  EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4913
17
  if (IdxVT.bitsLT(PtrVT)) {
4914
2
    IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*IsZExt=*/false);
4915
2
    IdxNIsKill = true;
4916
17
  } else if (IdxVT.bitsGT(PtrVT))
4917
0
    llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4918
17
  return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
4919
17
}
4920
4921
/// This is mostly a copy of the existing FastISel GEP code, but we have to
4922
/// duplicate it for AArch64, because otherwise we would bail out even for
4923
/// simple cases. This is because the standard fastEmit functions don't cover
4924
/// MUL at all and ADD is lowered very inefficiently.
4925
32
bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4926
32
  unsigned N = getRegForValue(I->getOperand(0));
4927
32
  if (!N)
4928
0
    return false;
4929
32
  bool NIsKill = hasTrivialKill(I->getOperand(0));
4930
32
4931
32
  // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4932
32
  // into a single N = N + TotalOffset.
4933
32
  uint64_t TotalOffs = 0;
4934
32
  MVT VT = TLI.getPointerTy(DL);
4935
32
  for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
4936
80
       GTI != E; ++GTI) {
4937
48
    const Value *Idx = GTI.getOperand();
4938
48
    if (auto *StTy = GTI.getStructTypeOrNull()) {
4939
4
      unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4940
4
      // N = N + Offset
4941
4
      if (Field)
4942
2
        TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4943
48
    } else {
4944
44
      Type *Ty = GTI.getIndexedType();
4945
44
4946
44
      // If this is a constant subscript, handle it quickly.
4947
44
      if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4948
27
        if (CI->isZero())
4949
16
          continue;
4950
11
        // N = N + Offset
4951
11
        TotalOffs +=
4952
11
            DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
4953
11
        continue;
4954
11
      }
4955
17
      if (TotalOffs) {
4956
0
        N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
4957
0
        if (!N)
4958
0
          return false;
4959
0
        NIsKill = true;
4960
0
        TotalOffs = 0;
4961
0
      }
4962
17
4963
17
      // N = N + Idx * ElementSize;
4964
17
      uint64_t ElementSize = DL.getTypeAllocSize(Ty);
4965
17
      std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
4966
17
      unsigned IdxN = Pair.first;
4967
17
      bool IdxNIsKill = Pair.second;
4968
17
      if (!IdxN)
4969
0
        return false;
4970
17
4971
17
      if (ElementSize != 1) {
4972
6
        unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
4973
6
        if (!C)
4974
0
          return false;
4975
6
        IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true);
4976
6
        if (!IdxN)
4977
0
          return false;
4978
6
        IdxNIsKill = true;
4979
6
      }
4980
17
      N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
4981
17
      if (!N)
4982
0
        return false;
4983
44
    }
4984
48
  }
4985
32
  if (TotalOffs) {
4986
13
    N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
4987
13
    if (!N)
4988
0
      return false;
4989
32
  }
4990
32
  updateValueMap(I, N);
4991
32
  return true;
4992
32
}
4993
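The loop above folds every constant GEP index into a single running TotalOffs and lowers each variable index as index * element-size plus an add. A sketch of the resulting address arithmetic for a hypothetical access such as p[i].f; all sizes and values below are made-up illustration numbers:

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t N = 0x1000;              // base pointer register
  const uint64_t ElementSize = 16;  // DL.getTypeAllocSize(Ty) of the array element
  const uint64_t Idx = 3;           // variable index: emitMul_rr + fastEmit_rr(ISD::ADD)
  const uint64_t FieldOffset = 8;   // constant struct-field part, folded into TotalOffs
  uint64_t TotalOffs = FieldOffset;
  N = N + Idx * ElementSize + TotalOffs;  // the trailing emitAdd_ri_ applies TotalOffs
  std::printf("0x%llx\n", static_cast<unsigned long long>(N));  // prints 0x1038
  return 0;
}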
4994
3
bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
4995
3
  assert(TM.getOptLevel() == CodeGenOpt::None &&
4996
3
         "cmpxchg survived AtomicExpand at optlevel > -O0");
4997
3
4998
3
  auto *RetPairTy = cast<StructType>(I->getType());
4999
3
  Type *RetTy = RetPairTy->getTypeAtIndex(0U);
5000
3
  assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
5001
3
         "cmpxchg has a non-i1 status result");
5002
3
5003
3
  MVT VT;
5004
3
  if (!isTypeLegal(RetTy, VT))
5005
0
    return false;
5006
3
5007
3
  const TargetRegisterClass *ResRC;
5008
3
  unsigned Opc, CmpOpc;
5009
3
  // This only supports i32/i64, because i8/i16 aren't legal, and the generic
5010
3
  // extractvalue selection doesn't support that.
5011
3
  if (VT == MVT::i32) {
5012
2
    Opc = AArch64::CMP_SWAP_32;
5013
2
    CmpOpc = AArch64::SUBSWrs;
5014
2
    ResRC = &AArch64::GPR32RegClass;
5015
3
  } else if (VT == MVT::i64) {
5016
1
    Opc = AArch64::CMP_SWAP_64;
5017
1
    CmpOpc = AArch64::SUBSXrs;
5018
1
    ResRC = &AArch64::GPR64RegClass;
5019
1
  } else {
5020
0
    return false;
5021
0
  }
5022
3
5023
3
  const MCInstrDesc &II = TII.get(Opc);
5024
3
5025
3
  const unsigned AddrReg = constrainOperandRegClass(
5026
3
      II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
5027
3
  const unsigned DesiredReg = constrainOperandRegClass(
5028
3
      II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
5029
3
  const unsigned NewReg = constrainOperandRegClass(
5030
3
      II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
5031
3
5032
3
  const unsigned ResultReg1 = createResultReg(ResRC);
5033
3
  const unsigned ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
5034
3
  const unsigned ScratchReg = createResultReg(&AArch64::GPR32RegClass);
5035
3
5036
3
  // FIXME: MachineMemOperand doesn't support cmpxchg yet.
5037
3
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
5038
3
      .addDef(ResultReg1)
5039
3
      .addDef(ScratchReg)
5040
3
      .addUse(AddrReg)
5041
3
      .addUse(DesiredReg)
5042
3
      .addUse(NewReg);
5043
3
5044
3
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
5045
3
      .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5046
3
      .addUse(ResultReg1)
5047
3
      .addUse(DesiredReg)
5048
3
      .addImm(0);
5049
3
5050
3
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr))
5051
3
      .addDef(ResultReg2)
5052
3
      .addUse(AArch64::WZR)
5053
3
      .addUse(AArch64::WZR)
5054
3
      .addImm(AArch64CC::NE);
5055
3
5056
3
  assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5057
3
  updateValueMap(I, ResultReg1, 2);
5058
3
  return true;
5059
3
}
5060
5061
4.17k
bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5062
4.17k
  switch (I->getOpcode()) {
5063
636
  default:
5064
636
    break;
5065
274
  case Instruction::Add:
5066
274
  case Instruction::Sub:
5067
274
    return selectAddSub(I);
5068
10
  case Instruction::Mul:
5069
10
    return selectMul(I);
5070
14
  case Instruction::SDiv:
5071
14
    return selectSDiv(I);
5072
4
  case Instruction::SRem:
5073
4
    if (!selectBinaryOp(I, ISD::SREM))
5074
4
      return selectRem(I, ISD::SREM);
5075
0
    return true;
5076
4
  case Instruction::URem:
5077
4
    if (!selectBinaryOp(I, ISD::UREM))
5078
4
      return selectRem(I, ISD::UREM);
5079
0
    return true;
5080
105
  case Instruction::Shl:
5081
105
  case Instruction::LShr:
5082
105
  case Instruction::AShr:
5083
105
    return selectShift(I);
5084
89
  case Instruction::And:
5085
89
  case Instruction::Or:
5086
89
  case Instruction::Xor:
5087
89
    return selectLogicalOp(I);
5088
300
  case Instruction::Br:
5089
300
    return selectBranch(I);
5090
1
  case Instruction::IndirectBr:
5091
1
    return selectIndirectBr(I);
5092
50
  case Instruction::BitCast:
5093
50
    if (!FastISel::selectBitCast(I))
5094
22
      return selectBitCast(I);
5095
28
    return true;
5096
12
  case Instruction::FPToSI:
5097
12
    if (!selectCast(I, ISD::FP_TO_SINT))
5098
1
      return selectFPToInt(I, /*Signed=*/true);
5099
11
    return true;
5100
18
  case Instruction::FPToUI:
5101
18
    return selectFPToInt(I, /*Signed=*/false);
5102
228
  case Instruction::ZExt:
5103
228
  case Instruction::SExt:
5104
228
    return selectIntExt(I);
5105
26
  case Instruction::Trunc:
5106
26
    if (!selectCast(I, ISD::TRUNCATE))
5107
14
      return selectTrunc(I);
5108
12
    return true;
5109
7
  case Instruction::FPExt:
5110
7
    return selectFPExt(I);
5111
2
  case Instruction::FPTrunc:
5112
2
    return selectFPTrunc(I);
5113
25
  case Instruction::SIToFP:
5114
25
    if (!selectCast(I, ISD::SINT_TO_FP))
5115
9
      return selectIntToFP(I, /*Signed=*/true);
5116
16
    return true;
5117
21
  case Instruction::UIToFP:
5118
21
    return selectIntToFP(I, /*Signed=*/false);
5119
408
  case Instruction::Load:
5120
408
    return selectLoad(I);
5121
472
  case Instruction::Store:
5122
472
    return selectStore(I);
5123
57
  case Instruction::FCmp:
5124
57
  case Instruction::ICmp:
5125
57
    return selectCmp(I);
5126
53
  case Instruction::Select:
5127
53
    return selectSelect(I);
5128
1.31k
  case Instruction::Ret:
5129
1.31k
    return selectRet(I);
5130
4
  case Instruction::FRem:
5131
4
    return selectFRem(I);
5132
32
  case Instruction::GetElementPtr:
5133
32
    return selectGetElementPtr(I);
5134
3
  case Instruction::AtomicCmpXchg:
5135
3
    return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5136
636
  }
5137
636
5138
636
  // fall-back to target-independent instruction selection.
5139
636
  return selectOperator(I, I->getOpcode());
5140
636
  // Silence warnings.
5141
0
  (void)&CC_AArch64_DarwinPCS_VarArg;
5142
0
  (void)&CC_AArch64_Win64_VarArg;
5143
0
}
5144
5145
namespace llvm {
5146
5147
FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
5148
1.23k
                                        const TargetLibraryInfo *LibInfo) {
5149
1.23k
  return new AArch64FastISel(FuncInfo, LibInfo);
5150
1.23k
}
5151
5152
} // end namespace llvm