Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/AArch64/AArch64FastISel.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- AArch6464FastISel.cpp - AArch64 FastISel implementation ------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file defines the AArch64-specific support for the FastISel class. Some
10
// of the target-specific code is generated by tablegen in the file
11
// AArch64GenFastISel.inc, which is #included here.
12
//
13
//===----------------------------------------------------------------------===//
14
15
#include "AArch64.h"
16
#include "AArch64CallingConvention.h"
17
#include "AArch64RegisterInfo.h"
18
#include "AArch64Subtarget.h"
19
#include "MCTargetDesc/AArch64AddressingModes.h"
20
#include "Utils/AArch64BaseInfo.h"
21
#include "llvm/ADT/APFloat.h"
22
#include "llvm/ADT/APInt.h"
23
#include "llvm/ADT/DenseMap.h"
24
#include "llvm/ADT/SmallVector.h"
25
#include "llvm/Analysis/BranchProbabilityInfo.h"
26
#include "llvm/CodeGen/CallingConvLower.h"
27
#include "llvm/CodeGen/FastISel.h"
28
#include "llvm/CodeGen/FunctionLoweringInfo.h"
29
#include "llvm/CodeGen/ISDOpcodes.h"
30
#include "llvm/CodeGen/MachineBasicBlock.h"
31
#include "llvm/CodeGen/MachineConstantPool.h"
32
#include "llvm/CodeGen/MachineFrameInfo.h"
33
#include "llvm/CodeGen/MachineInstr.h"
34
#include "llvm/CodeGen/MachineInstrBuilder.h"
35
#include "llvm/CodeGen/MachineMemOperand.h"
36
#include "llvm/CodeGen/MachineRegisterInfo.h"
37
#include "llvm/CodeGen/RuntimeLibcalls.h"
38
#include "llvm/CodeGen/ValueTypes.h"
39
#include "llvm/IR/Argument.h"
40
#include "llvm/IR/Attributes.h"
41
#include "llvm/IR/BasicBlock.h"
42
#include "llvm/IR/CallingConv.h"
43
#include "llvm/IR/Constant.h"
44
#include "llvm/IR/Constants.h"
45
#include "llvm/IR/DataLayout.h"
46
#include "llvm/IR/DerivedTypes.h"
47
#include "llvm/IR/Function.h"
48
#include "llvm/IR/GetElementPtrTypeIterator.h"
49
#include "llvm/IR/GlobalValue.h"
50
#include "llvm/IR/InstrTypes.h"
51
#include "llvm/IR/Instruction.h"
52
#include "llvm/IR/Instructions.h"
53
#include "llvm/IR/IntrinsicInst.h"
54
#include "llvm/IR/Intrinsics.h"
55
#include "llvm/IR/Operator.h"
56
#include "llvm/IR/Type.h"
57
#include "llvm/IR/User.h"
58
#include "llvm/IR/Value.h"
59
#include "llvm/MC/MCInstrDesc.h"
60
#include "llvm/MC/MCRegisterInfo.h"
61
#include "llvm/MC/MCSymbol.h"
62
#include "llvm/Support/AtomicOrdering.h"
63
#include "llvm/Support/Casting.h"
64
#include "llvm/Support/CodeGen.h"
65
#include "llvm/Support/Compiler.h"
66
#include "llvm/Support/ErrorHandling.h"
67
#include "llvm/Support/MachineValueType.h"
68
#include "llvm/Support/MathExtras.h"
69
#include <algorithm>
70
#include <cassert>
71
#include <cstdint>
72
#include <iterator>
73
#include <utility>
74
75
using namespace llvm;
76
77
namespace {
78
79
class AArch64FastISel final : public FastISel {
80
  class Address {
81
  public:
82
    using BaseKind = enum {
83
      RegBase,
84
      FrameIndexBase
85
    };
86
87
  private:
88
    BaseKind Kind = RegBase;
89
    AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
90
    union {
91
      unsigned Reg;
92
      int FI;
93
    } Base;
94
    unsigned OffsetReg = 0;
95
    unsigned Shift = 0;
96
    int64_t Offset = 0;
97
    const GlobalValue *GV = nullptr;
98
99
  public:
100
941
    Address() { Base.Reg = 0; }
101
102
219
    void setKind(BaseKind K) { Kind = K; }
103
0
    BaseKind getKind() const { return Kind; }
104
83
    void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
105
1.34k
    AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
106
1.82k
    bool isRegBase() const { return Kind == RegBase; }
107
699
    bool isFIBase() const { return Kind == FrameIndexBase; }
108
109
1.04k
    void setReg(unsigned Reg) {
110
1.04k
      assert(isRegBase() && "Invalid base register access!");
111
1.04k
      Base.Reg = Reg;
112
1.04k
    }
113
114
2.05k
    unsigned getReg() const {
115
2.05k
      assert(isRegBase() && "Invalid base register access!");
116
2.05k
      return Base.Reg;
117
2.05k
    }
118
119
506
    void setOffsetReg(unsigned Reg) {
120
506
      OffsetReg = Reg;
121
506
    }
122
123
2.46k
    unsigned getOffsetReg() const {
124
2.46k
      return OffsetReg;
125
2.46k
    }
126
127
184
    void setFI(unsigned FI) {
128
184
      assert(isFIBase() && "Invalid base frame index  access!");
129
184
      Base.FI = FI;
130
184
    }
131
132
192
    unsigned getFI() const {
133
192
      assert(isFIBase() && "Invalid base frame index access!");
134
192
      return Base.FI;
135
192
    }
136
137
213
    void setOffset(int64_t O) { Offset = O; }
138
3.63k
    int64_t getOffset() { return Offset; }
139
62
    void setShift(unsigned S) { Shift = S; }
140
82
    unsigned getShift() { return Shift; }
141
142
96
    void setGlobalValue(const GlobalValue *G) { GV = G; }
143
206
    const GlobalValue *getGlobalValue() { return GV; }
144
  };
145
146
  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
147
  /// make the right decision when generating code for different targets.
148
  const AArch64Subtarget *Subtarget;
149
  LLVMContext *Context;
150
151
  bool fastLowerArguments() override;
152
  bool fastLowerCall(CallLoweringInfo &CLI) override;
153
  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
154
155
private:
156
  // Selection routines.
157
  bool selectAddSub(const Instruction *I);
158
  bool selectLogicalOp(const Instruction *I);
159
  bool selectLoad(const Instruction *I);
160
  bool selectStore(const Instruction *I);
161
  bool selectBranch(const Instruction *I);
162
  bool selectIndirectBr(const Instruction *I);
163
  bool selectCmp(const Instruction *I);
164
  bool selectSelect(const Instruction *I);
165
  bool selectFPExt(const Instruction *I);
166
  bool selectFPTrunc(const Instruction *I);
167
  bool selectFPToInt(const Instruction *I, bool Signed);
168
  bool selectIntToFP(const Instruction *I, bool Signed);
169
  bool selectRem(const Instruction *I, unsigned ISDOpcode);
170
  bool selectRet(const Instruction *I);
171
  bool selectTrunc(const Instruction *I);
172
  bool selectIntExt(const Instruction *I);
173
  bool selectMul(const Instruction *I);
174
  bool selectShift(const Instruction *I);
175
  bool selectBitCast(const Instruction *I);
176
  bool selectFRem(const Instruction *I);
177
  bool selectSDiv(const Instruction *I);
178
  bool selectGetElementPtr(const Instruction *I);
179
  bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
180
181
  // Utility helper routines.
182
  bool isTypeLegal(Type *Ty, MVT &VT);
183
  bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
184
  bool isValueAvailable(const Value *V) const;
185
  bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
186
  bool computeCallAddress(const Value *V, Address &Addr);
187
  bool simplifyAddress(Address &Addr, MVT VT);
188
  void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
189
                            MachineMemOperand::Flags Flags,
190
                            unsigned ScaleFactor, MachineMemOperand *MMO);
191
  bool isMemCpySmall(uint64_t Len, unsigned Alignment);
192
  bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
193
                          unsigned Alignment);
194
  bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
195
                         const Value *Cond);
196
  bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
197
  bool optimizeSelect(const SelectInst *SI);
198
  std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx);
199
200
  // Emit helper routines.
201
  unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
202
                      const Value *RHS, bool SetFlags = false,
203
                      bool WantResult = true,  bool IsZExt = false);
204
  unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
205
                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
206
                         bool SetFlags = false, bool WantResult = true);
207
  unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
208
                         bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
209
                         bool WantResult = true);
210
  unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
211
                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
212
                         AArch64_AM::ShiftExtendType ShiftType,
213
                         uint64_t ShiftImm, bool SetFlags = false,
214
                         bool WantResult = true);
215
  unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
216
                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
217
                          AArch64_AM::ShiftExtendType ExtType,
218
                          uint64_t ShiftImm, bool SetFlags = false,
219
                         bool WantResult = true);
220
221
  // Emit functions.
222
  bool emitCompareAndBranch(const BranchInst *BI);
223
  bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
224
  bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
225
  bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
226
  bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
227
  unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
228
                    MachineMemOperand *MMO = nullptr);
229
  bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
230
                 MachineMemOperand *MMO = nullptr);
231
  bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
232
                        MachineMemOperand *MMO = nullptr);
233
  unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
234
  unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
235
  unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
236
                   bool SetFlags = false, bool WantResult = true,
237
                   bool IsZExt = false);
238
  unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm);
239
  unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
240
                   bool SetFlags = false, bool WantResult = true,
241
                   bool IsZExt = false);
242
  unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
243
                       unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
244
  unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
245
                       unsigned RHSReg, bool RHSIsKill,
246
                       AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
247
                       bool WantResult = true);
248
  unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
249
                         const Value *RHS);
250
  unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
251
                            bool LHSIsKill, uint64_t Imm);
252
  unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
253
                            bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
254
                            uint64_t ShiftImm);
255
  unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
256
  unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
257
                      unsigned Op1, bool Op1IsKill);
258
  unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
259
                        unsigned Op1, bool Op1IsKill);
260
  unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
261
                        unsigned Op1, bool Op1IsKill);
262
  unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
263
                      unsigned Op1Reg, bool Op1IsKill);
264
  unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
265
                      uint64_t Imm, bool IsZExt = true);
266
  unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
267
                      unsigned Op1Reg, bool Op1IsKill);
268
  unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
269
                      uint64_t Imm, bool IsZExt = true);
270
  unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
271
                      unsigned Op1Reg, bool Op1IsKill);
272
  unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
273
                      uint64_t Imm, bool IsZExt = false);
274
275
  unsigned materializeInt(const ConstantInt *CI, MVT VT);
276
  unsigned materializeFP(const ConstantFP *CFP, MVT VT);
277
  unsigned materializeGV(const GlobalValue *GV);
278
279
  // Call handling routines.
280
private:
281
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
282
  bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
283
                       unsigned &NumBytes);
284
  bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
285
286
public:
287
  // Backend specific FastISel code.
288
  unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
289
  unsigned fastMaterializeConstant(const Constant *C) override;
290
  unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
291
292
  explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
293
                           const TargetLibraryInfo *LibInfo)
294
1.26k
      : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
295
1.26k
    Subtarget =
296
1.26k
        &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
297
1.26k
    Context = &FuncInfo.Fn->getContext();
298
1.26k
  }
299
300
  bool fastSelectInstruction(const Instruction *I) override;
301
302
#include "AArch64GenFastISel.inc"
303
};
304
305
} // end anonymous namespace
306
307
/// Check if the sign-/zero-extend will be a noop.
308
82
static bool isIntExtFree(const Instruction *I) {
309
82
  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
310
82
         "Unexpected integer extend instruction.");
311
82
  assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
312
82
         "Unexpected value type.");
313
82
  bool IsZExt = isa<ZExtInst>(I);
314
82
315
82
  if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
316
0
    if (LI->hasOneUse())
317
0
      return true;
318
82
319
82
  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
320
80
    if ((IsZExt && 
Arg->hasZExtAttr()31
) ||
(78
!IsZExt78
&&
Arg->hasSExtAttr()49
))
321
4
      return true;
322
78
323
78
  return false;
324
78
}
325
326
/// Determine the implicit scale factor that is applied by a memory
327
/// operation for a given value type.
328
1.44k
static unsigned getImplicitScaleFactor(MVT VT) {
329
1.44k
  switch (VT.SimpleTy) {
330
1.44k
  default:
331
224
    return 0;    // invalid
332
1.44k
  case MVT::i1:  // fall-through
333
200
  case MVT::i8:
334
200
    return 1;
335
200
  case MVT::i16:
336
120
    return 2;
337
458
  case MVT::i32: // fall-through
338
458
  case MVT::f32:
339
458
    return 4;
340
458
  case MVT::i64: // fall-through
341
440
  case MVT::f64:
342
440
    return 8;
343
1.44k
  }
344
1.44k
}
345
346
203
CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
347
203
  if (CC == CallingConv::WebKit_JS)
348
7
    return CC_AArch64_WebKit_JS;
349
196
  if (CC == CallingConv::GHC)
350
0
    return CC_AArch64_GHC;
351
196
  return Subtarget->isTargetDarwin() ? 
CC_AArch64_DarwinPCS125
:
CC_AArch64_AAPCS71
;
352
196
}
353
354
15
unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
355
15
  assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
356
15
         "Alloca should always return a pointer.");
357
15
358
15
  // Don't handle dynamic allocas.
359
15
  if (!FuncInfo.StaticAllocaMap.count(AI))
360
0
    return 0;
361
15
362
15
  DenseMap<const AllocaInst *, int>::iterator SI =
363
15
      FuncInfo.StaticAllocaMap.find(AI);
364
15
365
15
  if (SI != FuncInfo.StaticAllocaMap.end()) {
366
15
    unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
367
15
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
368
15
            ResultReg)
369
15
        .addFrameIndex(SI->second)
370
15
        .addImm(0)
371
15
        .addImm(0);
372
15
    return ResultReg;
373
15
  }
374
0
375
0
  return 0;
376
0
}
377
378
357
unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
379
357
  if (VT > MVT::i64)
380
0
    return 0;
381
357
382
357
  if (!CI->isZero())
383
231
    return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
384
126
385
126
  // Create a copy from the zero register to materialize a "0" value.
386
126
  const TargetRegisterClass *RC = (VT == MVT::i64) ? 
&AArch64::GPR64RegClass15
387
126
                                                   : 
&AArch64::GPR32RegClass111
;
388
126
  unsigned ZeroReg = (VT == MVT::i64) ? 
AArch64::XZR15
:
AArch64::WZR111
;
389
126
  unsigned ResultReg = createResultReg(RC);
390
126
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
391
126
          ResultReg).addReg(ZeroReg, getKillRegState(true));
392
126
  return ResultReg;
393
126
}
394
395
20
unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
396
20
  // Positive zero (+0.0) has to be materialized with a fmov from the zero
397
20
  // register, because the immediate version of fmov cannot encode zero.
398
20
  if (CFP->isNullValue())
399
2
    return fastMaterializeFloatZero(CFP);
400
18
401
18
  if (VT != MVT::f32 && 
VT != MVT::f647
)
402
0
    return 0;
403
18
404
18
  const APFloat Val = CFP->getValueAPF();
405
18
  bool Is64Bit = (VT == MVT::f64);
406
18
  // This checks to see if we can use FMOV instructions to materialize
407
18
  // a constant, otherwise we have to materialize via the constant pool.
408
18
  int Imm =
409
18
      Is64Bit ? 
AArch64_AM::getFP64Imm(Val)7
:
AArch64_AM::getFP32Imm(Val)11
;
410
18
  if (Imm != -1) {
411
10
    unsigned Opc = Is64Bit ? 
AArch64::FMOVDi2
:
AArch64::FMOVSi8
;
412
10
    return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
413
10
  }
414
8
415
8
  // For the MachO large code model materialize the FP constant in code.
416
8
  if (Subtarget->isTargetMachO() && 
TM.getCodeModel() == CodeModel::Large7
) {
417
5
    unsigned Opc1 = Is64Bit ? 
AArch64::MOVi64imm3
:
AArch64::MOVi32imm2
;
418
5
    const TargetRegisterClass *RC = Is64Bit ?
419
3
        &AArch64::GPR64RegClass : 
&AArch64::GPR32RegClass2
;
420
5
421
5
    unsigned TmpReg = createResultReg(RC);
422
5
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
423
5
        .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
424
5
425
5
    unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
426
5
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
427
5
            TII.get(TargetOpcode::COPY), ResultReg)
428
5
        .addReg(TmpReg, getKillRegState(true));
429
5
430
5
    return ResultReg;
431
5
  }
432
3
433
3
  // Materialize via constant pool.  MachineConstantPool wants an explicit
434
3
  // alignment.
435
3
  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
436
3
  if (Align == 0)
437
0
    Align = DL.getTypeAllocSize(CFP->getType());
438
3
439
3
  unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
440
3
  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
441
3
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
442
3
          ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
443
3
444
3
  unsigned Opc = Is64Bit ? 
AArch64::LDRDui2
:
AArch64::LDRSui1
;
445
3
  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
446
3
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
447
3
      .addReg(ADRPReg)
448
3
      .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
449
3
  return ResultReg;
450
3
}
451
452
125
unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
453
125
  // We can't handle thread-local variables quickly yet.
454
125
  if (GV->isThreadLocal())
455
11
    return 0;
456
114
457
114
  // MachO still uses GOT for large code-model accesses, but ELF requires
458
114
  // movz/movk sequences, which FastISel doesn't handle yet.
459
114
  if (!Subtarget->useSmallAddressing() && 
!Subtarget->isTargetMachO()35
)
460
26
    return 0;
461
88
462
88
  unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
463
88
464
88
  EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
465
88
  if (!DestEVT.isSimple())
466
0
    return 0;
467
88
468
88
  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
469
88
  unsigned ResultReg;
470
88
471
88
  if (OpFlags & AArch64II::MO_GOT) {
472
46
    // ADRP + LDRX
473
46
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
474
46
            ADRPReg)
475
46
        .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
476
46
477
46
    ResultReg = createResultReg(&AArch64::GPR64RegClass);
478
46
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
479
46
            ResultReg)
480
46
        .addReg(ADRPReg)
481
46
        .addGlobalAddress(GV, 0,
482
46
                          AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags);
483
46
  } else {
484
42
    // ADRP + ADDX
485
42
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
486
42
            ADRPReg)
487
42
        .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
488
42
489
42
    ResultReg = createResultReg(&AArch64::GPR64spRegClass);
490
42
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
491
42
            ResultReg)
492
42
        .addReg(ADRPReg)
493
42
        .addGlobalAddress(GV, 0,
494
42
                          AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
495
42
        .addImm(0);
496
42
  }
497
88
  return ResultReg;
498
88
}
499
500
541
unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
501
541
  EVT CEVT = TLI.getValueType(DL, C->getType(), true);
502
541
503
541
  // Only handle simple types.
504
541
  if (!CEVT.isSimple())
505
0
    return 0;
506
541
  MVT VT = CEVT.getSimpleVT();
507
541
508
541
  if (const auto *CI = dyn_cast<ConstantInt>(C))
509
351
    return materializeInt(CI, VT);
510
190
  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
511
20
    return materializeFP(CFP, VT);
512
170
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
513
118
    return materializeGV(GV);
514
52
515
52
  return 0;
516
52
}
517
518
2
unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
519
2
  assert(CFP->isNullValue() &&
520
2
         "Floating-point constant is not a positive zero.");
521
2
  MVT VT;
522
2
  if (!isTypeLegal(CFP->getType(), VT))
523
0
    return 0;
524
2
525
2
  if (VT != MVT::f32 && 
VT != MVT::f641
)
526
0
    return 0;
527
2
528
2
  bool Is64Bit = (VT == MVT::f64);
529
2
  unsigned ZReg = Is64Bit ? 
AArch64::XZR1
:
AArch64::WZR1
;
530
2
  unsigned Opc = Is64Bit ? 
AArch64::FMOVXDr1
:
AArch64::FMOVWSr1
;
531
2
  return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
532
2
}
533
534
/// Check if the multiply is by a power-of-2 constant.
535
583
static bool isMulPowOf2(const Value *I) {
536
583
  if (const auto *MI = dyn_cast<MulOperator>(I)) {
537
23
    if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
538
0
      if (C->getValue().isPowerOf2())
539
0
        return true;
540
23
    if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
541
21
      if (C->getValue().isPowerOf2())
542
19
        return true;
543
564
  }
544
564
  return false;
545
564
}
546
547
// Computes the address to get to an object.
548
bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
549
1.16k
{
550
1.16k
  const User *U = nullptr;
551
1.16k
  unsigned Opcode = Instruction::UserOp1;
552
1.16k
  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
553
559
    // Don't walk into other basic blocks unless the object is an alloca from
554
559
    // another block, otherwise it may not have a virtual register assigned.
555
559
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
556
559
        
FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB375
) {
557
555
      Opcode = I->getOpcode();
558
555
      U = I;
559
555
    }
560
609
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
561
24
    Opcode = C->getOpcode();
562
24
    U = C;
563
24
  }
564
1.16k
565
1.16k
  if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
566
847
    if (Ty->getAddressSpace() > 255)
567
10
      // Fast instruction selection doesn't support the special
568
10
      // address spaces.
569
10
      return false;
570
1.15k
571
1.15k
  switch (Opcode) {
572
1.15k
  default:
573
597
    break;
574
1.15k
  case Instruction::BitCast:
575
17
    // Look through bitcasts.
576
17
    return computeAddress(U->getOperand(0), Addr, Ty);
577
1.15k
578
1.15k
  case Instruction::IntToPtr:
579
121
    // Look past no-op inttoptrs.
580
121
    if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
581
121
        TLI.getPointerTy(DL))
582
121
      return computeAddress(U->getOperand(0), Addr, Ty);
583
0
    break;
584
0
585
3
  case Instruction::PtrToInt:
586
3
    // Look past no-op ptrtoints.
587
3
    if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
588
3
      return computeAddress(U->getOperand(0), Addr, Ty);
589
0
    break;
590
0
591
53
  case Instruction::GetElementPtr: {
592
53
    Address SavedAddr = Addr;
593
53
    uint64_t TmpOffset = Addr.getOffset();
594
53
595
53
    // Iterate through the GEP folding the constants into offsets where
596
53
    // we can.
597
53
    for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
598
144
         GTI != E; 
++GTI91
) {
599
95
      const Value *Op = GTI.getOperand();
600
95
      if (StructType *STy = GTI.getStructTypeOrNull()) {
601
21
        const StructLayout *SL = DL.getStructLayout(STy);
602
21
        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
603
21
        TmpOffset += SL->getElementOffset(Idx);
604
74
      } else {
605
74
        uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
606
74
        while (true) {
607
74
          if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
608
70
            // Constant-offset addressing.
609
70
            TmpOffset += CI->getSExtValue() * S;
610
70
            break;
611
70
          }
612
4
          if (canFoldAddIntoGEP(U, Op)) {
613
0
            // A compatible add with a constant operand. Fold the constant.
614
0
            ConstantInt *CI =
615
0
                cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
616
0
            TmpOffset += CI->getSExtValue() * S;
617
0
            // Iterate on the other operand.
618
0
            Op = cast<AddOperator>(Op)->getOperand(0);
619
0
            continue;
620
0
          }
621
4
          // Unsupported
622
4
          goto unsupported_gep;
623
4
        }
624
74
      }
625
95
    }
626
53
627
53
    // Try to grab the base operand now.
628
53
    Addr.setOffset(TmpOffset);
629
49
    if (computeAddress(U->getOperand(0), Addr, Ty))
630
44
      return true;
631
5
632
5
    // We failed, restore everything and try the other options.
633
5
    Addr = SavedAddr;
634
5
635
9
  unsupported_gep:
636
9
    break;
637
5
  }
638
184
  case Instruction::Alloca: {
639
184
    const AllocaInst *AI = cast<AllocaInst>(Obj);
640
184
    DenseMap<const AllocaInst *, int>::iterator SI =
641
184
        FuncInfo.StaticAllocaMap.find(AI);
642
184
    if (SI != FuncInfo.StaticAllocaMap.end()) {
643
184
      Addr.setKind(Address::FrameIndexBase);
644
184
      Addr.setFI(SI->second);
645
184
      return true;
646
184
    }
647
0
    break;
648
0
  }
649
100
  case Instruction::Add: {
650
100
    // Adds of constants are common and easy enough.
651
100
    const Value *LHS = U->getOperand(0);
652
100
    const Value *RHS = U->getOperand(1);
653
100
654
100
    if (isa<ConstantInt>(LHS))
655
1
      std::swap(LHS, RHS);
656
100
657
100
    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
658
21
      Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
659
21
      return computeAddress(LHS, Addr, Ty);
660
21
    }
661
79
662
79
    Address Backup = Addr;
663
79
    if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
664
79
      return true;
665
0
    Addr = Backup;
666
0
667
0
    break;
668
0
  }
669
21
  case Instruction::Sub: {
670
21
    // Subs of constants are common and easy enough.
671
21
    const Value *LHS = U->getOperand(0);
672
21
    const Value *RHS = U->getOperand(1);
673
21
674
21
    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
675
21
      Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
676
21
      return computeAddress(LHS, Addr, Ty);
677
21
    }
678
0
    break;
679
0
  }
680
26
  case Instruction::Shl: {
681
26
    if (Addr.getOffsetReg())
682
3
      break;
683
23
684
23
    const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
685
23
    if (!CI)
686
0
      break;
687
23
688
23
    unsigned Val = CI->getZExtValue();
689
23
    if (Val < 1 || Val > 3)
690
0
      break;
691
23
692
23
    uint64_t NumBytes = 0;
693
23
    if (Ty && Ty->isSized()) {
694
23
      uint64_t NumBits = DL.getTypeSizeInBits(Ty);
695
23
      NumBytes = NumBits / 8;
696
23
      if (!isPowerOf2_64(NumBits))
697
0
        NumBytes = 0;
698
23
    }
699
23
700
23
    if (NumBytes != (1ULL << Val))
701
0
      break;
702
23
703
23
    Addr.setShift(Val);
704
23
    Addr.setExtendType(AArch64_AM::LSL);
705
23
706
23
    const Value *Src = U->getOperand(0);
707
23
    if (const auto *I = dyn_cast<Instruction>(Src)) {
708
17
      if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
709
17
        // Fold the zext or sext when it won't become a noop.
710
17
        if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
711
5
          if (!isIntExtFree(ZE) &&
712
5
              
ZE->getOperand(0)->getType()->isIntegerTy(32)4
) {
713
4
            Addr.setExtendType(AArch64_AM::UXTW);
714
4
            Src = ZE->getOperand(0);
715
4
          }
716
12
        } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
717
8
          if (!isIntExtFree(SE) &&
718
8
              
SE->getOperand(0)->getType()->isIntegerTy(32)7
) {
719
7
            Addr.setExtendType(AArch64_AM::SXTW);
720
7
            Src = SE->getOperand(0);
721
7
          }
722
8
        }
723
17
      }
724
17
    }
725
23
726
23
    if (const auto *AI = dyn_cast<BinaryOperator>(Src))
727
5
      if (AI->getOpcode() == Instruction::And) {
728
4
        const Value *LHS = AI->getOperand(0);
729
4
        const Value *RHS = AI->getOperand(1);
730
4
731
4
        if (const auto *C = dyn_cast<ConstantInt>(LHS))
732
0
          if (C->getValue() == 0xffffffff)
733
0
            std::swap(LHS, RHS);
734
4
735
4
        if (const auto *C = dyn_cast<ConstantInt>(RHS))
736
3
          if (C->getValue() == 0xffffffff) {
737
3
            Addr.setExtendType(AArch64_AM::UXTW);
738
3
            unsigned Reg = getRegForValue(LHS);
739
3
            if (!Reg)
740
0
              return false;
741
3
            bool RegIsKill = hasTrivialKill(LHS);
742
3
            Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
743
3
                                             AArch64::sub_32);
744
3
            Addr.setOffsetReg(Reg);
745
3
            return true;
746
3
          }
747
4
      }
748
20
749
20
    unsigned Reg = getRegForValue(Src);
750
20
    if (!Reg)
751
0
      return false;
752
20
    Addr.setOffsetReg(Reg);
753
20
    return true;
754
20
  }
755
20
  case Instruction::Mul: {
756
13
    if (Addr.getOffsetReg())
757
0
      break;
758
13
759
13
    if (!isMulPowOf2(U))
760
0
      break;
761
13
762
13
    const Value *LHS = U->getOperand(0);
763
13
    const Value *RHS = U->getOperand(1);
764
13
765
13
    // Canonicalize power-of-2 value to the RHS.
766
13
    if (const auto *C = dyn_cast<ConstantInt>(LHS))
767
0
      if (C->getValue().isPowerOf2())
768
0
        std::swap(LHS, RHS);
769
13
770
13
    assert(isa<ConstantInt>(RHS) && "Expected an ConstantInt.");
771
13
    const auto *C = cast<ConstantInt>(RHS);
772
13
    unsigned Val = C->getValue().logBase2();
773
13
    if (Val < 1 || Val > 3)
774
0
      break;
775
13
776
13
    uint64_t NumBytes = 0;
777
13
    if (Ty && Ty->isSized()) {
778
13
      uint64_t NumBits = DL.getTypeSizeInBits(Ty);
779
13
      NumBytes = NumBits / 8;
780
13
      if (!isPowerOf2_64(NumBits))
781
0
        NumBytes = 0;
782
13
    }
783
13
784
13
    if (NumBytes != (1ULL << Val))
785
0
      break;
786
13
787
13
    Addr.setShift(Val);
788
13
    Addr.setExtendType(AArch64_AM::LSL);
789
13
790
13
    const Value *Src = LHS;
791
13
    if (const auto *I = dyn_cast<Instruction>(Src)) {
792
9
      if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
793
8
        // Fold the zext or sext when it won't become a noop.
794
8
        if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
795
4
          if (!isIntExtFree(ZE) &&
796
4
              
ZE->getOperand(0)->getType()->isIntegerTy(32)3
) {
797
3
            Addr.setExtendType(AArch64_AM::UXTW);
798
3
            Src = ZE->getOperand(0);
799
3
          }
800
4
        } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
801
4
          if (!isIntExtFree(SE) &&
802
4
              
SE->getOperand(0)->getType()->isIntegerTy(32)3
) {
803
3
            Addr.setExtendType(AArch64_AM::SXTW);
804
3
            Src = SE->getOperand(0);
805
3
          }
806
4
        }
807
8
      }
808
9
    }
809
13
810
13
    unsigned Reg = getRegForValue(Src);
811
13
    if (!Reg)
812
0
      return false;
813
13
    Addr.setOffsetReg(Reg);
814
13
    return true;
815
13
  }
816
13
  case Instruction::And: {
817
3
    if (Addr.getOffsetReg())
818
0
      break;
819
3
820
3
    if (!Ty || 
DL.getTypeSizeInBits(Ty) != 82
)
821
2
      break;
822
1
823
1
    const Value *LHS = U->getOperand(0);
824
1
    const Value *RHS = U->getOperand(1);
825
1
826
1
    if (const auto *C = dyn_cast<ConstantInt>(LHS))
827
0
      if (C->getValue() == 0xffffffff)
828
0
        std::swap(LHS, RHS);
829
1
830
1
    if (const auto *C = dyn_cast<ConstantInt>(RHS))
831
1
      if (C->getValue() == 0xffffffff) {
832
1
        Addr.setShift(0);
833
1
        Addr.setExtendType(AArch64_AM::LSL);
834
1
        Addr.setExtendType(AArch64_AM::UXTW);
835
1
836
1
        unsigned Reg = getRegForValue(LHS);
837
1
        if (!Reg)
838
0
          return false;
839
1
        bool RegIsKill = hasTrivialKill(LHS);
840
1
        Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
841
1
                                         AArch64::sub_32);
842
1
        Addr.setOffsetReg(Reg);
843
1
        return true;
844
1
      }
845
0
    break;
846
0
  }
847
20
  case Instruction::SExt:
848
20
  case Instruction::ZExt: {
849
20
    if (!Addr.getReg() || Addr.getOffsetReg())
850
0
      break;
851
20
852
20
    const Value *Src = nullptr;
853
20
    // Fold the zext or sext when it won't become a noop.
854
20
    if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
855
0
      if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
856
0
        Addr.setExtendType(AArch64_AM::UXTW);
857
0
        Src = ZE->getOperand(0);
858
0
      }
859
20
    } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
860
20
      if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
861
20
        Addr.setExtendType(AArch64_AM::SXTW);
862
20
        Src = SE->getOperand(0);
863
20
      }
864
20
    }
865
20
866
20
    if (!Src)
867
0
      break;
868
20
869
20
    Addr.setShift(0);
870
20
    unsigned Reg = getRegForValue(Src);
871
20
    if (!Reg)
872
0
      return false;
873
20
    Addr.setOffsetReg(Reg);
874
20
    return true;
875
20
  }
876
611
  } // end switch
877
611
878
611
  if (Addr.isRegBase() && !Addr.getReg()) {
879
586
    unsigned Reg = getRegForValue(Obj);
880
586
    if (!Reg)
881
25
      return false;
882
561
    Addr.setReg(Reg);
883
561
    return true;
884
561
  }
885
25
886
25
  if (!Addr.getOffsetReg()) {
887
25
    unsigned Reg = getRegForValue(Obj);
888
25
    if (!Reg)
889
0
      return false;
890
25
    Addr.setOffsetReg(Reg);
891
25
    return true;
892
25
  }
893
0
894
0
  return false;
895
0
}
896
897
127
bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
898
127
  const User *U = nullptr;
899
127
  unsigned Opcode = Instruction::UserOp1;
900
127
  bool InMBB = true;
901
127
902
127
  if (const auto *I = dyn_cast<Instruction>(V)) {
903
14
    Opcode = I->getOpcode();
904
14
    U = I;
905
14
    InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
906
113
  } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
907
1
    Opcode = C->getOpcode();
908
1
    U = C;
909
1
  }
910
127
911
127
  switch (Opcode) {
912
127
  
default: break115
;
913
127
  case Instruction::BitCast:
914
0
    // Look past bitcasts if its operand is in the same BB.
915
0
    if (InMBB)
916
0
      return computeCallAddress(U->getOperand(0), Addr);
917
0
    break;
918
12
  case Instruction::IntToPtr:
919
12
    // Look past no-op inttoptrs if its operand is in the same BB.
920
12
    if (InMBB &&
921
12
        TLI.getValueType(DL, U->getOperand(0)->getType()) ==
922
12
            TLI.getPointerTy(DL))
923
12
      return computeCallAddress(U->getOperand(0), Addr);
924
0
    break;
925
0
  case Instruction::PtrToInt:
926
0
    // Look past no-op ptrtoints if its operand is in the same BB.
927
0
    if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
928
0
      return computeCallAddress(U->getOperand(0), Addr);
929
0
    break;
930
115
  }
931
115
932
115
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
933
96
    Addr.setGlobalValue(GV);
934
96
    return true;
935
96
  }
936
19
937
19
  // If all else fails, try to materialize the value in a register.
938
19
  if (!Addr.getGlobalValue()) {
939
19
    Addr.setReg(getRegForValue(V));
940
19
    return Addr.getReg() != 0;
941
19
  }
942
0
943
0
  return false;
944
0
}
945
946
3.70k
bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
947
3.70k
  EVT evt = TLI.getValueType(DL, Ty, true);
948
3.70k
949
3.70k
  // Only handle simple types.
950
3.70k
  if (evt == MVT::Other || 
!evt.isSimple()3.69k
)
951
16
    return false;
952
3.69k
  VT = evt.getSimpleVT();
953
3.69k
954
3.69k
  // This is a legal type, but it's not something we handle in fast-isel.
955
3.69k
  if (VT == MVT::f128)
956
26
    return false;
957
3.66k
958
3.66k
  // Handle all other legal types, i.e. a register that will directly hold this
959
3.66k
  // value.
960
3.66k
  return TLI.isTypeLegal(VT);
961
3.66k
}
962
963
/// Determine if the value type is supported by FastISel.
964
///
965
/// FastISel for AArch64 can handle more value types than are legal. This adds
966
/// simple value type such as i1, i8, and i16.
967
1.95k
bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
968
1.95k
  if (Ty->isVectorTy() && 
!IsVectorAllowed301
)
969
6
    return false;
970
1.94k
971
1.94k
  if (isTypeLegal(Ty, VT))
972
1.45k
    return true;
973
491
974
491
  // If this is a type than can be sign or zero-extended to a basic operation
975
491
  // go ahead and accept it now.
976
491
  if (VT == MVT::i1 || 
VT == MVT::i8398
||
VT == MVT::i16209
)
977
463
    return true;
978
28
979
28
  return false;
980
28
}
981
982
1.33k
bool AArch64FastISel::isValueAvailable(const Value *V) const {
983
1.33k
  if (!isa<Instruction>(V))
984
509
    return true;
985
828
986
828
  const auto *I = cast<Instruction>(V);
987
828
  return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
988
828
}
989
990
834
bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
991
834
  unsigned ScaleFactor = getImplicitScaleFactor(VT);
992
834
  if (!ScaleFactor)
993
224
    return false;
994
610
995
610
  bool ImmediateOffsetNeedsLowering = false;
996
610
  bool RegisterOffsetNeedsLowering = false;
997
610
  int64_t Offset = Addr.getOffset();
998
610
  if (((Offset < 0) || 
(Offset & (ScaleFactor - 1))580
) &&
!isInt<9>(Offset)34
)
999
4
    ImmediateOffsetNeedsLowering = true;
1000
606
  else if (Offset > 0 && 
!(Offset & (ScaleFactor - 1))105
&&
1001
606
           
!isUInt<12>(Offset / ScaleFactor)103
)
1002
6
    ImmediateOffsetNeedsLowering = true;
1003
610
1004
610
  // Cannot encode an offset register and an immediate offset in the same
1005
610
  // instruction. Fold the immediate offset into the load/store instruction and
1006
610
  // emit an additional add to take care of the offset register.
1007
610
  if (!ImmediateOffsetNeedsLowering && 
Addr.getOffset()600
&&
Addr.getOffsetReg()127
)
1008
3
    RegisterOffsetNeedsLowering = true;
1009
610
1010
610
  // Cannot encode zero register as base.
1011
610
  if (Addr.isRegBase() && 
Addr.getOffsetReg()418
&&
!Addr.getReg()80
)
1012
3
    RegisterOffsetNeedsLowering = true;
1013
610
1014
610
  // If this is a stack pointer and the offset needs to be simplified then put
1015
610
  // the alloca address into a register, set the base type back to register and
1016
610
  // continue. This should almost never happen.
1017
610
  if ((ImmediateOffsetNeedsLowering || 
Addr.getOffsetReg()600
) &&
Addr.isFIBase()91
)
1018
3
  {
1019
3
    unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1020
3
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
1021
3
            ResultReg)
1022
3
      .addFrameIndex(Addr.getFI())
1023
3
      .addImm(0)
1024
3
      .addImm(0);
1025
3
    Addr.setKind(Address::RegBase);
1026
3
    Addr.setReg(ResultReg);
1027
3
  }
1028
610
1029
610
  if (RegisterOffsetNeedsLowering) {
1030
5
    unsigned ResultReg = 0;
1031
5
    if (Addr.getReg()) {
1032
2
      if (Addr.getExtendType() == AArch64_AM::SXTW ||
1033
2
          
Addr.getExtendType() == AArch64_AM::UXTW1
)
1034
1
        ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1035
1
                                  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
1036
1
                                  /*TODO:IsKill=*/false, Addr.getExtendType(),
1037
1
                                  Addr.getShift());
1038
1
      else
1039
1
        ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1040
1
                                  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
1041
1
                                  /*TODO:IsKill=*/false, AArch64_AM::LSL,
1042
1
                                  Addr.getShift());
1043
3
    } else {
1044
3
      if (Addr.getExtendType() == AArch64_AM::UXTW)
1045
0
        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1046
0
                               /*Op0IsKill=*/false, Addr.getShift(),
1047
0
                               /*IsZExt=*/true);
1048
3
      else if (Addr.getExtendType() == AArch64_AM::SXTW)
1049
1
        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1050
1
                               /*Op0IsKill=*/false, Addr.getShift(),
1051
1
                               /*IsZExt=*/false);
1052
2
      else
1053
2
        ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1054
2
                               /*Op0IsKill=*/false, Addr.getShift());
1055
3
    }
1056
5
    if (!ResultReg)
1057
0
      return false;
1058
5
1059
5
    Addr.setReg(ResultReg);
1060
5
    Addr.setOffsetReg(0);
1061
5
    Addr.setShift(0);
1062
5
    Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1063
5
  }
1064
610
1065
610
  // Since the offset is too large for the load/store instruction get the
1066
610
  // reg+offset into a register.
1067
610
  if (ImmediateOffsetNeedsLowering) {
1068
10
    unsigned ResultReg;
1069
10
    if (Addr.getReg())
1070
10
      // Try to fold the immediate into the add instruction.
1071
10
      ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset);
1072
0
    else
1073
0
      ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1074
10
1075
10
    if (!ResultReg)
1076
2
      return false;
1077
8
    Addr.setReg(ResultReg);
1078
8
    Addr.setOffset(0);
1079
8
  }
1080
610
  
return true608
;
1081
610
}
1082
1083
void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1084
                                           const MachineInstrBuilder &MIB,
1085
                                           MachineMemOperand::Flags Flags,
1086
                                           unsigned ScaleFactor,
1087
608
                                           MachineMemOperand *MMO) {
1088
608
  int64_t Offset = Addr.getOffset() / ScaleFactor;
1089
608
  // Frame base works a bit differently. Handle it separately.
1090
608
  if (Addr.isFIBase()) {
1091
189
    int FI = Addr.getFI();
1092
189
    // FIXME: We shouldn't be using getObjectSize/getObjectAlignment.  The size
1093
189
    // and alignment should be based on the VT.
1094
189
    MMO = FuncInfo.MF->getMachineMemOperand(
1095
189
        MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1096
189
        MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
1097
189
    // Now add the rest of the operands.
1098
189
    MIB.addFrameIndex(FI).addImm(Offset);
1099
419
  } else {
1100
419
    assert(Addr.isRegBase() && "Unexpected address kind.");
1101
419
    const MCInstrDesc &II = MIB->getDesc();
1102
419
    unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 
1173
:
0246
;
1103
419
    Addr.setReg(
1104
419
      constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1105
419
    Addr.setOffsetReg(
1106
419
      constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1107
419
    if (Addr.getOffsetReg()) {
1108
77
      assert(Addr.getOffset() == 0 && "Unexpected offset");
1109
77
      bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1110
77
                      
Addr.getExtendType() == AArch64_AM::SXTX49
;
1111
77
      MIB.addReg(Addr.getReg());
1112
77
      MIB.addReg(Addr.getOffsetReg());
1113
77
      MIB.addImm(IsSigned);
1114
77
      MIB.addImm(Addr.getShift() != 0);
1115
77
    } else
1116
342
      MIB.addReg(Addr.getReg()).addImm(Offset);
1117
419
  }
1118
608
1119
608
  if (MMO)
1120
548
    MIB.addMemOperand(MMO);
1121
608
}
1122
1123
unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1124
                                     const Value *RHS, bool SetFlags,
1125
313
                                     bool WantResult,  bool IsZExt) {
1126
313
  AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1127
313
  bool NeedExtend = false;
1128
313
  switch (RetVT.SimpleTy) {
1129
313
  default:
1130
0
    return 0;
1131
313
  case MVT::i1:
1132
2
    NeedExtend = true;
1133
2
    break;
1134
313
  case MVT::i8:
1135
3
    NeedExtend = true;
1136
3
    ExtendType = IsZExt ? 
AArch64_AM::UXTB0
: AArch64_AM::SXTB;
1137
3
    break;
1138
313
  case MVT::i16:
1139
5
    NeedExtend = true;
1140
5
    ExtendType = IsZExt ? 
AArch64_AM::UXTH2
:
AArch64_AM::SXTH3
;
1141
5
    break;
1142
313
  case MVT::i32:  // fall-through
1143
303
  case MVT::i64:
1144
303
    break;
1145
313
  }
1146
313
  MVT SrcVT = RetVT;
1147
313
  RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1148
313
1149
313
  // Canonicalize immediates to the RHS first.
1150
313
  if (UseAdd && 
isa<Constant>(LHS)232
&&
!isa<Constant>(RHS)0
)
1151
0
    std::swap(LHS, RHS);
1152
313
1153
313
  // Canonicalize mul by power of 2 to the RHS.
1154
313
  if (UseAdd && 
LHS->hasOneUse()232
&&
isValueAvailable(LHS)199
)
1155
199
    if (isMulPowOf2(LHS))
1156
0
      std::swap(LHS, RHS);
1157
313
1158
313
  // Canonicalize shift immediate to the RHS.
1159
313
  if (UseAdd && 
LHS->hasOneUse()232
&&
isValueAvailable(LHS)199
)
1160
199
    if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1161
78
      if (isa<ConstantInt>(SI->getOperand(1)))
1162
3
        if (SI->getOpcode() == Instruction::Shl  ||
1163
3
            SI->getOpcode() == Instruction::LShr ||
1164
3
            SI->getOpcode() == Instruction::AShr   )
1165
0
          std::swap(LHS, RHS);
1166
313
1167
313
  unsigned LHSReg = getRegForValue(LHS);
1168
313
  if (!LHSReg)
1169
0
    return 0;
1170
313
  bool LHSIsKill = hasTrivialKill(LHS);
1171
313
1172
313
  if (NeedExtend)
1173
10
    LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1174
313
1175
313
  unsigned ResultReg = 0;
1176
313
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1177
58
    uint64_t Imm = IsZExt ? 
C->getZExtValue()1
:
C->getSExtValue()57
;
1178
58
    if (C->isNegative())
1179
8
      ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
1180
8
                                SetFlags, WantResult);
1181
50
    else
1182
50
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
1183
50
                                WantResult);
1184
255
  } else if (const auto *C = dyn_cast<Constant>(RHS))
1185
2
    if (C->isNullValue())
1186
2
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags,
1187
2
                                WantResult);
1188
313
1189
313
  if (ResultReg)
1190
57
    return ResultReg;
1191
256
1192
256
  // Only extend the RHS within the instruction if there is a valid extend type.
1193
256
  if (ExtendType != AArch64_AM::InvalidShiftExtend && 
RHS->hasOneUse()6
&&
1194
256
      
isValueAvailable(RHS)6
) {
1195
6
    if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
1196
0
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
1197
0
        if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
1198
0
          unsigned RHSReg = getRegForValue(SI->getOperand(0));
1199
0
          if (!RHSReg)
1200
0
            return 0;
1201
0
          bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1202
0
          return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1203
0
                               RHSIsKill, ExtendType, C->getZExtValue(),
1204
0
                               SetFlags, WantResult);
1205
0
        }
1206
6
    unsigned RHSReg = getRegForValue(RHS);
1207
6
    if (!RHSReg)
1208
0
      return 0;
1209
6
    bool RHSIsKill = hasTrivialKill(RHS);
1210
6
    return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1211
6
                         ExtendType, 0, SetFlags, WantResult);
1212
6
  }
1213
250
1214
250
  // Check if the mul can be folded into the instruction.
1215
250
  if (RHS->hasOneUse() && 
isValueAvailable(RHS)210
) {
1216
210
    if (isMulPowOf2(RHS)) {
1217
0
      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1218
0
      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1219
0
1220
0
      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1221
0
        if (C->getValue().isPowerOf2())
1222
0
          std::swap(MulLHS, MulRHS);
1223
0
1224
0
      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1225
0
      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1226
0
      unsigned RHSReg = getRegForValue(MulLHS);
1227
0
      if (!RHSReg)
1228
0
        return 0;
1229
0
      bool RHSIsKill = hasTrivialKill(MulLHS);
1230
0
      ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1231
0
                                RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags,
1232
0
                                WantResult);
1233
0
      if (ResultReg)
1234
0
        return ResultReg;
1235
250
    }
1236
210
  }
1237
250
1238
250
  // Check if the shift can be folded into the instruction.
1239
250
  if (RHS->hasOneUse() && 
isValueAvailable(RHS)210
) {
1240
210
    if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1241
51
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1242
11
        AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1243
11
        switch (SI->getOpcode()) {
1244
11
        
default: break0
;
1245
11
        
case Instruction::Shl: ShiftType = AArch64_AM::LSL; break3
;
1246
11
        
case Instruction::LShr: ShiftType = AArch64_AM::LSR; break0
;
1247
11
        
case Instruction::AShr: ShiftType = AArch64_AM::ASR; break8
;
1248
11
        }
1249
11
        uint64_t ShiftVal = C->getZExtValue();
1250
11
        if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1251
11
          unsigned RHSReg = getRegForValue(SI->getOperand(0));
1252
11
          if (!RHSReg)
1253
0
            return 0;
1254
11
          bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1255
11
          ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1256
11
                                    RHSIsKill, ShiftType, ShiftVal, SetFlags,
1257
11
                                    WantResult);
1258
11
          if (ResultReg)
1259
9
            return ResultReg;
1260
241
        }
1261
11
      }
1262
51
    }
1263
210
  }
1264
241
1265
241
  unsigned RHSReg = getRegForValue(RHS);
1266
241
  if (!RHSReg)
1267
0
    return 0;
1268
241
  bool RHSIsKill = hasTrivialKill(RHS);
1269
241
1270
241
  if (NeedExtend)
1271
1
    RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1272
241
1273
241
  return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1274
241
                       SetFlags, WantResult);
1275
241
}
1276
1277
unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1278
                                        bool LHSIsKill, unsigned RHSReg,
1279
                                        bool RHSIsKill, bool SetFlags,
1280
250
                                        bool WantResult) {
1281
250
  assert(LHSReg && RHSReg && "Invalid register number.");
1282
250
1283
250
  if (LHSReg == AArch64::SP || 
LHSReg == AArch64::WSP248
||
1284
250
      
RHSReg == AArch64::SP248
||
RHSReg == AArch64::WSP248
)
1285
2
    return 0;
1286
248
1287
248
  if (RetVT != MVT::i32 && 
RetVT != MVT::i64103
)
1288
0
    return 0;
1289
248
1290
248
  static const unsigned OpcTable[2][2][2] = {
1291
248
    { { AArch64::SUBWrr,  AArch64::SUBXrr  },
1292
248
      { AArch64::ADDWrr,  AArch64::ADDXrr  }  },
1293
248
    { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1294
248
      { AArch64::ADDSWrr, AArch64::ADDSXrr }  }
1295
248
  };
1296
248
  bool Is64Bit = RetVT == MVT::i64;
1297
248
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1298
248
  const TargetRegisterClass *RC =
1299
248
      Is64Bit ? 
&AArch64::GPR64RegClass103
:
&AArch64::GPR32RegClass145
;
1300
248
  unsigned ResultReg;
1301
248
  if (WantResult)
1302
202
    ResultReg = createResultReg(RC);
1303
46
  else
1304
46
    ResultReg = Is64Bit ? 
AArch64::XZR8
:
AArch64::WZR38
;
1305
248
1306
248
  const MCInstrDesc &II = TII.get(Opc);
1307
248
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1308
248
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1309
248
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1310
248
      .addReg(LHSReg, getKillRegState(LHSIsKill))
1311
248
      .addReg(RHSReg, getKillRegState(RHSIsKill));
1312
248
  return ResultReg;
1313
248
}
1314
1315
unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1316
                                        bool LHSIsKill, uint64_t Imm,
1317
105
                                        bool SetFlags, bool WantResult) {
1318
105
  assert(LHSReg && "Invalid register number.");
1319
105
1320
105
  if (RetVT != MVT::i32 && 
RetVT != MVT::i6448
)
1321
0
    return 0;
1322
105
1323
105
  unsigned ShiftImm;
1324
105
  if (isUInt<12>(Imm))
1325
93
    ShiftImm = 0;
1326
12
  else if ((Imm & 0xfff000) == Imm) {
1327
4
    ShiftImm = 12;
1328
4
    Imm >>= 12;
1329
4
  } else
1330
8
    return 0;
1331
97
1332
97
  static const unsigned OpcTable[2][2][2] = {
1333
97
    { { AArch64::SUBWri,  AArch64::SUBXri  },
1334
97
      { AArch64::ADDWri,  AArch64::ADDXri  }  },
1335
97
    { { AArch64::SUBSWri, AArch64::SUBSXri },
1336
97
      { AArch64::ADDSWri, AArch64::ADDSXri }  }
1337
97
  };
1338
97
  bool Is64Bit = RetVT == MVT::i64;
1339
97
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1340
97
  const TargetRegisterClass *RC;
1341
97
  if (SetFlags)
1342
34
    RC = Is64Bit ? 
&AArch64::GPR64RegClass12
:
&AArch64::GPR32RegClass22
;
1343
63
  else
1344
63
    RC = Is64Bit ? 
&AArch64::GPR64spRegClass29
:
&AArch64::GPR32spRegClass34
;
1345
97
  unsigned ResultReg;
1346
97
  if (WantResult)
1347
68
    ResultReg = createResultReg(RC);
1348
29
  else
1349
29
    ResultReg = Is64Bit ? 
AArch64::XZR10
:
AArch64::WZR19
;
1350
97
1351
97
  const MCInstrDesc &II = TII.get(Opc);
1352
97
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1353
97
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1354
97
      .addReg(LHSReg, getKillRegState(LHSIsKill))
1355
97
      .addImm(Imm)
1356
97
      .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1357
97
  return ResultReg;
1358
97
}
1359
1360
unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1361
                                        bool LHSIsKill, unsigned RHSReg,
1362
                                        bool RHSIsKill,
1363
                                        AArch64_AM::ShiftExtendType ShiftType,
1364
                                        uint64_t ShiftImm, bool SetFlags,
1365
25
                                        bool WantResult) {
1366
25
  assert(LHSReg && RHSReg && "Invalid register number.");
1367
25
  assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1368
25
         RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1369
25
1370
25
  if (RetVT != MVT::i32 && 
RetVT != MVT::i6410
)
1371
0
    return 0;
1372
25
1373
25
  // Don't deal with undefined shifts.
1374
25
  if (ShiftImm >= RetVT.getSizeInBits())
1375
2
    return 0;
1376
23
1377
23
  static const unsigned OpcTable[2][2][2] = {
1378
23
    { { AArch64::SUBWrs,  AArch64::SUBXrs  },
1379
23
      { AArch64::ADDWrs,  AArch64::ADDXrs  }  },
1380
23
    { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1381
23
      { AArch64::ADDSWrs, AArch64::ADDSXrs }  }
1382
23
  };
1383
23
  bool Is64Bit = RetVT == MVT::i64;
1384
23
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1385
23
  const TargetRegisterClass *RC =
1386
23
      Is64Bit ? 
&AArch64::GPR64RegClass9
:
&AArch64::GPR32RegClass14
;
1387
23
  unsigned ResultReg;
1388
23
  if (WantResult)
1389
14
    ResultReg = createResultReg(RC);
1390
9
  else
1391
9
    ResultReg = Is64Bit ? 
AArch64::XZR6
:
AArch64::WZR3
;
1392
23
1393
23
  const MCInstrDesc &II = TII.get(Opc);
1394
23
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1395
23
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1396
23
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1397
23
      .addReg(LHSReg, getKillRegState(LHSIsKill))
1398
23
      .addReg(RHSReg, getKillRegState(RHSIsKill))
1399
23
      .addImm(getShifterImm(ShiftType, ShiftImm));
1400
23
  return ResultReg;
1401
23
}
1402
1403
unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1404
                                        bool LHSIsKill, unsigned RHSReg,
1405
                                        bool RHSIsKill,
1406
                                        AArch64_AM::ShiftExtendType ExtType,
1407
                                        uint64_t ShiftImm, bool SetFlags,
1408
7
                                        bool WantResult) {
1409
7
  assert(LHSReg && RHSReg && "Invalid register number.");
1410
7
  assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1411
7
         RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1412
7
1413
7
  if (RetVT != MVT::i32 && 
RetVT != MVT::i641
)
1414
0
    return 0;
1415
7
1416
7
  if (ShiftImm >= 4)
1417
0
    return 0;
1418
7
1419
7
  static const unsigned OpcTable[2][2][2] = {
1420
7
    { { AArch64::SUBWrx,  AArch64::SUBXrx  },
1421
7
      { AArch64::ADDWrx,  AArch64::ADDXrx  }  },
1422
7
    { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1423
7
      { AArch64::ADDSWrx, AArch64::ADDSXrx }  }
1424
7
  };
1425
7
  bool Is64Bit = RetVT == MVT::i64;
1426
7
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1427
7
  const TargetRegisterClass *RC = nullptr;
1428
7
  if (SetFlags)
1429
5
    RC = Is64Bit ? 
&AArch64::GPR64RegClass0
: &AArch64::GPR32RegClass;
1430
2
  else
1431
2
    RC = Is64Bit ? 
&AArch64::GPR64spRegClass1
:
&AArch64::GPR32spRegClass1
;
1432
7
  unsigned ResultReg;
1433
7
  if (WantResult)
1434
2
    ResultReg = createResultReg(RC);
1435
5
  else
1436
5
    ResultReg = Is64Bit ? 
AArch64::XZR0
: AArch64::WZR;
1437
7
1438
7
  const MCInstrDesc &II = TII.get(Opc);
1439
7
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1440
7
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1441
7
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1442
7
      .addReg(LHSReg, getKillRegState(LHSIsKill))
1443
7
      .addReg(RHSReg, getKillRegState(RHSIsKill))
1444
7
      .addImm(getArithExtendImm(ExtType, ShiftImm));
1445
7
  return ResultReg;
1446
7
}
1447
1448
112
bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1449
112
  Type *Ty = LHS->getType();
1450
112
  EVT EVT = TLI.getValueType(DL, Ty, true);
1451
112
  if (!EVT.isSimple())
1452
0
    return false;
1453
112
  MVT VT = EVT.getSimpleVT();
1454
112
1455
112
  switch (VT.SimpleTy) {
1456
112
  default:
1457
0
    return false;
1458
112
  case MVT::i1:
1459
65
  case MVT::i8:
1460
65
  case MVT::i16:
1461
65
  case MVT::i32:
1462
65
  case MVT::i64:
1463
65
    return emitICmp(VT, LHS, RHS, IsZExt);
1464
65
  case MVT::f32:
1465
47
  case MVT::f64:
1466
47
    return emitFCmp(VT, LHS, RHS);
1467
112
  }
1468
112
}
1469
1470
bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1471
65
                               bool IsZExt) {
1472
65
  return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1473
65
                 IsZExt) != 0;
1474
65
}
1475
1476
bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1477
11
                                  uint64_t Imm) {
1478
11
  return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
1479
11
                       /*SetFlags=*/true, /*WantResult=*/false) != 0;
1480
11
}
1481
1482
47
bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1483
47
  if (RetVT != MVT::f32 && 
RetVT != MVT::f642
)
1484
0
    return false;
1485
47
1486
47
  // Check to see if the 2nd operand is a constant that we can encode directly
1487
47
  // in the compare.
1488
47
  bool UseImm = false;
1489
47
  if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1490
4
    if (CFP->isZero() && 
!CFP->isNegative()2
)
1491
2
      UseImm = true;
1492
47
1493
47
  unsigned LHSReg = getRegForValue(LHS);
1494
47
  if (!LHSReg)
1495
0
    return false;
1496
47
  bool LHSIsKill = hasTrivialKill(LHS);
1497
47
1498
47
  if (UseImm) {
1499
2
    unsigned Opc = (RetVT == MVT::f64) ? 
AArch64::FCMPDri1
:
AArch64::FCMPSri1
;
1500
2
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1501
2
        .addReg(LHSReg, getKillRegState(LHSIsKill));
1502
2
    return true;
1503
2
  }
1504
45
1505
45
  unsigned RHSReg = getRegForValue(RHS);
1506
45
  if (!RHSReg)
1507
0
    return false;
1508
45
  bool RHSIsKill = hasTrivialKill(RHS);
1509
45
1510
45
  unsigned Opc = (RetVT == MVT::f64) ? 
AArch64::FCMPDrr1
:
AArch64::FCMPSrr44
;
1511
45
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1512
45
      .addReg(LHSReg, getKillRegState(LHSIsKill))
1513
45
      .addReg(RHSReg, getKillRegState(RHSIsKill));
1514
45
  return true;
1515
45
}
1516
1517
unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1518
232
                                  bool SetFlags, bool WantResult, bool IsZExt) {
1519
232
  return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1520
232
                    IsZExt);
1521
232
}
1522
1523
/// This method is a wrapper to simplify add emission.
1524
///
1525
/// First try to emit an add with an immediate operand using emitAddSub_ri. If
1526
/// that fails, then try to materialize the immediate into a register and use
1527
/// emitAddSub_rr instead.
1528
unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill,
1529
34
                                      int64_t Imm) {
1530
34
  unsigned ResultReg;
1531
34
  if (Imm < 0)
1532
2
    ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm);
1533
32
  else
1534
32
    ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm);
1535
34
1536
34
  if (ResultReg)
1537
29
    return ResultReg;
1538
5
1539
5
  unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1540
5
  if (!CReg)
1541
0
    return 0;
1542
5
1543
5
  ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true);
1544
5
  return ResultReg;
1545
5
}
1546
1547
unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1548
81
                                  bool SetFlags, bool WantResult, bool IsZExt) {
1549
81
  return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1550
81
                    IsZExt);
1551
81
}
1552
1553
unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1554
                                      bool LHSIsKill, unsigned RHSReg,
1555
4
                                      bool RHSIsKill, bool WantResult) {
1556
4
  return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1557
4
                       RHSIsKill, /*SetFlags=*/true, WantResult);
1558
4
}
1559
1560
unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1561
                                      bool LHSIsKill, unsigned RHSReg,
1562
                                      bool RHSIsKill,
1563
                                      AArch64_AM::ShiftExtendType ShiftType,
1564
9
                                      uint64_t ShiftImm, bool WantResult) {
1565
9
  return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1566
9
                       RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
1567
9
                       WantResult);
1568
9
}
1569
1570
unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1571
91
                                        const Value *LHS, const Value *RHS) {
1572
91
  // Canonicalize immediates to the RHS first.
1573
91
  if (isa<ConstantInt>(LHS) && 
!isa<ConstantInt>(RHS)0
)
1574
0
    std::swap(LHS, RHS);
1575
91
1576
91
  // Canonicalize mul by power-of-2 to the RHS.
1577
91
  if (LHS->hasOneUse() && 
isValueAvailable(LHS)89
)
1578
89
    if (isMulPowOf2(LHS))
1579
0
      std::swap(LHS, RHS);
1580
91
1581
91
  // Canonicalize shift immediate to the RHS.
1582
91
  if (LHS->hasOneUse() && 
isValueAvailable(LHS)89
)
1583
89
    if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1584
0
      if (isa<ConstantInt>(SI->getOperand(1)))
1585
0
        std::swap(LHS, RHS);
1586
91
1587
91
  unsigned LHSReg = getRegForValue(LHS);
1588
91
  if (!LHSReg)
1589
0
    return 0;
1590
91
  bool LHSIsKill = hasTrivialKill(LHS);
1591
91
1592
91
  unsigned ResultReg = 0;
1593
91
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1594
19
    uint64_t Imm = C->getZExtValue();
1595
19
    ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
1596
19
  }
1597
91
  if (ResultReg)
1598
19
    return ResultReg;
1599
72
1600
72
  // Check if the mul can be folded into the instruction.
1601
72
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1602
72
    if (isMulPowOf2(RHS)) {
1603
6
      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1604
6
      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1605
6
1606
6
      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1607
0
        if (C->getValue().isPowerOf2())
1608
0
          std::swap(MulLHS, MulRHS);
1609
6
1610
6
      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1611
6
      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1612
6
1613
6
      unsigned RHSReg = getRegForValue(MulLHS);
1614
6
      if (!RHSReg)
1615
0
        return 0;
1616
6
      bool RHSIsKill = hasTrivialKill(MulLHS);
1617
6
      ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1618
6
                                   RHSIsKill, ShiftVal);
1619
6
      if (ResultReg)
1620
6
        return ResultReg;
1621
66
    }
1622
72
  }
1623
66
1624
66
  // Check if the shift can be folded into the instruction.
1625
66
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1626
66
    if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1627
25
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1628
25
        uint64_t ShiftVal = C->getZExtValue();
1629
25
        unsigned RHSReg = getRegForValue(SI->getOperand(0));
1630
25
        if (!RHSReg)
1631
0
          return 0;
1632
25
        bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1633
25
        ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1634
25
                                     RHSIsKill, ShiftVal);
1635
25
        if (ResultReg)
1636
13
          return ResultReg;
1637
53
      }
1638
66
  }
1639
53
1640
53
  unsigned RHSReg = getRegForValue(RHS);
1641
53
  if (!RHSReg)
1642
0
    return 0;
1643
53
  bool RHSIsKill = hasTrivialKill(RHS);
1644
53
1645
53
  MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1646
53
  ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
1647
53
  if (RetVT >= MVT::i8 && 
RetVT <= MVT::i1627
) {
1648
12
    uint64_t Mask = (RetVT == MVT::i8) ? 
0xff6
:
0xffff6
;
1649
12
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1650
12
  }
1651
53
  return ResultReg;
1652
53
}
1653
1654
unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1655
                                           unsigned LHSReg, bool LHSIsKill,
1656
204
                                           uint64_t Imm) {
1657
204
  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1658
204
                "ISD nodes are not consecutive!");
1659
204
  static const unsigned OpcTable[3][2] = {
1660
204
    { AArch64::ANDWri, AArch64::ANDXri },
1661
204
    { AArch64::ORRWri, AArch64::ORRXri },
1662
204
    { AArch64::EORWri, AArch64::EORXri }
1663
204
  };
1664
204
  const TargetRegisterClass *RC;
1665
204
  unsigned Opc;
1666
204
  unsigned RegSize;
1667
204
  switch (RetVT.SimpleTy) {
1668
204
  default:
1669
0
    return 0;
1670
204
  case MVT::i1:
1671
198
  case MVT::i8:
1672
198
  case MVT::i16:
1673
198
  case MVT::i32: {
1674
198
    unsigned Idx = ISDOpc - ISD::AND;
1675
198
    Opc = OpcTable[Idx][0];
1676
198
    RC = &AArch64::GPR32spRegClass;
1677
198
    RegSize = 32;
1678
198
    break;
1679
198
  }
1680
198
  case MVT::i64:
1681
6
    Opc = OpcTable[ISDOpc - ISD::AND][1];
1682
6
    RC = &AArch64::GPR64spRegClass;
1683
6
    RegSize = 64;
1684
6
    break;
1685
204
  }
1686
204
1687
204
  if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1688
0
    return 0;
1689
204
1690
204
  unsigned ResultReg =
1691
204
      fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
1692
204
                      AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1693
204
  if (RetVT >= MVT::i8 && 
RetVT <= MVT::i16203
&&
ISDOpc != ISD::AND6
) {
1694
4
    uint64_t Mask = (RetVT == MVT::i8) ? 
0xff2
:
0xffff2
;
1695
4
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1696
4
  }
1697
204
  return ResultReg;
1698
204
}
1699
1700
unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1701
                                           unsigned LHSReg, bool LHSIsKill,
1702
                                           unsigned RHSReg, bool RHSIsKill,
1703
31
                                           uint64_t ShiftImm) {
1704
31
  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1705
31
                "ISD nodes are not consecutive!");
1706
31
  static const unsigned OpcTable[3][2] = {
1707
31
    { AArch64::ANDWrs, AArch64::ANDXrs },
1708
31
    { AArch64::ORRWrs, AArch64::ORRXrs },
1709
31
    { AArch64::EORWrs, AArch64::EORXrs }
1710
31
  };
1711
31
1712
31
  // Don't deal with undefined shifts.
1713
31
  if (ShiftImm >= RetVT.getSizeInBits())
1714
12
    return 0;
1715
19
1716
19
  const TargetRegisterClass *RC;
1717
19
  unsigned Opc;
1718
19
  switch (RetVT.SimpleTy) {
1719
19
  default:
1720
0
    return 0;
1721
19
  case MVT::i1:
1722
13
  case MVT::i8:
1723
13
  case MVT::i16:
1724
13
  case MVT::i32:
1725
13
    Opc = OpcTable[ISDOpc - ISD::AND][0];
1726
13
    RC = &AArch64::GPR32RegClass;
1727
13
    break;
1728
13
  case MVT::i64:
1729
6
    Opc = OpcTable[ISDOpc - ISD::AND][1];
1730
6
    RC = &AArch64::GPR64RegClass;
1731
6
    break;
1732
19
  }
1733
19
  unsigned ResultReg =
1734
19
      fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1735
19
                       AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1736
19
  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1737
6
    uint64_t Mask = (RetVT == MVT::i8) ? 
0xff3
:
0xffff3
;
1738
6
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1739
6
  }
1740
19
  return ResultReg;
1741
19
}
1742
1743
unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1744
184
                                     uint64_t Imm) {
1745
184
  return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
1746
184
}
1747
1748
unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1749
391
                                   bool WantZExt, MachineMemOperand *MMO) {
1750
391
  if (!TLI.allowsMisalignedMemoryAccesses(VT))
1751
2
    return 0;
1752
389
1753
389
  // Simplify this down to something we can handle.
1754
389
  if (!simplifyAddress(Addr, VT))
1755
72
    return 0;
1756
317
1757
317
  unsigned ScaleFactor = getImplicitScaleFactor(VT);
1758
317
  if (!ScaleFactor)
1759
317
    
llvm_unreachable0
("Unexpected value type.");
1760
317
1761
317
  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1762
317
  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1763
317
  bool UseScaled = true;
1764
317
  if ((Addr.getOffset() < 0) || 
(Addr.getOffset() & (ScaleFactor - 1))292
) {
1765
26
    UseScaled = false;
1766
26
    ScaleFactor = 1;
1767
26
  }
1768
317
1769
317
  static const unsigned GPOpcTable[2][8][4] = {
1770
317
    // Sign-extend.
1771
317
    { { AArch64::LDURSBWi,  AArch64::LDURSHWi,  AArch64::LDURWi,
1772
317
        AArch64::LDURXi  },
1773
317
      { AArch64::LDURSBXi,  AArch64::LDURSHXi,  AArch64::LDURSWi,
1774
317
        AArch64::LDURXi  },
1775
317
      { AArch64::LDRSBWui,  AArch64::LDRSHWui,  AArch64::LDRWui,
1776
317
        AArch64::LDRXui  },
1777
317
      { AArch64::LDRSBXui,  AArch64::LDRSHXui,  AArch64::LDRSWui,
1778
317
        AArch64::LDRXui  },
1779
317
      { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1780
317
        AArch64::LDRXroX },
1781
317
      { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1782
317
        AArch64::LDRXroX },
1783
317
      { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1784
317
        AArch64::LDRXroW },
1785
317
      { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1786
317
        AArch64::LDRXroW }
1787
317
    },
1788
317
    // Zero-extend.
1789
317
    { { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
1790
317
        AArch64::LDURXi  },
1791
317
      { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
1792
317
        AArch64::LDURXi  },
1793
317
      { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
1794
317
        AArch64::LDRXui  },
1795
317
      { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
1796
317
        AArch64::LDRXui  },
1797
317
      { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
1798
317
        AArch64::LDRXroX },
1799
317
      { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
1800
317
        AArch64::LDRXroX },
1801
317
      { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
1802
317
        AArch64::LDRXroW },
1803
317
      { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
1804
317
        AArch64::LDRXroW }
1805
317
    }
1806
317
  };
1807
317
1808
317
  static const unsigned FPOpcTable[4][2] = {
1809
317
    { AArch64::LDURSi,  AArch64::LDURDi  },
1810
317
    { AArch64::LDRSui,  AArch64::LDRDui  },
1811
317
    { AArch64::LDRSroX, AArch64::LDRDroX },
1812
317
    { AArch64::LDRSroW, AArch64::LDRDroW }
1813
317
  };
1814
317
1815
317
  unsigned Opc;
1816
317
  const TargetRegisterClass *RC;
1817
317
  bool UseRegOffset = Addr.isRegBase() && 
!Addr.getOffset()246
&&
Addr.getReg()177
&&
1818
317
                      
Addr.getOffsetReg()177
;
1819
317
  unsigned Idx = UseRegOffset ? 
275
:
UseScaled 242
?
1216
:
026
;
1820
317
  if (Addr.getExtendType() == AArch64_AM::UXTW ||
1821
317
      
Addr.getExtendType() == AArch64_AM::SXTW306
)
1822
39
    Idx++;
1823
317
1824
317
  bool IsRet64Bit = RetVT == MVT::i64;
1825
317
  switch (VT.SimpleTy) {
1826
317
  default:
1827
0
    llvm_unreachable("Unexpected value type.");
1828
317
  case MVT::i1: // Intentional fall-through.
1829
51
  case MVT::i8:
1830
51
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1831
51
    RC = (IsRet64Bit && 
!WantZExt13
) ?
1832
45
             
&AArch64::GPR64RegClass6
: &AArch64::GPR32RegClass;
1833
51
    break;
1834
51
  case MVT::i16:
1835
41
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1836
41
    RC = (IsRet64Bit && 
!WantZExt12
) ?
1837
35
             
&AArch64::GPR64RegClass6
: &AArch64::GPR32RegClass;
1838
41
    break;
1839
116
  case MVT::i32:
1840
116
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1841
116
    RC = (IsRet64Bit && 
!WantZExt15
) ?
1842
109
             
&AArch64::GPR64RegClass7
: &AArch64::GPR32RegClass;
1843
116
    break;
1844
96
  case MVT::i64:
1845
96
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1846
96
    RC = &AArch64::GPR64RegClass;
1847
96
    break;
1848
51
  case MVT::f32:
1849
3
    Opc = FPOpcTable[Idx][0];
1850
3
    RC = &AArch64::FPR32RegClass;
1851
3
    break;
1852
51
  case MVT::f64:
1853
10
    Opc = FPOpcTable[Idx][1];
1854
10
    RC = &AArch64::FPR64RegClass;
1855
10
    break;
1856
317
  }
1857
317
1858
317
  // Create the base instruction, then add the operands.
1859
317
  unsigned ResultReg = createResultReg(RC);
1860
317
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1861
317
                                    TII.get(Opc), ResultReg);
1862
317
  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1863
317
1864
317
  // Loading an i1 requires special handling.
1865
317
  if (VT == MVT::i1) {
1866
3
    unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
1867
3
    assert(ANDReg && "Unexpected AND instruction emission failure.");
1868
3
    ResultReg = ANDReg;
1869
3
  }
1870
317
1871
317
  // For zero-extending loads to 64bit we emit a 32bit load and then convert
1872
317
  // the 32bit reg to a 64bit reg.
1873
317
  if (WantZExt && 
RetVT == MVT::i64279
&&
VT <= MVT::i32117
) {
1874
21
    unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
1875
21
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1876
21
            TII.get(AArch64::SUBREG_TO_REG), Reg64)
1877
21
        .addImm(0)
1878
21
        .addReg(ResultReg, getKillRegState(true))
1879
21
        .addImm(AArch64::sub_32);
1880
21
    ResultReg = Reg64;
1881
21
  }
1882
317
  return ResultReg;
1883
317
}
1884
1885
284
bool AArch64FastISel::selectAddSub(const Instruction *I) {
1886
284
  MVT VT;
1887
284
  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1888
0
    return false;
1889
284
1890
284
  if (VT.isVector())
1891
71
    return selectOperator(I, I->getOpcode());
1892
213
1893
213
  unsigned ResultReg;
1894
213
  switch (I->getOpcode()) {
1895
213
  default:
1896
0
    llvm_unreachable("Unexpected instruction.");
1897
213
  case Instruction::Add:
1898
210
    ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1899
210
    break;
1900
213
  case Instruction::Sub:
1901
3
    ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1902
3
    break;
1903
213
  }
1904
213
  if (!ResultReg)
1905
0
    return false;
1906
213
1907
213
  updateValueMap(I, ResultReg);
1908
213
  return true;
1909
213
}
1910
1911
91
bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1912
91
  MVT VT;
1913
91
  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1914
0
    return false;
1915
91
1916
91
  if (VT.isVector())
1917
0
    return selectOperator(I, I->getOpcode());
1918
91
1919
91
  unsigned ResultReg;
1920
91
  switch (I->getOpcode()) {
1921
91
  default:
1922
0
    llvm_unreachable("Unexpected instruction.");
1923
91
  case Instruction::And:
1924
51
    ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1925
51
    break;
1926
91
  case Instruction::Or:
1927
21
    ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1928
21
    break;
1929
91
  case Instruction::Xor:
1930
19
    ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1931
19
    break;
1932
91
  }
1933
91
  if (!ResultReg)
1934
0
    return false;
1935
91
1936
91
  updateValueMap(I, ResultReg);
1937
91
  return true;
1938
91
}
1939
1940
381
bool AArch64FastISel::selectLoad(const Instruction *I) {
1941
381
  MVT VT;
1942
381
  // Verify we have a legal type before going any further.  Currently, we handle
1943
381
  // simple types that will directly fit in a register (i32/f32/i64/f64) or
1944
381
  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1945
381
  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1946
381
      
cast<LoadInst>(I)->isAtomic()377
)
1947
4
    return false;
1948
377
1949
377
  const Value *SV = I->getOperand(0);
1950
377
  if (TLI.supportSwiftError()) {
1951
377
    // Swifterror values can come from either a function parameter with
1952
377
    // swifterror attribute or an alloca with swifterror attribute.
1953
377
    if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1954
103
      if (Arg->hasSwiftErrorAttr())
1955
0
        return false;
1956
377
    }
1957
377
1958
377
    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1959
74
      if (Alloca->isSwiftError())
1960
5
        return false;
1961
372
    }
1962
377
  }
1963
372
1964
372
  // See if we can handle this address.
1965
372
  Address Addr;
1966
372
  if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1967
22
    return false;
1968
350
1969
350
  // Fold the following sign-/zero-extend into the load instruction.
1970
350
  bool WantZExt = true;
1971
350
  MVT RetVT = VT;
1972
350
  const Value *IntExtVal = nullptr;
1973
350
  if (I->hasOneUse()) {
1974
257
    if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1975
41
      if (isTypeSupported(ZE->getType(), RetVT))
1976
41
        IntExtVal = ZE;
1977
0
      else
1978
0
        RetVT = VT;
1979
216
    } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1980
38
      if (isTypeSupported(SE->getType(), RetVT))
1981
38
        IntExtVal = SE;
1982
0
      else
1983
0
        RetVT = VT;
1984
38
      WantZExt = false;
1985
38
    }
1986
257
  }
1987
350
1988
350
  unsigned ResultReg =
1989
350
      emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1990
350
  if (!ResultReg)
1991
74
    return false;
1992
276
1993
276
  // There are a few different cases we have to handle, because the load or the
1994
276
  // sign-/zero-extend might not be selected by FastISel if we fall-back to
1995
276
  // SelectionDAG. There is also an ordering issue when both instructions are in
1996
276
  // different basic blocks.
1997
276
  // 1.) The load instruction is selected by FastISel, but the integer extend
1998
276
  //     not. This usually happens when the integer extend is in a different
1999
276
  //     basic block and SelectionDAG took over for that basic block.
2000
276
  // 2.) The load instruction is selected before the integer extend. This only
2001
276
  //     happens when the integer extend is in a different basic block.
2002
276
  // 3.) The load instruction is selected by SelectionDAG and the integer extend
2003
276
  //     by FastISel. This happens if there are instructions between the load
2004
276
  //     and the integer extend that couldn't be selected by FastISel.
2005
276
  if (IntExtVal) {
2006
79
    // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
2007
79
    // could select it. Emit a copy to subreg if necessary. FastISel will remove
2008
79
    // it when it selects the integer extend.
2009
79
    unsigned Reg = lookUpRegForValue(IntExtVal);
2010
79
    auto *MI = MRI.getUniqueVRegDef(Reg);
2011
79
    if (!MI) {
2012
1
      if (RetVT == MVT::i64 && VT <= MVT::i32) {
2013
1
        if (WantZExt) {
2014
1
          // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2015
1
          MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
2016
1
          ResultReg = std::prev(I)->getOperand(0).getReg();
2017
1
          removeDeadCode(I, std::next(I));
2018
1
        } else
2019
0
          ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2020
0
                                                 /*IsKill=*/true,
2021
0
                                                 AArch64::sub_32);
2022
1
      }
2023
1
      updateValueMap(I, ResultReg);
2024
1
      return true;
2025
1
    }
2026
78
2027
78
    // The integer extend has already been emitted - delete all the instructions
2028
78
    // that have been emitted by the integer extend lowering code and use the
2029
78
    // result from the load instruction directly.
2030
195
    
while (78
MI) {
2031
117
      Reg = 0;
2032
156
      for (auto &Opnd : MI->uses()) {
2033
156
        if (Opnd.isReg()) {
2034
117
          Reg = Opnd.getReg();
2035
117
          break;
2036
117
        }
2037
156
      }
2038
117
      MachineBasicBlock::iterator I(MI);
2039
117
      removeDeadCode(I, std::next(I));
2040
117
      MI = nullptr;
2041
117
      if (Reg)
2042
117
        MI = MRI.getUniqueVRegDef(Reg);
2043
117
    }
2044
78
    updateValueMap(IntExtVal, ResultReg);
2045
78
    return true;
2046
78
  }
2047
197
2048
197
  updateValueMap(I, ResultReg);
2049
197
  return true;
2050
197
}
2051
2052
bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
2053
                                       unsigned AddrReg,
2054
16
                                       MachineMemOperand *MMO) {
2055
16
  unsigned Opc;
2056
16
  switch (VT.SimpleTy) {
2057
16
  
default: return false0
;
2058
16
  
case MVT::i8: Opc = AArch64::STLRB; break4
;
2059
16
  
case MVT::i16: Opc = AArch64::STLRH; break4
;
2060
16
  
case MVT::i32: Opc = AArch64::STLRW; break4
;
2061
16
  
case MVT::i64: Opc = AArch64::STLRX; break4
;
2062
16
  }
2063
16
2064
16
  const MCInstrDesc &II = TII.get(Opc);
2065
16
  SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2066
16
  AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2067
16
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2068
16
      .addReg(SrcReg)
2069
16
      .addReg(AddrReg)
2070
16
      .addMemOperand(MMO);
2071
16
  return true;
2072
16
}
2073
2074
bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
2075
445
                                MachineMemOperand *MMO) {
2076
445
  if (!TLI.allowsMisalignedMemoryAccesses(VT))
2077
0
    return false;
2078
445
2079
445
  // Simplify this down to something we can handle.
2080
445
  if (!simplifyAddress(Addr, VT))
2081
154
    return false;
2082
291
2083
291
  unsigned ScaleFactor = getImplicitScaleFactor(VT);
2084
291
  if (!ScaleFactor)
2085
291
    
llvm_unreachable0
("Unexpected value type.");
2086
291
2087
291
  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2088
291
  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2089
291
  bool UseScaled = true;
2090
291
  if ((Addr.getOffset() < 0) || 
(Addr.getOffset() & (ScaleFactor - 1))288
) {
2091
4
    UseScaled = false;
2092
4
    ScaleFactor = 1;
2093
4
  }
2094
291
2095
291
  static const unsigned OpcTable[4][6] = {
2096
291
    { AArch64::STURBBi,  AArch64::STURHHi,  AArch64::STURWi,  AArch64::STURXi,
2097
291
      AArch64::STURSi,   AArch64::STURDi },
2098
291
    { AArch64::STRBBui,  AArch64::STRHHui,  AArch64::STRWui,  AArch64::STRXui,
2099
291
      AArch64::STRSui,   AArch64::STRDui },
2100
291
    { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2101
291
      AArch64::STRSroX,  AArch64::STRDroX },
2102
291
    { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2103
291
      AArch64::STRSroW,  AArch64::STRDroW }
2104
291
  };
2105
291
2106
291
  unsigned Opc;
2107
291
  bool VTIsi1 = false;
2108
291
  bool UseRegOffset = Addr.isRegBase() && 
!Addr.getOffset()173
&&
Addr.getReg()133
&&
2109
291
                      
Addr.getOffsetReg()133
;
2110
291
  unsigned Idx = UseRegOffset ? 
22
:
UseScaled 289
?
1285
:
04
;
2111
291
  if (Addr.getExtendType() == AArch64_AM::UXTW ||
2112
291
      Addr.getExtendType() == AArch64_AM::SXTW)
2113
0
    Idx++;
2114
291
2115
291
  switch (VT.SimpleTy) {
2116
291
  
default: 0
llvm_unreachable0
("Unexpected value type.");
2117
291
  
case MVT::i1: VTIsi1 = true; 8
LLVM_FALLTHROUGH8
;
2118
48
  case MVT::i8:  Opc = OpcTable[Idx][0]; break;
2119
19
  case MVT::i16: Opc = OpcTable[Idx][1]; break;
2120
108
  case MVT::i32: Opc = OpcTable[Idx][2]; break;
2121
96
  case MVT::i64: Opc = OpcTable[Idx][3]; break;
2122
8
  
case MVT::f32: Opc = OpcTable[Idx][4]; break2
;
2123
18
  case MVT::f64: Opc = OpcTable[Idx][5]; break;
2124
291
  }
2125
291
2126
291
  // Storing an i1 requires special handling.
2127
291
  if (VTIsi1 && 
SrcReg != AArch64::WZR8
) {
2128
7
    unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
2129
7
    assert(ANDReg && "Unexpected AND instruction emission failure.");
2130
7
    SrcReg = ANDReg;
2131
7
  }
2132
291
  // Create the base instruction, then add the operands.
2133
291
  const MCInstrDesc &II = TII.get(Opc);
2134
291
  SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2135
291
  MachineInstrBuilder MIB =
2136
291
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
2137
291
  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2138
291
2139
291
  return true;
2140
291
}
2141
2142
427
bool AArch64FastISel::selectStore(const Instruction *I) {
2143
427
  MVT VT;
2144
427
  const Value *Op0 = I->getOperand(0);
2145
427
  // Verify we have a legal type before going any further.  Currently, we handle
2146
427
  // simple types that will directly fit in a register (i32/f32/i64/f64) or
2147
427
  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2148
427
  if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2149
23
    return false;
2150
404
2151
404
  const Value *PtrV = I->getOperand(1);
2152
404
  if (TLI.supportSwiftError()) {
2153
404
    // Swifterror values can come from either a function parameter with
2154
404
    // swifterror attribute or an alloca with swifterror attribute.
2155
404
    if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2156
228
      if (Arg->hasSwiftErrorAttr())
2157
1
        return false;
2158
403
    }
2159
403
2160
403
    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2161
105
      if (Alloca->isSwiftError())
2162
1
        return false;
2163
402
    }
2164
403
  }
2165
402
2166
402
  // Get the value to be stored into a register. Use the zero register directly
2167
402
  // when possible to avoid an unnecessary copy and a wasted register.
2168
402
  unsigned SrcReg = 0;
2169
402
  if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2170
49
    if (CI->isZero())
2171
34
      SrcReg = (VT == MVT::i64) ? 
AArch64::XZR2
:
AArch64::WZR32
;
2172
353
  } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2173
2
    if (CF->isZero() && !CF->isNegative()) {
2174
2
      VT = MVT::getIntegerVT(VT.getSizeInBits());
2175
2
      SrcReg = (VT == MVT::i64) ? 
AArch64::XZR1
:
AArch64::WZR1
;
2176
2
    }
2177
2
  }
2178
402
2179
402
  if (!SrcReg)
2180
366
    SrcReg = getRegForValue(Op0);
2181
402
2182
402
  if (!SrcReg)
2183
6
    return false;
2184
396
2185
396
  auto *SI = cast<StoreInst>(I);
2186
396
2187
396
  // Try to emit a STLR for seq_cst/release.
2188
396
  if (SI->isAtomic()) {
2189
24
    AtomicOrdering Ord = SI->getOrdering();
2190
24
    // The non-atomic instructions are sufficient for relaxed stores.
2191
24
    if (isReleaseOrStronger(Ord)) {
2192
16
      // The STLR addressing mode only supports a base reg; pass that directly.
2193
16
      unsigned AddrReg = getRegForValue(PtrV);
2194
16
      return emitStoreRelease(VT, SrcReg, AddrReg,
2195
16
                              createMachineMemOperandFor(I));
2196
16
    }
2197
380
  }
2198
380
2199
380
  // See if we can handle this address.
2200
380
  Address Addr;
2201
380
  if (!computeAddress(PtrV, Addr, Op0->getType()))
2202
8
    return false;
2203
372
2204
372
  if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2205
152
    return false;
2206
220
  return true;
2207
220
}
2208
2209
110
static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2210
110
  switch (Pred) {
2211
110
  case CmpInst::FCMP_ONE:
2212
4
  case CmpInst::FCMP_UEQ:
2213
4
  default:
2214
4
    // AL is our "false" for now. The other two need more compares.
2215
4
    return AArch64CC::AL;
2216
13
  case CmpInst::ICMP_EQ:
2217
13
  case CmpInst::FCMP_OEQ:
2218
13
    return AArch64CC::EQ;
2219
13
  case CmpInst::ICMP_SGT:
2220
9
  case CmpInst::FCMP_OGT:
2221
9
    return AArch64CC::GT;
2222
10
  case CmpInst::ICMP_SGE:
2223
10
  case CmpInst::FCMP_OGE:
2224
10
    return AArch64CC::GE;
2225
10
  case CmpInst::ICMP_UGT:
2226
6
  case CmpInst::FCMP_UGT:
2227
6
    return AArch64CC::HI;
2228
6
  case CmpInst::FCMP_OLT:
2229
3
    return AArch64CC::MI;
2230
6
  case CmpInst::ICMP_ULE:
2231
6
  case CmpInst::FCMP_OLE:
2232
6
    return AArch64CC::LS;
2233
6
  case CmpInst::FCMP_ORD:
2234
3
    return AArch64CC::VC;
2235
6
  case CmpInst::FCMP_UNO:
2236
3
    return AArch64CC::VS;
2237
6
  case CmpInst::FCMP_UGE:
2238
3
    return AArch64CC::PL;
2239
10
  case CmpInst::ICMP_SLT:
2240
10
  case CmpInst::FCMP_ULT:
2241
10
    return AArch64CC::LT;
2242
11
  case CmpInst::ICMP_SLE:
2243
11
  case CmpInst::FCMP_ULE:
2244
11
    return AArch64CC::LE;
2245
19
  case CmpInst::FCMP_UNE:
2246
19
  case CmpInst::ICMP_NE:
2247
19
    return AArch64CC::NE;
2248
19
  case CmpInst::ICMP_UGE:
2249
4
    return AArch64CC::HS;
2250
19
  case CmpInst::ICMP_ULT:
2251
6
    return AArch64CC::LO;
2252
110
  }
2253
110
}
2254
2255
/// Try to emit a combined compare-and-branch instruction.
2256
89
bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2257
89
  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
2258
89
  // will not be produced, as they are conditional branch instructions that do
2259
89
  // not set flags.
2260
89
  if (FuncInfo.MF->getFunction().hasFnAttribute(
2261
89
          Attribute::SpeculativeLoadHardening))
2262
4
    return false;
2263
85
2264
85
  assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2265
85
  const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2266
85
  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2267
85
2268
85
  const Value *LHS = CI->getOperand(0);
2269
85
  const Value *RHS = CI->getOperand(1);
2270
85
2271
85
  MVT VT;
2272
85
  if (!isTypeSupported(LHS->getType(), VT))
2273
0
    return false;
2274
85
2275
85
  unsigned BW = VT.getSizeInBits();
2276
85
  if (BW > 64)
2277
0
    return false;
2278
85
2279
85
  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2280
85
  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2281
85
2282
85
  // Try to take advantage of fallthrough opportunities.
2283
85
  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2284
46
    std::swap(TBB, FBB);
2285
46
    Predicate = CmpInst::getInversePredicate(Predicate);
2286
46
  }
2287
85
2288
85
  int TestBit = -1;
2289
85
  bool IsCmpNE;
2290
85
  switch (Predicate) {
2291
85
  default:
2292
21
    return false;
2293
85
  case CmpInst::ICMP_EQ:
2294
39
  case CmpInst::ICMP_NE:
2295
39
    if (isa<Constant>(LHS) && 
cast<Constant>(LHS)->isNullValue()0
)
2296
0
      std::swap(LHS, RHS);
2297
39
2298
39
    if (!isa<Constant>(RHS) || 
!cast<Constant>(RHS)->isNullValue()37
)
2299
2
      return false;
2300
37
2301
37
    if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2302
14
      if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2303
13
        const Value *AndLHS = AI->getOperand(0);
2304
13
        const Value *AndRHS = AI->getOperand(1);
2305
13
2306
13
        if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2307
0
          if (C->getValue().isPowerOf2())
2308
0
            std::swap(AndLHS, AndRHS);
2309
13
2310
13
        if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2311
13
          if (C->getValue().isPowerOf2()) {
2312
13
            TestBit = C->getValue().logBase2();
2313
13
            LHS = AndLHS;
2314
13
          }
2315
13
      }
2316
37
2317
37
    if (VT == MVT::i1)
2318
1
      TestBit = 0;
2319
37
2320
37
    IsCmpNE = Predicate == CmpInst::ICMP_NE;
2321
37
    break;
2322
37
  case CmpInst::ICMP_SLT:
2323
14
  case CmpInst::ICMP_SGE:
2324
14
    if (!isa<Constant>(RHS) || 
!cast<Constant>(RHS)->isNullValue()7
)
2325
8
      return false;
2326
6
2327
6
    TestBit = BW - 1;
2328
6
    IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2329
6
    break;
2330
11
  case CmpInst::ICMP_SGT:
2331
11
  case CmpInst::ICMP_SLE:
2332
11
    if (!isa<ConstantInt>(RHS))
2333
3
      return false;
2334
8
2335
8
    if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2336
0
      return false;
2337
8
2338
8
    TestBit = BW - 1;
2339
8
    IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2340
8
    break;
2341
51
  } // end switch
2342
51
2343
51
  static const unsigned OpcTable[2][2][2] = {
2344
51
    { {AArch64::CBZW,  AArch64::CBZX },
2345
51
      {AArch64::CBNZW, AArch64::CBNZX} },
2346
51
    { {AArch64::TBZW,  AArch64::TBZX },
2347
51
      {AArch64::TBNZW, AArch64::TBNZX} }
2348
51
  };
2349
51
2350
51
  bool IsBitTest = TestBit != -1;
2351
51
  bool Is64Bit = BW == 64;
2352
51
  if (TestBit < 32 && 
TestBit >= 046
)
2353
23
    Is64Bit = false;
2354
51
2355
51
  unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2356
51
  const MCInstrDesc &II = TII.get(Opc);
2357
51
2358
51
  unsigned SrcReg = getRegForValue(LHS);
2359
51
  if (!SrcReg)
2360
0
    return false;
2361
51
  bool SrcIsKill = hasTrivialKill(LHS);
2362
51
2363
51
  if (BW == 64 && 
!Is64Bit13
)
2364
2
    SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
2365
2
                                        AArch64::sub_32);
2366
51
2367
51
  if ((BW < 32) && 
!IsBitTest15
)
2368
2
    SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);
2369
51
2370
51
  // Emit the combined compare and branch instruction.
2371
51
  SrcReg = constrainOperandRegClass(II, SrcReg,  II.getNumDefs());
2372
51
  MachineInstrBuilder MIB =
2373
51
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
2374
51
          .addReg(SrcReg, getKillRegState(SrcIsKill));
2375
51
  if (IsBitTest)
2376
28
    MIB.addImm(TestBit);
2377
51
  MIB.addMBB(TBB);
2378
51
2379
51
  finishCondBranch(BI->getParent(), TBB, FBB);
2380
51
  return true;
2381
51
}
2382
2383
287
bool AArch64FastISel::selectBranch(const Instruction *I) {
2384
287
  const BranchInst *BI = cast<BranchInst>(I);
2385
287
  if (BI->isUnconditional()) {
2386
164
    MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2387
164
    fastEmitBranch(MSucc, BI->getDebugLoc());
2388
164
    return true;
2389
164
  }
2390
123
2391
123
  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2392
123
  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2393
123
2394
123
  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2395
90
    if (CI->hasOneUse() && 
isValueAvailable(CI)89
) {
2396
89
      // Try to optimize or fold the cmp.
2397
89
      CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2398
89
      switch (Predicate) {
2399
89
      default:
2400
89
        break;
2401
89
      case CmpInst::FCMP_FALSE:
2402
0
        fastEmitBranch(FBB, DbgLoc);
2403
0
        return true;
2404
89
      case CmpInst::FCMP_TRUE:
2405
0
        fastEmitBranch(TBB, DbgLoc);
2406
0
        return true;
2407
89
      }
2408
89
2409
89
      // Try to emit a combined compare-and-branch first.
2410
89
      if (emitCompareAndBranch(BI))
2411
51
        return true;
2412
38
2413
38
      // Try to take advantage of fallthrough opportunities.
2414
38
      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2415
11
        std::swap(TBB, FBB);
2416
11
        Predicate = CmpInst::getInversePredicate(Predicate);
2417
11
      }
2418
38
2419
38
      // Emit the cmp.
2420
38
      if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2421
0
        return false;
2422
38
2423
38
      // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2424
38
      // instruction.
2425
38
      AArch64CC::CondCode CC = getCompareCC(Predicate);
2426
38
      AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2427
38
      switch (Predicate) {
2428
38
      default:
2429
36
        break;
2430
38
      case CmpInst::FCMP_UEQ:
2431
1
        ExtraCC = AArch64CC::EQ;
2432
1
        CC = AArch64CC::VS;
2433
1
        break;
2434
38
      case CmpInst::FCMP_ONE:
2435
1
        ExtraCC = AArch64CC::MI;
2436
1
        CC = AArch64CC::GT;
2437
1
        break;
2438
38
      }
2439
38
      assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2440
38
2441
38
      // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2442
38
      if (ExtraCC != AArch64CC::AL) {
2443
2
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2444
2
            .addImm(ExtraCC)
2445
2
            .addMBB(TBB);
2446
2
      }
2447
38
2448
38
      // Emit the branch.
2449
38
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2450
38
          .addImm(CC)
2451
38
          .addMBB(TBB);
2452
38
2453
38
      finishCondBranch(BI->getParent(), TBB, FBB);
2454
38
      return true;
2455
38
    }
2456
33
  } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2457
2
    uint64_t Imm = CI->getZExtValue();
2458
2
    MachineBasicBlock *Target = (Imm == 0) ? 
FBB1
:
TBB1
;
2459
2
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
2460
2
        .addMBB(Target);
2461
2
2462
2
    // Obtain the branch probability and add the target to the successor list.
2463
2
    if (FuncInfo.BPI) {
2464
0
      auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2465
0
          BI->getParent(), Target->getBasicBlock());
2466
0
      FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2467
0
    } else
2468
2
      FuncInfo.MBB->addSuccessorWithoutProb(Target);
2469
2
    return true;
2470
31
  } else {
2471
31
    AArch64CC::CondCode CC = AArch64CC::NE;
2472
31
    if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2473
14
      // Fake request the condition, otherwise the intrinsic might be completely
2474
14
      // optimized away.
2475
14
      unsigned CondReg = getRegForValue(BI->getCondition());
2476
14
      if (!CondReg)
2477
0
        return false;
2478
14
2479
14
      // Emit the branch.
2480
14
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2481
14
        .addImm(CC)
2482
14
        .addMBB(TBB);
2483
14
2484
14
      finishCondBranch(BI->getParent(), TBB, FBB);
2485
14
      return true;
2486
14
    }
2487
31
  }
2488
18
2489
18
  unsigned CondReg = getRegForValue(BI->getCondition());
2490
18
  if (CondReg == 0)
2491
0
    return false;
2492
18
  bool CondRegIsKill = hasTrivialKill(BI->getCondition());
2493
18
2494
18
  // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
2495
18
  unsigned Opcode = AArch64::TBNZW;
2496
18
  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2497
14
    std::swap(TBB, FBB);
2498
14
    Opcode = AArch64::TBZW;
2499
14
  }
2500
18
2501
18
  const MCInstrDesc &II = TII.get(Opcode);
2502
18
  unsigned ConstrainedCondReg
2503
18
    = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2504
18
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2505
18
      .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill))
2506
18
      .addImm(0)
2507
18
      .addMBB(TBB);
2508
18
2509
18
  finishCondBranch(BI->getParent(), TBB, FBB);
2510
18
  return true;
2511
18
}
2512
2513
1
bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2514
1
  const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2515
1
  unsigned AddrReg = getRegForValue(BI->getOperand(0));
2516
1
  if (AddrReg == 0)
2517
0
    return false;
2518
1
2519
1
  // Emit the indirect branch.
2520
1
  const MCInstrDesc &II = TII.get(AArch64::BR);
2521
1
  AddrReg = constrainOperandRegClass(II, AddrReg,  II.getNumDefs());
2522
1
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
2523
1
2524
1
  // Make sure the CFG is up-to-date.
2525
1
  for (auto *Succ : BI->successors())
2526
2
    FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
2527
1
2528
1
  return true;
2529
1
}
2530
2531
57
bool AArch64FastISel::selectCmp(const Instruction *I) {
2532
57
  const CmpInst *CI = cast<CmpInst>(I);
2533
57
2534
57
  // Vectors of i1 are weird: bail out.
2535
57
  if (CI->getType()->isVectorTy())
2536
6
    return false;
2537
51
2538
51
  // Try to optimize or fold the cmp.
2539
51
  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2540
51
  unsigned ResultReg = 0;
2541
51
  switch (Predicate) {
2542
51
  default:
2543
49
    break;
2544
51
  case CmpInst::FCMP_FALSE:
2545
1
    ResultReg = createResultReg(&AArch64::GPR32RegClass);
2546
1
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2547
1
            TII.get(TargetOpcode::COPY), ResultReg)
2548
1
        .addReg(AArch64::WZR, getKillRegState(true));
2549
1
    break;
2550
51
  case CmpInst::FCMP_TRUE:
2551
1
    ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2552
1
    break;
2553
51
  }
2554
51
2555
51
  if (ResultReg) {
2556
2
    updateValueMap(I, ResultReg);
2557
2
    return true;
2558
2
  }
2559
49
2560
49
  // Emit the cmp.
2561
49
  if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2562
0
    return false;
2563
49
2564
49
  ResultReg = createResultReg(&AArch64::GPR32RegClass);
2565
49
2566
49
  // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2567
49
  // condition codes are inverted, because they are used by CSINC.
2568
49
  static unsigned CondCodeTable[2][2] = {
2569
49
    { AArch64CC::NE, AArch64CC::VC },
2570
49
    { AArch64CC::PL, AArch64CC::LE }
2571
49
  };
2572
49
  unsigned *CondCodes = nullptr;
2573
49
  switch (Predicate) {
2574
49
  default:
2575
47
    break;
2576
49
  case CmpInst::FCMP_UEQ:
2577
1
    CondCodes = &CondCodeTable[0][0];
2578
1
    break;
2579
49
  case CmpInst::FCMP_ONE:
2580
1
    CondCodes = &CondCodeTable[1][0];
2581
1
    break;
2582
49
  }
2583
49
2584
49
  if (CondCodes) {
2585
2
    unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2586
2
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2587
2
            TmpReg1)
2588
2
        .addReg(AArch64::WZR, getKillRegState(true))
2589
2
        .addReg(AArch64::WZR, getKillRegState(true))
2590
2
        .addImm(CondCodes[0]);
2591
2
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2592
2
            ResultReg)
2593
2
        .addReg(TmpReg1, getKillRegState(true))
2594
2
        .addReg(AArch64::WZR, getKillRegState(true))
2595
2
        .addImm(CondCodes[1]);
2596
2
2597
2
    updateValueMap(I, ResultReg);
2598
2
    return true;
2599
2
  }
2600
47
2601
47
  // Now set a register based on the comparison.
2602
47
  AArch64CC::CondCode CC = getCompareCC(Predicate);
2603
47
  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2604
47
  AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2605
47
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2606
47
          ResultReg)
2607
47
      .addReg(AArch64::WZR, getKillRegState(true))
2608
47
      .addReg(AArch64::WZR, getKillRegState(true))
2609
47
      .addImm(invertedCC);
2610
47
2611
47
  updateValueMap(I, ResultReg);
2612
47
  return true;
2613
47
}
2614
2615
/// Optimize selects of i1 if one of the operands has a 'true' or 'false'
2616
/// value.
2617
53
bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2618
53
  if (!SI->getType()->isIntegerTy(1))
2619
47
    return false;
2620
6
2621
6
  const Value *Src1Val, *Src2Val;
2622
6
  unsigned Opc = 0;
2623
6
  bool NeedExtraOp = false;
2624
6
  if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2625
3
    if (CI->isOne()) {
2626
2
      Src1Val = SI->getCondition();
2627
2
      Src2Val = SI->getFalseValue();
2628
2
      Opc = AArch64::ORRWrr;
2629
2
    } else {
2630
1
      assert(CI->isZero());
2631
1
      Src1Val = SI->getFalseValue();
2632
1
      Src2Val = SI->getCondition();
2633
1
      Opc = AArch64::BICWrr;
2634
1
    }
2635
3
  } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2636
2
    if (CI->isOne()) {
2637
1
      Src1Val = SI->getCondition();
2638
1
      Src2Val = SI->getTrueValue();
2639
1
      Opc = AArch64::ORRWrr;
2640
1
      NeedExtraOp = true;
2641
1
    } else {
2642
1
      assert(CI->isZero());
2643
1
      Src1Val = SI->getCondition();
2644
1
      Src2Val = SI->getTrueValue();
2645
1
      Opc = AArch64::ANDWrr;
2646
1
    }
2647
2
  }
2648
6
2649
6
  if (!Opc)
2650
1
    return false;
2651
5
2652
5
  unsigned Src1Reg = getRegForValue(Src1Val);
2653
5
  if (!Src1Reg)
2654
0
    return false;
2655
5
  bool Src1IsKill = hasTrivialKill(Src1Val);
2656
5
2657
5
  unsigned Src2Reg = getRegForValue(Src2Val);
2658
5
  if (!Src2Reg)
2659
0
    return false;
2660
5
  bool Src2IsKill = hasTrivialKill(Src2Val);
2661
5
2662
5
  if (NeedExtraOp) {
2663
1
    Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1);
2664
1
    Src1IsKill = true;
2665
1
  }
2666
5
  unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2667
5
                                       Src1IsKill, Src2Reg, Src2IsKill);
2668
5
  updateValueMap(SI, ResultReg);
2669
5
  return true;
2670
5
}
2671
2672
53
bool AArch64FastISel::selectSelect(const Instruction *I) {
2673
53
  assert(isa<SelectInst>(I) && "Expected a select instruction.");
2674
53
  MVT VT;
2675
53
  if (!isTypeSupported(I->getType(), VT))
2676
0
    return false;
2677
53
2678
53
  unsigned Opc;
2679
53
  const TargetRegisterClass *RC;
2680
53
  switch (VT.SimpleTy) {
2681
53
  default:
2682
0
    return false;
2683
53
  case MVT::i1:
2684
18
  case MVT::i8:
2685
18
  case MVT::i16:
2686
18
  case MVT::i32:
2687
18
    Opc = AArch64::CSELWr;
2688
18
    RC = &AArch64::GPR32RegClass;
2689
18
    break;
2690
18
  case MVT::i64:
2691
8
    Opc = AArch64::CSELXr;
2692
8
    RC = &AArch64::GPR64RegClass;
2693
8
    break;
2694
26
  case MVT::f32:
2695
26
    Opc = AArch64::FCSELSrrr;
2696
26
    RC = &AArch64::FPR32RegClass;
2697
26
    break;
2698
18
  case MVT::f64:
2699
1
    Opc = AArch64::FCSELDrrr;
2700
1
    RC = &AArch64::FPR64RegClass;
2701
1
    break;
2702
53
  }
2703
53
2704
53
  const SelectInst *SI = cast<SelectInst>(I);
2705
53
  const Value *Cond = SI->getCondition();
2706
53
  AArch64CC::CondCode CC = AArch64CC::NE;
2707
53
  AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2708
53
2709
53
  if (optimizeSelect(SI))
2710
5
    return true;
2711
48
2712
48
  // Try to pickup the flags, so we don't have to emit another compare.
2713
48
  if (foldXALUIntrinsic(CC, I, Cond)) {
2714
12
    // Fake request the condition to force emission of the XALU intrinsic.
2715
12
    unsigned CondReg = getRegForValue(Cond);
2716
12
    if (!CondReg)
2717
0
      return false;
2718
36
  } else if (isa<CmpInst>(Cond) && 
cast<CmpInst>(Cond)->hasOneUse()28
&&
2719
36
             
isValueAvailable(Cond)27
) {
2720
27
    const auto *Cmp = cast<CmpInst>(Cond);
2721
27
    // Try to optimize or fold the cmp.
2722
27
    CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2723
27
    const Value *FoldSelect = nullptr;
2724
27
    switch (Predicate) {
2725
27
    default:
2726
25
      break;
2727
27
    case CmpInst::FCMP_FALSE:
2728
1
      FoldSelect = SI->getFalseValue();
2729
1
      break;
2730
27
    case CmpInst::FCMP_TRUE:
2731
1
      FoldSelect = SI->getTrueValue();
2732
1
      break;
2733
27
    }
2734
27
2735
27
    if (FoldSelect) {
2736
2
      unsigned SrcReg = getRegForValue(FoldSelect);
2737
2
      if (!SrcReg)
2738
0
        return false;
2739
2
      unsigned UseReg = lookUpRegForValue(SI);
2740
2
      if (UseReg)
2741
2
        MRI.clearKillFlags(UseReg);
2742
2
2743
2
      updateValueMap(I, SrcReg);
2744
2
      return true;
2745
2
    }
2746
25
2747
25
    // Emit the cmp.
2748
25
    if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2749
0
      return false;
2750
25
2751
25
    // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2752
25
    CC = getCompareCC(Predicate);
2753
25
    switch (Predicate) {
2754
25
    default:
2755
23
      break;
2756
25
    case CmpInst::FCMP_UEQ:
2757
1
      ExtraCC = AArch64CC::EQ;
2758
1
      CC = AArch64CC::VS;
2759
1
      break;
2760
25
    case CmpInst::FCMP_ONE:
2761
1
      ExtraCC = AArch64CC::MI;
2762
1
      CC = AArch64CC::GT;
2763
1
      break;
2764
25
    }
2765
25
    assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2766
25
  } else {
2767
9
    unsigned CondReg = getRegForValue(Cond);
2768
9
    if (!CondReg)
2769
0
      return false;
2770
9
    bool CondIsKill = hasTrivialKill(Cond);
2771
9
2772
9
    const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2773
9
    CondReg = constrainOperandRegClass(II, CondReg, 1);
2774
9
2775
9
    // Emit a TST instruction (ANDS wzr, reg, #imm).
2776
9
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
2777
9
            AArch64::WZR)
2778
9
        .addReg(CondReg, getKillRegState(CondIsKill))
2779
9
        .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2780
9
  }
2781
48
2782
48
  unsigned Src1Reg = getRegForValue(SI->getTrueValue());
2783
46
  bool Src1IsKill = hasTrivialKill(SI->getTrueValue());
2784
46
2785
46
  unsigned Src2Reg = getRegForValue(SI->getFalseValue());
2786
46
  bool Src2IsKill = hasTrivialKill(SI->getFalseValue());
2787
46
2788
46
  if (!Src1Reg || !Src2Reg)
2789
0
    return false;
2790
46
2791
46
  if (ExtraCC != AArch64CC::AL) {
2792
2
    Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2793
2
                               Src2IsKill, ExtraCC);
2794
2
    Src2IsKill = true;
2795
2
  }
2796
46
  unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2797
46
                                        Src2IsKill, CC);
2798
46
  updateValueMap(I, ResultReg);
2799
46
  return true;
2800
46
}
2801
2802
5
bool AArch64FastISel::selectFPExt(const Instruction *I) {
2803
5
  Value *V = I->getOperand(0);
2804
5
  if (!I->getType()->isDoubleTy() || 
!V->getType()->isFloatTy()3
)
2805
2
    return false;
2806
3
2807
3
  unsigned Op = getRegForValue(V);
2808
3
  if (Op == 0)
2809
0
    return false;
2810
3
2811
3
  unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
2812
3
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
2813
3
          ResultReg).addReg(Op);
2814
3
  updateValueMap(I, ResultReg);
2815
3
  return true;
2816
3
}
2817
2818
2
bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2819
2
  Value *V = I->getOperand(0);
2820
2
  if (!I->getType()->isFloatTy() || 
!V->getType()->isDoubleTy()1
)
2821
1
    return false;
2822
1
2823
1
  unsigned Op = getRegForValue(V);
2824
1
  if (Op == 0)
2825
0
    return false;
2826
1
2827
1
  unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
2828
1
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
2829
1
          ResultReg).addReg(Op);
2830
1
  updateValueMap(I, ResultReg);
2831
1
  return true;
2832
1
}
2833
2834
// FPToUI and FPToSI
2835
7
bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2836
7
  MVT DestVT;
2837
7
  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2838
1
    return false;
2839
6
2840
6
  unsigned SrcReg = getRegForValue(I->getOperand(0));
2841
6
  if (SrcReg == 0)
2842
0
    return false;
2843
6
2844
6
  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2845
6
  if (SrcVT == MVT::f128 || 
SrcVT == MVT::f164
)
2846
4
    return false;
2847
2
2848
2
  unsigned Opc;
2849
2
  if (SrcVT == MVT::f64) {
2850
1
    if (Signed)
2851
0
      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2852
1
    else
2853
1
      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : 
AArch64::FCVTZUUXDr0
;
2854
1
  } else {
2855
1
    if (Signed)
2856
0
      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2857
1
    else
2858
1
      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : 
AArch64::FCVTZUUXSr0
;
2859
1
  }
2860
2
  unsigned ResultReg = createResultReg(
2861
2
      DestVT == MVT::i32 ? &AArch64::GPR32RegClass : 
&AArch64::GPR64RegClass0
);
2862
2
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2863
2
      .addReg(SrcReg);
2864
2
  updateValueMap(I, ResultReg);
2865
2
  return true;
2866
2
}
2867
2868
22
bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2869
22
  MVT DestVT;
2870
22
  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2871
2
    return false;
2872
20
  // Let regular ISEL handle FP16
2873
20
  if (DestVT == MVT::f16)
2874
10
    return false;
2875
10
2876
10
  assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2877
10
         "Unexpected value type.");
2878
10
2879
10
  unsigned SrcReg = getRegForValue(I->getOperand(0));
2880
10
  if (!SrcReg)
2881
0
    return false;
2882
10
  bool SrcIsKill = hasTrivialKill(I->getOperand(0));
2883
10
2884
10
  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2885
10
2886
10
  // Handle sign-extension.
2887
10
  if (SrcVT == MVT::i16 || 
SrcVT == MVT::i88
||
SrcVT == MVT::i16
) {
2888
6
    SrcReg =
2889
6
        emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2890
6
    if (!SrcReg)
2891
0
      return false;
2892
6
    SrcIsKill = true;
2893
6
  }
2894
10
2895
10
  unsigned Opc;
2896
10
  if (SrcVT == MVT::i64) {
2897
2
    if (Signed)
2898
0
      Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2899
2
    else
2900
2
      Opc = (DestVT == MVT::f32) ? 
AArch64::UCVTFUXSri1
:
AArch64::UCVTFUXDri1
;
2901
8
  } else {
2902
8
    if (Signed)
2903
3
      Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : 
AArch64::SCVTFUWDri0
;
2904
5
    else
2905
5
      Opc = (DestVT == MVT::f32) ? 
AArch64::UCVTFUWSri4
:
AArch64::UCVTFUWDri1
;
2906
8
  }
2907
10
2908
10
  unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
2909
10
                                      SrcIsKill);
2910
10
  updateValueMap(I, ResultReg);
2911
10
  return true;
2912
10
}
2913
2914
1.26k
bool AArch64FastISel::fastLowerArguments() {
2915
1.26k
  if (!FuncInfo.CanLowerReturn)
2916
0
    return false;
2917
1.26k
2918
1.26k
  const Function *F = FuncInfo.Fn;
2919
1.26k
  if (F->isVarArg())
2920
6
    return false;
2921
1.25k
2922
1.25k
  CallingConv::ID CC = F->getCallingConv();
2923
1.25k
  if (CC != CallingConv::C && 
CC != CallingConv::Swift24
)
2924
8
    return false;
2925
1.24k
2926
1.24k
  if (Subtarget->hasCustomCallingConv())
2927
8
    return false;
2928
1.23k
2929
1.23k
  // Only handle simple cases of up to 8 GPR and FPR each.
2930
1.23k
  unsigned GPRCnt = 0;
2931
1.23k
  unsigned FPRCnt = 0;
2932
2.05k
  for (auto const &Arg : F->args()) {
2933
2.05k
    if (Arg.hasAttribute(Attribute::ByVal) ||
2934
2.05k
        Arg.hasAttribute(Attribute::InReg) ||
2935
2.05k
        Arg.hasAttribute(Attribute::StructRet) ||
2936
2.05k
        
Arg.hasAttribute(Attribute::SwiftSelf)2.05k
||
2937
2.05k
        
Arg.hasAttribute(Attribute::SwiftError)2.04k
||
2938
2.05k
        
Arg.hasAttribute(Attribute::Nest)2.04k
)
2939
16
      return false;
2940
2.04k
2941
2.04k
    Type *ArgTy = Arg.getType();
2942
2.04k
    if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2943
3
      return false;
2944
2.03k
2945
2.03k
    EVT ArgVT = TLI.getValueType(DL, ArgTy);
2946
2.03k
    if (!ArgVT.isSimple())
2947
0
      return false;
2948
2.03k
2949
2.03k
    MVT VT = ArgVT.getSimpleVT().SimpleTy;
2950
2.03k
    if (VT.isFloatingPoint() && 
!Subtarget->hasFPARMv8()311
)
2951
0
      return false;
2952
2.03k
2953
2.03k
    if (VT.isVector() &&
2954
2.03k
        
(117
!Subtarget->hasNEON()117
||
!Subtarget->isLittleEndian()117
))
2955
71
      return false;
2956
1.96k
2957
1.96k
    if (VT >= MVT::i1 && VT <= MVT::i64)
2958
1.65k
      ++GPRCnt;
2959
315
    else if ((VT >= MVT::f16 && 
VT <= MVT::f64313
) ||
VT.is64BitVector()56
||
2960
315
             
VT.is128BitVector()37
)
2961
305
      ++FPRCnt;
2962
10
    else
2963
10
      return false;
2964
1.95k
2965
1.95k
    if (GPRCnt > 8 || 
FPRCnt > 81.95k
)
2966
8
      return false;
2967
1.95k
  }
2968
1.23k
2969
1.23k
  static const MCPhysReg Registers[6][8] = {
2970
1.13k
    { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2971
1.13k
      AArch64::W5, AArch64::W6, AArch64::W7 },
2972
1.13k
    { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2973
1.13k
      AArch64::X5, AArch64::X6, AArch64::X7 },
2974
1.13k
    { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2975
1.13k
      AArch64::H5, AArch64::H6, AArch64::H7 },
2976
1.13k
    { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2977
1.13k
      AArch64::S5, AArch64::S6, AArch64::S7 },
2978
1.13k
    { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2979
1.13k
      AArch64::D5, AArch64::D6, AArch64::D7 },
2980
1.13k
    { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2981
1.13k
      AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2982
1.13k
  };
2983
1.13k
2984
1.13k
  unsigned GPRIdx = 0;
2985
1.13k
  unsigned FPRIdx = 0;
2986
1.86k
  for (auto const &Arg : F->args()) {
2987
1.86k
    MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2988
1.86k
    unsigned SrcReg;
2989
1.86k
    const TargetRegisterClass *RC;
2990
1.86k
    if (VT >= MVT::i1 && VT <= MVT::i32) {
2991
668
      SrcReg = Registers[0][GPRIdx++];
2992
668
      RC = &AArch64::GPR32RegClass;
2993
668
      VT = MVT::i32;
2994
1.19k
    } else if (VT == MVT::i64) {
2995
902
      SrcReg = Registers[1][GPRIdx++];
2996
902
      RC = &AArch64::GPR64RegClass;
2997
902
    } else 
if (296
VT == MVT::f16296
) {
2998
2
      SrcReg = Registers[2][FPRIdx++];
2999
2
      RC = &AArch64::FPR16RegClass;
3000
294
    } else if (VT ==  MVT::f32) {
3001
188
      SrcReg = Registers[3][FPRIdx++];
3002
188
      RC = &AArch64::FPR32RegClass;
3003
188
    } else 
if (106
(VT == MVT::f64)106
||
VT.is64BitVector()46
) {
3004
79
      SrcReg = Registers[4][FPRIdx++];
3005
79
      RC = &AArch64::FPR64RegClass;
3006
79
    } else 
if (27
VT.is128BitVector()27
) {
3007
27
      SrcReg = Registers[5][FPRIdx++];
3008
27
      RC = &AArch64::FPR128RegClass;
3009
27
    } else
3010
27
      
llvm_unreachable0
("Unexpected value type.");
3011
1.86k
3012
1.86k
    unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3013
1.86k
    // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3014
1.86k
    // Without this, EmitLiveInCopies may eliminate the livein if its only
3015
1.86k
    // use is a bitcast (which isn't turned into an instruction).
3016
1.86k
    unsigned ResultReg = createResultReg(RC);
3017
1.86k
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3018
1.86k
            TII.get(TargetOpcode::COPY), ResultReg)
3019
1.86k
        .addReg(DstReg, getKillRegState(true));
3020
1.86k
    updateValueMap(&Arg, ResultReg);
3021
1.86k
  }
3022
1.13k
  return true;
3023
1.13k
}
3024
3025
bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
3026
                                      SmallVectorImpl<MVT> &OutVTs,
3027
131
                                      unsigned &NumBytes) {
3028
131
  CallingConv::ID CC = CLI.CallConv;
3029
131
  SmallVector<CCValAssign, 16> ArgLocs;
3030
131
  CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
3031
131
  CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
3032
131
3033
131
  // Get a count of how many bytes are to be pushed on the stack.
3034
131
  NumBytes = CCInfo.getNextStackOffset();
3035
131
3036
131
  // Issue CALLSEQ_START
3037
131
  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3038
131
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
3039
131
    .addImm(NumBytes).addImm(0);
3040
131
3041
131
  // Process the args.
3042
1.33k
  for (CCValAssign &VA : ArgLocs) {
3043
1.33k
    const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3044
1.33k
    MVT ArgVT = OutVTs[VA.getValNo()];
3045
1.33k
3046
1.33k
    unsigned ArgReg = getRegForValue(ArgVal);
3047
1.33k
    if (!ArgReg)
3048
2
      return false;
3049
1.33k
3050
1.33k
    // Handle arg promotion: SExt, ZExt, AExt.
3051
1.33k
    switch (VA.getLocInfo()) {
3052
1.33k
    case CCValAssign::Full:
3053
1.22k
      break;
3054
1.33k
    case CCValAssign::SExt: {
3055
15
      MVT DestVT = VA.getLocVT();
3056
15
      MVT SrcVT = ArgVT;
3057
15
      ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3058
15
      if (!ArgReg)
3059
0
        return false;
3060
15
      break;
3061
15
    }
3062
88
    case CCValAssign::AExt:
3063
88
    // Intentional fall-through.
3064
88
    case CCValAssign::ZExt: {
3065
88
      MVT DestVT = VA.getLocVT();
3066
88
      MVT SrcVT = ArgVT;
3067
88
      ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3068
88
      if (!ArgReg)
3069
0
        return false;
3070
88
      break;
3071
88
    }
3072
88
    default:
3073
0
      llvm_unreachable("Unknown arg promotion!");
3074
1.33k
    }
3075
1.33k
3076
1.33k
    // Now copy/store arg to correct locations.
3077
1.33k
    if (VA.isRegLoc() && 
!VA.needsCustom()267
) {
3078
267
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3079
267
              TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3080
267
      CLI.OutRegs.push_back(VA.getLocReg());
3081
1.06k
    } else if (VA.needsCustom()) {
3082
0
      // FIXME: Handle custom args.
3083
0
      return false;
3084
1.06k
    } else {
3085
1.06k
      assert(VA.isMemLoc() && "Assuming store on stack.");
3086
1.06k
3087
1.06k
      // Don't emit stores for undef values.
3088
1.06k
      if (isa<UndefValue>(ArgVal))
3089
1.03k
        continue;
3090
32
3091
32
      // Need to store on the stack.
3092
32
      unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3093
32
3094
32
      unsigned BEAlign = 0;
3095
32
      if (ArgSize < 8 && 
!Subtarget->isLittleEndian()21
)
3096
2
        BEAlign = 8 - ArgSize;
3097
32
3098
32
      Address Addr;
3099
32
      Addr.setKind(Address::RegBase);
3100
32
      Addr.setReg(AArch64::SP);
3101
32
      Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3102
32
3103
32
      unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
3104
32
      MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3105
32
          MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3106
32
          MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3107
32
3108
32
      if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3109
2
        return false;
3110
32
    }
3111
1.33k
  }
3112
131
  
return true127
;
3113
131
}
3114
3115
bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
3116
127
                                 unsigned NumBytes) {
3117
127
  CallingConv::ID CC = CLI.CallConv;
3118
127
3119
127
  // Issue CALLSEQ_END
3120
127
  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3121
127
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
3122
127
    .addImm(NumBytes).addImm(0);
3123
127
3124
127
  // Now the return value.
3125
127
  if (RetVT != MVT::isVoid) {
3126
72
    SmallVector<CCValAssign, 16> RVLocs;
3127
72
    CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3128
72
    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
3129
72
3130
72
    // Only handle a single return value.
3131
72
    if (RVLocs.size() != 1)
3132
0
      return false;
3133
72
3134
72
    // Copy all of the result registers out of their specified physreg.
3135
72
    MVT CopyVT = RVLocs[0].getValVT();
3136
72
3137
72
    // TODO: Handle big-endian results
3138
72
    if (CopyVT.isVector() && 
!Subtarget->isLittleEndian()10
)
3139
10
      return false;
3140
62
3141
62
    unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
3142
62
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3143
62
            TII.get(TargetOpcode::COPY), ResultReg)
3144
62
        .addReg(RVLocs[0].getLocReg());
3145
62
    CLI.InRegs.push_back(RVLocs[0].getLocReg());
3146
62
3147
62
    CLI.ResultReg = ResultReg;
3148
62
    CLI.NumResultRegs = 1;
3149
62
  }
3150
127
3151
127
  
return true117
;
3152
127
}
3153
3154
241
bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3155
241
  CallingConv::ID CC  = CLI.CallConv;
3156
241
  bool IsTailCall     = CLI.IsTailCall;
3157
241
  bool IsVarArg       = CLI.IsVarArg;
3158
241
  const Value *Callee = CLI.Callee;
3159
241
  MCSymbol *Symbol = CLI.Symbol;
3160
241
3161
241
  if (!Callee && 
!Symbol16
)
3162
0
    return false;
3163
241
3164
241
  // Allow SelectionDAG isel to handle tail calls.
3165
241
  if (IsTailCall)
3166
26
    return false;
3167
215
3168
215
  CodeModel::Model CM = TM.getCodeModel();
3169
215
  // Only support the small-addressing and large code models.
3170
215
  if (CM != CodeModel::Large && 
!Subtarget->useSmallAddressing()199
)
3171
0
    return false;
3172
215
3173
215
  // FIXME: Add large code model support for ELF.
3174
215
  if (CM == CodeModel::Large && 
!Subtarget->isTargetMachO()16
)
3175
0
    return false;
3176
215
3177
215
  // Let SDISel handle vararg functions.
3178
215
  if (IsVarArg)
3179
0
    return false;
3180
215
3181
215
  // FIXME: Only handle *simple* calls for now.
3182
215
  MVT RetVT;
3183
215
  if (CLI.RetTy->isVoidTy())
3184
59
    RetVT = MVT::isVoid;
3185
156
  else if (!isTypeLegal(CLI.RetTy, RetVT))
3186
11
    return false;
3187
204
3188
204
  for (auto Flag : CLI.OutFlags)
3189
1.40k
    if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3190
1.40k
        Flag.isSwiftSelf() || 
Flag.isSwiftError()1.40k
)
3191
5
      return false;
3192
204
3193
204
  // Set up the argument vectors.
3194
204
  SmallVector<MVT, 16> OutVTs;
3195
199
  OutVTs.reserve(CLI.OutVals.size());
3196
199
3197
1.40k
  for (auto *Val : CLI.OutVals) {
3198
1.40k
    MVT VT;
3199
1.40k
    if (!isTypeLegal(Val->getType(), VT) &&
3200
1.40k
        
!(111
VT == MVT::i1111
||
VT == MVT::i893
||
VT == MVT::i1619
))
3201
8
      return false;
3202
1.39k
3203
1.39k
    // We don't handle vector parameters yet.
3204
1.39k
    if (VT.isVector() || 
VT.getSizeInBits() > 641.33k
)
3205
60
      return false;
3206
1.33k
3207
1.33k
    OutVTs.push_back(VT);
3208
1.33k
  }
3209
199
3210
199
  Address Addr;
3211
131
  if (Callee && 
!computeCallAddress(Callee, Addr)115
)
3212
0
    return false;
3213
131
3214
131
  // Handle the arguments now that we've gotten them.
3215
131
  unsigned NumBytes;
3216
131
  if (!processCallArgs(CLI, OutVTs, NumBytes))
3217
4
    return false;
3218
127
3219
127
  const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3220
127
  if (RegInfo->isAnyArgRegReserved(*MF))
3221
2
    RegInfo->emitReservedArgRegCallError(*MF);
3222
127
3223
127
  // Issue the call.
3224
127
  MachineInstrBuilder MIB;
3225
127
  if (Subtarget->useSmallAddressing()) {
3226
111
    const MCInstrDesc &II = TII.get(Addr.getReg() ? 
AArch64::BLR18
:
AArch64::BL93
);
3227
111
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
3228
111
    if (Symbol)
3229
16
      MIB.addSym(Symbol, 0);
3230
95
    else if (Addr.getGlobalValue())
3231
77
      MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3232
18
    else if (Addr.getReg()) {
3233
18
      unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3234
18
      MIB.addReg(Reg);
3235
18
    } else
3236
0
      return false;
3237
16
  } else {
3238
16
    unsigned CallReg = 0;
3239
16
    if (Symbol) {
3240
8
      unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3241
8
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
3242
8
              ADRPReg)
3243
8
          .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3244
8
3245
8
      CallReg = createResultReg(&AArch64::GPR64RegClass);
3246
8
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3247
8
              TII.get(AArch64::LDRXui), CallReg)
3248
8
          .addReg(ADRPReg)
3249
8
          .addSym(Symbol,
3250
8
                  AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3251
8
    } else if (Addr.getGlobalValue())
3252
7
      CallReg = materializeGV(Addr.getGlobalValue());
3253
1
    else if (Addr.getReg())
3254
1
      CallReg = Addr.getReg();
3255
16
3256
16
    if (!CallReg)
3257
0
      return false;
3258
16
3259
16
    const MCInstrDesc &II = TII.get(AArch64::BLR);
3260
16
    CallReg = constrainOperandRegClass(II, CallReg, 0);
3261
16
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
3262
16
  }
3263
127
3264
127
  // Add implicit physical register uses to the call.
3265
127
  for (auto Reg : CLI.OutRegs)
3266
250
    MIB.addReg(Reg, RegState::Implicit);
3267
127
3268
127
  // Add a register mask with the call-preserved registers.
3269
127
  // Proper defs for return values will be added by setPhysRegsDeadExcept().
3270
127
  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3271
127
3272
127
  CLI.Call = MIB;
3273
127
3274
127
  // Finish off the call including any return values.
3275
127
  return finishCall(CLI, RetVT, NumBytes);
3276
127
}
3277
3278
32
bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
3279
32
  if (Alignment)
3280
29
    return Len / Alignment <= 4;
3281
3
  else
3282
3
    return Len < 32;
3283
32
}
3284
3285
bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3286
13
                                         uint64_t Len, unsigned Alignment) {
3287
13
  // Make sure we don't bloat code by inlining very large memcpy's.
3288
13
  if (!isMemCpySmall(Len, Alignment))
3289
0
    return false;
3290
13
3291
13
  int64_t UnscaledOffset = 0;
3292
13
  Address OrigDest = Dest;
3293
13
  Address OrigSrc = Src;
3294
13
3295
54
  while (Len) {
3296
41
    MVT VT;
3297
41
    if (!Alignment || 
Alignment >= 840
) {
3298
30
      if (Len >= 8)
3299
27
        VT = MVT::i64;
3300
3
      else if (Len >= 4)
3301
0
        VT = MVT::i32;
3302
3
      else if (Len >= 2)
3303
0
        VT = MVT::i16;
3304
3
      else {
3305
3
        VT = MVT::i8;
3306
3
      }
3307
30
    } else {
3308
11
      // Bound based on alignment.
3309
11
      if (Len >= 4 && 
Alignment == 45
)
3310
2
        VT = MVT::i32;
3311
9
      else if (Len >= 2 && 
Alignment == 26
)
3312
3
        VT = MVT::i16;
3313
6
      else {
3314
6
        VT = MVT::i8;
3315
6
      }
3316
11
    }
3317
41
3318
41
    unsigned ResultReg = emitLoad(VT, VT, Src);
3319
41
    if (!ResultReg)
3320
0
      return false;
3321
41
3322
41
    if (!emitStore(VT, ResultReg, Dest))
3323
0
      return false;
3324
41
3325
41
    int64_t Size = VT.getSizeInBits() / 8;
3326
41
    Len -= Size;
3327
41
    UnscaledOffset += Size;
3328
41
3329
41
    // We need to recompute the unscaled offset for each iteration.
3330
41
    Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3331
41
    Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3332
41
  }
3333
13
3334
13
  return true;
3335
13
}
3336
3337
/// Check if it is possible to fold the condition from the XALU intrinsic
3338
/// into the user. The condition code will only be updated on success.
3339
bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3340
                                        const Instruction *I,
3341
79
                                        const Value *Cond) {
3342
79
  if (!isa<ExtractValueInst>(Cond))
3343
53
    return false;
3344
26
3345
26
  const auto *EV = cast<ExtractValueInst>(Cond);
3346
26
  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3347
0
    return false;
3348
26
3349
26
  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3350
26
  MVT RetVT;
3351
26
  const Function *Callee = II->getCalledFunction();
3352
26
  Type *RetTy =
3353
26
  cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3354
26
  if (!isTypeLegal(RetTy, RetVT))
3355
0
    return false;
3356
26
3357
26
  if (RetVT != MVT::i32 && 
RetVT != MVT::i6414
)
3358
0
    return false;
3359
26
3360
26
  const Value *LHS = II->getArgOperand(0);
3361
26
  const Value *RHS = II->getArgOperand(1);
3362
26
3363
26
  // Canonicalize immediate to the RHS.
3364
26
  if (isa<ConstantInt>(LHS) && 
!isa<ConstantInt>(RHS)0
&&
3365
26
      
isCommutativeIntrinsic(II)0
)
3366
0
    std::swap(LHS, RHS);
3367
26
3368
26
  // Simplify multiplies.
3369
26
  Intrinsic::ID IID = II->getIntrinsicID();
3370
26
  switch (IID) {
3371
26
  default:
3372
16
    break;
3373
26
  case Intrinsic::smul_with_overflow:
3374
5
    if (const auto *C = dyn_cast<ConstantInt>(RHS))
3375
1
      if (C->getValue() == 2)
3376
1
        IID = Intrinsic::sadd_with_overflow;
3377
5
    break;
3378
26
  case Intrinsic::umul_with_overflow:
3379
5
    if (const auto *C = dyn_cast<ConstantInt>(RHS))
3380
1
      if (C->getValue() == 2)
3381
1
        IID = Intrinsic::uadd_with_overflow;
3382
5
    break;
3383
26
  }
3384
26
3385
26
  AArch64CC::CondCode TmpCC;
3386
26
  switch (IID) {
3387
26
  default:
3388
0
    return false;
3389
26
  case Intrinsic::sadd_with_overflow:
3390
9
  case Intrinsic::ssub_with_overflow:
3391
9
    TmpCC = AArch64CC::VS;
3392
9
    break;
3393
9
  case Intrinsic::uadd_with_overflow:
3394
5
    TmpCC = AArch64CC::HS;
3395
5
    break;
3396
9
  case Intrinsic::usub_with_overflow:
3397
4
    TmpCC = AArch64CC::LO;
3398
4
    break;
3399
9
  case Intrinsic::smul_with_overflow:
3400
8
  case Intrinsic::umul_with_overflow:
3401
8
    TmpCC = AArch64CC::NE;
3402
8
    break;
3403
26
  }
3404
26
3405
26
  // Check if both instructions are in the same basic block.
3406
26
  if (!isValueAvailable(II))
3407
0
    return false;
3408
26
3409
26
  // Make sure nothing is in the way
3410
26
  BasicBlock::const_iterator Start(I);
3411
26
  BasicBlock::const_iterator End(II);
3412
66
  for (auto Itr = std::prev(Start); Itr != End; 
--Itr40
) {
3413
40
    // We only expect extractvalue instructions between the intrinsic and the
3414
40
    // instruction to be selected.
3415
40
    if (!isa<ExtractValueInst>(Itr))
3416
0
      return false;
3417
40
3418
40
    // Check that the extractvalue operand comes from the intrinsic.
3419
40
    const auto *EVI = cast<ExtractValueInst>(Itr);
3420
40
    if (EVI->getAggregateOperand() != II)
3421
0
      return false;
3422
40
  }
3423
26
3424
26
  CC = TmpCC;
3425
26
  return true;
3426
26
}
3427
3428
104
bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3429
104
  // FIXME: Handle more intrinsics.
3430
104
  switch (II->getIntrinsicID()) {
3431
104
  
default: return false9
;
3432
104
  case Intrinsic::frameaddress: {
3433
2
    MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3434
2
    MFI.setFrameAddressIsTaken(true);
3435
2
3436
2
    const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3437
2
    unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3438
2
    unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3439
2
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3440
2
            TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3441
2
    // Recursively load frame address
3442
2
    // ldr x0, [fp]
3443
2
    // ldr x0, [x0]
3444
2
    // ldr x0, [x0]
3445
2
    // ...
3446
2
    unsigned DestReg;
3447
2
    unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3448
4
    while (Depth--) {
3449
2
      DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3450
2
                                SrcReg, /*IsKill=*/true, 0);
3451
2
      assert(DestReg && "Unexpected LDR instruction emission failure.");
3452
2
      SrcReg = DestReg;
3453
2
    }
3454
2
3455
2
    updateValueMap(II, SrcReg);
3456
2
    return true;
3457
104
  }
3458
104
  case Intrinsic::sponentry: {
3459
6
    MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3460
6
3461
6
    // SP = FP + Fixed Object + 16
3462
6
    int FI = MFI.CreateFixedObject(4, 0, false);
3463
6
    unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
3464
6
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3465
6
            TII.get(AArch64::ADDXri), ResultReg)
3466
6
            .addFrameIndex(FI)
3467
6
            .addImm(0)
3468
6
            .addImm(0);
3469
6
3470
6
    updateValueMap(II, ResultReg);
3471
6
    return true;
3472
104
  }
3473
104
  case Intrinsic::memcpy:
3474
20
  case Intrinsic::memmove: {
3475
20
    const auto *MTI = cast<MemTransferInst>(II);
3476
20
    // Don't handle volatile.
3477
20
    if (MTI->isVolatile())
3478
0
      return false;
3479
20
3480
20
    // Disable inlining for memmove before calls to ComputeAddress.  Otherwise,
3481
20
    // we would emit dead code because we don't currently handle memmoves.
3482
20
    bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3483
20
    if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3484
19
      // Small memcpy's are common enough that we want to do them without a call
3485
19
      // if possible.
3486
19
      uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3487
19
      unsigned Alignment = MinAlign(MTI->getDestAlignment(),
3488
19
                                    MTI->getSourceAlignment());
3489
19
      if (isMemCpySmall(Len, Alignment)) {
3490
13
        Address Dest, Src;
3491
13
        if (!computeAddress(MTI->getRawDest(), Dest) ||
3492
13
            !computeAddress(MTI->getRawSource(), Src))
3493
0
          return false;
3494
13
        if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3495
13
          return true;
3496
7
      }
3497
19
    }
3498
7
3499
7
    if (!MTI->getLength()->getType()->isIntegerTy(64))
3500
0
      return false;
3501
7
3502
7
    if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3503
0
      // Fast instruction selection doesn't support the special
3504
0
      // address spaces.
3505
0
      return false;
3506
7
3507
7
    const char *IntrMemName = isa<MemCpyInst>(II) ? 
"memcpy"6
:
"memmove"1
;
3508
7
    return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 1);
3509
7
  }
3510
7
  case Intrinsic::memset: {
3511
1
    const MemSetInst *MSI = cast<MemSetInst>(II);
3512
1
    // Don't handle volatile.
3513
1
    if (MSI->isVolatile())
3514
0
      return false;
3515
1
3516
1
    if (!MSI->getLength()->getType()->isIntegerTy(64))
3517
0
      return false;
3518
1
3519
1
    if (MSI->getDestAddressSpace() > 255)
3520
0
      // Fast instruction selection doesn't support the special
3521
0
      // address spaces.
3522
0
      return false;
3523
1
3524
1
    return lowerCallTo(II, "memset", II->getNumArgOperands() - 1);
3525
1
  }
3526
12
  case Intrinsic::sin:
3527
12
  case Intrinsic::cos:
3528
12
  case Intrinsic::pow: {
3529
12
    MVT RetVT;
3530
12
    if (!isTypeLegal(II->getType(), RetVT))
3531
0
      return false;
3532
12
3533
12
    if (RetVT != MVT::f32 && 
RetVT != MVT::f646
)
3534
0
      return false;
3535
12
3536
12
    static const RTLIB::Libcall LibCallTable[3][2] = {
3537
12
      { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3538
12
      { RTLIB::COS_F32, RTLIB::COS_F64 },
3539
12
      { RTLIB::POW_F32, RTLIB::POW_F64 }
3540
12
    };
3541
12
    RTLIB::Libcall LC;
3542
12
    bool Is64Bit = RetVT == MVT::f64;
3543
12
    switch (II->getIntrinsicID()) {
3544
12
    default:
3545
0
      llvm_unreachable("Unexpected intrinsic.");
3546
12
    case Intrinsic::sin:
3547
4
      LC = LibCallTable[0][Is64Bit];
3548
4
      break;
3549
12
    case Intrinsic::cos:
3550
4
      LC = LibCallTable[1][Is64Bit];
3551
4
      break;
3552
12
    case Intrinsic::pow:
3553
4
      LC = LibCallTable[2][Is64Bit];
3554
4
      break;
3555
12
    }
3556
12
3557
12
    ArgListTy Args;
3558
12
    Args.reserve(II->getNumArgOperands());
3559
12
3560
12
    // Populate the argument list.
3561
16
    for (auto &Arg : II->arg_operands()) {
3562
16
      ArgListEntry Entry;
3563
16
      Entry.Val = Arg;
3564
16
      Entry.Ty = Arg->getType();
3565
16
      Args.push_back(Entry);
3566
16
    }
3567
12
3568
12
    CallLoweringInfo CLI;
3569
12
    MCContext &Ctx = MF->getContext();
3570
12
    CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
3571
12
                  TLI.getLibcallName(LC), std::move(Args));
3572
12
    if (!lowerCallTo(CLI))
3573
0
      return false;
3574
12
    updateValueMap(II, CLI.ResultReg);
3575
12
    return true;
3576
12
  }
3577
12
  case Intrinsic::fabs: {
3578
2
    MVT VT;
3579
2
    if (!isTypeLegal(II->getType(), VT))
3580
0
      return false;
3581
2
3582
2
    unsigned Opc;
3583
2
    switch (VT.SimpleTy) {
3584
2
    default:
3585
0
      return false;
3586
2
    case MVT::f32:
3587
1
      Opc = AArch64::FABSSr;
3588
1
      break;
3589
2
    case MVT::f64:
3590
1
      Opc = AArch64::FABSDr;
3591
1
      break;
3592
2
    }
3593
2
    unsigned SrcReg = getRegForValue(II->getOperand(0));
3594
2
    if (!SrcReg)
3595
0
      return false;
3596
2
    bool SrcRegIsKill = hasTrivialKill(II->getOperand(0));
3597
2
    unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3598
2
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
3599
2
      .addReg(SrcReg, getKillRegState(SrcRegIsKill));
3600
2
    updateValueMap(II, ResultReg);
3601
2
    return true;
3602
2
  }
3603
2
  case Intrinsic::trap:
3604
1
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3605
1
        .addImm(1);
3606
1
    return true;
3607
2
  case Intrinsic::debugtrap: {
3608
1
    if (Subtarget->isTargetWindows()) {
3609
1
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3610
1
          .addImm(0xF000);
3611
1
      return true;
3612
1
    }
3613
0
    break;
3614
0
  }
3615
0
3616
2
  case Intrinsic::sqrt: {
3617
2
    Type *RetTy = II->getCalledFunction()->getReturnType();
3618
2
3619
2
    MVT VT;
3620
2
    if (!isTypeLegal(RetTy, VT))
3621
0
      return false;
3622
2
3623
2
    unsigned Op0Reg = getRegForValue(II->getOperand(0));
3624
2
    if (!Op0Reg)
3625
0
      return false;
3626
2
    bool Op0IsKill = hasTrivialKill(II->getOperand(0));
3627
2
3628
2
    unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
3629
2
    if (!ResultReg)
3630
0
      return false;
3631
2
3632
2
    updateValueMap(II, ResultReg);
3633
2
    return true;
3634
2
  }
3635
48
  case Intrinsic::sadd_with_overflow:
3636
48
  case Intrinsic::uadd_with_overflow:
3637
48
  case Intrinsic::ssub_with_overflow:
3638
48
  case Intrinsic::usub_with_overflow:
3639
48
  case Intrinsic::smul_with_overflow:
3640
48
  case Intrinsic::umul_with_overflow: {
3641
48
    // This implements the basic lowering of the xalu with overflow intrinsics.
3642
48
    const Function *Callee = II->getCalledFunction();
3643
48
    auto *Ty = cast<StructType>(Callee->getReturnType());
3644
48
    Type *RetTy = Ty->getTypeAtIndex(0U);
3645
48
3646
48
    MVT VT;
3647
48
    if (!isTypeLegal(RetTy, VT))
3648
0
      return false;
3649
48
3650
48
    if (VT != MVT::i32 && 
VT != MVT::i6425
)
3651
0
      return false;
3652
48
3653
48
    const Value *LHS = II->getArgOperand(0);
3654
48
    const Value *RHS = II->getArgOperand(1);
3655
48
    // Canonicalize immediate to the RHS.
3656
48
    if (isa<ConstantInt>(LHS) && 
!isa<ConstantInt>(RHS)0
&&
3657
48
        
isCommutativeIntrinsic(II)0
)
3658
0
      std::swap(LHS, RHS);
3659
48
3660
48
    // Simplify multiplies.
3661
48
    Intrinsic::ID IID = II->getIntrinsicID();
3662
48
    switch (IID) {
3663
48
    default:
3664
31
      break;
3665
48
    case Intrinsic::smul_with_overflow:
3666
8
      if (const auto *C = dyn_cast<ConstantInt>(RHS))
3667
2
        if (C->getValue() == 2) {
3668
2
          IID = Intrinsic::sadd_with_overflow;
3669
2
          RHS = LHS;
3670
2
        }
3671
8
      break;
3672
48
    case Intrinsic::umul_with_overflow:
3673
9
      if (const auto *C = dyn_cast<ConstantInt>(RHS))
3674
3
        if (C->getValue() == 2) {
3675
2
          IID = Intrinsic::uadd_with_overflow;
3676
2
          RHS = LHS;
3677
2
        }
3678
9
      break;
3679
48
    }
3680
48
3681
48
    unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3682
48
    AArch64CC::CondCode CC = AArch64CC::Invalid;
3683
48
    switch (IID) {
3684
48
    
default: 0
llvm_unreachable0
("Unexpected intrinsic!");
3685
48
    case Intrinsic::sadd_with_overflow:
3686
14
      ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3687
14
      CC = AArch64CC::VS;
3688
14
      break;
3689
48
    case Intrinsic::uadd_with_overflow:
3690
8
      ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3691
8
      CC = AArch64CC::HS;
3692
8
      break;
3693
48
    case Intrinsic::ssub_with_overflow:
3694
7
      ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3695
7
      CC = AArch64CC::VS;
3696
7
      break;
3697
48
    case Intrinsic::usub_with_overflow:
3698
6
      ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3699
6
      CC = AArch64CC::LO;
3700
6
      break;
3701
48
    case Intrinsic::smul_with_overflow: {
3702
6
      CC = AArch64CC::NE;
3703
6
      unsigned LHSReg = getRegForValue(LHS);
3704
6
      if (!LHSReg)
3705
0
        return false;
3706
6
      bool LHSIsKill = hasTrivialKill(LHS);
3707
6
3708
6
      unsigned RHSReg = getRegForValue(RHS);
3709
6
      if (!RHSReg)
3710
0
        return false;
3711
6
      bool RHSIsKill = hasTrivialKill(RHS);
3712
6
3713
6
      if (VT == MVT::i32) {
3714
3
        MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3715
3
        unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
3716
3
                                       /*IsKill=*/false, 32);
3717
3
        MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3718
3
                                            AArch64::sub_32);
3719
3
        ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
3720
3
                                              AArch64::sub_32);
3721
3
        emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3722
3
                    AArch64_AM::ASR, 31, /*WantResult=*/false);
3723
3
      } else {
3724
3
        assert(VT == MVT::i64 && "Unexpected value type.");
3725
3
        // LHSReg and RHSReg cannot be killed by this Mul, since they are
3726
3
        // reused in the next instruction.
3727
3
        MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
3728
3
                            /*IsKill=*/false);
3729
3
        unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
3730
3
                                        RHSReg, RHSIsKill);
3731
3
        emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3732
3
                    AArch64_AM::ASR, 63, /*WantResult=*/false);
3733
3
      }
3734
6
      break;
3735
6
    }
3736
7
    case Intrinsic::umul_with_overflow: {
3737
7
      CC = AArch64CC::NE;
3738
7
      unsigned LHSReg = getRegForValue(LHS);
3739
7
      if (!LHSReg)
3740
0
        return false;
3741
7
      bool LHSIsKill = hasTrivialKill(LHS);
3742
7
3743
7
      unsigned RHSReg = getRegForValue(RHS);
3744
7
      if (!RHSReg)
3745
0
        return false;
3746
7
      bool RHSIsKill = hasTrivialKill(RHS);
3747
7
3748
7
      if (VT == MVT::i32) {
3749
3
        MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3750
3
        emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
3751
3
                    /*IsKill=*/false, AArch64_AM::LSR, 32,
3752
3
                    /*WantResult=*/false);
3753
3
        MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3754
3
                                            AArch64::sub_32);
3755
4
      } else {
3756
4
        assert(VT == MVT::i64 && "Unexpected value type.");
3757
4
        // LHSReg and RHSReg cannot be killed by this Mul, since they are
3758
4
        // reused in the next instruction.
3759
4
        MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
3760
4
                            /*IsKill=*/false);
3761
4
        unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
3762
4
                                        RHSReg, RHSIsKill);
3763
4
        emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
3764
4
                    /*IsKill=*/false, /*WantResult=*/false);
3765
4
      }
3766
7
      break;
3767
7
    }
3768
48
    }
3769
48
3770
48
    if (MulReg) {
3771
13
      ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3772
13
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3773
13
              TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3774
13
    }
3775
48
3776
48
    if (!ResultReg1)
3777
0
      return false;
3778
48
3779
48
    ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3780
48
                                  AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
3781
48
                                  /*IsKill=*/true, getInvertedCondCode(CC));
3782
48
    (void)ResultReg2;
3783
48
    assert((ResultReg1 + 1) == ResultReg2 &&
3784
48
           "Nonconsecutive result registers.");
3785
48
    updateValueMap(II, ResultReg1, 2);
3786
48
    return true;
3787
48
  }
3788
0
  }
3789
0
  return false;
3790
0
}
3791
3792
1.34k
bool AArch64FastISel::selectRet(const Instruction *I) {
3793
1.34k
  const ReturnInst *Ret = cast<ReturnInst>(I);
3794
1.34k
  const Function &F = *I->getParent()->getParent();
3795
1.34k
3796
1.34k
  if (!FuncInfo.CanLowerReturn)
3797
0
    return false;
3798
1.34k
3799
1.34k
  if (F.isVarArg())
3800
6
    return false;
3801
1.33k
3802
1.33k
  if (TLI.supportSwiftError() &&
3803
1.33k
      F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3804
11
    return false;
3805
1.32k
3806
1.32k
  if (TLI.supportSplitCSR(FuncInfo.MF))
3807
3
    return false;
3808
1.32k
3809
1.32k
  // Build a list of return value registers.
3810
1.32k
  SmallVector<unsigned, 4> RetRegs;
3811
1.32k
3812
1.32k
  if (Ret->getNumOperands() > 0) {
3813
970
    CallingConv::ID CC = F.getCallingConv();
3814
970
    SmallVector<ISD::OutputArg, 4> Outs;
3815
970
    GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3816
970
3817
970
    // Analyze operands of the call, assigning locations to each operand.
3818
970
    SmallVector<CCValAssign, 16> ValLocs;
3819
970
    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3820
970
    CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? 
RetCC_AArch64_WebKit_JS1
3821
970
                                                     : 
RetCC_AArch64_AAPCS969
;
3822
970
    CCInfo.AnalyzeReturn(Outs, RetCC);
3823
970
3824
970
    // Only handle a single return value for now.
3825
970
    if (ValLocs.size() != 1)
3826
7
      return false;
3827
963
3828
963
    CCValAssign &VA = ValLocs[0];
3829
963
    const Value *RV = Ret->getOperand(0);
3830
963
3831
963
    // Don't bother handling odd stuff for now.
3832
963
    if ((VA.getLocInfo() != CCValAssign::Full) &&
3833
963
        
(VA.getLocInfo() != CCValAssign::BCvt)72
)
3834
0
      return false;
3835
963
3836
963
    // Only handle register returns for now.
3837
963
    if (!VA.isRegLoc())
3838
0
      return false;
3839
963
3840
963
    unsigned Reg = getRegForValue(RV);
3841
963
    if (Reg == 0)
3842
4
      return false;
3843
959
3844
959
    unsigned SrcReg = Reg + VA.getValNo();
3845
959
    unsigned DestReg = VA.getLocReg();
3846
959
    // Avoid a cross-class copy. This is very unlikely.
3847
959
    if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3848
0
      return false;
3849
959
3850
959
    EVT RVEVT = TLI.getValueType(DL, RV->getType());
3851
959
    if (!RVEVT.isSimple())
3852
0
      return false;
3853
959
3854
959
    // Vectors (of > 1 lane) in big endian need tricky handling.
3855
959
    if (RVEVT.isVector() && 
RVEVT.getVectorNumElements() > 195
&&
3856
959
        
!Subtarget->isLittleEndian()89
)
3857
60
      return false;
3858
899
3859
899
    MVT RVVT = RVEVT.getSimpleVT();
3860
899
    if (RVVT == MVT::f128)
3861
8
      return false;
3862
891
3863
891
    MVT DestVT = VA.getValVT();
3864
891
    // Special handling for extended integers.
3865
891
    if (RVVT != DestVT) {
3866
173
      if (RVVT != MVT::i1 && 
RVVT != MVT::i874
&&
RVVT != MVT::i1644
)
3867
0
        return false;
3868
173
3869
173
      if (!Outs[0].Flags.isZExt() && 
!Outs[0].Flags.isSExt()33
)
3870
22
        return false;
3871
151
3872
151
      bool IsZExt = Outs[0].Flags.isZExt();
3873
151
      SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3874
151
      if (SrcReg == 0)
3875
0
        return false;
3876
869
    }
3877
869
3878
869
    // Make the copy.
3879
869
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3880
869
            TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3881
869
3882
869
    // Add register to return instruction.
3883
869
    RetRegs.push_back(VA.getLocReg());
3884
869
  }
3885
1.32k
3886
1.32k
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3887
1.22k
                                    TII.get(AArch64::RET_ReallyLR));
3888
1.22k
  for (unsigned RetReg : RetRegs)
3889
869
    MIB.addReg(RetReg, RegState::Implicit);
3890
1.22k
  return true;
3891
1.32k
}
3892
3893
14
bool AArch64FastISel::selectTrunc(const Instruction *I) {
3894
14
  Type *DestTy = I->getType();
3895
14
  Value *Op = I->getOperand(0);
3896
14
  Type *SrcTy = Op->getType();
3897
14
3898
14
  EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3899
14
  EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3900
14
  if (!SrcEVT.isSimple())
3901
0
    return false;
3902
14
  if (!DestEVT.isSimple())
3903
0
    return false;
3904
14
3905
14
  MVT SrcVT = SrcEVT.getSimpleVT();
3906
14
  MVT DestVT = DestEVT.getSimpleVT();
3907
14
3908
14
  if (SrcVT != MVT::i64 && 
SrcVT != MVT::i328
&&
SrcVT != MVT::i164
&&
3909
14
      
SrcVT != MVT::i82
)
3910
2
    return false;
3911
12
  if (DestVT != MVT::i32 && DestVT != MVT::i16 && 
DestVT != MVT::i88
&&
3912
12
      
DestVT != MVT::i15
)
3913
0
    return false;
3914
12
3915
12
  unsigned SrcReg = getRegForValue(Op);
3916
12
  if (!SrcReg)
3917
0
    return false;
3918
12
  bool SrcIsKill = hasTrivialKill(Op);
3919
12
3920
12
  // If we're truncating from i64 to a smaller non-legal type then generate an
3921
12
  // AND. Otherwise, we know the high bits are undefined and a truncate only
3922
12
  // generate a COPY. We cannot mark the source register also as result
3923
12
  // register, because this can incorrectly transfer the kill flag onto the
3924
12
  // source register.
3925
12
  unsigned ResultReg;
3926
12
  if (SrcVT == MVT::i64) {
3927
6
    uint64_t Mask = 0;
3928
6
    switch (DestVT.SimpleTy) {
3929
6
    default:
3930
0
      // Trunc i64 to i32 is handled by the target-independent fast-isel.
3931
0
      return false;
3932
6
    case MVT::i1:
3933
3
      Mask = 0x1;
3934
3
      break;
3935
6
    case MVT::i8:
3936
2
      Mask = 0xff;
3937
2
      break;
3938
6
    case MVT::i16:
3939
1
      Mask = 0xffff;
3940
1
      break;
3941
6
    }
3942
6
    // Issue an extract_subreg to get the lower 32-bits.
3943
6
    unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
3944
6
                                                AArch64::sub_32);
3945
6
    // Create the AND instruction which performs the actual truncation.
3946
6
    ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
3947
6
    assert(ResultReg && "Unexpected AND instruction emission failure.");
3948
6
  } else {
3949
6
    ResultReg = createResultReg(&AArch64::GPR32RegClass);
3950
6
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3951
6
            TII.get(TargetOpcode::COPY), ResultReg)
3952
6
        .addReg(SrcReg, getKillRegState(SrcIsKill));
3953
6
  }
3954
12
3955
12
  updateValueMap(I, ResultReg);
3956
12
  return true;
3957
12
}
3958
3959
141
unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
3960
141
  assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
3961
141
          DestVT == MVT::i64) &&
3962
141
         "Unexpected value type.");
3963
141
  // Handle i8 and i16 as i32.
3964
141
  if (DestVT == MVT::i8 || 
DestVT == MVT::i16140
)
3965
3
    DestVT = MVT::i32;
3966
141
3967
141
  if (IsZExt) {
3968
132
    unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
3969
132
    assert(ResultReg && "Unexpected AND instruction emission failure.");
3970
132
    if (DestVT == MVT::i64) {
3971
0
      // We're ZExt i1 to i64.  The ANDWri Wd, Ws, #1 implicitly clears the
3972
0
      // upper 32 bits.  Emit a SUBREG_TO_REG to extend from Wd to Xd.
3973
0
      unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3974
0
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3975
0
              TII.get(AArch64::SUBREG_TO_REG), Reg64)
3976
0
          .addImm(0)
3977
0
          .addReg(ResultReg)
3978
0
          .addImm(AArch64::sub_32);
3979
0
      ResultReg = Reg64;
3980
0
    }
3981
132
    return ResultReg;
3982
132
  } else {
3983
9
    if (DestVT == MVT::i64) {
3984
0
      // FIXME: We're SExt i1 to i64.
3985
0
      return 0;
3986
0
    }
3987
9
    return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
3988
9
                            /*TODO:IsKill=*/false, 0, 0);
3989
9
  }
3990
141
}
3991
3992
unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3993
19
                                      unsigned Op1, bool Op1IsKill) {
3994
19
  unsigned Opc, ZReg;
3995
19
  switch (RetVT.SimpleTy) {
3996
19
  
default: return 00
;
3997
19
  case MVT::i8:
3998
5
  case MVT::i16:
3999
5
  case MVT::i32:
4000
5
    RetVT = MVT::i32;
4001
5
    Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
4002
14
  case MVT::i64:
4003
14
    Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
4004
19
  }
4005
19
4006
19
  const TargetRegisterClass *RC =
4007
19
      (RetVT == MVT::i64) ? 
&AArch64::GPR64RegClass14
:
&AArch64::GPR32RegClass5
;
4008
19
  return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
4009
19
                          /*IsKill=*/ZReg, true);
4010
19
}
4011
4012
unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
4013
3
                                        unsigned Op1, bool Op1IsKill) {
4014
3
  if (RetVT != MVT::i64)
4015
0
    return 0;
4016
3
4017
3
  return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
4018
3
                          Op0, Op0IsKill, Op1, Op1IsKill,
4019
3
                          AArch64::XZR, /*IsKill=*/true);
4020
3
}
4021
4022
unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
4023
3
                                        unsigned Op1, bool Op1IsKill) {
4024
3
  if (RetVT != MVT::i64)
4025
0
    return 0;
4026
3
4027
3
  return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
4028
3
                          Op0, Op0IsKill, Op1, Op1IsKill,
4029
3
                          AArch64::XZR, /*IsKill=*/true);
4030
3
}
4031
4032
unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4033
4
                                     unsigned Op1Reg, bool Op1IsKill) {
4034
4
  unsigned Opc = 0;
4035
4
  bool NeedTrunc = false;
4036
4
  uint64_t Mask = 0;
4037
4
  switch (RetVT.SimpleTy) {
4038
4
  
default: return 00
;
4039
4
  
case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break1
;
4040
4
  
case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break1
;
4041
4
  
case MVT::i32: Opc = AArch64::LSLVWr; break1
;
4042
4
  
case MVT::i64: Opc = AArch64::LSLVXr; break1
;
4043
4
  }
4044
4
4045
4
  const TargetRegisterClass *RC =
4046
4
      (RetVT == MVT::i64) ? 
&AArch64::GPR64RegClass1
:
&AArch64::GPR32RegClass3
;
4047
4
  if (NeedTrunc) {
4048
2
    Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4049
2
    Op1IsKill = true;
4050
2
  }
4051
4
  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4052
4
                                       Op1IsKill);
4053
4
  if (NeedTrunc)
4054
2
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4055
4
  return ResultReg;
4056
4
}
4057
4058
unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4059
                                     bool Op0IsKill, uint64_t Shift,
4060
53
                                     bool IsZExt) {
4061
53
  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4062
53
         "Unexpected source/return type pair.");
4063
53
  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4064
53
          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4065
53
         "Unexpected source value type.");
4066
53
  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4067
53
          RetVT == MVT::i64) && "Unexpected return value type.");
4068
53
4069
53
  bool Is64Bit = (RetVT == MVT::i64);
4070
53
  unsigned RegSize = Is64Bit ? 
6427
:
3226
;
4071
53
  unsigned DstBits = RetVT.getSizeInBits();
4072
53
  unsigned SrcBits = SrcVT.getSizeInBits();
4073
53
  const TargetRegisterClass *RC =
4074
53
      Is64Bit ? 
&AArch64::GPR64RegClass27
:
&AArch64::GPR32RegClass26
;
4075
53
4076
53
  // Just emit a copy for "zero" shifts.
4077
53
  if (Shift == 0) {
4078
2
    if (RetVT == SrcVT) {
4079
1
      unsigned ResultReg = createResultReg(RC);
4080
1
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4081
1
              TII.get(TargetOpcode::COPY), ResultReg)
4082
1
          .addReg(Op0, getKillRegState(Op0IsKill));
4083
1
      return ResultReg;
4084
1
    } else
4085
1
      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4086
51
  }
4087
51
4088
51
  // Don't deal with undefined shifts.
4089
51
  if (Shift >= DstBits)
4090
14
    return 0;
4091
37
4092
37
  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4093
37
  // {S|U}BFM Wd, Wn, #r, #s
4094
37
  // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4095
37
4096
37
  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4097
37
  // %2 = shl i16 %1, 4
4098
37
  // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4099
37
  // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4100
37
  // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4101
37
  // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4102
37
4103
37
  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4104
37
  // %2 = shl i16 %1, 8
4105
37
  // Wd<32+7-24,32-24> = Wn<7:0>
4106
37
  // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4107
37
  // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4108
37
  // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4109
37
4110
37
  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4111
37
  // %2 = shl i16 %1, 12
4112
37
  // Wd<32+3-20,32-20> = Wn<3:0>
4113
37
  // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4114
37
  // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4115
37
  // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4116
37
4117
37
  unsigned ImmR = RegSize - Shift;
4118
37
  // Limit the width to the length of the source type.
4119
37
  unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4120
37
  static const unsigned OpcTable[2][2] = {
4121
37
    {AArch64::SBFMWri, AArch64::SBFMXri},
4122
37
    {AArch64::UBFMWri, AArch64::UBFMXri}
4123
37
  };
4124
37
  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4125
37
  if (SrcVT.SimpleTy <= MVT::i32 && 
RetVT == MVT::i6425
) {
4126
10
    unsigned TmpReg = MRI.createVirtualRegister(RC);
4127
10
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4128
10
            TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4129
10
        .addImm(0)
4130
10
        .addReg(Op0, getKillRegState(Op0IsKill))
4131
10
        .addImm(AArch64::sub_32);
4132
10
    Op0 = TmpReg;
4133
10
    Op0IsKill = true;
4134
10
  }
4135
37
  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4136
37
}
4137
4138
unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4139
4
                                     unsigned Op1Reg, bool Op1IsKill) {
4140
4
  unsigned Opc = 0;
4141
4
  bool NeedTrunc = false;
4142
4
  uint64_t Mask = 0;
4143
4
  switch (RetVT.SimpleTy) {
4144
4
  
default: return 00
;
4145
4
  
case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break1
;
4146
4
  
case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break1
;
4147
4
  
case MVT::i32: Opc = AArch64::LSRVWr; break1
;
4148
4
  
case MVT::i64: Opc = AArch64::LSRVXr; break1
;
4149
4
  }
4150
4
4151
4
  const TargetRegisterClass *RC =
4152
4
      (RetVT == MVT::i64) ? 
&AArch64::GPR64RegClass1
:
&AArch64::GPR32RegClass3
;
4153
4
  if (NeedTrunc) {
4154
2
    Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
4155
2
    Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4156
2
    Op0IsKill = Op1IsKill = true;
4157
2
  }
4158
4
  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4159
4
                                       Op1IsKill);
4160
4
  if (NeedTrunc)
4161
2
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4162
4
  return ResultReg;
4163
4
}
4164
4165
unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4166
                                     bool Op0IsKill, uint64_t Shift,
4167
23
                                     bool IsZExt) {
4168
23
  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4169
23
         "Unexpected source/return type pair.");
4170
23
  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4171
23
          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4172
23
         "Unexpected source value type.");
4173
23
  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4174
23
          RetVT == MVT::i64) && "Unexpected return value type.");
4175
23
4176
23
  bool Is64Bit = (RetVT == MVT::i64);
4177
23
  unsigned RegSize = Is64Bit ? 
6410
:
3213
;
4178
23
  unsigned DstBits = RetVT.getSizeInBits();
4179
23
  unsigned SrcBits = SrcVT.getSizeInBits();
4180
23
  const TargetRegisterClass *RC =
4181
23
      Is64Bit ? 
&AArch64::GPR64RegClass10
:
&AArch64::GPR32RegClass13
;
4182
23
4183
23
  // Just emit a copy for "zero" shifts.
4184
23
  if (Shift == 0) {
4185
2
    if (RetVT == SrcVT) {
4186
1
      unsigned ResultReg = createResultReg(RC);
4187
1
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4188
1
              TII.get(TargetOpcode::COPY), ResultReg)
4189
1
      .addReg(Op0, getKillRegState(Op0IsKill));
4190
1
      return ResultReg;
4191
1
    } else
4192
1
      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4193
21
  }
4194
21
4195
21
  // Don't deal with undefined shifts.
4196
21
  if (Shift >= DstBits)
4197
0
    return 0;
4198
21
4199
21
  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4200
21
  // {S|U}BFM Wd, Wn, #r, #s
4201
21
  // Wd<s-r:0> = Wn<s:r> when r <= s
4202
21
4203
21
  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4204
21
  // %2 = lshr i16 %1, 4
4205
21
  // Wd<7-4:0> = Wn<7:4>
4206
21
  // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4207
21
  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4208
21
  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4209
21
4210
21
  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4211
21
  // %2 = lshr i16 %1, 8
4212
21
  // Wd<7-7,0> = Wn<7:7>
4213
21
  // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4214
21
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4215
21
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4216
21
4217
21
  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4218
21
  // %2 = lshr i16 %1, 12
4219
21
  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4220
21
  // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4221
21
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4222
21
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4223
21
4224
21
  if (Shift >= SrcBits && 
IsZExt5
)
4225
3
    return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4226
18
4227
18
  // It is not possible to fold a sign-extend into the LShr instruction. In this
4228
18
  // case emit a sign-extend.
4229
18
  if (!IsZExt) {
4230
4
    Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4231
4
    if (!Op0)
4232
0
      return 0;
4233
4
    Op0IsKill = true;
4234
4
    SrcVT = RetVT;
4235
4
    SrcBits = SrcVT.getSizeInBits();
4236
4
    IsZExt = true;
4237
4
  }
4238
18
4239
18
  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4240
18
  unsigned ImmS = SrcBits - 1;
4241
18
  static const unsigned OpcTable[2][2] = {
4242
18
    {AArch64::SBFMWri, AArch64::SBFMXri},
4243
18
    {AArch64::UBFMWri, AArch64::UBFMXri}
4244
18
  };
4245
18
  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4246
18
  if (SrcVT.SimpleTy <= MVT::i32 && 
RetVT == MVT::i6410
) {
4247
0
    unsigned TmpReg = MRI.createVirtualRegister(RC);
4248
0
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4249
0
            TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4250
0
        .addImm(0)
4251
0
        .addReg(Op0, getKillRegState(Op0IsKill))
4252
0
        .addImm(AArch64::sub_32);
4253
0
    Op0 = TmpReg;
4254
0
    Op0IsKill = true;
4255
0
  }
4256
18
  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4257
18
}
4258
4259
unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4260
4
                                     unsigned Op1Reg, bool Op1IsKill) {
4261
4
  unsigned Opc = 0;
4262
4
  bool NeedTrunc = false;
4263
4
  uint64_t Mask = 0;
4264
4
  switch (RetVT.SimpleTy) {
4265
4
  
default: return 00
;
4266
4
  
case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break1
;
4267
4
  
case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break1
;
4268
4
  
case MVT::i32: Opc = AArch64::ASRVWr; break1
;
4269
4
  
case MVT::i64: Opc = AArch64::ASRVXr; break1
;
4270
4
  }
4271
4
4272
4
  const TargetRegisterClass *RC =
4273
4
      (RetVT == MVT::i64) ? 
&AArch64::GPR64RegClass1
:
&AArch64::GPR32RegClass3
;
4274
4
  if (NeedTrunc) {
4275
2
    Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
4276
2
    Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4277
2
    Op0IsKill = Op1IsKill = true;
4278
2
  }
4279
4
  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4280
4
                                       Op1IsKill);
4281
4
  if (NeedTrunc)
4282
2
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4283
4
  return ResultReg;
4284
4
}
4285
4286
unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4287
                                     bool Op0IsKill, uint64_t Shift,
4288
28
                                     bool IsZExt) {
4289
28
  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4290
28
         "Unexpected source/return type pair.");
4291
28
  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4292
28
          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4293
28
         "Unexpected source value type.");
4294
28
  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4295
28
          RetVT == MVT::i64) && "Unexpected return value type.");
4296
28
4297
28
  bool Is64Bit = (RetVT == MVT::i64);
4298
28
  unsigned RegSize = Is64Bit ? 
6411
:
3217
;
4299
28
  unsigned DstBits = RetVT.getSizeInBits();
4300
28
  unsigned SrcBits = SrcVT.getSizeInBits();
4301
28
  const TargetRegisterClass *RC =
4302
28
      Is64Bit ? 
&AArch64::GPR64RegClass11
:
&AArch64::GPR32RegClass17
;
4303
28
4304
28
  // Just emit a copy for "zero" shifts.
4305
28
  if (Shift == 0) {
4306
2
    if (RetVT == SrcVT) {
4307
1
      unsigned ResultReg = createResultReg(RC);
4308
1
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4309
1
              TII.get(TargetOpcode::COPY), ResultReg)
4310
1
      .addReg(Op0, getKillRegState(Op0IsKill));
4311
1
      return ResultReg;
4312
1
    } else
4313
1
      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4314
26
  }
4315
26
4316
26
  // Don't deal with undefined shifts.
4317
26
  if (Shift >= DstBits)
4318
0
    return 0;
4319
26
4320
26
  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4321
26
  // {S|U}BFM Wd, Wn, #r, #s
4322
26
  // Wd<s-r:0> = Wn<s:r> when r <= s
4323
26
4324
26
  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4325
26
  // %2 = ashr i16 %1, 4
4326
26
  // Wd<7-4:0> = Wn<7:4>
4327
26
  // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4328
26
  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4329
26
  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4330
26
4331
26
  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4332
26
  // %2 = ashr i16 %1, 8
4333
26
  // Wd<7-7,0> = Wn<7:7>
4334
26
  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4335
26
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4336
26
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4337
26
4338
26
  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4339
26
  // %2 = ashr i16 %1, 12
4340
26
  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4341
26
  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4342
26
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4343
26
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4344
26
4345
26
  if (Shift >= SrcBits && 
IsZExt6
)
4346
3
    return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4347
23
4348
23
  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4349
23
  unsigned ImmS = SrcBits - 1;
4350
23
  static const unsigned OpcTable[2][2] = {
4351
23
    {AArch64::SBFMWri, AArch64::SBFMXri},
4352
23
    {AArch64::UBFMWri, AArch64::UBFMXri}
4353
23
  };
4354
23
  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4355
23
  if (SrcVT.SimpleTy <= MVT::i32 && 
RetVT == MVT::i6415
) {
4356
1
    unsigned TmpReg = MRI.createVirtualRegister(RC);
4357
1
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4358
1
            TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4359
1
        .addImm(0)
4360
1
        .addReg(Op0, getKillRegState(Op0IsKill))
4361
1
        .addImm(AArch64::sub_32);
4362
1
    Op0 = TmpReg;
4363
1
    Op0IsKill = true;
4364
1
  }
4365
23
  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4366
23
}
4367
4368
unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4369
415
                                     bool IsZExt) {
4370
415
  assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4371
415
4372
415
  // FastISel does not have plumbing to deal with extensions where the SrcVT or
4373
415
  // DestVT are odd things, so test to make sure that they are both types we can
4374
415
  // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4375
415
  // bail out to SelectionDAG.
4376
415
  if (((DestVT != MVT::i8) && 
(DestVT != MVT::i16)414
&&
4377
415
       
(DestVT != MVT::i32)409
&&
(DestVT != MVT::i64)61
) ||
4378
415
      ((SrcVT !=  MVT::i1) && 
(SrcVT != MVT::i8)274
&&
4379
415
       
(SrcVT != MVT::i16)128
&&
(SrcVT != MVT::i32)31
))
4380
0
    return 0;
4381
415
4382
415
  unsigned Opc;
4383
415
  unsigned Imm = 0;
4384
415
4385
415
  switch (SrcVT.SimpleTy) {
4386
415
  default:
4387
0
    return 0;
4388
415
  case MVT::i1:
4389
141
    return emiti1Ext(SrcReg, DestVT, IsZExt);
4390
415
  case MVT::i8:
4391
146
    if (DestVT == MVT::i64)
4392
15
      Opc = IsZExt ? 
AArch64::UBFMXri7
:
AArch64::SBFMXri8
;
4393
131
    else
4394
131
      Opc = IsZExt ? 
AArch64::UBFMWri103
:
AArch64::SBFMWri28
;
4395
146
    Imm = 7;
4396
146
    break;
4397
415
  case MVT::i16:
4398
97
    if (DestVT == MVT::i64)
4399
15
      Opc = IsZExt ? 
AArch64::UBFMXri7
:
AArch64::SBFMXri8
;
4400
82
    else
4401
82
      Opc = IsZExt ? 
AArch64::UBFMWri50
:
AArch64::SBFMWri32
;
4402
97
    Imm = 15;
4403
97
    break;
4404
415
  case MVT::i32:
4405
31
    assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4406
31
    Opc = IsZExt ? 
AArch64::UBFMXri19
:
AArch64::SBFMXri12
;
4407
31
    Imm = 31;
4408
31
    break;
4409
274
  }
4410
274
4411
274
  // Handle i8 and i16 as i32.
4412
274
  if (DestVT == MVT::i8 || DestVT == MVT::i16)
4413
3
    DestVT = MVT::i32;
4414
271
  else if (DestVT == MVT::i64) {
4415
61
    unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4416
61
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4417
61
            TII.get(AArch64::SUBREG_TO_REG), Src64)
4418
61
        .addImm(0)
4419
61
        .addReg(SrcReg)
4420
61
        .addImm(AArch64::sub_32);
4421
61
    SrcReg = Src64;
4422
61
  }
4423
274
4424
274
  const TargetRegisterClass *RC =
4425
274
      (DestVT == MVT::i64) ? 
&AArch64::GPR64RegClass61
:
&AArch64::GPR32RegClass213
;
4426
274
  return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
4427
274
}
4428
4429
1
static bool isZExtLoad(const MachineInstr *LI) {
4430
1
  switch (LI->getOpcode()) {
4431
1
  default:
4432
0
    return false;
4433
1
  case AArch64::LDURBBi:
4434
1
  case AArch64::LDURHHi:
4435
1
  case AArch64::LDURWi:
4436
1
  case AArch64::LDRBBui:
4437
1
  case AArch64::LDRHHui:
4438
1
  case AArch64::LDRWui:
4439
1
  case AArch64::LDRBBroX:
4440
1
  case AArch64::LDRHHroX:
4441
1
  case AArch64::LDRWroX:
4442
1
  case AArch64::LDRBBroW:
4443
1
  case AArch64::LDRHHroW:
4444
1
  case AArch64::LDRWroW:
4445
1
    return true;
4446
1
  }
4447
1
}
4448
4449
static bool isSExtLoad(const MachineInstr *LI) {
4450
  switch (LI->getOpcode()) {
4451
  default:
4452
    return false;
4453
  case AArch64::LDURSBWi:
4454
  case AArch64::LDURSHWi:
4455
  case AArch64::LDURSBXi:
4456
  case AArch64::LDURSHXi:
4457
  case AArch64::LDURSWi:
4458
  case AArch64::LDRSBWui:
4459
  case AArch64::LDRSHWui:
4460
  case AArch64::LDRSBXui:
4461
  case AArch64::LDRSHXui:
4462
  case AArch64::LDRSWui:
4463
  case AArch64::LDRSBWroX:
4464
  case AArch64::LDRSHWroX:
4465
  case AArch64::LDRSBXroX:
4466
  case AArch64::LDRSHXroX:
4467
  case AArch64::LDRSWroX:
4468
  case AArch64::LDRSBWroW:
4469
  case AArch64::LDRSHWroW:
4470
  case AArch64::LDRSBXroW:
4471
  case AArch64::LDRSHXroW:
4472
  case AArch64::LDRSWroW:
4473
    return true;
4474
  }
4475
}
4476
4477
bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4478
199
                                         MVT SrcVT) {
4479
199
  const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4480
199
  if (!LI || 
!LI->hasOneUse()89
)
4481
110
    return false;
4482
89
4483
89
  // Check if the load instruction has already been selected.
4484
89
  unsigned Reg = lookUpRegForValue(LI);
4485
89
  if (!Reg)
4486
88
    return false;
4487
1
4488
1
  MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4489
1
  if (!MI)
4490
0
    return false;
4491
1
4492
1
  // Check if the correct load instruction has been emitted - SelectionDAG might
4493
1
  // have emitted a zero-extending load, but we need a sign-extending load.
4494
1
  bool IsZExt = isa<ZExtInst>(I);
4495
1
  const auto *LoadMI = MI;
4496
1
  if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4497
1
      
LoadMI->getOperand(1).getSubReg() == AArch64::sub_320
) {
4498
0
    unsigned LoadReg = MI->getOperand(1).getReg();
4499
0
    LoadMI = MRI.getUniqueVRegDef(LoadReg);
4500
0
    assert(LoadMI && "Expected valid instruction");
4501
0
  }
4502
1
  if (!(IsZExt && isZExtLoad(LoadMI)) && 
!(0
!IsZExt0
&&
isSExtLoad(LoadMI)0
))
4503
0
    return false;
4504
1
4505
1
  // Nothing to be done.
4506
1
  if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4507
0
    updateValueMap(I, Reg);
4508
0
    return true;
4509
0
  }
4510
1
4511
1
  if (IsZExt) {
4512
1
    unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
4513
1
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4514
1
            TII.get(AArch64::SUBREG_TO_REG), Reg64)
4515
1
        .addImm(0)
4516
1
        .addReg(Reg, getKillRegState(true))
4517
1
        .addImm(AArch64::sub_32);
4518
1
    Reg = Reg64;
4519
1
  } else {
4520
0
    assert((MI->getOpcode() == TargetOpcode::COPY &&
4521
0
            MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4522
0
           "Expected copy instruction");
4523
0
    Reg = MI->getOperand(1).getReg();
4524
0
    MachineBasicBlock::iterator I(MI);
4525
0
    removeDeadCode(I, std::next(I));
4526
0
  }
4527
1
  updateValueMap(I, Reg);
4528
1
  return true;
4529
1
}
4530
4531
206
bool AArch64FastISel::selectIntExt(const Instruction *I) {
4532
206
  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4533
206
         "Unexpected integer extend instruction.");
4534
206
  MVT RetVT;
4535
206
  MVT SrcVT;
4536
206
  if (!isTypeSupported(I->getType(), RetVT))
4537
7
    return false;
4538
199
4539
199
  if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4540
0
    return false;
4541
199
4542
199
  // Try to optimize already sign-/zero-extended values from load instructions.
4543
199
  if (optimizeIntExtLoad(I, RetVT, SrcVT))
4544
1
    return true;
4545
198
4546
198
  unsigned SrcReg = getRegForValue(I->getOperand(0));
4547
198
  if (!SrcReg)
4548
0
    return false;
4549
198
  bool SrcIsKill = hasTrivialKill(I->getOperand(0));
4550
198
4551
198
  // Try to optimize already sign-/zero-extended values from function arguments.
4552
198
  bool IsZExt = isa<ZExtInst>(I);
4553
198
  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4554
80
    if ((IsZExt && 
Arg->hasZExtAttr()38
) ||
(49
!IsZExt49
&&
Arg->hasSExtAttr()42
)) {
4555
66
      if (RetVT == MVT::i64 && 
SrcVT != MVT::i6413
) {
4556
13
        unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
4557
13
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4558
13
                TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4559
13
            .addImm(0)
4560
13
            .addReg(SrcReg, getKillRegState(SrcIsKill))
4561
13
            .addImm(AArch64::sub_32);
4562
13
        SrcReg = ResultReg;
4563
13
      }
4564
66
      // Conservatively clear all kill flags from all uses, because we are
4565
66
      // replacing a sign-/zero-extend instruction at IR level with a nop at MI
4566
66
      // level. The result of the instruction at IR level might have been
4567
66
      // trivially dead, which is now not longer true.
4568
66
      unsigned UseReg = lookUpRegForValue(I);
4569
66
      if (UseReg)
4570
66
        MRI.clearKillFlags(UseReg);
4571
66
4572
66
      updateValueMap(I, SrcReg);
4573
66
      return true;
4574
66
    }
4575
132
  }
4576
132
4577
132
  unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4578
132
  if (!ResultReg)
4579
0
    return false;
4580
132
4581
132
  updateValueMap(I, ResultReg);
4582
132
  return true;
4583
132
}
4584
4585
8
bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4586
8
  EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4587
8
  if (!DestEVT.isSimple())
4588
0
    return false;
4589
8
4590
8
  MVT DestVT = DestEVT.getSimpleVT();
4591
8
  if (DestVT != MVT::i64 && 
DestVT != MVT::i324
)
4592
0
    return false;
4593
8
4594
8
  unsigned DivOpc;
4595
8
  bool Is64bit = (DestVT == MVT::i64);
4596
8
  switch (ISDOpcode) {
4597
8
  default:
4598
0
    return false;
4599
8
  case ISD::SREM:
4600
4
    DivOpc = Is64bit ? 
AArch64::SDIVXr2
:
AArch64::SDIVWr2
;
4601
4
    break;
4602
8
  case ISD::UREM:
4603
4
    DivOpc = Is64bit ? 
AArch64::UDIVXr2
:
AArch64::UDIVWr2
;
4604
4
    break;
4605
8
  }
4606
8
  unsigned MSubOpc = Is64bit ? 
AArch64::MSUBXrrr4
:
AArch64::MSUBWrrr4
;
4607
8
  unsigned Src0Reg = getRegForValue(I->getOperand(0));
4608
8
  if (!Src0Reg)
4609
0
    return false;
4610
8
  bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4611
8
4612
8
  unsigned Src1Reg = getRegForValue(I->getOperand(1));
4613
8
  if (!Src1Reg)
4614
0
    return false;
4615
8
  bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4616
8
4617
8
  const TargetRegisterClass *RC =
4618
8
      (DestVT == MVT::i64) ? 
&AArch64::GPR64RegClass4
:
&AArch64::GPR32RegClass4
;
4619
8
  unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
4620
8
                                     Src1Reg, /*IsKill=*/false);
4621
8
  assert(QuotReg && "Unexpected DIV instruction emission failure.");
4622
8
  // The remainder is computed as numerator - (quotient * denominator) using the
4623
8
  // MSUB instruction.
4624
8
  unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
4625
8
                                        Src1Reg, Src1IsKill, Src0Reg,
4626
8
                                        Src0IsKill);
4627
8
  updateValueMap(I, ResultReg);
4628
8
  return true;
4629
8
}
4630
4631
10
bool AArch64FastISel::selectMul(const Instruction *I) {
4632
10
  MVT VT;
4633
10
  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4634
0
    return false;
4635
10
4636
10
  if (VT.isVector())
4637
0
    return selectBinaryOp(I, ISD::MUL);
4638
10
4639
10
  const Value *Src0 = I->getOperand(0);
4640
10
  const Value *Src1 = I->getOperand(1);
4641
10
  if (const auto *C = dyn_cast<ConstantInt>(Src0))
4642
0
    if (C->getValue().isPowerOf2())
4643
0
      std::swap(Src0, Src1);
4644
10
4645
10
  // Try to simplify to a shift instruction.
4646
10
  if (const auto *C = dyn_cast<ConstantInt>(Src1))
4647
4
    if (C->getValue().isPowerOf2()) {
4648
2
      uint64_t ShiftVal = C->getValue().logBase2();
4649
2
      MVT SrcVT = VT;
4650
2
      bool IsZExt = true;
4651
2
      if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4652
0
        if (!isIntExtFree(ZExt)) {
4653
0
          MVT VT;
4654
0
          if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4655
0
            SrcVT = VT;
4656
0
            IsZExt = true;
4657
0
            Src0 = ZExt->getOperand(0);
4658
0
          }
4659
0
        }
4660
2
      } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4661
0
        if (!isIntExtFree(SExt)) {
4662
0
          MVT VT;
4663
0
          if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4664
0
            SrcVT = VT;
4665
0
            IsZExt = false;
4666
0
            Src0 = SExt->getOperand(0);
4667
0
          }
4668
0
        }
4669
0
      }
4670
2
4671
2
      unsigned Src0Reg = getRegForValue(Src0);
4672
2
      if (!Src0Reg)
4673
0
        return false;
4674
2
      bool Src0IsKill = hasTrivialKill(Src0);
4675
2
4676
2
      unsigned ResultReg =
4677
2
          emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt);
4678
2
4679
2
      if (ResultReg) {
4680
2
        updateValueMap(I, ResultReg);
4681
2
        return true;
4682
2
      }
4683
8
    }
4684
8
4685
8
  unsigned Src0Reg = getRegForValue(I->getOperand(0));
4686
8
  if (!Src0Reg)
4687
0
    return false;
4688
8
  bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4689
8
4690
8
  unsigned Src1Reg = getRegForValue(I->getOperand(1));
4691
8
  if (!Src1Reg)
4692
0
    return false;
4693
8
  bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4694
8
4695
8
  unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
4696
8
4697
8
  if (!ResultReg)
4698
0
    return false;
4699
8
4700
8
  updateValueMap(I, ResultReg);
4701
8
  return true;
4702
8
}
4703
4704
98
bool AArch64FastISel::selectShift(const Instruction *I) {
4705
98
  MVT RetVT;
4706
98
  if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4707
0
    return false;
4708
98
4709
98
  if (RetVT.isVector())
4710
0
    return selectOperator(I, I->getOpcode());
4711
98
4712
98
  if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4713
86
    unsigned ResultReg = 0;
4714
86
    uint64_t ShiftVal = C->getZExtValue();
4715
86
    MVT SrcVT = RetVT;
4716
86
    bool IsZExt = I->getOpcode() != Instruction::AShr;
4717
86
    const Value *Op0 = I->getOperand(0);
4718
86
    if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4719
22
      if (!isIntExtFree(ZExt)) {
4720
22
        MVT TmpVT;
4721
22
        if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4722
22
          SrcVT = TmpVT;
4723
22
          IsZExt = true;
4724
22
          Op0 = ZExt->getOperand(0);
4725
22
        }
4726
22
      }
4727
64
    } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4728
19
      if (!isIntExtFree(SExt)) {
4729
19
        MVT TmpVT;
4730
19
        if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4731
19
          SrcVT = TmpVT;
4732
19
          IsZExt = false;
4733
19
          Op0 = SExt->getOperand(0);
4734
19
        }
4735
19
      }
4736
19
    }
4737
86
4738
86
    unsigned Op0Reg = getRegForValue(Op0);
4739
86
    if (!Op0Reg)
4740
0
      return false;
4741
86
    bool Op0IsKill = hasTrivialKill(Op0);
4742
86
4743
86
    switch (I->getOpcode()) {
4744
86
    
default: 0
llvm_unreachable0
("Unexpected instruction.");
4745
86
    case Instruction::Shl:
4746
48
      ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4747
48
      break;
4748
86
    case Instruction::AShr:
4749
18
      ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4750
18
      break;
4751
86
    case Instruction::LShr:
4752
20
      ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4753
20
      break;
4754
86
    }
4755
86
    if (!ResultReg)
4756
14
      return false;
4757
72
4758
72
    updateValueMap(I, ResultReg);
4759
72
    return true;
4760
72
  }
4761
12
4762
12
  unsigned Op0Reg = getRegForValue(I->getOperand(0));
4763
12
  if (!Op0Reg)
4764
0
    return false;
4765
12
  bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4766
12
4767
12
  unsigned Op1Reg = getRegForValue(I->getOperand(1));
4768
12
  if (!Op1Reg)
4769
0
    return false;
4770
12
  bool Op1IsKill = hasTrivialKill(I->getOperand(1));
4771
12
4772
12
  unsigned ResultReg = 0;
4773
12
  switch (I->getOpcode()) {
4774
12
  
default: 0
llvm_unreachable0
("Unexpected instruction.");
4775
12
  case Instruction::Shl:
4776
4
    ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4777
4
    break;
4778
12
  case Instruction::AShr:
4779
4
    ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4780
4
    break;
4781
12
  case Instruction::LShr:
4782
4
    ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4783
4
    break;
4784
12
  }
4785
12
4786
12
  if (!ResultReg)
4787
0
    return false;
4788
12
4789
12
  updateValueMap(I, ResultReg);
4790
12
  return true;
4791
12
}
4792
4793
23
bool AArch64FastISel::selectBitCast(const Instruction *I) {
4794
23
  MVT RetVT, SrcVT;
4795
23
4796
23
  if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4797
0
    return false;
4798
23
  if (!isTypeLegal(I->getType(), RetVT))
4799
0
    return false;
4800
23
4801
23
  unsigned Opc;
4802
23
  if (RetVT == MVT::f32 && 
SrcVT == MVT::i321
)
4803
1
    Opc = AArch64::FMOVWSr;
4804
22
  else if (RetVT == MVT::f64 && 
SrcVT == MVT::i645
)
4805
3
    Opc = AArch64::FMOVXDr;
4806
19
  else if (RetVT == MVT::i32 && 
SrcVT == MVT::f321
)
4807
1
    Opc = AArch64::FMOVSWr;
4808
18
  else if (RetVT == MVT::i64 && 
SrcVT == MVT::f6414
)
4809
3
    Opc = AArch64::FMOVDXr;
4810
15
  else
4811
15
    return false;
4812
8
4813
8
  const TargetRegisterClass *RC = nullptr;
4814
8
  switch (RetVT.SimpleTy) {
4815
8
  
default: 0
llvm_unreachable0
("Unexpected value type.");
4816
8
  
case MVT::i32: RC = &AArch64::GPR32RegClass; break1
;
4817
8
  
case MVT::i64: RC = &AArch64::GPR64RegClass; break3
;
4818
8
  
case MVT::f32: RC = &AArch64::FPR32RegClass; break1
;
4819
8
  
case MVT::f64: RC = &AArch64::FPR64RegClass; break3
;
4820
8
  }
4821
8
  unsigned Op0Reg = getRegForValue(I->getOperand(0));
4822
8
  if (!Op0Reg)
4823
0
    return false;
4824
8
  bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4825
8
  unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);
4826
8
4827
8
  if (!ResultReg)
4828
0
    return false;
4829
8
4830
8
  updateValueMap(I, ResultReg);
4831
8
  return true;
4832
8
}
4833
4834
4
bool AArch64FastISel::selectFRem(const Instruction *I) {
4835
4
  MVT RetVT;
4836
4
  if (!isTypeLegal(I->getType(), RetVT))
4837
0
    return false;
4838
4
4839
4
  RTLIB::Libcall LC;
4840
4
  switch (RetVT.SimpleTy) {
4841
4
  default:
4842
0
    return false;
4843
4
  case MVT::f32:
4844
2
    LC = RTLIB::REM_F32;
4845
2
    break;
4846
4
  case MVT::f64:
4847
2
    LC = RTLIB::REM_F64;
4848
2
    break;
4849
4
  }
4850
4
4851
4
  ArgListTy Args;
4852
4
  Args.reserve(I->getNumOperands());
4853
4
4854
4
  // Populate the argument list.
4855
8
  for (auto &Arg : I->operands()) {
4856
8
    ArgListEntry Entry;
4857
8
    Entry.Val = Arg;
4858
8
    Entry.Ty = Arg->getType();
4859
8
    Args.push_back(Entry);
4860
8
  }
4861
4
4862
4
  CallLoweringInfo CLI;
4863
4
  MCContext &Ctx = MF->getContext();
4864
4
  CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4865
4
                TLI.getLibcallName(LC), std::move(Args));
4866
4
  if (!lowerCallTo(CLI))
4867
0
    return false;
4868
4
  updateValueMap(I, CLI.ResultReg);
4869
4
  return true;
4870
4
}
4871
4872
26
bool AArch64FastISel::selectSDiv(const Instruction *I) {
4873
26
  MVT VT;
4874
26
  if (!isTypeLegal(I->getType(), VT))
4875
0
    return false;
4876
26
4877
26
  if (!isa<ConstantInt>(I->getOperand(1)))
4878
12
    return selectBinaryOp(I, ISD::SDIV);
4879
14
4880
14
  const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4881
14
  if ((VT != MVT::i32 && 
VT != MVT::i648
) || !C ||
4882
14
      !(C.isPowerOf2() || 
(-C).isPowerOf2()4
))
4883
0
    return selectBinaryOp(I, ISD::SDIV);
4884
14
4885
14
  unsigned Lg2 = C.countTrailingZeros();
4886
14
  unsigned Src0Reg = getRegForValue(I->getOperand(0));
4887
14
  if (!Src0Reg)
4888
0
    return false;
4889
14
  bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4890
14
4891
14
  if (cast<BinaryOperator>(I)->isExact()) {
4892
3
    unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
4893
3
    if (!ResultReg)
4894
0
      return false;
4895
3
    updateValueMap(I, ResultReg);
4896
3
    return true;
4897
3
  }
4898
11
4899
11
  int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4900
11
  unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne);
4901
11
  if (!AddReg)
4902
0
    return false;
4903
11
4904
11
  // (Src0 < 0) ? Pow2 - 1 : 0;
4905
11
  if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
4906
0
    return false;
4907
11
4908
11
  unsigned SelectOpc;
4909
11
  const TargetRegisterClass *RC;
4910
11
  if (VT == MVT::i64) {
4911
6
    SelectOpc = AArch64::CSELXr;
4912
6
    RC = &AArch64::GPR64RegClass;
4913
6
  } else {
4914
5
    SelectOpc = AArch64::CSELWr;
4915
5
    RC = &AArch64::GPR32RegClass;
4916
5
  }
4917
11
  unsigned SelectReg =
4918
11
      fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
4919
11
                       Src0IsKill, AArch64CC::LT);
4920
11
  if (!SelectReg)
4921
0
    return false;
4922
11
4923
11
  // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4924
11
  // negate the result.
4925
11
  unsigned ZeroReg = (VT == MVT::i64) ? 
AArch64::XZR6
:
AArch64::WZR5
;
4926
11
  unsigned ResultReg;
4927
11
  if (C.isNegative())
4928
4
    ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
4929
4
                              SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2);
4930
7
  else
4931
7
    ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);
4932
11
4933
11
  if (!ResultReg)
4934
0
    return false;
4935
11
4936
11
  updateValueMap(I, ResultReg);
4937
11
  return true;
4938
11
}
4939
4940
/// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4941
/// have to duplicate it for AArch64, because otherwise we would fail during the
4942
/// sign-extend emission.
4943
6
std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4944
6
  unsigned IdxN = getRegForValue(Idx);
4945
6
  if (IdxN == 0)
4946
0
    // Unhandled operand. Halt "fast" selection and bail.
4947
0
    return std::pair<unsigned, bool>(0, false);
4948
6
4949
6
  bool IdxNIsKill = hasTrivialKill(Idx);
4950
6
4951
6
  // If the index is smaller or larger than intptr_t, truncate or extend it.
4952
6
  MVT PtrVT = TLI.getPointerTy(DL);
4953
6
  EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4954
6
  if (IdxVT.bitsLT(PtrVT)) {
4955
1
    IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
4956
1
    IdxNIsKill = true;
4957
5
  } else if (IdxVT.bitsGT(PtrVT))
4958
5
    
llvm_unreachable0
("AArch64 FastISel doesn't support types larger than i64");
4959
6
  return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
4960
6
}
4961
4962
/// This is mostly a copy of the existing FastISel GEP code, but we have to
4963
/// duplicate it for AArch64, because otherwise we would bail out even for
4964
/// simple cases. This is because the standard fastEmit functions don't cover
4965
/// MUL at all and ADD is lowered very inefficientily.
4966
21
bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4967
21
  unsigned N = getRegForValue(I->getOperand(0));
4968
21
  if (!N)
4969
0
    return false;
4970
21
  bool NIsKill = hasTrivialKill(I->getOperand(0));
4971
21
4972
21
  // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4973
21
  // into a single N = N + TotalOffset.
4974
21
  uint64_t TotalOffs = 0;
4975
21
  MVT VT = TLI.getPointerTy(DL);
4976
21
  for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
4977
47
       GTI != E; 
++GTI26
) {
4978
26
    const Value *Idx = GTI.getOperand();
4979
26
    if (auto *StTy = GTI.getStructTypeOrNull()) {
4980
4
      unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4981
4
      // N = N + Offset
4982
4
      if (Field)
4983
2
        TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4984
22
    } else {
4985
22
      Type *Ty = GTI.getIndexedType();
4986
22
4987
22
      // If this is a constant subscript, handle it quickly.
4988
22
      if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4989
16
        if (CI->isZero())
4990
5
          continue;
4991
11
        // N = N + Offset
4992
11
        TotalOffs +=
4993
11
            DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
4994
11
        continue;
4995
11
      }
4996
6
      if (TotalOffs) {
4997
0
        N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
4998
0
        if (!N)
4999
0
          return false;
5000
0
        NIsKill = true;
5001
0
        TotalOffs = 0;
5002
0
      }
5003
6
5004
6
      // N = N + Idx * ElementSize;
5005
6
      uint64_t ElementSize = DL.getTypeAllocSize(Ty);
5006
6
      std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
5007
6
      unsigned IdxN = Pair.first;
5008
6
      bool IdxNIsKill = Pair.second;
5009
6
      if (!IdxN)
5010
0
        return false;
5011
6
5012
6
      if (ElementSize != 1) {
5013
4
        unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
5014
4
        if (!C)
5015
0
          return false;
5016
4
        IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true);
5017
4
        if (!IdxN)
5018
0
          return false;
5019
4
        IdxNIsKill = true;
5020
4
      }
5021
6
      N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
5022
6
      if (!N)
5023
0
        return false;
5024
6
    }
5025
26
  }
5026
21
  if (TotalOffs) {
5027
13
    N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
5028
13
    if (!N)
5029
0
      return false;
5030
21
  }
5031
21
  updateValueMap(I, N);
5032
21
  return true;
5033
21
}
5034
5035
3
bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
5036
3
  assert(TM.getOptLevel() == CodeGenOpt::None &&
5037
3
         "cmpxchg survived AtomicExpand at optlevel > -O0");
5038
3
5039
3
  auto *RetPairTy = cast<StructType>(I->getType());
5040
3
  Type *RetTy = RetPairTy->getTypeAtIndex(0U);
5041
3
  assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
5042
3
         "cmpxchg has a non-i1 status result");
5043
3
5044
3
  MVT VT;
5045
3
  if (!isTypeLegal(RetTy, VT))
5046
0
    return false;
5047
3
5048
3
  const TargetRegisterClass *ResRC;
5049
3
  unsigned Opc, CmpOpc;
5050
3
  // This only supports i32/i64, because i8/i16 aren't legal, and the generic
5051
3
  // extractvalue selection doesn't support that.
5052
3
  if (VT == MVT::i32) {
5053
2
    Opc = AArch64::CMP_SWAP_32;
5054
2
    CmpOpc = AArch64::SUBSWrs;
5055
2
    ResRC = &AArch64::GPR32RegClass;
5056
2
  } else 
if (1
VT == MVT::i641
) {
5057
1
    Opc = AArch64::CMP_SWAP_64;
5058
1
    CmpOpc = AArch64::SUBSXrs;
5059
1
    ResRC = &AArch64::GPR64RegClass;
5060
1
  } else {
5061
0
    return false;
5062
0
  }
5063
3
5064
3
  const MCInstrDesc &II = TII.get(Opc);
5065
3
5066
3
  const unsigned AddrReg = constrainOperandRegClass(
5067
3
      II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
5068
3
  const unsigned DesiredReg = constrainOperandRegClass(
5069
3
      II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
5070
3
  const unsigned NewReg = constrainOperandRegClass(
5071
3
      II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
5072
3
5073
3
  const unsigned ResultReg1 = createResultReg(ResRC);
5074
3
  const unsigned ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
5075
3
  const unsigned ScratchReg = createResultReg(&AArch64::GPR32RegClass);
5076
3
5077
3
  // FIXME: MachineMemOperand doesn't support cmpxchg yet.
5078
3
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
5079
3
      .addDef(ResultReg1)
5080
3
      .addDef(ScratchReg)
5081
3
      .addUse(AddrReg)
5082
3
      .addUse(DesiredReg)
5083
3
      .addUse(NewReg);
5084
3
5085
3
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
5086
3
      .addDef(VT == MVT::i32 ? 
AArch64::WZR2
:
AArch64::XZR1
)
5087
3
      .addUse(ResultReg1)
5088
3
      .addUse(DesiredReg)
5089
3
      .addImm(0);
5090
3
5091
3
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr))
5092
3
      .addDef(ResultReg2)
5093
3
      .addUse(AArch64::WZR)
5094
3
      .addUse(AArch64::WZR)
5095
3
      .addImm(AArch64CC::NE);
5096
3
5097
3
  assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5098
3
  updateValueMap(I, ResultReg1, 2);
5099
3
  return true;
5100
3
}
5101
5102
4.03k
bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5103
4.03k
  switch (I->getOpcode()) {
5104
4.03k
  default:
5105
603
    break;
5106
4.03k
  case Instruction::Add:
5107
284
  case Instruction::Sub:
5108
284
    return selectAddSub(I);
5109
284
  case Instruction::Mul:
5110
10
    return selectMul(I);
5111
284
  case Instruction::SDiv:
5112
26
    return selectSDiv(I);
5113
284
  case Instruction::SRem:
5114
4
    if (!selectBinaryOp(I, ISD::SREM))
5115
4
      return selectRem(I, ISD::SREM);
5116
0
    return true;
5117
4
  case Instruction::URem:
5118
4
    if (!selectBinaryOp(I, ISD::UREM))
5119
4
      return selectRem(I, ISD::UREM);
5120
0
    return true;
5121
98
  case Instruction::Shl:
5122
98
  case Instruction::LShr:
5123
98
  case Instruction::AShr:
5124
98
    return selectShift(I);
5125
98
  case Instruction::And:
5126
91
  case Instruction::Or:
5127
91
  case Instruction::Xor:
5128
91
    return selectLogicalOp(I);
5129
287
  case Instruction::Br:
5130
287
    return selectBranch(I);
5131
91
  case Instruction::IndirectBr:
5132
1
    return selectIndirectBr(I);
5133
91
  case Instruction::BitCast:
5134
56
    if (!FastISel::selectBitCast(I))
5135
23
      return selectBitCast(I);
5136
33
    return true;
5137
33
  case Instruction::FPToSI:
5138
4
    if (!selectCast(I, ISD::FP_TO_SINT))
5139
1
      return selectFPToInt(I, /*Signed=*/true);
5140
3
    return true;
5141
6
  case Instruction::FPToUI:
5142
6
    return selectFPToInt(I, /*Signed=*/false);
5143
206
  case Instruction::ZExt:
5144
206
  case Instruction::SExt:
5145
206
    return selectIntExt(I);
5146
206
  case Instruction::Trunc:
5147
26
    if (!selectCast(I, ISD::TRUNCATE))
5148
14
      return selectTrunc(I);
5149
12
    return true;
5150
12
  case Instruction::FPExt:
5151
5
    return selectFPExt(I);
5152
12
  case Instruction::FPTrunc:
5153
2
    return selectFPTrunc(I);
5154
17
  case Instruction::SIToFP:
5155
17
    if (!selectCast(I, ISD::SINT_TO_FP))
5156
9
      return selectIntToFP(I, /*Signed=*/true);
5157
8
    return true;
5158
13
  case Instruction::UIToFP:
5159
13
    return selectIntToFP(I, /*Signed=*/false);
5160
381
  case Instruction::Load:
5161
381
    return selectLoad(I);
5162
427
  case Instruction::Store:
5163
427
    return selectStore(I);
5164
57
  case Instruction::FCmp:
5165
57
  case Instruction::ICmp:
5166
57
    return selectCmp(I);
5167
57
  case Instruction::Select:
5168
53
    return selectSelect(I);
5169
1.34k
  case Instruction::Ret:
5170
1.34k
    return selectRet(I);
5171
57
  case Instruction::FRem:
5172
4
    return selectFRem(I);
5173
57
  case Instruction::GetElementPtr: