Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/ARM/ARMFastISel.cpp
Line | Count | Source
1
//===- ARMFastISel.cpp - ARM FastISel implementation ----------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file defines the ARM-specific support for the FastISel class. Some
10
// of the target-specific code is generated by tablegen in the file
11
// ARMGenFastISel.inc, which is #included here.
12
//
13
//===----------------------------------------------------------------------===//
14
15
#include "ARM.h"
16
#include "ARMBaseInstrInfo.h"
17
#include "ARMBaseRegisterInfo.h"
18
#include "ARMCallingConv.h"
19
#include "ARMConstantPoolValue.h"
20
#include "ARMISelLowering.h"
21
#include "ARMMachineFunctionInfo.h"
22
#include "ARMSubtarget.h"
23
#include "MCTargetDesc/ARMAddressingModes.h"
24
#include "MCTargetDesc/ARMBaseInfo.h"
25
#include "Utils/ARMBaseInfo.h"
26
#include "llvm/ADT/APFloat.h"
27
#include "llvm/ADT/APInt.h"
28
#include "llvm/ADT/DenseMap.h"
29
#include "llvm/ADT/SmallVector.h"
30
#include "llvm/CodeGen/CallingConvLower.h"
31
#include "llvm/CodeGen/FastISel.h"
32
#include "llvm/CodeGen/FunctionLoweringInfo.h"
33
#include "llvm/CodeGen/ISDOpcodes.h"
34
#include "llvm/CodeGen/MachineBasicBlock.h"
35
#include "llvm/CodeGen/MachineConstantPool.h"
36
#include "llvm/CodeGen/MachineFrameInfo.h"
37
#include "llvm/CodeGen/MachineFunction.h"
38
#include "llvm/CodeGen/MachineInstr.h"
39
#include "llvm/CodeGen/MachineInstrBuilder.h"
40
#include "llvm/CodeGen/MachineMemOperand.h"
41
#include "llvm/CodeGen/MachineOperand.h"
42
#include "llvm/CodeGen/MachineRegisterInfo.h"
43
#include "llvm/CodeGen/RuntimeLibcalls.h"
44
#include "llvm/CodeGen/TargetInstrInfo.h"
45
#include "llvm/CodeGen/TargetLowering.h"
46
#include "llvm/CodeGen/TargetOpcodes.h"
47
#include "llvm/CodeGen/TargetRegisterInfo.h"
48
#include "llvm/CodeGen/ValueTypes.h"
49
#include "llvm/IR/Argument.h"
50
#include "llvm/IR/Attributes.h"
51
#include "llvm/IR/CallSite.h"
52
#include "llvm/IR/CallingConv.h"
53
#include "llvm/IR/Constant.h"
54
#include "llvm/IR/Constants.h"
55
#include "llvm/IR/DataLayout.h"
56
#include "llvm/IR/DerivedTypes.h"
57
#include "llvm/IR/Function.h"
58
#include "llvm/IR/GetElementPtrTypeIterator.h"
59
#include "llvm/IR/GlobalValue.h"
60
#include "llvm/IR/GlobalVariable.h"
61
#include "llvm/IR/InstrTypes.h"
62
#include "llvm/IR/Instruction.h"
63
#include "llvm/IR/Instructions.h"
64
#include "llvm/IR/IntrinsicInst.h"
65
#include "llvm/IR/Intrinsics.h"
66
#include "llvm/IR/Module.h"
67
#include "llvm/IR/Operator.h"
68
#include "llvm/IR/Type.h"
69
#include "llvm/IR/User.h"
70
#include "llvm/IR/Value.h"
71
#include "llvm/MC/MCInstrDesc.h"
72
#include "llvm/MC/MCRegisterInfo.h"
73
#include "llvm/Support/Casting.h"
74
#include "llvm/Support/Compiler.h"
75
#include "llvm/Support/ErrorHandling.h"
76
#include "llvm/Support/MachineValueType.h"
77
#include "llvm/Support/MathExtras.h"
78
#include "llvm/Target/TargetMachine.h"
79
#include "llvm/Target/TargetOptions.h"
80
#include <cassert>
81
#include <cstdint>
82
#include <utility>
83
84
using namespace llvm;
85
86
namespace {
87
88
  // All possible address modes, plus some.
89
  struct Address {
90
    enum {
91
      RegBase,
92
      FrameIndexBase
93
    } BaseType = RegBase;
94
95
    union {
96
      unsigned Reg;
97
      int FI;
98
    } Base;
99
100
    int Offset = 0;
101
102
    // Innocuous defaults for our address.
103
1.54k
    Address() {
104
1.54k
      Base.Reg = 0;
105
1.54k
    }
106
  };
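Aside (not part of the listed ARMFastISel.cpp source): a minimal standalone C++ sketch of how a tagged address like the struct above is typically filled in; the frame-index number and virtual-register id below are made-up illustration values.

#include <cassert>

// Standalone copy of the Address shape above, for illustration only.
struct AddressSketch {
  enum { RegBase, FrameIndexBase } BaseType = RegBase;
  union {
    unsigned Reg;
    int FI;
  } Base;
  int Offset = 0;
  AddressSketch() { Base.Reg = 0; }
};

int main() {
  // An alloca-backed access: the base is a frame index plus a byte offset.
  AddressSketch Stack;
  Stack.BaseType = AddressSketch::FrameIndexBase;
  Stack.Base.FI = 2;   // made-up frame-index number
  Stack.Offset = 8;

  // A pointer that already lives in a (virtual) register.
  AddressSketch Reg;
  Reg.Base.Reg = 1025; // made-up virtual-register id
  Reg.Offset = -4;

  assert(Stack.BaseType == AddressSketch::FrameIndexBase && Reg.Offset == -4);
  return 0;
}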
107
108
class ARMFastISel final : public FastISel {
109
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
110
  /// make the right decision when generating code for different targets.
111
  const ARMSubtarget *Subtarget;
112
  Module &M;
113
  const TargetMachine &TM;
114
  const TargetInstrInfo &TII;
115
  const TargetLowering &TLI;
116
  ARMFunctionInfo *AFI;
117
118
  // Convenience variables to avoid some queries.
119
  bool isThumb2;
120
  LLVMContext *Context;
121
122
  public:
123
    explicit ARMFastISel(FunctionLoweringInfo &funcInfo,
124
                         const TargetLibraryInfo *libInfo)
125
        : FastISel(funcInfo, libInfo),
126
          Subtarget(
127
              &static_cast<const ARMSubtarget &>(funcInfo.MF->getSubtarget())),
128
          M(const_cast<Module &>(*funcInfo.Fn->getParent())),
129
          TM(funcInfo.MF->getTarget()), TII(*Subtarget->getInstrInfo()),
130
912
          TLI(*Subtarget->getTargetLowering()) {
131
912
      AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
132
912
      isThumb2 = AFI->isThumbFunction();
133
912
      Context = &funcInfo.Fn->getContext();
134
912
    }
135
136
  private:
137
    // Code from FastISel.cpp.
138
139
    unsigned fastEmitInst_r(unsigned MachineInstOpcode,
140
                            const TargetRegisterClass *RC,
141
                            unsigned Op0, bool Op0IsKill);
142
    unsigned fastEmitInst_rr(unsigned MachineInstOpcode,
143
                             const TargetRegisterClass *RC,
144
                             unsigned Op0, bool Op0IsKill,
145
                             unsigned Op1, bool Op1IsKill);
146
    unsigned fastEmitInst_ri(unsigned MachineInstOpcode,
147
                             const TargetRegisterClass *RC,
148
                             unsigned Op0, bool Op0IsKill,
149
                             uint64_t Imm);
150
    unsigned fastEmitInst_i(unsigned MachineInstOpcode,
151
                            const TargetRegisterClass *RC,
152
                            uint64_t Imm);
153
154
    // Backend specific FastISel code.
155
156
    bool fastSelectInstruction(const Instruction *I) override;
157
    unsigned fastMaterializeConstant(const Constant *C) override;
158
    unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
159
    bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
160
                             const LoadInst *LI) override;
161
    bool fastLowerArguments() override;
162
163
  #include "ARMGenFastISel.inc"
164
165
    // Instruction selection routines.
166
167
    bool SelectLoad(const Instruction *I);
168
    bool SelectStore(const Instruction *I);
169
    bool SelectBranch(const Instruction *I);
170
    bool SelectIndirectBr(const Instruction *I);
171
    bool SelectCmp(const Instruction *I);
172
    bool SelectFPExt(const Instruction *I);
173
    bool SelectFPTrunc(const Instruction *I);
174
    bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
175
    bool SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode);
176
    bool SelectIToFP(const Instruction *I, bool isSigned);
177
    bool SelectFPToI(const Instruction *I, bool isSigned);
178
    bool SelectDiv(const Instruction *I, bool isSigned);
179
    bool SelectRem(const Instruction *I, bool isSigned);
180
    bool SelectCall(const Instruction *I, const char *IntrMemName);
181
    bool SelectIntrinsicCall(const IntrinsicInst &I);
182
    bool SelectSelect(const Instruction *I);
183
    bool SelectRet(const Instruction *I);
184
    bool SelectTrunc(const Instruction *I);
185
    bool SelectIntExt(const Instruction *I);
186
    bool SelectShift(const Instruction *I, ARM_AM::ShiftOpc ShiftTy);
187
188
    // Utility routines.
189
190
    bool isPositionIndependent() const;
191
    bool isTypeLegal(Type *Ty, MVT &VT);
192
    bool isLoadTypeLegal(Type *Ty, MVT &VT);
193
    bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
194
                    bool isZExt, bool isEquality);
195
    bool ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
196
                     unsigned Alignment = 0, bool isZExt = true,
197
                     bool allocReg = true);
198
    bool ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
199
                      unsigned Alignment = 0);
200
    bool ARMComputeAddress(const Value *Obj, Address &Addr);
201
    void ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3);
202
    bool ARMIsMemCpySmall(uint64_t Len);
203
    bool ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
204
                               unsigned Alignment);
205
    unsigned ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
206
    unsigned ARMMaterializeFP(const ConstantFP *CFP, MVT VT);
207
    unsigned ARMMaterializeInt(const Constant *C, MVT VT);
208
    unsigned ARMMaterializeGV(const GlobalValue *GV, MVT VT);
209
    unsigned ARMMoveToFPReg(MVT VT, unsigned SrcReg);
210
    unsigned ARMMoveToIntReg(MVT VT, unsigned SrcReg);
211
    unsigned ARMSelectCallOp(bool UseReg);
212
    unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, MVT VT);
213
214
0
    const TargetLowering *getTargetLowering() { return &TLI; }
215
216
    // Call handling routines.
217
218
    CCAssignFn *CCAssignFnForCall(CallingConv::ID CC,
219
                                  bool Return,
220
                                  bool isVarArg);
221
    bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
222
                         SmallVectorImpl<unsigned> &ArgRegs,
223
                         SmallVectorImpl<MVT> &ArgVTs,
224
                         SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
225
                         SmallVectorImpl<unsigned> &RegArgs,
226
                         CallingConv::ID CC,
227
                         unsigned &NumBytes,
228
                         bool isVarArg);
229
    unsigned getLibcallReg(const Twine &Name);
230
    bool FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
231
                    const Instruction *I, CallingConv::ID CC,
232
                    unsigned &NumBytes, bool isVarArg);
233
    bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call);
234
235
    // OptionalDef handling routines.
236
237
    bool isARMNEONPred(const MachineInstr *MI);
238
    bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
239
    const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
240
    void AddLoadStoreOperands(MVT VT, Address &Addr,
241
                              const MachineInstrBuilder &MIB,
242
                              MachineMemOperand::Flags Flags, bool useAM3);
243
};
244
245
} // end anonymous namespace
246
247
// DefinesOptionalPredicate - This is different from DefinesPredicate in that
248
// we don't care about implicit defs here, just places we'll need to add a
249
// default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
250
5.90k
bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
251
5.90k
  if (!MI->hasOptionalDef())
252
5.06k
    return false;
253
841
254
841
  // Look to see if our OptionalDef is defining CPSR or CCR.
255
4.02k
  for (const MachineOperand &MO : MI->operands()) {
256
4.02k
    if (!MO.isReg() || !MO.isDef()) continue;
257
841
    if (MO.getReg() == ARM::CPSR)
258
0
      *CPSR = true;
259
841
  }
260
841
  return true;
261
841
}
262
263
5.90k
bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
264
5.90k
  const MCInstrDesc &MCID = MI->getDesc();
265
5.90k
266
5.90k
  // If we're a thumb2 or not NEON function we'll be handled via isPredicable.
267
5.90k
  if ((MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON ||
268
5.90k
       AFI->isThumb2Function())
269
5.90k
    return MI->isPredicable();
270
2
271
2
  for (const MCOperandInfo &opInfo : MCID.operands())
272
6
    if (opInfo.isPredicate())
273
2
      return true;
274
2
275
2
  return false;
276
2
}
277
278
// If the machine is predicable go ahead and add the predicate operands, if
279
// it needs default CC operands add those.
280
// TODO: If we want to support thumb1 then we'll need to deal with optional
281
// CPSR defs that need to be added before the remaining operands. See s_cc_out
282
// for descriptions why.
283
const MachineInstrBuilder &
284
5.90k
ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
285
5.90k
  MachineInstr *MI = &*MIB;
286
5.90k
287
5.90k
  // Do we use a predicate? or...
288
5.90k
  // Are we NEON in ARM mode and have a predicate operand? If so, I know
289
5.90k
  // we're not predicable but add it anyways.
290
5.90k
  if (isARMNEONPred(MI))
291
5.64k
    MIB.add(predOps(ARMCC::AL));
292
5.90k
293
5.90k
  // Do we optionally set a predicate?  Preds is size > 0 iff the predicate
294
5.90k
  // defines CPSR. All other OptionalDefines in ARM are the CCR register.
295
5.90k
  bool CPSR = false;
296
5.90k
  if (DefinesOptionalPredicate(MI, &CPSR))
297
841
    MIB.add(CPSR ? t1CondCodeOp() : condCodeOp());
298
5.90k
  return MIB;
299
5.90k
}
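Aside (illustration only, no LLVM APIs): the comments above describe appending a default predicate (ARMCC::AL with no predicate register) and, for instructions with an optional definition, a CC-out operand that is CPSR only when the instruction really sets flags. A self-contained conceptual model of that operand-appending step, with invented names:

#include <cstdio>
#include <string>
#include <vector>

// Conceptual model only: an "instruction" is just a name plus operand strings.
struct Inst {
  std::string Name;
  std::vector<std::string> Ops;
};

// Mirror of the idea behind AddOptionalDefs: append the always-execute
// predicate (AL, no predicate register), and for instructions with an
// optional def append the CC-out operand -- CPSR when flags are really set,
// otherwise the "no register" placeholder.
void addOptionalDefs(Inst &MI, bool Predicable, bool HasOptionalDef,
                     bool SetsCPSR) {
  if (Predicable) {
    MI.Ops.push_back("pred:AL");
    MI.Ops.push_back("predreg:noreg");
  }
  if (HasOptionalDef)
    MI.Ops.push_back(SetsCPSR ? "cc_out:CPSR" : "cc_out:noreg");
}

int main() {
  Inst Add{"ADDri", {"r0", "r1", "#4"}};
  addOptionalDefs(Add, /*Predicable=*/true, /*HasOptionalDef=*/true,
                  /*SetsCPSR=*/false);
  for (const std::string &Op : Add.Ops)
    std::printf("%s ", Op.c_str());
  std::printf("\n"); // prints: r0 r1 #4 pred:AL predreg:noreg cc_out:noreg
  return 0;
}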
300
301
unsigned ARMFastISel::fastEmitInst_r(unsigned MachineInstOpcode,
302
                                     const TargetRegisterClass *RC,
303
83
                                     unsigned Op0, bool Op0IsKill) {
304
83
  unsigned ResultReg = createResultReg(RC);
305
83
  const MCInstrDesc &II = TII.get(MachineInstOpcode);
306
83
307
83
  // Make sure the input operand is sufficiently constrained to be legal
308
83
  // for this instruction.
309
83
  Op0 = constrainOperandRegClass(II, Op0, 1);
310
83
  if (II.getNumDefs() >= 1) {
311
83
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
312
83
                            ResultReg).addReg(Op0, Op0IsKill * RegState::Kill));
313
83
  } else {
314
0
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
315
0
                   .addReg(Op0, Op0IsKill * RegState::Kill));
316
0
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
317
0
                   TII.get(TargetOpcode::COPY), ResultReg)
318
0
                   .addReg(II.ImplicitDefs[0]));
319
0
  }
320
83
  return ResultReg;
321
83
}
322
323
unsigned ARMFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
324
                                      const TargetRegisterClass *RC,
325
                                      unsigned Op0, bool Op0IsKill,
326
148
                                      unsigned Op1, bool Op1IsKill) {
327
148
  unsigned ResultReg = createResultReg(RC);
328
148
  const MCInstrDesc &II = TII.get(MachineInstOpcode);
329
148
330
148
  // Make sure the input operands are sufficiently constrained to be legal
331
148
  // for this instruction.
332
148
  Op0 = constrainOperandRegClass(II, Op0, 1);
333
148
  Op1 = constrainOperandRegClass(II, Op1, 2);
334
148
335
148
  if (II.getNumDefs() >= 1) {
336
148
    AddOptionalDefs(
337
148
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
338
148
            .addReg(Op0, Op0IsKill * RegState::Kill)
339
148
            .addReg(Op1, Op1IsKill * RegState::Kill));
340
148
  } else {
341
0
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
342
0
                   .addReg(Op0, Op0IsKill * RegState::Kill)
343
0
                   .addReg(Op1, Op1IsKill * RegState::Kill));
344
0
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
345
0
                           TII.get(TargetOpcode::COPY), ResultReg)
346
0
                   .addReg(II.ImplicitDefs[0]));
347
0
  }
348
148
  return ResultReg;
349
148
}
350
351
unsigned ARMFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
352
                                      const TargetRegisterClass *RC,
353
                                      unsigned Op0, bool Op0IsKill,
354
463
                                      uint64_t Imm) {
355
463
  unsigned ResultReg = createResultReg(RC);
356
463
  const MCInstrDesc &II = TII.get(MachineInstOpcode);
357
463
358
463
  // Make sure the input operand is sufficiently constrained to be legal
359
463
  // for this instruction.
360
463
  Op0 = constrainOperandRegClass(II, Op0, 1);
361
463
  if (II.getNumDefs() >= 1) {
362
463
    AddOptionalDefs(
363
463
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
364
463
            .addReg(Op0, Op0IsKill * RegState::Kill)
365
463
            .addImm(Imm));
366
463
  } else {
367
0
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
368
0
                   .addReg(Op0, Op0IsKill * RegState::Kill)
369
0
                   .addImm(Imm));
370
0
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
371
0
                           TII.get(TargetOpcode::COPY), ResultReg)
372
0
                   .addReg(II.ImplicitDefs[0]));
373
0
  }
374
463
  return ResultReg;
375
463
}
376
377
unsigned ARMFastISel::fastEmitInst_i(unsigned MachineInstOpcode,
378
                                     const TargetRegisterClass *RC,
379
15
                                     uint64_t Imm) {
380
15
  unsigned ResultReg = createResultReg(RC);
381
15
  const MCInstrDesc &II = TII.get(MachineInstOpcode);
382
15
383
15
  if (II.getNumDefs() >= 1) {
384
15
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
385
15
                            ResultReg).addImm(Imm));
386
15
  } else {
387
0
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
388
0
                   .addImm(Imm));
389
0
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
390
0
                           TII.get(TargetOpcode::COPY), ResultReg)
391
0
                   .addReg(II.ImplicitDefs[0]));
392
0
  }
393
15
  return ResultReg;
394
15
}
395
396
// TODO: Don't worry about 64-bit now, but when this is fixed remove the
397
// checks from the various callers.
398
37
unsigned ARMFastISel::ARMMoveToFPReg(MVT VT, unsigned SrcReg) {
399
37
  if (VT == MVT::f64) return 0;
400
37
401
37
  unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
402
37
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
403
37
                          TII.get(ARM::VMOVSR), MoveReg)
404
37
                  .addReg(SrcReg));
405
37
  return MoveReg;
406
37
}
407
408
12
unsigned ARMFastISel::ARMMoveToIntReg(MVT VT, unsigned SrcReg) {
409
12
  if (VT == MVT::i64) return 0;
410
12
411
12
  unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
412
12
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
413
12
                          TII.get(ARM::VMOVRS), MoveReg)
414
12
                  .addReg(SrcReg));
415
12
  return MoveReg;
416
12
}
417
418
// For double width floating point we need to materialize two constants
419
// (the high and the low) into integer registers then use a move to get
420
// the combined constant into an FP reg.
421
309
unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) {
422
309
  const APFloat Val = CFP->getValueAPF();
423
309
  bool is64bit = VT == MVT::f64;
424
309
425
309
  // This checks to see if we can use VFP3 instructions to materialize
426
309
  // a constant, otherwise we have to go through the constant pool.
427
309
  if (TLI.isFPImmLegal(Val, VT)) {
428
47
    int Imm;
429
47
    unsigned Opc;
430
47
    if (is64bit) {
431
1
      Imm = ARM_AM::getFP64Imm(Val);
432
1
      Opc = ARM::FCONSTD;
433
46
    } else {
434
46
      Imm = ARM_AM::getFP32Imm(Val);
435
46
      Opc = ARM::FCONSTS;
436
46
    }
437
47
    unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
438
47
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
439
47
                            TII.get(Opc), DestReg).addImm(Imm));
440
47
    return DestReg;
441
47
  }
442
262
443
262
  // Require VFP2 for loading fp constants.
444
262
  if (!Subtarget->hasVFP2Base()) return false;
445
262
446
262
  // MachineConstantPool wants an explicit alignment.
447
262
  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
448
262
  if (Align == 0) {
449
0
    // TODO: Figure out if this is correct.
450
0
    Align = DL.getTypeAllocSize(CFP->getType());
451
0
  }
452
262
  unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
453
262
  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
454
262
  unsigned Opc = is64bit ? ARM::VLDRD : ARM::VLDRS;
455
262
456
262
  // The extra reg is for addrmode5.
457
262
  AddOptionalDefs(
458
262
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
459
262
          .addConstantPoolIndex(Idx)
460
262
          .addReg(0));
461
262
  return DestReg;
462
262
}
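Aside (illustration only): ARMMaterializeFP above chooses between a single FCONSTS/FCONSTD when the value is encodable as a VFP3 immediate and a constant-pool load otherwise. A standalone sketch of that two-way decision; the encodability test is a rough stand-in for ARM_AM::getFP32Imm, not the real check:

#include <cmath>
#include <cstdio>

// Rough stand-in for the VFP3 immediate test: +/-(1.xxxx)b * 2^n with a
// 4-bit fraction and a small exponent range. The real check is
// ARM_AM::getFP32Imm / getFP64Imm; this is only for illustration.
bool looksLikeVFP3Imm(double V) {
  if (V == 0.0 || std::isnan(V) || std::isinf(V))
    return false;
  int Exp;
  double Man = std::frexp(std::fabs(V), &Exp); // Man in [0.5, 1)
  double Scaled = Man * 32.0;                  // 16..31 when the fraction fits
  return Scaled == std::floor(Scaled) && Exp >= -2 && Exp <= 5;
}

// Mirrors the structure of ARMMaterializeFP: prefer a single FCONST move,
// otherwise fall back to a constant-pool load.
const char *materializeFP(double V, bool is64bit) {
  if (looksLikeVFP3Imm(V))
    return is64bit ? "FCONSTD #imm" : "FCONSTS #imm";
  return is64bit ? "VLDRD from constant pool" : "VLDRS from constant pool";
}

int main() {
  std::printf("1.0  -> %s\n", materializeFP(1.0, false));
  std::printf("0.1  -> %s\n", materializeFP(0.1, false));
  return 0;
}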
463
464
528
unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
465
528
  if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1)
466
0
    return 0;
467
528
468
528
  // If we can do this in a single instruction without a constant pool entry
469
528
  // do so now.
470
528
  const ConstantInt *CI = cast<ConstantInt>(C);
471
528
  if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getZExtValue())) {
472
316
    unsigned Opc = isThumb2 ? ARM::t2MOVi16 : ARM::MOVi16;
473
316
    const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass :
474
316
      &ARM::GPRRegClass;
475
316
    unsigned ImmReg = createResultReg(RC);
476
316
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
477
316
                            TII.get(Opc), ImmReg)
478
316
                    .addImm(CI->getZExtValue()));
479
316
    return ImmReg;
480
316
  }
481
212
482
212
  // Use MVN to emit negative constants.
483
212
  if (VT == MVT::i32 && Subtarget->hasV6T2Ops() && CI->isNegative()) {
484
205
    unsigned Imm = (unsigned)~(CI->getSExtValue());
485
205
    bool UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
486
205
      (ARM_AM::getSOImmVal(Imm) != -1);
487
205
    if (UseImm) {
488
190
      unsigned Opc = isThumb2 ? ARM::t2MVNi : ARM::MVNi;
489
190
      const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass :
490
190
                                                 &ARM::GPRRegClass;
491
190
      unsigned ImmReg = createResultReg(RC);
492
190
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
493
190
                              TII.get(Opc), ImmReg)
494
190
                      .addImm(Imm));
495
190
      return ImmReg;
496
190
    }
497
22
  }
498
22
499
22
  unsigned ResultReg = 0;
500
22
  if (Subtarget->useMovt())
501
6
    ResultReg = fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
502
22
503
22
  if (ResultReg)
504
4
    return ResultReg;
505
18
506
18
  // Load from constant pool.  For now 32-bit only.
507
18
  if (VT != MVT::i32)
508
0
    return 0;
509
18
510
18
  // MachineConstantPool wants an explicit alignment.
511
18
  unsigned Align = DL.getPrefTypeAlignment(C->getType());
512
18
  if (Align == 0) {
513
0
    // TODO: Figure out if this is correct.
514
0
    Align = DL.getTypeAllocSize(C->getType());
515
0
  }
516
18
  unsigned Idx = MCP.getConstantPoolIndex(C, Align);
517
18
  ResultReg = createResultReg(TLI.getRegClassFor(VT));
518
18
  if (isThumb2)
519
7
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
520
7
                            TII.get(ARM::t2LDRpci), ResultReg)
521
7
                      .addConstantPoolIndex(Idx));
522
11
  else {
523
11
    // The extra immediate is for addrmode2.
524
11
    ResultReg = constrainOperandRegClass(TII.get(ARM::LDRcp), ResultReg, 0);
525
11
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
526
11
                            TII.get(ARM::LDRcp), ResultReg)
527
11
                      .addConstantPoolIndex(Idx)
528
11
                      .addImm(0));
529
11
  }
530
18
  return ResultReg;
531
18
}
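Aside (illustration only): ARMMaterializeInt above tries strategies in priority order: a single MOVi16 for 0..65535, MVN of the bitwise complement for encodable negatives, a movw/movt pair, and finally a literal-pool load. A standalone sketch of that ordering; isSOImm below is a simplified stand-in for ARM_AM::getSOImmVal:

#include <cstdint>
#include <cstdio>

// Simplified stand-in for ARM_AM::getSOImmVal: is Imm an 8-bit value rotated
// by an even amount? (Illustrative; the real helper returns the encoding.)
bool isSOImm(uint32_t Imm) {
  for (unsigned Rot = 0; Rot < 32; Rot += 2) {
    uint32_t Rotated = Rot ? ((Imm << Rot) | (Imm >> (32 - Rot))) : Imm;
    if (Rotated <= 0xFF)
      return true;
  }
  return false;
}

// Sketch of the priority order in ARMMaterializeInt (names are the selected
// opcodes; hasV6T2 stands in for the hasV6T2Ops()/useMovt() checks above).
const char *materializeInt(int32_t CI, bool hasV6T2) {
  uint32_t Z = (uint32_t)CI;
  if (hasV6T2 && Z <= 0xFFFF)
    return "MOVi16";     // single movw
  if (hasV6T2 && CI < 0 && isSOImm(~Z))
    return "MVNi";       // move of the bitwise complement
  if (hasV6T2)
    return "MOVi32imm";  // movw + movt pair
  return "LDRcp";        // literal-pool load
}

int main() {
  std::printf("%d -> %s\n", 1234, materializeInt(1234, true));
  std::printf("%d -> %s\n", -1, materializeInt(-1, true));
  std::printf("%d -> %s\n", (int)0x12345678, materializeInt(0x12345678, false));
  return 0;
}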
532
533
253
bool ARMFastISel::isPositionIndependent() const {
534
253
  return TLI.isPositionIndependent();
535
253
}
536
537
264
unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
538
264
  // For now 32-bit only.
539
264
  if (VT != MVT::i32 || GV->isThreadLocal()) return 0;
540
253
541
253
  // ROPI/RWPI not currently supported.
542
253
  if (Subtarget->isROPI() || Subtarget->isRWPI())
543
0
    return 0;
544
253
545
253
  bool IsIndirect = Subtarget->isGVIndirectSymbol(GV);
546
253
  const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass
547
253
                                           : &ARM::GPRRegClass;
548
253
  unsigned DestReg = createResultReg(RC);
549
253
550
253
  // FastISel TLS support on non-MachO is broken, punt to SelectionDAG.
551
253
  const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
552
253
  bool IsThreadLocal = GVar && GVar->isThreadLocal();
553
253
  if (!Subtarget->isTargetMachO() && IsThreadLocal) return 0;
554
253
555
253
  bool IsPositionIndependent = isPositionIndependent();
556
253
  // Use movw+movt when possible, it avoids constant pool entries.
557
253
  // Non-darwin targets only support static movt relocations in FastISel.
558
253
  if (Subtarget->useMovt() &&
559
253
      (Subtarget->isTargetMachO() || !IsPositionIndependent)) {
560
244
    unsigned Opc;
561
244
    unsigned char TF = 0;
562
244
    if (Subtarget->isTargetMachO())
563
178
      TF = ARMII::MO_NONLAZY;
564
244
565
244
    if (IsPositionIndependent)
566
63
      Opc = isThumb2 ? ARM::t2MOV_ga_pcrel : ARM::MOV_ga_pcrel;
567
181
    else
568
181
      Opc = isThumb2 ? ARM::t2MOVi32imm : ARM::MOVi32imm;
569
244
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
570
244
                            TII.get(Opc), DestReg).addGlobalAddress(GV, 0, TF));
571
244
  } else {
572
9
    // MachineConstantPool wants an explicit alignment.
573
9
    unsigned Align = DL.getPrefTypeAlignment(GV->getType());
574
9
    if (Align == 0) {
575
0
      // TODO: Figure out if this is correct.
576
0
      Align = DL.getTypeAllocSize(GV->getType());
577
0
    }
578
9
579
9
    if (Subtarget->isTargetELF() && IsPositionIndependent)
580
6
      return ARMLowerPICELF(GV, Align, VT);
581
3
582
3
    // Grab index.
583
3
    unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
584
3
    unsigned Id = AFI->createPICLabelUId();
585
3
    ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id,
586
3
                                                                ARMCP::CPValue,
587
3
                                                                PCAdj);
588
3
    unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);
589
3
590
3
    // Load value.
591
3
    MachineInstrBuilder MIB;
592
3
    if (isThumb2) {
593
1
      unsigned Opc = IsPositionIndependent ? ARM::t2LDRpci_pic : ARM::t2LDRpci;
594
1
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
595
1
                    DestReg).addConstantPoolIndex(Idx);
596
1
      if (IsPositionIndependent)
597
1
        MIB.addImm(Id);
598
1
      AddOptionalDefs(MIB);
599
2
    } else {
600
2
      // The extra immediate is for addrmode2.
601
2
      DestReg = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg, 0);
602
2
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
603
2
                    TII.get(ARM::LDRcp), DestReg)
604
2
                .addConstantPoolIndex(Idx)
605
2
                .addImm(0);
606
2
      AddOptionalDefs(MIB);
607
2
608
2
      if (IsPositionIndependent) {
609
2
        unsigned Opc = IsIndirect ? ARM::PICLDR : ARM::PICADD;
610
2
        unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
611
2
612
2
        MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
613
2
                                          DbgLoc, TII.get(Opc), NewDestReg)
614
2
                                  .addReg(DestReg)
615
2
                                  .addImm(Id);
616
2
        AddOptionalDefs(MIB);
617
2
        return NewDestReg;
618
2
      }
619
245
    }
620
3
  }
621
245
622
245
  if (IsIndirect) {
623
82
    MachineInstrBuilder MIB;
624
82
    unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
625
82
    if (isThumb2)
626
47
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
627
47
                    TII.get(ARM::t2LDRi12), NewDestReg)
628
47
            .addReg(DestReg)
629
47
            .addImm(0);
630
35
    else
631
35
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
632
35
                    TII.get(ARM::LDRi12), NewDestReg)
633
35
                .addReg(DestReg)
634
35
                .addImm(0);
635
82
    DestReg = NewDestReg;
636
82
    AddOptionalDefs(MIB);
637
82
  }
638
245
639
245
  return DestReg;
640
245
}
641
642
1.19k
unsigned ARMFastISel::fastMaterializeConstant(const Constant *C) {
643
1.19k
  EVT CEVT = TLI.getValueType(DL, C->getType(), true);
644
1.19k
645
1.19k
  // Only handle simple types.
646
1.19k
  if (!CEVT.isSimple()) return 0;
647
1.19k
  MVT VT = CEVT.getSimpleVT();
648
1.19k
649
1.19k
  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
650
309
    return ARMMaterializeFP(CFP, VT);
651
889
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
652
252
    return ARMMaterializeGV(GV, VT);
653
637
  else if (isa<ConstantInt>(C))
654
528
    return ARMMaterializeInt(C, VT);
655
109
656
109
  return 0;
657
109
}
658
659
// TODO: unsigned ARMFastISel::TargetMaterializeFloatZero(const ConstantFP *CF);
660
661
205
unsigned ARMFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
662
205
  // Don't handle dynamic allocas.
663
205
  if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
664
205
665
205
  MVT VT;
666
205
  if (!isLoadTypeLegal(AI->getType(), VT)) return 0;
667
205
668
205
  DenseMap<const AllocaInst*, int>::iterator SI =
669
205
    FuncInfo.StaticAllocaMap.find(AI);
670
205
671
205
  // This will get lowered later into the correct offsets and registers
672
205
  // via rewriteXFrameIndex.
673
205
  if (SI != FuncInfo.StaticAllocaMap.end()) {
674
205
    unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
675
205
    const TargetRegisterClass* RC = TLI.getRegClassFor(VT);
676
205
    unsigned ResultReg = createResultReg(RC);
677
205
    ResultReg = constrainOperandRegClass(TII.get(Opc), ResultReg, 0);
678
205
679
205
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
680
205
                            TII.get(Opc), ResultReg)
681
205
                            .addFrameIndex(SI->second)
682
205
                            .addImm(0));
683
205
    return ResultReg;
684
205
  }
685
0
686
0
  return 0;
687
0
}
688
689
3.00k
bool ARMFastISel::isTypeLegal(Type *Ty, MVT &VT) {
690
3.00k
  EVT evt = TLI.getValueType(DL, Ty, true);
691
3.00k
692
3.00k
  // Only handle simple types.
693
3.00k
  if (evt == MVT::Other || !evt.isSimple()) return false;
694
2.99k
  VT = evt.getSimpleVT();
695
2.99k
696
2.99k
  // Handle all legal types, i.e. a register that will directly hold this
697
2.99k
  // value.
698
2.99k
  return TLI.isTypeLegal(VT);
699
2.99k
}
700
701
1.30k
bool ARMFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
702
1.30k
  if (isTypeLegal(Ty, VT)) return true;
703
175
704
175
  // If this is a type that can be sign or zero-extended to a basic operation
705
175
  // go ahead and accept it now.
706
175
  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
707
167
    return true;
708
8
709
8
  return false;
710
8
}
711
712
// Computes the address to get to an object.
713
1.53k
bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
714
1.53k
  // Some boilerplate from the X86 FastISel.
715
1.53k
  const User *U = nullptr;
716
1.53k
  unsigned Opcode = Instruction::UserOp1;
717
1.53k
  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
718
1.10k
    // Don't walk into other basic blocks unless the object is an alloca from
719
1.10k
    // another block, otherwise it may not have a virtual register assigned.
720
1.10k
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
721
1.10k
        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
722
1.04k
      Opcode = I->getOpcode();
723
1.04k
      U = I;
724
1.04k
    }
725
1.10k
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
726
106
    Opcode = C->getOpcode();
727
106
    U = C;
728
106
  }
729
1.53k
730
1.53k
  if (PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
731
1.53k
    if (Ty->getAddressSpace() > 255)
732
0
      // Fast instruction selection doesn't support the special
733
0
      // address spaces.
734
0
      return false;
735
1.53k
736
1.53k
  switch (Opcode) {
737
1.53k
    default:
738
423
    break;
739
1.53k
    case Instruction::BitCast:
740
22
      // Look through bitcasts.
741
22
      return ARMComputeAddress(U->getOperand(0), Addr);
742
1.53k
    case Instruction::IntToPtr:
743
3
      // Look past no-op inttoptrs.
744
3
      if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
745
3
          TLI.getPointerTy(DL))
746
3
        return ARMComputeAddress(U->getOperand(0), Addr);
747
0
      break;
748
0
    case Instruction::PtrToInt:
749
0
      // Look past no-op ptrtoints.
750
0
      if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
751
0
        return ARMComputeAddress(U->getOperand(0), Addr);
752
0
      break;
753
531
    case Instruction::GetElementPtr: {
754
531
      Address SavedAddr = Addr;
755
531
      int TmpOffset = Addr.Offset;
756
531
757
531
      // Iterate through the GEP folding the constants into offsets where
758
531
      // we can.
759
531
      gep_type_iterator GTI = gep_type_begin(U);
760
531
      for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
761
1.53k
           i != e; ++i, ++GTI) {
762
1.01k
        const Value *Op = *i;
763
1.01k
        if (StructType *STy = GTI.getStructTypeOrNull()) {
764
295
          const StructLayout *SL = DL.getStructLayout(STy);
765
295
          unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
766
295
          TmpOffset += SL->getElementOffset(Idx);
767
719
        } else {
768
719
          uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
769
719
          while (true) {
770
719
            if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
771
708
              // Constant-offset addressing.
772
708
              TmpOffset += CI->getSExtValue() * S;
773
708
              break;
774
708
            }
775
11
            if (canFoldAddIntoGEP(U, Op)) {
776
0
              // A compatible add with a constant operand. Fold the constant.
777
0
              ConstantInt *CI =
778
0
              cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
779
0
              TmpOffset += CI->getSExtValue() * S;
780
0
              // Iterate on the other operand.
781
0
              Op = cast<AddOperator>(Op)->getOperand(0);
782
0
              continue;
783
0
            }
784
11
            // Unsupported
785
11
            goto unsupported_gep;
786
11
          }
787
719
        }
788
1.01k
      }
789
531
790
531
      // Try to grab the base operand now.
791
531
      Addr.Offset = TmpOffset;
792
520
      if (ARMComputeAddress(U->getOperand(0), Addr)) return true;
793
2
794
2
      // We failed, restore everything and try the other options.
795
2
      Addr = SavedAddr;
796
2
797
13
      unsupported_gep:
798
13
      break;
799
2
    }
800
559
    case Instruction::Alloca: {
801
559
      const AllocaInst *AI = cast<AllocaInst>(Obj);
802
559
      DenseMap<const AllocaInst*, int>::iterator SI =
803
559
        FuncInfo.StaticAllocaMap.find(AI);
804
559
      if (SI != FuncInfo.StaticAllocaMap.end()) {
805
559
        Addr.BaseType = Address::FrameIndexBase;
806
559
        Addr.Base.FI = SI->second;
807
559
        return true;
808
559
      }
809
0
      break;
810
0
    }
811
436
  }
812
436
813
436
  // Try to get this in a register if nothing else has worked.
814
436
  if (Addr.Base.Reg == 0) Addr.Base.Reg = getRegForValue(Obj);
815
436
  return Addr.Base.Reg != 0;
816
436
}
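Aside (illustration only): the GetElementPtr case above folds constant indices into a single byte offset — struct fields contribute their layout offset, array-style indices contribute index times element size. A standalone sketch of that accumulation with made-up step data:

#include <cstdint>
#include <cstdio>
#include <vector>

// Illustrative only: each GEP step is either a struct field (offset taken
// from a precomputed layout) or an index scaled by the element size.
struct GEPStep {
  bool IsStructField;
  int64_t FieldOffset; // used when IsStructField is true
  int64_t Index;       // used otherwise
  uint64_t ElemSize;   // used otherwise
};

// Accumulate the byte offset the way ARMComputeAddress accumulates TmpOffset
// when every step is a compile-time constant.
int64_t foldConstantGEP(const std::vector<GEPStep> &Steps) {
  int64_t Offset = 0;
  for (const GEPStep &S : Steps)
    Offset += S.IsStructField ? S.FieldOffset : S.Index * (int64_t)S.ElemSize;
  return Offset;
}

int main() {
  // e.g. p[3].second where *p is a struct of two i32 fields: 3*8 + 4 = 28.
  std::vector<GEPStep> Steps = {{false, 0, 3, 8}, {true, 4, 0, 0}};
  std::printf("folded offset = %lld\n", (long long)foldConstantGEP(Steps));
  return 0;
}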
817
818
1.71k
void ARMFastISel::ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3) {
819
1.71k
  bool needsLowering = false;
820
1.71k
  switch (VT.SimpleTy) {
821
1.71k
    default: llvm_unreachable("Unhandled load/store type!");
822
1.71k
    case MVT::i1:
823
1.14k
    case MVT::i8:
824
1.14k
    case MVT::i16:
825
1.14k
    case MVT::i32:
826
1.14k
      if (!useAM3) {
827
1.02k
        // Integer loads/stores handle 12-bit offsets.
828
1.02k
        needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset);
829
1.02k
        // Handle negative offsets.
830
1.02k
        if (needsLowering && isThumb2)
831
19
          needsLowering = !(Subtarget->hasV6T2Ops() && Addr.Offset < 0 &&
832
19
                            Addr.Offset > -256);
833
1.02k
      } else {
834
116
        // ARM halfword load/stores and signed byte loads use +/-imm8 offsets.
835
116
        needsLowering = (Addr.Offset > 255 || Addr.Offset < -255);
836
116
      }
837
1.14k
      break;
838
1.14k
    case MVT::f32:
839
572
    case MVT::f64:
840
572
      // Floating point operands handle 8-bit offsets.
841
572
      needsLowering = ((Addr.Offset & 0xff) != Addr.Offset);
842
572
      break;
843
1.71k
  }
844
1.71k
845
1.71k
  // If this is a stack pointer and the offset needs to be simplified then
846
1.71k
  // put the alloca address into a register, set the base type back to
847
1.71k
  // register and continue. This should almost never happen.
848
1.71k
  if (needsLowering && Addr.BaseType == Address::FrameIndexBase) {
849
0
    const TargetRegisterClass *RC = isThumb2 ? &ARM::tGPRRegClass
850
0
                                             : &ARM::GPRRegClass;
851
0
    unsigned ResultReg = createResultReg(RC);
852
0
    unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
853
0
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
854
0
                            TII.get(Opc), ResultReg)
855
0
                            .addFrameIndex(Addr.Base.FI)
856
0
                            .addImm(0));
857
0
    Addr.Base.Reg = ResultReg;
858
0
    Addr.BaseType = Address::RegBase;
859
0
  }
860
1.71k
861
1.71k
  // Since the offset is too large for the load/store instruction
862
1.71k
  // get the reg+offset into a register.
863
1.71k
  if (needsLowering) {
864
404
    Addr.Base.Reg = fastEmit_ri_(MVT::i32, ISD::ADD, Addr.Base.Reg,
865
404
                                 /*Op0IsKill*/false, Addr.Offset, MVT::i32);
866
404
    Addr.Offset = 0;
867
404
  }
868
1.71k
}
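Aside (illustration only): ARMSimplifyAddress above keeps an offset only when it fits the addressing mode (12-bit unsigned for plain integer loads/stores, ±255 for the AM3 halfword/signed-byte forms, 8 bits for the VFP forms, which are later divided by 4); otherwise it folds base+offset into a new base register. A simplified standalone sketch of that range check:

#include <cstdio>

enum class AddrKind { IntImm12, HalfwordImm8, FPImm8 };

// Does this byte offset fit the addressing mode directly?
// (Mirrors the checks in ARMSimplifyAddress above; simplified, ARM mode only.)
bool offsetFits(AddrKind K, int Offset) {
  switch (K) {
  case AddrKind::IntImm12:     // plain LDR/STR: 0..4095
    return Offset >= 0 && Offset <= 4095;
  case AddrKind::HalfwordImm8: // LDRH/LDRSB/STRH (AM3): -255..255
    return Offset >= -255 && Offset <= 255;
  case AddrKind::FPImm8:       // VLDR/VSTR: 0..255 here, divided by 4 later
    return Offset >= 0 && Offset <= 255;
  }
  return false;
}

int main() {
  // An offset that does not fit is folded into the base register with an ADD
  // and the remaining offset becomes 0, as the function above does.
  int Offset = 5000;
  if (!offsetFits(AddrKind::IntImm12, Offset))
    std::printf("fold: base' = base + %d, offset = 0\n", Offset);
  return 0;
}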
869
870
void ARMFastISel::AddLoadStoreOperands(MVT VT, Address &Addr,
871
                                       const MachineInstrBuilder &MIB,
872
                                       MachineMemOperand::Flags Flags,
873
1.71k
                                       bool useAM3) {
874
1.71k
  // addrmode5 output depends on the selection dag addressing dividing the
875
1.71k
  // offset by 4 that it then later multiplies. Do this here as well.
876
1.71k
  if (VT.SimpleTy == MVT::f32 || VT.SimpleTy == MVT::f64)
877
572
    Addr.Offset /= 4;
878
1.71k
879
1.71k
  // Frame base works a bit differently. Handle it separately.
880
1.71k
  if (Addr.BaseType == Address::FrameIndexBase) {
881
562
    int FI = Addr.Base.FI;
882
562
    int Offset = Addr.Offset;
883
562
    MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
884
562
        MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
885
562
        MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
886
562
    // Now add the rest of the operands.
887
562
    MIB.addFrameIndex(FI);
888
562
889
562
    // ARM halfword load/stores and signed byte loads need an additional
890
562
    // operand.
891
562
    if (useAM3) {
892
6
      int Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
893
6
      MIB.addReg(0);
894
6
      MIB.addImm(Imm);
895
556
    } else {
896
556
      MIB.addImm(Addr.Offset);
897
556
    }
898
562
    MIB.addMemOperand(MMO);
899
1.15k
  } else {
900
1.15k
    // Now add the rest of the operands.
901
1.15k
    MIB.addReg(Addr.Base.Reg);
902
1.15k
903
1.15k
    // ARM halfword load/stores and signed byte loads need an additional
904
1.15k
    // operand.
905
1.15k
    if (useAM3) {
906
110
      int Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
907
110
      MIB.addReg(0);
908
110
      MIB.addImm(Imm);
909
1.04k
    } else {
910
1.04k
      MIB.addImm(Addr.Offset);
911
1.04k
    }
912
1.15k
  }
913
1.71k
  AddOptionalDefs(MIB);
914
1.71k
}
915
916
bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
917
484
                              unsigned Alignment, bool isZExt, bool allocReg) {
918
484
  unsigned Opc;
919
484
  bool useAM3 = false;
920
484
  bool needVMOV = false;
921
484
  const TargetRegisterClass *RC;
922
484
  switch (VT.SimpleTy) {
923
484
    // This is mostly going to be Neon/vector support.
924
484
    default: return false;
925
484
    case MVT::i1:
926
96
    case MVT::i8:
927
96
      if (isThumb2) {
928
31
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
929
2
          Opc = isZExt ? ARM::t2LDRBi8 : ARM::t2LDRSBi8;
930
29
        else
931
29
          Opc = isZExt ? ARM::t2LDRBi12 : ARM::t2LDRSBi12;
932
65
      } else {
933
65
        if (isZExt) {
934
57
          Opc = ARM::LDRBi12;
935
57
        } else {
936
8
          Opc = ARM::LDRSB;
937
8
          useAM3 = true;
938
8
        }
939
65
      }
940
96
      RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
941
96
      break;
942
96
    case MVT::i16:
943
85
      if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem())
944
4
        return false;
945
81
946
81
      if (isThumb2) {
947
24
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
948
2
          Opc = isZExt ? ARM::t2LDRHi8 : ARM::t2LDRSHi8;
949
22
        else
950
22
          Opc = isZExt ? ARM::t2LDRHi12 : ARM::t2LDRSHi12;
951
57
      } else {
952
57
        Opc = isZExt ? ARM::LDRH : ARM::LDRSH;
953
57
        useAM3 = true;
954
57
      }
955
81
      RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
956
81
      break;
957
279
    case MVT::i32:
958
279
      if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem())
959
4
        return false;
960
275
961
275
      if (isThumb2) {
962
138
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
963
2
          Opc = ARM::t2LDRi8;
964
136
        else
965
136
          Opc = ARM::t2LDRi12;
966
138
      } else {
967
137
        Opc = ARM::LDRi12;
968
137
      }
969
275
      RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
970
275
      break;
971
275
    case MVT::f32:
972
18
      if (!Subtarget->hasVFP2Base()) return false;
973
18
      // Unaligned loads need special handling. Floats require word-alignment.
974
18
      if (Alignment && Alignment < 4) {
975
8
        needVMOV = true;
976
8
        VT = MVT::i32;
977
8
        Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
978
8
        RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
979
10
      } else {
980
10
        Opc = ARM::VLDRS;
981
10
        RC = TLI.getRegClassFor(VT);
982
10
      }
983
18
      break;
984
18
    case MVT::f64:
985
2
      // Can load and store double precision even without FeatureFP64
986
2
      if (!Subtarget->hasVFP2Base()) return false;
987
2
      // FIXME: Unaligned loads need special handling.  Doublewords require
988
2
      // word-alignment.
989
2
      if (Alignment && Alignment < 4)
990
0
        return false;
991
2
992
2
      Opc = ARM::VLDRD;
993
2
      RC = TLI.getRegClassFor(VT);
994
2
      break;
995
472
  }
996
472
  // Simplify this down to something we can handle.
997
472
  ARMSimplifyAddress(Addr, VT, useAM3);
998
472
999
472
  // Create the base instruction, then add the operands.
1000
472
  if (allocReg)
1001
421
    ResultReg = createResultReg(RC);
1002
472
  assert(ResultReg > 255 && "Expected an allocated virtual register.");
1003
472
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1004
472
                                    TII.get(Opc), ResultReg);
1005
472
  AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad, useAM3);
1006
472
1007
472
  // If we had an unaligned load of a float we've converted it to a regular
1008
472
  // load.  Now we must move from the GPR to the FP register.
1009
472
  if (needVMOV) {
1010
8
    unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::f32));
1011
8
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1012
8
                            TII.get(ARM::VMOVSR), MoveReg)
1013
8
                    .addReg(ResultReg));
1014
8
    ResultReg = MoveReg;
1015
8
  }
1016
472
  return true;
1017
472
}
1018
1019
317
bool ARMFastISel::SelectLoad(const Instruction *I) {
1020
317
  // Atomic loads need special handling.
1021
317
  if (cast<LoadInst>(I)->isAtomic())
1022
1
    return false;
1023
316
1024
316
  const Value *SV = I->getOperand(0);
1025
316
  if (TLI.supportSwiftError()) {
1026
316
    // Swifterror values can come from either a function parameter with
1027
316
    // swifterror attribute or an alloca with swifterror attribute.
1028
316
    if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1029
28
      if (Arg->hasSwiftErrorAttr())
1030
0
        return false;
1031
316
    }
1032
316
1033
316
    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1034
124
      if (Alloca->isSwiftError())
1035
5
        return false;
1036
311
    }
1037
316
  }
1038
311
1039
311
  // Verify we have a legal type before going any further.
1040
311
  MVT VT;
1041
311
  if (!isLoadTypeLegal(I->getType(), VT))
1042
1
    return false;
1043
310
1044
310
  // See if we can handle this address.
1045
310
  Address Addr;
1046
310
  if (!ARMComputeAddress(I->getOperand(0), Addr)) return false;
1047
305
1048
305
  unsigned ResultReg;
1049
305
  if (!ARMEmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment()))
1050
12
    return false;
1051
293
  updateValueMap(I, ResultReg);
1052
293
  return true;
1053
293
}
1054
1055
bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
1056
1.25k
                               unsigned Alignment) {
1057
1.25k
  unsigned StrOpc;
1058
1.25k
  bool useAM3 = false;
1059
1.25k
  switch (VT.SimpleTy) {
1060
1.25k
    // This is mostly going to be Neon/vector support.
1061
1.25k
    default: return false;
1062
1.25k
    case MVT::i1: {
1063
11
      unsigned Res = createResultReg(isThumb2 ? &ARM::tGPRRegClass
1064
11
                                              : &ARM::GPRRegClass);
1065
11
      unsigned Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
1066
11
      SrcReg = constrainOperandRegClass(TII.get(Opc), SrcReg, 1);
1067
11
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1068
11
                              TII.get(Opc), Res)
1069
11
                      .addReg(SrcReg).addImm(1));
1070
11
      SrcReg = Res;
1071
11
      LLVM_FALLTHROUGH;
1072
11
    }
1073
102
    case MVT::i8:
1074
102
      if (isThumb2) {
1075
35
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
1076
2
          StrOpc = ARM::t2STRBi8;
1077
33
        else
1078
33
          StrOpc = ARM::t2STRBi12;
1079
67
      } else {
1080
67
        StrOpc = ARM::STRBi12;
1081
67
      }
1082
102
      break;
1083
78
    case MVT::i16:
1084
78
      if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem())
1085
4
        return false;
1086
74
1087
74
      if (isThumb2) {
1088
23
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
1089
2
          StrOpc = ARM::t2STRHi8;
1090
21
        else
1091
21
          StrOpc = ARM::t2STRHi12;
1092
51
      } else {
1093
51
        StrOpc = ARM::STRH;
1094
51
        useAM3 = true;
1095
51
      }
1096
74
      break;
1097
501
    case MVT::i32:
1098
501
      if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem())
1099
4
        return false;
1100
497
1101
497
      if (isThumb2) {
1102
301
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
1103
2
          StrOpc = ARM::t2STRi8;
1104
299
        else
1105
299
          StrOpc = ARM::t2STRi12;
1106
301
      } else {
1107
196
        StrOpc = ARM::STRi12;
1108
196
      }
1109
497
      break;
1110
497
    case MVT::f32:
1111
119
      if (!Subtarget->hasVFP2Base()) return false;
1112
119
      // Unaligned stores need special handling. Floats require word-alignment.
1113
119
      if (Alignment && Alignment < 4) {
1114
8
        unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::i32));
1115
8
        AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1116
8
                                TII.get(ARM::VMOVRS), MoveReg)
1117
8
                        .addReg(SrcReg));
1118
8
        SrcReg = MoveReg;
1119
8
        VT = MVT::i32;
1120
8
        StrOpc = isThumb2 ? ARM::t2STRi12 : ARM::STRi12;
1121
111
      } else {
1122
111
        StrOpc = ARM::VSTRS;
1123
111
      }
1124
119
      break;
1125
449
    case MVT::f64:
1126
449
      // Can load and store double precision even without FeatureFP64
1127
449
      if (!Subtarget->hasVFP2Base()) return false;
1128
449
      // FIXME: Unaligned stores need special handling.  Doublewords require
1129
449
      // word-alignment.
1130
449
      if (Alignment && Alignment < 4)
1131
0
          return false;
1132
449
1133
449
      StrOpc = ARM::VSTRD;
1134
449
      break;
1135
1.24k
  }
1136
1.24k
  // Simplify this down to something we can handle.
1137
1.24k
  ARMSimplifyAddress(Addr, VT, useAM3);
1138
1.24k
1139
1.24k
  // Create the base instruction, then add the operands.
1140
1.24k
  SrcReg = constrainOperandRegClass(TII.get(StrOpc), SrcReg, 0);
1141
1.24k
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1142
1.24k
                                    TII.get(StrOpc))
1143
1.24k
                            .addReg(SrcReg);
1144
1.24k
  AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOStore, useAM3);
1145
1.24k
  return true;
1146
1.24k
}
1147
1148
592
bool ARMFastISel::SelectStore(const Instruction *I) {
1149
592
  Value *Op0 = I->getOperand(0);
1150
592
  unsigned SrcReg = 0;
1151
592
1152
592
  // Atomic stores need special handling.
1153
592
  if (cast<StoreInst>(I)->isAtomic())
1154
3
    return false;
1155
589
1156
589
  const Value *PtrV = I->getOperand(1);
1157
589
  if (TLI.supportSwiftError()) {
1158
589
    // Swifterror values can come from either a function parameter with
1159
589
    // swifterror attribute or an alloca with swifterror attribute.
1160
589
    if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
1161
63
      if (Arg->hasSwiftErrorAttr())
1162
1
        return false;
1163
588
    }
1164
588
1165
588
    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
1166
259
      if (Alloca->isSwiftError())
1167
1
        return false;
1168
587
    }
1169
588
  }
1170
587
1171
587
  // Verify we have a legal type before going any further.
1172
587
  MVT VT;
1173
587
  if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT))
1174
7
    return false;
1175
580
1176
580
  // Get the value to be stored into a register.
1177
580
  SrcReg = getRegForValue(Op0);
1178
580
  if (SrcReg == 0) return false;
1179
580
1180
580
  // See if we can handle this address.
1181
580
  Address Addr;
1182
580
  if (!ARMComputeAddress(I->getOperand(1), Addr))
1183
4
    return false;
1184
576
1185
576
  if (!ARMEmitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlignment()))
1186
13
    return false;
1187
563
  return true;
1188
563
}
1189
1190
86
static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) {
1191
86
  switch (Pred) {
1192
86
    // Needs two compares...
1193
86
    case CmpInst::FCMP_ONE:
1194
0
    case CmpInst::FCMP_UEQ:
1195
0
    default:
1196
0
      // AL is our "false" for now. The other two need more compares.
1197
0
      return ARMCC::AL;
1198
7
    case CmpInst::ICMP_EQ:
1199
7
    case CmpInst::FCMP_OEQ:
1200
7
      return ARMCC::EQ;
1201
7
    case CmpInst::ICMP_SGT:
1202
3
    case CmpInst::FCMP_OGT:
1203
3
      return ARMCC::GT;
1204
5
    case CmpInst::ICMP_SGE:
1205
5
    case CmpInst::FCMP_OGE:
1206
5
      return ARMCC::GE;
1207
5
    case CmpInst::ICMP_UGT:
1208
3
    case CmpInst::FCMP_UGT:
1209
3
      return ARMCC::HI;
1210
3
    case CmpInst::FCMP_OLT:
1211
0
      return ARMCC::MI;
1212
3
    case CmpInst::ICMP_ULE:
1213
3
    case CmpInst::FCMP_OLE:
1214
3
      return ARMCC::LS;
1215
3
    case CmpInst::FCMP_ORD:
1216
0
      return ARMCC::VC;
1217
3
    case CmpInst::FCMP_UNO:
1218
0
      return ARMCC::VS;
1219
3
    case CmpInst::FCMP_UGE:
1220
0
      return ARMCC::PL;
1221
3
    case CmpInst::ICMP_SLT:
1222
3
    case CmpInst::FCMP_ULT:
1223
3
      return ARMCC::LT;
1224
3
    case CmpInst::ICMP_SLE:
1225
2
    case CmpInst::FCMP_ULE:
1226
2
      return ARMCC::LE;
1227
53
    case CmpInst::FCMP_UNE:
1228
53
    case CmpInst::ICMP_NE:
1229
53
      return ARMCC::NE;
1230
53
    case CmpInst::ICMP_UGE:
1231
1
      return ARMCC::HS;
1232
53
    case CmpInst::ICMP_ULT:
1233
6
      return ARMCC::LO;
1234
86
  }
1235
86
}
1236
1237
85
bool ARMFastISel::SelectBranch(const Instruction *I) {
1238
85
  const BranchInst *BI = cast<BranchInst>(I);
1239
85
  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
1240
85
  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
1241
85
1242
85
  // Simple branch support.
1243
85
1244
85
  // If we can, avoid recomputing the compare - redoing it could lead to wonky
1245
85
  // behavior.
1246
85
  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
1247
60
    if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
1248
60
      // Get the compare predicate.
1249
60
      // Try to take advantage of fallthrough opportunities.
1250
60
      CmpInst::Predicate Predicate = CI->getPredicate();
1251
60
      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
1252
53
        std::swap(TBB, FBB);
1253
53
        Predicate = CmpInst::getInversePredicate(Predicate);
1254
53
      }
1255
60
1256
60
      ARMCC::CondCodes ARMPred = getComparePred(Predicate);
1257
60
1258
60
      // We may not handle every CC for now.
1259
60
      if (ARMPred == ARMCC::AL) return false;
1260
60
1261
60
      // Emit the compare.
1262
60
      if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
1263
60
                      CI->isEquality()))
1264
0
        return false;
1265
60
1266
60
      unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
1267
60
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
1268
60
      .addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR);
1269
60
      finishCondBranch(BI->getParent(), TBB, FBB);
1270
60
      return true;
1271
60
    }
1272
25
  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
1273
3
    MVT SourceVT;
1274
3
    if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
1275
3
        (isLoadTypeLegal(TI->getOperand(0)->getType(), SourceVT))) {
1276
3
      unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
1277
3
      unsigned OpReg = getRegForValue(TI->getOperand(0));
1278
3
      OpReg = constrainOperandRegClass(TII.get(TstOpc), OpReg, 0);
1279
3
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1280
3
                              TII.get(TstOpc))
1281
3
                      .addReg(OpReg).addImm(1));
1282
3
1283
3
      unsigned CCMode = ARMCC::NE;
1284
3
      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
1285
3
        std::swap(TBB, FBB);
1286
3
        CCMode = ARMCC::EQ;
1287
3
      }
1288
3
1289
3
      unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
1290
3
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
1291
3
      .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
1292
3
1293
3
      finishCondBranch(BI->getParent(), TBB, FBB);
1294
3
      return true;
1295
3
    }
1296
22
  } else if (const ConstantInt *CI =
1297
14
             dyn_cast<ConstantInt>(BI->getCondition())) {
1298
14
    uint64_t Imm = CI->getZExtValue();
1299
14
    MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
1300
14
    fastEmitBranch(Target, DbgLoc);
1301
14
    return true;
1302
14
  }
1303
8
1304
8
  unsigned CmpReg = getRegForValue(BI->getCondition());
1305
8
  if (CmpReg == 0) return false;
1306
8
1307
8
  // We've been divorced from our compare!  Our block was split, and
1308
8
  // now our compare lives in a predecessor block.  We mustn't
1309
8
  // re-compare here, as the children of the compare aren't guaranteed
1310
8
  // live across the block boundary (we *could* check for this).
1311
8
  // Regardless, the compare has been done in the predecessor block,
1312
8
  // and it left a value for us in a virtual register.  Ergo, we test
1313
8
  // the one-bit value left in the virtual register.
1314
8
  unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
1315
8
  CmpReg = constrainOperandRegClass(TII.get(TstOpc), CmpReg, 0);
1316
8
  AddOptionalDefs(
1317
8
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TstOpc))
1318
8
          .addReg(CmpReg)
1319
8
          .addImm(1));
1320
8
1321
8
  unsigned CCMode = ARMCC::NE;
1322
8
  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
1323
5
    std::swap(TBB, FBB);
1324
5
    CCMode = ARMCC::EQ;
1325
5
  }
1326
8
1327
8
  unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
1328
8
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
1329
8
                  .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
1330
8
  finishCondBranch(BI->getParent(), TBB, FBB);
1331
8
  return true;
1332
8
}
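The fallthrough handling in SelectBranch above swaps the successors and inverts the predicate whenever the true block is also the layout successor, so the emitted conditional branch always targets the non-fallthrough block. A minimal standalone sketch of that decision, independent of the LLVM classes used above (illustrative only):

    #include <utility>

    // Stand-ins for the IR predicate and a basic block type.
    enum class Pred { EQ, NE };
    Pred invert(Pred P) { return P == Pred::EQ ? Pred::NE : Pred::EQ; }

    // Mirrors the swap in SelectBranch: if TBB would fall through anyway,
    // branch to FBB on the inverted predicate instead.
    template <typename Block>
    Pred chooseBranchTarget(Pred P, Block *&TBB, Block *&FBB, Block *LayoutSucc) {
      if (TBB == LayoutSucc) {
        std::swap(TBB, FBB);
        P = invert(P);
      }
      return P; // conditional branch goes to the (possibly swapped) TBB on P
    }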
1333
1334
4
bool ARMFastISel::SelectIndirectBr(const Instruction *I) {
1335
4
  unsigned AddrReg = getRegForValue(I->getOperand(0));
1336
4
  if (AddrReg == 0) return false;
1337
4
1338
4
  unsigned Opc = isThumb2 ? ARM::tBRIND : ARM::BX;
1339
4
  assert(isThumb2 || Subtarget->hasV4TOps());
1340
4
1341
4
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1342
4
                          TII.get(Opc)).addReg(AddrReg));
1343
4
1344
4
  const IndirectBrInst *IB = cast<IndirectBrInst>(I);
1345
4
  for (const BasicBlock *SuccBB : IB->successors())
1346
8
    FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[SuccBB]);
1347
4
1348
4
  return true;
1349
4
}
1350
1351
bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
1352
86
                             bool isZExt, bool isEquality) {
1353
86
  Type *Ty = Src1Value->getType();
1354
86
  EVT SrcEVT = TLI.getValueType(DL, Ty, true);
1355
86
  if (!SrcEVT.isSimple()) return false;
1356
86
  MVT SrcVT = SrcEVT.getSimpleVT();
1357
86
1358
86
  if (Ty->isFloatTy() && !Subtarget->hasVFP2Base())
1359
0
    return false;
1360
86
1361
86
  if (Ty->isDoubleTy() && (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()))
1362
1
    return false;
1363
85
1364
85
  // Check to see if the 2nd operand is a constant that we can encode directly
1365
85
  // in the compare.
1366
85
  int Imm = 0;
1367
85
  bool UseImm = false;
1368
85
  bool isNegativeImm = false;
1369
85
  // FIXME: At -O0 we don't have anything that canonicalizes operand order.
1370
85
  // Thus, Src1Value may be a ConstantInt, but we're missing it.
1371
85
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) {
1372
41
    if (SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8 ||
1373
41
        SrcVT == MVT::i1) {
1374
41
      const APInt &CIVal = ConstInt->getValue();
1375
41
      Imm = (isZExt) ? (int)CIVal.getZExtValue() : (int)CIVal.getSExtValue();
1376
41
      // For INT_MIN/LONG_MIN (i.e., 0x80000000) we need to use a cmp, rather
1377
41
      // than a cmn, because there is no way to represent 2147483648 as a
1378
41
      // signed 32-bit int.
1379
41
      if (Imm < 0 && Imm != (int)0x80000000) {
1380
9
        isNegativeImm = true;
1381
9
        Imm = -Imm;
1382
9
      }
1383
41
      UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
1384
41
        (ARM_AM::getSOImmVal(Imm) != -1);
1385
41
    }
1386
44
  } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) {
1387
22
    if (SrcVT == MVT::f32 || SrcVT == MVT::f64)
1388
22
      if (ConstFP->isZero() && !ConstFP->isNegative())
1389
14
        UseImm = true;
1390
22
  }
1391
85
1392
85
  unsigned CmpOpc;
1393
85
  bool isICmp = true;
1394
85
  bool needsExt = false;
1395
85
  switch (SrcVT.SimpleTy) {
1396
85
    default: return false;
1397
85
    // TODO: Verify compares.
1398
85
    case MVT::f32:
1399
16
      isICmp = false;
1400
16
      // Equality comparisons shouldn't raise Invalid on unordered inputs.
1401
16
      if (isEquality)
1402
14
        CmpOpc = UseImm ? ARM::VCMPZS : ARM::VCMPS;
1403
2
      else
1404
2
        CmpOpc = UseImm ? ARM::VCMPEZS : ARM::VCMPES;
1405
16
      break;
1406
85
    case MVT::f64:
1407
6
      isICmp = false;
1408
6
      // Equality comparisons shouldn't raise Invalid on unordered inputs.
1409
6
      if (isEquality)
1410
6
        CmpOpc = UseImm ? ARM::VCMPZD : ARM::VCMPD;
1411
0
      else
1412
0
      CmpOpc = UseImm ? ARM::VCMPEZD : ARM::VCMPED;
1413
6
      break;
1414
85
    case MVT::i1:
1415
30
    case MVT::i8:
1416
30
    case MVT::i16:
1417
30
      needsExt = true;
1418
30
      LLVM_FALLTHROUGH;
1419
63
    case MVT::i32:
1420
63
      if (isThumb2) {
1421
22
        if (!UseImm)
1422
7
          CmpOpc = ARM::t2CMPrr;
1423
15
        else
1424
15
          CmpOpc = isNegativeImm ? ARM::t2CMNri : ARM::t2CMPri;
1425
41
      } else {
1426
41
        if (!UseImm)
1427
16
          CmpOpc = ARM::CMPrr;
1428
25
        else
1429
25
          CmpOpc = isNegativeImm ? ARM::CMNri : ARM::CMPri;
1430
41
      }
1431
63
      break;
1432
85
  }
1433
85
1434
85
  unsigned SrcReg1 = getRegForValue(Src1Value);
1435
85
  if (SrcReg1 == 0) return false;
1436
85
1437
85
  unsigned SrcReg2 = 0;
1438
85
  if (!UseImm) {
1439
31
    SrcReg2 = getRegForValue(Src2Value);
1440
31
    if (SrcReg2 == 0) return false;
1441
85
  }
1442
85
1443
85
  // We have i1, i8, or i16, we need to either zero extend or sign extend.
1444
85
  if (needsExt) {
1445
30
    SrcReg1 = ARMEmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt);
1446
30
    if (SrcReg1 == 0) return false;
1447
30
    if (!UseImm) {
1448
15
      SrcReg2 = ARMEmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt);
1449
15
      if (SrcReg2 == 0) return false;
1450
85
    }
1451
30
  }
1452
85
1453
85
  const MCInstrDesc &II = TII.get(CmpOpc);
1454
85
  SrcReg1 = constrainOperandRegClass(II, SrcReg1, 0);
1455
85
  if (!UseImm) {
1456
31
    SrcReg2 = constrainOperandRegClass(II, SrcReg2, 1);
1457
31
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
1458
31
                    .addReg(SrcReg1).addReg(SrcReg2));
1459
54
  } else {
1460
54
    MachineInstrBuilder MIB;
1461
54
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
1462
54
      .addReg(SrcReg1);
1463
54
1464
54
    // Only add immediate for icmp as the immediate for fcmp is an implicit 0.0.
1465
54
    if (isICmp)
1466
40
      MIB.addImm(Imm);
1467
54
    AddOptionalDefs(MIB);
1468
54
  }
1469
85
1470
85
  // For floating point we need to move the result to a comparison register
1471
85
  // that we can then use for branches.
1472
85
  if (Ty->isFloatTy() || Ty->isDoubleTy())
1473
22
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1474
22
                            TII.get(ARM::FMSTAT)));
1475
85
  return true;
1476
85
}
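The UseImm decision in ARMEmitCmp above relies on ARM_AM::getSOImmVal / getT2SOImmVal returning -1 when a constant cannot be encoded as a modified immediate. As a rough, self-contained illustration of the A32 rule (an 8-bit value rotated right by an even amount) -- this is not the actual ARM_AM implementation, and Thumb-2 accepts additional patterns not modelled here:

    #include <cstdint>

    bool isA32ModifiedImm(uint32_t V) {
      for (unsigned Rot = 0; Rot < 32; Rot += 2) {
        // Rotate V left by Rot, i.e. undo a rotate-right-by-Rot encoding.
        uint32_t Undone = Rot ? ((V << Rot) | (V >> (32 - Rot))) : V;
        if (Undone <= 0xFF)
          return true;
      }
      return false;
    }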
1477
1478
26
bool ARMFastISel::SelectCmp(const Instruction *I) {
1479
26
  const CmpInst *CI = cast<CmpInst>(I);
1480
26
1481
26
  // Get the compare predicate.
1482
26
  ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());
1483
26
1484
26
  // We may not handle every CC for now.
1485
26
  if (ARMPred == ARMCC::AL) return false;
1486
26
1487
26
  // Emit the compare.
1488
26
  if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
1489
26
                  CI->isEquality()))
1490
1
    return false;
1491
25
1492
25
  // Now set a register based on the comparison. Explicitly set the predicates
1493
25
  // here.
1494
25
  unsigned MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
1495
25
  const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass
1496
25
                                           : &ARM::GPRRegClass;
1497
25
  unsigned DestReg = createResultReg(RC);
1498
25
  Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0);
1499
25
  unsigned ZeroReg = fastMaterializeConstant(Zero);
1500
25
  // ARMEmitCmp emits a FMSTAT when necessary, so it's always safe to use CPSR.
1501
25
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovCCOpc), DestReg)
1502
25
          .addReg(ZeroReg).addImm(1)
1503
25
          .addImm(ARMPred).addReg(ARM::CPSR);
1504
25
1505
25
  updateValueMap(I, DestReg);
1506
25
  return true;
1507
25
}
1508
1509
1
bool ARMFastISel::SelectFPExt(const Instruction *I) {
1510
1
  // Make sure we have VFP and that we're extending float to double.
1511
1
  if (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()) return false;
1512
0
1513
0
  Value *V = I->getOperand(0);
1514
0
  if (!I->getType()->isDoubleTy() ||
1515
0
      !V->getType()->isFloatTy()) return false;
1516
0
1517
0
  unsigned Op = getRegForValue(V);
1518
0
  if (Op == 0) return false;
1519
0
1520
0
  unsigned Result = createResultReg(&ARM::DPRRegClass);
1521
0
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1522
0
                          TII.get(ARM::VCVTDS), Result)
1523
0
                  .addReg(Op));
1524
0
  updateValueMap(I, Result);
1525
0
  return true;
1526
0
}
1527
1528
1
bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
1529
1
  // Make sure we have VFP and that we're truncating double to float.
1530
1
  if (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()) return false;
1531
0
1532
0
  Value *V = I->getOperand(0);
1533
0
  if (!(I->getType()->isFloatTy() &&
1534
0
        V->getType()->isDoubleTy())) return false;
1535
0
1536
0
  unsigned Op = getRegForValue(V);
1537
0
  if (Op == 0) return false;
1538
0
1539
0
  unsigned Result = createResultReg(&ARM::SPRRegClass);
1540
0
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1541
0
                          TII.get(ARM::VCVTSD), Result)
1542
0
                  .addReg(Op));
1543
0
  updateValueMap(I, Result);
1544
0
  return true;
1545
0
}
1546
1547
37
bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
1548
37
  // Make sure we have VFP.
1549
37
  if (!Subtarget->hasVFP2Base()) return false;
1550
37
1551
37
  MVT DstVT;
1552
37
  Type *Ty = I->getType();
1553
37
  if (!isTypeLegal(Ty, DstVT))
1554
0
    return false;
1555
37
1556
37
  Value *Src = I->getOperand(0);
1557
37
  EVT SrcEVT = TLI.getValueType(DL, Src->getType(), true);
1558
37
  if (!SrcEVT.isSimple())
1559
0
    return false;
1560
37
  MVT SrcVT = SrcEVT.getSimpleVT();
1561
37
  if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
1562
0
    return false;
1563
37
1564
37
  unsigned SrcReg = getRegForValue(Src);
1565
37
  if (SrcReg == 0) return false;
1566
37
1567
37
  // Handle sign-extension.
1568
37
  if (SrcVT == MVT::i16 || SrcVT == MVT::i8) {
1569
24
    SrcReg = ARMEmitIntExt(SrcVT, SrcReg, MVT::i32,
1570
24
                                       /*isZExt*/!isSigned);
1571
24
    if (SrcReg == 0) return false;
1572
37
  }
1573
37
1574
37
  // The conversion routine works on fp-reg to fp-reg and the operand above
1575
37
  // was an integer, move it to the fp registers if possible.
1576
37
  unsigned FP = ARMMoveToFPReg(MVT::f32, SrcReg);
1577
37
  if (FP == 0) return false;
1578
37
1579
37
  unsigned Opc;
1580
37
  if (Ty->isFloatTy()) Opc = isSigned ? ARM::VSITOS : ARM::VUITOS;
1581
19
  else if (Ty->isDoubleTy() && Subtarget->hasFP64())
1582
18
    Opc = isSigned ? ARM::VSITOD : ARM::VUITOD;
1583
1
  else return false;
1584
36
1585
36
  unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT));
1586
36
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1587
36
                          TII.get(Opc), ResultReg).addReg(FP));
1588
36
  updateValueMap(I, ResultReg);
1589
36
  return true;
1590
36
}
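SelectIToFP above extends i8/i16 sources to i32 before moving them into an FP register, because the VFP convert instructions (VSITOS/VUITOS and the f64 variants) take a full 32-bit integer input. A small standalone sketch of why the extension kind matters (illustrative only):

    #include <cstdint>

    float toFloat(uint8_t Raw, bool IsSigned) {
      // Widen first, matching ARMEmitIntExt(..., /*isZExt*/!isSigned) above.
      int32_t Wide = IsSigned ? int32_t(int8_t(Raw)) : int32_t(Raw);
      return float(Wide); // corresponds to VSITOS (signed) / VUITOS (unsigned)
    }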
1591
1592
13
bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) {
1593
13
  // Make sure we have VFP.
1594
13
  if (!Subtarget->hasVFP2Base()) return false;
1595
13
1596
13
  MVT DstVT;
1597
13
  Type *RetTy = I->getType();
1598
13
  if (!isTypeLegal(RetTy, DstVT))
1599
0
    return false;
1600
13
1601
13
  unsigned Op = getRegForValue(I->getOperand(0));
1602
13
  if (Op == 0) return false;
1603
13
1604
13
  unsigned Opc;
1605
13
  Type *OpTy = I->getOperand(0)->getType();
1606
13
  if (OpTy->isFloatTy()) Opc = isSigned ? ARM::VTOSIZS : ARM::VTOUIZS;
1607
7
  else if (OpTy->isDoubleTy() && Subtarget->hasFP64())
1608
6
    Opc = isSigned ? ARM::VTOSIZD : ARM::VTOUIZD;
1609
1
  else return false;
1610
12
1611
12
  // f64->s32/u32 or f32->s32/u32 both need an intermediate f32 reg.
1612
12
  unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
1613
12
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1614
12
                          TII.get(Opc), ResultReg).addReg(Op));
1615
12
1616
12
  // This result needs to be in an integer register, but the conversion only
1617
12
  // takes place in fp-regs.
1618
12
  unsigned IntReg = ARMMoveToIntReg(DstVT, ResultReg);
1619
12
  if (IntReg == 0) return false;
1620
12
1621
12
  updateValueMap(I, IntReg);
1622
12
  return true;
1623
12
}
1624
1625
24
bool ARMFastISel::SelectSelect(const Instruction *I) {
1626
24
  MVT VT;
1627
24
  if (!isTypeLegal(I->getType(), VT))
1628
0
    return false;
1629
24
1630
24
  // Things need to be register sized for register moves.
1631
24
  if (VT != MVT::i32) return false;
1632
24
1633
24
  unsigned CondReg = getRegForValue(I->getOperand(0));
1634
24
  if (CondReg == 0) return false;
1635
24
  unsigned Op1Reg = getRegForValue(I->getOperand(1));
1636
24
  if (Op1Reg == 0) return false;
1637
24
1638
24
  // Check to see if we can use an immediate in the conditional move.
1639
24
  int Imm = 0;
1640
24
  bool UseImm = false;
1641
24
  bool isNegativeImm = false;
1642
24
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(2))) {
1643
20
    assert(VT == MVT::i32 && "Expecting an i32.");
1644
20
    Imm = (int)ConstInt->getValue().getZExtValue();
1645
20
    if (Imm < 0) {
1646
12
      isNegativeImm = true;
1647
12
      Imm = ~Imm;
1648
12
    }
1649
20
    UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
1650
20
      (ARM_AM::getSOImmVal(Imm) != -1);
1651
20
  }
1652
24
1653
24
  unsigned Op2Reg = 0;
1654
24
  if (!UseImm) {
1655
4
    Op2Reg = getRegForValue(I->getOperand(2));
1656
4
    if (Op2Reg == 0) return false;
1657
24
  }
1658
24
1659
24
  unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
1660
24
  CondReg = constrainOperandRegClass(TII.get(TstOpc), CondReg, 0);
1661
24
  AddOptionalDefs(
1662
24
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TstOpc))
1663
24
          .addReg(CondReg)
1664
24
          .addImm(1));
1665
24
1666
24
  unsigned MovCCOpc;
1667
24
  const TargetRegisterClass *RC;
1668
24
  if (!UseImm) {
1669
4
    RC = isThumb2 ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
1670
4
    MovCCOpc = isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr;
1671
20
  } else {
1672
20
    RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass;
1673
20
    if (!isNegativeImm)
1674
8
      MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
1675
12
    else
1676
12
      MovCCOpc = isThumb2 ? ARM::t2MVNCCi : ARM::MVNCCi;
1677
20
  }
1678
24
  unsigned ResultReg = createResultReg(RC);
1679
24
  if (!UseImm) {
1680
4
    Op2Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op2Reg, 1);
1681
4
    Op1Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op1Reg, 2);
1682
4
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovCCOpc),
1683
4
            ResultReg)
1684
4
        .addReg(Op2Reg)
1685
4
        .addReg(Op1Reg)
1686
4
        .addImm(ARMCC::NE)
1687
4
        .addReg(ARM::CPSR);
1688
20
  } else {
1689
20
    Op1Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op1Reg, 1);
1690
20
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovCCOpc),
1691
20
            ResultReg)
1692
20
        .addReg(Op1Reg)
1693
20
        .addImm(Imm)
1694
20
        .addImm(ARMCC::EQ)
1695
20
        .addReg(ARM::CPSR);
1696
20
  }
1697
24
  updateValueMap(I, ResultReg);
1698
24
  return true;
1699
24
}
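In the immediate path of SelectSelect above, a negative constant is stored bit-inverted (Imm = ~Imm) and emitted with MVNCC instead of MOVCC, since MVN writes the complement of its operand. A short worked example of that round trip (values chosen for illustration):

    #include <cassert>
    #include <cstdint>

    int main() {
      int Imm = -5;            // original select operand
      bool isNegativeImm = Imm < 0;
      if (isNegativeImm)
        Imm = ~Imm;            // ~(-5) == 4, which encodes easily
      assert(Imm == 4);
      // MVNCC then writes ~4 == 0xFFFFFFFB, i.e. -5, recovering the original.
      assert(~uint32_t(Imm) == uint32_t(-5));
      return 0;
    }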
1700
1701
14
bool ARMFastISel::SelectDiv(const Instruction *I, bool isSigned) {
1702
14
  MVT VT;
1703
14
  Type *Ty = I->getType();
1704
14
  if (!isTypeLegal(Ty, VT))
1705
0
    return false;
1706
14
1707
14
  // If we have integer div support we should have selected this automagically.
1708
14
  // In case we have a real miss go ahead and return false and we'll pick
1709
14
  // it up later.
1710
14
  if (Subtarget->hasDivideInThumbMode())
1711
0
    return false;
1712
14
1713
14
  // Otherwise emit a libcall.
1714
14
  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
1715
14
  if (VT == MVT::i8)
1716
0
    LC = isSigned ? RTLIB::SDIV_I8 : RTLIB::UDIV_I8;
1717
14
  else if (VT == MVT::i16)
1718
0
    LC = isSigned ? RTLIB::SDIV_I16 : RTLIB::UDIV_I16;
1719
14
  else if (VT == MVT::i32)
1720
14
    LC = isSigned ? RTLIB::SDIV_I32 : RTLIB::UDIV_I32;
1721
0
  else if (VT == MVT::i64)
1722
0
    LC = isSigned ? RTLIB::SDIV_I64 : RTLIB::UDIV_I64;
1723
0
  else if (VT == MVT::i128)
1724
0
    LC = isSigned ? RTLIB::SDIV_I128 : RTLIB::UDIV_I128;
1725
14
  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
1726
14
1727
14
  return ARMEmitLibcall(I, LC);
1728
14
}
1729
1730
36
bool ARMFastISel::SelectRem(const Instruction *I, bool isSigned) {
1731
36
  MVT VT;
1732
36
  Type *Ty = I->getType();
1733
36
  if (!isTypeLegal(Ty, VT))
1734
4
    return false;
1735
32
1736
32
  // Many ABIs do not provide a libcall for standalone remainder, so we need to
1737
32
  // use divrem (see the RTABI 4.3.1). Since FastISel can't handle non-double
1738
32
  // multi-reg returns, we'll have to bail out.
1739
32
  if (!TLI.hasStandaloneRem(VT)) {
1740
21
    return false;
1741
21
  }
1742
11
1743
11
  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
1744
11
  if (VT == MVT::i8)
1745
0
    LC = isSigned ? RTLIB::SREM_I8 : RTLIB::UREM_I8;
1746
11
  else if (VT == MVT::i16)
1747
0
    LC = isSigned ? RTLIB::SREM_I16 : RTLIB::UREM_I16;
1748
11
  else if (VT == MVT::i32)
1749
11
    LC = isSigned ? RTLIB::SREM_I32 : RTLIB::UREM_I32;
1750
0
  else if (VT == MVT::i64)
1751
0
    LC = isSigned ? RTLIB::SREM_I64 : RTLIB::UREM_I64;
1752
0
  else if (VT == MVT::i128)
1753
0
    LC = isSigned ? RTLIB::SREM_I128 : RTLIB::UREM_I128;
1754
11
  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
1755
11
1756
11
  return ARMEmitLibcall(I, LC);
1757
11
}
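On the hasStandaloneRem check in SelectRem above: AAPCS run-time libraries typically provide only combined divide/remainder helpers (the RTABI divmod family), which hand back two values, and FastISel bails out rather than model that multi-register return. A hedged, self-contained sketch of the semantics such a combined helper provides (the struct and function name are illustrative, not the actual RTABI binding):

    #include <cstdio>

    struct DivRem { int Quot; int Rem; };

    DivRem idivmod(int Num, int Den) {
      return {Num / Den, Num % Den}; // both results come back together
    }

    int main() {
      DivRem R = idivmod(17, 5);
      // A standalone 'rem' lowering would only need R.Rem, but the helper
      // returns both values, typically in two registers.
      std::printf("quot=%d rem=%d\n", R.Quot, R.Rem);
      return 0;
    }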
1758
1759
37
bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
1760
37
  EVT DestVT = TLI.getValueType(DL, I->getType(), true);
1761
37
1762
37
  // We can get here in the case when we have a binary operation on a non-legal
1763
37
  // type and the target independent selector doesn't know how to handle it.
1764
37
  if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
1765
1
    return false;
1766
36
1767
36
  unsigned Opc;
1768
36
  switch (ISDOpcode) {
1769
36
    default: return false;
1770
36
    case ISD::ADD:
1771
21
      Opc = isThumb2 ? ARM::t2ADDrr : ARM::ADDrr;
1772
21
      break;
1773
36
    case ISD::OR:
1774
6
      Opc = isThumb2 ? ARM::t2ORRrr : ARM::ORRrr;
1775
6
      break;
1776
36
    case ISD::SUB:
1777
9
      Opc = isThumb2 ? ARM::t2SUBrr : ARM::SUBrr;
1778
9
      break;
1779
36
  }
1780
36
1781
36
  unsigned SrcReg1 = getRegForValue(I->getOperand(0));
1782
36
  if (SrcReg1 == 0) return false;
1783
36
1784
36
  // TODO: Often the 2nd operand is an immediate, which can be encoded directly
1785
36
  // in the instruction, rather than materializing the value in a register.
1786
36
  unsigned SrcReg2 = getRegForValue(I->getOperand(1));
1787
36
  if (SrcReg2 == 0) return false;
1788
36
1789
36
  unsigned ResultReg = createResultReg(&ARM::GPRnopcRegClass);
1790
36
  SrcReg1 = constrainOperandRegClass(TII.get(Opc), SrcReg1, 1);
1791
36
  SrcReg2 = constrainOperandRegClass(TII.get(Opc), SrcReg2, 2);
1792
36
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1793
36
                          TII.get(Opc), ResultReg)
1794
36
                  .addReg(SrcReg1).addReg(SrcReg2));
1795
36
  updateValueMap(I, ResultReg);
1796
36
  return true;
1797
36
}
1798
1799
2
bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) {
1800
2
  EVT FPVT = TLI.getValueType(DL, I->getType(), true);
1801
2
  if (!FPVT.isSimple()) return false;
1802
2
  MVT VT = FPVT.getSimpleVT();
1803
2
1804
2
  // FIXME: Support vector types where possible.
1805
2
  if (VT.isVector())
1806
1
    return false;
1807
1
1808
1
  // We can get here in the case when we want to use NEON for our fp
1809
1
  // operations, but can't figure out how to. Just use the vfp instructions
1810
1
  // if we have them.
1811
1
  // FIXME: It'd be nice to use NEON instructions.
1812
1
  Type *Ty = I->getType();
1813
1
  if (Ty->isFloatTy() && !Subtarget->hasVFP2Base())
1814
0
    return false;
1815
1
  if (Ty->isDoubleTy() && (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()))
1816
1
    return false;
1817
0
1818
0
  unsigned Opc;
1819
0
  bool is64bit = VT == MVT::f64 || VT == MVT::i64;
1820
0
  switch (ISDOpcode) {
1821
0
    default: return false;
1822
0
    case ISD::FADD:
1823
0
      Opc = is64bit ? ARM::VADDD : ARM::VADDS;
1824
0
      break;
1825
0
    case ISD::FSUB:
1826
0
      Opc = is64bit ? ARM::VSUBD : ARM::VSUBS;
1827
0
      break;
1828
0
    case ISD::FMUL:
1829
0
      Opc = is64bit ? ARM::VMULD : ARM::VMULS;
1830
0
      break;
1831
0
  }
1832
0
  unsigned Op1 = getRegForValue(I->getOperand(0));
1833
0
  if (Op1 == 0) return false;
1834
0
1835
0
  unsigned Op2 = getRegForValue(I->getOperand(1));
1836
0
  if (Op2 == 0) return false;
1837
0
1838
0
  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT.SimpleTy));
1839
0
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1840
0
                          TII.get(Opc), ResultReg)
1841
0
                  .addReg(Op1).addReg(Op2));
1842
0
  updateValueMap(I, ResultReg);
1843
0
  return true;
1844
0
}
1845
1846
// Call Handling Code
1847
1848
// This is largely taken directly from CCAssignFnForNode
1849
// TODO: We may not support all of this.
1850
CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
1851
                                           bool Return,
1852
1.16k
                                           bool isVarArg) {
1853
1.16k
  switch (CC) {
1854
1.16k
  default:
1855
0
    report_fatal_error("Unsupported calling convention");
1856
1.16k
  case CallingConv::Fast:
1857
7
    if (Subtarget->hasVFP2Base() && !isVarArg) {
1858
7
      if (!Subtarget->isAAPCS_ABI())
1859
5
        return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
1860
2
      // For AAPCS ABI targets, just use VFP variant of the calling convention.
1861
2
      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
1862
2
    }
1863
0
    LLVM_FALLTHROUGH;
1864
1.11k
  case CallingConv::C:
1865
1.11k
  case CallingConv::CXX_FAST_TLS:
1866
1.11k
    // Use target triple & subtarget features to do actual dispatch.
1867
1.11k
    if (Subtarget->isAAPCS_ABI()) {
1868
300
      if (Subtarget->hasVFP2Base() &&
1869
300
          TM.Options.FloatABIType == FloatABI::Hard && !isVarArg)
1870
12
        return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
1871
288
      else
1872
288
        return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1873
814
    } else {
1874
814
      return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
1875
814
    }
1876
33
  case CallingConv::ARM_AAPCS_VFP:
1877
33
  case CallingConv::Swift:
1878
33
    if (!isVarArg)
1879
31
      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
1880
2
    // Fall through to soft float variant, variadic functions don't
1881
2
    // use hard floating point ABI.
1882
2
    LLVM_FALLTHROUGH;
1883
11
  case CallingConv::ARM_AAPCS:
1884
11
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1885
2
  case CallingConv::ARM_APCS:
1886
1
    return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
1887
2
  case CallingConv::GHC:
1888
0
    if (Return)
1889
0
      report_fatal_error("Can't return in GHC call convention");
1890
0
    else
1891
0
      return CC_ARM_APCS_GHC;
1892
1.16k
  }
1893
1.16k
}
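CCAssignFnForCall above only chooses which TableGen-generated assignment function applies; the actual argument and return-value placement is done by a CCState, as in ProcessCallArgs and FinishCall below. A condensed, self-contained sketch of the same dispatch shape, with placeholder handlers standing in for the CC_*/RetCC_* functions (names and signature here are illustrative):

    using AssignFn = bool (*)();
    bool AssignAPCS()      { return true; }
    bool AssignAAPCS()     { return true; }
    bool AssignAAPCS_VFP() { return true; }

    enum class Conv { C, Fast, AAPCS, AAPCS_VFP };

    AssignFn pickAssignFn(Conv CC, bool IsAAPCS, bool HasVFP, bool HardFloat,
                          bool IsVarArg) {
      switch (CC) {
      case Conv::C:
      case Conv::Fast:
        if (!IsAAPCS)
          return AssignAPCS;
        return (HasVFP && HardFloat && !IsVarArg) ? AssignAAPCS_VFP : AssignAAPCS;
      case Conv::AAPCS:
        return AssignAAPCS;
      case Conv::AAPCS_VFP:
        // Variadic calls fall back to the soft-float variant, as above.
        return IsVarArg ? AssignAAPCS : AssignAAPCS_VFP;
      }
      return AssignAAPCS;
    }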
1894
1895
bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
1896
                                  SmallVectorImpl<unsigned> &ArgRegs,
1897
                                  SmallVectorImpl<MVT> &ArgVTs,
1898
                                  SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
1899
                                  SmallVectorImpl<unsigned> &RegArgs,
1900
                                  CallingConv::ID CC,
1901
                                  unsigned &NumBytes,
1902
472
                                  bool isVarArg) {
1903
472
  SmallVector<CCValAssign, 16> ArgLocs;
1904
472
  CCState CCInfo(CC, isVarArg, *FuncInfo.MF, ArgLocs, *Context);
1905
472
  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags,
1906
472
                             CCAssignFnForCall(CC, false, isVarArg));
1907
472
1908
472
  // Check that we can handle all of the arguments. If we can't, then bail out
1909
472
  // now before we add code to the MBB.
1910
1.76k
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1911
1.30k
    CCValAssign &VA = ArgLocs[i];
1912
1.30k
    MVT ArgVT = ArgVTs[VA.getValNo()];
1913
1.30k
1914
1.30k
    // We don't handle NEON/vector parameters yet.
1915
1.30k
    if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
1916
5
      return false;
1917
1.29k
1918
1.29k
    // Now copy/store arg to correct locations.
1919
1.29k
    if (VA.isRegLoc() && !VA.needsCustom()) {
1920
733
      continue;
1921
733
    } else if (VA.needsCustom()) {
1922
1
      // TODO: We need custom lowering for vector (v2f64) args.
1923
1
      if (VA.getLocVT() != MVT::f64 ||
1924
1
          // TODO: Only handle register args for now.
1925
1
          !VA.isRegLoc() || !ArgLocs[++i].isRegLoc())
1926
1
        return false;
1927
561
    } else {
1928
561
      switch (ArgVT.SimpleTy) {
1929
561
      default:
1930
0
        return false;
1931
561
      case MVT::i1:
1932
140
      case MVT::i8:
1933
140
      case MVT::i16:
1934
140
      case MVT::i32:
1935
140
        break;
1936
140
      case MVT::f32:
1937
4
        if (!Subtarget->hasVFP2Base())
1938
0
          return false;
1939
4
        break;
1940
417
      case MVT::f64:
1941
417
        if (!Subtarget->hasVFP2Base())
1942
0
          return false;
1943
417
        break;
1944
561
      }
1945
561
    }
1946
1.29k
  }
1947
472
1948
472
  // At this point, we are able to handle the call's arguments in fast isel.
1949
472
1950
472
  // Get a count of how many bytes are to be pushed on the stack.
1951
472
  NumBytes = CCInfo.getNextStackOffset();
1952
466
1953
466
  // Issue CALLSEQ_START
1954
466
  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
1955
466
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1956
466
                          TII.get(AdjStackDown))
1957
466
                  .addImm(NumBytes).addImm(0));
1958
466
1959
466
  // Process the args.
1960
1.75k
  for (unsigned i = 0, e = ArgLocs.size(); i != e; 
++i1.28k
) {
1961
1.28k
    CCValAssign &VA = ArgLocs[i];
1962
1.28k
    const Value *ArgVal = Args[VA.getValNo()];
1963
1.28k
    unsigned Arg = ArgRegs[VA.getValNo()];
1964
1.28k
    MVT ArgVT = ArgVTs[VA.getValNo()];
1965
1.28k
1966
1.28k
    assert((!ArgVT.isVector() && ArgVT.getSizeInBits() <= 64) &&
1967
1.28k
           "We don't handle NEON/vector parameters yet.");
1968
1.28k
1969
1.28k
    // Handle arg promotion, etc.
1970
1.28k
    switch (VA.getLocInfo()) {
1971
1.28k
      case CCValAssign::Full: break;
1972
1.28k
      case CCValAssign::SExt: {
1973
18
        MVT DestVT = VA.getLocVT();
1974
18
        Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/false);
1975
18
        assert(Arg != 0 && "Failed to emit a sext");
1976
18
        ArgVT = DestVT;
1977
18
        break;
1978
1.28k
      }
1979
1.28k
      case CCValAssign::AExt:
1980
115
      // Intentional fall-through.  Handle AExt and ZExt.
1981
115
      case CCValAssign::ZExt: {
1982
115
        MVT DestVT = VA.getLocVT();
1983
115
        Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/true);
1984
115
        assert(Arg != 0 && "Failed to emit a zext");
1985
115
        ArgVT = DestVT;
1986
115
        break;
1987
115
      }
1988
115
      case CCValAssign::BCvt: {
1989
81
        unsigned BC = fastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, Arg,
1990
81
                                 /*TODO: Kill=*/false);
1991
81
        assert(BC != 0 && "Failed to emit a bitcast!");
1992
81
        Arg = BC;
1993
81
        ArgVT = VA.getLocVT();
1994
81
        break;
1995
115
      }
1996
115
      default: llvm_unreachable("Unknown arg promotion!");
1997
1.28k
    }
1998
1.28k
1999
1.28k
    // Now copy/store arg to correct locations.
2000
1.28k
    if (VA.isRegLoc() && !VA.needsCustom()) {
2001
726
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2002
726
              TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(Arg);
2003
726
      RegArgs.push_back(VA.getLocReg());
2004
726
    } else if (VA.needsCustom()) {
2005
0
      // TODO: We need custom lowering for vector (v2f64) args.
2006
0
      assert(VA.getLocVT() == MVT::f64 &&
2007
0
             "Custom lowering for v2f64 args not available");
2008
0
2009
0
      // FIXME: ArgLocs[++i] may extend beyond ArgLocs.size()
2010
0
      CCValAssign &NextVA = ArgLocs[++i];
2011
0
2012
0
      assert(VA.isRegLoc() && NextVA.isRegLoc() &&
2013
0
             "We only handle register args!");
2014
0
2015
0
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2016
0
                              TII.get(ARM::VMOVRRD), VA.getLocReg())
2017
0
                      .addReg(NextVA.getLocReg(), RegState::Define)
2018
0
                      .addReg(Arg));
2019
0
      RegArgs.push_back(VA.getLocReg());
2020
0
      RegArgs.push_back(NextVA.getLocReg());
2021
559
    } else {
2022
559
      assert(VA.isMemLoc());
2023
559
      // Need to store on the stack.
2024
559
2025
559
      // Don't emit stores for undef values.
2026
559
      if (isa<UndefValue>(ArgVal))
2027
9
        continue;
2028
550
2029
550
      Address Addr;
2030
550
      Addr.BaseType = Address::RegBase;
2031
550
      Addr.Base.Reg = ARM::SP;
2032
550
      Addr.Offset = VA.getLocMemOffset();
2033
550
2034
550
      bool EmitRet = ARMEmitStore(ArgVT, Arg, Addr); (void)EmitRet;
2035
550
      assert(EmitRet && "Could not emit a store for argument!");
2036
550
    }
2037
1.28k
  }
2038
466
2039
466
  return true;
2040
466
}
2041
2042
bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
2043
                             const Instruction *I, CallingConv::ID CC,
2044
466
                             unsigned &NumBytes, bool isVarArg) {
2045
466
  // Issue CALLSEQ_END
2046
466
  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
2047
466
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2048
466
                          TII.get(AdjStackUp))
2049
466
                  .addImm(NumBytes).addImm(0));
2050
466
2051
466
  // Now the return value.
2052
466
  if (RetVT != MVT::isVoid) {
2053
233
    SmallVector<CCValAssign, 16> RVLocs;
2054
233
    CCState CCInfo(CC, isVarArg, *FuncInfo.MF, RVLocs, *Context);
2055
233
    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg));
2056
233
2057
233
    // Copy all of the result registers out of their specified physreg.
2058
233
    if (RVLocs.size() == 2 && RetVT == MVT::f64) {
2059
0
      // For this move we copy into two registers and then move into the
2060
0
      // double fp reg we want.
2061
0
      MVT DestVT = RVLocs[0].getValVT();
2062
0
      const TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT);
2063
0
      unsigned ResultReg = createResultReg(DstRC);
2064
0
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2065
0
                              TII.get(ARM::VMOVDRR), ResultReg)
2066
0
                      .addReg(RVLocs[0].getLocReg())
2067
0
                      .addReg(RVLocs[1].getLocReg()));
2068
0
2069
0
      UsedRegs.push_back(RVLocs[0].getLocReg());
2070
0
      UsedRegs.push_back(RVLocs[1].getLocReg());
2071
0
2072
0
      // Finally update the result.
2073
0
      updateValueMap(I, ResultReg);
2074
233
    } else {
2075
233
      assert(RVLocs.size() == 1 &&"Can't handle non-double multi-reg retvals!");
2076
233
      MVT CopyVT = RVLocs[0].getValVT();
2077
233
2078
233
      // Special handling for extended integers.
2079
233
      if (RetVT == MVT::i1 || RetVT == MVT::i8 || RetVT == MVT::i16)
2080
45
        CopyVT = MVT::i32;
2081
233
2082
233
      const TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
2083
233
2084
233
      unsigned ResultReg = createResultReg(DstRC);
2085
233
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2086
233
              TII.get(TargetOpcode::COPY),
2087
233
              ResultReg).addReg(RVLocs[0].getLocReg());
2088
233
      UsedRegs.push_back(RVLocs[0].getLocReg());
2089
233
2090
233
      // Finally update the result.
2091
233
      updateValueMap(I, ResultReg);
2092
233
    }
2093
233
  }
2094
466
2095
466
  return true;
2096
466
}
2097
2098
901
bool ARMFastISel::SelectRet(const Instruction *I) {
2099
901
  const ReturnInst *Ret = cast<ReturnInst>(I);
2100
901
  const Function &F = *I->getParent()->getParent();
2101
901
2102
901
  if (!FuncInfo.CanLowerReturn)
2103
2
    return false;
2104
899
2105
899
  if (TLI.supportSwiftError() &&
2106
899
      F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
2107
12
    return false;
2108
887
2109
887
  if (TLI.supportSplitCSR(FuncInfo.MF))
2110
6
    return false;
2111
881
2112
881
  // Build a list of return value registers.
2113
881
  SmallVector<unsigned, 4> RetRegs;
2114
881
2115
881
  CallingConv::ID CC = F.getCallingConv();
2116
881
  if (Ret->getNumOperands() > 0) {
2117
456
    SmallVector<ISD::OutputArg, 4> Outs;
2118
456
    GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
2119
456
2120
456
    // Analyze operands of the call, assigning locations to each operand.
2121
456
    SmallVector<CCValAssign, 16> ValLocs;
2122
456
    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
2123
456
    CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */,
2124
456
                                                 F.isVarArg()));
2125
456
2126
456
    const Value *RV = Ret->getOperand(0);
2127
456
    unsigned Reg = getRegForValue(RV);
2128
456
    if (Reg == 0)
2129
26
      return false;
2130
430
2131
430
    // Only handle a single return value for now.
2132
430
    if (ValLocs.size() != 1)
2133
1
      return false;
2134
429
2135
429
    CCValAssign &VA = ValLocs[0];
2136
429
2137
429
    // Don't bother handling odd stuff for now.
2138
429
    if (VA.getLocInfo() != CCValAssign::Full)
2139
7
      return false;
2140
422
    // Only handle register returns for now.
2141
422
    if (!VA.isRegLoc())
2142
0
      return false;
2143
422
2144
422
    unsigned SrcReg = Reg + VA.getValNo();
2145
422
    EVT RVEVT = TLI.getValueType(DL, RV->getType());
2146
422
    if (!RVEVT.isSimple()) return false;
2147
422
    MVT RVVT = RVEVT.getSimpleVT();
2148
422
    MVT DestVT = VA.getValVT();
2149
422
    // Special handling for extended integers.
2150
422
    if (RVVT != DestVT) {
2151
99
      if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
2152
0
        return false;
2153
99
2154
99
      assert(DestVT == MVT::i32 && "ARM should always ext to i32");
2155
99
2156
99
      // Perform extension if flagged as either zext or sext.  Otherwise, do
2157
99
      // nothing.
2158
99
      if (Outs[0].Flags.isZExt() || Outs[0].Flags.isSExt()) {
2159
60
        SrcReg = ARMEmitIntExt(RVVT, SrcReg, DestVT, Outs[0].Flags.isZExt());
2160
60
        if (SrcReg == 0) return false;
2161
422
      }
2162
99
    }
2163
422
2164
422
    // Make the copy.
2165
422
    unsigned DstReg = VA.getLocReg();
2166
422
    const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
2167
422
    // Avoid a cross-class copy. This is very unlikely.
2168
422
    if (!SrcRC->contains(DstReg))
2169
0
      return false;
2170
422
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2171
422
            TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg);
2172
422
2173
422
    // Add register to return instruction.
2174
422
    RetRegs.push_back(VA.getLocReg());
2175
422
  }
2176
881
2177
881
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2178
847
                                    TII.get(Subtarget->getReturnOpcode()));
2179
847
  AddOptionalDefs(MIB);
2180
847
  for (unsigned R : RetRegs)
2181
422
    MIB.addReg(R, RegState::Implicit);
2182
847
  return true;
2183
881
}
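For small integer return values, SelectRet above widens the value to i32 when the return attributes ask for zeroext or signext. A tiny self-contained example of what that widening amounts to (illustrative only):

    #include <cstdint>

    uint32_t widenReturn(uint8_t V, bool IsSigned) {
      return IsSigned ? uint32_t(int32_t(int8_t(V))) // signext to i32
                      : uint32_t(V);                 // zeroext to i32
    }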
2184
2185
466
unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) {
2186
466
  if (UseReg)
2187
82
    return isThumb2 ? ARM::tBLXr : ARM::BLX;
2188
384
  else
2189
384
    return isThumb2 ? ARM::tBL : ARM::BL;
2190
466
}
2191
2192
12
unsigned ARMFastISel::getLibcallReg(const Twine &Name) {
2193
12
  // Manually compute the global's type to avoid building it when unnecessary.
2194
12
  Type *GVTy = Type::getInt32PtrTy(*Context, /*AS=*/0);
2195
12
  EVT LCREVT = TLI.getValueType(DL, GVTy);
2196
12
  if (!LCREVT.isSimple()) return 0;
2197
12
2198
12
  GlobalValue *GV = new GlobalVariable(M, Type::getInt32Ty(*Context), false,
2199
12
                                       GlobalValue::ExternalLinkage, nullptr,
2200
12
                                       Name);
2201
12
  assert(GV->getType() == GVTy && "We miscomputed the type for the global!");
2202
12
  return ARMMaterializeGV(GV, LCREVT.getSimpleVT());
2203
12
}
2204
2205
// A quick function that will emit a call for a named libcall in F with the
2206
// vector of passed arguments for the Instruction in I. We can assume that we
2207
// can emit a call for any libcall we can produce. This is an abridged version
2208
// of the full call infrastructure since we won't need to worry about things
2209
// like computed function pointers or strange arguments at call sites.
2210
// TODO: Try to unify this and the normal call bits for ARM, then try to unify
2211
// with X86.
2212
25
bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
2213
25
  CallingConv::ID CC = TLI.getLibcallCallingConv(Call);
2214
25
2215
25
  // Handle *simple* calls for now.
2216
25
  Type *RetTy = I->getType();
2217
25
  MVT RetVT;
2218
25
  if (RetTy->isVoidTy())
2219
0
    RetVT = MVT::isVoid;
2220
25
  else if (!isTypeLegal(RetTy, RetVT))
2221
0
    return false;
2222
25
2223
25
  // Can't handle non-double multi-reg retvals.
2224
25
  if (RetVT != MVT::isVoid && RetVT != MVT::i32) {
2225
0
    SmallVector<CCValAssign, 16> RVLocs;
2226
0
    CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
2227
0
    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, false));
2228
0
    if (RVLocs.size() >= 2 && RetVT != MVT::f64)
2229
0
      return false;
2230
25
  }
2231
25
2232
25
  // Set up the argument vectors.
2233
25
  SmallVector<Value*, 8> Args;
2234
25
  SmallVector<unsigned, 8> ArgRegs;
2235
25
  SmallVector<MVT, 8> ArgVTs;
2236
25
  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
2237
25
  Args.reserve(I->getNumOperands());
2238
25
  ArgRegs.reserve(I->getNumOperands());
2239
25
  ArgVTs.reserve(I->getNumOperands());
2240
25
  ArgFlags.reserve(I->getNumOperands());
2241
50
  for (Value *Op :  I->operands()) {
2242
50
    unsigned Arg = getRegForValue(Op);
2243
50
    if (Arg == 0) return false;
2244
50
2245
50
    Type *ArgTy = Op->getType();
2246
50
    MVT ArgVT;
2247
50
    if (!isTypeLegal(ArgTy, ArgVT)) return false;
2248
50
2249
50
    ISD::ArgFlagsTy Flags;
2250
50
    unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
2251
50
    Flags.setOrigAlign(OriginalAlignment);
2252
50
2253
50
    Args.push_back(Op);
2254
50
    ArgRegs.push_back(Arg);
2255
50
    ArgVTs.push_back(ArgVT);
2256
50
    ArgFlags.push_back(Flags);
2257
50
  }
2258
25
2259
25
  // Handle the arguments now that we've gotten them.
2260
25
  SmallVector<unsigned, 4> RegArgs;
2261
25
  unsigned NumBytes;
2262
25
  if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
2263
25
                       RegArgs, CC, NumBytes, false))
2264
0
    return false;
2265
25
2266
25
  unsigned CalleeReg = 0;
2267
25
  if (Subtarget->genLongCalls()) {
2268
3
    CalleeReg = getLibcallReg(TLI.getLibcallName(Call));
2269
3
    if (CalleeReg == 0) return false;
2270
25
  }
2271
25
2272
25
  // Issue the call.
2273
25
  unsigned CallOpc = ARMSelectCallOp(Subtarget->genLongCalls());
2274
25
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
2275
25
                                    DbgLoc, TII.get(CallOpc));
2276
25
  // BL / BLX don't take a predicate, but tBL / tBLX do.
2277
25
  if (isThumb2)
2278
3
    MIB.add(predOps(ARMCC::AL));
2279
25
  if (Subtarget->genLongCalls())
2280
3
    MIB.addReg(CalleeReg);
2281
22
  else
2282
22
    MIB.addExternalSymbol(TLI.getLibcallName(Call));
2283
25
2284
25
  // Add implicit physical register uses to the call.
2285
25
  for (unsigned R : RegArgs)
2286
50
    MIB.addReg(R, RegState::Implicit);
2287
25
2288
25
  // Add a register mask with the call-preserved registers.
2289
25
  // Proper defs for return values will be added by setPhysRegsDeadExcept().
2290
25
  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
2291
25
2292
25
  // Finish off the call including any return values.
2293
25
  SmallVector<unsigned, 4> UsedRegs;
2294
25
  if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, false)) return false;
2295
25
2296
25
  // Set all unused physreg defs as dead.
2297
25
  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
2298
25
2299
25
  return true;
2300
25
}
2301
2302
bool ARMFastISel::SelectCall(const Instruction *I,
2303
625
                             const char *IntrMemName = nullptr) {
2304
625
  const CallInst *CI = cast<CallInst>(I);
2305
625
  const Value *Callee = CI->getCalledValue();
2306
625
2307
625
  // Can't handle inline asm.
2308
625
  if (isa<InlineAsm>(Callee)) return false;
2309
616
2310
616
  // Allow SelectionDAG isel to handle tail calls.
2311
616
  if (CI->isTailCall()) return false;
2312
521
2313
521
  // Check the calling convention.
2314
521
  ImmutableCallSite CS(CI);
2315
521
  CallingConv::ID CC = CS.getCallingConv();
2316
521
2317
521
  // TODO: Avoid some calling conventions?
2318
521
2319
521
  FunctionType *FTy = CS.getFunctionType();
2320
521
  bool isVarArg = FTy->isVarArg();
2321
521
2322
521
  // Handle *simple* calls for now.
2323
521
  Type *RetTy = I->getType();
2324
521
  MVT RetVT;
2325
521
  if (RetTy->isVoidTy())
2326
297
    RetVT = MVT::isVoid;
2327
224
  else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
2328
224
           RetVT != MVT::i8 && RetVT != MVT::i1)
2329
7
    return false;
2330
514
2331
514
  // Can't handle non-double multi-reg retvals.
2332
514
  if (RetVT != MVT::isVoid && RetVT != MVT::i1 && RetVT != MVT::i8 &&
2333
514
      RetVT != MVT::i16 && RetVT != MVT::i32) {
2334
3
    SmallVector<CCValAssign, 16> RVLocs;
2335
3
    CCState CCInfo(CC, isVarArg, *FuncInfo.MF, RVLocs, *Context);
2336
3
    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg));
2337
3
    if (RVLocs.size() >= 2 && RetVT != MVT::f64)
2338
3
      return false;
2339
511
  }
2340
511
2341
511
  // Set up the argument vectors.
2342
511
  SmallVector<Value*, 8> Args;
2343
511
  SmallVector<unsigned, 8> ArgRegs;
2344
511
  SmallVector<MVT, 8> ArgVTs;
2345
511
  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
2346
511
  unsigned arg_size = CS.arg_size();
2347
511
  Args.reserve(arg_size);
2348
511
  ArgRegs.reserve(arg_size);
2349
511
  ArgVTs.reserve(arg_size);
2350
511
  ArgFlags.reserve(arg_size);
2351
511
  for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
2352
1.77k
       i != e; ++i) {
2353
1.34k
    // If we're lowering a memory intrinsic instead of a regular call, skip the
2354
1.34k
    // last argument, which shouldn't be passed to the underlying function.
2355
1.34k
    if (IntrMemName && e - i <= 1)
2356
18
      break;
2357
1.32k
2358
1.32k
    ISD::ArgFlagsTy Flags;
2359
1.32k
    unsigned ArgIdx = i - CS.arg_begin();
2360
1.32k
    if (CS.paramHasAttr(ArgIdx, Attribute::SExt))
2361
19
      Flags.setSExt();
2362
1.32k
    if (CS.paramHasAttr(ArgIdx, Attribute::ZExt))
2363
107
      Flags.setZExt();
2364
1.32k
2365
1.32k
    // FIXME: Only handle *easy* calls for now.
2366
1.32k
    if (CS.paramHasAttr(ArgIdx, Attribute::InReg) ||
2367
1.32k
        CS.paramHasAttr(ArgIdx, Attribute::StructRet) ||
2368
1.32k
        CS.paramHasAttr(ArgIdx, Attribute::SwiftSelf) ||
2369
1.32k
        CS.paramHasAttr(ArgIdx, Attribute::SwiftError) ||
2370
1.32k
        CS.paramHasAttr(ArgIdx, Attribute::Nest) ||
2371
1.32k
        CS.paramHasAttr(ArgIdx, Attribute::ByVal))
2372
51
      return false;
2373
1.27k
2374
1.27k
    Type *ArgTy = (*i)->getType();
2375
1.27k
    MVT ArgVT;
2376
1.27k
    if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8 &&
2377
1.27k
        ArgVT != MVT::i1)
2378
13
      return false;
2379
1.26k
2380
1.26k
    unsigned Arg = getRegForValue(*i);
2381
1.26k
    if (Arg == 0)
2382
0
      return false;
2383
1.26k
2384
1.26k
    unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
2385
1.26k
    Flags.setOrigAlign(OriginalAlignment);
2386
1.26k
2387
1.26k
    Args.push_back(*i);
2388
1.26k
    ArgRegs.push_back(Arg);
2389
1.26k
    ArgVTs.push_back(ArgVT);
2390
1.26k
    ArgFlags.push_back(Flags);
2391
1.26k
  }
2392
511
2393
511
  // Handle the arguments now that we've gotten them.
2394
511
  SmallVector<unsigned, 4> RegArgs;
2395
447
  unsigned NumBytes;
2396
447
  if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
2397
447
                       RegArgs, CC, NumBytes, isVarArg))
2398
6
    return false;
2399
441
2400
441
  bool UseReg = false;
2401
441
  const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
2402
441
  if (!GV || Subtarget->genLongCalls()) UseReg = true;
2403
441
2404
441
  unsigned CalleeReg = 0;
2405
441
  if (UseReg) {
2406
79
    if (IntrMemName)
2407
9
      CalleeReg = getLibcallReg(IntrMemName);
2408
70
    else
2409
70
      CalleeReg = getRegForValue(Callee);
2410
79
2411
79
    if (CalleeReg == 0) return false;
2412
441
  }
2413
441
2414
441
  // Issue the call.
2415
441
  unsigned CallOpc = ARMSelectCallOp(UseReg);
2416
441
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
2417
441
                                    DbgLoc, TII.get(CallOpc));
2418
441
2419
441
  // ARM calls don't take a predicate, but tBL / tBLX do.
2420
441
  if(isThumb2)
2421
184
    MIB.add(predOps(ARMCC::AL));
2422
441
  if (UseReg)
2423
79
    MIB.addReg(CalleeReg);
2424
362
  else if (!IntrMemName)
2425
353
    MIB.addGlobalAddress(GV, 0, 0);
2426
9
  else
2427
9
    MIB.addExternalSymbol(IntrMemName, 0);
2428
441
2429
441
  // Add implicit physical register uses to the call.
2430
441
  for (unsigned R : RegArgs)
2431
676
    MIB.addReg(R, RegState::Implicit);
2432
441
2433
441
  // Add a register mask with the call-preserved registers.
2434
441
  // Proper defs for return values will be added by setPhysRegsDeadExcept().
2435
441
  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
2436
441
2437
441
  // Finish off the call including any return values.
2438
441
  SmallVector<unsigned, 4> UsedRegs;
2439
441
  if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, isVarArg))
2440
0
    return false;
2441
441
2442
441
  // Set all unused physreg defs as dead.
2443
441
  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
2444
441
2445
441
  return true;
2446
441
}
2447
2448
58
bool ARMFastISel::ARMIsMemCpySmall(uint64_t Len) {
2449
58
  return Len <= 16;
2450
58
}
2451
2452
bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src,
2453
26
                                        uint64_t Len, unsigned Alignment) {
2454
26
  // Make sure we don't bloat code by inlining very large memcpy's.
2455
26
  if (!ARMIsMemCpySmall(Len))
2456
0
    return false;
2457
26
2458
154
  while (Len) {
2459
128
    MVT VT;
2460
128
    if (!Alignment || Alignment >= 4) {
2461
26
      if (Len >= 4)
2462
20
        VT = MVT::i32;
2463
6
      else if (Len >= 2)
2464
6
        VT = MVT::i16;
2465
0
      else {
2466
0
        assert(Len == 1 && "Expected a length of 1!");
2467
0
        VT = MVT::i8;
2468
0
      }
2469
102
    } else {
2470
102
      // Bound based on alignment.
2471
102
      if (Len >= 2 && Alignment == 2)
2472
36
        VT = MVT::i16;
2473
66
      else {
2474
66
        VT = MVT::i8;
2475
66
      }
2476
102
    }
2477
128
2478
128
    bool RV;
2479
128
    unsigned ResultReg;
2480
128
    RV = ARMEmitLoad(VT, ResultReg, Src);
2481
128
    assert(RV && "Should be able to handle this load.");
2482
128
    RV = ARMEmitStore(VT, ResultReg, Dest);
2483
128
    assert(RV && "Should be able to handle this store.");
2484
128
    (void)RV;
2485
128
2486
128
    unsigned Size = VT.getSizeInBits()/8;
2487
128
    Len -= Size;
2488
128
    Dest.Offset += Size;
2489
128
    Src.Offset += Size;
2490
128
  }
2491
26
2492
26
  return true;
2493
26
}
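ARMTryEmitSmallMemCpy above expands a small copy (at most 16 bytes, per ARMIsMemCpySmall) into load/store pairs, picking i32/i16/i8 chunks from the remaining length and the alignment. A standalone sketch of just the chunk-size policy, without the MachineInstr emission:

    #include <cstdint>
    #include <vector>

    // Chunk sizes (in bytes) the inline expansion would use for a copy of
    // length Len with the given alignment (0 means natural alignment).
    std::vector<unsigned> memcpyChunks(uint64_t Len, unsigned Alignment) {
      std::vector<unsigned> Chunks;
      if (Len > 16)                  // too big: fall back to a real memcpy call
        return Chunks;
      while (Len) {
        unsigned Size;
        if (!Alignment || Alignment >= 4)
          Size = Len >= 4 ? 4 : (Len >= 2 ? 2 : 1);
        else if (Len >= 2 && Alignment == 2)
          Size = 2;
        else
          Size = 1;
        Chunks.push_back(Size);
        Len -= Size;
      }
      return Chunks;
    }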
2494
2495
69
bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
2496
69
  // FIXME: Handle more intrinsics.
2497
69
  switch (I.getIntrinsicID()) {
2498
69
  default: return false;
2499
69
  case Intrinsic::frameaddress: {
2500
9
    MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
2501
9
    MFI.setFrameAddressIsTaken(true);
2502
9
2503
9
    unsigned LdrOpc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
2504
9
    const TargetRegisterClass *RC = isThumb2 ? &ARM::tGPRRegClass
2505
9
                                             : &ARM::GPRRegClass;
2506
9
2507
9
    const ARMBaseRegisterInfo *RegInfo =
2508
9
        static_cast<const ARMBaseRegisterInfo *>(Subtarget->getRegisterInfo());
2509
9
    unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
2510
9
    unsigned SrcReg = FramePtr;
2511
9
2512
9
    // Recursively load frame address
2513
9
    // ldr r0 [fp]
2514
9
    // ldr r0 [r0]
2515
9
    // ldr r0 [r0]
2516
9
    // ...
2517
9
    unsigned DestReg;
2518
9
    unsigned Depth = cast<ConstantInt>(I.getOperand(0))->getZExtValue();
2519
21
    while (Depth--) {
2520
12
      DestReg = createResultReg(RC);
2521
12
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2522
12
                              TII.get(LdrOpc), DestReg)
2523
12
                      .addReg(SrcReg).addImm(0));
2524
12
      SrcReg = DestReg;
2525
12
    }
2526
9
    updateValueMap(&I, SrcReg);
2527
9
    return true;
2528
69
  }
2529
69
  case Intrinsic::memcpy:
2530
38
  case Intrinsic::memmove: {
2531
38
    const MemTransferInst &MTI = cast<MemTransferInst>(I);
2532
38
    // Don't handle volatile.
2533
38
    if (MTI.isVolatile())
2534
0
      return false;
2535
38
2536
38
    // Disable inlining for memmove before calls to ComputeAddress.  Otherwise,
2537
38
    // we would emit dead code because we don't currently handle memmoves.
2538
38
    bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy);
2539
38
    if (isa<ConstantInt>(MTI.getLength()) && isMemCpy) {
2540
32
      // Small memcpy's are common enough that we want to do them without a call
2541
32
      // if possible.
2542
32
      uint64_t Len = cast<ConstantInt>(MTI.getLength())->getZExtValue();
2543
32
      if (ARMIsMemCpySmall(Len)) {
2544
26
        Address Dest, Src;
2545
26
        if (!ARMComputeAddress(MTI.getRawDest(), Dest) ||
2546
26
            !ARMComputeAddress(MTI.getRawSource(), Src))
2547
0
          return false;
2548
26
        unsigned Alignment = MinAlign(MTI.getDestAlignment(),
2549
26
                                      MTI.getSourceAlignment());
2550
26
        if (ARMTryEmitSmallMemCpy(Dest, Src, Len, Alignment))
2551
26
          return true;
2552
12
      }
2553
32
    }
2554
12
2555
12
    if (!MTI.getLength()->getType()->isIntegerTy(32))
2556
0
      return false;
2557
12
2558
12
    if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255)
2559
0
      return false;
2560
12
2561
12
    const char *IntrMemName = isa<MemCpyInst>(I) ? "memcpy" : "memmove";
2562
12
    return SelectCall(&I, IntrMemName);
2563
12
  }
2564
12
  case Intrinsic::memset: {
2565
6
    const MemSetInst &MSI = cast<MemSetInst>(I);
2566
6
    // Don't handle volatile.
2567
6
    if (MSI.isVolatile())
2568
0
      return false;
2569
6
2570
6
    if (!MSI.getLength()->getType()->isIntegerTy(32))
2571
0
      return false;
2572
6
2573
6
    if (MSI.getDestAddressSpace() > 255)
2574
0
      return false;
2575
6
2576
6
    return SelectCall(&I, "memset");
2577
6
  }
2578
6
  case Intrinsic::trap: {
2579
2
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(
2580
2
      Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP));
2581
2
    return true;
2582
6
  }
2583
69
  }
2584
69
}
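The frameaddress case in SelectIntrinsicCall above emits one [reg, #0] load per requested depth, chaining through saved frame pointers. A self-contained illustration of the same walk (purely illustrative; real frame layouts are target- and ABI-specific, and the offset-0 assumption is only what the emitted ldr chain encodes):

    #include <cstdint>

    uintptr_t walkFrames(uintptr_t FramePtr, unsigned Depth) {
      uintptr_t P = FramePtr;
      while (Depth--)
        P = *reinterpret_cast<uintptr_t *>(P); // ldr P, [P, #0]
      return P;
    }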
2585
2586
22
bool ARMFastISel::SelectTrunc(const Instruction *I) {
2587
22
  // The high bits for a type smaller than the register size are assumed to be
2588
22
  // undefined.
2589
22
  Value *Op = I->getOperand(0);
2590
22
2591
22
  EVT SrcVT, DestVT;
2592
22
  SrcVT = TLI.getValueType(DL, Op->getType(), true);
2593
22
  DestVT = TLI.getValueType(DL, I->getType(), true);
2594
22
2595
22
  if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
2596
1
    return false;
2597
21
  if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
2598
0
    return false;
2599
21
2600
21
  unsigned SrcReg = getRegForValue(Op);
2601
21
  if (!SrcReg) return false;
2602
21
2603
21
  // Because the high bits are undefined, a truncate doesn't generate
2604
21
  // any code.
2605
21
  updateValueMap(I, SrcReg);
2606
21
  return true;
2607
21
}
2608
2609
unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
2610
396
                                    bool isZExt) {
2611
396
  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
2612
0
    return 0;
2613
396
  if (SrcVT != MVT::i16 && SrcVT != MVT::i8 && SrcVT != MVT::i1)
2614
0
    return 0;
2615
396
2616
396
  // Table of which combinations can be emitted as a single instruction,
2617
396
  // and which will require two.
2618
396
  static const uint8_t isSingleInstrTbl[3][2][2][2] = {
2619
396
    //            ARM                     Thumb
2620
396
    //           !hasV6Ops  hasV6Ops     !hasV6Ops  hasV6Ops
2621
396
    //    ext:     s  z      s  z          s  z      s  z
2622
396
    /*  1 */ { { { 0, 1 }, { 0, 1 } }, { { 0, 0 }, { 0, 1 } } },
2623
396
    /*  8 */ { { { 0, 1 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } },
2624
396
    /* 16 */ { { { 0, 0 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } }
2625
396
  };
2626
396
2627
396
  // Target registers for:
2628
396
  //  - For ARM can never be PC.
2629
396
  //  - For 16-bit Thumb are restricted to lower 8 registers.
2630
396
  //  - For 32-bit Thumb are restricted to non-SP and non-PC.
2631
396
  static const TargetRegisterClass *RCTbl[2][2] = {
2632
396
    // Instructions: Two                     Single
2633
396
    /* ARM      */ { &ARM::GPRnopcRegClass, &ARM::GPRnopcRegClass },
2634
396
    /* Thumb    */ { &ARM::tGPRRegClass,    &ARM::rGPRRegClass    }
2635
396
  };
2636
396
2637
396
  // Table governing the instruction(s) to be emitted.
2638
396
  static const struct InstructionTable {
2639
396
    uint32_t Opc   : 16;
2640
396
    uint32_t hasS  :  1; // Some instructions have an S bit, always set it to 0.
2641
396
    uint32_t Shift :  7; // For shift operand addressing mode, used by MOVsi.
2642
396
    uint32_t Imm   :  8; // All instructions have either a shift or a mask.
2643
396
  } IT[2][2][3][2] = {
2644
396
    { // Two instructions (first is left shift, second is in this table).
2645
396
      { // ARM                Opc           S  Shift             Imm
2646
396
        /*  1 bit sext */ { { ARM::MOVsi  , 1, ARM_AM::asr     ,  31 },
2647
396
        /*  1 bit zext */   { ARM::MOVsi  , 1, ARM_AM::lsr     ,  31 } },
2648
396
        /*  8 bit sext */ { { ARM::MOVsi  , 1, ARM_AM::asr     ,  24 },
2649
396
        /*  8 bit zext */   { ARM::MOVsi  , 1, ARM_AM::lsr     ,  24 } },
2650
396
        /* 16 bit sext */ { { ARM::MOVsi  , 1, ARM_AM::asr     ,  16 },
2651
396
        /* 16 bit zext */   { ARM::MOVsi  , 1, ARM_AM::lsr     ,  16 } }
2652
396
      },
2653
396
      { // Thumb              Opc           S  Shift             Imm
2654
396
        /*  1 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift,  31 },
2655
396
        /*  1 bit zext */   { ARM::tLSRri , 0, ARM_AM::no_shift,  31 } },
2656
396
        /*  8 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift,  24 },
2657
396
        /*  8 bit zext */   { ARM::tLSRri , 0, ARM_AM::no_shift,  24 } },
2658
396
        /* 16 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift,  16 },
2659
396
        /* 16 bit zext */   { ARM::tLSRri , 0, ARM_AM::no_shift,  16 } }
2660
396
      }
2661
396
    },
2662
396
    { // Single instruction.
2663
396
      { // ARM                Opc           S  Shift             Imm
2664
396
        /*  1 bit sext */ { { ARM::KILL   , 0, ARM_AM::no_shift,   0 },
2665
396
        /*  1 bit zext */   { ARM::ANDri  , 1, ARM_AM::no_shift,   1 } },
2666
396
        /*  8 bit sext */ { { ARM::SXTB   , 0, ARM_AM::no_shift,   0 },
2667
396
        /*  8 bit zext */   { ARM::ANDri  , 1, ARM_AM::no_shift, 255 } },
2668
396
        /* 16 bit sext */ { { ARM::SXTH   , 0, ARM_AM::no_shift,   0 },
2669
396
        /* 16 bit zext */   { ARM::UXTH   , 0, ARM_AM::no_shift,   0 } }
2670
396
      },
2671
396
      { // Thumb              Opc           S  Shift             Imm
2672
396
        /*  1 bit sext */ { { ARM::KILL   , 0, ARM_AM::no_shift,   0 },
2673
396
        /*  1 bit zext */   { ARM::t2ANDri, 1, ARM_AM::no_shift,   1 } },
2674
396
        /*  8 bit sext */ { { ARM::t2SXTB , 0, ARM_AM::no_shift,   0 },
2675
396
        /*  8 bit zext */   { ARM::t2ANDri, 1, ARM_AM::no_shift, 255 } },
2676
396
        /* 16 bit sext */ { { ARM::t2SXTH , 0, ARM_AM::no_shift,   0 },
2677
396
        /* 16 bit zext */   { ARM::t2UXTH , 0, ARM_AM::no_shift,   0 } }
2678
396
      }
2679
396
    }
2680
396
  };
2681
396
2682
396
  unsigned SrcBits = SrcVT.getSizeInBits();
2683
396
  unsigned DestBits = DestVT.getSizeInBits();
2684
396
  (void) DestBits;
2685
396
  assert((SrcBits < DestBits) && "can only extend to larger types");
2686
396
  assert((DestBits == 32 || DestBits == 16 || DestBits == 8) &&
2687
396
         "other sizes unimplemented");
2688
396
  assert((SrcBits == 16 || SrcBits == 8 || SrcBits == 1) &&
2689
396
         "other sizes unimplemented");
2690
396
2691
396
  bool hasV6Ops = Subtarget->hasV6Ops();
2692
396
  unsigned Bitness = SrcBits / 8;  // {1,8,16}=>{0,1,2}
2693
396
  assert((Bitness < 3) && "sanity-check table bounds");
2694
396
2695
396
  bool isSingleInstr = isSingleInstrTbl[Bitness][isThumb2][hasV6Ops][isZExt];
2696
396
  const TargetRegisterClass *RC = RCTbl[isThumb2][isSingleInstr];
2697
396
  const InstructionTable *ITP = &IT[isSingleInstr][isThumb2][Bitness][isZExt];
2698
396
  unsigned Opc = ITP->Opc;
2699
396
  assert(ARM::KILL != Opc && "Invalid table entry");
2700
396
  unsigned hasS = ITP->hasS;
2701
396
  ARM_AM::ShiftOpc Shift = (ARM_AM::ShiftOpc) ITP->Shift;
2702
396
  assert(((Shift == ARM_AM::no_shift) == (Opc != ARM::MOVsi)) &&
2703
396
         "only MOVsi has shift operand addressing mode");
2704
396
  unsigned Imm = ITP->Imm;
2705
396
2706
396
  // 16-bit Thumb instructions always set CPSR (unless they're in an IT block).
2707
396
  bool setsCPSR = &ARM::tGPRRegClass == RC;
2708
396
  unsigned LSLOpc = isThumb2 ? 
ARM::tLSLri130
:
ARM::MOVsi266
;
2709
396
  unsigned ResultReg;
2710
396
  // MOVsi encodes shift and immediate in shift operand addressing mode.
2711
396
  // The following condition has the same value when emitting two
2712
396
  // instruction sequences: both are shifts.
2713
396
  bool ImmIsSO = (Shift != ARM_AM::no_shift);
2714
396
2715
396
  // Either one or two instructions are emitted.
2716
396
  // They're always of the form:
2717
396
  //   dst = in OP imm
2718
396
  // CPSR is set only by 16-bit Thumb instructions.
2719
396
  // Predicate, if any, is AL.
2720
396
  // S bit, if available, is always 0.
2721
396
  // When two are emitted the first's result will feed as the second's input,
2722
396
  // that value is then dead.
2723
396
  unsigned NumInstrsEmitted = isSingleInstr ? 
1387
:
29
;
2724
801
  for (unsigned Instr = 0; Instr != NumInstrsEmitted; 
++Instr405
) {
2725
405
    ResultReg = createResultReg(RC);
2726
405
    bool isLsl = (0 == Instr) && 
!isSingleInstr396
;
2727
405
    unsigned Opcode = isLsl ? 
LSLOpc9
:
Opc396
;
2728
405
    ARM_AM::ShiftOpc ShiftAM = isLsl ? 
ARM_AM::lsl9
:
Shift396
;
2729
405
    unsigned ImmEnc = ImmIsSO ? 
ARM_AM::getSORegOpc(ShiftAM, Imm)12
:
Imm393
;
2730
405
    bool isKill = 1 == Instr;
2731
405
    MachineInstrBuilder MIB = BuildMI(
2732
405
        *FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opcode), ResultReg);
2733
405
    if (setsCPSR)
2734
6
      MIB.addReg(ARM::CPSR, RegState::Define);
2735
405
    SrcReg = constrainOperandRegClass(TII.get(Opcode), SrcReg, 1 + setsCPSR);
2736
405
    MIB.addReg(SrcReg, isKill * RegState::Kill)
2737
405
        .addImm(ImmEnc)
2738
405
        .add(predOps(ARMCC::AL));
2739
405
    if (hasS)
2740
207
      MIB.add(condCodeOp());
2741
405
    // Second instruction consumes the first's result.
2742
405
    SrcReg = ResultReg;
2743
405
  }
2744
396
2745
396
  return ResultReg;
2746
396
}
2747
2748
136
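When the subtarget has no single-instruction extend for a given case, the loop above emits a shift-left/shift-right pair: the value is moved to the top of the register and shifted back down, arithmetically for sign extension and logically for zero extension, with Imm = 31, 24 or 16 for 1-, 8- and 16-bit sources. A standalone sketch in plain C++ (illustrative only, not ARMFastISel code) of what the 8-bit pair computes:

#include <cassert>
#include <cstdint>

// Semantics of the lsl/asr and lsl/lsr pairs emitted for an 8-bit source
// when no single-instruction extend exists (Imm = 24 in the table above).
int32_t sext8(uint32_t reg) {
  // lsl #24 then asr #24: replicate bit 7 into bits 8-31.
  return static_cast<int32_t>(reg << 24) >> 24;
}

uint32_t zext8(uint32_t reg) {
  // lsl #24 then lsr #24: clear bits 8-31 (same result as AND with 0xff).
  return (reg << 24) >> 24;
}

int main() {
  assert(sext8(0x000000F0u) == -16);    // 0xF0 sign-extends to -16
  assert(zext8(0xFFFFFFF0u) == 0xF0u);  // high garbage bits are cleared
  assert(zext8(0xFFFFFFF0u) == (0xFFFFFFF0u & 0xFFu));
  return 0;
}
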
bool ARMFastISel::SelectIntExt(const Instruction *I) {
  // On ARM, in general, integer casts don't involve legal types; this code
  // handles promotable integers.
  Type *DestTy = I->getType();
  Value *Src = I->getOperand(0);
  Type *SrcTy = Src->getType();

  bool isZExt = isa<ZExtInst>(I);
  unsigned SrcReg = getRegForValue(Src);
  if (!SrcReg) return false;

  EVT SrcEVT, DestEVT;
  SrcEVT = TLI.getValueType(DL, SrcTy, true);
  DestEVT = TLI.getValueType(DL, DestTy, true);
  if (!SrcEVT.isSimple()) return false;
  if (!DestEVT.isSimple()) return false;

  MVT SrcVT = SrcEVT.getSimpleVT();
  MVT DestVT = DestEVT.getSimpleVT();
  unsigned ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
  if (ResultReg == 0) return false;
  updateValueMap(I, ResultReg);
  return true;
}

bool ARMFastISel::SelectShift(const Instruction *I,
                              ARM_AM::ShiftOpc ShiftTy) {
  // We handle thumb2 mode by target independent selector
  // or SelectionDAG ISel.
  if (isThumb2)
    return false;

  // Only handle i32 now.
  EVT DestVT = TLI.getValueType(DL, I->getType(), true);
  if (DestVT != MVT::i32)
    return false;

  unsigned Opc = ARM::MOVsr;
  unsigned ShiftImm;
  Value *Src2Value = I->getOperand(1);
  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Src2Value)) {
    ShiftImm = CI->getZExtValue();

    // Fall back to selection DAG isel if the shift amount
    // is zero or greater than the width of the value type.
    if (ShiftImm == 0 || ShiftImm >= 32)
      return false;

    Opc = ARM::MOVsi;
  }

  Value *Src1Value = I->getOperand(0);
  unsigned Reg1 = getRegForValue(Src1Value);
  if (Reg1 == 0) return false;

  unsigned Reg2 = 0;
  if (Opc == ARM::MOVsr) {
    Reg2 = getRegForValue(Src2Value);
    if (Reg2 == 0) return false;
  }

  unsigned ResultReg = createResultReg(&ARM::GPRnopcRegClass);
  if (ResultReg == 0) return false;

  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                    TII.get(Opc), ResultReg)
                            .addReg(Reg1);

  if (Opc == ARM::MOVsi)
    MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, ShiftImm));
  else if (Opc == ARM::MOVsr) {
    MIB.addReg(Reg2);
    MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, 0));
  }

  AddOptionalDefs(MIB);
  updateValueMap(I, ResultReg);
  return true;
}

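Stated compactly, the routine above uses the immediate form (MOVsi) only when the shift amount is a constant strictly between 0 and 31; a constant of 0 or of 32 and above is left for SelectionDAG, and a variable amount uses the register form (MOVsr). A restatement of that choice as a small sketch in plain C++ (hypothetical names, not the real opcode enumerators):

#include <cstdint>
#include <optional>

// Hypothetical enum for illustration; the real code works with ARM::MOVsi,
// ARM::MOVsr and a plain "return false" fallback.
enum class ShiftSel { MOVsi, MOVsr, FallBackToDAG };

ShiftSel chooseShiftOpcode(std::optional<uint64_t> constAmount) {
  if (constAmount) {
    // Immediate form only for amounts 1..31; 0 and >= 32 go back to DAG isel.
    if (*constAmount == 0 || *constAmount >= 32)
      return ShiftSel::FallBackToDAG;
    return ShiftSel::MOVsi;
  }
  return ShiftSel::MOVsr;  // variable shift amount: register form
}
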
// TODO: SoftFP support.
bool ARMFastISel::fastSelectInstruction(const Instruction *I) {
  switch (I->getOpcode()) {
    case Instruction::Load:
      return SelectLoad(I);
    case Instruction::Store:
      return SelectStore(I);
    case Instruction::Br:
      return SelectBranch(I);
    case Instruction::IndirectBr:
      return SelectIndirectBr(I);
    case Instruction::ICmp:
    case Instruction::FCmp:
      return SelectCmp(I);
    case Instruction::FPExt:
      return SelectFPExt(I);
    case Instruction::FPTrunc:
      return SelectFPTrunc(I);
    case Instruction::SIToFP:
      return SelectIToFP(I, /*isSigned*/ true);
    case Instruction::UIToFP:
      return SelectIToFP(I, /*isSigned*/ false);
    case Instruction::FPToSI:
      return SelectFPToI(I, /*isSigned*/ true);
    case Instruction::FPToUI:
      return SelectFPToI(I, /*isSigned*/ false);
    case Instruction::Add:
      return SelectBinaryIntOp(I, ISD::ADD);
    case Instruction::Or:
      return SelectBinaryIntOp(I, ISD::OR);
    case Instruction::Sub:
      return SelectBinaryIntOp(I, ISD::SUB);
    case Instruction::FAdd:
      return SelectBinaryFPOp(I, ISD::FADD);
    case Instruction::FSub:
      return SelectBinaryFPOp(I, ISD::FSUB);
    case Instruction::FMul:
      return SelectBinaryFPOp(I, ISD::FMUL);
    case Instruction::SDiv:
      return SelectDiv(I, /*isSigned*/ true);
    case Instruction::UDiv:
      return SelectDiv(I, /*isSigned*/ false);
    case Instruction::SRem:
      return SelectRem(I, /*isSigned*/ true);
    case Instruction::URem:
      return SelectRem(I, /*isSigned*/ false);
    case Instruction::Call:
      if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
        return SelectIntrinsicCall(*II);
      return SelectCall(I);
    case Instruction::Select:
      return SelectSelect(I);
    case Instruction::Ret:
      return SelectRet(I);
    case Instruction::Trunc:
      return SelectTrunc(I);
    case Instruction::ZExt:
    case Instruction::SExt:
      return SelectIntExt(I);
    case Instruction::Shl:
      return SelectShift(I, ARM_AM::lsl);
    case Instruction::LShr:
      return SelectShift(I, ARM_AM::lsr);
    case Instruction::AShr:
      return SelectShift(I, ARM_AM::asr);
    default: break;
  }
  return false;
}

// This table describes sign- and zero-extend instructions which can be
// folded into a preceding load. All of these extends have an immediate
// (sometimes a mask and sometimes a shift) that's applied after
// extension.
static const struct FoldableLoadExtendsStruct {
  uint16_t Opc[2];  // ARM, Thumb.
  uint8_t ExpectedImm;
  uint8_t isZExt     : 1;
  uint8_t ExpectedVT : 7;
} FoldableLoadExtends[] = {
  { { ARM::SXTH,  ARM::t2SXTH  },   0, 0, MVT::i16 },
  { { ARM::UXTH,  ARM::t2UXTH  },   0, 1, MVT::i16 },
  { { ARM::ANDri, ARM::t2ANDri }, 255, 1, MVT::i8  },
  { { ARM::SXTB,  ARM::t2SXTB  },   0, 0, MVT::i8  },
  { { ARM::UXTB,  ARM::t2UXTB  },   0, 1, MVT::i8  }
};

/// The specified machine instr operand is a vreg, and that
/// vreg is being provided by the specified load instruction.  If possible,
/// try to fold the load as an operand to the instruction, returning true if
/// successful.
bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                                      const LoadInst *LI) {
  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(LI->getType(), VT))
    return false;

  // Combine load followed by zero- or sign-extend.
  // ldrb r1, [r0]       ldrb r1, [r0]
  // uxtb r2, r1     =>
  // mov  r3, r2         mov  r3, r1
  if (MI->getNumOperands() < 3 || !MI->getOperand(2).isImm())
    return false;
  const uint64_t Imm = MI->getOperand(2).getImm();

  bool Found = false;
  bool isZExt;
  for (const FoldableLoadExtendsStruct &FLE : FoldableLoadExtends) {
    if (FLE.Opc[isThumb2] == MI->getOpcode() &&
        (uint64_t)FLE.ExpectedImm == Imm &&
        MVT((MVT::SimpleValueType)FLE.ExpectedVT) == VT) {
      Found = true;
      isZExt = FLE.isZExt;
    }
  }
  if (!Found) return false;

  // See if we can handle this address.
  Address Addr;
  if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false;

  unsigned ResultReg = MI->getOperand(0).getReg();
  if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlignment(), isZExt, false))
    return false;
  MachineBasicBlock::iterator I(MI);
  removeDeadCode(I, std::next(I));
  return true;
}

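When a row matches, the extend is redundant: the load is re-emitted as a sign- or zero-extending load writing directly into the extend's result register, and the now-dead extend is erased. A simplified sketch in plain C++ (illustrative only; FoldCandidate and TableRow are stand-ins, not the real MachineInstr or table types) of the three conditions the loop above checks:

#include <cstdint>

struct FoldCandidate {
  unsigned Opcode;     // opcode of the extend instruction
  uint64_t Imm;        // its immediate operand (mask, or 0 for SXT/UXT forms)
  unsigned LoadedBits; // width of the value the preceding load produces
};

struct TableRow {
  unsigned Opc;        // ARM or Thumb opcode, already picked for the subtarget
  uint8_t ExpectedImm; // 255 for the ANDri rows, 0 otherwise
  unsigned Bits;       // 8 or 16
};

bool matchesRow(const FoldCandidate &C, const TableRow &R) {
  // Same three checks as the loop over FoldableLoadExtends above:
  // opcode, expected immediate, and loaded type must all agree.
  return C.Opcode == R.Opc && C.Imm == R.ExpectedImm && C.LoadedBits == R.Bits;
}
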
unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
                                     unsigned Align, MVT VT) {
  bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);

  LLVMContext *Context = &MF->getFunction().getContext();
  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
  ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(
      GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj,
      UseGOT_PREL ? ARMCP::GOT_PREL : ARMCP::no_modifier,
      /*AddCurrentAddress=*/UseGOT_PREL);

  unsigned ConstAlign =
      MF->getDataLayout().getPrefTypeAlignment(Type::getInt32PtrTy(*Context));
  unsigned Idx = MF->getConstantPool()->getConstantPoolIndex(CPV, ConstAlign);
  MachineMemOperand *CPMMO =
      MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
                               MachineMemOperand::MOLoad, 4, 4);

  unsigned TempReg = MF->getRegInfo().createVirtualRegister(&ARM::rGPRRegClass);
  unsigned Opc = isThumb2 ? ARM::t2LDRpci : ARM::LDRcp;
  MachineInstrBuilder MIB =
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), TempReg)
          .addConstantPoolIndex(Idx)
          .addMemOperand(CPMMO);
  if (Opc == ARM::LDRcp)
    MIB.addImm(0);
  MIB.add(predOps(ARMCC::AL));

  // Fix the address by adding pc.
  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
  Opc = Subtarget->isThumb() ? ARM::tPICADD : UseGOT_PREL ? ARM::PICLDR
                                                          : ARM::PICADD;
  DestReg = constrainOperandRegClass(TII.get(Opc), DestReg, 0);
  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
            .addReg(TempReg)
            .addImm(ARMPCLabelIndex);

  if (!Subtarget->isThumb())
    MIB.add(predOps(ARMCC::AL));

  if (UseGOT_PREL && Subtarget->isThumb()) {
    unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                  TII.get(ARM::t2LDRi12), NewDestReg)
              .addReg(DestReg)
              .addImm(0);
    DestReg = NewDestReg;
    AddOptionalDefs(MIB);
  }
  return DestReg;
}

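The PCAdj of 4 for Thumb and 8 for ARM mirrors the architectural PC read-ahead: when the PICADD/PICLDR executes, reading PC yields that instruction's address plus 8 in ARM state and plus 4 in Thumb state, so the constant-pool literal is biased by the same amount. A small arithmetic sketch in plain C++ (illustrative only, hypothetical inputs) of the address the pc-relative fix-up produces:

#include <cstdint>

// picInstrAddr and literal are hypothetical inputs, not values taken from
// ARMFastISel; the point is only the +8 (ARM) / +4 (Thumb) read-ahead.
uint32_t picEffectiveAddress(uint32_t picInstrAddr, uint32_t literal,
                             bool isThumb) {
  uint32_t pcAsRead = picInstrAddr + (isThumb ? 4u : 8u); // hardware PC value
  return pcAsRead + literal;  // address of the global, or of its GOT slot
                              // when GOT_PREL is used (which is then loaded)
}
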
bool ARMFastISel::fastLowerArguments() {
  if (!FuncInfo.CanLowerReturn)
    return false;

  const Function *F = FuncInfo.Fn;
  if (F->isVarArg())
    return false;

  CallingConv::ID CC = F->getCallingConv();
  switch (CC) {
  default:
    return false;
  case CallingConv::Fast:
  case CallingConv::C:
  case CallingConv::ARM_AAPCS_VFP:
  case CallingConv::ARM_AAPCS:
  case CallingConv::ARM_APCS:
  case CallingConv::Swift:
    break;
  }

  // Only handle simple cases. i.e. Up to 4 i8/i16/i32 scalar arguments
  // which are passed in r0 - r3.
  for (const Argument &Arg : F->args()) {
    if (Arg.getArgNo() >= 4)
      return false;

    if (Arg.hasAttribute(Attribute::InReg) ||
        Arg.hasAttribute(Attribute::StructRet) ||
        Arg.hasAttribute(Attribute::SwiftSelf) ||
        Arg.hasAttribute(Attribute::SwiftError) ||
        Arg.hasAttribute(Attribute::ByVal))
      return false;

    Type *ArgTy = Arg.getType();
    if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
      return false;

    EVT ArgVT = TLI.getValueType(DL, ArgTy);
    if (!ArgVT.isSimple()) return false;
    switch (ArgVT.getSimpleVT().SimpleTy) {
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      break;
    default:
      return false;
    }
  }

  static const MCPhysReg GPRArgRegs[] = {
    ARM::R0, ARM::R1, ARM::R2, ARM::R3
  };

  const TargetRegisterClass *RC = &ARM::rGPRRegClass;
  for (const Argument &Arg : F->args()) {
    unsigned ArgNo = Arg.getArgNo();
    unsigned SrcReg = GPRArgRegs[ArgNo];
    unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
    // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
    // Without this, EmitLiveInCopies may eliminate the livein if its only
    // use is a bitcast (which isn't turned into an instruction).
    unsigned ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY),
            ResultReg).addReg(DstReg, getKillRegState(true));
    updateValueMap(&Arg, ResultReg);
  }

  return true;
}

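Taken together, the checks above mean the fast path only fires for functions whose arguments are at most four plain i8/i16/i32 scalars carrying none of the listed attributes; anything else falls back to the regular argument lowering. A few hypothetical C-level signatures (illustrative only, not LLVM code) showing which side of the predicate they land on:

struct Pair { int a, b; };

int  accepted(int a, short b, char c, int d);         // 4 integer scalars -> r0..r3
int  tooManyArgs(int a, int b, int c, int d, int e);  // 5th argument: rejected
long long wideArgument(long long x);                  // i64 is not i8/i16/i32: rejected
int  aggregateArg(Pair p);                            // aggregate argument: rejected
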
namespace llvm {

  FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo,
                                const TargetLibraryInfo *libInfo) {
    if (funcInfo.MF->getSubtarget<ARMSubtarget>().useFastISel())
      return new ARMFastISel(funcInfo, libInfo);

    return nullptr;
  }

} // end namespace llvm