/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Target/AArch64/AArch64FastISel.cpp
Line | Count | Source |
1 | | //===- AArch64FastISel.cpp - AArch64 FastISel implementation ------------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // This file defines the AArch64-specific support for the FastISel class. Some |
11 | | // of the target-specific code is generated by tablegen in the file |
12 | | // AArch64GenFastISel.inc, which is #included here. |
13 | | // |
14 | | //===----------------------------------------------------------------------===// |
15 | | |
16 | | #include "AArch64.h" |
17 | | #include "AArch64CallingConvention.h" |
18 | | #include "AArch64RegisterInfo.h" |
19 | | #include "AArch64Subtarget.h" |
20 | | #include "MCTargetDesc/AArch64AddressingModes.h" |
21 | | #include "Utils/AArch64BaseInfo.h" |
22 | | #include "llvm/ADT/APFloat.h" |
23 | | #include "llvm/ADT/APInt.h" |
24 | | #include "llvm/ADT/DenseMap.h" |
25 | | #include "llvm/ADT/SmallVector.h" |
26 | | #include "llvm/Analysis/BranchProbabilityInfo.h" |
27 | | #include "llvm/CodeGen/CallingConvLower.h" |
28 | | #include "llvm/CodeGen/FastISel.h" |
29 | | #include "llvm/CodeGen/FunctionLoweringInfo.h" |
30 | | #include "llvm/CodeGen/ISDOpcodes.h" |
31 | | #include "llvm/CodeGen/MachineBasicBlock.h" |
32 | | #include "llvm/CodeGen/MachineConstantPool.h" |
33 | | #include "llvm/CodeGen/MachineFrameInfo.h" |
34 | | #include "llvm/CodeGen/MachineInstr.h" |
35 | | #include "llvm/CodeGen/MachineInstrBuilder.h" |
36 | | #include "llvm/CodeGen/MachineMemOperand.h" |
37 | | #include "llvm/CodeGen/MachineRegisterInfo.h" |
38 | | #include "llvm/CodeGen/MachineValueType.h" |
39 | | #include "llvm/CodeGen/RuntimeLibcalls.h" |
40 | | #include "llvm/CodeGen/ValueTypes.h" |
41 | | #include "llvm/IR/Argument.h" |
42 | | #include "llvm/IR/Attributes.h" |
43 | | #include "llvm/IR/BasicBlock.h" |
44 | | #include "llvm/IR/CallingConv.h" |
45 | | #include "llvm/IR/Constant.h" |
46 | | #include "llvm/IR/Constants.h" |
47 | | #include "llvm/IR/DataLayout.h" |
48 | | #include "llvm/IR/DerivedTypes.h" |
49 | | #include "llvm/IR/Function.h" |
50 | | #include "llvm/IR/GetElementPtrTypeIterator.h" |
51 | | #include "llvm/IR/GlobalValue.h" |
52 | | #include "llvm/IR/InstrTypes.h" |
53 | | #include "llvm/IR/Instruction.h" |
54 | | #include "llvm/IR/Instructions.h" |
55 | | #include "llvm/IR/IntrinsicInst.h" |
56 | | #include "llvm/IR/Intrinsics.h" |
57 | | #include "llvm/IR/Operator.h" |
58 | | #include "llvm/IR/Type.h" |
59 | | #include "llvm/IR/User.h" |
60 | | #include "llvm/IR/Value.h" |
61 | | #include "llvm/MC/MCInstrDesc.h" |
62 | | #include "llvm/MC/MCRegisterInfo.h" |
63 | | #include "llvm/MC/MCSymbol.h" |
64 | | #include "llvm/Support/AtomicOrdering.h" |
65 | | #include "llvm/Support/Casting.h" |
66 | | #include "llvm/Support/CodeGen.h" |
67 | | #include "llvm/Support/Compiler.h" |
68 | | #include "llvm/Support/ErrorHandling.h" |
69 | | #include "llvm/Support/MathExtras.h" |
70 | | #include <algorithm> |
71 | | #include <cassert> |
72 | | #include <cstdint> |
73 | | #include <iterator> |
74 | | #include <utility> |
75 | | |
76 | | using namespace llvm; |
77 | | |
78 | | namespace { |
79 | | |
80 | | class AArch64FastISel final : public FastISel { |
81 | | class Address { |
82 | | public: |
83 | | using BaseKind = enum { |
84 | | RegBase, |
85 | | FrameIndexBase |
86 | | }; |
87 | | |
88 | | private: |
89 | | BaseKind Kind = RegBase; |
90 | | AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend; |
91 | | union { |
92 | | unsigned Reg; |
93 | | int FI; |
94 | | } Base; |
95 | | unsigned OffsetReg = 0; |
96 | | unsigned Shift = 0; |
97 | | int64_t Offset = 0; |
98 | | const GlobalValue *GV = nullptr; |
99 | | |
100 | | public: |
101 | 1.01k | Address() { Base.Reg = 0; } |
102 | | |
103 | 271 | void setKind(BaseKind K) { Kind = K; } |
104 | 0 | BaseKind getKind() const { return Kind; } |
105 | 85 | void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; } |
106 | 1.56k | AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; } |
107 | 2.06k | bool isRegBase() const { return Kind == RegBase; } |
108 | 812 | bool isFIBase() const { return Kind == FrameIndexBase; } |
109 | | |
110 | 1.14k | void setReg(unsigned Reg) { |
111 | 1.14k | assert(isRegBase() && "Invalid base register access!"); |
112 | 1.14k | Base.Reg = Reg; |
113 | 1.14k | } |
114 | | |
115 | 2.24k | unsigned getReg() const { |
116 | 2.24k | assert(isRegBase() && "Invalid base register access!"); |
117 | 2.24k | return Base.Reg; |
118 | 2.24k | } |
119 | | |
120 | 566 | void setOffsetReg(unsigned Reg) { |
121 | 566 | OffsetReg = Reg; |
122 | 566 | } |
123 | | |
124 | 2.77k | unsigned getOffsetReg() const { |
125 | 2.77k | return OffsetReg; |
126 | 2.77k | } |
127 | | |
128 | 236 | void setFI(unsigned FI) { |
129 | 236 | assert(isFIBase() && "Invalid base frame index access!"); |
130 | 236 | Base.FI = FI; |
131 | 236 | } |
132 | | |
133 | 244 | unsigned getFI() const { |
134 | 244 | assert(isFIBase() && "Invalid base frame index access!"); |
135 | 244 | return Base.FI; |
136 | 244 | } |
137 | | |
138 | 222 | void setOffset(int64_t O) { Offset = O; } |
139 | 4.20k | int64_t getOffset() { return Offset; } |
140 | 64 | void setShift(unsigned S) { Shift = S; } |
141 | 91 | unsigned getShift() { return Shift; } |
142 | | |
143 | 91 | void setGlobalValue(const GlobalValue *G) { GV = G; } |
144 | 200 | const GlobalValue *getGlobalValue() { return GV; } |
145 | | }; |
146 | | |
147 | | /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can |
148 | | /// make the right decision when generating code for different targets. |
149 | | const AArch64Subtarget *Subtarget; |
150 | | LLVMContext *Context; |
151 | | |
152 | | bool fastLowerArguments() override; |
153 | | bool fastLowerCall(CallLoweringInfo &CLI) override; |
154 | | bool fastLowerIntrinsicCall(const IntrinsicInst *II) override; |
155 | | |
156 | | private: |
157 | | // Selection routines. |
158 | | bool selectAddSub(const Instruction *I); |
159 | | bool selectLogicalOp(const Instruction *I); |
160 | | bool selectLoad(const Instruction *I); |
161 | | bool selectStore(const Instruction *I); |
162 | | bool selectBranch(const Instruction *I); |
163 | | bool selectIndirectBr(const Instruction *I); |
164 | | bool selectCmp(const Instruction *I); |
165 | | bool selectSelect(const Instruction *I); |
166 | | bool selectFPExt(const Instruction *I); |
167 | | bool selectFPTrunc(const Instruction *I); |
168 | | bool selectFPToInt(const Instruction *I, bool Signed); |
169 | | bool selectIntToFP(const Instruction *I, bool Signed); |
170 | | bool selectRem(const Instruction *I, unsigned ISDOpcode); |
171 | | bool selectRet(const Instruction *I); |
172 | | bool selectTrunc(const Instruction *I); |
173 | | bool selectIntExt(const Instruction *I); |
174 | | bool selectMul(const Instruction *I); |
175 | | bool selectShift(const Instruction *I); |
176 | | bool selectBitCast(const Instruction *I); |
177 | | bool selectFRem(const Instruction *I); |
178 | | bool selectSDiv(const Instruction *I); |
179 | | bool selectGetElementPtr(const Instruction *I); |
180 | | bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I); |
181 | | |
182 | | // Utility helper routines. |
183 | | bool isTypeLegal(Type *Ty, MVT &VT); |
184 | | bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false); |
185 | | bool isValueAvailable(const Value *V) const; |
186 | | bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr); |
187 | | bool computeCallAddress(const Value *V, Address &Addr); |
188 | | bool simplifyAddress(Address &Addr, MVT VT); |
189 | | void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB, |
190 | | MachineMemOperand::Flags Flags, |
191 | | unsigned ScaleFactor, MachineMemOperand *MMO); |
192 | | bool isMemCpySmall(uint64_t Len, unsigned Alignment); |
193 | | bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len, |
194 | | unsigned Alignment); |
195 | | bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I, |
196 | | const Value *Cond); |
197 | | bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT); |
198 | | bool optimizeSelect(const SelectInst *SI); |
199 | | std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx); |
200 | | |
201 | | // Emit helper routines. |
202 | | unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, |
203 | | const Value *RHS, bool SetFlags = false, |
204 | | bool WantResult = true, bool IsZExt = false); |
205 | | unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg, |
206 | | bool LHSIsKill, unsigned RHSReg, bool RHSIsKill, |
207 | | bool SetFlags = false, bool WantResult = true); |
208 | | unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg, |
209 | | bool LHSIsKill, uint64_t Imm, bool SetFlags = false, |
210 | | bool WantResult = true); |
211 | | unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg, |
212 | | bool LHSIsKill, unsigned RHSReg, bool RHSIsKill, |
213 | | AArch64_AM::ShiftExtendType ShiftType, |
214 | | uint64_t ShiftImm, bool SetFlags = false, |
215 | | bool WantResult = true); |
216 | | unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg, |
217 | | bool LHSIsKill, unsigned RHSReg, bool RHSIsKill, |
218 | | AArch64_AM::ShiftExtendType ExtType, |
219 | | uint64_t ShiftImm, bool SetFlags = false, |
220 | | bool WantResult = true); |
221 | | |
222 | | // Emit functions. |
223 | | bool emitCompareAndBranch(const BranchInst *BI); |
224 | | bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt); |
225 | | bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt); |
226 | | bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm); |
227 | | bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS); |
228 | | unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true, |
229 | | MachineMemOperand *MMO = nullptr); |
230 | | bool emitStore(MVT VT, unsigned SrcReg, Address Addr, |
231 | | MachineMemOperand *MMO = nullptr); |
232 | | bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg, |
233 | | MachineMemOperand *MMO = nullptr); |
234 | | unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt); |
235 | | unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt); |
236 | | unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS, |
237 | | bool SetFlags = false, bool WantResult = true, |
238 | | bool IsZExt = false); |
239 | | unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm); |
240 | | unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS, |
241 | | bool SetFlags = false, bool WantResult = true, |
242 | | bool IsZExt = false); |
243 | | unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill, |
244 | | unsigned RHSReg, bool RHSIsKill, bool WantResult = true); |
245 | | unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill, |
246 | | unsigned RHSReg, bool RHSIsKill, |
247 | | AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm, |
248 | | bool WantResult = true); |
249 | | unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS, |
250 | | const Value *RHS); |
251 | | unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg, |
252 | | bool LHSIsKill, uint64_t Imm); |
253 | | unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg, |
254 | | bool LHSIsKill, unsigned RHSReg, bool RHSIsKill, |
255 | | uint64_t ShiftImm); |
256 | | unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm); |
257 | | unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill, |
258 | | unsigned Op1, bool Op1IsKill); |
259 | | unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill, |
260 | | unsigned Op1, bool Op1IsKill); |
261 | | unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill, |
262 | | unsigned Op1, bool Op1IsKill); |
263 | | unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, |
264 | | unsigned Op1Reg, bool Op1IsKill); |
265 | | unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill, |
266 | | uint64_t Imm, bool IsZExt = true); |
267 | | unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, |
268 | | unsigned Op1Reg, bool Op1IsKill); |
269 | | unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill, |
270 | | uint64_t Imm, bool IsZExt = true); |
271 | | unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, |
272 | | unsigned Op1Reg, bool Op1IsKill); |
273 | | unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill, |
274 | | uint64_t Imm, bool IsZExt = false); |
275 | | |
276 | | unsigned materializeInt(const ConstantInt *CI, MVT VT); |
277 | | unsigned materializeFP(const ConstantFP *CFP, MVT VT); |
278 | | unsigned materializeGV(const GlobalValue *GV); |
279 | | |
280 | | // Call handling routines. |
281 | | private: |
282 | | CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const; |
283 | | bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs, |
284 | | unsigned &NumBytes); |
285 | | bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes); |
286 | | |
287 | | public: |
288 | | // Backend specific FastISel code. |
289 | | unsigned fastMaterializeAlloca(const AllocaInst *AI) override; |
290 | | unsigned fastMaterializeConstant(const Constant *C) override; |
291 | | unsigned fastMaterializeFloatZero(const ConstantFP* CF) override; |
292 | | |
293 | | explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo, |
294 | | const TargetLibraryInfo *LibInfo) |
295 | 1.23k | : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) { |
296 | 1.23k | Subtarget = |
297 | 1.23k | &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget()); |
298 | 1.23k | Context = &FuncInfo.Fn->getContext(); |
299 | 1.23k | } |
300 | | |
301 | | bool fastSelectInstruction(const Instruction *I) override; |
302 | | |
303 | | #include "AArch64GenFastISel.inc" |
304 | | }; |
305 | | |
306 | | } // end anonymous namespace |
307 | | |
308 | | #include "AArch64GenCallingConv.inc" |
309 | | |
310 | | /// \brief Check if the sign-/zero-extend will be a noop. |
311 | 83 | static bool isIntExtFree(const Instruction *I) { |
312 | 83 | assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) && |
313 | 83 | "Unexpected integer extend instruction."); |
314 | 83 | assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() && |
315 | 83 | "Unexpected value type."); |
316 | 83 | bool IsZExt = isa<ZExtInst>(I); |
317 | 83 | |
318 | 83 | if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0))) |
319 | 0 | if (LI->hasOneUse()) |
320 | 0 | return true; |
321 | 83 | |
322 | 83 | if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) |
323 | 80 | if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) |
324 | 4 | return true; |
325 | 79 | |
326 | 79 | return false; |
327 | 79 | } |
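
An aside on why this predicate exists (a sketch, not part of the listing): AArch64's extending loads (ldrb/ldrh/ldrsw and friends) and the zeroext/signext ABI attributes both make the IR-level extend a no-op, so FastISel can skip emitting an explicit UXT*/SXT*. A minimal standalone restatement, with plain C++ stand-ins for the IR queries:

    // Hypothetical flags standing in for dyn_cast<LoadInst>/dyn_cast<Argument>.
    struct ExtendSource {
      bool IsSingleUseLoad;          // the load folds the extend into itself
      bool IsArgument;
      bool HasZExtAttr, HasSExtAttr; // caller already extended the value
    };

    static bool isIntExtFreeSketch(bool IsZExt, const ExtendSource &S) {
      if (S.IsSingleUseLoad)
        return true;
      if (S.IsArgument)
        return (IsZExt && S.HasZExtAttr) || (!IsZExt && S.HasSExtAttr);
      return false;                  // an explicit extend must be emitted
    }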
328 | | |
329 | | /// \brief Determine the implicit scale factor that is applied by a memory |
330 | | /// operation for a given value type. |
331 | 1.63k | static unsigned getImplicitScaleFactor(MVT VT) { |
332 | 1.63k | switch (VT.SimpleTy) { |
333 | 216 | default: |
334 | 216 | return 0; // invalid |
335 | 222 | case MVT::i1: // fall-through |
336 | 222 | case MVT::i8: |
337 | 222 | return 1; |
338 | 130 | case MVT::i16: |
339 | 130 | return 2; |
340 | 538 | case MVT::i32: // fall-through |
341 | 538 | case MVT::f32: |
342 | 538 | return 4; |
343 | 530 | case MVT::i64: // fall-through |
344 | 530 | case MVT::f64: |
345 | 530 | return 8; |
346 | 0 | } |
347 | 0 | } |
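
The scale factor returned here feeds the scaled-immediate load/store encoding used by simplifyAddress and addLoadStoreOperands below: the unsigned-offset LDR/STR forms store the byte offset divided by the access size. A small sketch of the resulting encodability test (assumed, mirroring the checks further down):

    #include <cstdint>

    // A byte offset fits the scaled unsigned-immediate form only when it is a
    // non-negative multiple of the access size whose scaled value fits in 12
    // bits; e.g. byte offset 16 of an i64 access encodes as 16 / 8 == 2.
    static bool fitsScaledImm(int64_t Offset, unsigned ScaleFactor) {
      return ScaleFactor && Offset >= 0 && Offset % ScaleFactor == 0 &&
             (Offset / ScaleFactor) < (1 << 12);
    }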
348 | | |
349 | 194 | CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const { |
350 | 194 | if (CC == CallingConv::WebKit_JS) |
351 | 7 | return CC_AArch64_WebKit_JS; |
352 | 187 | if (187 CC == CallingConv::GHC187 ) |
353 | 0 | return CC_AArch64_GHC; |
354 | 187 | return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS; |
355 | 194 | } |
356 | | |
357 | 17 | unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) { |
358 | 17 | assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 && |
359 | 17 | "Alloca should always return a pointer."); |
360 | 17 | |
361 | 17 | // Don't handle dynamic allocas. |
362 | 17 | if (!FuncInfo.StaticAllocaMap.count(AI)) |
363 | 0 | return 0; |
364 | 17 | |
365 | 17 | DenseMap<const AllocaInst *, int>::iterator SI = |
366 | 17 | FuncInfo.StaticAllocaMap.find(AI); |
367 | 17 | |
368 | 17 | if (SI != FuncInfo.StaticAllocaMap.end()) { |
369 | 17 | unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass); |
370 | 17 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri), |
371 | 17 | ResultReg) |
372 | 17 | .addFrameIndex(SI->second) |
373 | 17 | .addImm(0) |
374 | 17 | .addImm(0); |
375 | 17 | return ResultReg; |
376 | 17 | } |
377 | 0 | |
378 | 0 | return 0; |
379 | 0 | } |
380 | | |
381 | 377 | unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) { |
382 | 377 | if (VT > MVT::i64) |
383 | 0 | return 0; |
384 | 377 | |
385 | 377 | if (!CI->isZero()) |
386 | 245 | return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue()); |
387 | 132 | |
388 | 132 | // Create a copy from the zero register to materialize a "0" value. |
389 | 132 | const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass |
390 | 114 | : &AArch64::GPR32RegClass; |
391 | 132 | unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; |
392 | 377 | unsigned ResultReg = createResultReg(RC); |
393 | 377 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), |
394 | 377 | ResultReg).addReg(ZeroReg, getKillRegState(true)); |
395 | 377 | return ResultReg; |
396 | 377 | } |
397 | | |
398 | 47 | unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) { |
399 | 47 | // Positive zero (+0.0) has to be materialized with a fmov from the zero |
400 | 47 | // register, because the immediate version of fmov cannot encode zero. |
401 | 47 | if (CFP->isNullValue()) |
402 | 2 | return fastMaterializeFloatZero(CFP); |
403 | 45 | |
404 | 45 | if (VT != MVT::f32 && VT != MVT::f64) |
405 | 2 | return 0; |
406 | 43 | |
407 | 43 | const APFloat Val = CFP->getValueAPF(); |
408 | 43 | bool Is64Bit = (VT == MVT::f64); |
409 | 43 | // This checks to see if we can use FMOV instructions to materialize |
410 | 43 | // a constant, otherwise we have to materialize via the constant pool. |
411 | 43 | if (TLI.isFPImmLegal(Val, VT)) { |
412 | 10 | int Imm = |
413 | 10 | Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val); |
414 | 10 | assert((Imm != -1) && "Cannot encode floating-point constant."); |
415 | 10 | unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi; |
416 | 10 | return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm); |
417 | 10 | } |
418 | 33 | |
419 | 33 | // For the MachO large code model materialize the FP constant in code. |
420 | 33 | if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) { |
421 | 4 | unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm; |
422 | 4 | const TargetRegisterClass *RC = Is64Bit ? |
423 | 4 | &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
424 | 4 | |
425 | 4 | unsigned TmpReg = createResultReg(RC); |
426 | 4 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg) |
427 | 4 | .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue()); |
428 | 4 | |
429 | 4 | unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); |
430 | 4 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
431 | 4 | TII.get(TargetOpcode::COPY), ResultReg) |
432 | 4 | .addReg(TmpReg, getKillRegState(true)); |
433 | 4 | |
434 | 4 | return ResultReg; |
435 | 4 | } |
436 | 29 | |
437 | 29 | // Materialize via constant pool. MachineConstantPool wants an explicit |
438 | 29 | // alignment. |
439 | 29 | unsigned Align = DL.getPrefTypeAlignment(CFP->getType()); |
440 | 29 | if (Align == 0) |
441 | 0 | Align = DL.getTypeAllocSize(CFP->getType()); |
442 | 29 | |
443 | 29 | unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align); |
444 | 29 | unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); |
445 | 29 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), |
446 | 29 | ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE); |
447 | 29 | |
448 | 29 | unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui; |
449 | 47 | unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); |
450 | 47 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) |
451 | 47 | .addReg(ADRPReg) |
452 | 47 | .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC); |
453 | 47 | return ResultReg; |
454 | 47 | } |
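
For context (an approximation, not the authoritative check): TLI.isFPImmLegal corresponds to the 8-bit FMOV immediate, which encodes roughly +/-(16..31)/16 * 2^e for e in [-3, 4]. Positive zero is not in that set, hence the separate fastMaterializeFloatZero path above. A hedged standalone version of the test:

    #include <cmath>

    // Sketch only; TLI.isFPImmLegal is the real gate.
    static bool fitsFMOVImm8(double V) {
      if (V == 0.0 || !std::isfinite(V))
        return false;
      int Exp;
      double Frac = std::frexp(std::fabs(V), &Exp); // |V| = Frac * 2^Exp
      double N = Frac * 32.0;  // numerator over 16, since 2*Frac is in [1, 2)
      return N == std::floor(N) && N >= 16.0 && N <= 31.0 &&
             Exp - 1 >= -3 && Exp - 1 <= 4;
    }

    // fitsFMOVImm8(1.0), (0.5), (3.0) hold; 0.1 falls back to the constant pool.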
455 | | |
456 | 129 | unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) { |
457 | 129 | // We can't handle thread-local variables quickly yet. |
458 | 129 | if (GV->isThreadLocal()) |
459 | 10 | return 0; |
460 | 119 | |
461 | 119 | // MachO still uses GOT for large code-model accesses, but ELF requires |
462 | 119 | // movz/movk sequences, which FastISel doesn't handle yet. |
463 | 119 | if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO()) |
464 | 0 | return 0; |
465 | 119 | |
466 | 119 | unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM); |
467 | 119 | |
468 | 119 | EVT DestEVT = TLI.getValueType(DL, GV->getType(), true); |
469 | 119 | if (!DestEVT.isSimple()) |
470 | 0 | return 0; |
471 | 119 | |
472 | 119 | unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); |
473 | 119 | unsigned ResultReg; |
474 | 119 | |
475 | 119 | if (OpFlags & AArch64II::MO_GOT) { |
476 | 48 | // ADRP + LDRX |
477 | 48 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), |
478 | 48 | ADRPReg) |
479 | 48 | .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE); |
480 | 48 | |
481 | 48 | ResultReg = createResultReg(&AArch64::GPR64RegClass); |
482 | 48 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui), |
483 | 48 | ResultReg) |
484 | 48 | .addReg(ADRPReg) |
485 | 48 | .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | |
486 | 48 | AArch64II::MO_NC); |
487 | 119 | } else { |
488 | 71 | // ADRP + ADDX |
489 | 71 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), |
490 | 71 | ADRPReg) |
491 | 71 | .addGlobalAddress(GV, 0, AArch64II::MO_PAGE); |
492 | 71 | |
493 | 71 | ResultReg = createResultReg(&AArch64::GPR64spRegClass); |
494 | 71 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri), |
495 | 71 | ResultReg) |
496 | 71 | .addReg(ADRPReg) |
497 | 71 | .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC) |
498 | 71 | .addImm(0); |
499 | 71 | } |
500 | 129 | return ResultReg; |
501 | 129 | } |
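
Both branches above use AArch64's page addressing: ADRP materializes the 4 KiB page of the symbol PC-relatively, and the low 12 bits are applied either by an ADD (direct) or folded into a GOT load (MO_GOT). A minimal sketch of the split, with hypothetical helper names:

    #include <cstdint>

    static uint64_t pageBase(uint64_t A)   { return A & ~UINT64_C(0xfff); }
    static uint64_t pageOffset(uint64_t A) { return A &  UINT64_C(0xfff); }

    // pageBase(S) + pageOffset(S) == S, matching the emitted pair, roughly:
    //   adrp x8, sym            // x8 = page of &sym, PC-relative
    //   add  x8, x8, :lo12:sym  // or: ldr x8, [x8, :got_lo12:sym]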
502 | | |
503 | 592 | unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) { |
504 | 592 | EVT CEVT = TLI.getValueType(DL, C->getType(), true); |
505 | 592 | |
506 | 592 | // Only handle simple types. |
507 | 592 | if (!CEVT.isSimple()) |
508 | 0 | return 0; |
509 | 592 | MVT VT = CEVT.getSimpleVT(); |
510 | 592 | |
511 | 592 | if (const auto *CI = dyn_cast<ConstantInt>(C)) |
512 | 371 | return materializeInt(CI, VT); |
513 | 221 | else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) |
514 | 47 | return materializeFP(CFP, VT); |
515 | 174 | else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) |
516 | 122 | return materializeGV(GV); |
517 | 52 | |
518 | 52 | return 0; |
519 | 52 | } |
520 | | |
521 | 2 | unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) { |
522 | 2 | assert(CFP->isNullValue() && |
523 | 2 | "Floating-point constant is not a positive zero."); |
524 | 2 | MVT VT; |
525 | 2 | if (!isTypeLegal(CFP->getType(), VT)) |
526 | 0 | return 0; |
527 | 2 | |
528 | 2 | if (VT != MVT::f32 && VT != MVT::f64) |
529 | 0 | return 0; |
530 | 2 | |
531 | 2 | bool Is64Bit = (VT == MVT::f64); |
532 | 2 | unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR; |
533 | 2 | unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr; |
534 | 2 | return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true); |
535 | 2 | } |
536 | | |
537 | | /// \brief Check if the multiply is by a power-of-2 constant. |
538 | 567 | static bool isMulPowOf2(const Value *I) { |
539 | 567 | if (const auto *MI = dyn_cast<MulOperator>(I)) { |
540 | 23 | if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0))) |
541 | 0 | if (C->getValue().isPowerOf2()) |
542 | 0 | return true; |
543 | 23 | if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1))) |
544 | 21 | if (C->getValue().isPowerOf2()) |
545 | 19 | return true; |
546 | 548 | } |
547 | 548 | return false; |
548 | 548 | } |
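
The callers below (computeAddress, emitAddSub, and the mul/shift selectors) use this to rewrite a multiply by 2^k as LSL #k, either inside an addressing mode or as a shifted register operand. A standalone restatement of the constant test:

    #include <cstdint>

    // Only a constant power-of-two operand qualifies; emit sites then take
    // its logBase2 as the shift amount.
    static bool isPow2(uint64_t C) { return C && !(C & (C - 1)); }

    // e.g. "%o = mul i64 %i, 4" under a 4-byte access can fold into
    //   ldr w0, [x0, x1, lsl #2]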
549 | | |
550 | | // Computes the address to get to an object. |
551 | | bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty) |
552 | 1.29k | { |
553 | 1.29k | const User *U = nullptr; |
554 | 1.29k | unsigned Opcode = Instruction::UserOp1; |
555 | 1.29k | if (const Instruction *I = dyn_cast<Instruction>(Obj)) { |
556 | 675 | // Don't walk into other basic blocks unless the object is an alloca from |
557 | 675 | // another block, otherwise it may not have a virtual register assigned. |
558 | 675 | if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) || |
559 | 675 | FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { |
560 | 660 | Opcode = I->getOpcode(); |
561 | 660 | U = I; |
562 | 660 | } |
563 | 1.29k | } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) { |
564 | 25 | Opcode = C->getOpcode(); |
565 | 25 | U = C; |
566 | 25 | } |
567 | 1.29k | |
568 | 1.29k | if (auto *Ty = dyn_cast<PointerType>(Obj->getType())) |
569 | 928 | if (Ty->getAddressSpace() > 255) |
570 | 928 | // Fast instruction selection doesn't support the special |
571 | 928 | // address spaces. |
572 | 10 | return false; |
573 | 1.28k | |
574 | 1.28k | switch (Opcode) { |
575 | 623 | default: |
576 | 623 | break; |
577 | 17 | case Instruction::BitCast: |
578 | 17 | // Look through bitcasts. |
579 | 17 | return computeAddress(U->getOperand(0), Addr, Ty); |
580 | 1.28k | |
581 | 136 | case Instruction::IntToPtr: |
582 | 136 | // Look past no-op inttoptrs. |
583 | 136 | if (TLI.getValueType(DL, U->getOperand(0)->getType()) == |
584 | 136 | TLI.getPointerTy(DL)) |
585 | 136 | return computeAddress(U->getOperand(0), Addr, Ty); |
586 | 0 | break; |
587 | 0 | |
588 | 6 | case Instruction::PtrToInt: |
589 | 6 | // Look past no-op ptrtoints. |
590 | 6 | if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) |
591 | 6 | return computeAddress(U->getOperand(0), Addr, Ty); |
592 | 0 | break; |
593 | 0 | |
594 | 62 | case Instruction::GetElementPtr: { |
595 | 62 | Address SavedAddr = Addr; |
596 | 62 | uint64_t TmpOffset = Addr.getOffset(); |
597 | 62 | |
598 | 62 | // Iterate through the GEP folding the constants into offsets where |
599 | 62 | // we can. |
600 | 62 | for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U); |
601 | 158 | GTI != E; ++GTI) { |
602 | 111 | const Value *Op = GTI.getOperand(); |
603 | 111 | if (StructType *STy = GTI.getStructTypeOrNull()) { |
604 | 21 | const StructLayout *SL = DL.getStructLayout(STy); |
605 | 21 | unsigned Idx = cast<ConstantInt>(Op)->getZExtValue(); |
606 | 21 | TmpOffset += SL->getElementOffset(Idx); |
607 | 111 | } else { |
608 | 90 | uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType()); |
609 | 90 | while (true) { |
610 | 90 | if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { |
611 | 75 | // Constant-offset addressing. |
612 | 75 | TmpOffset += CI->getSExtValue() * S; |
613 | 75 | break; |
614 | 75 | } |
615 | 15 | if (canFoldAddIntoGEP(U, Op)) { |
616 | 0 | // A compatible add with a constant operand. Fold the constant. |
617 | 0 | ConstantInt *CI = |
618 | 0 | cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1)); |
619 | 0 | TmpOffset += CI->getSExtValue() * S; |
620 | 0 | // Iterate on the other operand. |
621 | 0 | Op = cast<AddOperator>(Op)->getOperand(0); |
622 | 0 | continue; |
623 | 0 | } |
624 | 15 | // Unsupported |
625 | 15 | goto unsupported_gep; |
626 | 15 | } |
627 | 90 | } |
628 | 111 | } |
629 | 62 | |
630 | 62 | // Try to grab the base operand now. |
631 | 47 | Addr.setOffset(TmpOffset); |
632 | 47 | if (computeAddress(U->getOperand(0), Addr, Ty)) |
633 | 46 | return true; |
634 | 1 | |
635 | 1 | // We failed, restore everything and try the other options. |
636 | 1 | Addr = SavedAddr; |
637 | 1 | |
638 | 16 | unsupported_gep: |
639 | 16 | break; |
640 | 1 | } |
641 | 236 | case Instruction::Alloca: { |
642 | 236 | const AllocaInst *AI = cast<AllocaInst>(Obj); |
643 | 236 | DenseMap<const AllocaInst *, int>::iterator SI = |
644 | 236 | FuncInfo.StaticAllocaMap.find(AI); |
645 | 236 | if (SI != FuncInfo.StaticAllocaMap.end()) { |
646 | 236 | Addr.setKind(Address::FrameIndexBase); |
647 | 236 | Addr.setFI(SI->second); |
648 | 236 | return true; |
649 | 236 | } |
650 | 0 | break; |
651 | 0 | } |
652 | 117 | case Instruction::Add: { |
653 | 117 | // Adds of constants are common and easy enough. |
654 | 117 | const Value *LHS = U->getOperand(0); |
655 | 117 | const Value *RHS = U->getOperand(1); |
656 | 117 | |
657 | 117 | if (isa<ConstantInt>(LHS)) |
658 | 1 | std::swap(LHS, RHS); |
659 | 117 | |
660 | 117 | if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { |
661 | 29 | Addr.setOffset(Addr.getOffset() + CI->getSExtValue()); |
662 | 29 | return computeAddress(LHS, Addr, Ty); |
663 | 29 | } |
664 | 88 | |
665 | 88 | Address Backup = Addr; |
666 | 88 | if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty)) |
667 | 88 | return true; |
668 | 0 | Addr = Backup; |
669 | 0 |
|
670 | 0 | break; |
671 | 0 | } |
672 | 21 | case Instruction::Sub: { |
673 | 21 | // Subs of constants are common and easy enough. |
674 | 21 | const Value *LHS = U->getOperand(0); |
675 | 21 | const Value *RHS = U->getOperand(1); |
676 | 21 | |
677 | 21 | if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { |
678 | 21 | Addr.setOffset(Addr.getOffset() - CI->getSExtValue()); |
679 | 21 | return computeAddress(LHS, Addr, Ty); |
680 | 21 | } |
681 | 0 | break; |
682 | 0 | } |
683 | 26 | case Instruction::Shl: { |
684 | 26 | if (Addr.getOffsetReg()) |
685 | 3 | break; |
686 | 23 | |
687 | 23 | const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1)); |
688 | 23 | if (!CI) |
689 | 0 | break; |
690 | 23 | |
691 | 23 | unsigned Val = CI->getZExtValue(); |
692 | 23 | if (Val < 1 || Val > 3) |
693 | 0 | break; |
694 | 23 | |
695 | 23 | uint64_t NumBytes = 0; |
696 | 23 | if (Ty && Ty->isSized()) { |
697 | 23 | uint64_t NumBits = DL.getTypeSizeInBits(Ty); |
698 | 23 | NumBytes = NumBits / 8; |
699 | 23 | if (!isPowerOf2_64(NumBits)) |
700 | 0 | NumBytes = 0; |
701 | 23 | } |
702 | 23 | |
703 | 23 | if (NumBytes != (1ULL << Val)) |
704 | 0 | break; |
705 | 23 | |
706 | 23 | Addr.setShift(Val); |
707 | 23 | Addr.setExtendType(AArch64_AM::LSL); |
708 | 23 | |
709 | 23 | const Value *Src = U->getOperand(0); |
710 | 23 | if (const auto *I = dyn_cast<Instruction>(Src)) { |
711 | 17 | if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { |
712 | 17 | // Fold the zext or sext when it won't become a noop. |
713 | 17 | if (const auto *ZE = dyn_cast<ZExtInst>(I)) { |
714 | 5 | if (!isIntExtFree(ZE) && |
715 | 5 | ZE->getOperand(0)->getType()->isIntegerTy(32)) { |
716 | 4 | Addr.setExtendType(AArch64_AM::UXTW); |
717 | 4 | Src = ZE->getOperand(0); |
718 | 4 | } |
719 | 17 | } else if (const auto *SE = dyn_cast<SExtInst>(I)) { |
720 | 8 | if (!isIntExtFree(SE) && |
721 | 8 | SE->getOperand(0)->getType()->isIntegerTy(32)) { |
722 | 7 | Addr.setExtendType(AArch64_AM::SXTW); |
723 | 7 | Src = SE->getOperand(0); |
724 | 7 | } |
725 | 12 | } |
726 | 17 | } |
727 | 17 | } |
728 | 23 | |
729 | 23 | if (const auto *AI = dyn_cast<BinaryOperator>(Src)) |
730 | 5 | if (AI->getOpcode() == Instruction::And) { |
731 | 4 | const Value *LHS = AI->getOperand(0); |
732 | 4 | const Value *RHS = AI->getOperand(1); |
733 | 4 | |
734 | 4 | if (const auto *C = dyn_cast<ConstantInt>(LHS)) |
735 | 0 | if (C->getValue() == 0xffffffff) |
736 | 0 | std::swap(LHS, RHS); |
737 | 4 | |
738 | 4 | if (const auto *C = dyn_cast<ConstantInt>(RHS)) |
739 | 3 | if (C->getValue() == 0xffffffff) { |
740 | 3 | Addr.setExtendType(AArch64_AM::UXTW); |
741 | 3 | unsigned Reg = getRegForValue(LHS); |
742 | 3 | if (!Reg) |
743 | 0 | return false; |
744 | 3 | bool RegIsKill = hasTrivialKill(LHS); |
745 | 3 | Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill, |
746 | 3 | AArch64::sub_32); |
747 | 3 | Addr.setOffsetReg(Reg); |
748 | 3 | return true; |
749 | 3 | } |
750 | 5 | } |
751 | 20 | |
752 | 20 | unsigned Reg = getRegForValue(Src); |
753 | 20 | if (!Reg) |
754 | 0 | return false; |
755 | 20 | Addr.setOffsetReg(Reg); |
756 | 20 | return true; |
757 | 20 | } |
758 | 13 | case Instruction::Mul: { |
759 | 13 | if (Addr.getOffsetReg()) |
760 | 0 | break; |
761 | 13 | |
762 | 13 | if (!isMulPowOf2(U)) |
763 | 0 | break; |
764 | 13 | |
765 | 13 | const Value *LHS = U->getOperand(0); |
766 | 13 | const Value *RHS = U->getOperand(1); |
767 | 13 | |
768 | 13 | // Canonicalize power-of-2 value to the RHS. |
769 | 13 | if (const auto *C = dyn_cast<ConstantInt>(LHS)) |
770 | 0 | if (C->getValue().isPowerOf2()) |
771 | 0 | std::swap(LHS, RHS); |
772 | 13 | |
773 | 13 | assert(isa<ConstantInt>(RHS) && "Expected an ConstantInt."); |
774 | 13 | const auto *C = cast<ConstantInt>(RHS); |
775 | 13 | unsigned Val = C->getValue().logBase2(); |
776 | 13 | if (Val < 1 || Val > 3) |
777 | 0 | break; |
778 | 13 | |
779 | 13 | uint64_t NumBytes = 0; |
780 | 13 | if (Ty && Ty->isSized()) { |
781 | 13 | uint64_t NumBits = DL.getTypeSizeInBits(Ty); |
782 | 13 | NumBytes = NumBits / 8; |
783 | 13 | if (!isPowerOf2_64(NumBits)) |
784 | 0 | NumBytes = 0; |
785 | 13 | } |
786 | 13 | |
787 | 13 | if (NumBytes != (1ULL << Val)) |
788 | 0 | break; |
789 | 13 | |
790 | 13 | Addr.setShift(Val); |
791 | 13 | Addr.setExtendType(AArch64_AM::LSL); |
792 | 13 | |
793 | 13 | const Value *Src = LHS; |
794 | 13 | if (const auto *I = dyn_cast<Instruction>(Src)) { |
795 | 9 | if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { |
796 | 8 | // Fold the zext or sext when it won't become a noop. |
797 | 8 | if (const auto *ZE = dyn_cast<ZExtInst>(I)) { |
798 | 4 | if (!isIntExtFree(ZE) && |
799 | 4 | ZE->getOperand(0)->getType()->isIntegerTy(32)) { |
800 | 3 | Addr.setExtendType(AArch64_AM::UXTW); |
801 | 3 | Src = ZE->getOperand(0); |
802 | 3 | } |
803 | 8 | } else if (const auto *SE = dyn_cast<SExtInst>(I)) { |
804 | 4 | if (!isIntExtFree(SE) && |
805 | 4 | SE->getOperand(0)->getType()->isIntegerTy(32)) { |
806 | 3 | Addr.setExtendType(AArch64_AM::SXTW); |
807 | 3 | Src = SE->getOperand(0); |
808 | 3 | } |
809 | 4 | } |
810 | 8 | } |
811 | 9 | } |
812 | 13 | |
813 | 13 | unsigned Reg = getRegForValue(Src); |
814 | 13 | if (!Reg) |
815 | 0 | return false; |
816 | 13 | Addr.setOffsetReg(Reg); |
817 | 13 | return true; |
818 | 13 | } |
819 | 3 | case Instruction::And: { |
820 | 3 | if (Addr.getOffsetReg()) |
821 | 0 | break; |
822 | 3 | |
823 | 3 | if (!Ty || DL.getTypeSizeInBits(Ty) != 8) |
824 | 2 | break; |
825 | 1 | |
826 | 1 | const Value *LHS = U->getOperand(0); |
827 | 1 | const Value *RHS = U->getOperand(1); |
828 | 1 | |
829 | 1 | if (const auto *C = dyn_cast<ConstantInt>(LHS)) |
830 | 0 | if (0 C->getValue() == 0xffffffff0 ) |
831 | 0 | std::swap(LHS, RHS); |
832 | 1 | |
833 | 1 | if (const auto *C = dyn_cast<ConstantInt>(RHS)) |
834 | 1 | if (C->getValue() == 0xffffffff) { |
835 | 1 | Addr.setShift(0); |
836 | 1 | Addr.setExtendType(AArch64_AM::LSL); |
837 | 1 | Addr.setExtendType(AArch64_AM::UXTW); |
838 | 1 | |
839 | 1 | unsigned Reg = getRegForValue(LHS); |
840 | 1 | if (!Reg) |
841 | 0 | return false; |
842 | 1 | bool RegIsKill = hasTrivialKill(LHS); |
843 | 1 | Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill, |
844 | 1 | AArch64::sub_32); |
845 | 1 | Addr.setOffsetReg(Reg); |
846 | 1 | return true; |
847 | 1 | } |
848 | 0 | break; |
849 | 0 | } |
850 | 20 | case Instruction::SExt: |
851 | 20 | case Instruction::ZExt: { |
852 | 20 | if (!Addr.getReg() || Addr.getOffsetReg()) |
853 | 0 | break; |
854 | 20 | |
855 | 20 | const Value *Src = nullptr; |
856 | 20 | // Fold the zext or sext when it won't become a noop. |
857 | 20 | if (const auto *ZE = dyn_cast<ZExtInst>(U)) { |
858 | 0 | if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) { |
859 | 0 | Addr.setExtendType(AArch64_AM::UXTW); |
860 | 0 | Src = ZE->getOperand(0); |
861 | 0 | } |
862 | 20 | } else if (const auto *SE = dyn_cast<SExtInst>(U)) { |
863 | 20 | if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) { |
864 | 20 | Addr.setExtendType(AArch64_AM::SXTW); |
865 | 20 | Src = SE->getOperand(0); |
866 | 20 | } |
867 | 20 | } |
868 | 20 | |
869 | 20 | if (!Src) |
870 | 0 | break; |
871 | 20 | |
872 | 20 | Addr.setShift(0); |
873 | 20 | unsigned Reg = getRegForValue(Src); |
874 | 20 | if (!Reg) |
875 | 0 | return false; |
876 | 20 | Addr.setOffsetReg(Reg); |
877 | 20 | return true; |
878 | 20 | } |
879 | 644 | } // end switch |
880 | 644 | |
881 | 644 | if (Addr.isRegBase() && !Addr.getReg()) { |
882 | 610 | unsigned Reg = getRegForValue(Obj); |
883 | 610 | if (!Reg) |
884 | 8 | return false; |
885 | 602 | Addr.setReg(Reg); |
886 | 602 | return true; |
887 | 602 | } |
888 | 34 | |
889 | 34 | if (!Addr.getOffsetReg()) { |
890 | 34 | unsigned Reg = getRegForValue(Obj); |
891 | 34 | if (!Reg) |
892 | 0 | return false; |
893 | 34 | Addr.setOffsetReg(Reg); |
894 | 34 | return true; |
895 | 34 | } |
896 | 0 | |
897 | 0 | return false; |
898 | 0 | } |
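
Summing up (a sketch with assumed names, mirroring the private Address class above): this routine pattern-matches the pointer expression into the address shapes the load/store emitters accept, folding constant GEP indices into an immediate, shl/mul-by-pow2 into a scaled index register, and zext/sext into a UXTW/SXTW extend:

    #include <cstdint>

    struct AddrSketch {
      bool IsFrameIndex = false;   // alloca        -> [fi, #imm]
      unsigned BaseReg = 0;        // plain base    -> [xN, #imm]
      unsigned OffsetReg = 0;      // index form    -> [xN, wM/xM, ext #shift]
      unsigned Shift = 0;          // log2(access size), from shl/mul-pow2
      enum { NoExt, UXTW, SXTW } Ext = NoExt; // from a folded zext/sext
      int64_t Offset = 0;          // constant add/sub/GEP contributions
    };

    // e.g. "gep i32, i32* %b, i64 (sext i32 %i)" yields BaseReg=%b,
    // OffsetReg=%i, Ext=SXTW, Shift=2, selecting "ldr w0, [x0, w1, sxtw #2]".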
899 | | |
900 | 126 | bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) { |
901 | 126 | const User *U = nullptr; |
902 | 126 | unsigned Opcode = Instruction::UserOp1; |
903 | 126 | bool InMBB = true; |
904 | 126 | |
905 | 126 | if (const auto *I = dyn_cast<Instruction>(V)) { |
906 | 15 | Opcode = I->getOpcode(); |
907 | 15 | U = I; |
908 | 15 | InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock(); |
909 | 126 | } else if (const auto *C = dyn_cast<ConstantExpr>(V)) { |
910 | 3 | Opcode = C->getOpcode(); |
911 | 3 | U = C; |
912 | 3 | } |
913 | 126 | |
914 | 126 | switch (Opcode) { |
915 | 111 | default: break; |
916 | 2 | case Instruction::BitCast: |
917 | 2 | // Look past bitcasts if its operand is in the same BB. |
918 | 2 | if (InMBB) |
919 | 2 | return computeCallAddress(U->getOperand(0), Addr); |
920 | 0 | break; |
921 | 13 | case Instruction::IntToPtr: |
922 | 13 | // Look past no-op inttoptrs if its operand is in the same BB. |
923 | 13 | if (InMBB && |
924 | 13 | TLI.getValueType(DL, U->getOperand(0)->getType()) == |
925 | 13 | TLI.getPointerTy(DL)) |
926 | 13 | return computeCallAddress(U->getOperand(0), Addr); |
927 | 0 | break; |
928 | 0 | case Instruction::PtrToInt: |
929 | 0 | // Look past no-op ptrtoints if its operand is in the same BB. |
930 | 0 | if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) |
931 | 0 | return computeCallAddress(U->getOperand(0), Addr); |
932 | 0 | break; |
933 | 111 | } |
934 | 111 | |
935 | 111 | if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) { |
936 | 91 | Addr.setGlobalValue(GV); |
937 | 91 | return true; |
938 | 91 | } |
939 | 20 | |
940 | 20 | // If all else fails, try to materialize the value in a register. |
941 | 20 | if (!Addr.getGlobalValue()) { |
942 | 20 | Addr.setReg(getRegForValue(V)); |
943 | 20 | return Addr.getReg() != 0; |
944 | 20 | } |
945 | 0 | |
946 | 0 | return false; |
947 | 0 | } |
948 | | |
949 | 3.84k | bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) { |
950 | 3.84k | EVT evt = TLI.getValueType(DL, Ty, true); |
951 | 3.84k | |
952 | 3.84k | // Only handle simple types. |
953 | 3.84k | if (evt == MVT::Other || !evt.isSimple()) |
954 | 11 | return false; |
955 | 3.83k | VT = evt.getSimpleVT(); |
956 | 3.83k | |
957 | 3.83k | // This is a legal type, but it's not something we handle in fast-isel. |
958 | 3.83k | if (VT == MVT::f128) |
959 | 26 | return false; |
960 | 3.80k | |
961 | 3.80k | // Handle all other legal types, i.e. a register that will directly hold this |
962 | 3.80k | // value. |
963 | 3.80k | return TLI.isTypeLegal(VT); |
964 | 3.80k | } |
965 | | |
966 | | /// \brief Determine if the value type is supported by FastISel. |
967 | | /// |
968 | | /// FastISel for AArch64 can handle more value types than are legal. This adds |
969 | | /// simple value type such as i1, i8, and i16. |
970 | 2.09k | bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) { |
971 | 2.09k | if (Ty->isVectorTy() && !IsVectorAllowed) |
972 | 6 | return false; |
973 | 2.08k | |
974 | 2.08k | if (isTypeLegal(Ty, VT)) |
975 | 1.57k | return true; |
976 | 512 | |
977 | 512 | // If this is a type than can be sign or zero-extended to a basic operation |
978 | 512 | // go ahead and accept it now. |
979 | 512 | if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16) |
980 | 490 | return true; |
981 | 22 | |
982 | 22 | return false; |
983 | 22 | } |
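
So a type passes either because it is already legal (i32/i64/f32/f64, plus allowed vectors) or because it is a narrow integer that FastISel will widen to i32 on demand. A compact restatement under a hypothetical enum:

    enum class VTSketch { i1, i8, i16, i32, i64, f32, f64, f128, Other };

    static bool isSupportedSketch(VTSketch VT, bool IsLegal) {
      if (VT == VTSketch::Other || VT == VTSketch::f128)
        return false;              // f128 is legal but not handled here
      return IsLegal || VT == VTSketch::i1 || VT == VTSketch::i8 ||
             VT == VTSketch::i16;  // widened with a cheap or free extend
    }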
984 | | |
985 | 1.31k | bool AArch64FastISel::isValueAvailable(const Value *V) const { |
986 | 1.31k | if (!isa<Instruction>(V)) |
987 | 505 | return true; |
988 | 808 | |
989 | 808 | const auto *I = cast<Instruction>(V); |
990 | 808 | return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB; |
991 | 808 | } |
992 | | |
993 | 927 | bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) { |
994 | 927 | unsigned ScaleFactor = getImplicitScaleFactor(VT); |
995 | 927 | if (!ScaleFactor) |
996 | 216 | return false; |
997 | 711 | |
998 | 711 | bool ImmediateOffsetNeedsLowering = false; |
999 | 711 | bool RegisterOffsetNeedsLowering = false; |
1000 | 711 | int64_t Offset = Addr.getOffset(); |
1001 | 711 | if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset)) |
1002 | 4 | ImmediateOffsetNeedsLowering = true; |
1003 | 707 | else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) && |
1004 | 108 | !isUInt<12>(Offset / ScaleFactor)) |
1005 | 9 | ImmediateOffsetNeedsLowering = true; |
1006 | 711 | |
1007 | 711 | // Cannot encode an offset register and an immediate offset in the same |
1008 | 711 | // instruction. Fold the immediate offset into the load/store instruction and |
1009 | 711 | // emit an additional add to take care of the offset register. |
1010 | 711 | if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg()) |
1011 | 5 | RegisterOffsetNeedsLowering = true; |
1012 | 711 | |
1013 | 711 | // Cannot encode zero register as base. |
1014 | 711 | if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg()) |
1015 | 3 | RegisterOffsetNeedsLowering = true; |
1016 | 711 | |
1017 | 711 | // If this is a stack pointer and the offset needs to be simplified then put |
1018 | 711 | // the alloca address into a register, set the base type back to register and |
1019 | 711 | // continue. This should almost never happen. |
1020 | 711 | if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase()) |
1021 | 3 | { |
1022 | 3 | unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass); |
1023 | 3 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri), |
1024 | 3 | ResultReg) |
1025 | 3 | .addFrameIndex(Addr.getFI()) |
1026 | 3 | .addImm(0) |
1027 | 3 | .addImm(0); |
1028 | 3 | Addr.setKind(Address::RegBase); |
1029 | 3 | Addr.setReg(ResultReg); |
1030 | 3 | } |
1031 | 711 | |
1032 | 711 | if (RegisterOffsetNeedsLowering) { |
1033 | 7 | unsigned ResultReg = 0; |
1034 | 7 | if (Addr.getReg()) { |
1035 | 4 | if (Addr.getExtendType() == AArch64_AM::SXTW || |
1036 | 3 | Addr.getExtendType() == AArch64_AM::UXTW) |
1037 | 1 | ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(), |
1038 | 1 | /*TODO:IsKill=*/false, Addr.getOffsetReg(), |
1039 | 1 | /*TODO:IsKill=*/false, Addr.getExtendType(), |
1040 | 1 | Addr.getShift()); |
1041 | 4 | else |
1042 | 3 | ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(), |
1043 | 3 | /*TODO:IsKill=*/false, Addr.getOffsetReg(), |
1044 | 3 | /*TODO:IsKill=*/false, AArch64_AM::LSL, |
1045 | 3 | Addr.getShift()); |
1046 | 7 | } else { |
1047 | 3 | if (Addr.getExtendType() == AArch64_AM::UXTW) |
1048 | 0 | ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(), |
1049 | 0 | /*Op0IsKill=*/false, Addr.getShift(), |
1050 | 0 | /*IsZExt=*/true); |
1051 | 3 | else if (Addr.getExtendType() == AArch64_AM::SXTW) |
1052 | 1 | ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(), |
1053 | 1 | /*Op0IsKill=*/false, Addr.getShift(), |
1054 | 1 | /*IsZExt=*/false); |
1055 | 3 | else |
1056 | 2 | ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(), |
1057 | 2 | /*Op0IsKill=*/false, Addr.getShift()); |
1058 | 3 | } |
1059 | 7 | if (!ResultReg) |
1060 | 0 | return false; |
1061 | 7 | |
1062 | 7 | Addr.setReg(ResultReg); |
1063 | 7 | Addr.setOffsetReg(0); |
1064 | 7 | Addr.setShift(0); |
1065 | 7 | Addr.setExtendType(AArch64_AM::InvalidShiftExtend); |
1066 | 7 | } |
1067 | 711 | |
1068 | 711 | // Since the offset is too large for the load/store instruction get the |
1069 | 711 | // reg+offset into a register. |
1070 | 711 | if (ImmediateOffsetNeedsLowering) { |
1071 | 13 | unsigned ResultReg; |
1072 | 13 | if (Addr.getReg()) |
1073 | 13 | // Try to fold the immediate into the add instruction. |
1074 | 13 | ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset); |
1075 | 13 | else |
1076 | 0 | ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset); |
1077 | 13 | |
1078 | 13 | if (!ResultReg) |
1079 | 2 | return false; |
1080 | 11 | Addr.setReg(ResultReg); |
1081 | 11 | Addr.setOffset(0); |
1082 | 11 | } |
1083 | 709 | return true; |
1084 | 927 | } |
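
The two ImmediateOffsetNeedsLowering tests above correspond to the two memory-immediate encodings: the signed 9-bit unscaled form (LDUR/STUR) and the unsigned 12-bit scaled form (LDR/STR). Offsets that fit neither are folded into the base with an extra ADD, as the tail of the function does. A standalone sketch of the classification:

    #include <cstdint>

    static bool offsetNeedsLowering(int64_t Offset, unsigned Scale) {
      bool FitsSigned9  = Offset >= -256 && Offset <= 255;   // isInt<9>
      bool FitsScaled12 = Offset >= 0 && Offset % Scale == 0 &&
                          Offset / Scale < 4096;             // isUInt<12>
      return !FitsSigned9 && !FitsScaled12;
    }

    // offsetNeedsLowering(-500, 4) and offsetNeedsLowering(1001, 4) hold;
    // -100 uses the unscaled form and 16 the scaled one.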
1085 | | |
1086 | | void AArch64FastISel::addLoadStoreOperands(Address &Addr, |
1087 | | const MachineInstrBuilder &MIB, |
1088 | | MachineMemOperand::Flags Flags, |
1089 | | unsigned ScaleFactor, |
1090 | 709 | MachineMemOperand *MMO) { |
1091 | 709 | int64_t Offset = Addr.getOffset() / ScaleFactor; |
1092 | 709 | // Frame base works a bit differently. Handle it separately. |
1093 | 709 | if (Addr.isFIBase()) { |
1094 | 241 | int FI = Addr.getFI(); |
1095 | 241 | // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size |
1096 | 241 | // and alignment should be based on the VT. |
1097 | 241 | MMO = FuncInfo.MF->getMachineMemOperand( |
1098 | 241 | MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags, |
1099 | 241 | MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); |
1100 | 241 | // Now add the rest of the operands. |
1101 | 241 | MIB.addFrameIndex(FI).addImm(Offset); |
1102 | 709 | } else { |
1103 | 468 | assert(Addr.isRegBase() && "Unexpected address kind."); |
1104 | 468 | const MCInstrDesc &II = MIB->getDesc(); |
1105 | 468 | unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0; |
1106 | 468 | Addr.setReg( |
1107 | 468 | constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx)); |
1108 | 468 | Addr.setOffsetReg( |
1109 | 468 | constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1)); |
1110 | 468 | if (Addr.getOffsetReg()) { |
1111 | 84 | assert(Addr.getOffset() == 0 && "Unexpected offset"); |
1112 | 84 | bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW || |
1113 | 56 | Addr.getExtendType() == AArch64_AM::SXTX; |
1114 | 84 | MIB.addReg(Addr.getReg()); |
1115 | 84 | MIB.addReg(Addr.getOffsetReg()); |
1116 | 84 | MIB.addImm(IsSigned); |
1117 | 84 | MIB.addImm(Addr.getShift() != 0); |
1118 | 84 | } else |
1119 | 384 | MIB.addReg(Addr.getReg()).addImm(Offset); |
1120 | 468 | } |
1121 | 709 | |
1122 | 709 | if (MMO) |
1123 | 649 | MIB.addMemOperand(MMO); |
1124 | 709 | } |
1125 | | |
1126 | | unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, |
1127 | | const Value *RHS, bool SetFlags, |
1128 | 302 | bool WantResult, bool IsZExt) { |
1129 | 302 | AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend; |
1130 | 302 | bool NeedExtend = false; |
1131 | 302 | switch (RetVT.SimpleTy) { |
1132 | 0 | default: |
1133 | 0 | return 0; |
1134 | 2 | case MVT::i1: |
1135 | 2 | NeedExtend = true; |
1136 | 2 | break; |
1137 | 4 | case MVT::i8: |
1138 | 4 | NeedExtend = true; |
1139 | 4 | ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB; |
1140 | 4 | break; |
1141 | 5 | case MVT::i16: |
1142 | 5 | NeedExtend = true; |
1143 | 5 | ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH; |
1144 | 5 | break; |
1145 | 291 | case MVT::i32: // fall-through |
1146 | 291 | case MVT::i64: |
1147 | 291 | break; |
1148 | 302 | } |
1149 | 302 | MVT SrcVT = RetVT; |
1150 | 302 | RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32); |
1151 | 302 | |
1152 | 302 | // Canonicalize immediates to the RHS first. |
1153 | 302 | if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS)) |
1154 | 0 | std::swap(LHS, RHS); |
1155 | 302 | |
1156 | 302 | // Canonicalize mul by power of 2 to the RHS. |
1157 | 302 | if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS)) |
1158 | 181 | if (isMulPowOf2(LHS)) |
1159 | 0 | std::swap(LHS, RHS); |
1160 | 302 | |
1161 | 302 | // Canonicalize shift immediate to the RHS. |
1162 | 302 | if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS)) |
1163 | 181 | if (const auto *SI = dyn_cast<BinaryOperator>(LHS)) |
1164 | 79 | if (isa<ConstantInt>(SI->getOperand(1))) |
1165 | 4 | if (SI->getOpcode() == Instruction::Shl || |
1166 | 4 | SI->getOpcode() == Instruction::LShr || |
1167 | 3 | SI->getOpcode() == Instruction::AShr) |
1168 | 1 | std::swap(LHS, RHS); |
1169 | 302 | |
1170 | 302 | unsigned LHSReg = getRegForValue(LHS); |
1171 | 302 | if (!LHSReg) |
1172 | 0 | return 0; |
1173 | 302 | bool LHSIsKill = hasTrivialKill(LHS); |
1174 | 302 | |
1175 | 302 | if (NeedExtend) |
1176 | 11 | LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt); |
1177 | 302 | |
1178 | 302 | unsigned ResultReg = 0; |
1179 | 302 | if (const auto *C = dyn_cast<ConstantInt>(RHS)) { |
1180 | 43 | uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue(); |
1181 | 43 | if (C->isNegative()) |
1182 | 6 | ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm, |
1183 | 6 | SetFlags, WantResult); |
1184 | 43 | else |
1185 | 37 | ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags, |
1186 | 37 | WantResult); |
1187 | 302 | } else if (const auto *C = dyn_cast<Constant>(RHS)) |
1188 | 2 | if (C->isNullValue()) |
1189 | 2 | ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags, |
1190 | 2 | WantResult); |
1191 | 302 | |
1192 | 302 | if (ResultReg) |
1193 | 42 | return ResultReg; |
1194 | 260 | |
1195 | 260 | // Only extend the RHS within the instruction if there is a valid extend type. |
1196 | 260 | if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() && |
1197 | 260 | isValueAvailable(RHS)) { |
1198 | 6 | if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) |
1199 | 0 | if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) |
1200 | 0 | if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) { |
1201 | 0 | unsigned RHSReg = getRegForValue(SI->getOperand(0)); |
1202 | 0 | if (!RHSReg) |
1203 | 0 | return 0; |
1204 | 0 | bool RHSIsKill = hasTrivialKill(SI->getOperand(0)); |
1205 | 0 | return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, |
1206 | 0 | RHSIsKill, ExtendType, C->getZExtValue(), |
1207 | 0 | SetFlags, WantResult); |
1208 | 0 | } |
1209 | 6 | unsigned RHSReg = getRegForValue(RHS); |
1210 | 6 | if (!RHSReg) |
1211 | 0 | return 0; |
1212 | 6 | bool RHSIsKill = hasTrivialKill(RHS); |
1213 | 6 | return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill, |
1214 | 6 | ExtendType, 0, SetFlags, WantResult); |
1215 | 6 | } |
1216 | 254 | |
1217 | 254 | // Check if the mul can be folded into the instruction. |
1218 | 254 | if (RHS->hasOneUse() && isValueAvailable(RHS)) { |
1219 | 213 | if (isMulPowOf2(RHS)) { |
1220 | 0 | const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0); |
1221 | 0 | const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1); |
1222 | 0 | |
1223 | 0 | if (const auto *C = dyn_cast<ConstantInt>(MulLHS)) |
1224 | 0 | if (C->getValue().isPowerOf2()) |
1225 | 0 | std::swap(MulLHS, MulRHS); |
1226 | 0 | |
1227 | 0 | assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt."); |
1228 | 0 | uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2(); |
1229 | 0 | unsigned RHSReg = getRegForValue(MulLHS); |
1230 | 0 | if (!RHSReg) |
1231 | 0 | return 0; |
1232 | 0 | bool RHSIsKill = hasTrivialKill(MulLHS); |
1233 | 0 | ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, |
1234 | 0 | RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags, |
1235 | 0 | WantResult); |
1236 | 0 | if (ResultReg) |
1237 | 0 | return ResultReg; |
1238 | 254 | } |
1239 | 213 | } |
1240 | 254 | |
1241 | 254 | // Check if the shift can be folded into the instruction. |
1242 | 254 | if (RHS->hasOneUse() && isValueAvailable(RHS)) { |
1243 | 213 | if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) { |
1244 | 52 | if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) { |
1245 | 12 | AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend; |
1246 | 12 | switch (SI->getOpcode()) { |
1247 | 0 | default: break; |
1248 | 3 | case Instruction::Shl: ShiftType = AArch64_AM::LSL; break; |
1249 | 1 | case Instruction::LShr: ShiftType = AArch64_AM::LSR; break; |
1250 | 8 | case Instruction::AShr: ShiftType = AArch64_AM::ASR; break; |
1251 | 12 | } |
1252 | 12 | uint64_t ShiftVal = C->getZExtValue(); |
1253 | 12 | if (ShiftType != AArch64_AM::InvalidShiftExtend) { |
1254 | 12 | unsigned RHSReg = getRegForValue(SI->getOperand(0)); |
1255 | 12 | if (!RHSReg) |
1256 | 0 | return 0; |
1257 | 12 | bool RHSIsKill = hasTrivialKill(SI->getOperand(0)); |
1258 | 12 | ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, |
1259 | 12 | RHSIsKill, ShiftType, ShiftVal, SetFlags, |
1260 | 12 | WantResult); |
1261 | 12 | if (ResultReg) |
1262 | 10 | return ResultReg; |
1263 | 244 | } |
1264 | 12 | } |
1265 | 52 | } |
1266 | 213 | } |
1267 | 244 | |
1268 | 244 | unsigned RHSReg = getRegForValue(RHS); |
1269 | 244 | if (!RHSReg) |
1270 | 0 | return 0; |
1271 | 244 | bool RHSIsKill = hasTrivialKill(RHS); |
1272 | 244 | |
1273 | 244 | if (NeedExtend) |
1274 | 1 | RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt); |
1275 | 302 | |
1276 | 302 | return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill, |
1277 | 302 | SetFlags, WantResult); |
1278 | 302 | } |
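
The cascade above tries each ADD/SUB operand encoding from most to least specific, and the three swaps at the top fire only for the commutative ADD, so the foldable operand always lands on the RHS. A hedged summary of what each emitter variant produces (illustrative assembly, not taken from a test):

    // _ri: add x0, x1, #42           12-bit immediate, optionally lsl #12
    // _rx: add x0, x1, w2, sxth      RHS sign/zero-extend folded in
    // _rs: add x0, x1, x2, lsl #3    mul-by-8 or a constant shift folded in
    // _rr: add x0, x1, x2            plain register-register fallback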
1279 | | |
1280 | | unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg, |
1281 | | bool LHSIsKill, unsigned RHSReg, |
1282 | | bool RHSIsKill, bool SetFlags, |
1283 | 256 | bool WantResult) { |
1284 | 256 | assert(LHSReg && RHSReg && "Invalid register number."); |
1285 | 256 | |
1286 | 256 | if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP || |
1287 | 256 | RHSReg == AArch64::SP || RHSReg == AArch64::WSP) |
1288 | 2 | return 0; |
1289 | 254 | |
1290 | 254 | if (RetVT != MVT::i32 && RetVT != MVT::i64) |
1291 | 0 | return 0; |
1292 | 254 | |
1293 | 254 | static const unsigned OpcTable[2][2][2] = { |
1294 | 254 | { { AArch64::SUBWrr, AArch64::SUBXrr }, |
1295 | 254 | { AArch64::ADDWrr, AArch64::ADDXrr } }, |
1296 | 254 | { { AArch64::SUBSWrr, AArch64::SUBSXrr }, |
1297 | 254 | { AArch64::ADDSWrr, AArch64::ADDSXrr } } |
1298 | 254 | }; |
1299 | 254 | bool Is64Bit = RetVT == MVT::i64; |
1300 | 254 | unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; |
1301 | 254 | const TargetRegisterClass *RC = |
1302 | 254 | Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1303 | 254 | unsigned ResultReg; |
1304 | 254 | if (WantResult) |
1305 | 208 | ResultReg = createResultReg(RC); |
1306 | 254 | else |
1307 | 46 | ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1308 | 256 | |
1309 | 256 | const MCInstrDesc &II = TII.get(Opc); |
1310 | 256 | LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs()); |
1311 | 256 | RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1); |
1312 | 256 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) |
1313 | 256 | .addReg(LHSReg, getKillRegState(LHSIsKill)) |
1314 | 256 | .addReg(RHSReg, getKillRegState(RHSIsKill)); |
1315 | 256 | return ResultReg; |
1316 | 256 | } |
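The three-dimensional OpcTable above compresses the opcode choice into three boolean subscripts instead of nested switches. A minimal standalone sketch of the same trick, not part of this file (the string names stand in for the real opcode enums):

#include <cstdio>

int main() {
  // Mirrors OpcTable[SetFlags][UseAdd][Is64Bit]: bools index as 0/1.
  static const char *OpcName[2][2][2] = {
      {{"SUBWrr", "SUBXrr"}, {"ADDWrr", "ADDXrr"}},
      {{"SUBSWrr", "SUBSXrr"}, {"ADDSWrr", "ADDSXrr"}}};
  bool SetFlags = true, UseAdd = false, Is64Bit = true;
  printf("%s\n", OpcName[SetFlags][UseAdd][Is64Bit]); // prints "SUBSXrr"
  return 0;
}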
1317 | | |
1318 | | unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg, |
1319 | | bool LHSIsKill, uint64_t Imm, |
1320 | 93 | bool SetFlags, bool WantResult) { |
1321 | 93 | assert(LHSReg && "Invalid register number."); |
1322 | 93 | |
1323 | 93 | if (RetVT != MVT::i32 && RetVT != MVT::i64)
1324 | 0 | return 0; |
1325 | 93 | |
1326 | 93 | unsigned ShiftImm; |
1327 | 93 | if (isUInt<12>(Imm)) |
1328 | 78 | ShiftImm = 0; |
1329 | 15 | else if ((Imm & 0xfff000) == Imm) {
1330 | 4 | ShiftImm = 12; |
1331 | 4 | Imm >>= 12; |
1332 | 4 | } else |
1333 | 11 | return 0; |
1334 | 82 | |
1335 | 82 | static const unsigned OpcTable[2][2][2] = { |
1336 | 82 | { { AArch64::SUBWri, AArch64::SUBXri }, |
1337 | 82 | { AArch64::ADDWri, AArch64::ADDXri } }, |
1338 | 82 | { { AArch64::SUBSWri, AArch64::SUBSXri }, |
1339 | 82 | { AArch64::ADDSWri, AArch64::ADDSXri } } |
1340 | 82 | }; |
1341 | 82 | bool Is64Bit = RetVT == MVT::i64; |
1342 | 82 | unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; |
1343 | 82 | const TargetRegisterClass *RC; |
1344 | 82 | if (SetFlags) |
1345 | 33 | RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1346 | 82 | else
1347 | 49 | RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1348 | 82 | unsigned ResultReg; |
1349 | 82 | if (WantResult) |
1350 | 54 | ResultReg = createResultReg(RC); |
1351 | 82 | else |
1352 | 28 | ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1353 | 93 | |
1354 | 93 | const MCInstrDesc &II = TII.get(Opc); |
1355 | 93 | LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs()); |
1356 | 93 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) |
1357 | 93 | .addReg(LHSReg, getKillRegState(LHSIsKill)) |
1358 | 93 | .addImm(Imm) |
1359 | 93 | .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm)); |
1360 | 93 | return ResultReg; |
1361 | 93 | } |
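emitAddSub_ri above only succeeds when the immediate fits the ADD/SUB immediate field: a 12-bit unsigned value, optionally shifted left by 12. A minimal standalone sketch of that rule (encodeAddSubImm is a hypothetical helper, not part of this file):

#include <cstdint>
#include <cstdio>

// Returns true if Imm is encodable; Enc gets the 12-bit field, Shift 0 or 12.
static bool encodeAddSubImm(uint64_t Imm, uint64_t &Enc, unsigned &Shift) {
  if (Imm < (1ULL << 12)) {         // isUInt<12>(Imm)
    Enc = Imm; Shift = 0; return true;
  }
  if ((Imm & 0xfff000ULL) == Imm) { // only bits [23:12] may be set
    Enc = Imm >> 12; Shift = 12; return true;
  }
  return false; // caller must materialize the constant instead
}

int main() {
  uint64_t Enc; unsigned Shift;
  printf("%d\n", encodeAddSubImm(4095, Enc, Shift));   // 1 (Shift 0)
  printf("%d\n", encodeAddSubImm(0x5000, Enc, Shift)); // 1 (Shift 12)
  printf("%d\n", encodeAddSubImm(0x1001, Enc, Shift)); // 0
  return 0;
}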
1362 | | |
1363 | | unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg, |
1364 | | bool LHSIsKill, unsigned RHSReg, |
1365 | | bool RHSIsKill, |
1366 | | AArch64_AM::ShiftExtendType ShiftType, |
1367 | | uint64_t ShiftImm, bool SetFlags, |
1368 | 28 | bool WantResult) { |
1369 | 28 | assert(LHSReg && RHSReg && "Invalid register number."); |
1370 | 28 | assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP && |
1371 | 28 | RHSReg != AArch64::SP && RHSReg != AArch64::WSP); |
1372 | 28 | |
1373 | 28 | if (RetVT != MVT::i32 && RetVT != MVT::i64)
1374 | 0 | return 0; |
1375 | 28 | |
1376 | 28 | // Don't deal with undefined shifts. |
1377 | 28 | if (ShiftImm >= RetVT.getSizeInBits())
1378 | 2 | return 0; |
1379 | 26 | |
1380 | 26 | static const unsigned OpcTable[2][2][2] = { |
1381 | 26 | { { AArch64::SUBWrs, AArch64::SUBXrs }, |
1382 | 26 | { AArch64::ADDWrs, AArch64::ADDXrs } }, |
1383 | 26 | { { AArch64::SUBSWrs, AArch64::SUBSXrs }, |
1384 | 26 | { AArch64::ADDSWrs, AArch64::ADDSXrs } } |
1385 | 26 | }; |
1386 | 26 | bool Is64Bit = RetVT == MVT::i64; |
1387 | 26 | unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; |
1388 | 26 | const TargetRegisterClass *RC = |
1389 | 26 | Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1390 | 26 | unsigned ResultReg; |
1391 | 26 | if (WantResult) |
1392 | 17 | ResultReg = createResultReg(RC); |
1393 | 26 | else |
1394 | 9 | ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1395 | 28 | |
1396 | 28 | const MCInstrDesc &II = TII.get(Opc); |
1397 | 28 | LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs()); |
1398 | 28 | RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1); |
1399 | 28 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) |
1400 | 28 | .addReg(LHSReg, getKillRegState(LHSIsKill)) |
1401 | 28 | .addReg(RHSReg, getKillRegState(RHSIsKill)) |
1402 | 28 | .addImm(getShifterImm(ShiftType, ShiftImm)); |
1403 | 28 | return ResultReg; |
1404 | 28 | } |
1405 | | |
1406 | | unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg, |
1407 | | bool LHSIsKill, unsigned RHSReg, |
1408 | | bool RHSIsKill, |
1409 | | AArch64_AM::ShiftExtendType ExtType, |
1410 | | uint64_t ShiftImm, bool SetFlags, |
1411 | 7 | bool WantResult) { |
1412 | 7 | assert(LHSReg && RHSReg && "Invalid register number."); |
1413 | 7 | assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR && |
1414 | 7 | RHSReg != AArch64::XZR && RHSReg != AArch64::WZR); |
1415 | 7 | |
1416 | 7 | if (RetVT != MVT::i32 && RetVT != MVT::i64)
1417 | 0 | return 0; |
1418 | 7 | |
1419 | 7 | if (ShiftImm >= 4)
1420 | 0 | return 0; |
1421 | 7 | |
1422 | 7 | static const unsigned OpcTable[2][2][2] = { |
1423 | 7 | { { AArch64::SUBWrx, AArch64::SUBXrx }, |
1424 | 7 | { AArch64::ADDWrx, AArch64::ADDXrx } }, |
1425 | 7 | { { AArch64::SUBSWrx, AArch64::SUBSXrx }, |
1426 | 7 | { AArch64::ADDSWrx, AArch64::ADDSXrx } } |
1427 | 7 | }; |
1428 | 7 | bool Is64Bit = RetVT == MVT::i64; |
1429 | 7 | unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; |
1430 | 7 | const TargetRegisterClass *RC = nullptr; |
1431 | 7 | if (SetFlags) |
1432 | 5 | RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1433 | 7 | else
1434 | 2 | RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1435 | 7 | unsigned ResultReg; |
1436 | 7 | if (WantResult) |
1437 | 2 | ResultReg = createResultReg(RC); |
1438 | 7 | else |
1439 | 5 | ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1440 | 7 | |
1441 | 7 | const MCInstrDesc &II = TII.get(Opc); |
1442 | 7 | LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs()); |
1443 | 7 | RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1); |
1444 | 7 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) |
1445 | 7 | .addReg(LHSReg, getKillRegState(LHSIsKill)) |
1446 | 7 | .addReg(RHSReg, getKillRegState(RHSIsKill)) |
1447 | 7 | .addImm(getArithExtendImm(ExtType, ShiftImm)); |
1448 | 7 | return ResultReg; |
1449 | 7 | } |
1450 | | |
1451 | 111 | bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) { |
1452 | 111 | Type *Ty = LHS->getType(); |
1453 | 111 | EVT EVT = TLI.getValueType(DL, Ty, true); |
1454 | 111 | if (!EVT.isSimple()) |
1455 | 0 | return false; |
1456 | 111 | MVT VT = EVT.getSimpleVT(); |
1457 | 111 | |
1458 | 111 | switch (VT.SimpleTy) { |
1459 | 0 | default: |
1460 | 0 | return false; |
1461 | 64 | case MVT::i1: |
1462 | 64 | case MVT::i8: |
1463 | 64 | case MVT::i16: |
1464 | 64 | case MVT::i32: |
1465 | 64 | case MVT::i64: |
1466 | 64 | return emitICmp(VT, LHS, RHS, IsZExt); |
1467 | 47 | case MVT::f32: |
1468 | 47 | case MVT::f64: |
1469 | 47 | return emitFCmp(VT, LHS, RHS); |
1470 | 0 | } |
1471 | 0 | } |
1472 | | |
1473 | | bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, |
1474 | 64 | bool IsZExt) { |
1475 | 64 | return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false, |
1476 | 64 | IsZExt) != 0; |
1477 | 64 | } |
1478 | | |
1479 | | bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, |
1480 | 11 | uint64_t Imm) { |
1481 | 11 | return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm, |
1482 | 11 | /*SetFlags=*/true, /*WantResult=*/false) != 0; |
1483 | 11 | } |
1484 | | |
1485 | 47 | bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) { |
1486 | 47 | if (RetVT != MVT::f32 && RetVT != MVT::f64)
1487 | 0 | return false; |
1488 | 47 | |
1489 | 47 | // Check to see if the 2nd operand is a constant that we can encode directly |
1490 | 47 | // in the compare. |
1491 | 47 | bool UseImm = false; |
1492 | 47 | if (const auto *CFP = dyn_cast<ConstantFP>(RHS)) |
1493 | 4 | if (CFP->isZero() && !CFP->isNegative())
1494 | 2 | UseImm = true; |
1495 | 47 | |
1496 | 47 | unsigned LHSReg = getRegForValue(LHS); |
1497 | 47 | if (!LHSReg) |
1498 | 0 | return false; |
1499 | 47 | bool LHSIsKill = hasTrivialKill(LHS); |
1500 | 47 | |
1501 | 47 | if (UseImm) {
1502 | 2 | unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1503 | 2 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) |
1504 | 2 | .addReg(LHSReg, getKillRegState(LHSIsKill)); |
1505 | 2 | return true; |
1506 | 2 | } |
1507 | 45 | |
1508 | 45 | unsigned RHSReg = getRegForValue(RHS); |
1509 | 45 | if (!RHSReg) |
1510 | 0 | return false; |
1511 | 45 | bool RHSIsKill = hasTrivialKill(RHS); |
1512 | 45 | |
1513 | 45 | unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1514 | 47 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) |
1515 | 47 | .addReg(LHSReg, getKillRegState(LHSIsKill)) |
1516 | 47 | .addReg(RHSReg, getKillRegState(RHSIsKill)); |
1517 | 47 | return true; |
1518 | 47 | } |
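The UseImm path above exists because FCMP has a form that compares against the literal #0.0, saving a constant materialization; only a positive zero qualifies. A standalone sketch of the same test (canUseFcmpZeroForm is a hypothetical name):

#include <cmath>
#include <cstdio>

static bool canUseFcmpZeroForm(double RHS) {
  // Matches CFP->isZero() && !CFP->isNegative() above: +0.0 only.
  return RHS == 0.0 && !std::signbit(RHS);
}

int main() {
  printf("%d %d %d\n", canUseFcmpZeroForm(0.0), canUseFcmpZeroForm(-0.0),
         canUseFcmpZeroForm(1.0)); // prints "1 0 0"
  return 0;
}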
1519 | | |
1520 | | unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS, |
1521 | 217 | bool SetFlags, bool WantResult, bool IsZExt) { |
1522 | 217 | return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult, |
1523 | 217 | IsZExt); |
1524 | 217 | } |
1525 | | |
1526 | | /// \brief This method is a wrapper to simplify add emission. |
1527 | | /// |
1528 | | /// First try to emit an add with an immediate operand using emitAddSub_ri. If |
1529 | | /// that fails, then try to materialize the immediate into a register and use |
1530 | | /// emitAddSub_rr instead. |
1531 | | unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, |
1532 | 37 | int64_t Imm) { |
1533 | 37 | unsigned ResultReg; |
1534 | 37 | if (Imm < 0) |
1535 | 2 | ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm); |
1536 | 37 | else |
1537 | 35 | ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm); |
1538 | 37 | |
1539 | 37 | if (ResultReg) |
1540 | 29 | return ResultReg; |
1541 | 8 | |
1542 | 8 | unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm); |
1543 | 8 | if (!CReg) |
1544 | 0 | return 0; |
1545 | 8 | |
1546 | 8 | ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true); |
1547 | 8 | return ResultReg; |
1548 | 8 | } |
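Two details of the wrapper above are easy to miss: a negative immediate is handled by flipping the add into a subtract of the magnitude, and an immediate neither form can encode falls back to a materialized constant register. A rough standalone sketch of the sign flip (illustration only):

#include <cstdint>
#include <cstdio>

int main() {
  int64_t Imm = -42;
  bool UseAdd = Imm >= 0; // negative adds become subtracts of -Imm
  uint64_t Magnitude = UseAdd ? (uint64_t)Imm : (uint64_t)-Imm;
  printf("%s x0, x0, #%llu\n", UseAdd ? "add" : "sub",
         (unsigned long long)Magnitude); // prints "sub x0, x0, #42"
  return 0;
}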
1549 | | |
1550 | | unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS, |
1551 | 85 | bool SetFlags, bool WantResult, bool IsZExt) { |
1552 | 85 | return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult, |
1553 | 85 | IsZExt); |
1554 | 85 | } |
1555 | | |
1556 | | unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg, |
1557 | | bool LHSIsKill, unsigned RHSReg, |
1558 | 4 | bool RHSIsKill, bool WantResult) { |
1559 | 4 | return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg, |
1560 | 4 | RHSIsKill, /*SetFlags=*/true, WantResult); |
1561 | 4 | } |
1562 | | |
1563 | | unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg, |
1564 | | bool LHSIsKill, unsigned RHSReg, |
1565 | | bool RHSIsKill, |
1566 | | AArch64_AM::ShiftExtendType ShiftType, |
1567 | 9 | uint64_t ShiftImm, bool WantResult) { |
1568 | 9 | return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg, |
1569 | 9 | RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true, |
1570 | 9 | WantResult); |
1571 | 9 | } |
1572 | | |
1573 | | unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT, |
1574 | 89 | const Value *LHS, const Value *RHS) { |
1575 | 89 | // Canonicalize immediates to the RHS first. |
1576 | 89 | if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1577 | 0 | std::swap(LHS, RHS); |
1578 | 89 | |
1579 | 89 | // Canonicalize mul by power-of-2 to the RHS. |
1580 | 89 | if (LHS->hasOneUse() && isValueAvailable(LHS))
1581 | 89 | if (isMulPowOf2(LHS))
1582 | 0 | std::swap(LHS, RHS); |
1583 | 89 | |
1584 | 89 | // Canonicalize shift immediate to the RHS. |
1585 | 89 | if (LHS->hasOneUse() && isValueAvailable(LHS))
1586 | 89 | if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1587 | 0 | if (isa<ConstantInt>(SI->getOperand(1)))
1588 | 0 | std::swap(LHS, RHS); |
1589 | 89 | |
1590 | 89 | unsigned LHSReg = getRegForValue(LHS); |
1591 | 89 | if (!LHSReg) |
1592 | 0 | return 0; |
1593 | 89 | bool LHSIsKill = hasTrivialKill(LHS); |
1594 | 89 | |
1595 | 89 | unsigned ResultReg = 0; |
1596 | 89 | if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1597 | 18 | uint64_t Imm = C->getZExtValue(); |
1598 | 18 | ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm); |
1599 | 18 | } |
1600 | 89 | if (ResultReg) |
1601 | 18 | return ResultReg; |
1602 | 71 | |
1603 | 71 | // Check if the mul can be folded into the instruction. |
1604 | 71 | if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1605 | 71 | if (isMulPowOf2(RHS)) {
1606 | 6 | const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0); |
1607 | 6 | const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1); |
1608 | 6 | |
1609 | 6 | if (const auto *C = dyn_cast<ConstantInt>(MulLHS)) |
1610 | 0 | if (C->getValue().isPowerOf2())
1611 | 0 | std::swap(MulLHS, MulRHS); |
1612 | 6 | |
1613 | 6 | assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt."); |
1614 | 6 | uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2(); |
1615 | 6 | |
1616 | 6 | unsigned RHSReg = getRegForValue(MulLHS); |
1617 | 6 | if (!RHSReg) |
1618 | 0 | return 0; |
1619 | 6 | bool RHSIsKill = hasTrivialKill(MulLHS); |
1620 | 6 | ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg, |
1621 | 6 | RHSIsKill, ShiftVal); |
1622 | 6 | if (ResultReg) |
1623 | 6 | return ResultReg; |
1624 | 65 | } |
1625 | 71 | } |
1626 | 65 | |
1627 | 65 | // Check if the shift can be folded into the instruction. |
1628 | 65 | if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1629 | 65 | if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1630 | 24 | if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1631 | 24 | uint64_t ShiftVal = C->getZExtValue(); |
1632 | 24 | unsigned RHSReg = getRegForValue(SI->getOperand(0)); |
1633 | 24 | if (!RHSReg) |
1634 | 0 | return 0; |
1635 | 24 | bool RHSIsKill = hasTrivialKill(SI->getOperand(0)); |
1636 | 24 | ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg, |
1637 | 24 | RHSIsKill, ShiftVal); |
1638 | 24 | if (ResultReg) |
1639 | 12 | return ResultReg; |
1640 | 53 | } |
1641 | 65 | } |
1642 | 53 | |
1643 | 53 | unsigned RHSReg = getRegForValue(RHS); |
1644 | 53 | if (!RHSReg) |
1645 | 0 | return 0; |
1646 | 53 | bool RHSIsKill = hasTrivialKill(RHS); |
1647 | 53 | |
1648 | 53 | MVT VT = std::max(MVT::i32, RetVT.SimpleTy); |
1649 | 53 | ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill); |
1650 | 53 | if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1651 | 12 | uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1652 | 12 | ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); |
1653 | 12 | } |
1654 | 89 | return ResultReg; |
1655 | 89 | } |
1656 | | |
1657 | | unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, |
1658 | | unsigned LHSReg, bool LHSIsKill, |
1659 | 203 | uint64_t Imm) { |
1660 | 203 | static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR), |
1661 | 203 | "ISD nodes are not consecutive!"); |
1662 | 203 | static const unsigned OpcTable[3][2] = { |
1663 | 203 | { AArch64::ANDWri, AArch64::ANDXri }, |
1664 | 203 | { AArch64::ORRWri, AArch64::ORRXri }, |
1665 | 203 | { AArch64::EORWri, AArch64::EORXri } |
1666 | 203 | }; |
1667 | 203 | const TargetRegisterClass *RC; |
1668 | 203 | unsigned Opc; |
1669 | 203 | unsigned RegSize; |
1670 | 203 | switch (RetVT.SimpleTy) { |
1671 | 0 | default: |
1672 | 0 | return 0; |
1673 | 197 | case MVT::i1: |
1674 | 197 | case MVT::i8: |
1675 | 197 | case MVT::i16: |
1676 | 197 | case MVT::i32: { |
1677 | 197 | unsigned Idx = ISDOpc - ISD::AND; |
1678 | 197 | Opc = OpcTable[Idx][0]; |
1679 | 197 | RC = &AArch64::GPR32spRegClass; |
1680 | 197 | RegSize = 32; |
1681 | 197 | break; |
1682 | 197 | } |
1683 | 6 | case MVT::i64: |
1684 | 6 | Opc = OpcTable[ISDOpc - ISD::AND][1]; |
1685 | 6 | RC = &AArch64::GPR64spRegClass; |
1686 | 6 | RegSize = 64; |
1687 | 6 | break; |
1688 | 203 | } |
1689 | 203 | |
1690 | 203 | if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1691 | 0 | return 0; |
1692 | 203 | |
1693 | 203 | unsigned ResultReg = |
1694 | 203 | fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill, |
1695 | 203 | AArch64_AM::encodeLogicalImmediate(Imm, RegSize)); |
1696 | 203 | if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1697 | 4 | uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1698 | 4 | ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); |
1699 | 4 | } |
1700 | 203 | return ResultReg; |
1701 | 203 | } |
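isLogicalImmediate above enforces AArch64's bitmask-immediate rule: the value must be a repetition of a 2/4/8/16/32/64-bit element that is a (possibly wrapping) contiguous run of ones, excluding all-zeros and all-ones. A simplified standalone sketch of that check, assuming this reading of the encoding (the real encoder in AArch64AddressingModes.h additionally computes the N/immr/imms fields):

#include <cstdint>
#include <cstdio>

static bool isMask64(uint64_t V) { return (V & (V + 1)) == 0; }
static bool isShiftedMask64(uint64_t V) { return V && isMask64((V - 1) | V); }

static bool isLogicalImmSketch(uint64_t Imm, unsigned RegSize) {
  if (RegSize == 32) // replicate a 32-bit value to the full 64 bits
    Imm = (Imm & 0xffffffffULL) | (Imm << 32);
  if (Imm == 0 || Imm == ~0ULL)
    return false;
  unsigned Size = 64; // find the smallest repeating element
  while (Size > 2) {
    unsigned Half = Size / 2;
    uint64_t HalfMask = (1ULL << Half) - 1;
    if (((Imm >> Half) & HalfMask) != (Imm & HalfMask))
      break;
    Size = Half;
  }
  uint64_t Mask = (Size == 64) ? ~0ULL : (1ULL << Size) - 1;
  uint64_t Elt = Imm & Mask;
  // A wrapping run of ones has a non-wrapping run as its complement.
  return isShiftedMask64(Elt) || isShiftedMask64(~Elt & Mask);
}

int main() {
  printf("%d\n", isLogicalImmSketch(0xff, 64));                 // 1
  printf("%d\n", isLogicalImmSketch(0x5555555555555555ULL, 64)); // 1
  printf("%d\n", isLogicalImmSketch(0x1234, 64));               // 0
  return 0;
}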
1702 | | |
1703 | | unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, |
1704 | | unsigned LHSReg, bool LHSIsKill, |
1705 | | unsigned RHSReg, bool RHSIsKill, |
1706 | 30 | uint64_t ShiftImm) { |
1707 | 30 | static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR), |
1708 | 30 | "ISD nodes are not consecutive!"); |
1709 | 30 | static const unsigned OpcTable[3][2] = { |
1710 | 30 | { AArch64::ANDWrs, AArch64::ANDXrs }, |
1711 | 30 | { AArch64::ORRWrs, AArch64::ORRXrs }, |
1712 | 30 | { AArch64::EORWrs, AArch64::EORXrs } |
1713 | 30 | }; |
1714 | 30 | |
1715 | 30 | // Don't deal with undefined shifts. |
1716 | 30 | if (ShiftImm >= RetVT.getSizeInBits()) |
1717 | 12 | return 0; |
1718 | 18 | |
1719 | 18 | const TargetRegisterClass *RC; |
1720 | 18 | unsigned Opc; |
1721 | 18 | switch (RetVT.SimpleTy) { |
1722 | 0 | default: |
1723 | 0 | return 0; |
1724 | 12 | case MVT::i1: |
1725 | 12 | case MVT::i8: |
1726 | 12 | case MVT::i16: |
1727 | 12 | case MVT::i32: |
1728 | 12 | Opc = OpcTable[ISDOpc - ISD::AND][0]; |
1729 | 12 | RC = &AArch64::GPR32RegClass; |
1730 | 12 | break; |
1731 | 6 | case MVT::i64: |
1732 | 6 | Opc = OpcTable[ISDOpc - ISD::AND][1]; |
1733 | 6 | RC = &AArch64::GPR64RegClass; |
1734 | 6 | break; |
1735 | 18 | } |
1736 | 18 | unsigned ResultReg = |
1737 | 18 | fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill, |
1738 | 18 | AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm)); |
1739 | 18 | if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1740 | 6 | uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1741 | 6 | ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); |
1742 | 6 | } |
1743 | 30 | return ResultReg; |
1744 | 30 | } |
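Both emitAddSub_rs and emitLogicalOp_rs above refuse to fold a shift amount that reaches the register width ("Don't deal with undefined shifts"): in LLVM IR such a shift does not produce a well-defined value, so folding it into an ALU instruction would invent one. A small standalone illustration of the guard (the C shift carries the same restriction):

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t X = 1;
  unsigned Amt = 35;
  // Shifting a 32-bit value by >= 32 is undefined at the source level, so
  // check the amount before doing anything with it, as the code above does.
  if (Amt >= 32) {
    printf("not folded\n");
    return 0;
  }
  printf("%u\n", X << Amt);
  return 0;
}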
1745 | | |
1746 | | unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, |
1747 | 184 | uint64_t Imm) { |
1748 | 184 | return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm); |
1749 | 184 | } |
1750 | | |
1751 | | unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr, |
1752 | 422 | bool WantZExt, MachineMemOperand *MMO) { |
1753 | 422 | if (!TLI.allowsMisalignedMemoryAccesses(VT)) |
1754 | 2 | return 0; |
1755 | 420 | |
1756 | 420 | // Simplify this down to something we can handle. |
1757 | 420 | if (!simplifyAddress(Addr, VT))
1758 | 76 | return 0; |
1759 | 344 | |
1760 | 344 | unsigned ScaleFactor = getImplicitScaleFactor(VT); |
1761 | 344 | if (!ScaleFactor) |
1762 | 0 | llvm_unreachable("Unexpected value type."); |
1763 | 344 | |
1764 | 344 | // Negative offsets require unscaled, 9-bit, signed immediate offsets. |
1765 | 344 | // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. |
1766 | 344 | bool UseScaled = true; |
1767 | 344 | if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1768 | 26 | UseScaled = false; |
1769 | 26 | ScaleFactor = 1; |
1770 | 26 | } |
1771 | 344 | |
1772 | 344 | static const unsigned GPOpcTable[2][8][4] = { |
1773 | 344 | // Sign-extend. |
1774 | 344 | { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi, |
1775 | 344 | AArch64::LDURXi }, |
1776 | 344 | { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi, |
1777 | 344 | AArch64::LDURXi }, |
1778 | 344 | { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui, |
1779 | 344 | AArch64::LDRXui }, |
1780 | 344 | { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui, |
1781 | 344 | AArch64::LDRXui }, |
1782 | 344 | { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX, |
1783 | 344 | AArch64::LDRXroX }, |
1784 | 344 | { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX, |
1785 | 344 | AArch64::LDRXroX }, |
1786 | 344 | { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW, |
1787 | 344 | AArch64::LDRXroW }, |
1788 | 344 | { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW, |
1789 | 344 | AArch64::LDRXroW } |
1790 | 344 | }, |
1791 | 344 | // Zero-extend. |
1792 | 344 | { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, |
1793 | 344 | AArch64::LDURXi }, |
1794 | 344 | { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, |
1795 | 344 | AArch64::LDURXi }, |
1796 | 344 | { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, |
1797 | 344 | AArch64::LDRXui }, |
1798 | 344 | { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, |
1799 | 344 | AArch64::LDRXui }, |
1800 | 344 | { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, |
1801 | 344 | AArch64::LDRXroX }, |
1802 | 344 | { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, |
1803 | 344 | AArch64::LDRXroX }, |
1804 | 344 | { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, |
1805 | 344 | AArch64::LDRXroW }, |
1806 | 344 | { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, |
1807 | 344 | AArch64::LDRXroW } |
1808 | 344 | } |
1809 | 344 | }; |
1810 | 344 | |
1811 | 344 | static const unsigned FPOpcTable[4][2] = { |
1812 | 344 | { AArch64::LDURSi, AArch64::LDURDi }, |
1813 | 344 | { AArch64::LDRSui, AArch64::LDRDui }, |
1814 | 344 | { AArch64::LDRSroX, AArch64::LDRDroX }, |
1815 | 344 | { AArch64::LDRSroW, AArch64::LDRDroW } |
1816 | 344 | }; |
1817 | 344 | |
1818 | 344 | unsigned Opc; |
1819 | 344 | const TargetRegisterClass *RC; |
1820 | 344 | bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1821 | 194 | Addr.getOffsetReg();
1822 | 344 | unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1823 | 344 | if (Addr.getExtendType() == AArch64_AM::UXTW || |
1824 | 333 | Addr.getExtendType() == AArch64_AM::SXTW) |
1825 | 39 | Idx++; |
1826 | 344 | |
1827 | 344 | bool IsRet64Bit = RetVT == MVT::i64; |
1828 | 344 | switch (VT.SimpleTy) { |
1829 | 0 | default: |
1830 | 0 | llvm_unreachable("Unexpected value type."); |
1831 | 66 | case MVT::i1: // Intentional fall-through. |
1832 | 66 | case MVT::i8: |
1833 | 66 | Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0]; |
1834 | 13 | RC = (IsRet64Bit && !WantZExt) ? |
1835 | 66 | &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1836 | 66 | break; |
1837 | 44 | case MVT::i16: |
1838 | 44 | Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1]; |
1839 | 12 | RC = (IsRet64Bit && !WantZExt) ? |
1840 | 44 | &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1841 | 44 | break; |
1842 | 112 | case MVT::i32: |
1843 | 112 | Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2]; |
1844 | 27 | RC = (IsRet64Bit && !WantZExt) ? |
1845 | 112 | &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1846 | 112 | break; |
1847 | 101 | case MVT::i64: |
1848 | 101 | Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3]; |
1849 | 101 | RC = &AArch64::GPR64RegClass; |
1850 | 101 | break; |
1851 | 7 | case MVT::f32: |
1852 | 7 | Opc = FPOpcTable[Idx][0]; |
1853 | 7 | RC = &AArch64::FPR32RegClass; |
1854 | 7 | break; |
1855 | 14 | case MVT::f64: |
1856 | 14 | Opc = FPOpcTable[Idx][1]; |
1857 | 14 | RC = &AArch64::FPR64RegClass; |
1858 | 14 | break; |
1859 | 344 | } |
1860 | 344 | |
1861 | 344 | // Create the base instruction, then add the operands. |
1862 | 344 | unsigned ResultReg = createResultReg(RC); |
1863 | 344 | MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
1864 | 344 | TII.get(Opc), ResultReg); |
1865 | 344 | addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO); |
1866 | 344 | |
1867 | 344 | // Loading an i1 requires special handling. |
1868 | 344 | if (VT == MVT::i1) {
1869 | 3 | unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1); |
1870 | 3 | assert(ANDReg && "Unexpected AND instruction emission failure."); |
1871 | 3 | ResultReg = ANDReg; |
1872 | 3 | } |
1873 | 344 | |
1874 | 344 | // For zero-extending loads to 64bit we emit a 32bit load and then convert |
1875 | 344 | // the 32bit reg to a 64bit reg. |
1876 | 344 | if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1877 | 21 | unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass); |
1878 | 21 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
1879 | 21 | TII.get(AArch64::SUBREG_TO_REG), Reg64) |
1880 | 21 | .addImm(0) |
1881 | 21 | .addReg(ResultReg, getKillRegState(true)) |
1882 | 21 | .addImm(AArch64::sub_32); |
1883 | 21 | ResultReg = Reg64; |
1884 | 21 | } |
1885 | 422 | return ResultReg; |
1886 | 422 | } |
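The scaled/unscaled decision above follows the two load encodings: LDR takes a 12-bit unsigned offset scaled by the access size, LDUR a 9-bit signed unscaled offset. A standalone sketch of the choice (classifyOffset is a hypothetical helper; in this file, offsets neither form can encode are folded into the base register by simplifyAddress):

#include <cstdint>
#include <cstdio>

enum OffsetMode { ScaledLDR, UnscaledLDUR, NeedsBaseFold };

static OffsetMode classifyOffset(int64_t Off, unsigned ScaleFactor) {
  if (Off < 0 || (Off & (ScaleFactor - 1))) {
    // Negative or unaligned offsets only fit the 9-bit signed LDUR form.
    return (Off >= -256 && Off <= 255) ? UnscaledLDUR : NeedsBaseFold;
  }
  // Aligned non-negative offsets use the scaled 12-bit unsigned LDR form.
  return ((uint64_t)Off / ScaleFactor < 4096) ? ScaledLDR : NeedsBaseFold;
}

int main() {
  printf("%d\n", classifyOffset(16, 8)); // 0: ldr x0, [x1, #16]
  printf("%d\n", classifyOffset(-8, 8)); // 1: ldur x0, [x1, #-8]
  printf("%d\n", classifyOffset(3, 8));  // 1: unaligned -> ldur
  return 0;
}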
1887 | | |
1888 | 274 | bool AArch64FastISel::selectAddSub(const Instruction *I) { |
1889 | 274 | MVT VT; |
1890 | 274 | if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) |
1891 | 0 | return false; |
1892 | 274 | |
1893 | 274 | if (VT.isVector())
1894 | 71 | return selectOperator(I, I->getOpcode()); |
1895 | 203 | |
1896 | 203 | unsigned ResultReg; |
1897 | 203 | switch (I->getOpcode()) { |
1898 | 0 | default: |
1899 | 0 | llvm_unreachable("Unexpected instruction."); |
1900 | 195 | case Instruction::Add: |
1901 | 195 | ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1)); |
1902 | 195 | break; |
1903 | 8 | case Instruction::Sub: |
1904 | 8 | ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1)); |
1905 | 8 | break; |
1906 | 203 | } |
1907 | 203 | if (!ResultReg)
1908 | 0 | return false; |
1909 | 203 | |
1910 | 203 | updateValueMap(I, ResultReg); |
1911 | 203 | return true; |
1912 | 203 | } |
1913 | | |
1914 | 89 | bool AArch64FastISel::selectLogicalOp(const Instruction *I) { |
1915 | 89 | MVT VT; |
1916 | 89 | if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) |
1917 | 0 | return false; |
1918 | 89 | |
1919 | 89 | if (VT.isVector())
1920 | 0 | return selectOperator(I, I->getOpcode()); |
1921 | 89 | |
1922 | 89 | unsigned ResultReg; |
1923 | 89 | switch (I->getOpcode()) { |
1924 | 0 | default: |
1925 | 0 | llvm_unreachable("Unexpected instruction."); |
1926 | 50 | case Instruction::And: |
1927 | 50 | ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1)); |
1928 | 50 | break; |
1929 | 20 | case Instruction::Or: |
1930 | 20 | ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1)); |
1931 | 20 | break; |
1932 | 19 | case Instruction::Xor: |
1933 | 19 | ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1)); |
1934 | 19 | break; |
1935 | 89 | } |
1936 | 89 | if (!ResultReg)
1937 | 0 | return false; |
1938 | 89 | |
1939 | 89 | updateValueMap(I, ResultReg); |
1940 | 89 | return true; |
1941 | 89 | } |
1942 | | |
1943 | 408 | bool AArch64FastISel::selectLoad(const Instruction *I) { |
1944 | 408 | MVT VT; |
1945 | 408 | // Verify we have a legal type before going any further. Currently, we handle |
1946 | 408 | // simple types that will directly fit in a register (i32/f32/i64/f64) or |
1947 | 408 | // those that can be sign or zero-extended to a basic operation (i1/i8/i16). |
1948 | 408 | if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) || |
1949 | 403 | cast<LoadInst>(I)->isAtomic()) |
1950 | 7 | return false; |
1951 | 401 | |
1952 | 401 | const Value *SV = I->getOperand(0); |
1953 | 401 | if (TLI.supportSwiftError()) {
1954 | 401 | // Swifterror values can come from either a function parameter with |
1955 | 401 | // swifterror attribute or an alloca with swifterror attribute. |
1956 | 401 | if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1957 | 103 | if (Arg->hasSwiftErrorAttr()) |
1958 | 0 | return false; |
1959 | 401 | } |
1960 | 401 | |
1961 | 401 | if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1962 | 86 | if (Alloca->isSwiftError()) |
1963 | 5 | return false; |
1964 | 396 | } |
1965 | 401 | } |
1966 | 396 | |
1967 | 396 | // See if we can handle this address. |
1968 | 396 | Address Addr; |
1969 | 396 | if (!computeAddress(I->getOperand(0), Addr, I->getType())) |
1970 | 15 | return false; |
1971 | 381 | |
1972 | 381 | // Fold the following sign-/zero-extend into the load instruction. |
1973 | 381 | bool WantZExt = true; |
1974 | 381 | MVT RetVT = VT; |
1975 | 381 | const Value *IntExtVal = nullptr; |
1976 | 381 | if (I->hasOneUse()) {
1977 | 287 | if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1978 | 40 | if (isTypeSupported(ZE->getType(), RetVT)) |
1979 | 40 | IntExtVal = ZE; |
1980 | 40 | else |
1981 | 0 | RetVT = VT; |
1982 | 287 | } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1983 | 61 | if (isTypeSupported(SE->getType(), RetVT)) |
1984 | 61 | IntExtVal = SE; |
1985 | 61 | else |
1986 | 0 | RetVT = VT; |
1987 | 247 | WantZExt = false; |
1988 | 247 | } |
1989 | 287 | } |
1990 | 381 | |
1991 | 381 | unsigned ResultReg = |
1992 | 381 | emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I)); |
1993 | 381 | if (!ResultReg) |
1994 | 78 | return false; |
1995 | 303 | |
1996 | 303 | // There are a few different cases we have to handle, because the load or the |
1997 | 303 | // sign-/zero-extend might not be selected by FastISel if we fall-back to |
1998 | 303 | // SelectionDAG. There is also an ordering issue when both instructions are in |
1999 | 303 | // different basic blocks. |
2000 | 303 | // 1.) The load instruction is selected by FastISel, but the integer extend |
2001 | 303 | // not. This usually happens when the integer extend is in a different |
2002 | 303 | // basic block and SelectionDAG took over for that basic block. |
2003 | 303 | // 2.) The load instruction is selected before the integer extend. This only |
2004 | 303 | // happens when the integer extend is in a different basic block. |
2005 | 303 | // 3.) The load instruction is selected by SelectionDAG and the integer extend |
2006 | 303 | // by FastISel. This happens if there are instructions between the load |
2007 | 303 | // and the integer extend that couldn't be selected by FastISel. |
2008 | 303 | if (IntExtVal) {
2009 | 101 | // The integer extend hasn't been emitted yet. FastISel or SelectionDAG |
2010 | 101 | // could select it. Emit a copy to subreg if necessary. FastISel will remove |
2011 | 101 | // it when it selects the integer extend. |
2012 | 101 | unsigned Reg = lookUpRegForValue(IntExtVal); |
2013 | 101 | auto *MI = MRI.getUniqueVRegDef(Reg); |
2014 | 101 | if (!MI) {
2015 | 1 | if (RetVT == MVT::i64 && VT <= MVT::i32) {
2016 | 1 | if (WantZExt) {
2017 | 1 | // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG). |
2018 | 1 | std::prev(FuncInfo.InsertPt)->eraseFromParent(); |
2019 | 1 | ResultReg = std::prev(FuncInfo.InsertPt)->getOperand(0).getReg(); |
2020 | 1 | } else |
2021 | 0 | ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg, |
2022 | 0 | /*IsKill=*/true, |
2023 | 0 | AArch64::sub_32); |
2024 | 1 | } |
2025 | 1 | updateValueMap(I, ResultReg); |
2026 | 1 | return true; |
2027 | 1 | } |
2028 | 100 | |
2029 | 100 | // The integer extend has already been emitted - delete all the instructions |
2030 | 100 | // that have been emitted by the integer extend lowering code and use the |
2031 | 100 | // result from the load instruction directly. |
2032 | 251 | while (MI) {
2033 | 151 | Reg = 0; |
2034 | 202 | for (auto &Opnd : MI->uses()) { |
2035 | 202 | if (Opnd.isReg()202 ) { |
2036 | 151 | Reg = Opnd.getReg(); |
2037 | 151 | break; |
2038 | 151 | } |
2039 | 151 | } |
2040 | 151 | MI->eraseFromParent(); |
2041 | 151 | MI = nullptr; |
2042 | 151 | if (Reg) |
2043 | 151 | MI = MRI.getUniqueVRegDef(Reg); |
2044 | 151 | } |
2045 | 101 | updateValueMap(IntExtVal, ResultReg); |
2046 | 101 | return true; |
2047 | 101 | } |
2048 | 202 | |
2049 | 202 | updateValueMap(I, ResultReg); |
2050 | 202 | return true; |
2051 | 202 | } |
2052 | | |
2053 | | bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg, |
2054 | | unsigned AddrReg, |
2055 | 18 | MachineMemOperand *MMO) { |
2056 | 18 | unsigned Opc; |
2057 | 18 | switch (VT.SimpleTy) { |
2058 | 0 | default: return false; |
2059 | 4 | case MVT::i8: Opc = AArch64::STLRB; break; |
2060 | 4 | case MVT::i16: Opc = AArch64::STLRH; break; |
2061 | 6 | case MVT::i32: Opc = AArch64::STLRW; break; |
2062 | 4 | case MVT::i64: Opc = AArch64::STLRX; break; |
2063 | 18 | } |
2064 | 18 | |
2065 | 18 | const MCInstrDesc &II = TII.get(Opc); |
2066 | 18 | SrcReg = constrainOperandRegClass(II, SrcReg, 0); |
2067 | 18 | AddrReg = constrainOperandRegClass(II, AddrReg, 1); |
2068 | 18 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) |
2069 | 18 | .addReg(SrcReg) |
2070 | 18 | .addReg(AddrReg) |
2071 | 18 | .addMemOperand(MMO); |
2072 | 18 | return true; |
2073 | 18 | } |
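emitStoreRelease exists because AArch64 provides store-release instructions (STLRB/STLRH/STLRW/STLRX) that order all prior memory accesses before the store. This is what a release-ordered atomic store in C++ compiles to; a minimal example of source that would take this path:

#include <atomic>

int Data;
std::atomic<int> Flag{0};

// The plain store to Data may not be reordered past the release store to
// Flag; on AArch64 the flag store lowers to STLRW.
void publish() {
  Data = 42;
  Flag.store(1, std::memory_order_release);
}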
2074 | | |
2075 | | bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr, |
2076 | 507 | MachineMemOperand *MMO) { |
2077 | 507 | if (!TLI.allowsMisalignedMemoryAccesses(VT)) |
2078 | 0 | return false; |
2079 | 507 | |
2080 | 507 | // Simplify this down to something we can handle. |
2081 | 507 | if (!simplifyAddress(Addr, VT))
2082 | 142 | return false; |
2083 | 365 | |
2084 | 365 | unsigned ScaleFactor = getImplicitScaleFactor(VT); |
2085 | 365 | if (!ScaleFactor) |
2086 | 0 | llvm_unreachable("Unexpected value type."); |
2087 | 365 | |
2088 | 365 | // Negative offsets require unscaled, 9-bit, signed immediate offsets. |
2089 | 365 | // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. |
2090 | 365 | bool UseScaled = true; |
2091 | 365 | if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2092 | 6 | UseScaled = false; |
2093 | 6 | ScaleFactor = 1; |
2094 | 6 | } |
2095 | 365 | |
2096 | 365 | static const unsigned OpcTable[4][6] = { |
2097 | 365 | { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi, |
2098 | 365 | AArch64::STURSi, AArch64::STURDi }, |
2099 | 365 | { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui, |
2100 | 365 | AArch64::STRSui, AArch64::STRDui }, |
2101 | 365 | { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX, |
2102 | 365 | AArch64::STRSroX, AArch64::STRDroX }, |
2103 | 365 | { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW, |
2104 | 365 | AArch64::STRSroW, AArch64::STRDroW } |
2105 | 365 | }; |
2106 | 365 | |
2107 | 365 | unsigned Opc; |
2108 | 365 | bool VTIsi1 = false; |
2109 | 365 | bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2110 | 161 | Addr.getOffsetReg();
2111 | 365 | unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2112 | 365 | if (Addr.getExtendType() == AArch64_AM::UXTW || |
2113 | 365 | Addr.getExtendType() == AArch64_AM::SXTW) |
2114 | 0 | Idx++; |
2115 | 365 | |
2116 | 365 | switch (VT.SimpleTy) { |
2117 | 0 | default: llvm_unreachable("Unexpected value type.");
2118 | 8 | case MVT::i1: VTIsi1 = true; LLVM_FALLTHROUGH;
2119 | 44 | case MVT::i8: Opc = OpcTable[Idx][0]; break; |
2120 | 21 | case MVT::i16: Opc = OpcTable[Idx][1]; break; |
2121 | 139 | case MVT::i32: Opc = OpcTable[Idx][2]; break; |
2122 | 125 | case MVT::i64: Opc = OpcTable[Idx][3]; break; |
2123 | 11 | case MVT::f32: Opc = OpcTable[Idx][4]; break; |
2124 | 25 | case MVT::f64: Opc = OpcTable[Idx][5]; break; |
2125 | 365 | } |
2126 | 365 | |
2127 | 365 | // Storing an i1 requires special handling. |
2128 | 365 | if (VTIsi1 && SrcReg != AArch64::WZR) {
2129 | 7 | unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1); |
2130 | 7 | assert(ANDReg && "Unexpected AND instruction emission failure."); |
2131 | 7 | SrcReg = ANDReg; |
2132 | 7 | } |
2133 | 507 | // Create the base instruction, then add the operands. |
2134 | 507 | const MCInstrDesc &II = TII.get(Opc); |
2135 | 507 | SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs()); |
2136 | 507 | MachineInstrBuilder MIB = |
2137 | 507 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg); |
2138 | 507 | addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO); |
2139 | 507 | |
2140 | 507 | return true; |
2141 | 507 | } |
2142 | | |
2143 | 472 | bool AArch64FastISel::selectStore(const Instruction *I) { |
2144 | 472 | MVT VT; |
2145 | 472 | const Value *Op0 = I->getOperand(0); |
2146 | 472 | // Verify we have a legal type before going any further. Currently, we handle |
2147 | 472 | // simple types that will directly fit in a register (i32/f32/i64/f64) or |
2148 | 472 | // those that can be sign or zero-extended to a basic operation (i1/i8/i16). |
2149 | 472 | if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true)) |
2150 | 16 | return false; |
2151 | 456 | |
2152 | 456 | const Value *PtrV = I->getOperand(1); |
2153 | 456 | if (TLI.supportSwiftError()) {
2154 | 456 | // Swifterror values can come from either a function parameter with |
2155 | 456 | // swifterror attribute or an alloca with swifterror attribute. |
2156 | 456 | if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2157 | 216 | if (Arg->hasSwiftErrorAttr()) |
2158 | 1 | return false; |
2159 | 455 | } |
2160 | 455 | |
2161 | 455 | if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2162 | 144 | if (Alloca->isSwiftError()) |
2163 | 1 | return false; |
2164 | 454 | } |
2165 | 456 | } |
2166 | 454 | |
2167 | 454 | // Get the value to be stored into a register. Use the zero register directly |
2168 | 454 | // when possible to avoid an unnecessary copy and a wasted register. |
2169 | 454 | unsigned SrcReg = 0; |
2170 | 454 | if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2171 | 79 | if (CI->isZero())
2172 | 57 | SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2173 | 454 | } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2174 | 2 | if (CF->isZero() && !CF->isNegative()) {
2175 | 2 | VT = MVT::getIntegerVT(VT.getSizeInBits());
2176 | 2 | SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2177 | 2 | } |
2178 | 375 | } |
2179 | 454 | |
2180 | 454 | if (!SrcReg) |
2181 | 395 | SrcReg = getRegForValue(Op0); |
2182 | 454 | |
2183 | 454 | if (!SrcReg) |
2184 | 0 | return false; |
2185 | 454 | |
2186 | 454 | auto *SI = cast<StoreInst>(I); |
2187 | 454 | |
2188 | 454 | // Try to emit a STLR for seq_cst/release. |
2189 | 454 | if (SI->isAtomic()) {
2190 | 26 | AtomicOrdering Ord = SI->getOrdering();
2191 | 26 | // The non-atomic instructions are sufficient for relaxed stores.
2192 | 26 | if (isReleaseOrStronger(Ord)) {
2193 | 18 | // The STLR addressing mode only supports a base reg; pass that directly. |
2194 | 18 | unsigned AddrReg = getRegForValue(PtrV); |
2195 | 18 | return emitStoreRelease(VT, SrcReg, AddrReg, |
2196 | 18 | createMachineMemOperandFor(I)); |
2197 | 18 | } |
2198 | 436 | } |
2199 | 436 | |
2200 | 436 | // See if we can handle this address. |
2201 | 436 | Address Addr; |
2202 | 436 | if (!computeAddress(PtrV, Addr, Op0->getType())) |
2203 | 2 | return false; |
2204 | 434 | |
2205 | 434 | if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2206 | 140 | return false; |
2207 | 294 | return true; |
2208 | 294 | } |
2209 | | |
2210 | 109 | static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) { |
2211 | 109 | switch (Pred) { |
2212 | 4 | case CmpInst::FCMP_ONE: |
2213 | 4 | case CmpInst::FCMP_UEQ: |
2214 | 4 | default: |
2215 | 4 | // AL is our "false" for now. The other two need more compares. |
2216 | 4 | return AArch64CC::AL; |
2217 | 13 | case CmpInst::ICMP_EQ: |
2218 | 13 | case CmpInst::FCMP_OEQ: |
2219 | 13 | return AArch64CC::EQ; |
2220 | 9 | case CmpInst::ICMP_SGT: |
2221 | 9 | case CmpInst::FCMP_OGT: |
2222 | 9 | return AArch64CC::GT; |
2223 | 8 | case CmpInst::ICMP_SGE: |
2224 | 8 | case CmpInst::FCMP_OGE: |
2225 | 8 | return AArch64CC::GE; |
2226 | 6 | case CmpInst::ICMP_UGT: |
2227 | 6 | case CmpInst::FCMP_UGT: |
2228 | 6 | return AArch64CC::HI; |
2229 | 3 | case CmpInst::FCMP_OLT: |
2230 | 3 | return AArch64CC::MI; |
2231 | 6 | case CmpInst::ICMP_ULE: |
2232 | 6 | case CmpInst::FCMP_OLE: |
2233 | 6 | return AArch64CC::LS; |
2234 | 3 | case CmpInst::FCMP_ORD: |
2235 | 3 | return AArch64CC::VC; |
2236 | 3 | case CmpInst::FCMP_UNO: |
2237 | 3 | return AArch64CC::VS; |
2238 | 3 | case CmpInst::FCMP_UGE: |
2239 | 3 | return AArch64CC::PL; |
2240 | 10 | case CmpInst::ICMP_SLT: |
2241 | 10 | case CmpInst::FCMP_ULT: |
2242 | 10 | return AArch64CC::LT; |
2243 | 10 | case CmpInst::ICMP_SLE: |
2244 | 10 | case CmpInst::FCMP_ULE: |
2245 | 10 | return AArch64CC::LE; |
2246 | 18 | case CmpInst::FCMP_UNE: |
2247 | 18 | case CmpInst::ICMP_NE: |
2248 | 18 | return AArch64CC::NE; |
2249 | 7 | case CmpInst::ICMP_UGE: |
2250 | 7 | return AArch64CC::HS; |
2251 | 6 | case CmpInst::ICMP_ULT: |
2252 | 6 | return AArch64CC::LO; |
2253 | 0 | } |
2254 | 0 | } |
2255 | | |
2256 | | /// \brief Try to emit a combined compare-and-branch instruction. |
2257 | 93 | bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) { |
2258 | 93 | assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction"); |
2259 | 93 | const CmpInst *CI = cast<CmpInst>(BI->getCondition()); |
2260 | 93 | CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); |
2261 | 93 | |
2262 | 93 | const Value *LHS = CI->getOperand(0); |
2263 | 93 | const Value *RHS = CI->getOperand(1); |
2264 | 93 | |
2265 | 93 | MVT VT; |
2266 | 93 | if (!isTypeSupported(LHS->getType(), VT)) |
2267 | 0 | return false; |
2268 | 93 | |
2269 | 93 | unsigned BW = VT.getSizeInBits(); |
2270 | 93 | if (BW > 64) |
2271 | 0 | return false; |
2272 | 93 | |
2273 | 93 | MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; |
2274 | 93 | MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; |
2275 | 93 | |
2276 | 93 | // Try to take advantage of fallthrough opportunities. |
2277 | 93 | if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2278 | 54 | std::swap(TBB, FBB); |
2279 | 54 | Predicate = CmpInst::getInversePredicate(Predicate); |
2280 | 54 | } |
2281 | 93 | |
2282 | 93 | int TestBit = -1; |
2283 | 93 | bool IsCmpNE; |
2284 | 93 | switch (Predicate) { |
2285 | 24 | default: |
2286 | 24 | return false; |
2287 | 44 | case CmpInst::ICMP_EQ: |
2288 | 44 | case CmpInst::ICMP_NE: |
2289 | 44 | if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2290 | 0 | std::swap(LHS, RHS); |
2291 | 44 | |
2292 | 44 | if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2293 | 3 | return false; |
2294 | 41 | |
2295 | 41 | if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2296 | 13 | if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2297 | 12 | const Value *AndLHS = AI->getOperand(0); |
2298 | 12 | const Value *AndRHS = AI->getOperand(1); |
2299 | 12 | |
2300 | 12 | if (const auto *C = dyn_cast<ConstantInt>(AndLHS)) |
2301 | 0 | if (C->getValue().isPowerOf2())
2302 | 0 | std::swap(AndLHS, AndRHS); |
2303 | 12 | |
2304 | 12 | if (const auto *C = dyn_cast<ConstantInt>(AndRHS)) |
2305 | 12 | if (C->getValue().isPowerOf2()) {
2306 | 12 | TestBit = C->getValue().logBase2(); |
2307 | 12 | LHS = AndLHS; |
2308 | 12 | } |
2309 | 13 | } |
2310 | 41 | |
2311 | 41 | if (VT == MVT::i1) |
2312 | 1 | TestBit = 0; |
2313 | 41 | |
2314 | 41 | IsCmpNE = Predicate == CmpInst::ICMP_NE; |
2315 | 41 | break; |
2316 | 14 | case CmpInst::ICMP_SLT: |
2317 | 14 | case CmpInst::ICMP_SGE: |
2318 | 14 | if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2319 | 7 | return false; |
2320 | 7 | |
2321 | 7 | TestBit = BW - 1; |
2322 | 7 | IsCmpNE = Predicate == CmpInst::ICMP_SLT; |
2323 | 7 | break; |
2324 | 11 | case CmpInst::ICMP_SGT: |
2325 | 11 | case CmpInst::ICMP_SLE: |
2326 | 11 | if (!isa<ConstantInt>(RHS)) |
2327 | 2 | return false; |
2328 | 9 | |
2329 | 9 | if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2330 | 1 | return false; |
2331 | 8 | |
2332 | 8 | TestBit = BW - 1; |
2333 | 8 | IsCmpNE = Predicate == CmpInst::ICMP_SLE; |
2334 | 8 | break; |
2335 | 56 | } // end switch |
2336 | 56 | |
2337 | 56 | static const unsigned OpcTable[2][2][2] = { |
2338 | 56 | { {AArch64::CBZW, AArch64::CBZX }, |
2339 | 56 | {AArch64::CBNZW, AArch64::CBNZX} }, |
2340 | 56 | { {AArch64::TBZW, AArch64::TBZX }, |
2341 | 56 | {AArch64::TBNZW, AArch64::TBNZX} } |
2342 | 56 | }; |
2343 | 56 | |
2344 | 56 | bool IsBitTest = TestBit != -1; |
2345 | 56 | bool Is64Bit = BW == 64; |
2346 | 56 | if (TestBit < 32 && TestBit >= 0)
2347 | 23 | Is64Bit = false; |
2348 | 56 | |
2349 | 56 | unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit]; |
2350 | 56 | const MCInstrDesc &II = TII.get(Opc); |
2351 | 56 | |
2352 | 56 | unsigned SrcReg = getRegForValue(LHS); |
2353 | 56 | if (!SrcReg) |
2354 | 0 | return false; |
2355 | 56 | bool SrcIsKill = hasTrivialKill(LHS); |
2356 | 56 | |
2357 | 56 | if (BW == 64 && !Is64Bit)
2358 | 2 | SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill, |
2359 | 2 | AArch64::sub_32); |
2360 | 56 | |
2361 | 56 | if ((BW < 32) && !IsBitTest)
2362 | 8 | SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*IsZExt=*/true); |
2363 | 56 | |
2364 | 56 | // Emit the combined compare and branch instruction. |
2365 | 56 | SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs()); |
2366 | 56 | MachineInstrBuilder MIB = |
2367 | 56 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) |
2368 | 56 | .addReg(SrcReg, getKillRegState(SrcIsKill)); |
2369 | 56 | if (IsBitTest) |
2370 | 28 | MIB.addImm(TestBit); |
2371 | 93 | MIB.addMBB(TBB); |
2372 | 93 | |
2373 | 93 | finishCondBranch(BI->getParent(), TBB, FBB); |
2374 | 93 | return true; |
2375 | 93 | } |
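The most common fold above turns a branch on (X & Pow2) ==/!= 0 into a single TBZ/TBNZ on bit log2(Pow2), and a signed comparison against zero into a test of the sign bit (BW - 1). A standalone sketch of the bit-index recovery (testBitForAndMask is a hypothetical name):

#include <cstdint>
#include <cstdio>

static int testBitForAndMask(uint64_t Mask) {
  if (Mask == 0 || (Mask & (Mask - 1)) != 0)
    return -1; // not a power of two: no TB(N)Z fold
  int Bit = 0;
  while (!(Mask & 1)) {
    Mask >>= 1;
    ++Bit;
  }
  return Bit; // logBase2 of the mask
}

int main() {
  printf("%d\n", testBitForAndMask(0x10)); // 4  -> tbz/tbnz w0, #4, <bb>
  printf("%d\n", testBitForAndMask(0x18)); // -1 -> CBZ/CBNZ path instead
  return 0;
}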
2376 | | |
2377 | 300 | bool AArch64FastISel::selectBranch(const Instruction *I) { |
2378 | 300 | const BranchInst *BI = cast<BranchInst>(I); |
2379 | 300 | if (BI->isUnconditional()) {
2380 | 168 | MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)]; |
2381 | 168 | fastEmitBranch(MSucc, BI->getDebugLoc()); |
2382 | 168 | return true; |
2383 | 168 | } |
2384 | 132 | |
2385 | 132 | MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; |
2386 | 132 | MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; |
2387 | 132 | |
2388 | 132 | if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2389 | 94 | if (CI->hasOneUse() && isValueAvailable(CI)) {
2390 | 93 | // Try to optimize or fold the cmp. |
2391 | 93 | CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); |
2392 | 93 | switch (Predicate) { |
2393 | 93 | default: |
2394 | 93 | break; |
2395 | 0 | case CmpInst::FCMP_FALSE: |
2396 | 0 | fastEmitBranch(FBB, DbgLoc); |
2397 | 0 | return true; |
2398 | 0 | case CmpInst::FCMP_TRUE: |
2399 | 0 | fastEmitBranch(TBB, DbgLoc); |
2400 | 0 | return true; |
2401 | 93 | } |
2402 | 93 | |
2403 | 93 | // Try to emit a combined compare-and-branch first. |
2404 | 93 | if (emitCompareAndBranch(BI))
2405 | 56 | return true; |
2406 | 37 | |
2407 | 37 | // Try to take advantage of fallthrough opportunities. |
2408 | 37 | if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2409 | 10 | std::swap(TBB, FBB); |
2410 | 10 | Predicate = CmpInst::getInversePredicate(Predicate); |
2411 | 10 | } |
2412 | 37 | |
2413 | 37 | // Emit the cmp. |
2414 | 37 | if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) |
2415 | 0 | return false; |
2416 | 37 | |
2417 | 37 | // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch |
2418 | 37 | // instruction. |
2419 | 37 | AArch64CC::CondCode CC = getCompareCC(Predicate); |
2420 | 37 | AArch64CC::CondCode ExtraCC = AArch64CC::AL; |
2421 | 37 | switch (Predicate) { |
2422 | 35 | default: |
2423 | 35 | break; |
2424 | 1 | case CmpInst::FCMP_UEQ: |
2425 | 1 | ExtraCC = AArch64CC::EQ; |
2426 | 1 | CC = AArch64CC::VS; |
2427 | 1 | break; |
2428 | 1 | case CmpInst::FCMP_ONE: |
2429 | 1 | ExtraCC = AArch64CC::MI; |
2430 | 1 | CC = AArch64CC::GT; |
2431 | 1 | break; |
2432 | 37 | } |
2433 | 0 | assert((CC != AArch64CC::AL) && "Unexpected condition code."); |
2434 | 37 | |
2435 | 37 | // Emit the extra branch for FCMP_UEQ and FCMP_ONE. |
2436 | 37 | if (ExtraCC != AArch64CC::AL) {
2437 | 2 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) |
2438 | 2 | .addImm(ExtraCC) |
2439 | 2 | .addMBB(TBB); |
2440 | 2 | } |
2441 | 93 | |
2442 | 93 | // Emit the branch. |
2443 | 93 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) |
2444 | 93 | .addImm(CC) |
2445 | 93 | .addMBB(TBB); |
2446 | 93 | |
2447 | 93 | finishCondBranch(BI->getParent(), TBB, FBB); |
2448 | 93 | return true; |
2449 | 93 | } |
2450 | 38 | } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2451 | 2 | uint64_t Imm = CI->getZExtValue();
2452 | 2 | MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2453 | 2 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B)) |
2454 | 2 | .addMBB(Target); |
2455 | 2 | |
2456 | 2 | // Obtain the branch probability and add the target to the successor list. |
2457 | 2 | if (FuncInfo.BPI) {
2458 | 0 | auto BranchProbability = FuncInfo.BPI->getEdgeProbability( |
2459 | 0 | BI->getParent(), Target->getBasicBlock()); |
2460 | 0 | FuncInfo.MBB->addSuccessor(Target, BranchProbability); |
2461 | 0 | } else |
2462 | 2 | FuncInfo.MBB->addSuccessorWithoutProb(Target); |
2463 | 2 | return true; |
2464 | 0 | } else { |
2465 | 36 | AArch64CC::CondCode CC = AArch64CC::NE; |
2466 | 36 | if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2467 | 14 | // Fake request the condition, otherwise the intrinsic might be completely |
2468 | 14 | // optimized away. |
2469 | 14 | unsigned CondReg = getRegForValue(BI->getCondition()); |
2470 | 14 | if (!CondReg) |
2471 | 0 | return false; |
2472 | 14 | |
2473 | 14 | // Emit the branch. |
2474 | 14 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) |
2475 | 14 | .addImm(CC) |
2476 | 14 | .addMBB(TBB); |
2477 | 14 | |
2478 | 14 | finishCondBranch(BI->getParent(), TBB, FBB); |
2479 | 14 | return true; |
2480 | 14 | } |
2481 | 38 | } |
2482 | 23 | |
2483 | 23 | unsigned CondReg = getRegForValue(BI->getCondition()); |
2484 | 23 | if (CondReg == 0) |
2485 | 0 | return false; |
2486 | 23 | bool CondRegIsKill = hasTrivialKill(BI->getCondition()); |
2487 | 23 | |
2488 | 23 | // i1 conditions come as i32 values, test the lowest bit with tb(n)z. |
2489 | 23 | unsigned Opcode = AArch64::TBNZW; |
2490 | 23 | if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2491 | 17 | std::swap(TBB, FBB); |
2492 | 17 | Opcode = AArch64::TBZW; |
2493 | 17 | } |
2494 | 300 | |
2495 | 300 | const MCInstrDesc &II = TII.get(Opcode); |
2496 | 300 | unsigned ConstrainedCondReg |
2497 | 300 | = constrainOperandRegClass(II, CondReg, II.getNumDefs()); |
2498 | 300 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) |
2499 | 300 | .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill)) |
2500 | 300 | .addImm(0) |
2501 | 300 | .addMBB(TBB); |
2502 | 300 | |
2503 | 300 | finishCondBranch(BI->getParent(), TBB, FBB); |
2504 | 300 | return true; |
2505 | 300 | } |
2506 | | |
2507 | 1 | bool AArch64FastISel::selectIndirectBr(const Instruction *I) { |
2508 | 1 | const IndirectBrInst *BI = cast<IndirectBrInst>(I); |
2509 | 1 | unsigned AddrReg = getRegForValue(BI->getOperand(0)); |
2510 | 1 | if (AddrReg == 0) |
2511 | 0 | return false; |
2512 | 1 | |
2513 | 1 | // Emit the indirect branch. |
2514 | 1 | const MCInstrDesc &II = TII.get(AArch64::BR); |
2515 | 1 | AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs()); |
2516 | 1 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg); |
2517 | 1 | |
2518 | 1 | // Make sure the CFG is up-to-date. |
2519 | 1 | for (auto *Succ : BI->successors()) |
2520 | 2 | FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]); |
2521 | 1 | |
2522 | 1 | return true; |
2523 | 1 | } |
2524 | | |
2525 | 57 | bool AArch64FastISel::selectCmp(const Instruction *I) { |
2526 | 57 | const CmpInst *CI = cast<CmpInst>(I); |
2527 | 57 | |
2528 | 57 | // Vectors of i1 are weird: bail out. |
2529 | 57 | if (CI->getType()->isVectorTy()) |
2530 | 6 | return false; |
2531 | 51 | |
2532 | 51 | // Try to optimize or fold the cmp. |
2533 | 51 | CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); |
2534 | 51 | unsigned ResultReg = 0; |
2535 | 51 | switch (Predicate) { |
2536 | 49 | default: |
2537 | 49 | break; |
2538 | 1 | case CmpInst::FCMP_FALSE: |
2539 | 1 | ResultReg = createResultReg(&AArch64::GPR32RegClass); |
2540 | 1 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
2541 | 1 | TII.get(TargetOpcode::COPY), ResultReg) |
2542 | 1 | .addReg(AArch64::WZR, getKillRegState(true)); |
2543 | 1 | break; |
2544 | 1 | case CmpInst::FCMP_TRUE: |
2545 | 1 | ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1); |
2546 | 1 | break; |
2547 | 51 | } |
2548 | 51 | |
2549 | 51 | if (ResultReg) {
2550 | 2 | updateValueMap(I, ResultReg); |
2551 | 2 | return true; |
2552 | 2 | } |
2553 | 49 | |
2554 | 49 | // Emit the cmp. |
2555 | 49 | if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2556 | 0 | return false; |
2557 | 49 | |
2558 | 49 | ResultReg = createResultReg(&AArch64::GPR32RegClass); |
2559 | 49 | |
2560 | 49 | // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These |
2561 | 49 | // condition codes are inverted, because they are used by CSINC. |
2562 | 49 | static unsigned CondCodeTable[2][2] = { |
2563 | 49 | { AArch64CC::NE, AArch64CC::VC }, |
2564 | 49 | { AArch64CC::PL, AArch64CC::LE } |
2565 | 49 | }; |
2566 | 49 | unsigned *CondCodes = nullptr; |
2567 | 49 | switch (Predicate) { |
2568 | 47 | default: |
2569 | 47 | break; |
2570 | 1 | case CmpInst::FCMP_UEQ: |
2571 | 1 | CondCodes = &CondCodeTable[0][0]; |
2572 | 1 | break; |
2573 | 1 | case CmpInst::FCMP_ONE: |
2574 | 1 | CondCodes = &CondCodeTable[1][0]; |
2575 | 1 | break; |
2576 | 49 | } |
2577 | 49 | |
2578 | 49 | if (CondCodes) {
2579 | 2 | unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass); |
2580 | 2 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr), |
2581 | 2 | TmpReg1) |
2582 | 2 | .addReg(AArch64::WZR, getKillRegState(true)) |
2583 | 2 | .addReg(AArch64::WZR, getKillRegState(true)) |
2584 | 2 | .addImm(CondCodes[0]); |
2585 | 2 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr), |
2586 | 2 | ResultReg) |
2587 | 2 | .addReg(TmpReg1, getKillRegState(true)) |
2588 | 2 | .addReg(AArch64::WZR, getKillRegState(true)) |
2589 | 2 | .addImm(CondCodes[1]); |
2590 | 2 | |
2591 | 2 | updateValueMap(I, ResultReg); |
2592 | 2 | return true; |
2593 | 2 | } |
2594 | 47 | |
2595 | 47 | // Now set a register based on the comparison. |
2596 | 47 | AArch64CC::CondCode CC = getCompareCC(Predicate); |
2597 | 47 | assert((CC != AArch64CC::AL) && "Unexpected condition code."); |
2598 | 47 | AArch64CC::CondCode invertedCC = getInvertedCondCode(CC); |
2599 | 47 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr), |
2600 | 47 | ResultReg) |
2601 | 47 | .addReg(AArch64::WZR, getKillRegState(true)) |
2602 | 47 | .addReg(AArch64::WZR, getKillRegState(true)) |
2603 | 47 | .addImm(invertedCC); |
2604 | 47 | |
2605 | 47 | updateValueMap(I, ResultReg); |
2606 | 47 | return true; |
2607 | 47 | } |
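The materialization above leans on CSINC semantics: CSINC Wd, Wn, Wm, cc yields Wn when cc holds and Wm + 1 otherwise, so CSINC Wd, WZR, WZR, cc yields 0 or 1. Passing the inverted condition therefore produces 1 exactly when the original compare holds. A tiny sketch of that identity:

#include <cstdio>

// csinc d, WZR, WZR, cc  ==  cc ? 0 : 0 + 1
static int csincWzrWzr(bool CC) { return CC ? 0 : 1; }

int main() {
  bool CmpHolds = true;
  bool InvertedCC = !CmpHolds; // stand-in for getInvertedCondCode
  printf("%d\n", csincWzrWzr(InvertedCC)); // 1: the compare held
  return 0;
}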
2608 | | |
2609 | | /// \brief Optimize selects of i1 if one of the operands has a 'true' or 'false' |
2610 | | /// value. |
2611 | 53 | bool AArch64FastISel::optimizeSelect(const SelectInst *SI) { |
2612 | 53 | if (!SI->getType()->isIntegerTy(1)) |
2613 | 47 | return false; |
2614 | 6 | |
2615 | 6 | const Value *Src1Val, *Src2Val; |
2616 | 6 | unsigned Opc = 0; |
2617 | 6 | bool NeedExtraOp = false; |
2618 | 6 | if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2619 | 3 | if (CI->isOne()) {
2620 | 2 | Src1Val = SI->getCondition(); |
2621 | 2 | Src2Val = SI->getFalseValue(); |
2622 | 2 | Opc = AArch64::ORRWrr; |
2623 | 3 | } else { |
2624 | 1 | assert(CI->isZero()); |
2625 | 1 | Src1Val = SI->getFalseValue(); |
2626 | 1 | Src2Val = SI->getCondition(); |
2627 | 1 | Opc = AArch64::BICWrr; |
2628 | 1 | } |
2629 | 6 | } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2630 | 2 | if (CI->isOne()) {
2631 | 1 | Src1Val = SI->getCondition(); |
2632 | 1 | Src2Val = SI->getTrueValue(); |
2633 | 1 | Opc = AArch64::ORRWrr; |
2634 | 1 | NeedExtraOp = true; |
2635 | 2 | } else { |
2636 | 1 | assert(CI->isZero()); |
2637 | 1 | Src1Val = SI->getCondition(); |
2638 | 1 | Src2Val = SI->getTrueValue(); |
2639 | 1 | Opc = AArch64::ANDWrr; |
2640 | 1 | } |
2641 | 3 | } |
2642 | 6 | |
2643 | 6 | if (!Opc) |
2644 | 1 | return false; |
2645 | 5 | |
2646 | 5 | unsigned Src1Reg = getRegForValue(Src1Val); |
2647 | 5 | if (!Src1Reg) |
2648 | 0 | return false; |
2649 | 5 | bool Src1IsKill = hasTrivialKill(Src1Val); |
2650 | 5 | |
2651 | 5 | unsigned Src2Reg = getRegForValue(Src2Val); |
2652 | 5 | if (!Src2Reg) |
2653 | 0 | return false; |
2654 | 5 | bool Src2IsKill = hasTrivialKill(Src2Val); |
2655 | 5 | |
2656 | 5 | if (NeedExtraOp) {
2657 | 1 | Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1); |
2658 | 1 | Src1IsKill = true; |
2659 | 1 | } |
2660 | 53 | unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg, |
2661 | 53 | Src1IsKill, Src2Reg, Src2IsKill); |
2662 | 53 | updateValueMap(SI, ResultReg); |
2663 | 53 | return true; |
2664 | 53 | } |
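| | // For illustration, the four foldable i1 patterns and the logical op each
| | // one maps to (a sketch; %c is the condition, %t/%f the other operand):
| | //   select i1 %c, i1 true,  i1 %f  ->  ORRWrr          (c | f)
| | //   select i1 %c, i1 false, i1 %f  ->  BICWrr          (f & ~c)
| | //   select i1 %c, i1 %t, i1 true   ->  EOR #1 + ORRWrr (t | ~c)
| | //   select i1 %c, i1 %t, i1 false  ->  ANDWrr          (c & t)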
2665 | | |
2666 | 53 | bool AArch64FastISel::selectSelect(const Instruction *I) { |
2667 | 53 | assert(isa<SelectInst>(I) && "Expected a select instruction."); |
2668 | 53 | MVT VT; |
2669 | 53 | if (!isTypeSupported(I->getType(), VT)) |
2670 | 0 | return false; |
2671 | 53 | |
2672 | 53 | unsigned Opc; |
2673 | 53 | const TargetRegisterClass *RC; |
2674 | 53 | switch (VT.SimpleTy) { |
2675 | 0 | default: |
2676 | 0 | return false; |
2677 | 18 | case MVT::i1: |
2678 | 18 | case MVT::i8: |
2679 | 18 | case MVT::i16: |
2680 | 18 | case MVT::i32: |
2681 | 18 | Opc = AArch64::CSELWr; |
2682 | 18 | RC = &AArch64::GPR32RegClass; |
2683 | 18 | break; |
2684 | 8 | case MVT::i64: |
2685 | 8 | Opc = AArch64::CSELXr; |
2686 | 8 | RC = &AArch64::GPR64RegClass; |
2687 | 8 | break; |
2688 | 26 | case MVT::f32: |
2689 | 26 | Opc = AArch64::FCSELSrrr; |
2690 | 26 | RC = &AArch64::FPR32RegClass; |
2691 | 26 | break; |
2692 | 1 | case MVT::f64: |
2693 | 1 | Opc = AArch64::FCSELDrrr; |
2694 | 1 | RC = &AArch64::FPR64RegClass; |
2695 | 1 | break; |
2696 | 53 | } |
2697 | 53 | |
2698 | 53 | const SelectInst *SI = cast<SelectInst>(I); |
2699 | 53 | const Value *Cond = SI->getCondition(); |
2700 | 53 | AArch64CC::CondCode CC = AArch64CC::NE; |
2701 | 53 | AArch64CC::CondCode ExtraCC = AArch64CC::AL; |
2702 | 53 | |
2703 | 53 | if (optimizeSelect(SI)) |
2704 | 5 | return true; |
2705 | 48 | |
2706 | 48 | // Try to pick up the flags, so we don't have to emit another compare.
2707 | 48 | if (foldXALUIntrinsic(CC, I, Cond)) {
2708 | 12 | // Fake request the condition to force emission of the XALU intrinsic. |
2709 | 12 | unsigned CondReg = getRegForValue(Cond); |
2710 | 12 | if (!CondReg) |
2711 | 0 | return false; |
2712 | 36 | } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2713 | 36 | isValueAvailable(Cond)) {
2714 | 27 | const auto *Cmp = cast<CmpInst>(Cond); |
2715 | 27 | // Try to optimize or fold the cmp. |
2716 | 27 | CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp); |
2717 | 27 | const Value *FoldSelect = nullptr; |
2718 | 27 | switch (Predicate) { |
2719 | 25 | default: |
2720 | 25 | break; |
2721 | 1 | case CmpInst::FCMP_FALSE: |
2722 | 1 | FoldSelect = SI->getFalseValue(); |
2723 | 1 | break; |
2724 | 1 | case CmpInst::FCMP_TRUE: |
2725 | 1 | FoldSelect = SI->getTrueValue(); |
2726 | 1 | break; |
2727 | 27 | } |
2728 | 27 | |
2729 | 27 | if (FoldSelect) {
2730 | 2 | unsigned SrcReg = getRegForValue(FoldSelect); |
2731 | 2 | if (!SrcReg) |
2732 | 0 | return false; |
2733 | 2 | unsigned UseReg = lookUpRegForValue(SI); |
2734 | 2 | if (UseReg) |
2735 | 2 | MRI.clearKillFlags(UseReg); |
2736 | 2 | |
2737 | 2 | updateValueMap(I, SrcReg); |
2738 | 2 | return true; |
2739 | 2 | } |
2740 | 25 | |
2741 | 25 | // Emit the cmp. |
2742 | 25 | if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2743 | 0 | return false; |
2744 | 25 | |
2745 | 25 | // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction. |
2746 | 25 | CC = getCompareCC(Predicate); |
2747 | 25 | switch (Predicate) { |
2748 | 23 | default: |
2749 | 23 | break; |
2750 | 1 | case CmpInst::FCMP_UEQ: |
2751 | 1 | ExtraCC = AArch64CC::EQ; |
2752 | 1 | CC = AArch64CC::VS; |
2753 | 1 | break; |
2754 | 1 | case CmpInst::FCMP_ONE: |
2755 | 1 | ExtraCC = AArch64CC::MI; |
2756 | 1 | CC = AArch64CC::GT; |
2757 | 1 | break; |
2758 | 25 | } |
2759 | 25 | assert((CC != AArch64CC::AL) && "Unexpected condition code."); |
2760 | 36 | } else { |
2761 | 9 | unsigned CondReg = getRegForValue(Cond); |
2762 | 9 | if (!CondReg) |
2763 | 0 | return false; |
2764 | 9 | bool CondIsKill = hasTrivialKill(Cond); |
2765 | 9 | |
2766 | 9 | const MCInstrDesc &II = TII.get(AArch64::ANDSWri); |
2767 | 9 | CondReg = constrainOperandRegClass(II, CondReg, 1); |
2768 | 9 | |
2769 | 9 | // Emit a TST instruction (ANDS wzr, reg, #imm). |
2770 | 9 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, |
2771 | 9 | AArch64::WZR) |
2772 | 9 | .addReg(CondReg, getKillRegState(CondIsKill)) |
2773 | 9 | .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); |
2774 | 9 | } |
2775 | 48 | |
2776 | 46 | unsigned Src1Reg = getRegForValue(SI->getTrueValue()); |
2777 | 46 | bool Src1IsKill = hasTrivialKill(SI->getTrueValue()); |
2778 | 46 | |
2779 | 46 | unsigned Src2Reg = getRegForValue(SI->getFalseValue()); |
2780 | 46 | bool Src2IsKill = hasTrivialKill(SI->getFalseValue()); |
2781 | 46 | |
2782 | 46 | if (!Src1Reg || !Src2Reg)
2783 | 0 | return false; |
2784 | 46 | |
2785 | 46 | if (ExtraCC != AArch64CC::AL) {
2786 | 2 | Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg, |
2787 | 2 | Src2IsKill, ExtraCC); |
2788 | 2 | Src2IsKill = true; |
2789 | 2 | } |
2790 | 53 | unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg, |
2791 | 53 | Src2IsKill, CC); |
2792 | 53 | updateValueMap(I, ResultReg); |
2793 | 53 | return true; |
2794 | 53 | } |
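| | // For illustration, a select fed by a one-use compare is expected to fold
| | // the flags, roughly (a sketch, register numbers arbitrary):
| | //   %r = select i1 (icmp sgt i32 %a, %b), i32 %x, i32 %y
| | //     cmp  w0, w1
| | //     csel w8, w2, w3, gt
| | // whereas a plain i1 condition first tests bit 0:
| | //     tst  w0, #0x1
| | //     csel w8, w1, w2, ne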
2795 | | |
2796 | 7 | bool AArch64FastISel::selectFPExt(const Instruction *I) { |
2797 | 7 | Value *V = I->getOperand(0); |
2798 | 7 | if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2799 | 2 | return false; |
2800 | 5 | |
2801 | 5 | unsigned Op = getRegForValue(V); |
2802 | 5 | if (Op == 0) |
2803 | 0 | return false; |
2804 | 5 | |
2805 | 5 | unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass); |
2806 | 5 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr), |
2807 | 5 | ResultReg).addReg(Op); |
2808 | 5 | updateValueMap(I, ResultReg); |
2809 | 5 | return true; |
2810 | 5 | } |
2811 | | |
2812 | 2 | bool AArch64FastISel::selectFPTrunc(const Instruction *I) { |
2813 | 2 | Value *V = I->getOperand(0); |
2814 | 2 | if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2815 | 1 | return false; |
2816 | 1 | |
2817 | 1 | unsigned Op = getRegForValue(V); |
2818 | 1 | if (Op == 0) |
2819 | 0 | return false; |
2820 | 1 | |
2821 | 1 | unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass); |
2822 | 1 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr), |
2823 | 1 | ResultReg).addReg(Op); |
2824 | 1 | updateValueMap(I, ResultReg); |
2825 | 1 | return true; |
2826 | 1 | } |
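| | // For illustration, both conversions map onto a single FCVT (a sketch):
| | //   fpext  float -> double:  fcvt d0, s0   (FCVTDSr)
| | //   fptrunc double -> float: fcvt s0, d0   (FCVTSDr)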
2827 | | |
2828 | | // FPToUI and FPToSI |
2829 | 19 | bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) { |
2830 | 19 | MVT DestVT; |
2831 | 19 | if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2832 | 1 | return false; |
2833 | 18 | |
2834 | 18 | unsigned SrcReg = getRegForValue(I->getOperand(0)); |
2835 | 18 | if (SrcReg == 0) |
2836 | 0 | return false; |
2837 | 18 | |
2838 | 18 | EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); |
2839 | 18 | if (SrcVT == MVT::f128 || SrcVT == MVT::f16)
2840 | 4 | return false; |
2841 | 14 | |
2842 | 14 | unsigned Opc; |
2843 | 14 | if (SrcVT == MVT::f64) {
2844 | 7 | if (Signed)
2845 | 0 | Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2846 | 7 | else
2847 | 7 | Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2848 | 14 | } else {
2849 | 7 | if (Signed)
2850 | 0 | Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2851 | 7 | else
2852 | 7 | Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2853 | 7 | } |
2854 | 14 | unsigned ResultReg = createResultReg( |
2855 | 14 | DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2856 | 19 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) |
2857 | 19 | .addReg(SrcReg); |
2858 | 19 | updateValueMap(I, ResultReg); |
2859 | 19 | return true; |
2860 | 19 | } |
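| | // For illustration (a sketch, register numbers arbitrary):
| | //   %r = fptoui double %d to i32  ->  fcvtzu w0, d0   (FCVTZUUWDr)
| | //   %r = fptosi float %f to i64   ->  fcvtzs x0, s0   (FCVTZSUXSr)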
2861 | | |
2862 | 30 | bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) { |
2863 | 30 | MVT DestVT; |
2864 | 30 | if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2865 | 2 | return false; |
2866 | 28 | // Let regular ISEL handle FP16.
2867 | 28 | if (DestVT == MVT::f16)
2868 | 10 | return false; |
2869 | 18 | |
2870 | 28 | assert((DestVT == MVT::f32 || DestVT == MVT::f64) && |
2871 | 18 | "Unexpected value type."); |
2872 | 18 | |
2873 | 18 | unsigned SrcReg = getRegForValue(I->getOperand(0)); |
2874 | 18 | if (!SrcReg) |
2875 | 0 | return false; |
2876 | 18 | bool SrcIsKill = hasTrivialKill(I->getOperand(0)); |
2877 | 18 | |
2878 | 18 | EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); |
2879 | 18 | |
2880 | 18 | // Handle sign-extension. |
2881 | 18 | if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2882 | 6 | SrcReg = |
2883 | 6 | emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed); |
2884 | 6 | if (!SrcReg) |
2885 | 0 | return false; |
2886 | 6 | SrcIsKill = true; |
2887 | 6 | } |
2888 | 18 | |
2889 | 18 | unsigned Opc; |
2890 | 18 | if (SrcVT == MVT::i64) {
2891 | 6 | if (Signed)
2892 | 0 | Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2893 | 6 | else
2894 | 6 | Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2895 | 18 | } else {
2896 | 12 | if (Signed)
2897 | 3 | Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2898 | 12 | else
2899 | 9 | Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2900 | 12 | } |
2901 | 18 | |
2902 | 18 | unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg, |
2903 | 18 | SrcIsKill); |
2904 | 18 | updateValueMap(I, ResultReg); |
2905 | 18 | return true; |
2906 | 30 | } |
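| | // For illustration, a sub-i32 source is extended before the convert; e.g.
| | // "%r = uitofp i8 %b to float" becomes roughly (a sketch; the zero-extend
| | // may be emitted as an AND or a UBFM/uxtb by emitIntExt):
| | //   uxtb  w8, w0               // zero-extend the i8 source to i32
| | //   ucvtf s0, w8               // UCVTFUWSri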
2907 | | |
2908 | 1.23k | bool AArch64FastISel::fastLowerArguments() { |
2909 | 1.23k | if (!FuncInfo.CanLowerReturn) |
2910 | 0 | return false; |
2911 | 1.23k | |
2912 | 1.23k | const Function *F = FuncInfo.Fn; |
2913 | 1.23k | if (F->isVarArg()) |
2914 | 1 | return false; |
2915 | 1.23k | |
2916 | 1.23k | CallingConv::ID CC = F->getCallingConv(); |
2917 | 1.23k | if (CC != CallingConv::C && CC != CallingConv::Swift)
2918 | 6 | return false; |
2919 | 1.22k | |
2920 | 1.22k | // Only handle simple cases of up to 8 GPR and FPR each. |
2921 | 1.22k | unsigned GPRCnt = 0; |
2922 | 1.22k | unsigned FPRCnt = 0; |
2923 | 2.04k | for (auto const &Arg : F->args()) { |
2924 | 2.04k | if (Arg.hasAttribute(Attribute::ByVal) || |
2925 | 2.04k | Arg.hasAttribute(Attribute::InReg) || |
2926 | 2.04k | Arg.hasAttribute(Attribute::StructRet) || |
2927 | 2.04k | Arg.hasAttribute(Attribute::SwiftSelf) || |
2928 | 2.03k | Arg.hasAttribute(Attribute::SwiftError) || |
2929 | 2.02k | Arg.hasAttribute(Attribute::Nest)) |
2930 | 16 | return false; |
2931 | 2.02k | |
2932 | 2.02k | Type *ArgTy = Arg.getType(); |
2933 | 2.02k | if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2934 | 6 | return false; |
2935 | 2.02k | |
2936 | 2.02k | EVT ArgVT = TLI.getValueType(DL, ArgTy); |
2937 | 2.02k | if (!ArgVT.isSimple()) |
2938 | 0 | return false; |
2939 | 2.02k | |
2940 | 2.02k | MVT VT = ArgVT.getSimpleVT().SimpleTy; |
2941 | 2.02k | if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2942 | 0 | return false; |
2943 | 2.02k | |
2944 | 2.02k | if (VT.isVector() &&
2945 | 120 | (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2946 | 71 | return false; |
2947 | 1.94k | |
2948 | 1.94k | if (VT >= MVT::i1 && VT <= MVT::i64)
2949 | 1.62k | ++GPRCnt;
2950 | 328 | else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2951 | 41 | VT.is128BitVector()) |
2952 | 316 | ++FPRCnt; |
2953 | 328 | else |
2954 | 12 | return false; |
2955 | 1.93k | |
2956 | 1.93k | if (GPRCnt > 8 || FPRCnt > 8)
2957 | 6 | return false; |
2958 | 1.11k | } |
2959 | 1.11k | |
2960 | 1.11k | static const MCPhysReg Registers[6][8] = { |
2961 | 1.11k | { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4, |
2962 | 1.11k | AArch64::W5, AArch64::W6, AArch64::W7 }, |
2963 | 1.11k | { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4, |
2964 | 1.11k | AArch64::X5, AArch64::X6, AArch64::X7 }, |
2965 | 1.11k | { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4, |
2966 | 1.11k | AArch64::H5, AArch64::H6, AArch64::H7 }, |
2967 | 1.11k | { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4, |
2968 | 1.11k | AArch64::S5, AArch64::S6, AArch64::S7 }, |
2969 | 1.11k | { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4, |
2970 | 1.11k | AArch64::D5, AArch64::D6, AArch64::D7 }, |
2971 | 1.11k | { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4, |
2972 | 1.11k | AArch64::Q5, AArch64::Q6, AArch64::Q7 } |
2973 | 1.11k | }; |
2974 | 1.11k | |
2975 | 1.11k | unsigned GPRIdx = 0; |
2976 | 1.11k | unsigned FPRIdx = 0; |
2977 | 1.86k | for (auto const &Arg : F->args()) { |
2978 | 1.86k | MVT VT = TLI.getSimpleValueType(DL, Arg.getType()); |
2979 | 1.86k | unsigned SrcReg; |
2980 | 1.86k | const TargetRegisterClass *RC; |
2981 | 671 | if (VT >= MVT::i1 && VT <= MVT::i32) {
2982 | 671 | SrcReg = Registers[0][GPRIdx++];
2983 | 671 | RC = &AArch64::GPR32RegClass;
2984 | 671 | VT = MVT::i32;
2985 | 1.86k | } else if (VT == MVT::i64) {
2986 | 884 | SrcReg = Registers[1][GPRIdx++];
2987 | 884 | RC = &AArch64::GPR64RegClass;
2988 | 1.19k | } else if (VT == MVT::f16) {
2989 | 2 | SrcReg = Registers[2][FPRIdx++];
2990 | 2 | RC = &AArch64::FPR16RegClass;
2991 | 307 | } else if (VT == MVT::f32) {
2992 | 193 | SrcReg = Registers[3][FPRIdx++];
2993 | 193 | RC = &AArch64::FPR32RegClass;
2994 | 305 | } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2995 | 83 | SrcReg = Registers[4][FPRIdx++];
2996 | 83 | RC = &AArch64::FPR64RegClass;
2997 | 112 | } else if (VT.is128BitVector()) {
2998 | 29 | SrcReg = Registers[5][FPRIdx++]; |
2999 | 29 | RC = &AArch64::FPR128RegClass; |
3000 | 29 | } else |
3001 | 0 | llvm_unreachable("Unexpected value type."); |
3002 | 1.86k | |
3003 | 1.86k | unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC); |
3004 | 1.86k | // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. |
3005 | 1.86k | // Without this, EmitLiveInCopies may eliminate the livein if its only |
3006 | 1.86k | // use is a bitcast (which isn't turned into an instruction). |
3007 | 1.86k | unsigned ResultReg = createResultReg(RC); |
3008 | 1.86k | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
3009 | 1.86k | TII.get(TargetOpcode::COPY), ResultReg) |
3010 | 1.86k | .addReg(DstReg, getKillRegState(true)); |
3011 | 1.86k | updateValueMap(&Arg, ResultReg); |
3012 | 1.86k | } |
3013 | 1.11k | return true; |
3014 | 1.23k | } |
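| | // For illustration: GPRIdx and FPRIdx advance independently, so for
| | //   define i32 @f(i32 %a, float %b, i64 %c)
| | // %a arrives in w0, %b in s0, and %c in x1 (a sketch of the mapping).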
3015 | | |
3016 | | bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI, |
3017 | | SmallVectorImpl<MVT> &OutVTs, |
3018 | 127 | unsigned &NumBytes) { |
3019 | 127 | CallingConv::ID CC = CLI.CallConv; |
3020 | 127 | SmallVector<CCValAssign, 16> ArgLocs; |
3021 | 127 | CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context); |
3022 | 127 | CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC)); |
3023 | 127 | |
3024 | 127 | // Get a count of how many bytes are to be pushed on the stack. |
3025 | 127 | NumBytes = CCInfo.getNextStackOffset(); |
3026 | 127 | |
3027 | 127 | // Issue CALLSEQ_START |
3028 | 127 | unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); |
3029 | 127 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)) |
3030 | 127 | .addImm(NumBytes).addImm(0); |
3031 | 127 | |
3032 | 127 | // Process the args. |
3033 | 1.32k | for (CCValAssign &VA : ArgLocs) { |
3034 | 1.32k | const Value *ArgVal = CLI.OutVals[VA.getValNo()]; |
3035 | 1.32k | MVT ArgVT = OutVTs[VA.getValNo()]; |
3036 | 1.32k | |
3037 | 1.32k | unsigned ArgReg = getRegForValue(ArgVal); |
3038 | 1.32k | if (!ArgReg) |
3039 | 2 | return false; |
3040 | 1.32k | |
3041 | 1.32k | // Handle arg promotion: SExt, ZExt, AExt. |
3042 | 1.32k | switch (VA.getLocInfo()) { |
3043 | 1.22k | case CCValAssign::Full: |
3044 | 1.22k | break; |
3045 | 15 | case CCValAssign::SExt: { |
3046 | 15 | MVT DestVT = VA.getLocVT(); |
3047 | 15 | MVT SrcVT = ArgVT; |
3048 | 15 | ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false); |
3049 | 15 | if (!ArgReg) |
3050 | 0 | return false; |
3051 | 15 | break; |
3052 | 15 | } |
3053 | 84 | case CCValAssign::AExt: |
3054 | 84 | // Intentional fall-through. |
3055 | 84 | case CCValAssign::ZExt: { |
3056 | 84 | MVT DestVT = VA.getLocVT(); |
3057 | 84 | MVT SrcVT = ArgVT; |
3058 | 84 | ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true); |
3059 | 84 | if (!ArgReg) |
3060 | 0 | return false; |
3061 | 84 | break; |
3062 | 84 | } |
3063 | 0 | default: |
3064 | 0 | llvm_unreachable("Unknown arg promotion!"); |
3065 | 1.32k | } |
3066 | 1.32k | |
3067 | 1.32k | // Now copy/store arg to correct locations. |
3068 | 1.32k | if (VA.isRegLoc() && !VA.needsCustom()) {
3069 | 258 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3070 | 258 | TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3071 | 258 | CLI.OutRegs.push_back(VA.getLocReg());
3072 | 1.32k | } else if (VA.needsCustom()) {
3073 | 0 | // FIXME: Handle custom args. |
3074 | 0 | return false; |
3075 | 0 | } else { |
3076 | 1.06k | assert(VA.isMemLoc() && "Assuming store on stack."); |
3077 | 1.06k | |
3078 | 1.06k | // Don't emit stores for undef values. |
3079 | 1.06k | if (isa<UndefValue>(ArgVal)) |
3080 | 1.03k | continue; |
3081 | 32 | |
3082 | 32 | // Need to store on the stack. |
3083 | 32 | unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8; |
3084 | 32 | |
3085 | 32 | unsigned BEAlign = 0; |
3086 | 32 | if (ArgSize < 8 && !Subtarget->isLittleEndian())
3087 | 2 | BEAlign = 8 - ArgSize; |
3088 | 32 | |
3089 | 32 | Address Addr; |
3090 | 32 | Addr.setKind(Address::RegBase); |
3091 | 32 | Addr.setReg(AArch64::SP); |
3092 | 32 | Addr.setOffset(VA.getLocMemOffset() + BEAlign); |
3093 | 32 | |
3094 | 32 | unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType()); |
3095 | 32 | MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( |
3096 | 32 | MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()), |
3097 | 32 | MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment); |
3098 | 32 | |
3099 | 32 | if (!emitStore(ArgVT, ArgReg, Addr, MMO)) |
3100 | 2 | return false; |
3101 | 123 | } |
3102 | 1.32k | } |
3103 | 123 | return true; |
3104 | 123 | } |
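| | // For illustration of BEAlign: a 1-byte argument passed on the stack of a
| | // big-endian target gets BEAlign = 8 - 1 = 7, so the store targets
| | // [sp, LocMemOffset + 7], i.e. the high byte of its 8-byte slot (a sketch
| | // of the arithmetic above).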
3105 | | |
3106 | | bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT, |
3107 | 123 | unsigned NumBytes) { |
3108 | 123 | CallingConv::ID CC = CLI.CallConv; |
3109 | 123 | |
3110 | 123 | // Issue CALLSEQ_END |
3111 | 123 | unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); |
3112 | 123 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp)) |
3113 | 123 | .addImm(NumBytes).addImm(0); |
3114 | 123 | |
3115 | 123 | // Now the return value. |
3116 | 123 | if (RetVT != MVT::isVoid) {
3117 | 67 | SmallVector<CCValAssign, 16> RVLocs; |
3118 | 67 | CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context); |
3119 | 67 | CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC)); |
3120 | 67 | |
3121 | 67 | // Only handle a single return value. |
3122 | 67 | if (RVLocs.size() != 1) |
3123 | 0 | return false; |
3124 | 67 | |
3125 | 67 | // Copy all of the result registers out of their specified physreg. |
3126 | 67 | MVT CopyVT = RVLocs[0].getValVT(); |
3127 | 67 | |
3128 | 67 | // TODO: Handle big-endian results |
3129 | 67 | if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3130 | 10 | return false; |
3131 | 57 | |
3132 | 57 | unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT)); |
3133 | 57 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
3134 | 57 | TII.get(TargetOpcode::COPY), ResultReg) |
3135 | 57 | .addReg(RVLocs[0].getLocReg()); |
3136 | 57 | CLI.InRegs.push_back(RVLocs[0].getLocReg()); |
3137 | 57 | |
3138 | 57 | CLI.ResultReg = ResultReg; |
3139 | 57 | CLI.NumResultRegs = 1; |
3140 | 57 | } |
3141 | 123 | |
3142 | 113 | return true; |
3143 | 123 | } |
3144 | | |
3145 | 240 | bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) { |
3146 | 240 | CallingConv::ID CC = CLI.CallConv; |
3147 | 240 | bool IsTailCall = CLI.IsTailCall; |
3148 | 240 | bool IsVarArg = CLI.IsVarArg; |
3149 | 240 | const Value *Callee = CLI.Callee; |
3150 | 240 | MCSymbol *Symbol = CLI.Symbol; |
3151 | 240 | |
3152 | 240 | if (!Callee && !Symbol)
3153 | 0 | return false; |
3154 | 240 | |
3155 | 240 | // Allow SelectionDAG isel to handle tail calls. |
3156 | 240 | if (IsTailCall)
3157 | 22 | return false; |
3158 | 218 | |
3159 | 218 | CodeModel::Model CM = TM.getCodeModel(); |
3160 | 218 | // Only support the small-addressing and large code models. |
3161 | 218 | if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3162 | 0 | return false; |
3163 | 218 | |
3164 | 218 | // FIXME: Add large code model support for ELF. |
3165 | 218 | if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3166 | 0 | return false; |
3167 | 218 | |
3168 | 218 | // Let SDISel handle vararg functions. |
3169 | 218 | if (IsVarArg)
3170 | 5 | return false; |
3171 | 213 | |
3172 | 213 | // FIXME: Only handle *simple* calls for now. |
3173 | 213 | MVT RetVT; |
3174 | 213 | if (CLI.RetTy->isVoidTy()) |
3175 | 60 | RetVT = MVT::isVoid; |
3176 | 153 | else if (!isTypeLegal(CLI.RetTy, RetVT))
3177 | 12 | return false; |
3178 | 201 | |
3179 | 201 | for (auto Flag : CLI.OutFlags) |
3180 | 1.40k | if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3181 | 1.40k | Flag.isSwiftSelf() || Flag.isSwiftError())
3182 | 5 | return false; |
3183 | 196 | |
3184 | 196 | // Set up the argument vectors. |
3185 | 196 | SmallVector<MVT, 16> OutVTs; |
3186 | 196 | OutVTs.reserve(CLI.OutVals.size()); |
3187 | 196 | |
3188 | 1.39k | for (auto *Val : CLI.OutVals) { |
3189 | 1.39k | MVT VT; |
3190 | 1.39k | if (!isTypeLegal(Val->getType(), VT) && |
3191 | 108 | !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3192 | 9 | return false; |
3193 | 1.38k | |
3194 | 1.38k | // We don't handle vector parameters yet. |
3195 | 1.38k | if (VT.isVector() || VT.getSizeInBits() > 64)
3196 | 60 | return false; |
3197 | 1.32k | |
3198 | 1.32k | OutVTs.push_back(VT); |
3199 | 1.32k | } |
3200 | 196 | |
3201 | 127 | Address Addr; |
3202 | 127 | if (Callee && !computeCallAddress(Callee, Addr))
3203 | 0 | return false; |
3204 | 127 | |
3205 | 127 | // Handle the arguments now that we've gotten them. |
3206 | 127 | unsigned NumBytes; |
3207 | 127 | if (!processCallArgs(CLI, OutVTs, NumBytes)) |
3208 | 4 | return false; |
3209 | 123 | |
3210 | 123 | // Issue the call. |
3211 | 123 | MachineInstrBuilder MIB; |
3212 | 123 | if (Subtarget->useSmallAddressing()) {
3213 | 107 | const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
3214 | 107 | MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II); |
3215 | 107 | if (Symbol) |
3216 | 15 | MIB.addSym(Symbol, 0); |
3217 | 92 | else if (Addr.getGlobalValue())
3218 | 73 | MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3219 | 19 | else if (Addr.getReg()) {
3220 | 19 | unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0); |
3221 | 19 | MIB.addReg(Reg); |
3222 | 19 | } else |
3223 | 0 | return false; |
3224 | 16 | } else { |
3225 | 16 | unsigned CallReg = 0; |
3226 | 16 | if (Symbol) {
3227 | 8 | unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); |
3228 | 8 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), |
3229 | 8 | ADRPReg) |
3230 | 8 | .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE); |
3231 | 8 | |
3232 | 8 | CallReg = createResultReg(&AArch64::GPR64RegClass); |
3233 | 8 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
3234 | 8 | TII.get(AArch64::LDRXui), CallReg) |
3235 | 8 | .addReg(ADRPReg) |
3236 | 8 | .addSym(Symbol, |
3237 | 8 | AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); |
3238 | 16 | } else if (Addr.getGlobalValue())
3239 | 7 | CallReg = materializeGV(Addr.getGlobalValue());
3240 | 1 | else if (Addr.getReg())
3241 | 1 | CallReg = Addr.getReg(); |
3242 | 16 | |
3243 | 16 | if (!CallReg) |
3244 | 0 | return false; |
3245 | 16 | |
3246 | 16 | const MCInstrDesc &II = TII.get(AArch64::BLR); |
3247 | 16 | CallReg = constrainOperandRegClass(II, CallReg, 0); |
3248 | 16 | MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg); |
3249 | 16 | } |
3250 | 123 | |
3251 | 123 | // Add implicit physical register uses to the call. |
3252 | 123 | for (auto Reg : CLI.OutRegs) |
3253 | 241 | MIB.addReg(Reg, RegState::Implicit); |
3254 | 123 | |
3255 | 123 | // Add a register mask with the call-preserved registers. |
3256 | 123 | // Proper defs for return values will be added by setPhysRegsDeadExcept(). |
3257 | 123 | MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC)); |
3258 | 123 | |
3259 | 123 | CLI.Call = MIB; |
3260 | 123 | |
3261 | 123 | // Finish off the call including any return values. |
3262 | 123 | return finishCall(CLI, RetVT, NumBytes); |
3263 | 240 | } |
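| | // For illustration, the large-code-model MachO path materializes the
| | // callee address through the GOT, roughly (a sketch):
| | //   adrp x8, _callee@GOTPAGE
| | //   ldr  x8, [x8, _callee@GOTPAGEOFF]
| | //   blr  x8
| | // while small addressing emits a direct "bl _callee" where possible.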
3264 | | |
3265 | 31 | bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) { |
3266 | 31 | if (Alignment) |
3267 | 31 | return Len / Alignment <= 4; |
3268 | 31 | else |
3269 | 0 | return Len < 32; |
3270 | 0 | } |
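| | // For illustration: Len / Alignment counts the aligned chunks, so a
| | // 16-byte, 8-aligned copy (2 chunks) is inlined, while a 64-byte,
| | // 8-aligned copy (8 chunks) is left to a library call.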
3271 | | |
3272 | | bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src, |
3273 | 13 | uint64_t Len, unsigned Alignment) { |
3274 | 13 | // Make sure we don't bloat code by inlining very large memcpy's. |
3275 | 13 | if (!isMemCpySmall(Len, Alignment)) |
3276 | 0 | return false; |
3277 | 13 | |
3278 | 13 | int64_t UnscaledOffset = 0; |
3279 | 13 | Address OrigDest = Dest; |
3280 | 13 | Address OrigSrc = Src; |
3281 | 13 | |
3282 | 54 | while (Len) {
3283 | 41 | MVT VT;
3284 | 41 | if (!Alignment || Alignment >= 8) {
3285 | 29 | if (Len >= 8)
3286 | 27 | VT = MVT::i64;
3287 | 2 | else if (Len >= 4)
3288 | 0 | VT = MVT::i32;
3289 | 2 | else if (Len >= 2)
3290 | 0 | VT = MVT::i16; |
3291 | 2 | else { |
3292 | 2 | VT = MVT::i8; |
3293 | 2 | } |
3294 | 41 | } else { |
3295 | 12 | // Bound based on alignment. |
3296 | 12 | if (Len >= 4 && Alignment == 4)
3297 | 2 | VT = MVT::i32;
3298 | 10 | else if (Len >= 2 && Alignment == 2)
3299 | 3 | VT = MVT::i16; |
3300 | 7 | else { |
3301 | 7 | VT = MVT::i8; |
3302 | 7 | } |
3303 | 12 | } |
3304 | 41 | |
3305 | 41 | unsigned ResultReg = emitLoad(VT, VT, Src); |
3306 | 41 | if (!ResultReg) |
3307 | 0 | return false; |
3308 | 41 | |
3309 | 41 | if (!emitStore(VT, ResultReg, Dest))
3310 | 0 | return false; |
3311 | 41 | |
3312 | 41 | int64_t Size = VT.getSizeInBits() / 8; |
3313 | 41 | Len -= Size; |
3314 | 41 | UnscaledOffset += Size; |
3315 | 41 | |
3316 | 41 | // We need to recompute the unscaled offset for each iteration. |
3317 | 41 | Dest.setOffset(OrigDest.getOffset() + UnscaledOffset); |
3318 | 41 | Src.setOffset(OrigSrc.getOffset() + UnscaledOffset); |
3319 | 41 | } |
3320 | 13 | |
3321 | 13 | return true; |
3322 | 13 | } |
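| | // For illustration, a 12-byte copy with 4-byte alignment becomes three
| | // word-sized load/store pairs, roughly (a sketch):
| | //   ldr w8, [x1]        str w8, [x0]
| | //   ldr w8, [x1, #4]    str w8, [x0, #4]
| | //   ldr w8, [x1, #8]    str w8, [x0, #8]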
3323 | | |
3324 | | /// \brief Check if it is possible to fold the condition from the XALU intrinsic |
3325 | | /// into the user. The condition code will only be updated on success. |
3326 | | bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC, |
3327 | | const Instruction *I, |
3328 | 84 | const Value *Cond) { |
3329 | 84 | if (!isa<ExtractValueInst>(Cond)) |
3330 | 58 | return false; |
3331 | 26 | |
3332 | 26 | const auto *EV = cast<ExtractValueInst>(Cond); |
3333 | 26 | if (!isa<IntrinsicInst>(EV->getAggregateOperand())) |
3334 | 0 | return false; |
3335 | 26 | |
3336 | 26 | const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand()); |
3337 | 26 | MVT RetVT; |
3338 | 26 | const Function *Callee = II->getCalledFunction(); |
3339 | 26 | Type *RetTy = |
3340 | 26 | cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U); |
3341 | 26 | if (!isTypeLegal(RetTy, RetVT)) |
3342 | 0 | return false; |
3343 | 26 | |
3344 | 26 | if (RetVT != MVT::i32 && RetVT != MVT::i64)
3345 | 0 | return false; |
3346 | 26 | |
3347 | 26 | const Value *LHS = II->getArgOperand(0); |
3348 | 26 | const Value *RHS = II->getArgOperand(1); |
3349 | 26 | |
3350 | 26 | // Canonicalize immediate to the RHS. |
3351 | 26 | if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3352 | 0 | isCommutativeIntrinsic(II)) |
3353 | 0 | std::swap(LHS, RHS); |
3354 | 26 | |
3355 | 26 | // Simplify multiplies. |
3356 | 26 | Intrinsic::ID IID = II->getIntrinsicID(); |
3357 | 26 | switch (IID) { |
3358 | 16 | default: |
3359 | 16 | break; |
3360 | 5 | case Intrinsic::smul_with_overflow: |
3361 | 5 | if (const auto *C = dyn_cast<ConstantInt>(RHS)) |
3362 | 1 | if (C->getValue() == 2)
3363 | 1 | IID = Intrinsic::sadd_with_overflow; |
3364 | 5 | break; |
3365 | 5 | case Intrinsic::umul_with_overflow: |
3366 | 5 | if (const auto *C = dyn_cast<ConstantInt>(RHS)) |
3367 | 1 | if (C->getValue() == 2)
3368 | 1 | IID = Intrinsic::uadd_with_overflow; |
3369 | 5 | break; |
3370 | 26 | } |
3371 | 26 | |
3372 | 26 | AArch64CC::CondCode TmpCC; |
3373 | 26 | switch (IID) { |
3374 | 0 | default: |
3375 | 0 | return false; |
3376 | 9 | case Intrinsic::sadd_with_overflow: |
3377 | 9 | case Intrinsic::ssub_with_overflow: |
3378 | 9 | TmpCC = AArch64CC::VS; |
3379 | 9 | break; |
3380 | 5 | case Intrinsic::uadd_with_overflow: |
3381 | 5 | TmpCC = AArch64CC::HS; |
3382 | 5 | break; |
3383 | 4 | case Intrinsic::usub_with_overflow: |
3384 | 4 | TmpCC = AArch64CC::LO; |
3385 | 4 | break; |
3386 | 8 | case Intrinsic::smul_with_overflow: |
3387 | 8 | case Intrinsic::umul_with_overflow: |
3388 | 8 | TmpCC = AArch64CC::NE; |
3389 | 8 | break; |
3390 | 26 | } |
3391 | 26 | |
3392 | 26 | // Check if both instructions are in the same basic block. |
3393 | 26 | if (!isValueAvailable(II))
3394 | 0 | return false; |
3395 | 26 | |
3396 | 26 | // Make sure nothing is in the way.
3397 | 26 | BasicBlock::const_iterator Start(I); |
3398 | 26 | BasicBlock::const_iterator End(II); |
3399 | 66 | for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3400 | 40 | // We only expect extractvalue instructions between the intrinsic and the |
3401 | 40 | // instruction to be selected. |
3402 | 40 | if (!isa<ExtractValueInst>(Itr)) |
3403 | 0 | return false; |
3404 | 40 | |
3405 | 40 | // Check that the extractvalue operand comes from the intrinsic. |
3406 | 40 | const auto *EVI = cast<ExtractValueInst>(Itr); |
3407 | 40 | if (EVI->getAggregateOperand() != II) |
3408 | 0 | return false; |
3409 | 40 | } |
3410 | 26 | |
3411 | 26 | CC = TmpCC; |
3412 | 26 | return true; |
3413 | 84 | } |
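| | // For reference, the condition codes chosen above are:
| | //   sadd/ssub.with.overflow -> VS (signed overflow)
| | //   uadd.with.overflow      -> HS (carry set)
| | //   usub.with.overflow      -> LO (borrow, i.e. carry clear)
| | //   smul/umul.with.overflow -> NE (set by the compare the mul lowering emits)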
3414 | | |
3415 | 94 | bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { |
3416 | 94 | // FIXME: Handle more intrinsics. |
3417 | 94 | switch (II->getIntrinsicID()) { |
3418 | 7 | default: return false; |
3419 | 2 | case Intrinsic::frameaddress: { |
3420 | 2 | MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo(); |
3421 | 2 | MFI.setFrameAddressIsTaken(true); |
3422 | 2 | |
3423 | 2 | const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); |
3424 | 2 | unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF)); |
3425 | 2 | unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); |
3426 | 2 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
3427 | 2 | TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr); |
3428 | 2 | // Recursively load frame address |
3429 | 2 | // ldr x0, [fp] |
3430 | 2 | // ldr x0, [x0] |
3431 | 2 | // ldr x0, [x0] |
3432 | 2 | // ... |
3433 | 2 | unsigned DestReg; |
3434 | 2 | unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue(); |
3435 | 4 | while (Depth--4 ) { |
3436 | 2 | DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass, |
3437 | 2 | SrcReg, /*IsKill=*/true, 0); |
3438 | 2 | assert(DestReg && "Unexpected LDR instruction emission failure."); |
3439 | 2 | SrcReg = DestReg; |
3440 | 2 | } |
3441 | 2 | |
3442 | 2 | updateValueMap(II, SrcReg); |
3443 | 2 | return true; |
3444 | 94 | } |
3445 | 19 | case Intrinsic::memcpy: |
3446 | 19 | case Intrinsic::memmove: { |
3447 | 19 | const auto *MTI = cast<MemTransferInst>(II); |
3448 | 19 | // Don't handle volatile. |
3449 | 19 | if (MTI->isVolatile()) |
3450 | 0 | return false; |
3451 | 19 | |
3452 | 19 | // Disable inlining for memmove before calls to computeAddress. Otherwise,
3453 | 19 | // we would emit dead code because we don't currently handle memmoves.
3454 | 19 | bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3455 | 19 | if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3456 | 18 | // Small memcpy's are common enough that we want to do them without a call |
3457 | 18 | // if possible. |
3458 | 18 | uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue(); |
3459 | 18 | unsigned Alignment = MTI->getAlignment(); |
3460 | 18 | if (isMemCpySmall(Len, Alignment)) {
3461 | 13 | Address Dest, Src; |
3462 | 13 | if (!computeAddress(MTI->getRawDest(), Dest) || |
3463 | 13 | !computeAddress(MTI->getRawSource(), Src)) |
3464 | 0 | return false; |
3465 | 13 | if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3466 | 13 | return true; |
3467 | 6 | } |
3468 | 18 | } |
3469 | 6 | |
3470 | 6 | if (!MTI->getLength()->getType()->isIntegerTy(64))
3471 | 0 | return false; |
3472 | 6 | |
3473 | 6 | if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3474 | 6 | // Fast instruction selection doesn't support the special |
3475 | 6 | // address spaces. |
3476 | 0 | return false; |
3477 | 6 | |
3478 | 6 | const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3479 | 6 | return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2); |
3480 | 6 | } |
3481 | 1 | case Intrinsic::memset: { |
3482 | 1 | const MemSetInst *MSI = cast<MemSetInst>(II); |
3483 | 1 | // Don't handle volatile. |
3484 | 1 | if (MSI->isVolatile()) |
3485 | 0 | return false; |
3486 | 1 | |
3487 | 1 | if (!MSI->getLength()->getType()->isIntegerTy(64))
3488 | 0 | return false; |
3489 | 1 | |
3490 | 1 | if (MSI->getDestAddressSpace() > 255)
3491 | 1 | // Fast instruction selection doesn't support the special |
3492 | 1 | // address spaces. |
3493 | 0 | return false; |
3494 | 1 | |
3495 | 1 | return lowerCallTo(II, "memset", II->getNumArgOperands() - 2); |
3496 | 1 | } |
3497 | 12 | case Intrinsic::sin: |
3498 | 12 | case Intrinsic::cos: |
3499 | 12 | case Intrinsic::pow: { |
3500 | 12 | MVT RetVT; |
3501 | 12 | if (!isTypeLegal(II->getType(), RetVT)) |
3502 | 0 | return false; |
3503 | 12 | |
3504 | 12 | if (RetVT != MVT::f32 && RetVT != MVT::f64)
3505 | 0 | return false; |
3506 | 12 | |
3507 | 12 | static const RTLIB::Libcall LibCallTable[3][2] = { |
3508 | 12 | { RTLIB::SIN_F32, RTLIB::SIN_F64 }, |
3509 | 12 | { RTLIB::COS_F32, RTLIB::COS_F64 }, |
3510 | 12 | { RTLIB::POW_F32, RTLIB::POW_F64 } |
3511 | 12 | }; |
3512 | 12 | RTLIB::Libcall LC; |
3513 | 12 | bool Is64Bit = RetVT == MVT::f64; |
3514 | 12 | switch (II->getIntrinsicID()) { |
3515 | 0 | default: |
3516 | 0 | llvm_unreachable("Unexpected intrinsic."); |
3517 | 4 | case Intrinsic::sin: |
3518 | 4 | LC = LibCallTable[0][Is64Bit]; |
3519 | 4 | break; |
3520 | 4 | case Intrinsic::cos: |
3521 | 4 | LC = LibCallTable[1][Is64Bit]; |
3522 | 4 | break; |
3523 | 4 | case Intrinsic::pow: |
3524 | 4 | LC = LibCallTable[2][Is64Bit]; |
3525 | 4 | break; |
3526 | 12 | } |
3527 | 12 | |
3528 | 12 | ArgListTy Args; |
3529 | 12 | Args.reserve(II->getNumArgOperands()); |
3530 | 12 | |
3531 | 12 | // Populate the argument list. |
3532 | 16 | for (auto &Arg : II->arg_operands()) { |
3533 | 16 | ArgListEntry Entry; |
3534 | 16 | Entry.Val = Arg; |
3535 | 16 | Entry.Ty = Arg->getType(); |
3536 | 16 | Args.push_back(Entry); |
3537 | 16 | } |
3538 | 12 | |
3539 | 12 | CallLoweringInfo CLI; |
3540 | 12 | MCContext &Ctx = MF->getContext(); |
3541 | 12 | CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(), |
3542 | 12 | TLI.getLibcallName(LC), std::move(Args)); |
3543 | 12 | if (!lowerCallTo(CLI)) |
3544 | 0 | return false; |
3545 | 12 | updateValueMap(II, CLI.ResultReg); |
3546 | 12 | return true; |
3547 | 12 | } |
3548 | 2 | case Intrinsic::fabs: { |
3549 | 2 | MVT VT; |
3550 | 2 | if (!isTypeLegal(II->getType(), VT)) |
3551 | 0 | return false; |
3552 | 2 | |
3553 | 2 | unsigned Opc; |
3554 | 2 | switch (VT.SimpleTy) { |
3555 | 0 | default: |
3556 | 0 | return false; |
3557 | 1 | case MVT::f32: |
3558 | 1 | Opc = AArch64::FABSSr; |
3559 | 1 | break; |
3560 | 1 | case MVT::f64: |
3561 | 1 | Opc = AArch64::FABSDr; |
3562 | 1 | break; |
3563 | 2 | } |
3564 | 2 | unsigned SrcReg = getRegForValue(II->getOperand(0)); |
3565 | 2 | if (!SrcReg) |
3566 | 0 | return false; |
3567 | 2 | bool SrcRegIsKill = hasTrivialKill(II->getOperand(0)); |
3568 | 2 | unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); |
3569 | 2 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) |
3570 | 2 | .addReg(SrcReg, getKillRegState(SrcRegIsKill)); |
3571 | 2 | updateValueMap(II, ResultReg); |
3572 | 2 | return true; |
3573 | 2 | } |
3574 | 1 | case Intrinsic::trap: |
3575 | 1 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK)) |
3576 | 1 | .addImm(1); |
3577 | 1 | return true; |
3578 | 2 | |
3579 | 2 | case Intrinsic::sqrt: { |
3580 | 2 | Type *RetTy = II->getCalledFunction()->getReturnType(); |
3581 | 2 | |
3582 | 2 | MVT VT; |
3583 | 2 | if (!isTypeLegal(RetTy, VT)) |
3584 | 0 | return false; |
3585 | 2 | |
3586 | 2 | unsigned Op0Reg = getRegForValue(II->getOperand(0)); |
3587 | 2 | if (!Op0Reg) |
3588 | 0 | return false; |
3589 | 2 | bool Op0IsKill = hasTrivialKill(II->getOperand(0)); |
3590 | 2 | |
3591 | 2 | unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill); |
3592 | 2 | if (!ResultReg) |
3593 | 0 | return false; |
3594 | 2 | |
3595 | 2 | updateValueMap(II, ResultReg); |
3596 | 2 | return true; |
3597 | 2 | } |
3598 | 48 | case Intrinsic::sadd_with_overflow: |
3599 | 48 | case Intrinsic::uadd_with_overflow: |
3600 | 48 | case Intrinsic::ssub_with_overflow: |
3601 | 48 | case Intrinsic::usub_with_overflow: |
3602 | 48 | case Intrinsic::smul_with_overflow: |
3603 | 48 | case Intrinsic::umul_with_overflow: { |
3604 | 48 | // This implements the basic lowering of the xalu with overflow intrinsics. |
3605 | 48 | const Function *Callee = II->getCalledFunction(); |
3606 | 48 | auto *Ty = cast<StructType>(Callee->getReturnType()); |
3607 | 48 | Type *RetTy = Ty->getTypeAtIndex(0U); |
3608 | 48 | |
3609 | 48 | MVT VT; |
3610 | 48 | if (!isTypeLegal(RetTy, VT)) |
3611 | 0 | return false; |
3612 | 48 | |
3613 | 48 | if (VT != MVT::i32 && VT != MVT::i64)
3614 | 0 | return false; |
3615 | 48 | |
3616 | 48 | const Value *LHS = II->getArgOperand(0); |
3617 | 48 | const Value *RHS = II->getArgOperand(1); |
3618 | 48 | // Canonicalize immediate to the RHS. |
3619 | 48 | if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3620 | 0 | isCommutativeIntrinsic(II)) |
3621 | 0 | std::swap(LHS, RHS); |
3622 | 48 | |
3623 | 48 | // Simplify multiplies. |
3624 | 48 | Intrinsic::ID IID = II->getIntrinsicID(); |
3625 | 48 | switch (IID) { |
3626 | 31 | default: |
3627 | 31 | break; |
3628 | 8 | case Intrinsic::smul_with_overflow: |
3629 | 8 | if (const auto *C = dyn_cast<ConstantInt>(RHS)) |
3630 | 2 | if (C->getValue() == 2) {
3631 | 2 | IID = Intrinsic::sadd_with_overflow; |
3632 | 2 | RHS = LHS; |
3633 | 2 | } |
3634 | 8 | break; |
3635 | 9 | case Intrinsic::umul_with_overflow: |
3636 | 9 | if (const auto *C = dyn_cast<ConstantInt>(RHS)) |
3637 | 3 | if (C->getValue() == 2) {
3638 | 2 | IID = Intrinsic::uadd_with_overflow; |
3639 | 2 | RHS = LHS; |
3640 | 2 | } |
3641 | 9 | break; |
3642 | 48 | } |
3643 | 48 | |
3644 | 48 | unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0; |
3645 | 48 | AArch64CC::CondCode CC = AArch64CC::Invalid; |
3646 | 48 | switch (IID) { |
3647 | 0 | default: llvm_unreachable("Unexpected intrinsic!");
3648 | 14 | case Intrinsic::sadd_with_overflow: |
3649 | 14 | ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true); |
3650 | 14 | CC = AArch64CC::VS; |
3651 | 14 | break; |
3652 | 8 | case Intrinsic::uadd_with_overflow: |
3653 | 8 | ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true); |
3654 | 8 | CC = AArch64CC::HS; |
3655 | 8 | break; |
3656 | 7 | case Intrinsic::ssub_with_overflow: |
3657 | 7 | ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true); |
3658 | 7 | CC = AArch64CC::VS; |
3659 | 7 | break; |
3660 | 6 | case Intrinsic::usub_with_overflow: |
3661 | 6 | ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true); |
3662 | 6 | CC = AArch64CC::LO; |
3663 | 6 | break; |
3664 | 6 | case Intrinsic::smul_with_overflow: { |
3665 | 6 | CC = AArch64CC::NE; |
3666 | 6 | unsigned LHSReg = getRegForValue(LHS); |
3667 | 6 | if (!LHSReg) |
3668 | 0 | return false; |
3669 | 6 | bool LHSIsKill = hasTrivialKill(LHS); |
3670 | 6 | |
3671 | 6 | unsigned RHSReg = getRegForValue(RHS); |
3672 | 6 | if (!RHSReg) |
3673 | 0 | return false; |
3674 | 6 | bool RHSIsKill = hasTrivialKill(RHS); |
3675 | 6 | |
3676 | 6 | if (VT == MVT::i32) {
3677 | 3 | MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill); |
3678 | 3 | unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg, |
3679 | 3 | /*IsKill=*/false, 32); |
3680 | 3 | MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true, |
3681 | 3 | AArch64::sub_32); |
3682 | 3 | ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true, |
3683 | 3 | AArch64::sub_32); |
3684 | 3 | emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false, |
3685 | 3 | AArch64_AM::ASR, 31, /*WantResult=*/false); |
3686 | 6 | } else { |
3687 | 3 | assert(VT == MVT::i64 && "Unexpected value type."); |
3688 | 3 | // LHSReg and RHSReg cannot be killed by this Mul, since they are |
3689 | 3 | // reused in the next instruction. |
3690 | 3 | MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg, |
3691 | 3 | /*IsKill=*/false); |
3692 | 3 | unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill, |
3693 | 3 | RHSReg, RHSIsKill); |
3694 | 3 | emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false, |
3695 | 3 | AArch64_AM::ASR, 63, /*WantResult=*/false); |
3696 | 3 | } |
3697 | 6 | break; |
3698 | 6 | } |
3699 | 7 | case Intrinsic::umul_with_overflow: { |
3700 | 7 | CC = AArch64CC::NE; |
3701 | 7 | unsigned LHSReg = getRegForValue(LHS); |
3702 | 7 | if (!LHSReg) |
3703 | 0 | return false; |
3704 | 7 | bool LHSIsKill = hasTrivialKill(LHS); |
3705 | 7 | |
3706 | 7 | unsigned RHSReg = getRegForValue(RHS); |
3707 | 7 | if (!RHSReg) |
3708 | 0 | return false; |
3709 | 7 | bool RHSIsKill = hasTrivialKill(RHS); |
3710 | 7 | |
3711 | 7 | if (VT == MVT::i32) {
3712 | 3 | MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill); |
3713 | 3 | emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg, |
3714 | 3 | /*IsKill=*/false, AArch64_AM::LSR, 32, |
3715 | 3 | /*WantResult=*/false); |
3716 | 3 | MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true, |
3717 | 3 | AArch64::sub_32); |
3718 | 7 | } else { |
3719 | 4 | assert(VT == MVT::i64 && "Unexpected value type."); |
3720 | 4 | // LHSReg and RHSReg cannot be killed by this Mul, since they are |
3721 | 4 | // reused in the next instruction. |
3722 | 4 | MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg, |
3723 | 4 | /*IsKill=*/false); |
3724 | 4 | unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill, |
3725 | 4 | RHSReg, RHSIsKill); |
3726 | 4 | emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg, |
3727 | 4 | /*IsKill=*/false, /*WantResult=*/false); |
3728 | 4 | } |
3729 | 6 | break; |
3730 | 6 | } |
3731 | 48 | } |
3732 | 48 | |
3733 | 48 | if (MulReg) {
3734 | 13 | ResultReg1 = createResultReg(TLI.getRegClassFor(VT)); |
3735 | 13 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
3736 | 13 | TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg); |
3737 | 13 | } |
3738 | 19 | |
3739 | 19 | ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass, |
3740 | 19 | AArch64::WZR, /*IsKill=*/true, AArch64::WZR, |
3741 | 19 | /*IsKill=*/true, getInvertedCondCode(CC)); |
3742 | 19 | (void)ResultReg2; |
3743 | 19 | assert((ResultReg1 + 1) == ResultReg2 && |
3744 | 19 | "Nonconsecutive result registers."); |
3745 | 19 | updateValueMap(II, ResultReg1, 2); |
3746 | 19 | return true; |
3747 | 19 | } |
3748 | 0 | } |
3749 | 0 | return false; |
3750 | 0 | } |
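| | // For illustration, the i32 smul.with.overflow path is expected to yield
| | // roughly (a sketch, register numbers arbitrary):
| | //   smull x8, w0, w1          // full 64-bit product
| | //   lsr   x9, x8, #32         // high half of the product
| | //   cmp   w9, w8, asr #31     // NE iff high half != sign of low half
| | //   cset  w10, ne             // overflow bit (CSINC wzr, wzr, eq)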
3751 | | |
3752 | 1.31k | bool AArch64FastISel::selectRet(const Instruction *I) { |
3753 | 1.31k | const ReturnInst *Ret = cast<ReturnInst>(I); |
3754 | 1.31k | const Function &F = *I->getParent()->getParent(); |
3755 | 1.31k | |
3756 | 1.31k | if (!FuncInfo.CanLowerReturn) |
3757 | 0 | return false; |
3758 | 1.31k | |
3759 | 1.31k | if (F.isVarArg())
3760 | 1 | return false; |
3761 | 1.31k | |
3762 | 1.31k | if (TLI.supportSwiftError() &&
3763 | 1.31k | F.getAttributes().hasAttrSomewhere(Attribute::SwiftError)) |
3764 | 11 | return false; |
3765 | 1.30k | |
3766 | 1.30k | if (TLI.supportSplitCSR(FuncInfo.MF))
3767 | 3 | return false; |
3768 | 1.30k | |
3769 | 1.30k | // Build a list of return value registers. |
3770 | 1.30k | SmallVector<unsigned, 4> RetRegs; |
3771 | 1.30k | |
3772 | 1.30k | if (Ret->getNumOperands() > 0) {
3773 | 982 | CallingConv::ID CC = F.getCallingConv(); |
3774 | 982 | SmallVector<ISD::OutputArg, 4> Outs; |
3775 | 982 | GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL); |
3776 | 982 | |
3777 | 982 | // Analyze operands of the call, assigning locations to each operand. |
3778 | 982 | SmallVector<CCValAssign, 16> ValLocs; |
3779 | 982 | CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext()); |
3780 | 1 | CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS |
3781 | 981 | : RetCC_AArch64_AAPCS; |
3782 | 982 | CCInfo.AnalyzeReturn(Outs, RetCC); |
3783 | 982 | |
3784 | 982 | // Only handle a single return value for now. |
3785 | 982 | if (ValLocs.size() != 1) |
3786 | 10 | return false; |
3787 | 972 | |
3788 | 972 | CCValAssign &VA = ValLocs[0]; |
3789 | 972 | const Value *RV = Ret->getOperand(0); |
3790 | 972 | |
3791 | 972 | // Don't bother handling odd stuff for now. |
3792 | 972 | if ((VA.getLocInfo() != CCValAssign::Full) && |
3793 | 72 | (VA.getLocInfo() != CCValAssign::BCvt)) |
3794 | 0 | return false; |
3795 | 972 | |
3796 | 972 | // Only handle register returns for now. |
3797 | 972 | if (!VA.isRegLoc())
3798 | 0 | return false; |
3799 | 972 | |
3800 | 972 | unsigned Reg = getRegForValue(RV); |
3801 | 972 | if (Reg == 0) |
3802 | 4 | return false; |
3803 | 968 | |
3804 | 968 | unsigned SrcReg = Reg + VA.getValNo(); |
3805 | 968 | unsigned DestReg = VA.getLocReg(); |
3806 | 968 | // Avoid a cross-class copy. This is very unlikely. |
3807 | 968 | if (!MRI.getRegClass(SrcReg)->contains(DestReg)) |
3808 | 0 | return false; |
3809 | 968 | |
3810 | 968 | EVT RVEVT = TLI.getValueType(DL, RV->getType()); |
3811 | 968 | if (!RVEVT.isSimple()) |
3812 | 0 | return false; |
3813 | 968 | |
3814 | 968 | // Vectors (of > 1 lane) in big endian need tricky handling. |
3815 | 968 | if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 &&
3816 | 92 | !Subtarget->isLittleEndian()) |
3817 | 60 | return false; |
3818 | 908 | |
3819 | 908 | MVT RVVT = RVEVT.getSimpleVT(); |
3820 | 908 | if (RVVT == MVT::f128) |
3821 | 8 | return false; |
3822 | 900 | |
3823 | 900 | MVT DestVT = VA.getValVT(); |
3824 | 900 | // Special handling for extended integers. |
3825 | 900 | if (RVVT != DestVT) {
3826 | 163 | if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3827 | 0 | return false;
3828 | 163 | 
3829 | 163 | if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3830 | 10 | return false; |
3831 | 153 | |
3832 | 153 | bool IsZExt = Outs[0].Flags.isZExt(); |
3833 | 153 | SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt); |
3834 | 153 | if (SrcReg == 0) |
3835 | 0 | return false; |
3836 | 890 | } |
3837 | 890 | |
3838 | 890 | // Make the copy. |
3839 | 890 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
3840 | 890 | TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg); |
3841 | 890 | |
3842 | 890 | // Add register to return instruction. |
3843 | 890 | RetRegs.push_back(VA.getLocReg()); |
3844 | 890 | } |
3845 | 1.30k | |
3846 | 1.21k | MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
3847 | 1.21k | TII.get(AArch64::RET_ReallyLR)); |
3848 | 1.21k | for (unsigned RetReg : RetRegs) |
3849 | 890 | MIB.addReg(RetReg, RegState::Implicit); |
3850 | 1.21k | return true; |
3851 | 1.31k | } |
3852 | | |
3853 | 14 | bool AArch64FastISel::selectTrunc(const Instruction *I) { |
3854 | 14 | Type *DestTy = I->getType(); |
3855 | 14 | Value *Op = I->getOperand(0); |
3856 | 14 | Type *SrcTy = Op->getType(); |
3857 | 14 | |
3858 | 14 | EVT SrcEVT = TLI.getValueType(DL, SrcTy, true); |
3859 | 14 | EVT DestEVT = TLI.getValueType(DL, DestTy, true); |
3860 | 14 | if (!SrcEVT.isSimple()) |
3861 | 0 | return false; |
3862 | 14 | if (!DestEVT.isSimple())
3863 | 0 | return false; |
3864 | 14 | |
3865 | 14 | MVT SrcVT = SrcEVT.getSimpleVT(); |
3866 | 14 | MVT DestVT = DestEVT.getSimpleVT(); |
3867 | 14 | |
3868 | 14 | if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3869 | 2 | SrcVT != MVT::i8)
3870 | 2 | return false;
3871 | 12 | if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3872 | 5 | DestVT != MVT::i1) |
3873 | 0 | return false; |
3874 | 12 | |
3875 | 12 | unsigned SrcReg = getRegForValue(Op); |
3876 | 12 | if (!SrcReg) |
3877 | 0 | return false; |
3878 | 12 | bool SrcIsKill = hasTrivialKill(Op); |
3879 | 12 | |
3880 | 12 | // If we're truncating from i64 to a smaller non-legal type then generate an |
3881 | 12 | // AND. Otherwise, we know the high bits are undefined and a truncate only |
3882 | 12 | // generate a COPY. We cannot mark the source register also as result |
3883 | 12 | // register, because this can incorrectly transfer the kill flag onto the |
3884 | 12 | // source register. |
3885 | 12 | unsigned ResultReg; |
3886 | 12 | if (SrcVT == MVT::i64) {
3887 | 6 | uint64_t Mask = 0; |
3888 | 6 | switch (DestVT.SimpleTy) { |
3889 | 0 | default: |
3890 | 0 | // Trunc i64 to i32 is handled by the target-independent fast-isel. |
3891 | 0 | return false; |
3892 | 3 | case MVT::i1: |
3893 | 3 | Mask = 0x1; |
3894 | 3 | break; |
3895 | 2 | case MVT::i8: |
3896 | 2 | Mask = 0xff; |
3897 | 2 | break; |
3898 | 1 | case MVT::i16: |
3899 | 1 | Mask = 0xffff; |
3900 | 1 | break; |
3901 | 6 | } |
3902 | 6 | // Issue an extract_subreg to get the lower 32-bits. |
3903 | 6 | unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill, |
3904 | 6 | AArch64::sub_32); |
3905 | 6 | // Create the AND instruction which performs the actual truncation. |
3906 | 6 | ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask); |
3907 | 6 | assert(ResultReg && "Unexpected AND instruction emission failure."); |
3908 | 12 | } else { |
3909 | 6 | ResultReg = createResultReg(&AArch64::GPR32RegClass); |
3910 | 6 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
3911 | 6 | TII.get(TargetOpcode::COPY), ResultReg) |
3912 | 6 | .addReg(SrcReg, getKillRegState(SrcIsKill)); |
3913 | 6 | } |
3914 | 12 | |
3915 | 12 | updateValueMap(I, ResultReg); |
3916 | 12 | return true; |
3917 | 14 | } |
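| | // For illustration, "%r = trunc i64 %x to i8" copies the sub_32
| | // subregister and masks it (a sketch): and w0, w8, #0xff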
3918 | | |
3919 | 141 | unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) { |
3920 | 141 | assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 || |
3921 | 141 | DestVT == MVT::i64) && |
3922 | 141 | "Unexpected value type."); |
3923 | 141 | // Handle i8 and i16 as i32. |
3924 | 141 | if (DestVT == MVT::i8 || DestVT == MVT::i16)
3925 | 3 | DestVT = MVT::i32;
3926 | 141 | 
3927 | 141 | if (IsZExt) {
3928 | 132 | unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
3929 | 132 | assert(ResultReg && "Unexpected AND instruction emission failure.");
3930 | 132 | if (DestVT == MVT::i64) {
3931 | 0 | // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the |
3932 | 0 | // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd. |
3933 | 0 | unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); |
3934 | 0 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
3935 | 0 | TII.get(AArch64::SUBREG_TO_REG), Reg64) |
3936 | 0 | .addImm(0) |
3937 | 0 | .addReg(ResultReg) |
3938 | 0 | .addImm(AArch64::sub_32); |
3939 | 0 | ResultReg = Reg64; |
3940 | 0 | } |
3941 | 132 | return ResultReg; |
3942 | 0 | } else { |
3943 | 9 | if (DestVT == MVT::i64) {
3944 | 0 | // FIXME: We're SExt i1 to i64. |
3945 | 0 | return 0; |
3946 | 0 | } |
3947 | 9 | return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg, |
3948 | 9 | /*TODO:IsKill=*/false, 0, 0); |
3949 | 9 | } |
3950 | 141 | } |
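| | // For illustration (a sketch): zext i1 becomes "and w0, w0, #0x1", while
| | // sext i1 becomes "sbfx w0, w0, #0, #1" (SBFMWri with immr = imms = 0),
| | // replicating bit 0 across the register.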
3951 | | |
3952 | | unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill, |
3953 | 21 | unsigned Op1, bool Op1IsKill) { |
3954 | 21 | unsigned Opc, ZReg; |
3955 | 21 | switch (RetVT.SimpleTy) { |
3956 | 0 | default: return 0; |
3957 | 5 | case MVT::i8: |
3958 | 5 | case MVT::i16: |
3959 | 5 | case MVT::i32: |
3960 | 5 | RetVT = MVT::i32; |
3961 | 5 | Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break; |
3962 | 16 | case MVT::i64: |
3963 | 16 | Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break; |
3964 | 21 | } |
3965 | 21 | |
3966 | 21 | const TargetRegisterClass *RC = |
3967 | 21 | (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3968 | 21 | return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill, |
3969 | 21 | ZReg, /*IsKill=*/true);
3970 | 21 | } |
3971 | | |
3972 | | unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill, |
3973 | 3 | unsigned Op1, bool Op1IsKill) { |
3974 | 3 | if (RetVT != MVT::i64) |
3975 | 0 | return 0; |
3976 | 3 | |
3977 | 3 | return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass, |
3978 | 3 | Op0, Op0IsKill, Op1, Op1IsKill, |
3979 | 3 | AArch64::XZR, /*IsKill=*/true); |
3980 | 3 | } |
3981 | | |
3982 | | unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill, |
3983 | 3 | unsigned Op1, bool Op1IsKill) { |
3984 | 3 | if (RetVT != MVT::i64) |
3985 | 0 | return 0; |
3986 | 3 | |
3987 | 3 | return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass, |
3988 | 3 | Op0, Op0IsKill, Op1, Op1IsKill, |
3989 | 3 | AArch64::XZR, /*IsKill=*/true); |
3990 | 3 | } |
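Likewise for the two widening multiplies above, a small illustrative sketch (helper names assumed): SMADDL/UMADDL multiply two 32-bit operands into a 64-bit result and add a 64-bit accumulator, so XZR as accumulator yields SMULL/UMULL.

    #include <cstdint>

    // SMADDLrrr Xd, Wn, Wm, XZR: signed 32x32 -> 64 multiply.
    int64_t smull(int32_t Wn, int32_t Wm) {
      return static_cast<int64_t>(Wn) * static_cast<int64_t>(Wm);
    }

    // UMADDLrrr Xd, Wn, Wm, XZR: unsigned 32x32 -> 64 multiply.
    uint64_t umull(uint32_t Wn, uint32_t Wm) {
      return static_cast<uint64_t>(Wn) * static_cast<uint64_t>(Wm);
    }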
3991 | | |
3992 | | unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, |
3993 | 4 | unsigned Op1Reg, bool Op1IsKill) { |
3994 | 4 | unsigned Opc = 0; |
3995 | 4 | bool NeedTrunc = false; |
3996 | 4 | uint64_t Mask = 0; |
3997 | 4 | switch (RetVT.SimpleTy) { |
3998 | 0 | default: return 0; |
3999 | 1 | case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break; |
4000 | 1 | case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break; |
4001 | 1 | case MVT::i32: Opc = AArch64::LSLVWr; break; |
4002 | 1 | case MVT::i64: Opc = AArch64::LSLVXr; break; |
4003 | 4 | } |
4004 | 4 | |
4005 | 4 | const TargetRegisterClass *RC = |
4006 | 4 | (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4007 | 4 | if (NeedTrunc) {
4008 | 2 | Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask); |
4009 | 2 | Op1IsKill = true; |
4010 | 2 | } |
4011 | 4 | unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg, |
4012 | 4 | Op1IsKill); |
4013 | 4 | if (NeedTrunc) |
4014 | 2 | ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); |
4015 | 4 | return ResultReg; |
4016 | 4 | } |
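A hedged model of the i8/i16 handling above (illustrative only): sub-word variable shifts run on W registers, so the shift amount is first masked to the narrow width and the result is truncated back with the same mask; LSLV itself shifts by the amount modulo the register size.

    #include <cstdint>

    uint8_t lsl8(uint8_t Val, uint32_t Amt) {
      uint32_t A = Amt & 0xffu;                               // emitAnd_ri on Op1
      uint32_t Wide = static_cast<uint32_t>(Val) << (A % 32); // LSLVWr (mod 32)
      return static_cast<uint8_t>(Wide & 0xffu);              // emitAnd_ri on result
    }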
4017 | | |
4018 | | unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0, |
4019 | | bool Op0IsKill, uint64_t Shift, |
4020 | 55 | bool IsZExt) { |
4021 | 55 | assert(RetVT.SimpleTy >= SrcVT.SimpleTy && |
4022 | 55 | "Unexpected source/return type pair."); |
4023 | 55 | assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || |
4024 | 55 | SrcVT == MVT::i32 || SrcVT == MVT::i64) && |
4025 | 55 | "Unexpected source value type."); |
4026 | 55 | assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || |
4027 | 55 | RetVT == MVT::i64) && "Unexpected return value type."); |
4028 | 55 | |
4029 | 55 | bool Is64Bit = (RetVT == MVT::i64); |
4030 | 55 | unsigned RegSize = Is64Bit ? 64 : 32;
4031 | 55 | unsigned DstBits = RetVT.getSizeInBits(); |
4032 | 55 | unsigned SrcBits = SrcVT.getSizeInBits(); |
4033 | 55 | const TargetRegisterClass *RC = |
4034 | 55 | Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4035 | 55 | |
4036 | 55 | // Just emit a copy for "zero" shifts. |
4037 | 55 | if (Shift == 0) {
4038 | 2 | if (RetVT == SrcVT) {
4039 | 1 | unsigned ResultReg = createResultReg(RC); |
4040 | 1 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
4041 | 1 | TII.get(TargetOpcode::COPY), ResultReg) |
4042 | 1 | .addReg(Op0, getKillRegState(Op0IsKill)); |
4043 | 1 | return ResultReg; |
4044 | 1 | } else |
4045 | 1 | return emitIntExt(SrcVT, Op0, RetVT, IsZExt); |
4046 | 53 | } |
4047 | 53 | |
4048 | 53 | // Don't deal with undefined shifts. |
4049 | 53 | if (Shift >= DstBits)
4050 | 14 | return 0; |
4051 | 39 | |
4052 | 39 | // For immediate shifts we can fold the zero-/sign-extension into the shift. |
4053 | 39 | // {S|U}BFM Wd, Wn, #r, #s |
4054 | 39 | // Wd<32+s-r,32-r> = Wn<s:0> when r > s |
4055 | 39 | |
4056 | 39 | // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 |
4057 | 39 | // %2 = shl i16 %1, 4 |
4058 | 39 | // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7 |
4059 | 39 | // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext |
4060 | 39 | // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext |
4061 | 39 | // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext |
4062 | 39 | |
4063 | 39 | // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 |
4064 | 39 | // %2 = shl i16 %1, 8 |
4065 | 39 | // Wd<32+7-24,32-24> = Wn<7:0> |
4066 | 39 | // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext |
4067 | 39 | // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext |
4068 | 39 | // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext |
4069 | 39 | |
4070 | 39 | // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 |
4071 | 39 | // %2 = shl i16 %1, 12 |
4072 | 39 | // Wd<32+3-20,32-20> = Wn<3:0> |
4073 | 39 | // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext |
4074 | 39 | // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext |
4075 | 39 | // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext |
4076 | 39 | |
4077 | 39 | unsigned ImmR = RegSize - Shift; |
4078 | 39 | // Limit the width to the length of the source type. |
4079 | 39 | unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift); |
4080 | 39 | static const unsigned OpcTable[2][2] = { |
4081 | 39 | {AArch64::SBFMWri, AArch64::SBFMXri}, |
4082 | 39 | {AArch64::UBFMWri, AArch64::UBFMXri} |
4083 | 39 | }; |
4084 | 39 | unsigned Opc = OpcTable[IsZExt][Is64Bit]; |
4085 | 39 | if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4086 | 10 | unsigned TmpReg = MRI.createVirtualRegister(RC); |
4087 | 10 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
4088 | 10 | TII.get(AArch64::SUBREG_TO_REG), TmpReg) |
4089 | 10 | .addImm(0) |
4090 | 10 | .addReg(Op0, getKillRegState(Op0IsKill)) |
4091 | 10 | .addImm(AArch64::sub_32); |
4092 | 10 | Op0 = TmpReg; |
4093 | 10 | Op0IsKill = true; |
4094 | 10 | } |
4095 | 55 | return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS); |
4096 | 55 | } |
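The folding in the comment tables above can be checked with a short C++ model of UBFM (illustrative, simplified to the two forms used here): for r > s the field Wn<s:0> lands at bit RegSize-r, which is exactly a zero-extend folded into a left shift.

    #include <cstdint>

    uint32_t ubfm32(uint32_t Wn, unsigned R, unsigned S) {
      if (S >= R)                                  // UBFX form: extract Wn<S:R>
        return (Wn >> R) & (S - R == 31 ? ~0u : ((1u << (S - R + 1)) - 1));
      uint32_t Field = Wn & ((1u << (S + 1)) - 1); // keep Wn<S:0>
      return Field << (32 - R);                    // UBFIZ form: shift into place
    }
    // zext i8 + shl by 4: ubfm32(0xAA, /*R=*/28, /*S=*/7) == 0xAA0,
    // matching the zext row of the first table above.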
4097 | | |
4098 | | unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, |
4099 | 4 | unsigned Op1Reg, bool Op1IsKill) { |
4100 | 4 | unsigned Opc = 0; |
4101 | 4 | bool NeedTrunc = false; |
4102 | 4 | uint64_t Mask = 0; |
4103 | 4 | switch (RetVT.SimpleTy) { |
4104 | 0 | default: return 0; |
4105 | 1 | case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break; |
4106 | 1 | case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break; |
4107 | 1 | case MVT::i32: Opc = AArch64::LSRVWr; break; |
4108 | 1 | case MVT::i64: Opc = AArch64::LSRVXr; break; |
4109 | 4 | } |
4110 | 4 | |
4111 | 4 | const TargetRegisterClass *RC = |
4112 | 4 | (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4113 | 4 | if (NeedTrunc) {
4114 | 2 | Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask); |
4115 | 2 | Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask); |
4116 | 2 | Op0IsKill = Op1IsKill = true; |
4117 | 2 | } |
4118 | 4 | unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg, |
4119 | 4 | Op1IsKill); |
4120 | 4 | if (NeedTrunc) |
4121 | 2 | ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); |
4122 | 4 | return ResultReg; |
4123 | 4 | } |
4124 | | |
4125 | | unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, |
4126 | | bool Op0IsKill, uint64_t Shift, |
4127 | 27 | bool IsZExt) { |
4128 | 27 | assert(RetVT.SimpleTy >= SrcVT.SimpleTy && |
4129 | 27 | "Unexpected source/return type pair."); |
4130 | 27 | assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || |
4131 | 27 | SrcVT == MVT::i32 || SrcVT == MVT::i64) && |
4132 | 27 | "Unexpected source value type."); |
4133 | 27 | assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || |
4134 | 27 | RetVT == MVT::i64) && "Unexpected return value type."); |
4135 | 27 | |
4136 | 27 | bool Is64Bit = (RetVT == MVT::i64); |
4137 | 27 | unsigned RegSize = Is64Bit ? 64 : 32;
4138 | 27 | unsigned DstBits = RetVT.getSizeInBits(); |
4139 | 27 | unsigned SrcBits = SrcVT.getSizeInBits(); |
4140 | 27 | const TargetRegisterClass *RC = |
4141 | 27 | Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4142 | 27 | |
4143 | 27 | // Just emit a copy for "zero" shifts. |
4144 | 27 | if (Shift == 0) {
4145 | 2 | if (RetVT == SrcVT) {
4146 | 1 | unsigned ResultReg = createResultReg(RC); |
4147 | 1 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
4148 | 1 | TII.get(TargetOpcode::COPY), ResultReg) |
4149 | 1 | .addReg(Op0, getKillRegState(Op0IsKill)); |
4150 | 1 | return ResultReg; |
4151 | 1 | } else |
4152 | 1 | return emitIntExt(SrcVT, Op0, RetVT, IsZExt); |
4153 | 25 | } |
4154 | 25 | |
4155 | 25 | // Don't deal with undefined shifts. |
4156 | 25 | if (Shift >= DstBits)
4157 | 0 | return 0; |
4158 | 25 | |
4159 | 25 | // For immediate shifts we can fold the zero-/sign-extension into the shift. |
4160 | 25 | // {S|U}BFM Wd, Wn, #r, #s |
4161 | 25 | // Wd<s-r:0> = Wn<s:r> when r <= s |
4162 | 25 | |
4163 | 25 | // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 |
4164 | 25 | // %2 = lshr i16 %1, 4 |
4165 | 25 | // Wd<7-4:0> = Wn<7:4> |
4166 | 25 | // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext |
4167 | 25 | // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext |
4168 | 25 | // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext |
4169 | 25 | |
4170 | 25 | // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 |
4171 | 25 | // %2 = lshr i16 %1, 8 |
4172 | 25 | // Wd<7-7,0> = Wn<7:7> |
4173 | 25 | // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext |
4174 | 25 | // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext |
4175 | 25 | // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext |
4176 | 25 | |
4177 | 25 | // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 |
4178 | 25 | // %2 = lshr i16 %1, 12 |
4179 | 25 | // Wd<7-7,0> = Wn<7:7> <- clamp r to 7 |
4180 | 25 | // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext |
4181 | 25 | // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext |
4182 | 25 | // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext |
4183 | 25 | |
4184 | 25 | if (Shift >= SrcBits && IsZExt)
4185 | 3 | return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT); |
4186 | 22 | |
4187 | 22 | // It is not possible to fold a sign-extend into the LShr instruction. In this |
4188 | 22 | // case emit a sign-extend. |
4189 | 22 | if (!IsZExt) {
4190 | 4 | Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt); |
4191 | 4 | if (!Op0) |
4192 | 0 | return 0; |
4193 | 4 | Op0IsKill = true; |
4194 | 4 | SrcVT = RetVT; |
4195 | 4 | SrcBits = SrcVT.getSizeInBits(); |
4196 | 4 | IsZExt = true; |
4197 | 4 | } |
4198 | 22 | |
4199 | 22 | unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift); |
4200 | 22 | unsigned ImmS = SrcBits - 1; |
4201 | 22 | static const unsigned OpcTable[2][2] = { |
4202 | 22 | {AArch64::SBFMWri, AArch64::SBFMXri}, |
4203 | 22 | {AArch64::UBFMWri, AArch64::UBFMXri} |
4204 | 22 | }; |
4205 | 22 | unsigned Opc = OpcTable[IsZExt][Is64Bit]; |
4206 | 22 | if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4207 | 0 | unsigned TmpReg = MRI.createVirtualRegister(RC); |
4208 | 0 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
4209 | 0 | TII.get(AArch64::SUBREG_TO_REG), TmpReg) |
4210 | 0 | .addImm(0) |
4211 | 0 | .addReg(Op0, getKillRegState(Op0IsKill)) |
4212 | 0 | .addImm(AArch64::sub_32); |
4213 | 0 | Op0 = TmpReg; |
4214 | 0 | Op0IsKill = true; |
4215 | 0 | } |
4216 | 22 | return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS); |
4217 | 27 | } |
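A minimal model of the lshr special cases above (assumed helper, not from this file): logically shifting a zero-extended N-bit value right by N or more always gives 0, so the emitter materializes the constant instead; otherwise the extension folds into a single UBFM.

    #include <cstdint>

    uint32_t lshrZext8(uint8_t Val, unsigned Shift) {
      if (Shift >= 8)
        return 0;                                   // materializeInt(0) path
      return static_cast<uint32_t>(Val) >> Shift;   // UBFM Wd, Wn, #Shift, #7
    }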
4218 | | |
4219 | | unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, |
4220 | 4 | unsigned Op1Reg, bool Op1IsKill) { |
4221 | 4 | unsigned Opc = 0; |
4222 | 4 | bool NeedTrunc = false; |
4223 | 4 | uint64_t Mask = 0; |
4224 | 4 | switch (RetVT.SimpleTy) { |
4225 | 0 | default: return 0; |
4226 | 1 | case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break; |
4227 | 1 | case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break; |
4228 | 1 | case MVT::i32: Opc = AArch64::ASRVWr; break; |
4229 | 1 | case MVT::i64: Opc = AArch64::ASRVXr; break; |
4230 | 4 | } |
4231 | 4 | |
4232 | 4 | const TargetRegisterClass *RC = |
4233 | 4 | (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4234 | 4 | if (NeedTrunc) {
4235 | 2 | Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false); |
4236 | 2 | Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask); |
4237 | 2 | Op0IsKill = Op1IsKill = true; |
4238 | 2 | } |
4239 | 4 | unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg, |
4240 | 4 | Op1IsKill); |
4241 | 4 | if (NeedTrunc) |
4242 | 2 | ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); |
4243 | 4 | return ResultReg; |
4244 | 4 | } |
4245 | | |
4246 | | unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, |
4247 | | bool Op0IsKill, uint64_t Shift, |
4248 | 29 | bool IsZExt) { |
4249 | 29 | assert(RetVT.SimpleTy >= SrcVT.SimpleTy && |
4250 | 29 | "Unexpected source/return type pair."); |
4251 | 29 | assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || |
4252 | 29 | SrcVT == MVT::i32 || SrcVT == MVT::i64) && |
4253 | 29 | "Unexpected source value type."); |
4254 | 29 | assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || |
4255 | 29 | RetVT == MVT::i64) && "Unexpected return value type."); |
4256 | 29 | |
4257 | 29 | bool Is64Bit = (RetVT == MVT::i64); |
4258 | 29 | unsigned RegSize = Is64Bit ? 64 : 32;
4259 | 29 | unsigned DstBits = RetVT.getSizeInBits(); |
4260 | 29 | unsigned SrcBits = SrcVT.getSizeInBits(); |
4261 | 29 | const TargetRegisterClass *RC = |
4262 | 29 | Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4263 | 29 | |
4264 | 29 | // Just emit a copy for "zero" shifts. |
4265 | 29 | if (Shift == 0) {
4266 | 2 | if (RetVT == SrcVT) {
4267 | 1 | unsigned ResultReg = createResultReg(RC); |
4268 | 1 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
4269 | 1 | TII.get(TargetOpcode::COPY), ResultReg) |
4270 | 1 | .addReg(Op0, getKillRegState(Op0IsKill)); |
4271 | 1 | return ResultReg; |
4272 | 1 | } else |
4273 | 1 | return emitIntExt(SrcVT, Op0, RetVT, IsZExt); |
4274 | 27 | } |
4275 | 27 | |
4276 | 27 | // Don't deal with undefined shifts. |
4277 | 27 | if (Shift >= DstBits)
4278 | 0 | return 0; |
4279 | 27 | |
4280 | 27 | // For immediate shifts we can fold the zero-/sign-extension into the shift. |
4281 | 27 | // {S|U}BFM Wd, Wn, #r, #s |
4282 | 27 | // Wd<s-r:0> = Wn<s:r> when r <= s |
4283 | 27 | |
4284 | 27 | // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 |
4285 | 27 | // %2 = ashr i16 %1, 4 |
4286 | 27 | // Wd<7-4:0> = Wn<7:4> |
4287 | 27 | // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext |
4288 | 27 | // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext |
4289 | 27 | // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext |
4290 | 27 | |
4291 | 27 | // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 |
4292 | 27 | // %2 = ashr i16 %1, 8 |
4293 | 27 | // Wd<7-7,0> = Wn<7:7> |
4294 | 27 | // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext |
4295 | 27 | // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext |
4296 | 27 | // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext |
4297 | 27 | |
4298 | 27 | // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 |
4299 | 27 | // %2 = ashr i16 %1, 12 |
4300 | 27 | // Wd<7-7,0> = Wn<7:7> <- clamp r to 7 |
4301 | 27 | // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext |
4302 | 27 | // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext |
4303 | 27 | // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext |
4304 | 27 | |
4305 | 27 | if (Shift >= SrcBits && IsZExt)
4306 | 3 | return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT); |
4307 | 24 | |
4308 | 24 | unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift); |
4309 | 24 | unsigned ImmS = SrcBits - 1; |
4310 | 24 | static const unsigned OpcTable[2][2] = { |
4311 | 24 | {AArch64::SBFMWri, AArch64::SBFMXri}, |
4312 | 24 | {AArch64::UBFMWri, AArch64::UBFMXri} |
4313 | 24 | }; |
4314 | 24 | unsigned Opc = OpcTable[IsZExt][Is64Bit]; |
4315 | 24 | if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4316 | 1 | unsigned TmpReg = MRI.createVirtualRegister(RC); |
4317 | 1 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
4318 | 1 | TII.get(AArch64::SUBREG_TO_REG), TmpReg) |
4319 | 1 | .addImm(0) |
4320 | 1 | .addReg(Op0, getKillRegState(Op0IsKill)) |
4321 | 1 | .addImm(AArch64::sub_32); |
4322 | 1 | Op0 = TmpReg; |
4323 | 1 | Op0IsKill = true; |
4324 | 1 | } |
4325 | 29 | return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS); |
4326 | 29 | } |
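The ashr counterpart clamps the start bit instead (a sketch under the same assumptions): over-shifting a sign-extended i8 only replicates bit 7, which is what clamping ImmR to SrcBits-1 encodes.

    #include <cstdint>

    int32_t ashrSext8(int8_t Val, unsigned Shift) {
      unsigned R = Shift > 7 ? 7 : Shift;     // ImmR = min(SrcBits - 1, Shift)
      // Assumes arithmetic right shift on signed values, as on AArch64.
      return static_cast<int32_t>(Val) >> R;  // SBFM Wd, Wn, #R, #7
    }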
4327 | | |
4328 | | unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, |
4329 | 443 | bool IsZExt) { |
4330 | 443 | assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?"); |
4331 | 443 | |
4332 | 443 | // FastISel does not have plumbing to deal with extensions where the SrcVT or |
4333 | 443 | // DestVT are odd things, so test to make sure that they are both types we can |
4334 | 443 | // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise |
4335 | 443 | // bail out to SelectionDAG. |
4336 | 443 | if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4337 | 443 | (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4338 | 443 | ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
4339 | 443 | (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
4340 | 0 | return 0; |
4341 | 443 | |
4342 | 443 | unsigned Opc; |
4343 | 443 | unsigned Imm = 0; |
4344 | 443 | |
4345 | 443 | switch (SrcVT.SimpleTy) { |
4346 | 0 | default: |
4347 | 0 | return 0; |
4348 | 141 | case MVT::i1: |
4349 | 141 | return emiti1Ext(SrcReg, DestVT, IsZExt); |
4350 | 159 | case MVT::i8: |
4351 | 159 | if (DestVT == MVT::i64) |
4352 | 16 | Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4353 | 159 | else
4354 | 143 | Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4355 | 159 | Imm = 7; |
4356 | 159 | break; |
4357 | 100 | case MVT::i16: |
4358 | 100 | if (DestVT == MVT::i64) |
4359 | 15 | Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4360 | 100 | else
4361 | 85 | Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4362 | 100 | Imm = 15; |
4363 | 100 | break; |
4364 | 43 | case MVT::i32: |
4365 | 43 | assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?"); |
4366 | 43 | Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4367 | 43 | Imm = 31; |
4368 | 43 | break; |
4369 | 302 | } |
4370 | 302 | |
4371 | 302 | // Handle i8 and i16 as i32. |
4372 | 302 | if (DestVT == MVT::i8 || DestVT == MVT::i16)
4373 | 3 | DestVT = MVT::i32;
4374 | 299 | else if (DestVT == MVT::i64) {
4375 | 74 | unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); |
4376 | 74 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
4377 | 74 | TII.get(AArch64::SUBREG_TO_REG), Src64) |
4378 | 74 | .addImm(0) |
4379 | 74 | .addReg(SrcReg) |
4380 | 74 | .addImm(AArch64::sub_32); |
4381 | 74 | SrcReg = Src64; |
4382 | 74 | } |
4383 | 302 | |
4384 | 302 | const TargetRegisterClass *RC = |
4385 | 302 | (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4386 | 443 | return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm); |
4387 | 443 | } |
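The Imm values picked above (7, 15, 31) make the bitfield move a plain extension; an illustrative mapping to the familiar mnemonics (helper names assumed):

    #include <cstdint>

    uint32_t uxtb(uint32_t Wn) { return Wn & 0xffu; }        // UBFMWri Wd, Wn, #0, #7
    int32_t sxth(uint32_t Wn) {                              // SBFMWri Wd, Wn, #0, #15
      return static_cast<int16_t>(Wn & 0xffffu);
    }
    int64_t sxtw(uint32_t Wn) {                              // SBFMXri Xd, Xn, #0, #31
      return static_cast<int64_t>(static_cast<int32_t>(Wn));
    }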
4388 | | |
4389 | 1 | static bool isZExtLoad(const MachineInstr *LI) { |
4390 | 1 | switch (LI->getOpcode()) { |
4391 | 0 | default: |
4392 | 0 | return false; |
4393 | 1 | case AArch64::LDURBBi: |
4394 | 1 | case AArch64::LDURHHi: |
4395 | 1 | case AArch64::LDURWi: |
4396 | 1 | case AArch64::LDRBBui: |
4397 | 1 | case AArch64::LDRHHui: |
4398 | 1 | case AArch64::LDRWui: |
4399 | 1 | case AArch64::LDRBBroX: |
4400 | 1 | case AArch64::LDRHHroX: |
4401 | 1 | case AArch64::LDRWroX: |
4402 | 1 | case AArch64::LDRBBroW: |
4403 | 1 | case AArch64::LDRHHroW: |
4404 | 1 | case AArch64::LDRWroW: |
4405 | 1 | return true; |
4406 | 0 | } |
4407 | 0 | } |
4408 | | |
4409 | 0 | static bool isSExtLoad(const MachineInstr *LI) { |
4410 | 0 | switch (LI->getOpcode()) { |
4411 | 0 | default: |
4412 | 0 | return false; |
4413 | 0 | case AArch64::LDURSBWi: |
4414 | 0 | case AArch64::LDURSHWi: |
4415 | 0 | case AArch64::LDURSBXi: |
4416 | 0 | case AArch64::LDURSHXi: |
4417 | 0 | case AArch64::LDURSWi: |
4418 | 0 | case AArch64::LDRSBWui: |
4419 | 0 | case AArch64::LDRSHWui: |
4420 | 0 | case AArch64::LDRSBXui: |
4421 | 0 | case AArch64::LDRSHXui: |
4422 | 0 | case AArch64::LDRSWui: |
4423 | 0 | case AArch64::LDRSBWroX: |
4424 | 0 | case AArch64::LDRSHWroX: |
4425 | 0 | case AArch64::LDRSBXroX: |
4426 | 0 | case AArch64::LDRSHXroX: |
4427 | 0 | case AArch64::LDRSWroX: |
4428 | 0 | case AArch64::LDRSBWroW: |
4429 | 0 | case AArch64::LDRSHWroW: |
4430 | 0 | case AArch64::LDRSBXroW: |
4431 | 0 | case AArch64::LDRSHXroW: |
4432 | 0 | case AArch64::LDRSWroW: |
4433 | 0 | return true; |
4434 | 0 | } |
4435 | 0 | } |
4436 | | |
4437 | | bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT, |
4438 | 221 | MVT SrcVT) { |
4439 | 221 | const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)); |
4440 | 221 | if (!LI || !LI->hasOneUse())
4441 | 110 | return false; |
4442 | 111 | |
4443 | 111 | // Check if the load instruction has already been selected. |
4444 | 111 | unsigned Reg = lookUpRegForValue(LI); |
4445 | 111 | if (!Reg) |
4446 | 110 | return false; |
4447 | 1 | |
4448 | 1 | MachineInstr *MI = MRI.getUniqueVRegDef(Reg); |
4449 | 1 | if (!MI) |
4450 | 0 | return false; |
4451 | 1 | |
4452 | 1 | // Check if the correct load instruction has been emitted - SelectionDAG might |
4453 | 1 | // have emitted a zero-extending load, but we need a sign-extending load. |
4454 | 1 | bool IsZExt = isa<ZExtInst>(I); |
4455 | 1 | const auto *LoadMI = MI; |
4456 | 1 | if (LoadMI->getOpcode() == TargetOpcode::COPY && |
4457 | 1 | LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4458 | 0 | unsigned LoadReg = MI->getOperand(1).getReg(); |
4459 | 0 | LoadMI = MRI.getUniqueVRegDef(LoadReg); |
4460 | 0 | assert(LoadMI && "Expected valid instruction"); |
4461 | 0 | } |
4462 | 1 | if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4463 | 0 | return false; |
4464 | 1 | |
4465 | 1 | // Nothing to be done. |
4466 | 1 | if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4467 | 0 | updateValueMap(I, Reg); |
4468 | 0 | return true; |
4469 | 0 | } |
4470 | 1 | |
4471 | 1 | if (IsZExt) {
4472 | 1 | unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass); |
4473 | 1 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
4474 | 1 | TII.get(AArch64::SUBREG_TO_REG), Reg64) |
4475 | 1 | .addImm(0) |
4476 | 1 | .addReg(Reg, getKillRegState(true)) |
4477 | 1 | .addImm(AArch64::sub_32); |
4478 | 1 | Reg = Reg64; |
4479 | 1 | } else { |
4480 | 0 | assert((MI->getOpcode() == TargetOpcode::COPY && |
4481 | 0 | MI->getOperand(1).getSubReg() == AArch64::sub_32) && |
4482 | 0 | "Expected copy instruction"); |
4483 | 0 | Reg = MI->getOperand(1).getReg(); |
4484 | 0 | MI->eraseFromParent(); |
4485 | 0 | } |
4486 | 221 | updateValueMap(I, Reg); |
4487 | 221 | return true; |
4488 | 221 | } |
4489 | | |
4490 | 228 | bool AArch64FastISel::selectIntExt(const Instruction *I) { |
4491 | 228 | assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) && |
4492 | 228 | "Unexpected integer extend instruction."); |
4493 | 228 | MVT RetVT; |
4494 | 228 | MVT SrcVT; |
4495 | 228 | if (!isTypeSupported(I->getType(), RetVT)) |
4496 | 7 | return false; |
4497 | 221 | |
4498 | 221 | if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4499 | 0 | return false; |
4500 | 221 | |
4501 | 221 | // Try to optimize already sign-/zero-extended values from load instructions. |
4502 | 221 | if (optimizeIntExtLoad(I, RetVT, SrcVT))
4503 | 1 | return true; |
4504 | 220 | |
4505 | 220 | unsigned SrcReg = getRegForValue(I->getOperand(0)); |
4506 | 220 | if (!SrcReg) |
4507 | 0 | return false; |
4508 | 220 | bool SrcIsKill = hasTrivialKill(I->getOperand(0)); |
4509 | 220 | |
4510 | 220 | // Try to optimize already sign-/zero-extended values from function arguments. |
4511 | 220 | bool IsZExt = isa<ZExtInst>(I); |
4512 | 220 | if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4513 | 78 | if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4514 | 66 | if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4515 | 13 | unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass); |
4516 | 13 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
4517 | 13 | TII.get(AArch64::SUBREG_TO_REG), ResultReg) |
4518 | 13 | .addImm(0) |
4519 | 13 | .addReg(SrcReg, getKillRegState(SrcIsKill)) |
4520 | 13 | .addImm(AArch64::sub_32); |
4521 | 13 | SrcReg = ResultReg; |
4522 | 13 | } |
4523 | 66 | // Conservatively clear all kill flags from all uses, because we are |
4524 | 66 | // replacing a sign-/zero-extend instruction at IR level with a nop at MI |
4525 | 66 | // level. The result of the instruction at IR level might have been |
4526 | 66 | // trivially dead, which is now no longer true.
4527 | 66 | unsigned UseReg = lookUpRegForValue(I); |
4528 | 66 | if (UseReg) |
4529 | 66 | MRI.clearKillFlags(UseReg); |
4530 | 66 | |
4531 | 66 | updateValueMap(I, SrcReg); |
4532 | 66 | return true; |
4533 | 66 | } |
4534 | 154 | } |
4535 | 154 | |
4536 | 154 | unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt); |
4537 | 154 | if (!ResultReg) |
4538 | 0 | return false; |
4539 | 154 | |
4540 | 154 | updateValueMap(I, ResultReg); |
4541 | 154 | return true; |
4542 | 154 | } |
4543 | | |
4544 | 8 | bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) { |
4545 | 8 | EVT DestEVT = TLI.getValueType(DL, I->getType(), true); |
4546 | 8 | if (!DestEVT.isSimple()) |
4547 | 0 | return false; |
4548 | 8 | |
4549 | 8 | MVT DestVT = DestEVT.getSimpleVT(); |
4550 | 8 | if (DestVT != MVT::i64 && DestVT != MVT::i32)
4551 | 0 | return false; |
4552 | 8 | |
4553 | 8 | unsigned DivOpc; |
4554 | 8 | bool Is64bit = (DestVT == MVT::i64); |
4555 | 8 | switch (ISDOpcode) { |
4556 | 0 | default: |
4557 | 0 | return false; |
4558 | 4 | case ISD::SREM: |
4559 | 4 | DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4560 | 4 | break;
4561 | 4 | case ISD::UREM:
4562 | 4 | DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4563 | 4 | break; |
4564 | 8 | } |
4565 | 8 | unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4566 | 8 | unsigned Src0Reg = getRegForValue(I->getOperand(0)); |
4567 | 8 | if (!Src0Reg) |
4568 | 0 | return false; |
4569 | 8 | bool Src0IsKill = hasTrivialKill(I->getOperand(0)); |
4570 | 8 | |
4571 | 8 | unsigned Src1Reg = getRegForValue(I->getOperand(1)); |
4572 | 8 | if (!Src1Reg) |
4573 | 0 | return false; |
4574 | 8 | bool Src1IsKill = hasTrivialKill(I->getOperand(1)); |
4575 | 8 | |
4576 | 8 | const TargetRegisterClass *RC = |
4577 | 8 | (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4578 | 8 | unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false, |
4579 | 8 | Src1Reg, /*IsKill=*/false); |
4580 | 8 | assert(QuotReg && "Unexpected DIV instruction emission failure."); |
4581 | 8 | // The remainder is computed as numerator - (quotient * denominator) using the |
4582 | 8 | // MSUB instruction. |
4583 | 8 | unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true, |
4584 | 8 | Src1Reg, Src1IsKill, Src0Reg, |
4585 | 8 | Src0IsKill); |
4586 | 8 | updateValueMap(I, ResultReg); |
4587 | 8 | return true; |
4588 | 8 | } |
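As the comment above says, the remainder is rebuilt from a divide; a hedged C++ model of the emitted DIV + MSUB pair (helper name assumed):

    #include <cstdint>

    int64_t sremViaMsub(int64_t Num, int64_t Den) {
      int64_t Quot = Num / Den;   // SDIVXr
      return Num - Quot * Den;    // MSUBXrrr: Xa - Xn * Xm
    }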
4589 | | |
4590 | 10 | bool AArch64FastISel::selectMul(const Instruction *I) { |
4591 | 10 | MVT VT; |
4592 | 10 | if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) |
4593 | 0 | return false; |
4594 | 10 | |
4595 | 10 | if (VT.isVector())
4596 | 0 | return selectBinaryOp(I, ISD::MUL); |
4597 | 10 | |
4598 | 10 | const Value *Src0 = I->getOperand(0); |
4599 | 10 | const Value *Src1 = I->getOperand(1); |
4600 | 10 | if (const auto *C = dyn_cast<ConstantInt>(Src0)) |
4601 | 0 | if (C->getValue().isPowerOf2())
4602 | 0 | std::swap(Src0, Src1); |
4603 | 10 | |
4604 | 10 | // Try to simplify to a shift instruction. |
4605 | 10 | if (const auto *C = dyn_cast<ConstantInt>(Src1)) |
4606 | 4 | if (C->getValue().isPowerOf2()) {
4607 | 2 | uint64_t ShiftVal = C->getValue().logBase2(); |
4608 | 2 | MVT SrcVT = VT; |
4609 | 2 | bool IsZExt = true; |
4610 | 2 | if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4611 | 0 | if (!isIntExtFree(ZExt)) {
4612 | 0 | MVT VT;
4613 | 0 | if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4614 | 0 | SrcVT = VT; |
4615 | 0 | IsZExt = true; |
4616 | 0 | Src0 = ZExt->getOperand(0); |
4617 | 0 | } |
4618 | 0 | } |
4619 | 2 | } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4620 | 0 | if (!isIntExtFree(SExt)) {
4621 | 0 | MVT VT;
4622 | 0 | if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4623 | 0 | SrcVT = VT; |
4624 | 0 | IsZExt = false; |
4625 | 0 | Src0 = SExt->getOperand(0); |
4626 | 0 | } |
4627 | 0 | } |
4628 | 2 | } |
4629 | 2 | |
4630 | 2 | unsigned Src0Reg = getRegForValue(Src0); |
4631 | 2 | if (!Src0Reg) |
4632 | 0 | return false; |
4633 | 2 | bool Src0IsKill = hasTrivialKill(Src0); |
4634 | 2 | |
4635 | 2 | unsigned ResultReg = |
4636 | 2 | emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt); |
4637 | 2 | |
4638 | 2 | if (ResultReg) {
4639 | 2 | updateValueMap(I, ResultReg); |
4640 | 2 | return true; |
4641 | 2 | } |
4642 | 8 | } |
4643 | 8 | |
4644 | 8 | unsigned Src0Reg = getRegForValue(I->getOperand(0)); |
4645 | 8 | if (!Src0Reg) |
4646 | 0 | return false; |
4647 | 8 | bool Src0IsKill = hasTrivialKill(I->getOperand(0)); |
4648 | 8 | |
4649 | 8 | unsigned Src1Reg = getRegForValue(I->getOperand(1)); |
4650 | 8 | if (!Src1Reg) |
4651 | 0 | return false; |
4652 | 8 | bool Src1IsKill = hasTrivialKill(I->getOperand(1)); |
4653 | 8 | |
4654 | 8 | unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill); |
4655 | 8 | |
4656 | 8 | if (!ResultReg) |
4657 | 0 | return false; |
4658 | 8 | |
4659 | 8 | updateValueMap(I, ResultReg); |
4660 | 8 | return true; |
4661 | 8 | } |
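The power-of-two fast path above rests on x * C == x << log2(C); a minimal sketch (illustrative; assumes C is a nonzero power of two and uses a GCC/Clang builtin in place of APInt::logBase2):

    #include <cstdint>

    uint64_t mulPow2(uint64_t X, uint64_t C) {
      unsigned Lg2 = __builtin_ctzll(C);   // log2 of a power of two
      return X << Lg2;                     // lowered via emitLSL_ri
    }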
4662 | | |
4663 | 105 | bool AArch64FastISel::selectShift(const Instruction *I) { |
4664 | 105 | MVT RetVT; |
4665 | 105 | if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true)) |
4666 | 0 | return false; |
4667 | 105 | |
4668 | 105 | if (RetVT.isVector())
4669 | 0 | return selectOperator(I, I->getOpcode()); |
4670 | 105 | |
4671 | 105 | if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4672 | 93 | unsigned ResultReg = 0; |
4673 | 93 | uint64_t ShiftVal = C->getZExtValue(); |
4674 | 93 | MVT SrcVT = RetVT; |
4675 | 93 | bool IsZExt = I->getOpcode() != Instruction::AShr; |
4676 | 93 | const Value *Op0 = I->getOperand(0); |
4677 | 93 | if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4678 | 22 | if (!isIntExtFree(ZExt)) {
4679 | 22 | MVT TmpVT;
4680 | 22 | if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4681 | 22 | SrcVT = TmpVT; |
4682 | 22 | IsZExt = true; |
4683 | 22 | Op0 = ZExt->getOperand(0); |
4684 | 22 | } |
4685 | 22 | } |
4686 | 93 | } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4687 | 20 | if (!isIntExtFree(SExt)) {
4688 | 20 | MVT TmpVT;
4689 | 20 | if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4690 | 19 | SrcVT = TmpVT; |
4691 | 19 | IsZExt = false; |
4692 | 19 | Op0 = SExt->getOperand(0); |
4693 | 19 | } |
4694 | 20 | } |
4695 | 71 | } |
4696 | 93 | |
4697 | 93 | unsigned Op0Reg = getRegForValue(Op0); |
4698 | 93 | if (!Op0Reg) |
4699 | 0 | return false; |
4700 | 93 | bool Op0IsKill = hasTrivialKill(Op0); |
4701 | 93 | |
4702 | 93 | switch (I->getOpcode()) { |
4703 | 0 | default: llvm_unreachable("Unexpected instruction.");
4704 | 50 | case Instruction::Shl: |
4705 | 50 | ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt); |
4706 | 50 | break; |
4707 | 19 | case Instruction::AShr: |
4708 | 19 | ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt); |
4709 | 19 | break; |
4710 | 24 | case Instruction::LShr: |
4711 | 24 | ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt); |
4712 | 24 | break; |
4713 | 93 | } |
4714 | 93 | if (!ResultReg)
4715 | 14 | return false; |
4716 | 79 | |
4717 | 79 | updateValueMap(I, ResultReg); |
4718 | 79 | return true; |
4719 | 79 | } |
4720 | 12 | |
4721 | 12 | unsigned Op0Reg = getRegForValue(I->getOperand(0)); |
4722 | 12 | if (!Op0Reg) |
4723 | 0 | return false; |
4724 | 12 | bool Op0IsKill = hasTrivialKill(I->getOperand(0)); |
4725 | 12 | |
4726 | 12 | unsigned Op1Reg = getRegForValue(I->getOperand(1)); |
4727 | 12 | if (!Op1Reg) |
4728 | 0 | return false; |
4729 | 12 | bool Op1IsKill = hasTrivialKill(I->getOperand(1)); |
4730 | 12 | |
4731 | 12 | unsigned ResultReg = 0; |
4732 | 12 | switch (I->getOpcode()) { |
4733 | 0 | default: llvm_unreachable("Unexpected instruction.");
4734 | 4 | case Instruction::Shl: |
4735 | 4 | ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill); |
4736 | 4 | break; |
4737 | 4 | case Instruction::AShr: |
4738 | 4 | ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill); |
4739 | 4 | break; |
4740 | 4 | case Instruction::LShr: |
4741 | 4 | ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill); |
4742 | 4 | break; |
4743 | 12 | } |
4744 | 12 | |
4745 | 12 | if (!ResultReg)
4746 | 0 | return false; |
4747 | 12 | |
4748 | 12 | updateValueMap(I, ResultReg); |
4749 | 12 | return true; |
4750 | 12 | } |
4751 | | |
4752 | 22 | bool AArch64FastISel::selectBitCast(const Instruction *I) { |
4753 | 22 | MVT RetVT, SrcVT; |
4754 | 22 | |
4755 | 22 | if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT)) |
4756 | 0 | return false; |
4757 | 22 | if (!isTypeLegal(I->getType(), RetVT))
4758 | 0 | return false; |
4759 | 22 | |
4760 | 22 | unsigned Opc; |
4761 | 22 | if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4762 | 1 | Opc = AArch64::FMOVWSr;
4763 | 21 | else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4764 | 3 | Opc = AArch64::FMOVXDr;
4765 | 18 | else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4766 | 1 | Opc = AArch64::FMOVSWr;
4767 | 17 | else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4768 | 3 | Opc = AArch64::FMOVDXr; |
4769 | 17 | else |
4770 | 14 | return false; |
4771 | 8 | |
4772 | 8 | const TargetRegisterClass *RC = nullptr; |
4773 | 8 | switch (RetVT.SimpleTy) { |
4774 | 0 | default: llvm_unreachable("Unexpected value type.");
4775 | 1 | case MVT::i32: RC = &AArch64::GPR32RegClass; break; |
4776 | 3 | case MVT::i64: RC = &AArch64::GPR64RegClass; break; |
4777 | 1 | case MVT::f32: RC = &AArch64::FPR32RegClass; break; |
4778 | 3 | case MVT::f64: RC = &AArch64::FPR64RegClass; break; |
4779 | 8 | } |
4780 | 8 | unsigned Op0Reg = getRegForValue(I->getOperand(0)); |
4781 | 8 | if (!Op0Reg) |
4782 | 0 | return false; |
4783 | 8 | bool Op0IsKill = hasTrivialKill(I->getOperand(0)); |
4784 | 8 | unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill); |
4785 | 8 | |
4786 | 8 | if (!ResultReg) |
4787 | 0 | return false; |
4788 | 8 | |
4789 | 8 | updateValueMap(I, ResultReg); |
4790 | 8 | return true; |
4791 | 8 | } |
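The FMOV opcodes selected above move raw bits between a GPR and an FPR with no conversion, which is exactly IR bitcast semantics; a portable C++ rendering (illustrative only):

    #include <cstdint>
    #include <cstring>

    double bitcastI64ToF64(uint64_t Bits) {  // FMOVXDr
      double D;
      std::memcpy(&D, &Bits, sizeof(D));
      return D;
    }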
4792 | | |
4793 | 4 | bool AArch64FastISel::selectFRem(const Instruction *I) { |
4794 | 4 | MVT RetVT; |
4795 | 4 | if (!isTypeLegal(I->getType(), RetVT)) |
4796 | 0 | return false; |
4797 | 4 | |
4798 | 4 | RTLIB::Libcall LC; |
4799 | 4 | switch (RetVT.SimpleTy) { |
4800 | 0 | default: |
4801 | 0 | return false; |
4802 | 2 | case MVT::f32: |
4803 | 2 | LC = RTLIB::REM_F32; |
4804 | 2 | break; |
4805 | 2 | case MVT::f64: |
4806 | 2 | LC = RTLIB::REM_F64; |
4807 | 2 | break; |
4808 | 4 | } |
4809 | 4 | |
4810 | 4 | ArgListTy Args; |
4811 | 4 | Args.reserve(I->getNumOperands()); |
4812 | 4 | |
4813 | 4 | // Populate the argument list. |
4814 | 8 | for (auto &Arg : I->operands()) { |
4815 | 8 | ArgListEntry Entry; |
4816 | 8 | Entry.Val = Arg; |
4817 | 8 | Entry.Ty = Arg->getType(); |
4818 | 8 | Args.push_back(Entry); |
4819 | 8 | } |
4820 | 4 | |
4821 | 4 | CallLoweringInfo CLI; |
4822 | 4 | MCContext &Ctx = MF->getContext(); |
4823 | 4 | CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(), |
4824 | 4 | TLI.getLibcallName(LC), std::move(Args)); |
4825 | 4 | if (!lowerCallTo(CLI)) |
4826 | 0 | return false; |
4827 | 4 | updateValueMap(I, CLI.ResultReg); |
4828 | 4 | return true; |
4829 | 4 | } |
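frem has no AArch64 instruction, so the code above builds an ordinary libcall; the selected routines are the C math-library remainders (a sketch, assuming the default libcall names for RTLIB::REM_F32/REM_F64):

    #include <cmath>

    float fremF32(float A, float B) { return fmodf(A, B); }   // RTLIB::REM_F32
    double fremF64(double A, double B) { return fmod(A, B); } // RTLIB::REM_F64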
4830 | | |
4831 | 14 | bool AArch64FastISel::selectSDiv(const Instruction *I) { |
4832 | 14 | MVT VT; |
4833 | 14 | if (!isTypeLegal(I->getType(), VT)) |
4834 | 0 | return false; |
4835 | 14 | |
4836 | 14 | if (!isa<ConstantInt>(I->getOperand(1)))
4837 | 0 | return selectBinaryOp(I, ISD::SDIV); |
4838 | 14 | |
4839 | 14 | const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue(); |
4840 | 14 | if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4841 | 14 | !(C.isPowerOf2() || (-C).isPowerOf2()))
4842 | 0 | return selectBinaryOp(I, ISD::SDIV); |
4843 | 14 | |
4844 | 14 | unsigned Lg2 = C.countTrailingZeros(); |
4845 | 14 | unsigned Src0Reg = getRegForValue(I->getOperand(0)); |
4846 | 14 | if (!Src0Reg) |
4847 | 0 | return false; |
4848 | 14 | bool Src0IsKill = hasTrivialKill(I->getOperand(0)); |
4849 | 14 | |
4850 | 14 | if (cast<BinaryOperator>(I)->isExact()) {
4851 | 3 | unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2); |
4852 | 3 | if (!ResultReg) |
4853 | 0 | return false; |
4854 | 3 | updateValueMap(I, ResultReg); |
4855 | 3 | return true; |
4856 | 3 | } |
4857 | 11 | |
4858 | 11 | int64_t Pow2MinusOne = (1ULL << Lg2) - 1; |
4859 | 11 | unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne); |
4860 | 11 | if (!AddReg) |
4861 | 0 | return false; |
4862 | 11 | |
4863 | 11 | // (Src0 < 0) ? Pow2 - 1 : 0; |
4864 | 11 | if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
4865 | 0 | return false; |
4866 | 11 | |
4867 | 11 | unsigned SelectOpc; |
4868 | 11 | const TargetRegisterClass *RC; |
4869 | 11 | if (VT == MVT::i64) {
4870 | 6 | SelectOpc = AArch64::CSELXr; |
4871 | 6 | RC = &AArch64::GPR64RegClass; |
4872 | 11 | } else { |
4873 | 5 | SelectOpc = AArch64::CSELWr; |
4874 | 5 | RC = &AArch64::GPR32RegClass; |
4875 | 5 | } |
4876 | 11 | unsigned SelectReg = |
4877 | 11 | fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg, |
4878 | 11 | Src0IsKill, AArch64CC::LT); |
4879 | 11 | if (!SelectReg) |
4880 | 0 | return false; |
4881 | 11 | |
4882 | 11 | // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also |
4883 | 11 | // negate the result. |
4884 | 11 | unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4885 | 11 | unsigned ResultReg; |
4886 | 11 | if (C.isNegative()) |
4887 | 4 | ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true, |
4888 | 4 | SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2); |
4889 | 11 | else |
4890 | 7 | ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2); |
4891 | 11 | |
4892 | 11 | if (!ResultReg) |
4893 | 0 | return false; |
4894 | 11 | |
4895 | 11 | updateValueMap(I, ResultReg); |
4896 | 11 | return true; |
4897 | 11 | } |
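A hedged model of the rounding fix-up above: an arithmetic shift rounds toward negative infinity, so for negative dividends 2^k - 1 is added first (ADD, then CMP + CSEL pick the biased value), making the shift round toward zero like sdiv. Illustrative helper for a positive power-of-two divisor; negative divisors additionally negate the result via the emitAddSub_rs path. Assumes arithmetic right shift on signed values.

    #include <cstdint>

    int32_t sdivPow2(int32_t X, unsigned Lg2) {
      int32_t Biased = X + static_cast<int32_t>((1u << Lg2) - 1); // emitAdd_ri_
      int32_t Sel = X < 0 ? Biased : X;   // CMP X, #0; CSELWr ..., LT
      return Sel >> Lg2;                  // emitASR_ri
    }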
4898 | | |
4899 | | /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We |
4900 | | /// have to duplicate it for AArch64, because otherwise we would fail during the |
4901 | | /// sign-extend emission. |
4902 | 17 | std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) { |
4903 | 17 | unsigned IdxN = getRegForValue(Idx); |
4904 | 17 | if (IdxN == 0) |
4905 | 17 | // Unhandled operand. Halt "fast" selection and bail. |
4906 | 0 | return std::pair<unsigned, bool>(0, false); |
4907 | 17 | |
4908 | 17 | bool IdxNIsKill = hasTrivialKill(Idx); |
4909 | 17 | |
4910 | 17 | // If the index is smaller or larger than intptr_t, truncate or extend it. |
4911 | 17 | MVT PtrVT = TLI.getPointerTy(DL); |
4912 | 17 | EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false); |
4913 | 17 | if (IdxVT.bitsLT(PtrVT)) {
4914 | 2 | IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*IsZExt=*/false); |
4915 | 2 | IdxNIsKill = true; |
4916 | 15 | } else if (IdxVT.bitsGT(PtrVT))
4917 | 0 | llvm_unreachable("AArch64 FastISel doesn't support types larger than i64"); |
4918 | 17 | return std::pair<unsigned, bool>(IdxN, IdxNIsKill); |
4919 | 17 | } |
4920 | | |
4921 | | /// This is mostly a copy of the existing FastISel GEP code, but we have to |
4922 | | /// duplicate it for AArch64, because otherwise we would bail out even for |
4923 | | /// simple cases. This is because the standard fastEmit functions don't cover |
4924 | | /// MUL at all and ADD is lowered very inefficiently.
4925 | 32 | bool AArch64FastISel::selectGetElementPtr(const Instruction *I) { |
4926 | 32 | unsigned N = getRegForValue(I->getOperand(0)); |
4927 | 32 | if (!N) |
4928 | 0 | return false; |
4929 | 32 | bool NIsKill = hasTrivialKill(I->getOperand(0)); |
4930 | 32 | |
4931 | 32 | // Keep a running tab of the total offset to coalesce multiple N = N + Offset |
4932 | 32 | // into a single N = N + TotalOffset. |
4933 | 32 | uint64_t TotalOffs = 0; |
4934 | 32 | MVT VT = TLI.getPointerTy(DL); |
4935 | 32 | for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I); |
4936 | 80 | GTI != E; ++GTI) {
4937 | 48 | const Value *Idx = GTI.getOperand(); |
4938 | 48 | if (auto *StTy = GTI.getStructTypeOrNull()) {
4939 | 4 | unsigned Field = cast<ConstantInt>(Idx)->getZExtValue(); |
4940 | 4 | // N = N + Offset |
4941 | 4 | if (Field) |
4942 | 2 | TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field); |
4943 | 48 | } else { |
4944 | 44 | Type *Ty = GTI.getIndexedType(); |
4945 | 44 | |
4946 | 44 | // If this is a constant subscript, handle it quickly. |
4947 | 44 | if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4948 | 27 | if (CI->isZero()) |
4949 | 16 | continue; |
4950 | 11 | // N = N + Offset |
4951 | 11 | TotalOffs += |
4952 | 11 | DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue(); |
4953 | 11 | continue; |
4954 | 11 | } |
4955 | 17 | if (TotalOffs) {
4956 | 0 | N = emitAdd_ri_(VT, N, NIsKill, TotalOffs); |
4957 | 0 | if (!N) |
4958 | 0 | return false; |
4959 | 0 | NIsKill = true; |
4960 | 0 | TotalOffs = 0; |
4961 | 0 | } |
4962 | 17 | |
4963 | 17 | // N = N + Idx * ElementSize; |
4964 | 17 | uint64_t ElementSize = DL.getTypeAllocSize(Ty); |
4965 | 17 | std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx); |
4966 | 17 | unsigned IdxN = Pair.first; |
4967 | 17 | bool IdxNIsKill = Pair.second; |
4968 | 17 | if (!IdxN) |
4969 | 0 | return false; |
4970 | 17 | |
4971 | 17 | if (ElementSize != 1) {
4972 | 6 | unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize); |
4973 | 6 | if (!C) |
4974 | 0 | return false; |
4975 | 6 | IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true); |
4976 | 6 | if (!IdxN) |
4977 | 0 | return false; |
4978 | 6 | IdxNIsKill = true; |
4979 | 6 | } |
4980 | 17 | N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill); |
4981 | 17 | if (!N) |
4982 | 0 | return false; |
4983 | 44 | } |
4984 | 48 | } |
4985 | 32 | if (TotalOffs) {
4986 | 13 | N = emitAdd_ri_(VT, N, NIsKill, TotalOffs); |
4987 | 13 | if (!N) |
4988 | 0 | return false; |
4989 | 32 | } |
4990 | 32 | updateValueMap(I, N); |
4991 | 32 | return true; |
4992 | 32 | } |
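The running-offset scheme above reduces a GEP to base-plus-offset arithmetic; one illustrative step (helper name assumed): constant indices accumulate into TotalOffs, while a variable index contributes Idx * ElementSize, added with the MUL/ADD emitted above.

    #include <cstdint>

    uint64_t gepStep(uint64_t N, uint64_t TotalOffs, int64_t Idx,
                     uint64_t ElementSize) {
      return N + TotalOffs + static_cast<uint64_t>(Idx) * ElementSize;
    }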
4993 | | |
4994 | 3 | bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) { |
4995 | 3 | assert(TM.getOptLevel() == CodeGenOpt::None && |
4996 | 3 | "cmpxchg survived AtomicExpand at optlevel > -O0"); |
4997 | 3 | |
4998 | 3 | auto *RetPairTy = cast<StructType>(I->getType()); |
4999 | 3 | Type *RetTy = RetPairTy->getTypeAtIndex(0U); |
5000 | 3 | assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) && |
5001 | 3 | "cmpxchg has a non-i1 status result"); |
5002 | 3 | |
5003 | 3 | MVT VT; |
5004 | 3 | if (!isTypeLegal(RetTy, VT)) |
5005 | 0 | return false; |
5006 | 3 | |
5007 | 3 | const TargetRegisterClass *ResRC; |
5008 | 3 | unsigned Opc, CmpOpc; |
5009 | 3 | // This only supports i32/i64, because i8/i16 aren't legal, and the generic |
5010 | 3 | // extractvalue selection doesn't support that. |
5011 | 3 | if (VT == MVT::i32) {
5012 | 2 | Opc = AArch64::CMP_SWAP_32;
5013 | 2 | CmpOpc = AArch64::SUBSWrs;
5014 | 2 | ResRC = &AArch64::GPR32RegClass;
5015 | 3 | } else if (VT == MVT::i64) {
5016 | 1 | Opc = AArch64::CMP_SWAP_64; |
5017 | 1 | CmpOpc = AArch64::SUBSXrs; |
5018 | 1 | ResRC = &AArch64::GPR64RegClass; |
5019 | 1 | } else { |
5020 | 0 | return false; |
5021 | 0 | } |
5022 | 3 | |
5023 | 3 | const MCInstrDesc &II = TII.get(Opc); |
5024 | 3 | |
5025 | 3 | const unsigned AddrReg = constrainOperandRegClass( |
5026 | 3 | II, getRegForValue(I->getPointerOperand()), II.getNumDefs()); |
5027 | 3 | const unsigned DesiredReg = constrainOperandRegClass( |
5028 | 3 | II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1); |
5029 | 3 | const unsigned NewReg = constrainOperandRegClass( |
5030 | 3 | II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2); |
5031 | 3 | |
5032 | 3 | const unsigned ResultReg1 = createResultReg(ResRC); |
5033 | 3 | const unsigned ResultReg2 = createResultReg(&AArch64::GPR32RegClass); |
5034 | 3 | const unsigned ScratchReg = createResultReg(&AArch64::GPR32RegClass); |
5035 | 3 | |
5036 | 3 | // FIXME: MachineMemOperand doesn't support cmpxchg yet. |
5037 | 3 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) |
5038 | 3 | .addDef(ResultReg1) |
5039 | 3 | .addDef(ScratchReg) |
5040 | 3 | .addUse(AddrReg) |
5041 | 3 | .addUse(DesiredReg) |
5042 | 3 | .addUse(NewReg); |
5043 | 3 | |
5044 | 3 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc)) |
5045 | 3 | .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5046 | 3 | .addUse(ResultReg1) |
5047 | 3 | .addUse(DesiredReg) |
5048 | 3 | .addImm(0); |
5049 | 3 | |
5050 | 3 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr)) |
5051 | 3 | .addDef(ResultReg2) |
5052 | 3 | .addUse(AArch64::WZR) |
5053 | 3 | .addUse(AArch64::WZR) |
5054 | 3 | .addImm(AArch64CC::NE); |
5055 | 3 | |
5056 | 3 | assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers."); |
5057 | 3 | updateValueMap(I, ResultReg1, 2); |
5058 | 3 | return true; |
5059 | 3 | } |
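The status result above is derived from the loaded value rather than from the swap itself: SUBS compares old and expected, and CSINC turns "equal" into 1. A hedged C++ rendering (struct and helper names assumed):

    #include <cstdint>

    struct CmpXchgRes { uint32_t Old; uint32_t Success; };

    CmpXchgRes cmpxchgStatus(uint32_t Loaded, uint32_t Desired) {
      // SUBSWrs WZR, Loaded, Desired; CSINCWr Wd, WZR, WZR, NE
      return {Loaded, Loaded == Desired ? 1u : 0u};
    }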
5060 | | |
5061 | 4.17k | bool AArch64FastISel::fastSelectInstruction(const Instruction *I) { |
5062 | 4.17k | switch (I->getOpcode()) { |
5063 | 636 | default: |
5064 | 636 | break; |
5065 | 274 | case Instruction::Add: |
5066 | 274 | case Instruction::Sub: |
5067 | 274 | return selectAddSub(I); |
5068 | 10 | case Instruction::Mul: |
5069 | 10 | return selectMul(I); |
5070 | 14 | case Instruction::SDiv: |
5071 | 14 | return selectSDiv(I); |
5072 | 4 | case Instruction::SRem: |
5073 | 4 | if (!selectBinaryOp(I, ISD::SREM)) |
5074 | 4 | return selectRem(I, ISD::SREM); |
5075 | 0 | return true; |
5076 | 4 | case Instruction::URem: |
5077 | 4 | if (!selectBinaryOp(I, ISD::UREM)) |
5078 | 4 | return selectRem(I, ISD::UREM); |
5079 | 0 | return true; |
5080 | 105 | case Instruction::Shl: |
5081 | 105 | case Instruction::LShr: |
5082 | 105 | case Instruction::AShr: |
5083 | 105 | return selectShift(I); |
5084 | 89 | case Instruction::And: |
5085 | 89 | case Instruction::Or: |
5086 | 89 | case Instruction::Xor: |
5087 | 89 | return selectLogicalOp(I); |
5088 | 300 | case Instruction::Br: |
5089 | 300 | return selectBranch(I); |
5090 | 1 | case Instruction::IndirectBr: |
5091 | 1 | return selectIndirectBr(I); |
5092 | 50 | case Instruction::BitCast: |
5093 | 50 | if (!FastISel::selectBitCast(I)) |
5094 | 22 | return selectBitCast(I); |
5095 | 28 | return true; |
5096 | 12 | case Instruction::FPToSI: |
5097 | 12 | if (!selectCast(I, ISD::FP_TO_SINT)) |
5098 | 1 | return selectFPToInt(I, /*Signed=*/true); |
5099 | 11 | return true; |
5100 | 18 | case Instruction::FPToUI: |
5101 | 18 | return selectFPToInt(I, /*Signed=*/false); |
5102 | 228 | case Instruction::ZExt: |
5103 | 228 | case Instruction::SExt: |
5104 | 228 | return selectIntExt(I); |
5105 | 26 | case Instruction::Trunc: |
5106 | 26 | if (!selectCast(I, ISD::TRUNCATE)) |
5107 | 14 | return selectTrunc(I); |
5108 | 12 | return true; |
5109 | 7 | case Instruction::FPExt: |
5110 | 7 | return selectFPExt(I); |
5111 | 2 | case Instruction::FPTrunc: |
5112 | 2 | return selectFPTrunc(I); |
5113 | 25 | case Instruction::SIToFP: |
5114 | 25 | if (!selectCast(I, ISD::SINT_TO_FP)) |
5115 | 9 | return selectIntToFP(I, /*Signed=*/true); |
5116 | 16 | return true; |
5117 | 21 | case Instruction::UIToFP: |
5118 | 21 | return selectIntToFP(I, /*Signed=*/false); |
5119 | 408 | case Instruction::Load: |
5120 | 408 | return selectLoad(I); |
5121 | 472 | case Instruction::Store: |
5122 | 472 | return selectStore(I); |
5123 | 57 | case Instruction::FCmp: |
5124 | 57 | case Instruction::ICmp: |
5125 | 57 | return selectCmp(I); |
5126 | 53 | case Instruction::Select: |
5127 | 53 | return selectSelect(I); |
5128 | 1.31k | case Instruction::Ret: |
5129 | 1.31k | return selectRet(I); |
5130 | 4 | case Instruction::FRem: |
5131 | 4 | return selectFRem(I); |
5132 | 32 | case Instruction::GetElementPtr: |
5133 | 32 | return selectGetElementPtr(I); |
5134 | 3 | case Instruction::AtomicCmpXchg: |
5135 | 3 | return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I)); |
5136 | 636 | } |
5137 | 636 | |
5138 | 636 | // fall-back to target-independent instruction selection. |
5139 | 636 | return selectOperator(I, I->getOpcode()); |
5140 | 636 | // Silence warnings. |
5141 | 0 | (void)&CC_AArch64_DarwinPCS_VarArg; |
5142 | 0 | (void)&CC_AArch64_Win64_VarArg; |
5143 | 0 | } |
5144 | | |
5145 | | namespace llvm { |
5146 | | |
5147 | | FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo, |
5148 | 1.23k | const TargetLibraryInfo *LibInfo) { |
5149 | 1.23k | return new AArch64FastISel(FuncInfo, LibInfo); |
5150 | 1.23k | } |
5151 | | |
5152 | | } // end namespace llvm |