/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/AArch64/AArch64FastISel.cpp
Line | Count | Source
1 | | //===- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This file defines the AArch64-specific support for the FastISel class. Some |
10 | | // of the target-specific code is generated by tablegen in the file |
11 | | // AArch64GenFastISel.inc, which is #included here. |
12 | | // |
13 | | //===----------------------------------------------------------------------===// |
14 | | |
15 | | #include "AArch64.h" |
16 | | #include "AArch64CallingConvention.h" |
17 | | #include "AArch64RegisterInfo.h" |
18 | | #include "AArch64Subtarget.h" |
19 | | #include "MCTargetDesc/AArch64AddressingModes.h" |
20 | | #include "Utils/AArch64BaseInfo.h" |
21 | | #include "llvm/ADT/APFloat.h" |
22 | | #include "llvm/ADT/APInt.h" |
23 | | #include "llvm/ADT/DenseMap.h" |
24 | | #include "llvm/ADT/SmallVector.h" |
25 | | #include "llvm/Analysis/BranchProbabilityInfo.h" |
26 | | #include "llvm/CodeGen/CallingConvLower.h" |
27 | | #include "llvm/CodeGen/FastISel.h" |
28 | | #include "llvm/CodeGen/FunctionLoweringInfo.h" |
29 | | #include "llvm/CodeGen/ISDOpcodes.h" |
30 | | #include "llvm/CodeGen/MachineBasicBlock.h" |
31 | | #include "llvm/CodeGen/MachineConstantPool.h" |
32 | | #include "llvm/CodeGen/MachineFrameInfo.h" |
33 | | #include "llvm/CodeGen/MachineInstr.h" |
34 | | #include "llvm/CodeGen/MachineInstrBuilder.h" |
35 | | #include "llvm/CodeGen/MachineMemOperand.h" |
36 | | #include "llvm/CodeGen/MachineRegisterInfo.h" |
37 | | #include "llvm/CodeGen/RuntimeLibcalls.h" |
38 | | #include "llvm/CodeGen/ValueTypes.h" |
39 | | #include "llvm/IR/Argument.h" |
40 | | #include "llvm/IR/Attributes.h" |
41 | | #include "llvm/IR/BasicBlock.h" |
42 | | #include "llvm/IR/CallingConv.h" |
43 | | #include "llvm/IR/Constant.h" |
44 | | #include "llvm/IR/Constants.h" |
45 | | #include "llvm/IR/DataLayout.h" |
46 | | #include "llvm/IR/DerivedTypes.h" |
47 | | #include "llvm/IR/Function.h" |
48 | | #include "llvm/IR/GetElementPtrTypeIterator.h" |
49 | | #include "llvm/IR/GlobalValue.h" |
50 | | #include "llvm/IR/InstrTypes.h" |
51 | | #include "llvm/IR/Instruction.h" |
52 | | #include "llvm/IR/Instructions.h" |
53 | | #include "llvm/IR/IntrinsicInst.h" |
54 | | #include "llvm/IR/Intrinsics.h" |
55 | | #include "llvm/IR/Operator.h" |
56 | | #include "llvm/IR/Type.h" |
57 | | #include "llvm/IR/User.h" |
58 | | #include "llvm/IR/Value.h" |
59 | | #include "llvm/MC/MCInstrDesc.h" |
60 | | #include "llvm/MC/MCRegisterInfo.h" |
61 | | #include "llvm/MC/MCSymbol.h" |
62 | | #include "llvm/Support/AtomicOrdering.h" |
63 | | #include "llvm/Support/Casting.h" |
64 | | #include "llvm/Support/CodeGen.h" |
65 | | #include "llvm/Support/Compiler.h" |
66 | | #include "llvm/Support/ErrorHandling.h" |
67 | | #include "llvm/Support/MachineValueType.h" |
68 | | #include "llvm/Support/MathExtras.h" |
69 | | #include <algorithm> |
70 | | #include <cassert> |
71 | | #include <cstdint> |
72 | | #include <iterator> |
73 | | #include <utility> |
74 | | |
75 | | using namespace llvm; |
76 | | |
77 | | namespace { |
78 | | |
79 | | class AArch64FastISel final : public FastISel { |
80 | | class Address { |
81 | | public: |
82 | | using BaseKind = enum { |
83 | | RegBase, |
84 | | FrameIndexBase |
85 | | }; |
86 | | |
87 | | private: |
88 | | BaseKind Kind = RegBase; |
89 | | AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend; |
90 | | union { |
91 | | unsigned Reg; |
92 | | int FI; |
93 | | } Base; |
94 | | unsigned OffsetReg = 0; |
95 | | unsigned Shift = 0; |
96 | | int64_t Offset = 0; |
97 | | const GlobalValue *GV = nullptr; |
98 | | |
99 | | public: |
100 | 941 | Address() { Base.Reg = 0; } |
101 | | |
102 | 219 | void setKind(BaseKind K) { Kind = K; } |
103 | 0 | BaseKind getKind() const { return Kind; } |
104 | 83 | void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; } |
105 | 1.34k | AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; } |
106 | 1.82k | bool isRegBase() const { return Kind == RegBase; } |
107 | 699 | bool isFIBase() const { return Kind == FrameIndexBase; } |
108 | | |
109 | 1.04k | void setReg(unsigned Reg) { |
110 | 1.04k | assert(isRegBase() && "Invalid base register access!"); |
111 | 1.04k | Base.Reg = Reg; |
112 | 1.04k | } |
113 | | |
114 | 2.05k | unsigned getReg() const { |
115 | 2.05k | assert(isRegBase() && "Invalid base register access!"); |
116 | 2.05k | return Base.Reg; |
117 | 2.05k | } |
118 | | |
119 | 506 | void setOffsetReg(unsigned Reg) { |
120 | 506 | OffsetReg = Reg; |
121 | 506 | } |
122 | | |
123 | 2.46k | unsigned getOffsetReg() const { |
124 | 2.46k | return OffsetReg; |
125 | 2.46k | } |
126 | | |
127 | 184 | void setFI(unsigned FI) { |
128 | 184 | assert(isFIBase() && "Invalid base frame index access!"); |
129 | 184 | Base.FI = FI; |
130 | 184 | } |
131 | | |
132 | 192 | unsigned getFI() const { |
133 | 192 | assert(isFIBase() && "Invalid base frame index access!"); |
134 | 192 | return Base.FI; |
135 | 192 | } |
136 | | |
137 | 213 | void setOffset(int64_t O) { Offset = O; } |
138 | 3.63k | int64_t getOffset() { return Offset; } |
139 | 62 | void setShift(unsigned S) { Shift = S; } |
140 | 82 | unsigned getShift() { return Shift; } |
141 | | |
142 | 96 | void setGlobalValue(const GlobalValue *G) { GV = G; } |
143 | 206 | const GlobalValue *getGlobalValue() { return GV; } |
144 | | }; |
145 | | |
146 | | /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can |
147 | | /// make the right decision when generating code for different targets. |
148 | | const AArch64Subtarget *Subtarget; |
149 | | LLVMContext *Context; |
150 | | |
151 | | bool fastLowerArguments() override; |
152 | | bool fastLowerCall(CallLoweringInfo &CLI) override; |
153 | | bool fastLowerIntrinsicCall(const IntrinsicInst *II) override; |
154 | | |
155 | | private: |
156 | | // Selection routines. |
157 | | bool selectAddSub(const Instruction *I); |
158 | | bool selectLogicalOp(const Instruction *I); |
159 | | bool selectLoad(const Instruction *I); |
160 | | bool selectStore(const Instruction *I); |
161 | | bool selectBranch(const Instruction *I); |
162 | | bool selectIndirectBr(const Instruction *I); |
163 | | bool selectCmp(const Instruction *I); |
164 | | bool selectSelect(const Instruction *I); |
165 | | bool selectFPExt(const Instruction *I); |
166 | | bool selectFPTrunc(const Instruction *I); |
167 | | bool selectFPToInt(const Instruction *I, bool Signed); |
168 | | bool selectIntToFP(const Instruction *I, bool Signed); |
169 | | bool selectRem(const Instruction *I, unsigned ISDOpcode); |
170 | | bool selectRet(const Instruction *I); |
171 | | bool selectTrunc(const Instruction *I); |
172 | | bool selectIntExt(const Instruction *I); |
173 | | bool selectMul(const Instruction *I); |
174 | | bool selectShift(const Instruction *I); |
175 | | bool selectBitCast(const Instruction *I); |
176 | | bool selectFRem(const Instruction *I); |
177 | | bool selectSDiv(const Instruction *I); |
178 | | bool selectGetElementPtr(const Instruction *I); |
179 | | bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I); |
180 | | |
181 | | // Utility helper routines. |
182 | | bool isTypeLegal(Type *Ty, MVT &VT); |
183 | | bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false); |
184 | | bool isValueAvailable(const Value *V) const; |
185 | | bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr); |
186 | | bool computeCallAddress(const Value *V, Address &Addr); |
187 | | bool simplifyAddress(Address &Addr, MVT VT); |
188 | | void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB, |
189 | | MachineMemOperand::Flags Flags, |
190 | | unsigned ScaleFactor, MachineMemOperand *MMO); |
191 | | bool isMemCpySmall(uint64_t Len, unsigned Alignment); |
192 | | bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len, |
193 | | unsigned Alignment); |
194 | | bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I, |
195 | | const Value *Cond); |
196 | | bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT); |
197 | | bool optimizeSelect(const SelectInst *SI); |
198 | | std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx); |
199 | | |
200 | | // Emit helper routines. |
201 | | unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, |
202 | | const Value *RHS, bool SetFlags = false, |
203 | | bool WantResult = true, bool IsZExt = false); |
204 | | unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg, |
205 | | bool LHSIsKill, unsigned RHSReg, bool RHSIsKill, |
206 | | bool SetFlags = false, bool WantResult = true); |
207 | | unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg, |
208 | | bool LHSIsKill, uint64_t Imm, bool SetFlags = false, |
209 | | bool WantResult = true); |
210 | | unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg, |
211 | | bool LHSIsKill, unsigned RHSReg, bool RHSIsKill, |
212 | | AArch64_AM::ShiftExtendType ShiftType, |
213 | | uint64_t ShiftImm, bool SetFlags = false, |
214 | | bool WantResult = true); |
215 | | unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg, |
216 | | bool LHSIsKill, unsigned RHSReg, bool RHSIsKill, |
217 | | AArch64_AM::ShiftExtendType ExtType, |
218 | | uint64_t ShiftImm, bool SetFlags = false, |
219 | | bool WantResult = true); |
220 | | |
221 | | // Emit functions. |
222 | | bool emitCompareAndBranch(const BranchInst *BI); |
223 | | bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt); |
224 | | bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt); |
225 | | bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm); |
226 | | bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS); |
227 | | unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true, |
228 | | MachineMemOperand *MMO = nullptr); |
229 | | bool emitStore(MVT VT, unsigned SrcReg, Address Addr, |
230 | | MachineMemOperand *MMO = nullptr); |
231 | | bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg, |
232 | | MachineMemOperand *MMO = nullptr); |
233 | | unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt); |
234 | | unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt); |
235 | | unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS, |
236 | | bool SetFlags = false, bool WantResult = true, |
237 | | bool IsZExt = false); |
238 | | unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm); |
239 | | unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS, |
240 | | bool SetFlags = false, bool WantResult = true, |
241 | | bool IsZExt = false); |
242 | | unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill, |
243 | | unsigned RHSReg, bool RHSIsKill, bool WantResult = true); |
244 | | unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill, |
245 | | unsigned RHSReg, bool RHSIsKill, |
246 | | AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm, |
247 | | bool WantResult = true); |
248 | | unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS, |
249 | | const Value *RHS); |
250 | | unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg, |
251 | | bool LHSIsKill, uint64_t Imm); |
252 | | unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg, |
253 | | bool LHSIsKill, unsigned RHSReg, bool RHSIsKill, |
254 | | uint64_t ShiftImm); |
255 | | unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm); |
256 | | unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill, |
257 | | unsigned Op1, bool Op1IsKill); |
258 | | unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill, |
259 | | unsigned Op1, bool Op1IsKill); |
260 | | unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill, |
261 | | unsigned Op1, bool Op1IsKill); |
262 | | unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, |
263 | | unsigned Op1Reg, bool Op1IsKill); |
264 | | unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill, |
265 | | uint64_t Imm, bool IsZExt = true); |
266 | | unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, |
267 | | unsigned Op1Reg, bool Op1IsKill); |
268 | | unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill, |
269 | | uint64_t Imm, bool IsZExt = true); |
270 | | unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, |
271 | | unsigned Op1Reg, bool Op1IsKill); |
272 | | unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill, |
273 | | uint64_t Imm, bool IsZExt = false); |
274 | | |
275 | | unsigned materializeInt(const ConstantInt *CI, MVT VT); |
276 | | unsigned materializeFP(const ConstantFP *CFP, MVT VT); |
277 | | unsigned materializeGV(const GlobalValue *GV); |
278 | | |
279 | | // Call handling routines. |
280 | | private: |
281 | | CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const; |
282 | | bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs, |
283 | | unsigned &NumBytes); |
284 | | bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes); |
285 | | |
286 | | public: |
287 | | // Backend specific FastISel code. |
288 | | unsigned fastMaterializeAlloca(const AllocaInst *AI) override; |
289 | | unsigned fastMaterializeConstant(const Constant *C) override; |
290 | | unsigned fastMaterializeFloatZero(const ConstantFP* CF) override; |
291 | | |
292 | | explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo, |
293 | | const TargetLibraryInfo *LibInfo) |
294 | 1.26k | : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) { |
295 | 1.26k | Subtarget = |
296 | 1.26k | &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget()); |
297 | 1.26k | Context = &FuncInfo.Fn->getContext(); |
298 | 1.26k | } |
299 | | |
300 | | bool fastSelectInstruction(const Instruction *I) override; |
301 | | |
302 | | #include "AArch64GenFastISel.inc" |
303 | | }; |
304 | | |
305 | | } // end anonymous namespace |
306 | | |
307 | | /// Check if the sign-/zero-extend will be a noop. |
308 | 82 | static bool isIntExtFree(const Instruction *I) { |
309 | 82 | assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) && |
310 | 82 | "Unexpected integer extend instruction."); |
311 | 82 | assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() && |
312 | 82 | "Unexpected value type."); |
313 | 82 | bool IsZExt = isa<ZExtInst>(I); |
314 | 82 | |
315 | 82 | if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0))) |
316 | 0 | if (LI->hasOneUse()) |
317 | 0 | return true; |
318 | 82 | |
319 | 82 | if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) |
320 | 80 | if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
321 | 4 | return true; |
322 | 78 | |
323 | 78 | return false; |
324 | 78 | } |
325 | | |
326 | | /// Determine the implicit scale factor that is applied by a memory |
327 | | /// operation for a given value type. |
328 | 1.44k | static unsigned getImplicitScaleFactor(MVT VT) { |
329 | 1.44k | switch (VT.SimpleTy) { |
330 | 1.44k | default: |
331 | 224 | return 0; // invalid |
332 | 1.44k | case MVT::i1: // fall-through |
333 | 200 | case MVT::i8: |
334 | 200 | return 1; |
335 | 200 | case MVT::i16: |
336 | 120 | return 2; |
337 | 458 | case MVT::i32: // fall-through |
338 | 458 | case MVT::f32: |
339 | 458 | return 4; |
340 | 458 | case MVT::i64: // fall-through |
341 | 440 | case MVT::f64: |
342 | 440 | return 8; |
343 | 1.44k | } |
344 | 1.44k | } |
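
The scale factor above is exactly what divides the byte offset when addLoadStoreOperands (further down) builds the scaled "unsigned offset" load/store forms. A minimal standalone sketch of that encoding rule, with a hypothetical helper name:

    // Sketch: maps a byte offset onto the scaled uimm12 field of the
    // LDR/STR unsigned-offset forms; returns nullopt when the offset is
    // negative, misaligned, or too large (those cases need the signed
    // 9-bit unscaled LDUR/STUR form or extra address arithmetic).
    #include <cstdint>
    #include <optional>

    std::optional<int64_t> encodeScaledOffset(int64_t ByteOffset, unsigned Scale) {
      if (ByteOffset < 0 || (ByteOffset % Scale) != 0)
        return std::nullopt;
      int64_t Imm = ByteOffset / Scale;
      if (Imm > 4095) // uimm12
        return std::nullopt;
      return Imm; // e.g. an i32 load at +16 encodes imm 4
    }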
345 | | |
346 | 203 | CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const { |
347 | 203 | if (CC == CallingConv::WebKit_JS) |
348 | 7 | return CC_AArch64_WebKit_JS; |
349 | 196 | if (CC == CallingConv::GHC) |
350 | 0 | return CC_AArch64_GHC; |
351 | 196 | return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
352 | 196 | } |
353 | | |
354 | 15 | unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) { |
355 | 15 | assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 && |
356 | 15 | "Alloca should always return a pointer."); |
357 | 15 | |
358 | 15 | // Don't handle dynamic allocas. |
359 | 15 | if (!FuncInfo.StaticAllocaMap.count(AI)) |
360 | 0 | return 0; |
361 | 15 | |
362 | 15 | DenseMap<const AllocaInst *, int>::iterator SI = |
363 | 15 | FuncInfo.StaticAllocaMap.find(AI); |
364 | 15 | |
365 | 15 | if (SI != FuncInfo.StaticAllocaMap.end()) { |
366 | 15 | unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass); |
367 | 15 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri), |
368 | 15 | ResultReg) |
369 | 15 | .addFrameIndex(SI->second) |
370 | 15 | .addImm(0) |
371 | 15 | .addImm(0); |
372 | 15 | return ResultReg; |
373 | 15 | } |
374 | 0 | |
375 | 0 | return 0; |
376 | 0 | } |
377 | | |
378 | 357 | unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) { |
379 | 357 | if (VT > MVT::i64) |
380 | 0 | return 0; |
381 | 357 | |
382 | 357 | if (!CI->isZero()) |
383 | 231 | return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue()); |
384 | 126 | |
385 | 126 | // Create a copy from the zero register to materialize a "0" value. |
386 | 126 | const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
387 | 126 | : &AArch64::GPR32RegClass;
388 | 126 | unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
389 | 126 | unsigned ResultReg = createResultReg(RC); |
390 | 126 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), |
391 | 126 | ResultReg).addReg(ZeroReg, getKillRegState(true)); |
392 | 126 | return ResultReg; |
393 | 126 | } |
394 | | |
395 | 20 | unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) { |
396 | 20 | // Positive zero (+0.0) has to be materialized with a fmov from the zero |
397 | 20 | // register, because the immediate version of fmov cannot encode zero. |
398 | 20 | if (CFP->isNullValue()) |
399 | 2 | return fastMaterializeFloatZero(CFP); |
400 | 18 | |
401 | 18 | if (VT != MVT::f32 && VT != MVT::f64)
402 | 0 | return 0; |
403 | 18 | |
404 | 18 | const APFloat Val = CFP->getValueAPF(); |
405 | 18 | bool Is64Bit = (VT == MVT::f64); |
406 | 18 | // This checks to see if we can use FMOV instructions to materialize |
407 | 18 | // a constant, otherwise we have to materialize via the constant pool. |
408 | 18 | int Imm = |
409 | 18 | Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
410 | 18 | if (Imm != -1) { |
411 | 10 | unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
412 | 10 | return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm); |
413 | 10 | } |
414 | 8 | |
415 | 8 | // For the MachO large code model materialize the FP constant in code. |
416 | 8 | if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
417 | 5 | unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
418 | 5 | const TargetRegisterClass *RC = Is64Bit ?
419 | 3 | &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
420 | 5 | |
421 | 5 | unsigned TmpReg = createResultReg(RC); |
422 | 5 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg) |
423 | 5 | .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue()); |
424 | 5 | |
425 | 5 | unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); |
426 | 5 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
427 | 5 | TII.get(TargetOpcode::COPY), ResultReg) |
428 | 5 | .addReg(TmpReg, getKillRegState(true)); |
429 | 5 | |
430 | 5 | return ResultReg; |
431 | 5 | } |
432 | 3 | |
433 | 3 | // Materialize via constant pool. MachineConstantPool wants an explicit |
434 | 3 | // alignment. |
435 | 3 | unsigned Align = DL.getPrefTypeAlignment(CFP->getType()); |
436 | 3 | if (Align == 0) |
437 | 0 | Align = DL.getTypeAllocSize(CFP->getType()); |
438 | 3 | |
439 | 3 | unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align); |
440 | 3 | unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); |
441 | 3 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), |
442 | 3 | ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE); |
443 | 3 | |
444 | 3 | unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
445 | 3 | unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); |
446 | 3 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) |
447 | 3 | .addReg(ADRPReg) |
448 | 3 | .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC); |
449 | 3 | return ResultReg; |
450 | 3 | } |
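
The FMOV check above succeeds only for the small set of values AArch64 packs into 8 bits: +/-(1 + f/16) * 2^e with a 4-bit fraction f and an exponent e in [-3, 4]. A usage sketch of the same check in isolation (example values chosen for illustration):

    #include "llvm/ADT/APFloat.h"
    #include "MCTargetDesc/AArch64AddressingModes.h"

    // True when Val fits the 8-bit FP immediate field, so a single FMOV
    // (immediate) can materialize it: 2.0 and 0.5 qualify, 0.1 does not,
    // and +0.0 is deliberately excluded (it goes through the zero
    // register in fastMaterializeFloatZero instead).
    bool canUseFMOVImm(const llvm::APFloat &Val, bool Is64Bit) {
      int Imm = Is64Bit ? llvm::AArch64_AM::getFP64Imm(Val)
                        : llvm::AArch64_AM::getFP32Imm(Val);
      return Imm != -1;
    }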
451 | | |
452 | 125 | unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) { |
453 | 125 | // We can't handle thread-local variables quickly yet. |
454 | 125 | if (GV->isThreadLocal()) |
455 | 11 | return 0; |
456 | 114 | |
457 | 114 | // MachO still uses GOT for large code-model accesses, but ELF requires |
458 | 114 | // movz/movk sequences, which FastISel doesn't handle yet. |
459 | 114 | if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
460 | 26 | return 0; |
461 | 88 | |
462 | 88 | unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM); |
463 | 88 | |
464 | 88 | EVT DestEVT = TLI.getValueType(DL, GV->getType(), true); |
465 | 88 | if (!DestEVT.isSimple()) |
466 | 0 | return 0; |
467 | 88 | |
468 | 88 | unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); |
469 | 88 | unsigned ResultReg; |
470 | 88 | |
471 | 88 | if (OpFlags & AArch64II::MO_GOT) { |
472 | 46 | // ADRP + LDRX |
473 | 46 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), |
474 | 46 | ADRPReg) |
475 | 46 | .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags); |
476 | 46 | |
477 | 46 | ResultReg = createResultReg(&AArch64::GPR64RegClass); |
478 | 46 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui), |
479 | 46 | ResultReg) |
480 | 46 | .addReg(ADRPReg) |
481 | 46 | .addGlobalAddress(GV, 0, |
482 | 46 | AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags); |
483 | 46 | } else { |
484 | 42 | // ADRP + ADDX |
485 | 42 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), |
486 | 42 | ADRPReg) |
487 | 42 | .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags); |
488 | 42 | |
489 | 42 | ResultReg = createResultReg(&AArch64::GPR64spRegClass); |
490 | 42 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri), |
491 | 42 | ResultReg) |
492 | 42 | .addReg(ADRPReg) |
493 | 42 | .addGlobalAddress(GV, 0, |
494 | 42 | AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags) |
495 | 42 | .addImm(0); |
496 | 42 | } |
497 | 88 | return ResultReg; |
498 | 88 | } |
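
Both sequences rely on the same page split: ADRP materializes the 4 KiB page of the symbol, and the paired :lo12: relocation supplies the remaining bits, either as the LDR offset (GOT-indirect) or the ADD immediate (direct). A standalone sketch of the arithmetic, with hypothetical helper names:

    #include <cstdint>

    // ADRP yields the 4 KiB-aligned page of an address; the :lo12: fixup
    // contributes the offset within that page.
    uint64_t pageBase(uint64_t Addr) { return Addr & ~uint64_t(0xfff); }
    uint64_t pageOff(uint64_t Addr)  { return Addr &  uint64_t(0xfff); }
    // Invariant: pageBase(A) + pageOff(A) == A for any address A.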
499 | | |
500 | 541 | unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) { |
501 | 541 | EVT CEVT = TLI.getValueType(DL, C->getType(), true); |
502 | 541 | |
503 | 541 | // Only handle simple types. |
504 | 541 | if (!CEVT.isSimple()) |
505 | 0 | return 0; |
506 | 541 | MVT VT = CEVT.getSimpleVT(); |
507 | 541 | |
508 | 541 | if (const auto *CI = dyn_cast<ConstantInt>(C)) |
509 | 351 | return materializeInt(CI, VT); |
510 | 190 | else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) |
511 | 20 | return materializeFP(CFP, VT); |
512 | 170 | else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) |
513 | 118 | return materializeGV(GV); |
514 | 52 | |
515 | 52 | return 0; |
516 | 52 | } |
517 | | |
518 | 2 | unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) { |
519 | 2 | assert(CFP->isNullValue() && |
520 | 2 | "Floating-point constant is not a positive zero."); |
521 | 2 | MVT VT; |
522 | 2 | if (!isTypeLegal(CFP->getType(), VT)) |
523 | 0 | return 0; |
524 | 2 | |
525 | 2 | if (VT != MVT::f32 && VT != MVT::f64)
526 | 0 | return 0; |
527 | 2 | |
528 | 2 | bool Is64Bit = (VT == MVT::f64); |
529 | 2 | unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
530 | 2 | unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
531 | 2 | return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true); |
532 | 2 | } |
533 | | |
534 | | /// Check if the multiply is by a power-of-2 constant. |
535 | 583 | static bool isMulPowOf2(const Value *I) { |
536 | 583 | if (const auto *MI = dyn_cast<MulOperator>(I)) { |
537 | 23 | if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0))) |
538 | 0 | if (C->getValue().isPowerOf2()) |
539 | 0 | return true; |
540 | 23 | if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1))) |
541 | 21 | if (C->getValue().isPowerOf2()) |
542 | 19 | return true; |
543 | 564 | } |
544 | 564 | return false; |
545 | 564 | } |
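
computeAddress and emitAddSub use this predicate to treat a multiply by 2, 4, or 8 as a shift, which the addressing modes and shifted-register forms absorb for free. A standalone sketch of the equivalence being exploited:

    #include <cstdint>

    // X * C equals X << log2(C) whenever C is a power of two; the shift
    // then folds into e.g. [base, index, lsl #3] for an i64 element access.
    uint64_t mulPow2(uint64_t X, uint64_t C) { // C assumed a power of two
      unsigned Shift = __builtin_ctzll(C);     // log2(C)
      return X << Shift;
    }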
546 | | |
547 | | // Computes the address to get to an object. |
548 | | bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty) |
549 | 1.16k | { |
550 | 1.16k | const User *U = nullptr; |
551 | 1.16k | unsigned Opcode = Instruction::UserOp1; |
552 | 1.16k | if (const Instruction *I = dyn_cast<Instruction>(Obj)) { |
553 | 559 | // Don't walk into other basic blocks unless the object is an alloca from |
554 | 559 | // another block, otherwise it may not have a virtual register assigned. |
555 | 559 | if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) || |
556 | 559 | FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
557 | 555 | Opcode = I->getOpcode(); |
558 | 555 | U = I; |
559 | 555 | } |
560 | 609 | } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) { |
561 | 24 | Opcode = C->getOpcode(); |
562 | 24 | U = C; |
563 | 24 | } |
564 | 1.16k | |
565 | 1.16k | if (auto *Ty = dyn_cast<PointerType>(Obj->getType())) |
566 | 847 | if (Ty->getAddressSpace() > 255) |
567 | 10 | // Fast instruction selection doesn't support the special |
568 | 10 | // address spaces. |
569 | 10 | return false; |
570 | 1.15k | |
571 | 1.15k | switch (Opcode) { |
572 | 1.15k | default: |
573 | 597 | break; |
574 | 1.15k | case Instruction::BitCast: |
575 | 17 | // Look through bitcasts. |
576 | 17 | return computeAddress(U->getOperand(0), Addr, Ty); |
577 | 1.15k | |
578 | 1.15k | case Instruction::IntToPtr: |
579 | 121 | // Look past no-op inttoptrs. |
580 | 121 | if (TLI.getValueType(DL, U->getOperand(0)->getType()) == |
581 | 121 | TLI.getPointerTy(DL)) |
582 | 121 | return computeAddress(U->getOperand(0), Addr, Ty); |
583 | 0 | break; |
584 | 0 |
585 | 3 | case Instruction::PtrToInt: |
586 | 3 | // Look past no-op ptrtoints. |
587 | 3 | if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) |
588 | 3 | return computeAddress(U->getOperand(0), Addr, Ty); |
589 | 0 | break; |
590 | 0 |
591 | 53 | case Instruction::GetElementPtr: { |
592 | 53 | Address SavedAddr = Addr; |
593 | 53 | uint64_t TmpOffset = Addr.getOffset(); |
594 | 53 | |
595 | 53 | // Iterate through the GEP folding the constants into offsets where |
596 | 53 | // we can. |
597 | 53 | for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U); |
598 | 144 | GTI != E; ++GTI) {
599 | 95 | const Value *Op = GTI.getOperand(); |
600 | 95 | if (StructType *STy = GTI.getStructTypeOrNull()) { |
601 | 21 | const StructLayout *SL = DL.getStructLayout(STy); |
602 | 21 | unsigned Idx = cast<ConstantInt>(Op)->getZExtValue(); |
603 | 21 | TmpOffset += SL->getElementOffset(Idx); |
604 | 74 | } else { |
605 | 74 | uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType()); |
606 | 74 | while (true) { |
607 | 74 | if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { |
608 | 70 | // Constant-offset addressing. |
609 | 70 | TmpOffset += CI->getSExtValue() * S; |
610 | 70 | break; |
611 | 70 | } |
612 | 4 | if (canFoldAddIntoGEP(U, Op)) { |
613 | 0 | // A compatible add with a constant operand. Fold the constant. |
614 | 0 | ConstantInt *CI = |
615 | 0 | cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1)); |
616 | 0 | TmpOffset += CI->getSExtValue() * S; |
617 | 0 | // Iterate on the other operand. |
618 | 0 | Op = cast<AddOperator>(Op)->getOperand(0); |
619 | 0 | continue; |
620 | 0 | } |
621 | 4 | // Unsupported |
622 | 4 | goto unsupported_gep; |
623 | 4 | } |
624 | 74 | } |
625 | 95 | } |
626 | 53 | |
627 | 53 | // Try to grab the base operand now. |
628 | 53 | Addr.setOffset(TmpOffset); |
629 | 49 | if (computeAddress(U->getOperand(0), Addr, Ty)) |
630 | 44 | return true; |
631 | 5 | |
632 | 5 | // We failed, restore everything and try the other options. |
633 | 5 | Addr = SavedAddr; |
634 | 5 | |
635 | 9 | unsupported_gep: |
636 | 9 | break; |
637 | 5 | } |
638 | 184 | case Instruction::Alloca: { |
639 | 184 | const AllocaInst *AI = cast<AllocaInst>(Obj); |
640 | 184 | DenseMap<const AllocaInst *, int>::iterator SI = |
641 | 184 | FuncInfo.StaticAllocaMap.find(AI); |
642 | 184 | if (SI != FuncInfo.StaticAllocaMap.end()) { |
643 | 184 | Addr.setKind(Address::FrameIndexBase); |
644 | 184 | Addr.setFI(SI->second); |
645 | 184 | return true; |
646 | 184 | } |
647 | 0 | break; |
648 | 0 | } |
649 | 100 | case Instruction::Add: { |
650 | 100 | // Adds of constants are common and easy enough. |
651 | 100 | const Value *LHS = U->getOperand(0); |
652 | 100 | const Value *RHS = U->getOperand(1); |
653 | 100 | |
654 | 100 | if (isa<ConstantInt>(LHS)) |
655 | 1 | std::swap(LHS, RHS); |
656 | 100 | |
657 | 100 | if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { |
658 | 21 | Addr.setOffset(Addr.getOffset() + CI->getSExtValue()); |
659 | 21 | return computeAddress(LHS, Addr, Ty); |
660 | 21 | } |
661 | 79 | |
662 | 79 | Address Backup = Addr; |
663 | 79 | if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty)) |
664 | 79 | return true; |
665 | 0 | Addr = Backup; |
666 | 0 |
667 | 0 | break; |
668 | 0 | } |
669 | 21 | case Instruction::Sub: { |
670 | 21 | // Subs of constants are common and easy enough. |
671 | 21 | const Value *LHS = U->getOperand(0); |
672 | 21 | const Value *RHS = U->getOperand(1); |
673 | 21 | |
674 | 21 | if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { |
675 | 21 | Addr.setOffset(Addr.getOffset() - CI->getSExtValue()); |
676 | 21 | return computeAddress(LHS, Addr, Ty); |
677 | 21 | } |
678 | 0 | break; |
679 | 0 | } |
680 | 26 | case Instruction::Shl: { |
681 | 26 | if (Addr.getOffsetReg()) |
682 | 3 | break; |
683 | 23 | |
684 | 23 | const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1)); |
685 | 23 | if (!CI) |
686 | 0 | break; |
687 | 23 | |
688 | 23 | unsigned Val = CI->getZExtValue(); |
689 | 23 | if (Val < 1 || Val > 3) |
690 | 0 | break; |
691 | 23 | |
692 | 23 | uint64_t NumBytes = 0; |
693 | 23 | if (Ty && Ty->isSized()) { |
694 | 23 | uint64_t NumBits = DL.getTypeSizeInBits(Ty); |
695 | 23 | NumBytes = NumBits / 8; |
696 | 23 | if (!isPowerOf2_64(NumBits)) |
697 | 0 | NumBytes = 0; |
698 | 23 | } |
699 | 23 | |
700 | 23 | if (NumBytes != (1ULL << Val)) |
701 | 0 | break; |
702 | 23 | |
703 | 23 | Addr.setShift(Val); |
704 | 23 | Addr.setExtendType(AArch64_AM::LSL); |
705 | 23 | |
706 | 23 | const Value *Src = U->getOperand(0); |
707 | 23 | if (const auto *I = dyn_cast<Instruction>(Src)) { |
708 | 17 | if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { |
709 | 17 | // Fold the zext or sext when it won't become a noop. |
710 | 17 | if (const auto *ZE = dyn_cast<ZExtInst>(I)) { |
711 | 5 | if (!isIntExtFree(ZE) && |
712 | 5 | ZE->getOperand(0)->getType()->isIntegerTy(32)) {
713 | 4 | Addr.setExtendType(AArch64_AM::UXTW); |
714 | 4 | Src = ZE->getOperand(0); |
715 | 4 | } |
716 | 12 | } else if (const auto *SE = dyn_cast<SExtInst>(I)) { |
717 | 8 | if (!isIntExtFree(SE) && |
718 | 8 | SE->getOperand(0)->getType()->isIntegerTy(32)) {
719 | 7 | Addr.setExtendType(AArch64_AM::SXTW); |
720 | 7 | Src = SE->getOperand(0); |
721 | 7 | } |
722 | 8 | } |
723 | 17 | } |
724 | 17 | } |
725 | 23 | |
726 | 23 | if (const auto *AI = dyn_cast<BinaryOperator>(Src)) |
727 | 5 | if (AI->getOpcode() == Instruction::And) { |
728 | 4 | const Value *LHS = AI->getOperand(0); |
729 | 4 | const Value *RHS = AI->getOperand(1); |
730 | 4 | |
731 | 4 | if (const auto *C = dyn_cast<ConstantInt>(LHS)) |
732 | 0 | if (C->getValue() == 0xffffffff) |
733 | 0 | std::swap(LHS, RHS); |
734 | 4 | |
735 | 4 | if (const auto *C = dyn_cast<ConstantInt>(RHS)) |
736 | 3 | if (C->getValue() == 0xffffffff) { |
737 | 3 | Addr.setExtendType(AArch64_AM::UXTW); |
738 | 3 | unsigned Reg = getRegForValue(LHS); |
739 | 3 | if (!Reg) |
740 | 0 | return false; |
741 | 3 | bool RegIsKill = hasTrivialKill(LHS); |
742 | 3 | Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill, |
743 | 3 | AArch64::sub_32); |
744 | 3 | Addr.setOffsetReg(Reg); |
745 | 3 | return true; |
746 | 3 | } |
747 | 4 | } |
748 | 20 | |
749 | 20 | unsigned Reg = getRegForValue(Src); |
750 | 20 | if (!Reg) |
751 | 0 | return false; |
752 | 20 | Addr.setOffsetReg(Reg); |
753 | 20 | return true; |
754 | 20 | } |
755 | 20 | case Instruction::Mul: { |
756 | 13 | if (Addr.getOffsetReg()) |
757 | 0 | break; |
758 | 13 | |
759 | 13 | if (!isMulPowOf2(U)) |
760 | 0 | break; |
761 | 13 | |
762 | 13 | const Value *LHS = U->getOperand(0); |
763 | 13 | const Value *RHS = U->getOperand(1); |
764 | 13 | |
765 | 13 | // Canonicalize power-of-2 value to the RHS. |
766 | 13 | if (const auto *C = dyn_cast<ConstantInt>(LHS)) |
767 | 0 | if (C->getValue().isPowerOf2()) |
768 | 0 | std::swap(LHS, RHS); |
769 | 13 | |
770 | 13 | assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
771 | 13 | const auto *C = cast<ConstantInt>(RHS); |
772 | 13 | unsigned Val = C->getValue().logBase2(); |
773 | 13 | if (Val < 1 || Val > 3) |
774 | 0 | break; |
775 | 13 | |
776 | 13 | uint64_t NumBytes = 0; |
777 | 13 | if (Ty && Ty->isSized()) { |
778 | 13 | uint64_t NumBits = DL.getTypeSizeInBits(Ty); |
779 | 13 | NumBytes = NumBits / 8; |
780 | 13 | if (!isPowerOf2_64(NumBits)) |
781 | 0 | NumBytes = 0; |
782 | 13 | } |
783 | 13 | |
784 | 13 | if (NumBytes != (1ULL << Val)) |
785 | 0 | break; |
786 | 13 | |
787 | 13 | Addr.setShift(Val); |
788 | 13 | Addr.setExtendType(AArch64_AM::LSL); |
789 | 13 | |
790 | 13 | const Value *Src = LHS; |
791 | 13 | if (const auto *I = dyn_cast<Instruction>(Src)) { |
792 | 9 | if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { |
793 | 8 | // Fold the zext or sext when it won't become a noop. |
794 | 8 | if (const auto *ZE = dyn_cast<ZExtInst>(I)) { |
795 | 4 | if (!isIntExtFree(ZE) && |
796 | 4 | ZE->getOperand(0)->getType()->isIntegerTy(32)) {
797 | 3 | Addr.setExtendType(AArch64_AM::UXTW); |
798 | 3 | Src = ZE->getOperand(0); |
799 | 3 | } |
800 | 4 | } else if (const auto *SE = dyn_cast<SExtInst>(I)) { |
801 | 4 | if (!isIntExtFree(SE) && |
802 | 4 | SE->getOperand(0)->getType()->isIntegerTy(32)) {
803 | 3 | Addr.setExtendType(AArch64_AM::SXTW); |
804 | 3 | Src = SE->getOperand(0); |
805 | 3 | } |
806 | 4 | } |
807 | 8 | } |
808 | 9 | } |
809 | 13 | |
810 | 13 | unsigned Reg = getRegForValue(Src); |
811 | 13 | if (!Reg) |
812 | 0 | return false; |
813 | 13 | Addr.setOffsetReg(Reg); |
814 | 13 | return true; |
815 | 13 | } |
816 | 13 | case Instruction::And: { |
817 | 3 | if (Addr.getOffsetReg()) |
818 | 0 | break; |
819 | 3 | |
820 | 3 | if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
821 | 2 | break; |
822 | 1 | |
823 | 1 | const Value *LHS = U->getOperand(0); |
824 | 1 | const Value *RHS = U->getOperand(1); |
825 | 1 | |
826 | 1 | if (const auto *C = dyn_cast<ConstantInt>(LHS)) |
827 | 0 | if (C->getValue() == 0xffffffff) |
828 | 0 | std::swap(LHS, RHS); |
829 | 1 | |
830 | 1 | if (const auto *C = dyn_cast<ConstantInt>(RHS)) |
831 | 1 | if (C->getValue() == 0xffffffff) { |
832 | 1 | Addr.setShift(0); |
833 | 1 | Addr.setExtendType(AArch64_AM::LSL); |
834 | 1 | Addr.setExtendType(AArch64_AM::UXTW); |
835 | 1 | |
836 | 1 | unsigned Reg = getRegForValue(LHS); |
837 | 1 | if (!Reg) |
838 | 0 | return false; |
839 | 1 | bool RegIsKill = hasTrivialKill(LHS); |
840 | 1 | Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill, |
841 | 1 | AArch64::sub_32); |
842 | 1 | Addr.setOffsetReg(Reg); |
843 | 1 | return true; |
844 | 1 | } |
845 | 0 | break; |
846 | 0 | } |
847 | 20 | case Instruction::SExt: |
848 | 20 | case Instruction::ZExt: { |
849 | 20 | if (!Addr.getReg() || Addr.getOffsetReg()) |
850 | 0 | break; |
851 | 20 | |
852 | 20 | const Value *Src = nullptr; |
853 | 20 | // Fold the zext or sext when it won't become a noop. |
854 | 20 | if (const auto *ZE = dyn_cast<ZExtInst>(U)) { |
855 | 0 | if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) { |
856 | 0 | Addr.setExtendType(AArch64_AM::UXTW); |
857 | 0 | Src = ZE->getOperand(0); |
858 | 0 | } |
859 | 20 | } else if (const auto *SE = dyn_cast<SExtInst>(U)) { |
860 | 20 | if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) { |
861 | 20 | Addr.setExtendType(AArch64_AM::SXTW); |
862 | 20 | Src = SE->getOperand(0); |
863 | 20 | } |
864 | 20 | } |
865 | 20 | |
866 | 20 | if (!Src) |
867 | 0 | break; |
868 | 20 | |
869 | 20 | Addr.setShift(0); |
870 | 20 | unsigned Reg = getRegForValue(Src); |
871 | 20 | if (!Reg) |
872 | 0 | return false; |
873 | 20 | Addr.setOffsetReg(Reg); |
874 | 20 | return true; |
875 | 20 | } |
876 | 611 | } // end switch |
877 | 611 | |
878 | 611 | if (Addr.isRegBase() && !Addr.getReg()) { |
879 | 586 | unsigned Reg = getRegForValue(Obj); |
880 | 586 | if (!Reg) |
881 | 25 | return false; |
882 | 561 | Addr.setReg(Reg); |
883 | 561 | return true; |
884 | 561 | } |
885 | 25 | |
886 | 25 | if (!Addr.getOffsetReg()) { |
887 | 25 | unsigned Reg = getRegForValue(Obj); |
888 | 25 | if (!Reg) |
889 | 0 | return false; |
890 | 25 | Addr.setOffsetReg(Reg); |
891 | 25 | return true; |
892 | 25 | } |
893 | 0 | |
894 | 0 | return false; |
895 | 0 | } |
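
A worked example of the GetElementPtr folding above: each struct field adds its layout offset and each array index adds index * element-size to the running TmpOffset. In isolation, with a hypothetical helper:

    #include <cstdint>

    // For a hypothetical struct { int64_t a; int32_t b[4]; }, the address
    // of s->b[2] folds to base + 8 (offset of b) + 2 * 4 = base + 16,
    // which simplifyAddress below can then encode as a scaled immediate.
    int64_t foldGEPOffset(int64_t FieldOffset, int64_t Index, int64_t EltSize) {
      return FieldOffset + Index * EltSize; // mirrors the TmpOffset updates
    }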
896 | | |
897 | 127 | bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) { |
898 | 127 | const User *U = nullptr; |
899 | 127 | unsigned Opcode = Instruction::UserOp1; |
900 | 127 | bool InMBB = true; |
901 | 127 | |
902 | 127 | if (const auto *I = dyn_cast<Instruction>(V)) { |
903 | 14 | Opcode = I->getOpcode(); |
904 | 14 | U = I; |
905 | 14 | InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock(); |
906 | 113 | } else if (const auto *C = dyn_cast<ConstantExpr>(V)) { |
907 | 1 | Opcode = C->getOpcode(); |
908 | 1 | U = C; |
909 | 1 | } |
910 | 127 | |
911 | 127 | switch (Opcode) { |
912 | 127 | default: break;
913 | 127 | case Instruction::BitCast: |
914 | 0 | // Look past a bitcast if its operand is in the same BB.
915 | 0 | if (InMBB) |
916 | 0 | return computeCallAddress(U->getOperand(0), Addr); |
917 | 0 | break; |
918 | 12 | case Instruction::IntToPtr: |
919 | 12 | // Look past a no-op inttoptr if its operand is in the same BB.
920 | 12 | if (InMBB && |
921 | 12 | TLI.getValueType(DL, U->getOperand(0)->getType()) == |
922 | 12 | TLI.getPointerTy(DL)) |
923 | 12 | return computeCallAddress(U->getOperand(0), Addr); |
924 | 0 | break; |
925 | 0 | case Instruction::PtrToInt: |
926 | 0 | // Look past a no-op ptrtoint if its operand is in the same BB.
927 | 0 | if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) |
928 | 0 | return computeCallAddress(U->getOperand(0), Addr); |
929 | 0 | break; |
930 | 115 | } |
931 | 115 | |
932 | 115 | if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) { |
933 | 96 | Addr.setGlobalValue(GV); |
934 | 96 | return true; |
935 | 96 | } |
936 | 19 | |
937 | 19 | // If all else fails, try to materialize the value in a register. |
938 | 19 | if (!Addr.getGlobalValue()) { |
939 | 19 | Addr.setReg(getRegForValue(V)); |
940 | 19 | return Addr.getReg() != 0; |
941 | 19 | } |
942 | 0 | |
943 | 0 | return false; |
944 | 0 | } |
945 | | |
946 | 3.70k | bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) { |
947 | 3.70k | EVT evt = TLI.getValueType(DL, Ty, true); |
948 | 3.70k | |
949 | 3.70k | // Only handle simple types. |
950 | 3.70k | if (evt == MVT::Other || !evt.isSimple())
951 | 16 | return false; |
952 | 3.69k | VT = evt.getSimpleVT(); |
953 | 3.69k | |
954 | 3.69k | // This is a legal type, but it's not something we handle in fast-isel. |
955 | 3.69k | if (VT == MVT::f128) |
956 | 26 | return false; |
957 | 3.66k | |
958 | 3.66k | // Handle all other legal types, i.e. a register that will directly hold this |
959 | 3.66k | // value. |
960 | 3.66k | return TLI.isTypeLegal(VT); |
961 | 3.66k | } |
962 | | |
963 | | /// Determine if the value type is supported by FastISel. |
964 | | /// |
965 | | /// FastISel for AArch64 can handle more value types than are legal. This adds |
966 | | /// simple value types such as i1, i8, and i16.
967 | 1.95k | bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) { |
968 | 1.95k | if (Ty->isVectorTy() && !IsVectorAllowed)
969 | 6 | return false; |
970 | 1.94k | |
971 | 1.94k | if (isTypeLegal(Ty, VT)) |
972 | 1.45k | return true; |
973 | 491 | |
974 | 491 | // If this is a type that can be sign- or zero-extended to a basic operation,
975 | 491 | // go ahead and accept it now. |
976 | 491 | if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
977 | 463 | return true; |
978 | 28 | |
979 | 28 | return false; |
980 | 28 | } |
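
Put differently, beyond the legal types only three promotable integer widths are let through, because every consumer widens them with emitIntExt before emitting a real instruction. A trivial standalone restatement (hypothetical helper name):

    // i1, i8, and i16 are not legal AArch64 register types, but they are
    // accepted here and zero-/sign-extended to i32 at each use site.
    bool isPromotableIntWidth(unsigned Bits) {
      return Bits == 1 || Bits == 8 || Bits == 16;
    }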
981 | | |
982 | 1.33k | bool AArch64FastISel::isValueAvailable(const Value *V) const { |
983 | 1.33k | if (!isa<Instruction>(V)) |
984 | 509 | return true; |
985 | 828 | |
986 | 828 | const auto *I = cast<Instruction>(V); |
987 | 828 | return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB; |
988 | 828 | } |
989 | | |
990 | 834 | bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) { |
991 | 834 | unsigned ScaleFactor = getImplicitScaleFactor(VT); |
992 | 834 | if (!ScaleFactor) |
993 | 224 | return false; |
994 | 610 | |
995 | 610 | bool ImmediateOffsetNeedsLowering = false; |
996 | 610 | bool RegisterOffsetNeedsLowering = false; |
997 | 610 | int64_t Offset = Addr.getOffset(); |
998 | 610 | if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
999 | 4 | ImmediateOffsetNeedsLowering = true;
1000 | 606 | else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1001 | 606 | !isUInt<12>(Offset / ScaleFactor))
1002 | 6 | ImmediateOffsetNeedsLowering = true; |
1003 | 610 | |
1004 | 610 | // Cannot encode an offset register and an immediate offset in the same |
1005 | 610 | // instruction. Fold the immediate offset into the load/store instruction and |
1006 | 610 | // emit an additional add to take care of the offset register. |
1007 | 610 | if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1008 | 3 | RegisterOffsetNeedsLowering = true; |
1009 | 610 | |
1010 | 610 | // Cannot encode zero register as base. |
1011 | 610 | if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1012 | 3 | RegisterOffsetNeedsLowering = true; |
1013 | 610 | |
1014 | 610 | // If this is a stack pointer and the offset needs to be simplified then put |
1015 | 610 | // the alloca address into a register, set the base type back to register and |
1016 | 610 | // continue. This should almost never happen. |
1017 | 610 | if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1018 | 3 | { |
1019 | 3 | unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass); |
1020 | 3 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri), |
1021 | 3 | ResultReg) |
1022 | 3 | .addFrameIndex(Addr.getFI()) |
1023 | 3 | .addImm(0) |
1024 | 3 | .addImm(0); |
1025 | 3 | Addr.setKind(Address::RegBase); |
1026 | 3 | Addr.setReg(ResultReg); |
1027 | 3 | } |
1028 | 610 | |
1029 | 610 | if (RegisterOffsetNeedsLowering) { |
1030 | 5 | unsigned ResultReg = 0; |
1031 | 5 | if (Addr.getReg()) { |
1032 | 2 | if (Addr.getExtendType() == AArch64_AM::SXTW || |
1033 | 2 | Addr.getExtendType() == AArch64_AM::UXTW)
1034 | 1 | ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(), |
1035 | 1 | /*TODO:IsKill=*/false, Addr.getOffsetReg(), |
1036 | 1 | /*TODO:IsKill=*/false, Addr.getExtendType(), |
1037 | 1 | Addr.getShift()); |
1038 | 1 | else |
1039 | 1 | ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(), |
1040 | 1 | /*TODO:IsKill=*/false, Addr.getOffsetReg(), |
1041 | 1 | /*TODO:IsKill=*/false, AArch64_AM::LSL, |
1042 | 1 | Addr.getShift()); |
1043 | 3 | } else { |
1044 | 3 | if (Addr.getExtendType() == AArch64_AM::UXTW) |
1045 | 0 | ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(), |
1046 | 0 | /*Op0IsKill=*/false, Addr.getShift(), |
1047 | 0 | /*IsZExt=*/true); |
1048 | 3 | else if (Addr.getExtendType() == AArch64_AM::SXTW) |
1049 | 1 | ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(), |
1050 | 1 | /*Op0IsKill=*/false, Addr.getShift(), |
1051 | 1 | /*IsZExt=*/false); |
1052 | 2 | else |
1053 | 2 | ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(), |
1054 | 2 | /*Op0IsKill=*/false, Addr.getShift()); |
1055 | 3 | } |
1056 | 5 | if (!ResultReg) |
1057 | 0 | return false; |
1058 | 5 | |
1059 | 5 | Addr.setReg(ResultReg); |
1060 | 5 | Addr.setOffsetReg(0); |
1061 | 5 | Addr.setShift(0); |
1062 | 5 | Addr.setExtendType(AArch64_AM::InvalidShiftExtend); |
1063 | 5 | } |
1064 | 610 | |
1065 | 610 | // Since the offset is too large for the load/store instruction get the |
1066 | 610 | // reg+offset into a register. |
1067 | 610 | if (ImmediateOffsetNeedsLowering) { |
1068 | 10 | unsigned ResultReg; |
1069 | 10 | if (Addr.getReg()) |
1070 | 10 | // Try to fold the immediate into the add instruction. |
1071 | 10 | ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset); |
1072 | 0 | else |
1073 | 0 | ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset); |
1074 | 10 | |
1075 | 10 | if (!ResultReg) |
1076 | 2 | return false; |
1077 | 8 | Addr.setReg(ResultReg); |
1078 | 8 | Addr.setOffset(0); |
1079 | 8 | } |
1080 | 610 | return true;
1081 | 610 | } |
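
The two ImmediateOffsetNeedsLowering conditions above mirror the two load/store encodings: a scaled unsigned 12-bit immediate, with a signed 9-bit unscaled (LDUR/STUR) fallback. A standalone sketch with example values:

    #include <cstdint>

    bool offsetNeedsLowering(int64_t Offset, unsigned Scale) {
      bool FitsUnscaled = Offset >= -256 && Offset <= 255;   // isInt<9>
      // Negative or misaligned offsets can only use the unscaled form.
      if ((Offset < 0 || (Offset & (Scale - 1))) && !FitsUnscaled)
        return true;  // e.g. Offset = -260
      // Aligned positive offsets must fit the scaled uimm12 field.
      if (Offset > 0 && !(Offset & (Scale - 1)) && Offset / Scale > 4095)
        return true;  // e.g. Offset = 16512, Scale = 4 -> imm 4128
      return false;
    }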
1082 | | |
1083 | | void AArch64FastISel::addLoadStoreOperands(Address &Addr, |
1084 | | const MachineInstrBuilder &MIB, |
1085 | | MachineMemOperand::Flags Flags, |
1086 | | unsigned ScaleFactor, |
1087 | 608 | MachineMemOperand *MMO) { |
1088 | 608 | int64_t Offset = Addr.getOffset() / ScaleFactor; |
1089 | 608 | // Frame base works a bit differently. Handle it separately. |
1090 | 608 | if (Addr.isFIBase()) { |
1091 | 189 | int FI = Addr.getFI(); |
1092 | 189 | // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size |
1093 | 189 | // and alignment should be based on the VT. |
1094 | 189 | MMO = FuncInfo.MF->getMachineMemOperand( |
1095 | 189 | MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags, |
1096 | 189 | MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); |
1097 | 189 | // Now add the rest of the operands. |
1098 | 189 | MIB.addFrameIndex(FI).addImm(Offset); |
1099 | 419 | } else { |
1100 | 419 | assert(Addr.isRegBase() && "Unexpected address kind."); |
1101 | 419 | const MCInstrDesc &II = MIB->getDesc(); |
1102 | 419 | unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1103 | 419 | Addr.setReg( |
1104 | 419 | constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx)); |
1105 | 419 | Addr.setOffsetReg( |
1106 | 419 | constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1)); |
1107 | 419 | if (Addr.getOffsetReg()) { |
1108 | 77 | assert(Addr.getOffset() == 0 && "Unexpected offset"); |
1109 | 77 | bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW || |
1110 | 77 | Addr.getExtendType() == AArch64_AM::SXTX;
1111 | 77 | MIB.addReg(Addr.getReg()); |
1112 | 77 | MIB.addReg(Addr.getOffsetReg()); |
1113 | 77 | MIB.addImm(IsSigned); |
1114 | 77 | MIB.addImm(Addr.getShift() != 0); |
1115 | 77 | } else |
1116 | 342 | MIB.addReg(Addr.getReg()).addImm(Offset); |
1117 | 419 | } |
1118 | 608 | |
1119 | 608 | if (MMO) |
1120 | 548 | MIB.addMemOperand(MMO); |
1121 | 608 | } |
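
For the register-offset form above, the two trailing immediates select the instruction's extend and shift fields: the first flags a signed extend (SXTW/SXTX rather than UXTW/LSL), the second says whether the index register is scaled by the access size. A standalone restatement of that mapping, with hypothetical names:

    // Mirrors the MIB.addImm(IsSigned) / MIB.addImm(Addr.getShift() != 0)
    // pair: extend kind plus an optional shift by log2(access size).
    struct RegOffsetFlags {
      bool SignExtend; // SXTW/SXTX vs. UXTW/LSL
      bool DoShift;    // index scaled by the access size
    };

    RegOffsetFlags loadStoreRegOffsetFlags(bool IsSigned, unsigned Shift) {
      return {IsSigned, Shift != 0};
    }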
1122 | | |
1123 | | unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, |
1124 | | const Value *RHS, bool SetFlags, |
1125 | 313 | bool WantResult, bool IsZExt) { |
1126 | 313 | AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend; |
1127 | 313 | bool NeedExtend = false; |
1128 | 313 | switch (RetVT.SimpleTy) { |
1129 | 313 | default: |
1130 | 0 | return 0; |
1131 | 313 | case MVT::i1: |
1132 | 2 | NeedExtend = true; |
1133 | 2 | break; |
1134 | 313 | case MVT::i8: |
1135 | 3 | NeedExtend = true; |
1136 | 3 | ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1137 | 3 | break; |
1138 | 313 | case MVT::i16: |
1139 | 5 | NeedExtend = true; |
1140 | 5 | ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1141 | 5 | break; |
1142 | 313 | case MVT::i32: // fall-through |
1143 | 303 | case MVT::i64: |
1144 | 303 | break; |
1145 | 313 | } |
1146 | 313 | MVT SrcVT = RetVT; |
1147 | 313 | RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32); |
1148 | 313 | |
1149 | 313 | // Canonicalize immediates to the RHS first. |
1150 | 313 | if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1151 | 0 | std::swap(LHS, RHS); |
1152 | 313 | |
1153 | 313 | // Canonicalize mul by power of 2 to the RHS. |
1154 | 313 | if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1155 | 199 | if (isMulPowOf2(LHS)) |
1156 | 0 | std::swap(LHS, RHS); |
1157 | 313 | |
1158 | 313 | // Canonicalize shift immediate to the RHS. |
1159 | 313 | if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1160 | 199 | if (const auto *SI = dyn_cast<BinaryOperator>(LHS)) |
1161 | 78 | if (isa<ConstantInt>(SI->getOperand(1))) |
1162 | 3 | if (SI->getOpcode() == Instruction::Shl || |
1163 | 3 | SI->getOpcode() == Instruction::LShr || |
1164 | 3 | SI->getOpcode() == Instruction::AShr ) |
1165 | 0 | std::swap(LHS, RHS); |
1166 | 313 | |
1167 | 313 | unsigned LHSReg = getRegForValue(LHS); |
1168 | 313 | if (!LHSReg) |
1169 | 0 | return 0; |
1170 | 313 | bool LHSIsKill = hasTrivialKill(LHS); |
1171 | 313 | |
1172 | 313 | if (NeedExtend) |
1173 | 10 | LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt); |
1174 | 313 | |
1175 | 313 | unsigned ResultReg = 0; |
1176 | 313 | if (const auto *C = dyn_cast<ConstantInt>(RHS)) { |
1177 | 58 | uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1178 | 58 | if (C->isNegative()) |
1179 | 8 | ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm, |
1180 | 8 | SetFlags, WantResult); |
1181 | 50 | else |
1182 | 50 | ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags, |
1183 | 50 | WantResult); |
1184 | 255 | } else if (const auto *C = dyn_cast<Constant>(RHS)) |
1185 | 2 | if (C->isNullValue()) |
1186 | 2 | ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags, |
1187 | 2 | WantResult); |
1188 | 313 | |
1189 | 313 | if (ResultReg) |
1190 | 57 | return ResultReg; |
1191 | 256 | |
1192 | 256 | // Only extend the RHS within the instruction if there is a valid extend type. |
1193 | 256 | if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1194 | 256 | isValueAvailable(RHS)) {
1195 | 6 | if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) |
1196 | 0 | if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) |
1197 | 0 | if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) { |
1198 | 0 | unsigned RHSReg = getRegForValue(SI->getOperand(0)); |
1199 | 0 | if (!RHSReg) |
1200 | 0 | return 0; |
1201 | 0 | bool RHSIsKill = hasTrivialKill(SI->getOperand(0)); |
1202 | 0 | return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, |
1203 | 0 | RHSIsKill, ExtendType, C->getZExtValue(), |
1204 | 0 | SetFlags, WantResult); |
1205 | 0 | } |
1206 | 6 | unsigned RHSReg = getRegForValue(RHS); |
1207 | 6 | if (!RHSReg) |
1208 | 0 | return 0; |
1209 | 6 | bool RHSIsKill = hasTrivialKill(RHS); |
1210 | 6 | return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill, |
1211 | 6 | ExtendType, 0, SetFlags, WantResult); |
1212 | 6 | } |
1213 | 250 | |
1214 | 250 | // Check if the mul can be folded into the instruction. |
1215 | 250 | if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1216 | 210 | if (isMulPowOf2(RHS)) { |
1217 | 0 | const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0); |
1218 | 0 | const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1); |
1219 | 0 |
1220 | 0 | if (const auto *C = dyn_cast<ConstantInt>(MulLHS)) |
1221 | 0 | if (C->getValue().isPowerOf2()) |
1222 | 0 | std::swap(MulLHS, MulRHS); |
1223 | 0 |
1224 | 0 | assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt."); |
1225 | 0 | uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2(); |
1226 | 0 | unsigned RHSReg = getRegForValue(MulLHS); |
1227 | 0 | if (!RHSReg) |
1228 | 0 | return 0; |
1229 | 0 | bool RHSIsKill = hasTrivialKill(MulLHS); |
1230 | 0 | ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, |
1231 | 0 | RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags, |
1232 | 0 | WantResult); |
1233 | 0 | if (ResultReg) |
1234 | 0 | return ResultReg; |
1235 | 250 | } |
1236 | 210 | } |
1237 | 250 | |
1238 | 250 | // Check if the shift can be folded into the instruction. |
1239 | 250 | if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1240 | 210 | if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) { |
1241 | 51 | if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) { |
1242 | 11 | AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend; |
1243 | 11 | switch (SI->getOpcode()) { |
1244 | 11 | default: break;
1245 | 11 | case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
1246 | 11 | case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1247 | 11 | case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1248 | 11 | } |
1249 | 11 | uint64_t ShiftVal = C->getZExtValue(); |
1250 | 11 | if (ShiftType != AArch64_AM::InvalidShiftExtend) { |
1251 | 11 | unsigned RHSReg = getRegForValue(SI->getOperand(0)); |
1252 | 11 | if (!RHSReg) |
1253 | 0 | return 0; |
1254 | 11 | bool RHSIsKill = hasTrivialKill(SI->getOperand(0)); |
1255 | 11 | ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, |
1256 | 11 | RHSIsKill, ShiftType, ShiftVal, SetFlags, |
1257 | 11 | WantResult); |
1258 | 11 | if (ResultReg) |
1259 | 9 | return ResultReg; |
1260 | 241 | } |
1261 | 11 | } |
1262 | 51 | } |
1263 | 210 | } |
1264 | 241 | |
1265 | 241 | unsigned RHSReg = getRegForValue(RHS); |
1266 | 241 | if (!RHSReg) |
1267 | 0 | return 0; |
1268 | 241 | bool RHSIsKill = hasTrivialKill(RHS); |
1269 | 241 | |
1270 | 241 | if (NeedExtend) |
1271 | 1 | RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt); |
1272 | 241 | |
1273 | 241 | return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill, |
1274 | 241 | SetFlags, WantResult); |
1275 | 241 | } |
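
One detail worth noting in the constant path above: an add of a negative immediate is flipped into a subtract of its magnitude (and vice versa), so the 12-bit unsigned immediate encoding still applies. A standalone sketch (ignoring INT64_MIN for brevity):

    #include <cstdint>

    // "add x, #-5" is not encodable, but "sub x, #5" is; emitAddSub
    // performs this flip via emitAddSub_ri(!UseAdd, ..., -Imm, ...).
    int64_t addImmViaFlip(int64_t LHS, int64_t Imm) {
      return Imm < 0 ? LHS - (-Imm) : LHS + Imm;
    }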
1276 | | |
1277 | | unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg, |
1278 | | bool LHSIsKill, unsigned RHSReg, |
1279 | | bool RHSIsKill, bool SetFlags, |
1280 | 250 | bool WantResult) { |
1281 | 250 | assert(LHSReg && RHSReg && "Invalid register number."); |
1282 | 250 | |
1283 | 250 | if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1284 | 250 | RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1285 | 2 | return 0; |
1286 | 248 | |
1287 | 248 | if (RetVT != MVT::i32 && RetVT != MVT::i64)
1288 | 0 | return 0; |
1289 | 248 | |
1290 | 248 | static const unsigned OpcTable[2][2][2] = { |
1291 | 248 | { { AArch64::SUBWrr, AArch64::SUBXrr }, |
1292 | 248 | { AArch64::ADDWrr, AArch64::ADDXrr } }, |
1293 | 248 | { { AArch64::SUBSWrr, AArch64::SUBSXrr }, |
1294 | 248 | { AArch64::ADDSWrr, AArch64::ADDSXrr } } |
1295 | 248 | }; |
1296 | 248 | bool Is64Bit = RetVT == MVT::i64; |
1297 | 248 | unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; |
1298 | 248 | const TargetRegisterClass *RC = |
1299 | 248 | Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1300 | 248 | unsigned ResultReg; |
1301 | 248 | if (WantResult) |
1302 | 202 | ResultReg = createResultReg(RC); |
1303 | 46 | else |
1304 | 46 | ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1305 | 248 | |
1306 | 248 | const MCInstrDesc &II = TII.get(Opc); |
1307 | 248 | LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs()); |
1308 | 248 | RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1); |
1309 | 248 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) |
1310 | 248 | .addReg(LHSReg, getKillRegState(LHSIsKill)) |
1311 | 248 | .addReg(RHSReg, getKillRegState(RHSIsKill)); |
1312 | 248 | return ResultReg; |
1313 | 248 | } |
1314 | | |
1315 | | unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg, |
1316 | | bool LHSIsKill, uint64_t Imm, |
1317 | 105 | bool SetFlags, bool WantResult) { |
1318 | 105 | assert(LHSReg && "Invalid register number."); |
1319 | 105 | |
1320 | 105 | if (RetVT != MVT::i32 && RetVT != MVT::i64)
1321 | 0 | return 0; |
1322 | 105 | |
1323 | 105 | unsigned ShiftImm; |
1324 | 105 | if (isUInt<12>(Imm)) |
1325 | 93 | ShiftImm = 0; |
1326 | 12 | else if ((Imm & 0xfff000) == Imm) { |
1327 | 4 | ShiftImm = 12; |
1328 | 4 | Imm >>= 12; |
1329 | 4 | } else |
1330 | 8 | return 0; |
1331 | 97 | |
1332 | 97 | static const unsigned OpcTable[2][2][2] = { |
1333 | 97 | { { AArch64::SUBWri, AArch64::SUBXri }, |
1334 | 97 | { AArch64::ADDWri, AArch64::ADDXri } }, |
1335 | 97 | { { AArch64::SUBSWri, AArch64::SUBSXri }, |
1336 | 97 | { AArch64::ADDSWri, AArch64::ADDSXri } } |
1337 | 97 | }; |
1338 | 97 | bool Is64Bit = RetVT == MVT::i64; |
1339 | 97 | unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; |
1340 | 97 | const TargetRegisterClass *RC; |
1341 | 97 | if (SetFlags) |
1342 | 34 | RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1343 | 63 | else |
1344 | 63 | RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1345 | 97 | unsigned ResultReg; |
1346 | 97 | if (WantResult) |
1347 | 68 | ResultReg = createResultReg(RC); |
1348 | 29 | else |
1349 | 29 | ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1350 | 97 | |
1351 | 97 | const MCInstrDesc &II = TII.get(Opc); |
1352 | 97 | LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs()); |
1353 | 97 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) |
1354 | 97 | .addReg(LHSReg, getKillRegState(LHSIsKill)) |
1355 | 97 | .addImm(Imm) |
1356 | 97 | .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm)); |
1357 | 97 | return ResultReg; |
1358 | 97 | } |
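The immediate screen above accepts exactly the two ADD/SUB immediate encodings: a raw unsigned 12-bit value, or a 12-bit value shifted left by 12. A standalone restatement of the same test (a sketch; the helper name is hypothetical):

    #include <cstdint>

    // Equivalent to the isUInt<12>/0xfff000 checks above.
    static bool isAddSubImmEncodable(uint64_t Imm) {
      if (Imm < (1ULL << 12))
        return true;                     // raw uimm12, no shift
      return (Imm & 0xfff000ULL) == Imm; // uimm12 << 12, emitted with LSL #12
    }
    // e.g. 4095 -> true, 0x7ff000 -> true, 4097 -> false (caller falls back)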
1359 | | |
1360 | | unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg, |
1361 | | bool LHSIsKill, unsigned RHSReg, |
1362 | | bool RHSIsKill, |
1363 | | AArch64_AM::ShiftExtendType ShiftType, |
1364 | | uint64_t ShiftImm, bool SetFlags, |
1365 | 25 | bool WantResult) { |
1366 | 25 | assert(LHSReg && RHSReg && "Invalid register number."); |
1367 | 25 | assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP && |
1368 | 25 | RHSReg != AArch64::SP && RHSReg != AArch64::WSP); |
1369 | 25 | |
1370 | 25 | if (RetVT != MVT::i32 && RetVT != MVT::i64)
1371 | 0 | return 0; |
1372 | 25 | |
1373 | 25 | // Don't deal with undefined shifts. |
1374 | 25 | if (ShiftImm >= RetVT.getSizeInBits()) |
1375 | 2 | return 0; |
1376 | 23 | |
1377 | 23 | static const unsigned OpcTable[2][2][2] = { |
1378 | 23 | { { AArch64::SUBWrs, AArch64::SUBXrs }, |
1379 | 23 | { AArch64::ADDWrs, AArch64::ADDXrs } }, |
1380 | 23 | { { AArch64::SUBSWrs, AArch64::SUBSXrs }, |
1381 | 23 | { AArch64::ADDSWrs, AArch64::ADDSXrs } } |
1382 | 23 | }; |
1383 | 23 | bool Is64Bit = RetVT == MVT::i64; |
1384 | 23 | unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; |
1385 | 23 | const TargetRegisterClass *RC = |
1386 | 23 | Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1387 | 23 | unsigned ResultReg; |
1388 | 23 | if (WantResult) |
1389 | 14 | ResultReg = createResultReg(RC); |
1390 | 9 | else |
1391 | 9 | ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1392 | 23 | |
1393 | 23 | const MCInstrDesc &II = TII.get(Opc); |
1394 | 23 | LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs()); |
1395 | 23 | RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1); |
1396 | 23 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) |
1397 | 23 | .addReg(LHSReg, getKillRegState(LHSIsKill)) |
1398 | 23 | .addReg(RHSReg, getKillRegState(RHSIsKill)) |
1399 | 23 | .addImm(getShifterImm(ShiftType, ShiftImm)); |
1400 | 23 | return ResultReg; |
1401 | 23 | } |
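The ShiftImm guard above exists because shift amounts at or beyond the register width are not valid in the shifted-register encoding; rejecting them lets the caller fall back to a separately materialized shift. A minimal restatement (hypothetical helper name):

    // Valid arithmetic shift amounts are 0..31 for W and 0..63 for X registers,
    // e.g. "add w0, w1, w2, lsl #3" requires 3 < 32.
    static bool isValidArithShift(uint64_t ShiftImm, unsigned RegBits) {
      return ShiftImm < RegBits;
    }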
1402 | | |
1403 | | unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg, |
1404 | | bool LHSIsKill, unsigned RHSReg, |
1405 | | bool RHSIsKill, |
1406 | | AArch64_AM::ShiftExtendType ExtType, |
1407 | | uint64_t ShiftImm, bool SetFlags, |
1408 | 7 | bool WantResult) { |
1409 | 7 | assert(LHSReg && RHSReg && "Invalid register number."); |
1410 | 7 | assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR && |
1411 | 7 | RHSReg != AArch64::XZR && RHSReg != AArch64::WZR); |
1412 | 7 | |
1413 | 7 | if (RetVT != MVT::i32 && RetVT != MVT::i64)
1414 | 0 | return 0; |
1415 | 7 | |
1416 | 7 | if (ShiftImm >= 4) |
1417 | 0 | return 0; |
1418 | 7 | |
1419 | 7 | static const unsigned OpcTable[2][2][2] = { |
1420 | 7 | { { AArch64::SUBWrx, AArch64::SUBXrx }, |
1421 | 7 | { AArch64::ADDWrx, AArch64::ADDXrx } }, |
1422 | 7 | { { AArch64::SUBSWrx, AArch64::SUBSXrx }, |
1423 | 7 | { AArch64::ADDSWrx, AArch64::ADDSXrx } } |
1424 | 7 | }; |
1425 | 7 | bool Is64Bit = RetVT == MVT::i64; |
1426 | 7 | unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; |
1427 | 7 | const TargetRegisterClass *RC = nullptr; |
1428 | 7 | if (SetFlags) |
1429 | 5 | RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1430 | 2 | else |
1431 | 2 | RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1432 | 7 | unsigned ResultReg; |
1433 | 7 | if (WantResult) |
1434 | 2 | ResultReg = createResultReg(RC); |
1435 | 5 | else |
1436 | 5 | ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1437 | 7 | |
1438 | 7 | const MCInstrDesc &II = TII.get(Opc); |
1439 | 7 | LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs()); |
1440 | 7 | RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1); |
1441 | 7 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) |
1442 | 7 | .addReg(LHSReg, getKillRegState(LHSIsKill)) |
1443 | 7 | .addReg(RHSReg, getKillRegState(RHSIsKill)) |
1444 | 7 | .addImm(getArithExtendImm(ExtType, ShiftImm)); |
1445 | 7 | return ResultReg; |
1446 | 7 | } |
1447 | | |
1448 | 112 | bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) { |
1449 | 112 | Type *Ty = LHS->getType(); |
1450 | 112 | EVT EVT = TLI.getValueType(DL, Ty, true); |
1451 | 112 | if (!EVT.isSimple()) |
1452 | 0 | return false; |
1453 | 112 | MVT VT = EVT.getSimpleVT(); |
1454 | 112 | |
1455 | 112 | switch (VT.SimpleTy) { |
1456 | 112 | default: |
1457 | 0 | return false; |
1458 | 112 | case MVT::i1: |
1459 | 65 | case MVT::i8: |
1460 | 65 | case MVT::i16: |
1461 | 65 | case MVT::i32: |
1462 | 65 | case MVT::i64: |
1463 | 65 | return emitICmp(VT, LHS, RHS, IsZExt); |
1464 | 65 | case MVT::f32: |
1465 | 47 | case MVT::f64: |
1466 | 47 | return emitFCmp(VT, LHS, RHS); |
1467 | 112 | } |
1468 | 112 | } |
1469 | | |
1470 | | bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, |
1471 | 65 | bool IsZExt) { |
1472 | 65 | return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false, |
1473 | 65 | IsZExt) != 0; |
1474 | 65 | } |
1475 | | |
1476 | | bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, |
1477 | 11 | uint64_t Imm) { |
1478 | 11 | return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm, |
1479 | 11 | /*SetFlags=*/true, /*WantResult=*/false) != 0; |
1480 | 11 | } |
1481 | | |
1482 | 47 | bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) { |
1483 | 47 | if (RetVT != MVT::f32 && RetVT != MVT::f64)
1484 | 0 | return false; |
1485 | 47 | |
1486 | 47 | // Check to see if the 2nd operand is a constant that we can encode directly |
1487 | 47 | // in the compare. |
1488 | 47 | bool UseImm = false; |
1489 | 47 | if (const auto *CFP = dyn_cast<ConstantFP>(RHS)) |
1490 | 4 | if (CFP->isZero() && !CFP->isNegative())
1491 | 2 | UseImm = true; |
1492 | 47 | |
1493 | 47 | unsigned LHSReg = getRegForValue(LHS); |
1494 | 47 | if (!LHSReg) |
1495 | 0 | return false; |
1496 | 47 | bool LHSIsKill = hasTrivialKill(LHS); |
1497 | 47 | |
1498 | 47 | if (UseImm) { |
1499 | 2 | unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1500 | 2 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) |
1501 | 2 | .addReg(LHSReg, getKillRegState(LHSIsKill)); |
1502 | 2 | return true; |
1503 | 2 | } |
1504 | 45 | |
1505 | 45 | unsigned RHSReg = getRegForValue(RHS); |
1506 | 45 | if (!RHSReg) |
1507 | 0 | return false; |
1508 | 45 | bool RHSIsKill = hasTrivialKill(RHS); |
1509 | 45 | |
1510 | 45 | unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1511 | 45 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) |
1512 | 45 | .addReg(LHSReg, getKillRegState(LHSIsKill)) |
1513 | 45 | .addReg(RHSReg, getKillRegState(RHSIsKill)); |
1514 | 45 | return true; |
1515 | 45 | } |
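Only a literal +0.0 takes the immediate path above, matching the #0.0 form of FCMP; every other constant is materialized and compared register-to-register. As source (a sketch):

    // Comparing against +0.0 needs no second register:
    bool isPositive(float X) {
      return X > 0.0f;   // -> fcmp s0, #0.0 (FCMPSri), then a conditional set
    }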
1516 | | |
1517 | | unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS, |
1518 | 232 | bool SetFlags, bool WantResult, bool IsZExt) { |
1519 | 232 | return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult, |
1520 | 232 | IsZExt); |
1521 | 232 | } |
1522 | | |
1523 | | /// This method is a wrapper to simplify add emission. |
1524 | | /// |
1525 | | /// First try to emit an add with an immediate operand using emitAddSub_ri. If |
1526 | | /// that fails, then try to materialize the immediate into a register and use |
1527 | | /// emitAddSub_rr instead. |
1528 | | unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, |
1529 | 34 | int64_t Imm) { |
1530 | 34 | unsigned ResultReg; |
1531 | 34 | if (Imm < 0) |
1532 | 2 | ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm); |
1533 | 32 | else |
1534 | 32 | ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm); |
1535 | 34 | |
1536 | 34 | if (ResultReg) |
1537 | 29 | return ResultReg; |
1538 | 5 | |
1539 | 5 | unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm); |
1540 | 5 | if (!CReg) |
1541 | 0 | return 0; |
1542 | 5 | |
1543 | 5 | ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true); |
1544 | 5 | return ResultReg; |
1545 | 5 | } |
1546 | | |
1547 | | unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS, |
1548 | 81 | bool SetFlags, bool WantResult, bool IsZExt) { |
1549 | 81 | return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult, |
1550 | 81 | IsZExt); |
1551 | 81 | } |
1552 | | |
1553 | | unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg, |
1554 | | bool LHSIsKill, unsigned RHSReg, |
1555 | 4 | bool RHSIsKill, bool WantResult) { |
1556 | 4 | return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg, |
1557 | 4 | RHSIsKill, /*SetFlags=*/true, WantResult); |
1558 | 4 | } |
1559 | | |
1560 | | unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg, |
1561 | | bool LHSIsKill, unsigned RHSReg, |
1562 | | bool RHSIsKill, |
1563 | | AArch64_AM::ShiftExtendType ShiftType, |
1564 | 9 | uint64_t ShiftImm, bool WantResult) { |
1565 | 9 | return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg, |
1566 | 9 | RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true, |
1567 | 9 | WantResult); |
1568 | 9 | } |
1569 | | |
1570 | | unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT, |
1571 | 91 | const Value *LHS, const Value *RHS) { |
1572 | 91 | // Canonicalize immediates to the RHS first. |
1573 | 91 | if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1574 | 0 | std::swap(LHS, RHS); |
1575 | 91 | |
1576 | 91 | // Canonicalize mul by power-of-2 to the RHS. |
1577 | 91 | if (LHS->hasOneUse() && isValueAvailable(LHS))
1578 | 89 | if (isMulPowOf2(LHS)) |
1579 | 0 | std::swap(LHS, RHS); |
1580 | 91 | |
1581 | 91 | // Canonicalize shift immediate to the RHS. |
1582 | 91 | if (LHS->hasOneUse() && isValueAvailable(LHS))
1583 | 89 | if (const auto *SI = dyn_cast<ShlOperator>(LHS)) |
1584 | 0 | if (isa<ConstantInt>(SI->getOperand(1))) |
1585 | 0 | std::swap(LHS, RHS); |
1586 | 91 | |
1587 | 91 | unsigned LHSReg = getRegForValue(LHS); |
1588 | 91 | if (!LHSReg) |
1589 | 0 | return 0; |
1590 | 91 | bool LHSIsKill = hasTrivialKill(LHS); |
1591 | 91 | |
1592 | 91 | unsigned ResultReg = 0; |
1593 | 91 | if (const auto *C = dyn_cast<ConstantInt>(RHS)) { |
1594 | 19 | uint64_t Imm = C->getZExtValue(); |
1595 | 19 | ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm); |
1596 | 19 | } |
1597 | 91 | if (ResultReg) |
1598 | 19 | return ResultReg; |
1599 | 72 | |
1600 | 72 | // Check if the mul can be folded into the instruction. |
1601 | 72 | if (RHS->hasOneUse() && isValueAvailable(RHS)) { |
1602 | 72 | if (isMulPowOf2(RHS)) { |
1603 | 6 | const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0); |
1604 | 6 | const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1); |
1605 | 6 | |
1606 | 6 | if (const auto *C = dyn_cast<ConstantInt>(MulLHS)) |
1607 | 0 | if (C->getValue().isPowerOf2()) |
1608 | 0 | std::swap(MulLHS, MulRHS); |
1609 | 6 | |
1610 | 6 | assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt."); |
1611 | 6 | uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2(); |
1612 | 6 | |
1613 | 6 | unsigned RHSReg = getRegForValue(MulLHS); |
1614 | 6 | if (!RHSReg) |
1615 | 0 | return 0; |
1616 | 6 | bool RHSIsKill = hasTrivialKill(MulLHS); |
1617 | 6 | ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg, |
1618 | 6 | RHSIsKill, ShiftVal); |
1619 | 6 | if (ResultReg) |
1620 | 6 | return ResultReg; |
1621 | 66 | } |
1622 | 72 | } |
1623 | 66 | |
1624 | 66 | // Check if the shift can be folded into the instruction. |
1625 | 66 | if (RHS->hasOneUse() && isValueAvailable(RHS)) { |
1626 | 66 | if (const auto *SI = dyn_cast<ShlOperator>(RHS)) |
1627 | 25 | if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) { |
1628 | 25 | uint64_t ShiftVal = C->getZExtValue(); |
1629 | 25 | unsigned RHSReg = getRegForValue(SI->getOperand(0)); |
1630 | 25 | if (!RHSReg) |
1631 | 0 | return 0; |
1632 | 25 | bool RHSIsKill = hasTrivialKill(SI->getOperand(0)); |
1633 | 25 | ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg, |
1634 | 25 | RHSIsKill, ShiftVal); |
1635 | 25 | if (ResultReg) |
1636 | 13 | return ResultReg; |
1637 | 53 | } |
1638 | 66 | } |
1639 | 53 | |
1640 | 53 | unsigned RHSReg = getRegForValue(RHS); |
1641 | 53 | if (!RHSReg) |
1642 | 0 | return 0; |
1643 | 53 | bool RHSIsKill = hasTrivialKill(RHS); |
1644 | 53 | |
1645 | 53 | MVT VT = std::max(MVT::i32, RetVT.SimpleTy); |
1646 | 53 | ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill); |
1647 | 53 | if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1648 | 12 | uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1649 | 12 | ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); |
1650 | 12 | } |
1651 | 53 | return ResultReg; |
1652 | 53 | } |
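The mul-by-power-of-two and shift folds above fire for source like this (a sketch; exact output depends on the subtarget):

    // The multiply is a one-use power-of-two scale, so it becomes the
    // shifted-register form of ORR rather than a separate MUL or LSL:
    unsigned packBits(unsigned A, unsigned B) {
      return A | (B * 8);   // B * 8 == B << 3  ->  orr w0, w0, w1, lsl #3
    }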
1653 | | |
1654 | | unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, |
1655 | | unsigned LHSReg, bool LHSIsKill, |
1656 | 204 | uint64_t Imm) { |
1657 | 204 | static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR), |
1658 | 204 | "ISD nodes are not consecutive!"); |
1659 | 204 | static const unsigned OpcTable[3][2] = { |
1660 | 204 | { AArch64::ANDWri, AArch64::ANDXri }, |
1661 | 204 | { AArch64::ORRWri, AArch64::ORRXri }, |
1662 | 204 | { AArch64::EORWri, AArch64::EORXri } |
1663 | 204 | }; |
1664 | 204 | const TargetRegisterClass *RC; |
1665 | 204 | unsigned Opc; |
1666 | 204 | unsigned RegSize; |
1667 | 204 | switch (RetVT.SimpleTy) { |
1668 | 204 | default: |
1669 | 0 | return 0; |
1670 | 204 | case MVT::i1: |
1671 | 198 | case MVT::i8: |
1672 | 198 | case MVT::i16: |
1673 | 198 | case MVT::i32: { |
1674 | 198 | unsigned Idx = ISDOpc - ISD::AND; |
1675 | 198 | Opc = OpcTable[Idx][0]; |
1676 | 198 | RC = &AArch64::GPR32spRegClass; |
1677 | 198 | RegSize = 32; |
1678 | 198 | break; |
1679 | 198 | } |
1680 | 198 | case MVT::i64: |
1681 | 6 | Opc = OpcTable[ISDOpc - ISD::AND][1]; |
1682 | 6 | RC = &AArch64::GPR64spRegClass; |
1683 | 6 | RegSize = 64; |
1684 | 6 | break; |
1685 | 204 | } |
1686 | 204 | |
1687 | 204 | if (!AArch64_AM::isLogicalImmediate(Imm, RegSize)) |
1688 | 0 | return 0; |
1689 | 204 | |
1690 | 204 | unsigned ResultReg = |
1691 | 204 | fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill, |
1692 | 204 | AArch64_AM::encodeLogicalImmediate(Imm, RegSize)); |
1693 | 204 | if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1694 | 4 | uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1695 | 4 | ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); |
1696 | 4 | } |
1697 | 204 | return ResultReg; |
1698 | 204 | } |
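isLogicalImmediate above enforces the bitmask-immediate rule: the value must be a contiguous (possibly rotated) run of ones replicated across the register, so 0 does not encode. Illustrative inputs (a sketch using the same helper the code calls):

    //   AArch64_AM::isLogicalImmediate(0xff,   32) -> true  (run of 8 ones)
    //   AArch64_AM::isLogicalImmediate(0x0ff0, 32) -> true  (rotated run)
    //   AArch64_AM::isLogicalImmediate(0x1234, 32) -> false (not a single run)
    //   AArch64_AM::isLogicalImmediate(0,      32) -> false (reserved)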
1699 | | |
1700 | | unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, |
1701 | | unsigned LHSReg, bool LHSIsKill, |
1702 | | unsigned RHSReg, bool RHSIsKill, |
1703 | 31 | uint64_t ShiftImm) { |
1704 | 31 | static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR), |
1705 | 31 | "ISD nodes are not consecutive!"); |
1706 | 31 | static const unsigned OpcTable[3][2] = { |
1707 | 31 | { AArch64::ANDWrs, AArch64::ANDXrs }, |
1708 | 31 | { AArch64::ORRWrs, AArch64::ORRXrs }, |
1709 | 31 | { AArch64::EORWrs, AArch64::EORXrs } |
1710 | 31 | }; |
1711 | 31 | |
1712 | 31 | // Don't deal with undefined shifts. |
1713 | 31 | if (ShiftImm >= RetVT.getSizeInBits()) |
1714 | 12 | return 0; |
1715 | 19 | |
1716 | 19 | const TargetRegisterClass *RC; |
1717 | 19 | unsigned Opc; |
1718 | 19 | switch (RetVT.SimpleTy) { |
1719 | 19 | default: |
1720 | 0 | return 0; |
1721 | 19 | case MVT::i1: |
1722 | 13 | case MVT::i8: |
1723 | 13 | case MVT::i16: |
1724 | 13 | case MVT::i32: |
1725 | 13 | Opc = OpcTable[ISDOpc - ISD::AND][0]; |
1726 | 13 | RC = &AArch64::GPR32RegClass; |
1727 | 13 | break; |
1728 | 13 | case MVT::i64: |
1729 | 6 | Opc = OpcTable[ISDOpc - ISD::AND][1]; |
1730 | 6 | RC = &AArch64::GPR64RegClass; |
1731 | 6 | break; |
1732 | 19 | } |
1733 | 19 | unsigned ResultReg = |
1734 | 19 | fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill, |
1735 | 19 | AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm)); |
1736 | 19 | if (RetVT >= MVT::i8 && RetVT <= MVT::i16) { |
1737 | 6 | uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1738 | 6 | ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); |
1739 | 6 | } |
1740 | 19 | return ResultReg; |
1741 | 19 | } |
1742 | | |
1743 | | unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, |
1744 | 184 | uint64_t Imm) { |
1745 | 184 | return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm); |
1746 | 184 | } |
1747 | | |
1748 | | unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr, |
1749 | 391 | bool WantZExt, MachineMemOperand *MMO) { |
1750 | 391 | if (!TLI.allowsMisalignedMemoryAccesses(VT)) |
1751 | 2 | return 0; |
1752 | 389 | |
1753 | 389 | // Simplify this down to something we can handle. |
1754 | 389 | if (!simplifyAddress(Addr, VT)) |
1755 | 72 | return 0; |
1756 | 317 | |
1757 | 317 | unsigned ScaleFactor = getImplicitScaleFactor(VT); |
1758 | 317 | if (!ScaleFactor) |
1759 | 317 | llvm_unreachable("Unexpected value type.");
1760 | 317 | |
1761 | 317 | // Negative offsets require unscaled, 9-bit, signed immediate offsets. |
1762 | 317 | // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. |
1763 | 317 | bool UseScaled = true; |
1764 | 317 | if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1765 | 26 | UseScaled = false; |
1766 | 26 | ScaleFactor = 1; |
1767 | 26 | } |
1768 | 317 | |
1769 | 317 | static const unsigned GPOpcTable[2][8][4] = { |
1770 | 317 | // Sign-extend. |
1771 | 317 | { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi, |
1772 | 317 | AArch64::LDURXi }, |
1773 | 317 | { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi, |
1774 | 317 | AArch64::LDURXi }, |
1775 | 317 | { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui, |
1776 | 317 | AArch64::LDRXui }, |
1777 | 317 | { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui, |
1778 | 317 | AArch64::LDRXui }, |
1779 | 317 | { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX, |
1780 | 317 | AArch64::LDRXroX }, |
1781 | 317 | { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX, |
1782 | 317 | AArch64::LDRXroX }, |
1783 | 317 | { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW, |
1784 | 317 | AArch64::LDRXroW }, |
1785 | 317 | { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW, |
1786 | 317 | AArch64::LDRXroW } |
1787 | 317 | }, |
1788 | 317 | // Zero-extend. |
1789 | 317 | { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, |
1790 | 317 | AArch64::LDURXi }, |
1791 | 317 | { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, |
1792 | 317 | AArch64::LDURXi }, |
1793 | 317 | { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, |
1794 | 317 | AArch64::LDRXui }, |
1795 | 317 | { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, |
1796 | 317 | AArch64::LDRXui }, |
1797 | 317 | { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, |
1798 | 317 | AArch64::LDRXroX }, |
1799 | 317 | { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, |
1800 | 317 | AArch64::LDRXroX }, |
1801 | 317 | { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, |
1802 | 317 | AArch64::LDRXroW }, |
1803 | 317 | { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, |
1804 | 317 | AArch64::LDRXroW } |
1805 | 317 | } |
1806 | 317 | }; |
1807 | 317 | |
1808 | 317 | static const unsigned FPOpcTable[4][2] = { |
1809 | 317 | { AArch64::LDURSi, AArch64::LDURDi }, |
1810 | 317 | { AArch64::LDRSui, AArch64::LDRDui }, |
1811 | 317 | { AArch64::LDRSroX, AArch64::LDRDroX }, |
1812 | 317 | { AArch64::LDRSroW, AArch64::LDRDroW } |
1813 | 317 | }; |
1814 | 317 | |
1815 | 317 | unsigned Opc; |
1816 | 317 | const TargetRegisterClass *RC; |
1817 | 317 | bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1818 | 317 | Addr.getOffsetReg();
1819 | 317 | unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1820 | 317 | if (Addr.getExtendType() == AArch64_AM::UXTW ||
1821 | 317 | Addr.getExtendType() == AArch64_AM::SXTW)
1822 | 39 | Idx++; |
1823 | 317 | |
1824 | 317 | bool IsRet64Bit = RetVT == MVT::i64; |
1825 | 317 | switch (VT.SimpleTy) { |
1826 | 317 | default: |
1827 | 0 | llvm_unreachable("Unexpected value type."); |
1828 | 317 | case MVT::i1: // Intentional fall-through. |
1829 | 51 | case MVT::i8: |
1830 | 51 | Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0]; |
1831 | 51 | RC = (IsRet64Bit && !WantZExt) ?
1832 | 45 | &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1833 | 51 | break; |
1834 | 51 | case MVT::i16: |
1835 | 41 | Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1]; |
1836 | 41 | RC = (IsRet64Bit && !WantZExt) ?
1837 | 35 | &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1838 | 41 | break; |
1839 | 116 | case MVT::i32: |
1840 | 116 | Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2]; |
1841 | 116 | RC = (IsRet64Bit && !WantZExt) ?
1842 | 109 | &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1843 | 116 | break; |
1844 | 96 | case MVT::i64: |
1845 | 96 | Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3]; |
1846 | 96 | RC = &AArch64::GPR64RegClass; |
1847 | 96 | break; |
1848 | 51 | case MVT::f32: |
1849 | 3 | Opc = FPOpcTable[Idx][0]; |
1850 | 3 | RC = &AArch64::FPR32RegClass; |
1851 | 3 | break; |
1852 | 51 | case MVT::f64: |
1853 | 10 | Opc = FPOpcTable[Idx][1]; |
1854 | 10 | RC = &AArch64::FPR64RegClass; |
1855 | 10 | break; |
1856 | 317 | } |
1857 | 317 | |
1858 | 317 | // Create the base instruction, then add the operands. |
1859 | 317 | unsigned ResultReg = createResultReg(RC); |
1860 | 317 | MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
1861 | 317 | TII.get(Opc), ResultReg); |
1862 | 317 | addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO); |
1863 | 317 | |
1864 | 317 | // Loading an i1 requires special handling. |
1865 | 317 | if (VT == MVT::i1) { |
1866 | 3 | unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1); |
1867 | 3 | assert(ANDReg && "Unexpected AND instruction emission failure."); |
1868 | 3 | ResultReg = ANDReg; |
1869 | 3 | } |
1870 | 317 | |
1871 | 317 | // For zero-extending loads to 64bit we emit a 32bit load and then convert |
1872 | 317 | // the 32bit reg to a 64bit reg. |
1873 | 317 | if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1874 | 21 | unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass); |
1875 | 21 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
1876 | 21 | TII.get(AArch64::SUBREG_TO_REG), Reg64) |
1877 | 21 | .addImm(0) |
1878 | 21 | .addReg(ResultReg, getKillRegState(true)) |
1879 | 21 | .addImm(AArch64::sub_32); |
1880 | 21 | ResultReg = Reg64; |
1881 | 21 | } |
1882 | 317 | return ResultReg; |
1883 | 317 | } |
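The offset classification near the top of emitLoad picks between the two immediate addressing forms: LDR takes a scaled, unsigned 12-bit offset that must be a multiple of the access size, while LDUR takes an unscaled, signed 9-bit offset. A standalone restatement (hypothetical helper name):

    #include <cstdint>

    // True when the scaled LDR form applies, matching the check above.
    static bool useScaledForm(int64_t Offset, unsigned ScaleFactor) {
      return Offset >= 0 && (Offset & (ScaleFactor - 1)) == 0;
    }
    // e.g. a 4-byte load at offset 8 -> ldr w0, [x1, #8] (scaled by 4);
    // offsets -4 or 3 -> ldur, since neither fits the scaled form.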
1884 | | |
1885 | 284 | bool AArch64FastISel::selectAddSub(const Instruction *I) { |
1886 | 284 | MVT VT; |
1887 | 284 | if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) |
1888 | 0 | return false; |
1889 | 284 | |
1890 | 284 | if (VT.isVector()) |
1891 | 71 | return selectOperator(I, I->getOpcode()); |
1892 | 213 | |
1893 | 213 | unsigned ResultReg; |
1894 | 213 | switch (I->getOpcode()) { |
1895 | 213 | default: |
1896 | 0 | llvm_unreachable("Unexpected instruction."); |
1897 | 213 | case Instruction::Add: |
1898 | 210 | ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1)); |
1899 | 210 | break; |
1900 | 213 | case Instruction::Sub: |
1901 | 3 | ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1)); |
1902 | 3 | break; |
1903 | 213 | } |
1904 | 213 | if (!ResultReg) |
1905 | 0 | return false; |
1906 | 213 | |
1907 | 213 | updateValueMap(I, ResultReg); |
1908 | 213 | return true; |
1909 | 213 | } |
1910 | | |
1911 | 91 | bool AArch64FastISel::selectLogicalOp(const Instruction *I) { |
1912 | 91 | MVT VT; |
1913 | 91 | if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) |
1914 | 0 | return false; |
1915 | 91 | |
1916 | 91 | if (VT.isVector()) |
1917 | 0 | return selectOperator(I, I->getOpcode()); |
1918 | 91 | |
1919 | 91 | unsigned ResultReg; |
1920 | 91 | switch (I->getOpcode()) { |
1921 | 91 | default: |
1922 | 0 | llvm_unreachable("Unexpected instruction."); |
1923 | 91 | case Instruction::And: |
1924 | 51 | ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1)); |
1925 | 51 | break; |
1926 | 91 | case Instruction::Or: |
1927 | 21 | ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1)); |
1928 | 21 | break; |
1929 | 91 | case Instruction::Xor: |
1930 | 19 | ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1)); |
1931 | 19 | break; |
1932 | 91 | } |
1933 | 91 | if (!ResultReg) |
1934 | 0 | return false; |
1935 | 91 | |
1936 | 91 | updateValueMap(I, ResultReg); |
1937 | 91 | return true; |
1938 | 91 | } |
1939 | | |
1940 | 381 | bool AArch64FastISel::selectLoad(const Instruction *I) { |
1941 | 381 | MVT VT; |
1942 | 381 | // Verify we have a legal type before going any further. Currently, we handle |
1943 | 381 | // simple types that will directly fit in a register (i32/f32/i64/f64) or |
1944 | 381 | // those that can be sign or zero-extended to a basic operation (i1/i8/i16). |
1945 | 381 | if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) || |
1946 | 381 | cast<LoadInst>(I)->isAtomic())
1947 | 4 | return false; |
1948 | 377 | |
1949 | 377 | const Value *SV = I->getOperand(0); |
1950 | 377 | if (TLI.supportSwiftError()) { |
1951 | 377 | // Swifterror values can come from either a function parameter with |
1952 | 377 | // swifterror attribute or an alloca with swifterror attribute. |
1953 | 377 | if (const Argument *Arg = dyn_cast<Argument>(SV)) { |
1954 | 103 | if (Arg->hasSwiftErrorAttr()) |
1955 | 0 | return false; |
1956 | 377 | } |
1957 | 377 | |
1958 | 377 | if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) { |
1959 | 74 | if (Alloca->isSwiftError()) |
1960 | 5 | return false; |
1961 | 372 | } |
1962 | 377 | } |
1963 | 372 | |
1964 | 372 | // See if we can handle this address. |
1965 | 372 | Address Addr; |
1966 | 372 | if (!computeAddress(I->getOperand(0), Addr, I->getType())) |
1967 | 22 | return false; |
1968 | 350 | |
1969 | 350 | // Fold the following sign-/zero-extend into the load instruction. |
1970 | 350 | bool WantZExt = true; |
1971 | 350 | MVT RetVT = VT; |
1972 | 350 | const Value *IntExtVal = nullptr; |
1973 | 350 | if (I->hasOneUse()) { |
1974 | 257 | if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) { |
1975 | 41 | if (isTypeSupported(ZE->getType(), RetVT)) |
1976 | 41 | IntExtVal = ZE; |
1977 | 0 | else |
1978 | 0 | RetVT = VT; |
1979 | 216 | } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) { |
1980 | 38 | if (isTypeSupported(SE->getType(), RetVT)) |
1981 | 38 | IntExtVal = SE; |
1982 | 0 | else |
1983 | 0 | RetVT = VT; |
1984 | 38 | WantZExt = false; |
1985 | 38 | } |
1986 | 257 | } |
1987 | 350 | |
1988 | 350 | unsigned ResultReg = |
1989 | 350 | emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I)); |
1990 | 350 | if (!ResultReg) |
1991 | 74 | return false; |
1992 | 276 | |
1993 | 276 | // There are a few different cases we have to handle, because the load or the |
1994 | 276 | // sign-/zero-extend might not be selected by FastISel if we fall back to
1995 | 276 | // SelectionDAG. There is also an ordering issue when both instructions are in |
1996 | 276 | // different basic blocks. |
1997 | 276 | // 1.) The load instruction is selected by FastISel, but the integer extend |
1998 | 276 | // not. This usually happens when the integer extend is in a different |
1999 | 276 | // basic block and SelectionDAG took over for that basic block. |
2000 | 276 | // 2.) The load instruction is selected before the integer extend. This only |
2001 | 276 | // happens when the integer extend is in a different basic block. |
2002 | 276 | // 3.) The load instruction is selected by SelectionDAG and the integer extend |
2003 | 276 | // by FastISel. This happens if there are instructions between the load |
2004 | 276 | // and the integer extend that couldn't be selected by FastISel. |
2005 | 276 | if (IntExtVal) { |
2006 | 79 | // The integer extend hasn't been emitted yet. FastISel or SelectionDAG |
2007 | 79 | // could select it. Emit a copy to subreg if necessary. FastISel will remove |
2008 | 79 | // it when it selects the integer extend. |
2009 | 79 | unsigned Reg = lookUpRegForValue(IntExtVal); |
2010 | 79 | auto *MI = MRI.getUniqueVRegDef(Reg); |
2011 | 79 | if (!MI) { |
2012 | 1 | if (RetVT == MVT::i64 && VT <= MVT::i32) { |
2013 | 1 | if (WantZExt) { |
2014 | 1 | // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG). |
2015 | 1 | MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt)); |
2016 | 1 | ResultReg = std::prev(I)->getOperand(0).getReg(); |
2017 | 1 | removeDeadCode(I, std::next(I)); |
2018 | 1 | } else |
2019 | 0 | ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg, |
2020 | 0 | /*IsKill=*/true, |
2021 | 0 | AArch64::sub_32); |
2022 | 1 | } |
2023 | 1 | updateValueMap(I, ResultReg); |
2024 | 1 | return true; |
2025 | 1 | } |
2026 | 78 | |
2027 | 78 | // The integer extend has already been emitted - delete all the instructions |
2028 | 78 | // that have been emitted by the integer extend lowering code and use the |
2029 | 78 | // result from the load instruction directly. |
2030 | 195 | while (MI) {
2031 | 117 | Reg = 0; |
2032 | 156 | for (auto &Opnd : MI->uses()) { |
2033 | 156 | if (Opnd.isReg()) { |
2034 | 117 | Reg = Opnd.getReg(); |
2035 | 117 | break; |
2036 | 117 | } |
2037 | 156 | } |
2038 | 117 | MachineBasicBlock::iterator I(MI); |
2039 | 117 | removeDeadCode(I, std::next(I)); |
2040 | 117 | MI = nullptr; |
2041 | 117 | if (Reg) |
2042 | 117 | MI = MRI.getUniqueVRegDef(Reg); |
2043 | 117 | } |
2044 | 78 | updateValueMap(IntExtVal, ResultReg); |
2045 | 78 | return true; |
2046 | 78 | } |
2047 | 197 | |
2048 | 197 | updateValueMap(I, ResultReg); |
2049 | 197 | return true; |
2050 | 197 | } |
2051 | | |
2052 | | bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg, |
2053 | | unsigned AddrReg, |
2054 | 16 | MachineMemOperand *MMO) { |
2055 | 16 | unsigned Opc; |
2056 | 16 | switch (VT.SimpleTy) { |
2057 | 16 | default: return false;
2058 | 16 | case MVT::i8: Opc = AArch64::STLRB; break;
2059 | 16 | case MVT::i16: Opc = AArch64::STLRH; break;
2060 | 16 | case MVT::i32: Opc = AArch64::STLRW; break;
2061 | 16 | case MVT::i64: Opc = AArch64::STLRX; break;
2062 | 16 | } |
2063 | 16 | |
2064 | 16 | const MCInstrDesc &II = TII.get(Opc); |
2065 | 16 | SrcReg = constrainOperandRegClass(II, SrcReg, 0); |
2066 | 16 | AddrReg = constrainOperandRegClass(II, AddrReg, 1); |
2067 | 16 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) |
2068 | 16 | .addReg(SrcReg) |
2069 | 16 | .addReg(AddrReg) |
2070 | 16 | .addMemOperand(MMO); |
2071 | 16 | return true; |
2072 | 16 | } |
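Source that reaches this path (a sketch): a release or seq_cst atomic store of a legal integer width becomes one of the store-release instructions chosen above, addressed through a plain base register.

    #include <atomic>

    void publish(std::atomic<int> &Flag) {
      Flag.store(1, std::memory_order_release);  // roughly: mov w8, #1; stlr w8, [x0]
    }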
2073 | | |
2074 | | bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr, |
2075 | 445 | MachineMemOperand *MMO) { |
2076 | 445 | if (!TLI.allowsMisalignedMemoryAccesses(VT)) |
2077 | 0 | return false; |
2078 | 445 | |
2079 | 445 | // Simplify this down to something we can handle. |
2080 | 445 | if (!simplifyAddress(Addr, VT)) |
2081 | 154 | return false; |
2082 | 291 | |
2083 | 291 | unsigned ScaleFactor = getImplicitScaleFactor(VT); |
2084 | 291 | if (!ScaleFactor) |
2085 | 291 | llvm_unreachable("Unexpected value type.");
2086 | 291 | |
2087 | 291 | // Negative offsets require unscaled, 9-bit, signed immediate offsets. |
2088 | 291 | // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. |
2089 | 291 | bool UseScaled = true; |
2090 | 291 | if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2091 | 4 | UseScaled = false; |
2092 | 4 | ScaleFactor = 1; |
2093 | 4 | } |
2094 | 291 | |
2095 | 291 | static const unsigned OpcTable[4][6] = { |
2096 | 291 | { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi, |
2097 | 291 | AArch64::STURSi, AArch64::STURDi }, |
2098 | 291 | { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui, |
2099 | 291 | AArch64::STRSui, AArch64::STRDui }, |
2100 | 291 | { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX, |
2101 | 291 | AArch64::STRSroX, AArch64::STRDroX }, |
2102 | 291 | { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW, |
2103 | 291 | AArch64::STRSroW, AArch64::STRDroW } |
2104 | 291 | }; |
2105 | 291 | |
2106 | 291 | unsigned Opc; |
2107 | 291 | bool VTIsi1 = false; |
2108 | 291 | bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2109 | 291 | Addr.getOffsetReg();
2110 | 291 | unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2111 | 291 | if (Addr.getExtendType() == AArch64_AM::UXTW || |
2112 | 291 | Addr.getExtendType() == AArch64_AM::SXTW) |
2113 | 0 | Idx++; |
2114 | 291 | |
2115 | 291 | switch (VT.SimpleTy) { |
2116 | 291 | default: llvm_unreachable("Unexpected value type.");
2117 | 291 | case MVT::i1: VTIsi1 = true; LLVM_FALLTHROUGH;
2118 | 48 | case MVT::i8: Opc = OpcTable[Idx][0]; break; |
2119 | 19 | case MVT::i16: Opc = OpcTable[Idx][1]; break; |
2120 | 108 | case MVT::i32: Opc = OpcTable[Idx][2]; break; |
2121 | 96 | case MVT::i64: Opc = OpcTable[Idx][3]; break; |
2122 | 8 | case MVT::f32: Opc = OpcTable[Idx][4]; break;
2123 | 18 | case MVT::f64: Opc = OpcTable[Idx][5]; break; |
2124 | 291 | } |
2125 | 291 | |
2126 | 291 | // Storing an i1 requires special handling. |
2127 | 291 | if (VTIsi1 && SrcReg != AArch64::WZR) {
2128 | 7 | unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1); |
2129 | 7 | assert(ANDReg && "Unexpected AND instruction emission failure."); |
2130 | 7 | SrcReg = ANDReg; |
2131 | 7 | } |
2132 | 291 | // Create the base instruction, then add the operands. |
2133 | 291 | const MCInstrDesc &II = TII.get(Opc); |
2134 | 291 | SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs()); |
2135 | 291 | MachineInstrBuilder MIB = |
2136 | 291 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg); |
2137 | 291 | addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO); |
2138 | 291 | |
2139 | 291 | return true; |
2140 | 291 | } |
2141 | | |
2142 | 427 | bool AArch64FastISel::selectStore(const Instruction *I) { |
2143 | 427 | MVT VT; |
2144 | 427 | const Value *Op0 = I->getOperand(0); |
2145 | 427 | // Verify we have a legal type before going any further. Currently, we handle |
2146 | 427 | // simple types that will directly fit in a register (i32/f32/i64/f64) or |
2147 | 427 | // those that can be sign or zero-extended to a basic operation (i1/i8/i16). |
2148 | 427 | if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true)) |
2149 | 23 | return false; |
2150 | 404 | |
2151 | 404 | const Value *PtrV = I->getOperand(1); |
2152 | 404 | if (TLI.supportSwiftError()) { |
2153 | 404 | // Swifterror values can come from either a function parameter with |
2154 | 404 | // swifterror attribute or an alloca with swifterror attribute. |
2155 | 404 | if (const Argument *Arg = dyn_cast<Argument>(PtrV)) { |
2156 | 228 | if (Arg->hasSwiftErrorAttr()) |
2157 | 1 | return false; |
2158 | 403 | } |
2159 | 403 | |
2160 | 403 | if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) { |
2161 | 105 | if (Alloca->isSwiftError()) |
2162 | 1 | return false; |
2163 | 402 | } |
2164 | 403 | } |
2165 | 402 | |
2166 | 402 | // Get the value to be stored into a register. Use the zero register directly |
2167 | 402 | // when possible to avoid an unnecessary copy and a wasted register. |
2168 | 402 | unsigned SrcReg = 0; |
2169 | 402 | if (const auto *CI = dyn_cast<ConstantInt>(Op0)) { |
2170 | 49 | if (CI->isZero()) |
2171 | 34 | SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2172 | 353 | } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) { |
2173 | 2 | if (CF->isZero() && !CF->isNegative()) { |
2174 | 2 | VT = MVT::getIntegerVT(VT.getSizeInBits()); |
2175 | 2 | SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2176 | 2 | } |
2177 | 2 | } |
2178 | 402 | |
2179 | 402 | if (!SrcReg) |
2180 | 366 | SrcReg = getRegForValue(Op0); |
2181 | 402 | |
2182 | 402 | if (!SrcReg) |
2183 | 6 | return false; |
2184 | 396 | |
2185 | 396 | auto *SI = cast<StoreInst>(I); |
2186 | 396 | |
2187 | 396 | // Try to emit a STLR for seq_cst/release. |
2188 | 396 | if (SI->isAtomic()) { |
2189 | 24 | AtomicOrdering Ord = SI->getOrdering(); |
2190 | 24 | // The non-atomic instructions are sufficient for relaxed stores. |
2191 | 24 | if (isReleaseOrStronger(Ord)) { |
2192 | 16 | // The STLR addressing mode only supports a base reg; pass that directly. |
2193 | 16 | unsigned AddrReg = getRegForValue(PtrV); |
2194 | 16 | return emitStoreRelease(VT, SrcReg, AddrReg, |
2195 | 16 | createMachineMemOperandFor(I)); |
2196 | 16 | } |
2197 | 380 | } |
2198 | 380 | |
2199 | 380 | // See if we can handle this address. |
2200 | 380 | Address Addr; |
2201 | 380 | if (!computeAddress(PtrV, Addr, Op0->getType())) |
2202 | 8 | return false; |
2203 | 372 | |
2204 | 372 | if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I))) |
2205 | 152 | return false; |
2206 | 220 | return true; |
2207 | 220 | } |
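The zero-register shortcut above in source form (a sketch): a stored literal zero, integer or +0.0, needs no materialized value, because WZR/XZR read as zero and +0.0 is bit-identical to integer 0.

    void clear(long *P, double *D) {
      *P = 0;    // -> str xzr, [x0]
      *D = 0.0;  // retyped to an integer store above, also str xzr
    }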
2208 | | |
2209 | 110 | static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) { |
2210 | 110 | switch (Pred) { |
2211 | 110 | case CmpInst::FCMP_ONE: |
2212 | 4 | case CmpInst::FCMP_UEQ: |
2213 | 4 | default: |
2214 | 4 | // AL is our "false" for now. The other two need more compares. |
2215 | 4 | return AArch64CC::AL; |
2216 | 13 | case CmpInst::ICMP_EQ: |
2217 | 13 | case CmpInst::FCMP_OEQ: |
2218 | 13 | return AArch64CC::EQ; |
2219 | 13 | case CmpInst::ICMP_SGT: |
2220 | 9 | case CmpInst::FCMP_OGT: |
2221 | 9 | return AArch64CC::GT; |
2222 | 10 | case CmpInst::ICMP_SGE: |
2223 | 10 | case CmpInst::FCMP_OGE: |
2224 | 10 | return AArch64CC::GE; |
2225 | 10 | case CmpInst::ICMP_UGT: |
2226 | 6 | case CmpInst::FCMP_UGT: |
2227 | 6 | return AArch64CC::HI; |
2228 | 6 | case CmpInst::FCMP_OLT: |
2229 | 3 | return AArch64CC::MI; |
2230 | 6 | case CmpInst::ICMP_ULE: |
2231 | 6 | case CmpInst::FCMP_OLE: |
2232 | 6 | return AArch64CC::LS; |
2233 | 6 | case CmpInst::FCMP_ORD: |
2234 | 3 | return AArch64CC::VC; |
2235 | 6 | case CmpInst::FCMP_UNO: |
2236 | 3 | return AArch64CC::VS; |
2237 | 6 | case CmpInst::FCMP_UGE: |
2238 | 3 | return AArch64CC::PL; |
2239 | 10 | case CmpInst::ICMP_SLT: |
2240 | 10 | case CmpInst::FCMP_ULT: |
2241 | 10 | return AArch64CC::LT; |
2242 | 11 | case CmpInst::ICMP_SLE: |
2243 | 11 | case CmpInst::FCMP_ULE: |
2244 | 11 | return AArch64CC::LE; |
2245 | 19 | case CmpInst::FCMP_UNE: |
2246 | 19 | case CmpInst::ICMP_NE: |
2247 | 19 | return AArch64CC::NE; |
2248 | 19 | case CmpInst::ICMP_UGE: |
2249 | 4 | return AArch64CC::HS; |
2250 | 19 | case CmpInst::ICMP_ULT: |
2251 | 6 | return AArch64CC::LO; |
2252 | 110 | } |
2253 | 110 | } |
2254 | | |
2255 | | /// Try to emit a combined compare-and-branch instruction. |
2256 | 89 | bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) { |
2257 | 89 | // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions |
2258 | 89 | // will not be produced, as they are conditional branch instructions that do |
2259 | 89 | // not set flags. |
2260 | 89 | if (FuncInfo.MF->getFunction().hasFnAttribute( |
2261 | 89 | Attribute::SpeculativeLoadHardening)) |
2262 | 4 | return false; |
2263 | 85 | |
2264 | 85 | assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction"); |
2265 | 85 | const CmpInst *CI = cast<CmpInst>(BI->getCondition()); |
2266 | 85 | CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); |
2267 | 85 | |
2268 | 85 | const Value *LHS = CI->getOperand(0); |
2269 | 85 | const Value *RHS = CI->getOperand(1); |
2270 | 85 | |
2271 | 85 | MVT VT; |
2272 | 85 | if (!isTypeSupported(LHS->getType(), VT)) |
2273 | 0 | return false; |
2274 | 85 | |
2275 | 85 | unsigned BW = VT.getSizeInBits(); |
2276 | 85 | if (BW > 64) |
2277 | 0 | return false; |
2278 | 85 | |
2279 | 85 | MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; |
2280 | 85 | MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; |
2281 | 85 | |
2282 | 85 | // Try to take advantage of fallthrough opportunities. |
2283 | 85 | if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { |
2284 | 46 | std::swap(TBB, FBB); |
2285 | 46 | Predicate = CmpInst::getInversePredicate(Predicate); |
2286 | 46 | } |
2287 | 85 | |
2288 | 85 | int TestBit = -1; |
2289 | 85 | bool IsCmpNE; |
2290 | 85 | switch (Predicate) { |
2291 | 85 | default: |
2292 | 21 | return false; |
2293 | 85 | case CmpInst::ICMP_EQ: |
2294 | 39 | case CmpInst::ICMP_NE: |
2295 | 39 | if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2296 | 0 | std::swap(LHS, RHS); |
2297 | 39 | |
2298 | 39 | if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2299 | 2 | return false; |
2300 | 37 | |
2301 | 37 | if (const auto *AI = dyn_cast<BinaryOperator>(LHS)) |
2302 | 14 | if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) { |
2303 | 13 | const Value *AndLHS = AI->getOperand(0); |
2304 | 13 | const Value *AndRHS = AI->getOperand(1); |
2305 | 13 | |
2306 | 13 | if (const auto *C = dyn_cast<ConstantInt>(AndLHS)) |
2307 | 0 | if (C->getValue().isPowerOf2()) |
2308 | 0 | std::swap(AndLHS, AndRHS); |
2309 | 13 | |
2310 | 13 | if (const auto *C = dyn_cast<ConstantInt>(AndRHS)) |
2311 | 13 | if (C->getValue().isPowerOf2()) { |
2312 | 13 | TestBit = C->getValue().logBase2(); |
2313 | 13 | LHS = AndLHS; |
2314 | 13 | } |
2315 | 13 | } |
2316 | 37 | |
2317 | 37 | if (VT == MVT::i1) |
2318 | 1 | TestBit = 0; |
2319 | 37 | |
2320 | 37 | IsCmpNE = Predicate == CmpInst::ICMP_NE; |
2321 | 37 | break; |
2322 | 37 | case CmpInst::ICMP_SLT: |
2323 | 14 | case CmpInst::ICMP_SGE: |
2324 | 14 | if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2325 | 8 | return false; |
2326 | 6 | |
2327 | 6 | TestBit = BW - 1; |
2328 | 6 | IsCmpNE = Predicate == CmpInst::ICMP_SLT; |
2329 | 6 | break; |
2330 | 11 | case CmpInst::ICMP_SGT: |
2331 | 11 | case CmpInst::ICMP_SLE: |
2332 | 11 | if (!isa<ConstantInt>(RHS)) |
2333 | 3 | return false; |
2334 | 8 | |
2335 | 8 | if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true)) |
2336 | 0 | return false; |
2337 | 8 | |
2338 | 8 | TestBit = BW - 1; |
2339 | 8 | IsCmpNE = Predicate == CmpInst::ICMP_SLE; |
2340 | 8 | break; |
2341 | 51 | } // end switch |
2342 | 51 | |
2343 | 51 | static const unsigned OpcTable[2][2][2] = { |
2344 | 51 | { {AArch64::CBZW, AArch64::CBZX }, |
2345 | 51 | {AArch64::CBNZW, AArch64::CBNZX} }, |
2346 | 51 | { {AArch64::TBZW, AArch64::TBZX }, |
2347 | 51 | {AArch64::TBNZW, AArch64::TBNZX} } |
2348 | 51 | }; |
2349 | 51 | |
2350 | 51 | bool IsBitTest = TestBit != -1; |
2351 | 51 | bool Is64Bit = BW == 64; |
2352 | 51 | if (TestBit < 32 && TestBit >= 0)
2353 | 23 | Is64Bit = false; |
2354 | 51 | |
2355 | 51 | unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit]; |
2356 | 51 | const MCInstrDesc &II = TII.get(Opc); |
2357 | 51 | |
2358 | 51 | unsigned SrcReg = getRegForValue(LHS); |
2359 | 51 | if (!SrcReg) |
2360 | 0 | return false; |
2361 | 51 | bool SrcIsKill = hasTrivialKill(LHS); |
2362 | 51 | |
2363 | 51 | if (BW == 64 && !Is64Bit)
2364 | 2 | SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill, |
2365 | 2 | AArch64::sub_32); |
2366 | 51 | |
2367 | 51 | if ((BW < 32) && !IsBitTest)
2368 | 2 | SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true); |
2369 | 51 | |
2370 | 51 | // Emit the combined compare and branch instruction. |
2371 | 51 | SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs()); |
2372 | 51 | MachineInstrBuilder MIB = |
2373 | 51 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) |
2374 | 51 | .addReg(SrcReg, getKillRegState(SrcIsKill)); |
2375 | 51 | if (IsBitTest) |
2376 | 28 | MIB.addImm(TestBit); |
2377 | 51 | MIB.addMBB(TBB); |
2378 | 51 | |
2379 | 51 | finishCondBranch(BI->getParent(), TBB, FBB); |
2380 | 51 | return true; |
2381 | 51 | } |
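The predicates handled above correspond to source shapes like these (a sketch; the assembly comments assume the common case):

    extern void taken();

    void branches(int X) {
      if (X == 0) taken();   // icmp eq X, 0            -> cbz  w0, ...
      if (X & 8)  taken();   // and + icmp ne against 0 -> tbnz w0, #3, ...
      if (X < 0)  taken();   // icmp slt X, 0           -> tbnz w0, #31, ...
    }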
2382 | | |
2383 | 287 | bool AArch64FastISel::selectBranch(const Instruction *I) { |
2384 | 287 | const BranchInst *BI = cast<BranchInst>(I); |
2385 | 287 | if (BI->isUnconditional()) { |
2386 | 164 | MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)]; |
2387 | 164 | fastEmitBranch(MSucc, BI->getDebugLoc()); |
2388 | 164 | return true; |
2389 | 164 | } |
2390 | 123 | |
2391 | 123 | MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; |
2392 | 123 | MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; |
2393 | 123 | |
2394 | 123 | if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { |
2395 | 90 | if (CI->hasOneUse() && isValueAvailable(CI)) {
2396 | 89 | // Try to optimize or fold the cmp. |
2397 | 89 | CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); |
2398 | 89 | switch (Predicate) { |
2399 | 89 | default: |
2400 | 89 | break; |
2401 | 89 | case CmpInst::FCMP_FALSE: |
2402 | 0 | fastEmitBranch(FBB, DbgLoc); |
2403 | 0 | return true; |
2404 | 89 | case CmpInst::FCMP_TRUE: |
2405 | 0 | fastEmitBranch(TBB, DbgLoc); |
2406 | 0 | return true; |
2407 | 89 | } |
2408 | 89 | |
2409 | 89 | // Try to emit a combined compare-and-branch first. |
2410 | 89 | if (emitCompareAndBranch(BI)) |
2411 | 51 | return true; |
2412 | 38 | |
2413 | 38 | // Try to take advantage of fallthrough opportunities. |
2414 | 38 | if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { |
2415 | 11 | std::swap(TBB, FBB); |
2416 | 11 | Predicate = CmpInst::getInversePredicate(Predicate); |
2417 | 11 | } |
2418 | 38 | |
2419 | 38 | // Emit the cmp. |
2420 | 38 | if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) |
2421 | 0 | return false; |
2422 | 38 | |
2423 | 38 | // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch |
2424 | 38 | // instruction. |
2425 | 38 | AArch64CC::CondCode CC = getCompareCC(Predicate); |
2426 | 38 | AArch64CC::CondCode ExtraCC = AArch64CC::AL; |
2427 | 38 | switch (Predicate) { |
2428 | 38 | default: |
2429 | 36 | break; |
2430 | 38 | case CmpInst::FCMP_UEQ: |
2431 | 1 | ExtraCC = AArch64CC::EQ; |
2432 | 1 | CC = AArch64CC::VS; |
2433 | 1 | break; |
2434 | 38 | case CmpInst::FCMP_ONE: |
2435 | 1 | ExtraCC = AArch64CC::MI; |
2436 | 1 | CC = AArch64CC::GT; |
2437 | 1 | break; |
2438 | 38 | } |
2439 | 38 | assert((CC != AArch64CC::AL) && "Unexpected condition code."); |
2440 | 38 | |
2441 | 38 | // Emit the extra branch for FCMP_UEQ and FCMP_ONE. |
2442 | 38 | if (ExtraCC != AArch64CC::AL) { |
2443 | 2 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) |
2444 | 2 | .addImm(ExtraCC) |
2445 | 2 | .addMBB(TBB); |
2446 | 2 | } |
2447 | 38 | |
2448 | 38 | // Emit the branch. |
2449 | 38 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) |
2450 | 38 | .addImm(CC) |
2451 | 38 | .addMBB(TBB); |
2452 | 38 | |
2453 | 38 | finishCondBranch(BI->getParent(), TBB, FBB); |
2454 | 38 | return true; |
2455 | 38 | } |
2456 | 33 | } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) { |
2457 | 2 | uint64_t Imm = CI->getZExtValue(); |
2458 | 2 | MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2459 | 2 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B)) |
2460 | 2 | .addMBB(Target); |
2461 | 2 | |
2462 | 2 | // Obtain the branch probability and add the target to the successor list. |
2463 | 2 | if (FuncInfo.BPI) { |
2464 | 0 | auto BranchProbability = FuncInfo.BPI->getEdgeProbability( |
2465 | 0 | BI->getParent(), Target->getBasicBlock()); |
2466 | 0 | FuncInfo.MBB->addSuccessor(Target, BranchProbability); |
2467 | 0 | } else |
2468 | 2 | FuncInfo.MBB->addSuccessorWithoutProb(Target); |
2469 | 2 | return true; |
2470 | 31 | } else { |
2471 | 31 | AArch64CC::CondCode CC = AArch64CC::NE; |
2472 | 31 | if (foldXALUIntrinsic(CC, I, BI->getCondition())) { |
2473 | 14 | // Fake request the condition, otherwise the intrinsic might be completely |
2474 | 14 | // optimized away. |
2475 | 14 | unsigned CondReg = getRegForValue(BI->getCondition()); |
2476 | 14 | if (!CondReg) |
2477 | 0 | return false; |
2478 | 14 | |
2479 | 14 | // Emit the branch. |
2480 | 14 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) |
2481 | 14 | .addImm(CC) |
2482 | 14 | .addMBB(TBB); |
2483 | 14 | |
2484 | 14 | finishCondBranch(BI->getParent(), TBB, FBB); |
2485 | 14 | return true; |
2486 | 14 | } |
2487 | 31 | } |
2488 | 18 | |
2489 | 18 | unsigned CondReg = getRegForValue(BI->getCondition()); |
2490 | 18 | if (CondReg == 0) |
2491 | 0 | return false; |
2492 | 18 | bool CondRegIsKill = hasTrivialKill(BI->getCondition()); |
2493 | 18 | |
2494 | 18 | // i1 conditions come as i32 values; test the lowest bit with tb(n)z.
2495 | 18 | unsigned Opcode = AArch64::TBNZW; |
2496 | 18 | if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { |
2497 | 14 | std::swap(TBB, FBB); |
2498 | 14 | Opcode = AArch64::TBZW; |
2499 | 14 | } |
2500 | 18 | |
2501 | 18 | const MCInstrDesc &II = TII.get(Opcode); |
2502 | 18 | unsigned ConstrainedCondReg |
2503 | 18 | = constrainOperandRegClass(II, CondReg, II.getNumDefs()); |
2504 | 18 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) |
2505 | 18 | .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill)) |
2506 | 18 | .addImm(0) |
2507 | 18 | .addMBB(TBB); |
2508 | 18 | |
2509 | 18 | finishCondBranch(BI->getParent(), TBB, FBB); |
2510 | 18 | return true; |
2511 | 18 | } |
2512 | | |
2513 | 1 | bool AArch64FastISel::selectIndirectBr(const Instruction *I) { |
2514 | 1 | const IndirectBrInst *BI = cast<IndirectBrInst>(I); |
2515 | 1 | unsigned AddrReg = getRegForValue(BI->getOperand(0)); |
2516 | 1 | if (AddrReg == 0) |
2517 | 0 | return false; |
2518 | 1 | |
2519 | 1 | // Emit the indirect branch. |
2520 | 1 | const MCInstrDesc &II = TII.get(AArch64::BR); |
2521 | 1 | AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs()); |
2522 | 1 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg); |
2523 | 1 | |
2524 | 1 | // Make sure the CFG is up-to-date. |
2525 | 1 | for (auto *Succ : BI->successors()) |
2526 | 2 | FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]); |
2527 | 1 | |
2528 | 1 | return true; |
2529 | 1 | } |
2530 | | |
2531 | 57 | bool AArch64FastISel::selectCmp(const Instruction *I) { |
2532 | 57 | const CmpInst *CI = cast<CmpInst>(I); |
2533 | 57 | |
2534 | 57 | // Vectors of i1 are weird: bail out. |
2535 | 57 | if (CI->getType()->isVectorTy()) |
2536 | 6 | return false; |
2537 | 51 | |
2538 | 51 | // Try to optimize or fold the cmp. |
2539 | 51 | CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); |
2540 | 51 | unsigned ResultReg = 0; |
2541 | 51 | switch (Predicate) { |
2542 | 51 | default: |
2543 | 49 | break; |
2544 | 51 | case CmpInst::FCMP_FALSE: |
2545 | 1 | ResultReg = createResultReg(&AArch64::GPR32RegClass); |
2546 | 1 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
2547 | 1 | TII.get(TargetOpcode::COPY), ResultReg) |
2548 | 1 | .addReg(AArch64::WZR, getKillRegState(true)); |
2549 | 1 | break; |
2550 | 51 | case CmpInst::FCMP_TRUE: |
2551 | 1 | ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1); |
2552 | 1 | break; |
2553 | 51 | } |
2554 | 51 | |
2555 | 51 | if (ResultReg) { |
2556 | 2 | updateValueMap(I, ResultReg); |
2557 | 2 | return true; |
2558 | 2 | } |
2559 | 49 | |
2560 | 49 | // Emit the cmp. |
2561 | 49 | if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) |
2562 | 0 | return false; |
2563 | 49 | |
2564 | 49 | ResultReg = createResultReg(&AArch64::GPR32RegClass); |
2565 | 49 | |
2566 | 49 | // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These |
2567 | 49 | // condition codes are inverted, because they are used by CSINC. |
2568 | 49 | static unsigned CondCodeTable[2][2] = { |
2569 | 49 | { AArch64CC::NE, AArch64CC::VC }, |
2570 | 49 | { AArch64CC::PL, AArch64CC::LE } |
2571 | 49 | }; |
2572 | 49 | unsigned *CondCodes = nullptr; |
2573 | 49 | switch (Predicate) { |
2574 | 49 | default: |
2575 | 47 | break; |
2576 | 49 | case CmpInst::FCMP_UEQ: |
2577 | 1 | CondCodes = &CondCodeTable[0][0]; |
2578 | 1 | break; |
2579 | 49 | case CmpInst::FCMP_ONE: |
2580 | 1 | CondCodes = &CondCodeTable[1][0]; |
2581 | 1 | break; |
2582 | 49 | } |
2583 | 49 | |
2584 | 49 | if (CondCodes) { |
2585 | 2 | unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass); |
2586 | 2 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr), |
2587 | 2 | TmpReg1) |
2588 | 2 | .addReg(AArch64::WZR, getKillRegState(true)) |
2589 | 2 | .addReg(AArch64::WZR, getKillRegState(true)) |
2590 | 2 | .addImm(CondCodes[0]); |
2591 | 2 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr), |
2592 | 2 | ResultReg) |
2593 | 2 | .addReg(TmpReg1, getKillRegState(true)) |
2594 | 2 | .addReg(AArch64::WZR, getKillRegState(true)) |
2595 | 2 | .addImm(CondCodes[1]); |
2596 | 2 | |
2597 | 2 | updateValueMap(I, ResultReg); |
2598 | 2 | return true; |
2599 | 2 | } |
2600 | 47 | |
2601 | 47 | // Now set a register based on the comparison. |
2602 | 47 | AArch64CC::CondCode CC = getCompareCC(Predicate); |
2603 | 47 | assert((CC != AArch64CC::AL) && "Unexpected condition code."); |
2604 | 47 | AArch64CC::CondCode invertedCC = getInvertedCondCode(CC); |
2605 | 47 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr), |
2606 | 47 | ResultReg) |
2607 | 47 | .addReg(AArch64::WZR, getKillRegState(true)) |
2608 | 47 | .addReg(AArch64::WZR, getKillRegState(true)) |
2609 | 47 | .addImm(invertedCC); |
2610 | 47 | |
2611 | 47 | updateValueMap(I, ResultReg); |
2612 | 47 | return true; |
2613 | 47 | } |
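The CSINC above is the CSET alias: with both sources WZR and the inverted condition, the result is WZR (0) when the inverted condition holds and WZR + 1 (1) otherwise, i.e. 1 exactly when the original predicate is true. Roughly:

    //   cmp  w0, w1
    //   cset w8, lt          ; alias of csinc w8, wzr, wzr, ge
    bool less(int A, int B) { return A < B; }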
2614 | | |
2615 | | /// Optimize selects of i1 if one of the operands has a 'true' or 'false' |
2616 | | /// value. |
2617 | 53 | bool AArch64FastISel::optimizeSelect(const SelectInst *SI) { |
2618 | 53 | if (!SI->getType()->isIntegerTy(1)) |
2619 | 47 | return false; |
2620 | 6 | |
2621 | 6 | const Value *Src1Val, *Src2Val; |
2622 | 6 | unsigned Opc = 0; |
2623 | 6 | bool NeedExtraOp = false; |
2624 | 6 | if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) { |
2625 | 3 | if (CI->isOne()) { |
2626 | 2 | Src1Val = SI->getCondition(); |
2627 | 2 | Src2Val = SI->getFalseValue(); |
2628 | 2 | Opc = AArch64::ORRWrr; |
2629 | 2 | } else { |
2630 | 1 | assert(CI->isZero()); |
2631 | 1 | Src1Val = SI->getFalseValue(); |
2632 | 1 | Src2Val = SI->getCondition(); |
2633 | 1 | Opc = AArch64::BICWrr; |
2634 | 1 | } |
2635 | 3 | } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) { |
2636 | 2 | if (CI->isOne()) { |
2637 | 1 | Src1Val = SI->getCondition(); |
2638 | 1 | Src2Val = SI->getTrueValue(); |
2639 | 1 | Opc = AArch64::ORRWrr; |
2640 | 1 | NeedExtraOp = true; |
2641 | 1 | } else { |
2642 | 1 | assert(CI->isZero()); |
2643 | 1 | Src1Val = SI->getCondition(); |
2644 | 1 | Src2Val = SI->getTrueValue(); |
2645 | 1 | Opc = AArch64::ANDWrr; |
2646 | 1 | } |
2647 | 2 | } |
2648 | 6 | |
2649 | 6 | if (!Opc) |
2650 | 1 | return false; |
2651 | 5 | |
2652 | 5 | unsigned Src1Reg = getRegForValue(Src1Val); |
2653 | 5 | if (!Src1Reg) |
2654 | 0 | return false; |
2655 | 5 | bool Src1IsKill = hasTrivialKill(Src1Val); |
2656 | 5 | |
2657 | 5 | unsigned Src2Reg = getRegForValue(Src2Val); |
2658 | 5 | if (!Src2Reg) |
2659 | 0 | return false; |
2660 | 5 | bool Src2IsKill = hasTrivialKill(Src2Val); |
2661 | 5 | |
2662 | 5 | if (NeedExtraOp) { |
2663 | 1 | Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1); |
2664 | 1 | Src1IsKill = true; |
2665 | 1 | } |
2666 | 5 | unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg, |
2667 | 5 | Src1IsKill, Src2Reg, Src2IsKill); |
2668 | 5 | updateValueMap(SI, ResultReg); |
2669 | 5 | return true; |
2670 | 5 | } |
2671 | | |
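The four folds above are just boolean identities; a quick standalone check (plain C++, not part of this file):

    #include <cassert>

    int main() {
      for (bool c : {false, true})
        for (bool x : {false, true}) {
          assert((c ? true  : x) == (c | x));   // select c, 1, x  -> ORRWrr
          assert((c ? false : x) == (x & !c));  // select c, 0, x  -> BICWrr
          assert((c ? x : true)  == (x | !c));  // select c, x, 1  -> EOR #1 + ORRWrr
          assert((c ? x : false) == (c & x));   // select c, x, 0  -> ANDWrr
        }
      return 0;
    }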
2672 | 53 | bool AArch64FastISel::selectSelect(const Instruction *I) { |
2673 | 53 | assert(isa<SelectInst>(I) && "Expected a select instruction."); |
2674 | 53 | MVT VT; |
2675 | 53 | if (!isTypeSupported(I->getType(), VT)) |
2676 | 0 | return false; |
2677 | 53 | |
2678 | 53 | unsigned Opc; |
2679 | 53 | const TargetRegisterClass *RC; |
2680 | 53 | switch (VT.SimpleTy) { |
2681 | 53 | default: |
2682 | 0 | return false; |
2683 | 53 | case MVT::i1: |
2684 | 18 | case MVT::i8: |
2685 | 18 | case MVT::i16: |
2686 | 18 | case MVT::i32: |
2687 | 18 | Opc = AArch64::CSELWr; |
2688 | 18 | RC = &AArch64::GPR32RegClass; |
2689 | 18 | break; |
2690 | 18 | case MVT::i64: |
2691 | 8 | Opc = AArch64::CSELXr; |
2692 | 8 | RC = &AArch64::GPR64RegClass; |
2693 | 8 | break; |
2694 | 26 | case MVT::f32: |
2695 | 26 | Opc = AArch64::FCSELSrrr; |
2696 | 26 | RC = &AArch64::FPR32RegClass; |
2697 | 26 | break; |
2698 | 18 | case MVT::f64: |
2699 | 1 | Opc = AArch64::FCSELDrrr; |
2700 | 1 | RC = &AArch64::FPR64RegClass; |
2701 | 1 | break; |
2702 | 53 | } |
2703 | 53 | |
2704 | 53 | const SelectInst *SI = cast<SelectInst>(I); |
2705 | 53 | const Value *Cond = SI->getCondition(); |
2706 | 53 | AArch64CC::CondCode CC = AArch64CC::NE; |
2707 | 53 | AArch64CC::CondCode ExtraCC = AArch64CC::AL; |
2708 | 53 | |
2709 | 53 | if (optimizeSelect(SI)) |
2710 | 5 | return true; |
2711 | 48 | |
2712 | 48 | // Try to pick up the flags, so we don't have to emit another compare.
2713 | 48 | if (foldXALUIntrinsic(CC, I, Cond)) { |
2714 | 12 | // Fake-request the condition so the XALU intrinsic is actually emitted.
2715 | 12 | unsigned CondReg = getRegForValue(Cond); |
2716 | 12 | if (!CondReg) |
2717 | 0 | return false; |
2718 | 36 | } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse()28 && |
2719 | 36 | isValueAvailable(Cond)27 ) { |
2720 | 27 | const auto *Cmp = cast<CmpInst>(Cond); |
2721 | 27 | // Try to optimize or fold the cmp. |
2722 | 27 | CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp); |
2723 | 27 | const Value *FoldSelect = nullptr; |
2724 | 27 | switch (Predicate) { |
2725 | 27 | default: |
2726 | 25 | break; |
2727 | 27 | case CmpInst::FCMP_FALSE: |
2728 | 1 | FoldSelect = SI->getFalseValue(); |
2729 | 1 | break; |
2730 | 27 | case CmpInst::FCMP_TRUE: |
2731 | 1 | FoldSelect = SI->getTrueValue(); |
2732 | 1 | break; |
2733 | 27 | } |
2734 | 27 | |
2735 | 27 | if (FoldSelect) { |
2736 | 2 | unsigned SrcReg = getRegForValue(FoldSelect); |
2737 | 2 | if (!SrcReg) |
2738 | 0 | return false; |
2739 | 2 | unsigned UseReg = lookUpRegForValue(SI); |
2740 | 2 | if (UseReg) |
2741 | 2 | MRI.clearKillFlags(UseReg); |
2742 | 2 | |
2743 | 2 | updateValueMap(I, SrcReg); |
2744 | 2 | return true; |
2745 | 2 | } |
2746 | 25 | |
2747 | 25 | // Emit the cmp. |
2748 | 25 | if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned())) |
2749 | 0 | return false; |
2750 | 25 | |
2751 | 25 | // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction. |
2752 | 25 | CC = getCompareCC(Predicate); |
2753 | 25 | switch (Predicate) { |
2754 | 25 | default: |
2755 | 23 | break; |
2756 | 25 | case CmpInst::FCMP_UEQ: |
2757 | 1 | ExtraCC = AArch64CC::EQ; |
2758 | 1 | CC = AArch64CC::VS; |
2759 | 1 | break; |
2760 | 25 | case CmpInst::FCMP_ONE: |
2761 | 1 | ExtraCC = AArch64CC::MI; |
2762 | 1 | CC = AArch64CC::GT; |
2763 | 1 | break; |
2764 | 25 | } |
2765 | 25 | assert((CC != AArch64CC::AL) && "Unexpected condition code."); |
2766 | 25 | } else { |
2767 | 9 | unsigned CondReg = getRegForValue(Cond); |
2768 | 9 | if (!CondReg) |
2769 | 0 | return false; |
2770 | 9 | bool CondIsKill = hasTrivialKill(Cond); |
2771 | 9 | |
2772 | 9 | const MCInstrDesc &II = TII.get(AArch64::ANDSWri); |
2773 | 9 | CondReg = constrainOperandRegClass(II, CondReg, 1); |
2774 | 9 | |
2775 | 9 | // Emit a TST instruction (ANDS wzr, reg, #imm). |
2776 | 9 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, |
2777 | 9 | AArch64::WZR) |
2778 | 9 | .addReg(CondReg, getKillRegState(CondIsKill)) |
2779 | 9 | .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); |
2780 | 9 | } |
2781 | 48 | |
2782 | 48 | unsigned Src1Reg = getRegForValue(SI->getTrueValue()); |
2783 | 46 | bool Src1IsKill = hasTrivialKill(SI->getTrueValue()); |
2784 | 46 | |
2785 | 46 | unsigned Src2Reg = getRegForValue(SI->getFalseValue()); |
2786 | 46 | bool Src2IsKill = hasTrivialKill(SI->getFalseValue()); |
2787 | 46 | |
2788 | 46 | if (!Src1Reg || !Src2Reg) |
2789 | 0 | return false; |
2790 | 46 | |
2791 | 46 | if (ExtraCC != AArch64CC::AL) { |
2792 | 2 | Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg, |
2793 | 2 | Src2IsKill, ExtraCC); |
2794 | 2 | Src2IsKill = true; |
2795 | 2 | } |
2796 | 46 | unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg, |
2797 | 46 | Src2IsKill, CC); |
2798 | 46 | updateValueMap(I, ResultReg); |
2799 | 46 | return true; |
2800 | 46 | } |
2801 | | |
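For reference, the generic path above boils down to one flag-setting instruction followed by one (or, for FCMP_UEQ/FCMP_ONE, two) conditional selects. A hand-written illustration of the i1-condition case, where TST recovers the Z flag from bit 0 (assembly shown in comments; not compiler output):

    // select i1 %c, i32 %t, i32 %f, with %c not already in NZCV:
    //   tst  w0, #0x1            ; ANDS wzr, w_cond, #1 -> Z = (bit0 == 0)
    //   csel w8, w_t, w_f, ne    ; w8 = (Z clear) ? t : f
    #include <cassert>

    static int csel(bool cond, int t, int f) { return cond ? t : f; }

    int main() {
      unsigned wcond = 1;
      bool NE = (wcond & 0x1) != 0;     // Z is clear after the TST
      assert(csel(NE, 7, 9) == 7);
      return 0;
    }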
2802 | 5 | bool AArch64FastISel::selectFPExt(const Instruction *I) { |
2803 | 5 | Value *V = I->getOperand(0); |
2804 | 5 | if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy()3 ) |
2805 | 2 | return false; |
2806 | 3 | |
2807 | 3 | unsigned Op = getRegForValue(V); |
2808 | 3 | if (Op == 0) |
2809 | 0 | return false; |
2810 | 3 | |
2811 | 3 | unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass); |
2812 | 3 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr), |
2813 | 3 | ResultReg).addReg(Op); |
2814 | 3 | updateValueMap(I, ResultReg); |
2815 | 3 | return true; |
2816 | 3 | } |
2817 | | |
2818 | 2 | bool AArch64FastISel::selectFPTrunc(const Instruction *I) { |
2819 | 2 | Value *V = I->getOperand(0); |
2820 | 2 | if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy()1 ) |
2821 | 1 | return false; |
2822 | 1 | |
2823 | 1 | unsigned Op = getRegForValue(V); |
2824 | 1 | if (Op == 0) |
2825 | 0 | return false; |
2826 | 1 | |
2827 | 1 | unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass); |
2828 | 1 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr), |
2829 | 1 | ResultReg).addReg(Op); |
2830 | 1 | updateValueMap(I, ResultReg); |
2831 | 1 | return true; |
2832 | 1 | } |
2833 | | |
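Both conversions above lower to a single FCVT between S and D registers. A tiny standalone check of the semantics they implement (fpext is exact; fptrunc rounds, and round-trips exactly for values that started as floats):

    #include <cassert>

    int main() {
      float s = 1.1f;
      double d = static_cast<double>(s);     // what FCVTDSr computes (exact)
      assert(static_cast<float>(d) == s);    // FCVTSDr: exact round-trip here
      return 0;
    }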
2834 | | // FPToUI and FPToSI |
2835 | 7 | bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) { |
2836 | 7 | MVT DestVT; |
2837 | 7 | if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) |
2838 | 1 | return false; |
2839 | 6 | |
2840 | 6 | unsigned SrcReg = getRegForValue(I->getOperand(0)); |
2841 | 6 | if (SrcReg == 0) |
2842 | 0 | return false; |
2843 | 6 | |
2844 | 6 | EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); |
2845 | 6 | if (SrcVT == MVT::f128 || SrcVT == MVT::f164 ) |
2846 | 4 | return false; |
2847 | 2 | |
2848 | 2 | unsigned Opc; |
2849 | 2 | if (SrcVT == MVT::f64) { |
2850 | 1 | if (Signed) |
2851 | 0 | Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr; |
2852 | 1 | else |
2853 | 1 | Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr0 ; |
2854 | 1 | } else { |
2855 | 1 | if (Signed) |
2856 | 0 | Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr; |
2857 | 1 | else |
2858 | 1 | Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr0 ; |
2859 | 1 | } |
2860 | 2 | unsigned ResultReg = createResultReg( |
2861 | 2 | DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass0 ); |
2862 | 2 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) |
2863 | 2 | .addReg(SrcReg); |
2864 | 2 | updateValueMap(I, ResultReg); |
2865 | 2 | return true; |
2866 | 2 | } |
2867 | | |
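The FCVTZS/FCVTZU opcodes selected above round toward zero, matching C++'s float-to-integer casts; a standalone sanity check:

    #include <cassert>
    #include <cstdint>

    int main() {
      assert(static_cast<int32_t>(-2.7)  == -2);  // FCVTZS: truncate toward zero
      assert(static_cast<uint32_t>(3.9f) == 3);   // FCVTZU: likewise, unsigned
      return 0;
    }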
2868 | 22 | bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) { |
2869 | 22 | MVT DestVT; |
2870 | 22 | if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) |
2871 | 2 | return false; |
2872 | 20 | // Let regular ISEL handle FP16 |
2873 | 20 | if (DestVT == MVT::f16) |
2874 | 10 | return false; |
2875 | 10 | |
2876 | 10 | assert((DestVT == MVT::f32 || DestVT == MVT::f64) && |
2877 | 10 | "Unexpected value type."); |
2878 | 10 | |
2879 | 10 | unsigned SrcReg = getRegForValue(I->getOperand(0)); |
2880 | 10 | if (!SrcReg) |
2881 | 0 | return false; |
2882 | 10 | bool SrcIsKill = hasTrivialKill(I->getOperand(0)); |
2883 | 10 | |
2884 | 10 | EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); |
2885 | 10 | |
2886 | 10 | // Handle sign-extension. |
2887 | 10 | if (SrcVT == MVT::i16 || SrcVT == MVT::i88 || SrcVT == MVT::i16 ) { |
2888 | 6 | SrcReg = |
2889 | 6 | emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed); |
2890 | 6 | if (!SrcReg) |
2891 | 0 | return false; |
2892 | 6 | SrcIsKill = true; |
2893 | 6 | } |
2894 | 10 | |
2895 | 10 | unsigned Opc; |
2896 | 10 | if (SrcVT == MVT::i64) { |
2897 | 2 | if (Signed) |
2898 | 0 | Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri; |
2899 | 2 | else |
2900 | 2 | Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri1 : AArch64::UCVTFUXDri1 ; |
2901 | 8 | } else { |
2902 | 8 | if (Signed) |
2903 | 3 | Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri0 ; |
2904 | 5 | else |
2905 | 5 | Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri4 : AArch64::UCVTFUWDri1 ; |
2906 | 8 | } |
2907 | 10 | |
2908 | 10 | unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg, |
2909 | 10 | SrcIsKill); |
2910 | 10 | updateValueMap(I, ResultReg); |
2911 | 10 | return true; |
2912 | 10 | } |
2913 | | |
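Note why i8/i16 sources are widened first: SCVTF/UCVTF only read full W or X registers, so the extension (sign for sitofp, zero for uitofp) must happen up front. A standalone illustration:

    #include <cassert>
    #include <cstdint>

    int main() {
      int8_t v = -5;
      int32_t widened = v;                          // emitIntExt(..., /*isZExt=*/false)
      assert(static_cast<float>(widened) == -5.0f); // SCVTF on the extended value
      return 0;
    }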
2914 | 1.26k | bool AArch64FastISel::fastLowerArguments() { |
2915 | 1.26k | if (!FuncInfo.CanLowerReturn) |
2916 | 0 | return false; |
2917 | 1.26k | |
2918 | 1.26k | const Function *F = FuncInfo.Fn; |
2919 | 1.26k | if (F->isVarArg()) |
2920 | 6 | return false; |
2921 | 1.25k | |
2922 | 1.25k | CallingConv::ID CC = F->getCallingConv(); |
2923 | 1.25k | if (CC != CallingConv::C && CC != CallingConv::Swift24 ) |
2924 | 8 | return false; |
2925 | 1.24k | |
2926 | 1.24k | if (Subtarget->hasCustomCallingConv()) |
2927 | 8 | return false; |
2928 | 1.23k | |
2929 | 1.23k | // Only handle simple cases of up to 8 GPR and 8 FPR arguments each.
2930 | 1.23k | unsigned GPRCnt = 0; |
2931 | 1.23k | unsigned FPRCnt = 0; |
2932 | 2.05k | for (auto const &Arg : F->args()) { |
2933 | 2.05k | if (Arg.hasAttribute(Attribute::ByVal) || |
2934 | 2.05k | Arg.hasAttribute(Attribute::InReg) || |
2935 | 2.05k | Arg.hasAttribute(Attribute::StructRet) || |
2936 | 2.05k | Arg.hasAttribute(Attribute::SwiftSelf)2.05k || |
2937 | 2.05k | Arg.hasAttribute(Attribute::SwiftError)2.04k || |
2938 | 2.05k | Arg.hasAttribute(Attribute::Nest)2.04k ) |
2939 | 16 | return false; |
2940 | 2.04k | |
2941 | 2.04k | Type *ArgTy = Arg.getType(); |
2942 | 2.04k | if (ArgTy->isStructTy() || ArgTy->isArrayTy()) |
2943 | 3 | return false; |
2944 | 2.03k | |
2945 | 2.03k | EVT ArgVT = TLI.getValueType(DL, ArgTy); |
2946 | 2.03k | if (!ArgVT.isSimple()) |
2947 | 0 | return false; |
2948 | 2.03k | |
2949 | 2.03k | MVT VT = ArgVT.getSimpleVT().SimpleTy; |
2950 | 2.03k | if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8()311 ) |
2951 | 0 | return false; |
2952 | 2.03k | |
2953 | 2.03k | if (VT.isVector() && |
2954 | 2.03k | (117 !Subtarget->hasNEON()117 || !Subtarget->isLittleEndian()117 )) |
2955 | 71 | return false; |
2956 | 1.96k | |
2957 | 1.96k | if (VT >= MVT::i1 && VT <= MVT::i64) |
2958 | 1.65k | ++GPRCnt; |
2959 | 315 | else if ((VT >= MVT::f16 && VT <= MVT::f64313 ) || VT.is64BitVector()56 || |
2960 | 315 | VT.is128BitVector()37 ) |
2961 | 305 | ++FPRCnt; |
2962 | 10 | else |
2963 | 10 | return false; |
2964 | 1.95k | |
2965 | 1.95k | if (GPRCnt > 8 || FPRCnt > 81.95k ) |
2966 | 8 | return false; |
2967 | 1.95k | } |
2968 | 1.23k | |
2969 | 1.23k | static const MCPhysReg Registers[6][8] = { |
2970 | 1.13k | { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4, |
2971 | 1.13k | AArch64::W5, AArch64::W6, AArch64::W7 }, |
2972 | 1.13k | { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4, |
2973 | 1.13k | AArch64::X5, AArch64::X6, AArch64::X7 }, |
2974 | 1.13k | { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4, |
2975 | 1.13k | AArch64::H5, AArch64::H6, AArch64::H7 }, |
2976 | 1.13k | { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4, |
2977 | 1.13k | AArch64::S5, AArch64::S6, AArch64::S7 }, |
2978 | 1.13k | { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4, |
2979 | 1.13k | AArch64::D5, AArch64::D6, AArch64::D7 }, |
2980 | 1.13k | { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4, |
2981 | 1.13k | AArch64::Q5, AArch64::Q6, AArch64::Q7 } |
2982 | 1.13k | }; |
2983 | 1.13k | |
2984 | 1.13k | unsigned GPRIdx = 0; |
2985 | 1.13k | unsigned FPRIdx = 0; |
2986 | 1.86k | for (auto const &Arg : F->args()) { |
2987 | 1.86k | MVT VT = TLI.getSimpleValueType(DL, Arg.getType()); |
2988 | 1.86k | unsigned SrcReg; |
2989 | 1.86k | const TargetRegisterClass *RC; |
2990 | 1.86k | if (VT >= MVT::i1 && VT <= MVT::i32) { |
2991 | 668 | SrcReg = Registers[0][GPRIdx++]; |
2992 | 668 | RC = &AArch64::GPR32RegClass; |
2993 | 668 | VT = MVT::i32; |
2994 | 1.19k | } else if (VT == MVT::i64) { |
2995 | 902 | SrcReg = Registers[1][GPRIdx++]; |
2996 | 902 | RC = &AArch64::GPR64RegClass; |
2997 | 902 | } else if (296 VT == MVT::f16296 ) { |
2998 | 2 | SrcReg = Registers[2][FPRIdx++]; |
2999 | 2 | RC = &AArch64::FPR16RegClass; |
3000 | 294 | } else if (VT == MVT::f32) { |
3001 | 188 | SrcReg = Registers[3][FPRIdx++]; |
3002 | 188 | RC = &AArch64::FPR32RegClass; |
3003 | 188 | } else if (106 (VT == MVT::f64)106 || VT.is64BitVector()46 ) { |
3004 | 79 | SrcReg = Registers[4][FPRIdx++]; |
3005 | 79 | RC = &AArch64::FPR64RegClass; |
3006 | 79 | } else if (27 VT.is128BitVector()27 ) { |
3007 | 27 | SrcReg = Registers[5][FPRIdx++]; |
3008 | 27 | RC = &AArch64::FPR128RegClass; |
3009 | 27 | } else |
3010 | 27 | llvm_unreachable0 ("Unexpected value type."); |
3011 | 1.86k | |
3012 | 1.86k | unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC); |
3013 | 1.86k | // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. |
3014 | 1.86k | // Without this, EmitLiveInCopies may eliminate the livein if its only |
3015 | 1.86k | // use is a bitcast (which isn't turned into an instruction). |
3016 | 1.86k | unsigned ResultReg = createResultReg(RC); |
3017 | 1.86k | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
3018 | 1.86k | TII.get(TargetOpcode::COPY), ResultReg) |
3019 | 1.86k | .addReg(DstReg, getKillRegState(true)); |
3020 | 1.86k | updateValueMap(&Arg, ResultReg); |
3021 | 1.86k | } |
3022 | 1.13k | return true; |
3023 | 1.13k | } |
3024 | | |
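The table above mirrors the AAPCS64 rule: the first eight integer arguments travel in W0-W7/X0-X7 and the first eight floating-point or vector arguments in V0-V7, viewed as H/S/D/Q depending on width. A condensed standalone sketch of that bank/index choice (argReg is a hypothetical helper for illustration only):

    #include <cassert>
    #include <string>

    // Hypothetical helper: pick the AAPCS64 argument register name.
    static std::string argReg(bool isFP, unsigned bits, unsigned idx) {
      assert(idx < 8 && "only eight arguments per bank are passed in registers");
      const char *fpr = bits == 16 ? "H" : bits == 32 ? "S" : bits == 64 ? "D" : "Q";
      const char *gpr = bits == 64 ? "X" : "W";
      return std::string(isFP ? fpr : gpr) + std::to_string(idx);
    }

    int main() {
      assert(argReg(false, 32, 0) == "W0");   // first i32 argument
      assert(argReg(true,  64, 1) == "D1");   // second f64 argument
      return 0;
    }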
3025 | | bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI, |
3026 | | SmallVectorImpl<MVT> &OutVTs, |
3027 | 131 | unsigned &NumBytes) { |
3028 | 131 | CallingConv::ID CC = CLI.CallConv; |
3029 | 131 | SmallVector<CCValAssign, 16> ArgLocs; |
3030 | 131 | CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context); |
3031 | 131 | CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC)); |
3032 | 131 | |
3033 | 131 | // Get a count of how many bytes are to be pushed on the stack. |
3034 | 131 | NumBytes = CCInfo.getNextStackOffset(); |
3035 | 131 | |
3036 | 131 | // Issue CALLSEQ_START |
3037 | 131 | unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); |
3038 | 131 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)) |
3039 | 131 | .addImm(NumBytes).addImm(0); |
3040 | 131 | |
3041 | 131 | // Process the args. |
3042 | 1.33k | for (CCValAssign &VA : ArgLocs) { |
3043 | 1.33k | const Value *ArgVal = CLI.OutVals[VA.getValNo()]; |
3044 | 1.33k | MVT ArgVT = OutVTs[VA.getValNo()]; |
3045 | 1.33k | |
3046 | 1.33k | unsigned ArgReg = getRegForValue(ArgVal); |
3047 | 1.33k | if (!ArgReg) |
3048 | 2 | return false; |
3049 | 1.33k | |
3050 | 1.33k | // Handle arg promotion: SExt, ZExt, AExt. |
3051 | 1.33k | switch (VA.getLocInfo()) { |
3052 | 1.33k | case CCValAssign::Full: |
3053 | 1.22k | break; |
3054 | 1.33k | case CCValAssign::SExt: { |
3055 | 15 | MVT DestVT = VA.getLocVT(); |
3056 | 15 | MVT SrcVT = ArgVT; |
3057 | 15 | ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false); |
3058 | 15 | if (!ArgReg) |
3059 | 0 | return false; |
3060 | 15 | break; |
3061 | 15 | } |
3062 | 88 | case CCValAssign::AExt: |
3063 | 88 | // Intentional fall-through. |
3064 | 88 | case CCValAssign::ZExt: { |
3065 | 88 | MVT DestVT = VA.getLocVT(); |
3066 | 88 | MVT SrcVT = ArgVT; |
3067 | 88 | ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true); |
3068 | 88 | if (!ArgReg) |
3069 | 0 | return false; |
3070 | 88 | break; |
3071 | 88 | } |
3072 | 88 | default: |
3073 | 0 | llvm_unreachable("Unknown arg promotion!"); |
3074 | 1.33k | } |
3075 | 1.33k | |
3076 | 1.33k | // Now copy/store arg to correct locations. |
3077 | 1.33k | if (VA.isRegLoc() && !VA.needsCustom()267 ) { |
3078 | 267 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
3079 | 267 | TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg); |
3080 | 267 | CLI.OutRegs.push_back(VA.getLocReg()); |
3081 | 1.06k | } else if (VA.needsCustom()) { |
3082 | 0 | // FIXME: Handle custom args. |
3083 | 0 | return false; |
3084 | 1.06k | } else { |
3085 | 1.06k | assert(VA.isMemLoc() && "Assuming store on stack."); |
3086 | 1.06k | |
3087 | 1.06k | // Don't emit stores for undef values. |
3088 | 1.06k | if (isa<UndefValue>(ArgVal)) |
3089 | 1.03k | continue; |
3090 | 32 | |
3091 | 32 | // Need to store on the stack. |
3092 | 32 | unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8; |
3093 | 32 | |
3094 | 32 | unsigned BEAlign = 0; |
3095 | 32 | if (ArgSize < 8 && !Subtarget->isLittleEndian()21 ) |
3096 | 2 | BEAlign = 8 - ArgSize; |
3097 | 32 | |
3098 | 32 | Address Addr; |
3099 | 32 | Addr.setKind(Address::RegBase); |
3100 | 32 | Addr.setReg(AArch64::SP); |
3101 | 32 | Addr.setOffset(VA.getLocMemOffset() + BEAlign); |
3102 | 32 | |
3103 | 32 | unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType()); |
3104 | 32 | MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( |
3105 | 32 | MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()), |
3106 | 32 | MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment); |
3107 | 32 | |
3108 | 32 | if (!emitStore(ArgVT, ArgReg, Addr, MMO)) |
3109 | 2 | return false; |
3110 | 32 | } |
3111 | 1.33k | } |
3112 | 131 | return true127 ; |
3113 | 131 | } |
3114 | | |
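One detail worth calling out from the stack-store path above: on big-endian targets a sub-8-byte argument occupies the high-address end of its 8-byte slot, hence the BEAlign padding. A standalone check of that arithmetic:

    #include <cassert>

    int main() {
      unsigned ArgSize = 4, BEAlign = 0;
      bool IsLittleEndian = false;        // big-endian assumed for the example
      if (ArgSize < 8 && !IsLittleEndian)
        BEAlign = 8 - ArgSize;
      assert(BEAlign == 4);               // the i32 lands 4 bytes into its slot
      return 0;
    }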
3115 | | bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT, |
3116 | 127 | unsigned NumBytes) { |
3117 | 127 | CallingConv::ID CC = CLI.CallConv; |
3118 | 127 | |
3119 | 127 | // Issue CALLSEQ_END |
3120 | 127 | unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); |
3121 | 127 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp)) |
3122 | 127 | .addImm(NumBytes).addImm(0); |
3123 | 127 | |
3124 | 127 | // Now the return value. |
3125 | 127 | if (RetVT != MVT::isVoid) { |
3126 | 72 | SmallVector<CCValAssign, 16> RVLocs; |
3127 | 72 | CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context); |
3128 | 72 | CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC)); |
3129 | 72 | |
3130 | 72 | // Only handle a single return value. |
3131 | 72 | if (RVLocs.size() != 1) |
3132 | 0 | return false; |
3133 | 72 | |
3134 | 72 | // Copy all of the result registers out of their specified physreg. |
3135 | 72 | MVT CopyVT = RVLocs[0].getValVT(); |
3136 | 72 | |
3137 | 72 | // TODO: Handle big-endian results |
3138 | 72 | if (CopyVT.isVector() && !Subtarget->isLittleEndian()10 ) |
3139 | 10 | return false; |
3140 | 62 | |
3141 | 62 | unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT)); |
3142 | 62 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
3143 | 62 | TII.get(TargetOpcode::COPY), ResultReg) |
3144 | 62 | .addReg(RVLocs[0].getLocReg()); |
3145 | 62 | CLI.InRegs.push_back(RVLocs[0].getLocReg()); |
3146 | 62 | |
3147 | 62 | CLI.ResultReg = ResultReg; |
3148 | 62 | CLI.NumResultRegs = 1; |
3149 | 62 | } |
3150 | 127 | |
3151 | 127 | return true117 ; |
3152 | 127 | } |
3153 | | |
3154 | 241 | bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) { |
3155 | 241 | CallingConv::ID CC = CLI.CallConv; |
3156 | 241 | bool IsTailCall = CLI.IsTailCall; |
3157 | 241 | bool IsVarArg = CLI.IsVarArg; |
3158 | 241 | const Value *Callee = CLI.Callee; |
3159 | 241 | MCSymbol *Symbol = CLI.Symbol; |
3160 | 241 | |
3161 | 241 | if (!Callee && !Symbol16 ) |
3162 | 0 | return false; |
3163 | 241 | |
3164 | 241 | // Allow SelectionDAG isel to handle tail calls. |
3165 | 241 | if (IsTailCall) |
3166 | 26 | return false; |
3167 | 215 | |
3168 | 215 | CodeModel::Model CM = TM.getCodeModel(); |
3169 | 215 | // Only support the small-addressing and large code models. |
3170 | 215 | if (CM != CodeModel::Large && !Subtarget->useSmallAddressing()199 ) |
3171 | 0 | return false; |
3172 | 215 | |
3173 | 215 | // FIXME: Add large code model support for ELF. |
3174 | 215 | if (CM == CodeModel::Large && !Subtarget->isTargetMachO()16 ) |
3175 | 0 | return false; |
3176 | 215 | |
3177 | 215 | // Let SDISel handle vararg functions. |
3178 | 215 | if (IsVarArg) |
3179 | 0 | return false; |
3180 | 215 | |
3181 | 215 | // FIXME: Only handle *simple* calls for now. |
3182 | 215 | MVT RetVT; |
3183 | 215 | if (CLI.RetTy->isVoidTy()) |
3184 | 59 | RetVT = MVT::isVoid; |
3185 | 156 | else if (!isTypeLegal(CLI.RetTy, RetVT)) |
3186 | 11 | return false; |
3187 | 204 | |
3188 | 204 | for (auto Flag : CLI.OutFlags) |
3189 | 1.40k | if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() || |
3190 | 1.40k | Flag.isSwiftSelf() || Flag.isSwiftError()1.40k ) |
3191 | 5 | return false; |
3192 | 204 | |
3193 | 204 | // Set up the argument vectors. |
3194 | 204 | SmallVector<MVT, 16> OutVTs; |
3195 | 199 | OutVTs.reserve(CLI.OutVals.size()); |
3196 | 199 | |
3197 | 1.40k | for (auto *Val : CLI.OutVals) { |
3198 | 1.40k | MVT VT; |
3199 | 1.40k | if (!isTypeLegal(Val->getType(), VT) && |
3200 | 1.40k | !(111 VT == MVT::i1111 || VT == MVT::i893 || VT == MVT::i1619 )) |
3201 | 8 | return false; |
3202 | 1.39k | |
3203 | 1.39k | // We don't handle vector parameters yet. |
3204 | 1.39k | if (VT.isVector() || VT.getSizeInBits() > 641.33k ) |
3205 | 60 | return false; |
3206 | 1.33k | |
3207 | 1.33k | OutVTs.push_back(VT); |
3208 | 1.33k | } |
3209 | 199 | |
3210 | 199 | Address Addr; |
3211 | 131 | if (Callee && !computeCallAddress(Callee, Addr)115 ) |
3212 | 0 | return false; |
3213 | 131 | |
3214 | 131 | // Handle the arguments now that we've gotten them. |
3215 | 131 | unsigned NumBytes; |
3216 | 131 | if (!processCallArgs(CLI, OutVTs, NumBytes)) |
3217 | 4 | return false; |
3218 | 127 | |
3219 | 127 | const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); |
3220 | 127 | if (RegInfo->isAnyArgRegReserved(*MF)) |
3221 | 2 | RegInfo->emitReservedArgRegCallError(*MF); |
3222 | 127 | |
3223 | 127 | // Issue the call. |
3224 | 127 | MachineInstrBuilder MIB; |
3225 | 127 | if (Subtarget->useSmallAddressing()) { |
3226 | 111 | const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR18 : AArch64::BL93 ); |
3227 | 111 | MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II); |
3228 | 111 | if (Symbol) |
3229 | 16 | MIB.addSym(Symbol, 0); |
3230 | 95 | else if (Addr.getGlobalValue()) |
3231 | 77 | MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0); |
3232 | 18 | else if (Addr.getReg()) { |
3233 | 18 | unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0); |
3234 | 18 | MIB.addReg(Reg); |
3235 | 18 | } else |
3236 | 0 | return false; |
3237 | 16 | } else { |
3238 | 16 | unsigned CallReg = 0; |
3239 | 16 | if (Symbol) { |
3240 | 8 | unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); |
3241 | 8 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), |
3242 | 8 | ADRPReg) |
3243 | 8 | .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE); |
3244 | 8 | |
3245 | 8 | CallReg = createResultReg(&AArch64::GPR64RegClass); |
3246 | 8 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
3247 | 8 | TII.get(AArch64::LDRXui), CallReg) |
3248 | 8 | .addReg(ADRPReg) |
3249 | 8 | .addSym(Symbol, |
3250 | 8 | AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); |
3251 | 8 | } else if (Addr.getGlobalValue()) |
3252 | 7 | CallReg = materializeGV(Addr.getGlobalValue()); |
3253 | 1 | else if (Addr.getReg()) |
3254 | 1 | CallReg = Addr.getReg(); |
3255 | 16 | |
3256 | 16 | if (!CallReg) |
3257 | 0 | return false; |
3258 | 16 | |
3259 | 16 | const MCInstrDesc &II = TII.get(AArch64::BLR); |
3260 | 16 | CallReg = constrainOperandRegClass(II, CallReg, 0); |
3261 | 16 | MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg); |
3262 | 16 | } |
3263 | 127 | |
3264 | 127 | // Add implicit physical register uses to the call. |
3265 | 127 | for (auto Reg : CLI.OutRegs) |
3266 | 250 | MIB.addReg(Reg, RegState::Implicit); |
3267 | 127 | |
3268 | 127 | // Add a register mask with the call-preserved registers. |
3269 | 127 | // Proper defs for return values will be added by setPhysRegsDeadExcept(). |
3270 | 127 | MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC)); |
3271 | 127 | |
3272 | 127 | CLI.Call = MIB; |
3273 | 127 | |
3274 | 127 | // Finish off the call including any return values. |
3275 | 127 | return finishCall(CLI, RetVT, NumBytes); |
3276 | 127 | } |
3277 | | |
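Shape of the emitted call sequences above (hand-written illustration in comments, not tool output): the small-addressing path is a single direct branch-and-link, while the large code model on MachO reaches the callee through the GOT.

    // small code model:   bl   _callee                    ; direct BL
    // large code model:   adrp x8, _callee@GOTPAGE        ; ADRP (MO_GOT|MO_PAGE)
    //                     ldr  x8, [x8, _callee@GOTPAGEOFF]
    //                     blr  x8                          ; indirect BLR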
3278 | 32 | bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) { |
3279 | 32 | if (Alignment) |
3280 | 29 | return Len / Alignment <= 4; |
3281 | 3 | else |
3282 | 3 | return Len < 32; |
3283 | 32 | } |
3284 | | |
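So with a known alignment the copy is inlined only when it needs at most four loads and stores of the natural chunk size; a standalone check of the threshold:

    #include <cassert>

    int main() {
      assert(32 / 8 <= 4);       // 32 bytes at align 8: four LDR/STR pairs, inline
      assert(!(40 / 8 <= 4));    // 40 bytes would need five pairs: call memcpy
      return 0;
    }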
3285 | | bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src, |
3286 | 13 | uint64_t Len, unsigned Alignment) { |
3287 | 13 | // Make sure we don't bloat code by inlining very large memcpy's. |
3288 | 13 | if (!isMemCpySmall(Len, Alignment)) |
3289 | 0 | return false; |
3290 | 13 | |
3291 | 13 | int64_t UnscaledOffset = 0; |
3292 | 13 | Address OrigDest = Dest; |
3293 | 13 | Address OrigSrc = Src; |
3294 | 13 | |
3295 | 54 | while (Len) { |
3296 | 41 | MVT VT; |
3297 | 41 | if (!Alignment || Alignment >= 840 ) { |
3298 | 30 | if (Len >= 8) |
3299 | 27 | VT = MVT::i64; |
3300 | 3 | else if (Len >= 4) |
3301 | 0 | VT = MVT::i32; |
3302 | 3 | else if (Len >= 2) |
3303 | 0 | VT = MVT::i16; |
3304 | 3 | else { |
3305 | 3 | VT = MVT::i8; |
3306 | 3 | } |
3307 | 30 | } else { |
3308 | 11 | // Bound based on alignment. |
3309 | 11 | if (Len >= 4 && Alignment == 45 ) |
3310 | 2 | VT = MVT::i32; |
3311 | 9 | else if (Len >= 2 && Alignment == 26 ) |
3312 | 3 | VT = MVT::i16; |
3313 | 6 | else { |
3314 | 6 | VT = MVT::i8; |
3315 | 6 | } |
3316 | 11 | } |
3317 | 41 | |
3318 | 41 | unsigned ResultReg = emitLoad(VT, VT, Src); |
3319 | 41 | if (!ResultReg) |
3320 | 0 | return false; |
3321 | 41 | |
3322 | 41 | if (!emitStore(VT, ResultReg, Dest)) |
3323 | 0 | return false; |
3324 | 41 | |
3325 | 41 | int64_t Size = VT.getSizeInBits() / 8; |
3326 | 41 | Len -= Size; |
3327 | 41 | UnscaledOffset += Size; |
3328 | 41 | |
3329 | 41 | // We need to recompute the unscaled offset for each iteration. |
3330 | 41 | Dest.setOffset(OrigDest.getOffset() + UnscaledOffset); |
3331 | 41 | Src.setOffset(OrigSrc.getOffset() + UnscaledOffset); |
3332 | 41 | } |
3333 | 13 | |
3334 | 13 | return true; |
3335 | 13 | } |
3336 | | |
3337 | | /// Check if it is possible to fold the condition from the XALU intrinsic |
3338 | | /// into the user. The condition code will only be updated on success. |
3339 | | bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC, |
3340 | | const Instruction *I, |
3341 | 79 | const Value *Cond) { |
3342 | 79 | if (!isa<ExtractValueInst>(Cond)) |
3343 | 53 | return false; |
3344 | 26 | |
3345 | 26 | const auto *EV = cast<ExtractValueInst>(Cond); |
3346 | 26 | if (!isa<IntrinsicInst>(EV->getAggregateOperand())) |
3347 | 0 | return false; |
3348 | 26 | |
3349 | 26 | const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand()); |
3350 | 26 | MVT RetVT; |
3351 | 26 | const Function *Callee = II->getCalledFunction(); |
3352 | 26 | Type *RetTy = |
3353 | 26 | cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U); |
3354 | 26 | if (!isTypeLegal(RetTy, RetVT)) |
3355 | 0 | return false; |
3356 | 26 | |
3357 | 26 | if (RetVT != MVT::i32 && RetVT != MVT::i6414 ) |
3358 | 0 | return false; |
3359 | 26 | |
3360 | 26 | const Value *LHS = II->getArgOperand(0); |
3361 | 26 | const Value *RHS = II->getArgOperand(1); |
3362 | 26 | |
3363 | 26 | // Canonicalize immediate to the RHS. |
3364 | 26 | if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS)0 && |
3365 | 26 | isCommutativeIntrinsic(II)0 ) |
3366 | 0 | std::swap(LHS, RHS); |
3367 | 26 | |
3368 | 26 | // Simplify multiplies. |
3369 | 26 | Intrinsic::ID IID = II->getIntrinsicID(); |
3370 | 26 | switch (IID) { |
3371 | 26 | default: |
3372 | 16 | break; |
3373 | 26 | case Intrinsic::smul_with_overflow: |
3374 | 5 | if (const auto *C = dyn_cast<ConstantInt>(RHS)) |
3375 | 1 | if (C->getValue() == 2) |
3376 | 1 | IID = Intrinsic::sadd_with_overflow; |
3377 | 5 | break; |
3378 | 26 | case Intrinsic::umul_with_overflow: |
3379 | 5 | if (const auto *C = dyn_cast<ConstantInt>(RHS)) |
3380 | 1 | if (C->getValue() == 2) |
3381 | 1 | IID = Intrinsic::uadd_with_overflow; |
3382 | 5 | break; |
3383 | 26 | } |
3384 | 26 | |
3385 | 26 | AArch64CC::CondCode TmpCC; |
3386 | 26 | switch (IID) { |
3387 | 26 | default: |
3388 | 0 | return false; |
3389 | 26 | case Intrinsic::sadd_with_overflow: |
3390 | 9 | case Intrinsic::ssub_with_overflow: |
3391 | 9 | TmpCC = AArch64CC::VS; |
3392 | 9 | break; |
3393 | 9 | case Intrinsic::uadd_with_overflow: |
3394 | 5 | TmpCC = AArch64CC::HS; |
3395 | 5 | break; |
3396 | 9 | case Intrinsic::usub_with_overflow: |
3397 | 4 | TmpCC = AArch64CC::LO; |
3398 | 4 | break; |
3399 | 9 | case Intrinsic::smul_with_overflow: |
3400 | 8 | case Intrinsic::umul_with_overflow: |
3401 | 8 | TmpCC = AArch64CC::NE; |
3402 | 8 | break; |
3403 | 26 | } |
3404 | 26 | |
3405 | 26 | // Check if both instructions are in the same basic block. |
3406 | 26 | if (!isValueAvailable(II)) |
3407 | 0 | return false; |
3408 | 26 | |
3409 | 26 | // Make sure nothing is in the way |
3410 | 26 | BasicBlock::const_iterator Start(I); |
3411 | 26 | BasicBlock::const_iterator End(II); |
3412 | 66 | for (auto Itr = std::prev(Start); Itr != End; --Itr40 ) { |
3413 | 40 | // We only expect extractvalue instructions between the intrinsic and the |
3414 | 40 | // instruction to be selected. |
3415 | 40 | if (!isa<ExtractValueInst>(Itr)) |
3416 | 0 | return false; |
3417 | 40 | |
3418 | 40 | // Check that the extractvalue operand comes from the intrinsic. |
3419 | 40 | const auto *EVI = cast<ExtractValueInst>(Itr); |
3420 | 40 | if (EVI->getAggregateOperand() != II) |
3421 | 0 | return false; |
3422 | 40 | } |
3423 | 26 | |
3424 | 26 | CC = TmpCC; |
3425 | 26 | return true; |
3426 | 26 | } |
3427 | | |
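The condition codes chosen above come straight from how the flag-setting arithmetic reports overflow: signed add/sub overflow raises V (VS), an unsigned add that wraps sets the carry (HS), an unsigned sub that borrows clears it (LO), and the multiplies use a separate NE check. A standalone illustration of the unsigned-add case:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t a = 0xFFFFFFFFu, b = 2;
      uint32_t sum = a + b;              // wraps around
      bool carry = sum < a;              // the C flag an ADDS would set
      assert(carry);                     // HS holds, so the overflow bit is 1
      return 0;
    }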
3428 | 104 | bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { |
3429 | 104 | // FIXME: Handle more intrinsics. |
3430 | 104 | switch (II->getIntrinsicID()) { |
3431 | 104 | default: return false9 ; |
3432 | 104 | case Intrinsic::frameaddress: { |
3433 | 2 | MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo(); |
3434 | 2 | MFI.setFrameAddressIsTaken(true); |
3435 | 2 | |
3436 | 2 | const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); |
3437 | 2 | unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF)); |
3438 | 2 | unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); |
3439 | 2 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
3440 | 2 | TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr); |
3441 | 2 | // Recursively load frame address |
3442 | 2 | // ldr x0, [fp] |
3443 | 2 | // ldr x0, [x0] |
3444 | 2 | // ldr x0, [x0] |
3445 | 2 | // ... |
3446 | 2 | unsigned DestReg; |
3447 | 2 | unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue(); |
3448 | 4 | while (Depth--) { |
3449 | 2 | DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass, |
3450 | 2 | SrcReg, /*IsKill=*/true, 0); |
3451 | 2 | assert(DestReg && "Unexpected LDR instruction emission failure."); |
3452 | 2 | SrcReg = DestReg; |
3453 | 2 | } |
3454 | 2 | |
3455 | 2 | updateValueMap(II, SrcReg); |
3456 | 2 | return true; |
3457 | 104 | } |
3458 | 104 | case Intrinsic::sponentry: { |
3459 | 6 | MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo(); |
3460 | 6 | |
3461 | 6 | // SP = FP + Fixed Object + 16 |
3462 | 6 | int FI = MFI.CreateFixedObject(4, 0, false); |
3463 | 6 | unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass); |
3464 | 6 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
3465 | 6 | TII.get(AArch64::ADDXri), ResultReg) |
3466 | 6 | .addFrameIndex(FI) |
3467 | 6 | .addImm(0) |
3468 | 6 | .addImm(0); |
3469 | 6 | |
3470 | 6 | updateValueMap(II, ResultReg); |
3471 | 6 | return true; |
3472 | 104 | } |
3473 | 104 | case Intrinsic::memcpy: |
3474 | 20 | case Intrinsic::memmove: { |
3475 | 20 | const auto *MTI = cast<MemTransferInst>(II); |
3476 | 20 | // Don't handle volatile. |
3477 | 20 | if (MTI->isVolatile()) |
3478 | 0 | return false; |
3479 | 20 | |
3480 | 20 | // Disable inlining for memmove before calls to computeAddress. Otherwise,
3481 | 20 | // we would emit dead code because we don't currently handle memmoves. |
3482 | 20 | bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy); |
3483 | 20 | if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) { |
3484 | 19 | // Small memcpy's are common enough that we want to do them without a call |
3485 | 19 | // if possible. |
3486 | 19 | uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue(); |
3487 | 19 | unsigned Alignment = MinAlign(MTI->getDestAlignment(), |
3488 | 19 | MTI->getSourceAlignment()); |
3489 | 19 | if (isMemCpySmall(Len, Alignment)) { |
3490 | 13 | Address Dest, Src; |
3491 | 13 | if (!computeAddress(MTI->getRawDest(), Dest) || |
3492 | 13 | !computeAddress(MTI->getRawSource(), Src)) |
3493 | 0 | return false; |
3494 | 13 | if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment)) |
3495 | 13 | return true; |
3496 | 7 | } |
3497 | 19 | } |
3498 | 7 | |
3499 | 7 | if (!MTI->getLength()->getType()->isIntegerTy(64)) |
3500 | 0 | return false; |
3501 | 7 | |
3502 | 7 | if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255) |
3503 | 0 | // Fast instruction selection doesn't support the special |
3504 | 0 | // address spaces. |
3505 | 0 | return false; |
3506 | 7 | |
3507 | 7 | const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy"6 : "memmove"1 ; |
3508 | 7 | return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 1); |
3509 | 7 | } |
3510 | 7 | case Intrinsic::memset: { |
3511 | 1 | const MemSetInst *MSI = cast<MemSetInst>(II); |
3512 | 1 | // Don't handle volatile. |
3513 | 1 | if (MSI->isVolatile()) |
3514 | 0 | return false; |
3515 | 1 | |
3516 | 1 | if (!MSI->getLength()->getType()->isIntegerTy(64)) |
3517 | 0 | return false; |
3518 | 1 | |
3519 | 1 | if (MSI->getDestAddressSpace() > 255) |
3520 | 0 | // Fast instruction selection doesn't support the special |
3521 | 0 | // address spaces. |
3522 | 0 | return false; |
3523 | 1 | |
3524 | 1 | return lowerCallTo(II, "memset", II->getNumArgOperands() - 1); |
3525 | 1 | } |
3526 | 12 | case Intrinsic::sin: |
3527 | 12 | case Intrinsic::cos: |
3528 | 12 | case Intrinsic::pow: { |
3529 | 12 | MVT RetVT; |
3530 | 12 | if (!isTypeLegal(II->getType(), RetVT)) |
3531 | 0 | return false; |
3532 | 12 | |
3533 | 12 | if (RetVT != MVT::f32 && RetVT != MVT::f646 ) |
3534 | 0 | return false; |
3535 | 12 | |
3536 | 12 | static const RTLIB::Libcall LibCallTable[3][2] = { |
3537 | 12 | { RTLIB::SIN_F32, RTLIB::SIN_F64 }, |
3538 | 12 | { RTLIB::COS_F32, RTLIB::COS_F64 }, |
3539 | 12 | { RTLIB::POW_F32, RTLIB::POW_F64 } |
3540 | 12 | }; |
3541 | 12 | RTLIB::Libcall LC; |
3542 | 12 | bool Is64Bit = RetVT == MVT::f64; |
3543 | 12 | switch (II->getIntrinsicID()) { |
3544 | 12 | default: |
3545 | 0 | llvm_unreachable("Unexpected intrinsic."); |
3546 | 12 | case Intrinsic::sin: |
3547 | 4 | LC = LibCallTable[0][Is64Bit]; |
3548 | 4 | break; |
3549 | 12 | case Intrinsic::cos: |
3550 | 4 | LC = LibCallTable[1][Is64Bit]; |
3551 | 4 | break; |
3552 | 12 | case Intrinsic::pow: |
3553 | 4 | LC = LibCallTable[2][Is64Bit]; |
3554 | 4 | break; |
3555 | 12 | } |
3556 | 12 | |
3557 | 12 | ArgListTy Args; |
3558 | 12 | Args.reserve(II->getNumArgOperands()); |
3559 | 12 | |
3560 | 12 | // Populate the argument list. |
3561 | 16 | for (auto &Arg : II->arg_operands()) { |
3562 | 16 | ArgListEntry Entry; |
3563 | 16 | Entry.Val = Arg; |
3564 | 16 | Entry.Ty = Arg->getType(); |
3565 | 16 | Args.push_back(Entry); |
3566 | 16 | } |
3567 | 12 | |
3568 | 12 | CallLoweringInfo CLI; |
3569 | 12 | MCContext &Ctx = MF->getContext(); |
3570 | 12 | CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(), |
3571 | 12 | TLI.getLibcallName(LC), std::move(Args)); |
3572 | 12 | if (!lowerCallTo(CLI)) |
3573 | 0 | return false; |
3574 | 12 | updateValueMap(II, CLI.ResultReg); |
3575 | 12 | return true; |
3576 | 12 | } |
3577 | 12 | case Intrinsic::fabs: { |
3578 | 2 | MVT VT; |
3579 | 2 | if (!isTypeLegal(II->getType(), VT)) |
3580 | 0 | return false; |
3581 | 2 | |
3582 | 2 | unsigned Opc; |
3583 | 2 | switch (VT.SimpleTy) { |
3584 | 2 | default: |
3585 | 0 | return false; |
3586 | 2 | case MVT::f32: |
3587 | 1 | Opc = AArch64::FABSSr; |
3588 | 1 | break; |
3589 | 2 | case MVT::f64: |
3590 | 1 | Opc = AArch64::FABSDr; |
3591 | 1 | break; |
3592 | 2 | } |
3593 | 2 | unsigned SrcReg = getRegForValue(II->getOperand(0)); |
3594 | 2 | if (!SrcReg) |
3595 | 0 | return false; |
3596 | 2 | bool SrcRegIsKill = hasTrivialKill(II->getOperand(0)); |
3597 | 2 | unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); |
3598 | 2 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) |
3599 | 2 | .addReg(SrcReg, getKillRegState(SrcRegIsKill)); |
3600 | 2 | updateValueMap(II, ResultReg); |
3601 | 2 | return true; |
3602 | 2 | } |
3603 | 2 | case Intrinsic::trap: |
3604 | 1 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK)) |
3605 | 1 | .addImm(1); |
3606 | 1 | return true; |
3607 | 2 | case Intrinsic::debugtrap: { |
3608 | 1 | if (Subtarget->isTargetWindows()) { |
3609 | 1 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK)) |
3610 | 1 | .addImm(0xF000); |
3611 | 1 | return true; |
3612 | 1 | } |
3613 | 0 | break; |
3614 | 0 | } |
3615 | 0 |
3616 | 2 | case Intrinsic::sqrt: { |
3617 | 2 | Type *RetTy = II->getCalledFunction()->getReturnType(); |
3618 | 2 | |
3619 | 2 | MVT VT; |
3620 | 2 | if (!isTypeLegal(RetTy, VT)) |
3621 | 0 | return false; |
3622 | 2 | |
3623 | 2 | unsigned Op0Reg = getRegForValue(II->getOperand(0)); |
3624 | 2 | if (!Op0Reg) |
3625 | 0 | return false; |
3626 | 2 | bool Op0IsKill = hasTrivialKill(II->getOperand(0)); |
3627 | 2 | |
3628 | 2 | unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill); |
3629 | 2 | if (!ResultReg) |
3630 | 0 | return false; |
3631 | 2 | |
3632 | 2 | updateValueMap(II, ResultReg); |
3633 | 2 | return true; |
3634 | 2 | } |
3635 | 48 | case Intrinsic::sadd_with_overflow: |
3636 | 48 | case Intrinsic::uadd_with_overflow: |
3637 | 48 | case Intrinsic::ssub_with_overflow: |
3638 | 48 | case Intrinsic::usub_with_overflow: |
3639 | 48 | case Intrinsic::smul_with_overflow: |
3640 | 48 | case Intrinsic::umul_with_overflow: { |
3641 | 48 | // This implements the basic lowering of the xalu with overflow intrinsics. |
3642 | 48 | const Function *Callee = II->getCalledFunction(); |
3643 | 48 | auto *Ty = cast<StructType>(Callee->getReturnType()); |
3644 | 48 | Type *RetTy = Ty->getTypeAtIndex(0U); |
3645 | 48 | |
3646 | 48 | MVT VT; |
3647 | 48 | if (!isTypeLegal(RetTy, VT)) |
3648 | 0 | return false; |
3649 | 48 | |
3650 | 48 | if (VT != MVT::i32 && VT != MVT::i6425 ) |
3651 | 0 | return false; |
3652 | 48 | |
3653 | 48 | const Value *LHS = II->getArgOperand(0); |
3654 | 48 | const Value *RHS = II->getArgOperand(1); |
3655 | 48 | // Canonicalize immediate to the RHS. |
3656 | 48 | if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS)0 && |
3657 | 48 | isCommutativeIntrinsic(II)0 ) |
3658 | 0 | std::swap(LHS, RHS); |
3659 | 48 | |
3660 | 48 | // Simplify multiplies. |
3661 | 48 | Intrinsic::ID IID = II->getIntrinsicID(); |
3662 | 48 | switch (IID) { |
3663 | 48 | default: |
3664 | 31 | break; |
3665 | 48 | case Intrinsic::smul_with_overflow: |
3666 | 8 | if (const auto *C = dyn_cast<ConstantInt>(RHS)) |
3667 | 2 | if (C->getValue() == 2) { |
3668 | 2 | IID = Intrinsic::sadd_with_overflow; |
3669 | 2 | RHS = LHS; |
3670 | 2 | } |
3671 | 8 | break; |
3672 | 48 | case Intrinsic::umul_with_overflow: |
3673 | 9 | if (const auto *C = dyn_cast<ConstantInt>(RHS)) |
3674 | 3 | if (C->getValue() == 2) { |
3675 | 2 | IID = Intrinsic::uadd_with_overflow; |
3676 | 2 | RHS = LHS; |
3677 | 2 | } |
3678 | 9 | break; |
3679 | 48 | } |
3680 | 48 | |
3681 | 48 | unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0; |
3682 | 48 | AArch64CC::CondCode CC = AArch64CC::Invalid; |
3683 | 48 | switch (IID) { |
3684 | 48 | default: 0 llvm_unreachable0 ("Unexpected intrinsic!"); |
3685 | 48 | case Intrinsic::sadd_with_overflow: |
3686 | 14 | ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true); |
3687 | 14 | CC = AArch64CC::VS; |
3688 | 14 | break; |
3689 | 48 | case Intrinsic::uadd_with_overflow: |
3690 | 8 | ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true); |
3691 | 8 | CC = AArch64CC::HS; |
3692 | 8 | break; |
3693 | 48 | case Intrinsic::ssub_with_overflow: |
3694 | 7 | ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true); |
3695 | 7 | CC = AArch64CC::VS; |
3696 | 7 | break; |
3697 | 48 | case Intrinsic::usub_with_overflow: |
3698 | 6 | ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true); |
3699 | 6 | CC = AArch64CC::LO; |
3700 | 6 | break; |
3701 | 48 | case Intrinsic::smul_with_overflow: { |
3702 | 6 | CC = AArch64CC::NE; |
3703 | 6 | unsigned LHSReg = getRegForValue(LHS); |
3704 | 6 | if (!LHSReg) |
3705 | 0 | return false; |
3706 | 6 | bool LHSIsKill = hasTrivialKill(LHS); |
3707 | 6 | |
3708 | 6 | unsigned RHSReg = getRegForValue(RHS); |
3709 | 6 | if (!RHSReg) |
3710 | 0 | return false; |
3711 | 6 | bool RHSIsKill = hasTrivialKill(RHS); |
3712 | 6 | |
3713 | 6 | if (VT == MVT::i32) { |
3714 | 3 | MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill); |
3715 | 3 | unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg, |
3716 | 3 | /*IsKill=*/false, 32); |
3717 | 3 | MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true, |
3718 | 3 | AArch64::sub_32); |
3719 | 3 | ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true, |
3720 | 3 | AArch64::sub_32); |
3721 | 3 | emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false, |
3722 | 3 | AArch64_AM::ASR, 31, /*WantResult=*/false); |
3723 | 3 | } else { |
3724 | 3 | assert(VT == MVT::i64 && "Unexpected value type."); |
3725 | 3 | // LHSReg and RHSReg cannot be killed by this Mul, since they are |
3726 | 3 | // reused in the next instruction. |
3727 | 3 | MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg, |
3728 | 3 | /*IsKill=*/false); |
3729 | 3 | unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill, |
3730 | 3 | RHSReg, RHSIsKill); |
3731 | 3 | emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false, |
3732 | 3 | AArch64_AM::ASR, 63, /*WantResult=*/false); |
3733 | 3 | } |
3734 | 6 | break; |
3735 | 6 | } |
3736 | 7 | case Intrinsic::umul_with_overflow: { |
3737 | 7 | CC = AArch64CC::NE; |
3738 | 7 | unsigned LHSReg = getRegForValue(LHS); |
3739 | 7 | if (!LHSReg) |
3740 | 0 | return false; |
3741 | 7 | bool LHSIsKill = hasTrivialKill(LHS); |
3742 | 7 | |
3743 | 7 | unsigned RHSReg = getRegForValue(RHS); |
3744 | 7 | if (!RHSReg) |
3745 | 0 | return false; |
3746 | 7 | bool RHSIsKill = hasTrivialKill(RHS); |
3747 | 7 | |
3748 | 7 | if (VT == MVT::i32) { |
3749 | 3 | MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill); |
3750 | 3 | emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg, |
3751 | 3 | /*IsKill=*/false, AArch64_AM::LSR, 32, |
3752 | 3 | /*WantResult=*/false); |
3753 | 3 | MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true, |
3754 | 3 | AArch64::sub_32); |
3755 | 4 | } else { |
3756 | 4 | assert(VT == MVT::i64 && "Unexpected value type."); |
3757 | 4 | // LHSReg and RHSReg cannot be killed by this Mul, since they are |
3758 | 4 | // reused in the next instruction. |
3759 | 4 | MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg, |
3760 | 4 | /*IsKill=*/false); |
3761 | 4 | unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill, |
3762 | 4 | RHSReg, RHSIsKill); |
3763 | 4 | emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg, |
3764 | 4 | /*IsKill=*/false, /*WantResult=*/false); |
3765 | 4 | } |
3766 | 7 | break; |
3767 | 7 | } |
3768 | 48 | } |
3769 | 48 | |
3770 | 48 | if (MulReg) { |
3771 | 13 | ResultReg1 = createResultReg(TLI.getRegClassFor(VT)); |
3772 | 13 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
3773 | 13 | TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg); |
3774 | 13 | } |
3775 | 48 | |
3776 | 48 | if (!ResultReg1) |
3777 | 0 | return false; |
3778 | 48 | |
3779 | 48 | ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass, |
3780 | 48 | AArch64::WZR, /*IsKill=*/true, AArch64::WZR, |
3781 | 48 | /*IsKill=*/true, getInvertedCondCode(CC)); |
3782 | 48 | (void)ResultReg2; |
3783 | 48 | assert((ResultReg1 + 1) == ResultReg2 && |
3784 | 48 | "Nonconsecutive result registers."); |
3785 | 48 | updateValueMap(II, ResultReg1, 2); |
3786 | 48 | return true; |
3787 | 48 | } |
3788 | 0 | } |
3789 | 0 | return false; |
3790 | 0 | } |
3791 | | |
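The i32 smul.with.overflow check above works by widening: SMULL produces the full 64-bit product, and the product fits in 32 bits iff the high word equals the sign-extension of the low word (the SUBS against ASR #31). A standalone check:

    #include <cassert>
    #include <cstdint>

    int main() {
      int32_t a = 0x40000000, b = 4;           // 2^30 * 4 overflows i32
      int64_t wide = int64_t(a) * b;           // what SMULL computes
      int32_t lo = int32_t(wide);
      int32_t hi = int32_t(wide >> 32);
      assert(hi != (lo >> 31));                // SUBS ..., ASR #31 yields NE
      return 0;
    }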
3792 | 1.34k | bool AArch64FastISel::selectRet(const Instruction *I) { |
3793 | 1.34k | const ReturnInst *Ret = cast<ReturnInst>(I); |
3794 | 1.34k | const Function &F = *I->getParent()->getParent(); |
3795 | 1.34k | |
3796 | 1.34k | if (!FuncInfo.CanLowerReturn) |
3797 | 0 | return false; |
3798 | 1.34k | |
3799 | 1.34k | if (F.isVarArg()) |
3800 | 6 | return false; |
3801 | 1.33k | |
3802 | 1.33k | if (TLI.supportSwiftError() && |
3803 | 1.33k | F.getAttributes().hasAttrSomewhere(Attribute::SwiftError)) |
3804 | 11 | return false; |
3805 | 1.32k | |
3806 | 1.32k | if (TLI.supportSplitCSR(FuncInfo.MF)) |
3807 | 3 | return false; |
3808 | 1.32k | |
3809 | 1.32k | // Build a list of return value registers. |
3810 | 1.32k | SmallVector<unsigned, 4> RetRegs; |
3811 | 1.32k | |
3812 | 1.32k | if (Ret->getNumOperands() > 0) { |
3813 | 970 | CallingConv::ID CC = F.getCallingConv(); |
3814 | 970 | SmallVector<ISD::OutputArg, 4> Outs; |
3815 | 970 | GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL); |
3816 | 970 | |
3817 | 970 | // Analyze operands of the call, assigning locations to each operand. |
3818 | 970 | SmallVector<CCValAssign, 16> ValLocs; |
3819 | 970 | CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext()); |
3820 | 970 | CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS1 |
3821 | 970 | : RetCC_AArch64_AAPCS969 ; |
3822 | 970 | CCInfo.AnalyzeReturn(Outs, RetCC); |
3823 | 970 | |
3824 | 970 | // Only handle a single return value for now. |
3825 | 970 | if (ValLocs.size() != 1) |
3826 | 7 | return false; |
3827 | 963 | |
3828 | 963 | CCValAssign &VA = ValLocs[0]; |
3829 | 963 | const Value *RV = Ret->getOperand(0); |
3830 | 963 | |
3831 | 963 | // Don't bother handling odd stuff for now. |
3832 | 963 | if ((VA.getLocInfo() != CCValAssign::Full) && |
3833 | 963 | (VA.getLocInfo() != CCValAssign::BCvt)72 ) |
3834 | 0 | return false; |
3835 | 963 | |
3836 | 963 | // Only handle register returns for now. |
3837 | 963 | if (!VA.isRegLoc()) |
3838 | 0 | return false; |
3839 | 963 | |
3840 | 963 | unsigned Reg = getRegForValue(RV); |
3841 | 963 | if (Reg == 0) |
3842 | 4 | return false; |
3843 | 959 | |
3844 | 959 | unsigned SrcReg = Reg + VA.getValNo(); |
3845 | 959 | unsigned DestReg = VA.getLocReg(); |
3846 | 959 | // Avoid a cross-class copy. This is very unlikely. |
3847 | 959 | if (!MRI.getRegClass(SrcReg)->contains(DestReg)) |
3848 | 0 | return false; |
3849 | 959 | |
3850 | 959 | EVT RVEVT = TLI.getValueType(DL, RV->getType()); |
3851 | 959 | if (!RVEVT.isSimple()) |
3852 | 0 | return false; |
3853 | 959 | |
3854 | 959 | // Vectors (of > 1 lane) in big endian need tricky handling. |
3855 | 959 | if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 195 && |
3856 | 959 | !Subtarget->isLittleEndian()89 ) |
3857 | 60 | return false; |
3858 | 899 | |
3859 | 899 | MVT RVVT = RVEVT.getSimpleVT(); |
3860 | 899 | if (RVVT == MVT::f128) |
3861 | 8 | return false; |
3862 | 891 | |
3863 | 891 | MVT DestVT = VA.getValVT(); |
3864 | 891 | // Special handling for extended integers. |
3865 | 891 | if (RVVT != DestVT) { |
3866 | 173 | if (RVVT != MVT::i1 && RVVT != MVT::i874 && RVVT != MVT::i1644 ) |
3867 | 0 | return false; |
3868 | 173 | |
3869 | 173 | if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt()33 ) |
3870 | 22 | return false; |
3871 | 151 | |
3872 | 151 | bool IsZExt = Outs[0].Flags.isZExt(); |
3873 | 151 | SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt); |
3874 | 151 | if (SrcReg == 0) |
3875 | 0 | return false; |
3876 | 869 | } |
3877 | 869 | |
3878 | 869 | // Make the copy. |
3879 | 869 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
3880 | 869 | TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg); |
3881 | 869 | |
3882 | 869 | // Add register to return instruction. |
3883 | 869 | RetRegs.push_back(VA.getLocReg()); |
3884 | 869 | } |
3885 | 1.32k | |
3886 | 1.32k | MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
3887 | 1.22k | TII.get(AArch64::RET_ReallyLR)); |
3888 | 1.22k | for (unsigned RetReg : RetRegs) |
3889 | 869 | MIB.addReg(RetReg, RegState::Implicit); |
3890 | 1.22k | return true; |
3891 | 1.32k | } |
3892 | | |
3893 | 14 | bool AArch64FastISel::selectTrunc(const Instruction *I) { |
3894 | 14 | Type *DestTy = I->getType(); |
3895 | 14 | Value *Op = I->getOperand(0); |
3896 | 14 | Type *SrcTy = Op->getType(); |
3897 | 14 | |
3898 | 14 | EVT SrcEVT = TLI.getValueType(DL, SrcTy, true); |
3899 | 14 | EVT DestEVT = TLI.getValueType(DL, DestTy, true); |
3900 | 14 | if (!SrcEVT.isSimple()) |
3901 | 0 | return false; |
3902 | 14 | if (!DestEVT.isSimple()) |
3903 | 0 | return false; |
3904 | 14 | |
3905 | 14 | MVT SrcVT = SrcEVT.getSimpleVT(); |
3906 | 14 | MVT DestVT = DestEVT.getSimpleVT(); |
3907 | 14 | |
3908 | 14 | if (SrcVT != MVT::i64 && SrcVT != MVT::i328 && SrcVT != MVT::i164 && |
3909 | 14 | SrcVT != MVT::i82 ) |
3910 | 2 | return false; |
3911 | 12 | if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i88 && |
3912 | 12 | DestVT != MVT::i15 ) |
3913 | 0 | return false; |
3914 | 12 | |
3915 | 12 | unsigned SrcReg = getRegForValue(Op); |
3916 | 12 | if (!SrcReg) |
3917 | 0 | return false; |
3918 | 12 | bool SrcIsKill = hasTrivialKill(Op); |
3919 | 12 | |
3920 | 12 | // If we're truncating from i64 to a smaller non-legal type then generate an |
3921 | 12 | // AND. Otherwise, we know the high bits are undefined and a truncate only |
3922 | 12 | // generate a COPY. We cannot mark the source register also as result |
3923 | 12 | // register, because this can incorrectly transfer the kill flag onto the |
3924 | 12 | // source register. |
3925 | 12 | unsigned ResultReg; |
3926 | 12 | if (SrcVT == MVT::i64) { |
3927 | 6 | uint64_t Mask = 0; |
3928 | 6 | switch (DestVT.SimpleTy) { |
3929 | 6 | default: |
3930 | 0 | // Trunc i64 to i32 is handled by the target-independent fast-isel. |
3931 | 0 | return false; |
3932 | 6 | case MVT::i1: |
3933 | 3 | Mask = 0x1; |
3934 | 3 | break; |
3935 | 6 | case MVT::i8: |
3936 | 2 | Mask = 0xff; |
3937 | 2 | break; |
3938 | 6 | case MVT::i16: |
3939 | 1 | Mask = 0xffff; |
3940 | 1 | break; |
3941 | 6 | } |
3942 | 6 | // Issue an extract_subreg to get the lower 32 bits.
3943 | 6 | unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill, |
3944 | 6 | AArch64::sub_32); |
3945 | 6 | // Create the AND instruction which performs the actual truncation. |
3946 | 6 | ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask); |
3947 | 6 | assert(ResultReg && "Unexpected AND instruction emission failure."); |
3948 | 6 | } else { |
3949 | 6 | ResultReg = createResultReg(&AArch64::GPR32RegClass); |
3950 | 6 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
3951 | 6 | TII.get(TargetOpcode::COPY), ResultReg) |
3952 | 6 | .addReg(SrcReg, getKillRegState(SrcIsKill)); |
3953 | 6 | } |
3954 | 12 | |
3955 | 12 | updateValueMap(I, ResultReg); |
3956 | 12 | return true; |
3957 | 12 | } |
3958 | | |
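A standalone check of the i64 truncation path above: take the W subregister, then mask so the now-out-of-range bits are provably zero:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t x = 0x1234567890ABCDEFull;
      uint32_t w = static_cast<uint32_t>(x);   // EXTRACT_SUBREG sub_32
      assert((w & 0xff) == 0xEF);              // the ANDWri mask for MVT::i8
      return 0;
    }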
3959 | 141 | unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) { |
3960 | 141 | assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 || |
3961 | 141 | DestVT == MVT::i64) && |
3962 | 141 | "Unexpected value type."); |
3963 | 141 | // Handle i8 and i16 as i32. |
3964 | 141 | if (DestVT == MVT::i8 || DestVT == MVT::i16140 ) |
3965 | 3 | DestVT = MVT::i32; |
3966 | 141 | |
3967 | 141 | if (IsZExt) { |
3968 | 132 | unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1); |
3969 | 132 | assert(ResultReg && "Unexpected AND instruction emission failure."); |
3970 | 132 | if (DestVT == MVT::i64) { |
3971 | 0 | // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the |
3972 | 0 | // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd. |
3973 | 0 | unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); |
3974 | 0 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
3975 | 0 | TII.get(AArch64::SUBREG_TO_REG), Reg64) |
3976 | 0 | .addImm(0) |
3977 | 0 | .addReg(ResultReg) |
3978 | 0 | .addImm(AArch64::sub_32); |
3979 | 0 | ResultReg = Reg64; |
3980 | 0 | } |
3981 | 132 | return ResultReg; |
3982 | 132 | } else { |
3983 | 9 | if (DestVT == MVT::i64) { |
3984 | 0 | // FIXME: We're SExt i1 to i64. |
3985 | 0 | return 0; |
3986 | 0 | } |
3987 | 9 | return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg, |
3988 | 9 | /*TODO:IsKill=*/false, 0, 0); |
3989 | 9 | } |
3990 | 141 | } |
3991 | | |
3992 | | unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill, |
3993 | 19 | unsigned Op1, bool Op1IsKill) { |
3994 | 19 | unsigned Opc, ZReg; |
3995 | 19 | switch (RetVT.SimpleTy) { |
3996 | 19 | default: return 00 ; |
3997 | 19 | case MVT::i8: |
3998 | 5 | case MVT::i16: |
3999 | 5 | case MVT::i32: |
4000 | 5 | RetVT = MVT::i32; |
4001 | 5 | Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break; |
4002 | 14 | case MVT::i64: |
4003 | 14 | Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break; |
4004 | 19 | } |
4005 | 19 | |
4006 | 19 | const TargetRegisterClass *RC = |
4007 | 19 | (RetVT == MVT::i64) ? &AArch64::GPR64RegClass14 : &AArch64::GPR32RegClass5 ; |
4008 | 19 | return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill, |
4009 | 19 | ZReg, /*IsKill=*/true);
4010 | 19 | } |
4011 | | |
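emitMul_rr leans on the fact that AArch64 has no plain three-operand multiply: MUL is the zero-addend alias of MADD, so passing WZR/XZR as the accumulator yields the product. Trivially, in standalone form:

    #include <cassert>

    int main() {
      auto madd = [](int n, int m, int a) { return n * m + a; };
      assert(madd(6, 7, /*ZReg=*/0) == 6 * 7); // "mul" == "madd ..., wzr"
      return 0;
    }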
4012 | | unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill, |
4013 | 3 | unsigned Op1, bool Op1IsKill) { |
4014 | 3 | if (RetVT != MVT::i64) |
4015 | 0 | return 0; |
4016 | 3 | |
4017 | 3 | return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass, |
4018 | 3 | Op0, Op0IsKill, Op1, Op1IsKill, |
4019 | 3 | AArch64::XZR, /*IsKill=*/true); |
4020 | 3 | } |
4021 | | |
4022 | | unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill, |
4023 | 3 | unsigned Op1, bool Op1IsKill) { |
4024 | 3 | if (RetVT != MVT::i64) |
4025 | 0 | return 0; |
4026 | 3 | |
4027 | 3 | return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass, |
4028 | 3 | Op0, Op0IsKill, Op1, Op1IsKill, |
4029 | 3 | AArch64::XZR, /*IsKill=*/true); |
4030 | 3 | } |
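
SMADDLrrr and UMADDLrrr are the widening forms: two 32-bit sources, a 64-bit accumulator, and a sign- or zero-extended 32x32->64 product. With XZR as the accumulator they reduce to the bare widening multiplies these helpers need. A host-side sketch (the model functions are illustrative):

    #include <cassert>
    #include <cstdint>

    // SMADDLrrr Xd, Wn, Wm, Xa = Xa + sext(Wn) * sext(Wm)
    static int64_t smaddlModel(int32_t n, int32_t m, int64_t a) {
      return a + int64_t(n) * int64_t(m);
    }
    // UMADDLrrr Xd, Wn, Wm, Xa = Xa + zext(Wn) * zext(Wm)
    static uint64_t umaddlModel(uint32_t n, uint32_t m, uint64_t a) {
      return a + uint64_t(n) * uint64_t(m);
    }

    int main() {
      assert(smaddlModel(-2, 3, 0) == -6);
      assert(umaddlModel(0xffffffffu, 2, 0) == 0x1fffffffeULL);
    }
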
4031 | | |
4032 | | unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, |
4033 | 4 | unsigned Op1Reg, bool Op1IsKill) { |
4034 | 4 | unsigned Opc = 0; |
4035 | 4 | bool NeedTrunc = false; |
4036 | 4 | uint64_t Mask = 0; |
4037 | 4 | switch (RetVT.SimpleTy) { |
4038 | 4 | default: return 0;
4039 | 4 | case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
4040 | 4 | case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4041 | 4 | case MVT::i32: Opc = AArch64::LSLVWr; break;
4042 | 4 | case MVT::i64: Opc = AArch64::LSLVXr; break;
4043 | 4 | } |
4044 | 4 | |
4045 | 4 | const TargetRegisterClass *RC = |
4046 | 4 | (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4047 | 4 | if (NeedTrunc) { |
4048 | 2 | Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask); |
4049 | 2 | Op1IsKill = true; |
4050 | 2 | } |
4051 | 4 | unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg, |
4052 | 4 | Op1IsKill); |
4053 | 4 | if (NeedTrunc) |
4054 | 2 | ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); |
4055 | 4 | return ResultReg; |
4056 | 4 | } |
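
For i8 and i16 the value sits in a W register whose upper bits are not defined by the narrow type, so the variable-shift path masks the shift amount before LSLVWr and re-truncates the result afterwards. A sketch of the i8 case, assuming LSLV's documented modulo-32 treatment of the amount:

    #include <cassert>
    #include <cstdint>

    // LSLVWr shifts the full 32-bit register; the amount is taken modulo 32.
    static uint32_t lslvW(uint32_t n, uint32_t amt) { return n << (amt & 31); }

    // The i8 path of emitLSL_rr: AND the amount, shift, AND the result.
    static uint32_t shlI8(uint32_t op0, uint32_t op1) {
      uint32_t amt = op1 & 0xff;     // emitAnd_ri on the shift amount
      return lslvW(op0, amt) & 0xff; // LSLVWr, then emitAnd_ri on the result
    }

    int main() {
      assert(shlI8(0x81, 1) == 0x02); // the bit shifted past 8 bits is dropped
    }
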
4057 | | |
4058 | | unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0, |
4059 | | bool Op0IsKill, uint64_t Shift, |
4060 | 53 | bool IsZExt) { |
4061 | 53 | assert(RetVT.SimpleTy >= SrcVT.SimpleTy && |
4062 | 53 | "Unexpected source/return type pair."); |
4063 | 53 | assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || |
4064 | 53 | SrcVT == MVT::i32 || SrcVT == MVT::i64) && |
4065 | 53 | "Unexpected source value type."); |
4066 | 53 | assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || |
4067 | 53 | RetVT == MVT::i64) && "Unexpected return value type."); |
4068 | 53 | |
4069 | 53 | bool Is64Bit = (RetVT == MVT::i64); |
4070 | 53 | unsigned RegSize = Is64Bit ? 64 : 32;
4071 | 53 | unsigned DstBits = RetVT.getSizeInBits();
4072 | 53 | unsigned SrcBits = SrcVT.getSizeInBits();
4073 | 53 | const TargetRegisterClass *RC =
4074 | 53 | Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4075 | 53 | |
4076 | 53 | // Just emit a copy for "zero" shifts. |
4077 | 53 | if (Shift == 0) { |
4078 | 2 | if (RetVT == SrcVT) { |
4079 | 1 | unsigned ResultReg = createResultReg(RC); |
4080 | 1 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
4081 | 1 | TII.get(TargetOpcode::COPY), ResultReg) |
4082 | 1 | .addReg(Op0, getKillRegState(Op0IsKill)); |
4083 | 1 | return ResultReg; |
4084 | 1 | } else |
4085 | 1 | return emitIntExt(SrcVT, Op0, RetVT, IsZExt); |
4086 | 51 | } |
4087 | 51 | |
4088 | 51 | // Don't deal with undefined shifts. |
4089 | 51 | if (Shift >= DstBits) |
4090 | 14 | return 0; |
4091 | 37 | |
4092 | 37 | // For immediate shifts we can fold the zero-/sign-extension into the shift. |
4093 | 37 | // {S|U}BFM Wd, Wn, #r, #s |
4094 | 37 | // Wd<32+s-r,32-r> = Wn<s:0> when r > s |
4095 | 37 | |
4096 | 37 | // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 |
4097 | 37 | // %2 = shl i16 %1, 4 |
4098 | 37 | // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7 |
4099 | 37 | // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext |
4100 | 37 | // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext |
4101 | 37 | // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext |
4102 | 37 | |
4103 | 37 | // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 |
4104 | 37 | // %2 = shl i16 %1, 8 |
4105 | 37 | // Wd<32+7-24,32-24> = Wn<7:0> |
4106 | 37 | // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext |
4107 | 37 | // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext |
4108 | 37 | // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext |
4109 | 37 | |
4110 | 37 | // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 |
4111 | 37 | // %2 = shl i16 %1, 12 |
4112 | 37 | // Wd<32+3-20,32-20> = Wn<3:0> |
4113 | 37 | // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext |
4114 | 37 | // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext |
4115 | 37 | // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext |
4116 | 37 | |
4117 | 37 | unsigned ImmR = RegSize - Shift; |
4118 | 37 | // Limit the width to the length of the source type. |
4119 | 37 | unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift); |
4120 | 37 | static const unsigned OpcTable[2][2] = { |
4121 | 37 | {AArch64::SBFMWri, AArch64::SBFMXri}, |
4122 | 37 | {AArch64::UBFMWri, AArch64::UBFMXri} |
4123 | 37 | }; |
4124 | 37 | unsigned Opc = OpcTable[IsZExt][Is64Bit]; |
4125 | 37 | if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4126 | 10 | unsigned TmpReg = MRI.createVirtualRegister(RC); |
4127 | 10 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
4128 | 10 | TII.get(AArch64::SUBREG_TO_REG), TmpReg) |
4129 | 10 | .addImm(0) |
4130 | 10 | .addReg(Op0, getKillRegState(Op0IsKill)) |
4131 | 10 | .addImm(AArch64::sub_32); |
4132 | 10 | Op0 = TmpReg; |
4133 | 10 | Op0IsKill = true; |
4134 | 10 | } |
4135 | 37 | return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS); |
4136 | 37 | } |
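
The net effect of the immediate path is that a zero-/sign-extend followed by a left shift collapses into a single UBFM/SBFM whose immediates deposit the (clamped) source field at the shifted position. A host-side sketch, assuming a simplified 32-bit UBFM model that is accurate for the in-range immediates computed above:

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    // Simplified 32-bit UBFM: for s < r it deposits Wn<s:0> at bit (32 - r).
    static uint32_t ubfm32(uint32_t wn, unsigned r, unsigned s) {
      if (s >= r)
        return (wn >> r) & ((1u << (s - r + 1)) - 1);  // bitfield extract
      return (wn & ((1u << (s + 1)) - 1)) << (32 - r); // insert in zero
    }

    int main() {
      // zext i8 %x to i32, then shl by 4, with the immediates from emitLSL_ri:
      unsigned Shift = 4, SrcBits = 8, DstBits = 32, RegSize = 32;
      unsigned ImmR = RegSize - Shift;                            // 28
      unsigned ImmS = std::min(SrcBits - 1, DstBits - 1 - Shift); // 7
      uint32_t X = 0xAB;
      assert(ubfm32(X, ImmR, ImmS) == ((X & 0xff) << Shift)); // one UBFM
    }
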
4137 | | |
4138 | | unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, |
4139 | 4 | unsigned Op1Reg, bool Op1IsKill) { |
4140 | 4 | unsigned Opc = 0; |
4141 | 4 | bool NeedTrunc = false; |
4142 | 4 | uint64_t Mask = 0; |
4143 | 4 | switch (RetVT.SimpleTy) { |
4144 | 4 | default: return 0;
4145 | 4 | case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
4146 | 4 | case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4147 | 4 | case MVT::i32: Opc = AArch64::LSRVWr; break;
4148 | 4 | case MVT::i64: Opc = AArch64::LSRVXr; break;
4149 | 4 | } |
4150 | 4 | |
4151 | 4 | const TargetRegisterClass *RC = |
4152 | 4 | (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4153 | 4 | if (NeedTrunc) { |
4154 | 2 | Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask); |
4155 | 2 | Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask); |
4156 | 2 | Op0IsKill = Op1IsKill = true; |
4157 | 2 | } |
4158 | 4 | unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg, |
4159 | 4 | Op1IsKill); |
4160 | 4 | if (NeedTrunc) |
4161 | 2 | ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); |
4162 | 4 | return ResultReg; |
4163 | 4 | } |
4164 | | |
4165 | | unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, |
4166 | | bool Op0IsKill, uint64_t Shift, |
4167 | 23 | bool IsZExt) { |
4168 | 23 | assert(RetVT.SimpleTy >= SrcVT.SimpleTy && |
4169 | 23 | "Unexpected source/return type pair."); |
4170 | 23 | assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || |
4171 | 23 | SrcVT == MVT::i32 || SrcVT == MVT::i64) && |
4172 | 23 | "Unexpected source value type."); |
4173 | 23 | assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || |
4174 | 23 | RetVT == MVT::i64) && "Unexpected return value type."); |
4175 | 23 | |
4176 | 23 | bool Is64Bit = (RetVT == MVT::i64); |
4177 | 23 | unsigned RegSize = Is64Bit ? 64 : 32;
4178 | 23 | unsigned DstBits = RetVT.getSizeInBits();
4179 | 23 | unsigned SrcBits = SrcVT.getSizeInBits();
4180 | 23 | const TargetRegisterClass *RC =
4181 | 23 | Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4182 | 23 | |
4183 | 23 | // Just emit a copy for "zero" shifts. |
4184 | 23 | if (Shift == 0) { |
4185 | 2 | if (RetVT == SrcVT) { |
4186 | 1 | unsigned ResultReg = createResultReg(RC); |
4187 | 1 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
4188 | 1 | TII.get(TargetOpcode::COPY), ResultReg) |
4189 | 1 | .addReg(Op0, getKillRegState(Op0IsKill)); |
4190 | 1 | return ResultReg; |
4191 | 1 | } else |
4192 | 1 | return emitIntExt(SrcVT, Op0, RetVT, IsZExt); |
4193 | 21 | } |
4194 | 21 | |
4195 | 21 | // Don't deal with undefined shifts. |
4196 | 21 | if (Shift >= DstBits) |
4197 | 0 | return 0; |
4198 | 21 | |
4199 | 21 | // For immediate shifts we can fold the zero-/sign-extension into the shift. |
4200 | 21 | // {S|U}BFM Wd, Wn, #r, #s |
4201 | 21 | // Wd<s-r:0> = Wn<s:r> when r <= s |
4202 | 21 | |
4203 | 21 | // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 |
4204 | 21 | // %2 = lshr i16 %1, 4 |
4205 | 21 | // Wd<7-4:0> = Wn<7:4> |
4206 | 21 | // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext |
4207 | 21 | // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext |
4208 | 21 | // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext |
4209 | 21 | |
4210 | 21 | // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 |
4211 | 21 | // %2 = lshr i16 %1, 8 |
4212 | 21 | // Wd<7-7,0> = Wn<7:7> |
4213 | 21 | // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext |
4214 | 21 | // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext |
4215 | 21 | // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext |
4216 | 21 | |
4217 | 21 | // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 |
4218 | 21 | // %2 = lshr i16 %1, 12 |
4219 | 21 | // Wd<7-7,0> = Wn<7:7> <- clamp r to 7 |
4220 | 21 | // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext |
4221 | 21 | // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext |
4222 | 21 | // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext |
4223 | 21 | |
4224 | 21 | if (Shift >= SrcBits && IsZExt)
4225 | 3 | return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT); |
4226 | 18 | |
4227 | 18 | // It is not possible to fold a sign-extend into the LShr instruction. In this |
4228 | 18 | // case emit a sign-extend. |
4229 | 18 | if (!IsZExt) { |
4230 | 4 | Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt); |
4231 | 4 | if (!Op0) |
4232 | 0 | return 0; |
4233 | 4 | Op0IsKill = true; |
4234 | 4 | SrcVT = RetVT; |
4235 | 4 | SrcBits = SrcVT.getSizeInBits(); |
4236 | 4 | IsZExt = true; |
4237 | 4 | } |
4238 | 18 | |
4239 | 18 | unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift); |
4240 | 18 | unsigned ImmS = SrcBits - 1; |
4241 | 18 | static const unsigned OpcTable[2][2] = { |
4242 | 18 | {AArch64::SBFMWri, AArch64::SBFMXri}, |
4243 | 18 | {AArch64::UBFMWri, AArch64::UBFMXri} |
4244 | 18 | }; |
4245 | 18 | unsigned Opc = OpcTable[IsZExt][Is64Bit]; |
4246 | 18 | if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4247 | 0 | unsigned TmpReg = MRI.createVirtualRegister(RC); |
4248 | 0 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
4249 | 0 | TII.get(AArch64::SUBREG_TO_REG), TmpReg) |
4250 | 0 | .addImm(0) |
4251 | 0 | .addReg(Op0, getKillRegState(Op0IsKill)) |
4252 | 0 | .addImm(AArch64::sub_32); |
4253 | 0 | Op0 = TmpReg; |
4254 | 0 | Op0IsKill = true; |
4255 | 0 | } |
4256 | 18 | return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS); |
4257 | 18 | } |
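
For right shifts the immediates flip roles: ImmR names the lowest and ImmS the highest source bit, so zero-extend plus lshr becomes one UBFM bitfield extract. A quick check of the immediates chosen above (ubfmExtract is an illustrative model):

    #include <cassert>
    #include <cstdint>

    // UBFM as a bitfield extract of Wn<s:r> (valid while r <= s).
    static uint32_t ubfmExtract(uint32_t wn, unsigned r, unsigned s) {
      return (wn >> r) & ((1u << (s - r + 1)) - 1);
    }

    int main() {
      // zext i8 %x to i32, then lshr by 4: ImmR = min(7, 4) = 4, ImmS = 7.
      uint32_t X = 0xAB; // already zero-extended i8
      assert(ubfmExtract(X, 4, 7) == (X >> 4));
    }
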
4258 | | |
4259 | | unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, |
4260 | 4 | unsigned Op1Reg, bool Op1IsKill) { |
4261 | 4 | unsigned Opc = 0; |
4262 | 4 | bool NeedTrunc = false; |
4263 | 4 | uint64_t Mask = 0; |
4264 | 4 | switch (RetVT.SimpleTy) { |
4265 | 4 | default: return 0;
4266 | 4 | case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
4267 | 4 | case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4268 | 4 | case MVT::i32: Opc = AArch64::ASRVWr; break;
4269 | 4 | case MVT::i64: Opc = AArch64::ASRVXr; break;
4270 | 4 | } |
4271 | 4 | |
4272 | 4 | const TargetRegisterClass *RC = |
4273 | 4 | (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4274 | 4 | if (NeedTrunc) { |
4275 | 2 | Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false); |
4276 | 2 | Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask); |
4277 | 2 | Op0IsKill = Op1IsKill = true; |
4278 | 2 | } |
4279 | 4 | unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg, |
4280 | 4 | Op1IsKill); |
4281 | 4 | if (NeedTrunc) |
4282 | 2 | ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); |
4283 | 4 | return ResultReg; |
4284 | 4 | } |
4285 | | |
4286 | | unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, |
4287 | | bool Op0IsKill, uint64_t Shift, |
4288 | 28 | bool IsZExt) { |
4289 | 28 | assert(RetVT.SimpleTy >= SrcVT.SimpleTy && |
4290 | 28 | "Unexpected source/return type pair."); |
4291 | 28 | assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || |
4292 | 28 | SrcVT == MVT::i32 || SrcVT == MVT::i64) && |
4293 | 28 | "Unexpected source value type."); |
4294 | 28 | assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || |
4295 | 28 | RetVT == MVT::i64) && "Unexpected return value type."); |
4296 | 28 | |
4297 | 28 | bool Is64Bit = (RetVT == MVT::i64); |
4298 | 28 | unsigned RegSize = Is64Bit ? 64 : 32;
4299 | 28 | unsigned DstBits = RetVT.getSizeInBits();
4300 | 28 | unsigned SrcBits = SrcVT.getSizeInBits();
4301 | 28 | const TargetRegisterClass *RC =
4302 | 28 | Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4303 | 28 | |
4304 | 28 | // Just emit a copy for "zero" shifts. |
4305 | 28 | if (Shift == 0) { |
4306 | 2 | if (RetVT == SrcVT) { |
4307 | 1 | unsigned ResultReg = createResultReg(RC); |
4308 | 1 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
4309 | 1 | TII.get(TargetOpcode::COPY), ResultReg) |
4310 | 1 | .addReg(Op0, getKillRegState(Op0IsKill)); |
4311 | 1 | return ResultReg; |
4312 | 1 | } else |
4313 | 1 | return emitIntExt(SrcVT, Op0, RetVT, IsZExt); |
4314 | 26 | } |
4315 | 26 | |
4316 | 26 | // Don't deal with undefined shifts. |
4317 | 26 | if (Shift >= DstBits) |
4318 | 0 | return 0; |
4319 | 26 | |
4320 | 26 | // For immediate shifts we can fold the zero-/sign-extension into the shift. |
4321 | 26 | // {S|U}BFM Wd, Wn, #r, #s |
4322 | 26 | // Wd<s-r:0> = Wn<s:r> when r <= s |
4323 | 26 | |
4324 | 26 | // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 |
4325 | 26 | // %2 = ashr i16 %1, 4 |
4326 | 26 | // Wd<7-4:0> = Wn<7:4> |
4327 | 26 | // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext |
4328 | 26 | // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext |
4329 | 26 | // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext |
4330 | 26 | |
4331 | 26 | // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 |
4332 | 26 | // %2 = ashr i16 %1, 8 |
4333 | 26 | // Wd<7-7,0> = Wn<7:7> |
4334 | 26 | // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext |
4335 | 26 | // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext |
4336 | 26 | // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext |
4337 | 26 | |
4338 | 26 | // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 |
4339 | 26 | // %2 = ashr i16 %1, 12 |
4340 | 26 | // Wd<7-7,0> = Wn<7:7> <- clamp r to 7 |
4341 | 26 | // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext |
4342 | 26 | // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext |
4343 | 26 | // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext |
4344 | 26 | |
4345 | 26 | if (Shift >= SrcBits && IsZExt)
4346 | 3 | return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT); |
4347 | 23 | |
4348 | 23 | unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift); |
4349 | 23 | unsigned ImmS = SrcBits - 1; |
4350 | 23 | static const unsigned OpcTable[2][2] = { |
4351 | 23 | {AArch64::SBFMWri, AArch64::SBFMXri}, |
4352 | 23 | {AArch64::UBFMWri, AArch64::UBFMXri} |
4353 | 23 | }; |
4354 | 23 | unsigned Opc = OpcTable[IsZExt][Is64Bit]; |
4355 | 23 | if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4356 | 1 | unsigned TmpReg = MRI.createVirtualRegister(RC); |
4357 | 1 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
4358 | 1 | TII.get(AArch64::SUBREG_TO_REG), TmpReg) |
4359 | 1 | .addImm(0) |
4360 | 1 | .addReg(Op0, getKillRegState(Op0IsKill)) |
4361 | 1 | .addImm(AArch64::sub_32); |
4362 | 1 | Op0 = TmpReg; |
4363 | 1 | Op0IsKill = true; |
4364 | 1 | } |
4365 | 23 | return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS); |
4366 | 23 | } |
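
SBFM extracts Wn<ImmS:ImmR> and sign-extends from the top bit of the field, which is why sign-extend-then-ashr folds into one instruction here. A host-side sketch (sbfmExtract is an illustrative model; it assumes the usual arithmetic right shift for negative int32_t):

    #include <cassert>
    #include <cstdint>

    // Extract Wn<s:r> and sign-extend from the field's top bit.
    static int32_t sbfmExtract(uint32_t wn, unsigned r, unsigned s) {
      uint32_t field = (wn >> r) & ((1u << (s - r + 1)) - 1);
      uint32_t sign = 1u << (s - r); // top bit of the extracted field
      return int32_t((field ^ sign) - sign);
    }

    int main() {
      // sext i8 %x to i32, then ashr by 4: ImmR = min(7, 4) = 4, ImmS = 7.
      uint32_t X = 0xAA; // i8 0b1010_1010, i.e. -86 once sign-extended
      assert(sbfmExtract(X, 4, 7) == int32_t(int8_t(X)) >> 4); // both give -6
    }
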
4367 | | |
4368 | | unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, |
4369 | 415 | bool IsZExt) { |
4370 | 415 | assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?"); |
4371 | 415 | |
4372 | 415 | // FastISel does not have plumbing to deal with extensions where the SrcVT or |
4373 | 415 | // DestVT are odd things, so test to make sure that they are both types we can |
4374 | 415 | // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise |
4375 | 415 | // bail out to SelectionDAG. |
4376 | 415 | if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4377 | 415 | (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4378 | 415 | ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
4379 | 415 | (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
4380 | 0 | return 0; |
4381 | 415 | |
4382 | 415 | unsigned Opc; |
4383 | 415 | unsigned Imm = 0; |
4384 | 415 | |
4385 | 415 | switch (SrcVT.SimpleTy) { |
4386 | 415 | default: |
4387 | 0 | return 0; |
4388 | 415 | case MVT::i1: |
4389 | 141 | return emiti1Ext(SrcReg, DestVT, IsZExt); |
4390 | 415 | case MVT::i8: |
4391 | 146 | if (DestVT == MVT::i64) |
4392 | 15 | Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4393 | 131 | else
4394 | 131 | Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4395 | 146 | Imm = 7; |
4396 | 146 | break; |
4397 | 415 | case MVT::i16: |
4398 | 97 | if (DestVT == MVT::i64) |
4399 | 15 | Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4400 | 82 | else
4401 | 82 | Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4402 | 97 | Imm = 15; |
4403 | 97 | break; |
4404 | 415 | case MVT::i32: |
4405 | 31 | assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?"); |
4406 | 31 | Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4407 | 31 | Imm = 31; |
4408 | 31 | break; |
4409 | 274 | } |
4410 | 274 | |
4411 | 274 | // Handle i8 and i16 as i32. |
4412 | 274 | if (DestVT == MVT::i8 || DestVT == MVT::i16) |
4413 | 3 | DestVT = MVT::i32; |
4414 | 271 | else if (DestVT == MVT::i64) { |
4415 | 61 | unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); |
4416 | 61 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
4417 | 61 | TII.get(AArch64::SUBREG_TO_REG), Src64) |
4418 | 61 | .addImm(0) |
4419 | 61 | .addReg(SrcReg) |
4420 | 61 | .addImm(AArch64::sub_32); |
4421 | 61 | SrcReg = Src64; |
4422 | 61 | } |
4423 | 274 | |
4424 | 274 | const TargetRegisterClass *RC = |
4425 | 274 | (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4426 | 274 | return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm); |
4427 | 274 | } |
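
With ImmR fixed at 0 and Imm playing the role of ImmS, the bitfield moves above degenerate into the familiar extensions: UBFMWri #0,#7 is uxtb, #0,#15 is uxth, and UBFMXri #0,#31 is the i32->i64 zero-extension fed through SUBREG_TO_REG. A sketch of the ImmR = 0 case (ubfmImmR0 is an illustrative model):

    #include <cassert>
    #include <cstdint>

    // UBFM with ImmR = 0 keeps bits ImmS..0 and clears the rest.
    static uint64_t ubfmImmR0(uint64_t xn, unsigned imms) {
      return (imms >= 63) ? xn : (xn & ((1ull << (imms + 1)) - 1));
    }

    int main() {
      assert(ubfmImmR0(0x1234, 7) == 0x34);                   // uxtb
      assert(ubfmImmR0(0xffffffff00001234ull, 31) == 0x1234); // i32->i64 zext
    }
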
4428 | | |
4429 | 1 | static bool isZExtLoad(const MachineInstr *LI) { |
4430 | 1 | switch (LI->getOpcode()) { |
4431 | 1 | default: |
4432 | 0 | return false; |
4433 | 1 | case AArch64::LDURBBi: |
4434 | 1 | case AArch64::LDURHHi: |
4435 | 1 | case AArch64::LDURWi: |
4436 | 1 | case AArch64::LDRBBui: |
4437 | 1 | case AArch64::LDRHHui: |
4438 | 1 | case AArch64::LDRWui: |
4439 | 1 | case AArch64::LDRBBroX: |
4440 | 1 | case AArch64::LDRHHroX: |
4441 | 1 | case AArch64::LDRWroX: |
4442 | 1 | case AArch64::LDRBBroW: |
4443 | 1 | case AArch64::LDRHHroW: |
4444 | 1 | case AArch64::LDRWroW: |
4445 | 1 | return true; |
4446 | 1 | } |
4447 | 1 | } |
4448 | | |
4449 | | static bool isSExtLoad(const MachineInstr *LI) { |
4450 | | switch (LI->getOpcode()) { |
4451 | | default: |
4452 | | return false; |
4453 | | case AArch64::LDURSBWi: |
4454 | | case AArch64::LDURSHWi: |
4455 | | case AArch64::LDURSBXi: |
4456 | | case AArch64::LDURSHXi: |
4457 | | case AArch64::LDURSWi: |
4458 | | case AArch64::LDRSBWui: |
4459 | | case AArch64::LDRSHWui: |
4460 | | case AArch64::LDRSBXui: |
4461 | | case AArch64::LDRSHXui: |
4462 | | case AArch64::LDRSWui: |
4463 | | case AArch64::LDRSBWroX: |
4464 | | case AArch64::LDRSHWroX: |
4465 | | case AArch64::LDRSBXroX: |
4466 | | case AArch64::LDRSHXroX: |
4467 | | case AArch64::LDRSWroX: |
4468 | | case AArch64::LDRSBWroW: |
4469 | | case AArch64::LDRSHWroW: |
4470 | | case AArch64::LDRSBXroW: |
4471 | | case AArch64::LDRSHXroW: |
4472 | | case AArch64::LDRSWroW: |
4473 | | return true; |
4474 | | } |
4475 | | } |
4476 | | |
4477 | | bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT, |
4478 | 199 | MVT SrcVT) { |
4479 | 199 | const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)); |
4480 | 199 | if (!LI || !LI->hasOneUse())
4481 | 110 | return false; |
4482 | 89 | |
4483 | 89 | // Check if the load instruction has already been selected. |
4484 | 89 | unsigned Reg = lookUpRegForValue(LI); |
4485 | 89 | if (!Reg) |
4486 | 88 | return false; |
4487 | 1 | |
4488 | 1 | MachineInstr *MI = MRI.getUniqueVRegDef(Reg); |
4489 | 1 | if (!MI) |
4490 | 0 | return false; |
4491 | 1 | |
4492 | 1 | // Check if the correct load instruction has been emitted - SelectionDAG might |
4493 | 1 | // have emitted a zero-extending load, but we need a sign-extending load. |
4494 | 1 | bool IsZExt = isa<ZExtInst>(I); |
4495 | 1 | const auto *LoadMI = MI; |
4496 | 1 | if (LoadMI->getOpcode() == TargetOpcode::COPY && |
4497 | 1 | LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4498 | 0 | unsigned LoadReg = MI->getOperand(1).getReg(); |
4499 | 0 | LoadMI = MRI.getUniqueVRegDef(LoadReg); |
4500 | 0 | assert(LoadMI && "Expected valid instruction"); |
4501 | 0 | } |
4502 | 1 | if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4503 | 0 | return false; |
4504 | 1 | |
4505 | 1 | // Nothing to be done. |
4506 | 1 | if (RetVT != MVT::i64 || SrcVT > MVT::i32) { |
4507 | 0 | updateValueMap(I, Reg); |
4508 | 0 | return true; |
4509 | 0 | } |
4510 | 1 | |
4511 | 1 | if (IsZExt) { |
4512 | 1 | unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass); |
4513 | 1 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
4514 | 1 | TII.get(AArch64::SUBREG_TO_REG), Reg64) |
4515 | 1 | .addImm(0) |
4516 | 1 | .addReg(Reg, getKillRegState(true)) |
4517 | 1 | .addImm(AArch64::sub_32); |
4518 | 1 | Reg = Reg64; |
4519 | 1 | } else { |
4520 | 0 | assert((MI->getOpcode() == TargetOpcode::COPY && |
4521 | 0 | MI->getOperand(1).getSubReg() == AArch64::sub_32) && |
4522 | 0 | "Expected copy instruction"); |
4523 | 0 | Reg = MI->getOperand(1).getReg(); |
4524 | 0 | MachineBasicBlock::iterator I(MI); |
4525 | 0 | removeDeadCode(I, std::next(I)); |
4526 | 0 | } |
4527 | 1 | updateValueMap(I, Reg); |
4528 | 1 | return true; |
4529 | 1 | } |
4530 | | |
4531 | 206 | bool AArch64FastISel::selectIntExt(const Instruction *I) { |
4532 | 206 | assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) && |
4533 | 206 | "Unexpected integer extend instruction."); |
4534 | 206 | MVT RetVT; |
4535 | 206 | MVT SrcVT; |
4536 | 206 | if (!isTypeSupported(I->getType(), RetVT)) |
4537 | 7 | return false; |
4538 | 199 | |
4539 | 199 | if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT)) |
4540 | 0 | return false; |
4541 | 199 | |
4542 | 199 | // Try to optimize already sign-/zero-extended values from load instructions. |
4543 | 199 | if (optimizeIntExtLoad(I, RetVT, SrcVT)) |
4544 | 1 | return true; |
4545 | 198 | |
4546 | 198 | unsigned SrcReg = getRegForValue(I->getOperand(0)); |
4547 | 198 | if (!SrcReg) |
4548 | 0 | return false; |
4549 | 198 | bool SrcIsKill = hasTrivialKill(I->getOperand(0)); |
4550 | 198 | |
4551 | 198 | // Try to optimize already sign-/zero-extended values from function arguments. |
4552 | 198 | bool IsZExt = isa<ZExtInst>(I); |
4553 | 198 | if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) { |
4554 | 80 | if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4555 | 66 | if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4556 | 13 | unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass); |
4557 | 13 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
4558 | 13 | TII.get(AArch64::SUBREG_TO_REG), ResultReg) |
4559 | 13 | .addImm(0) |
4560 | 13 | .addReg(SrcReg, getKillRegState(SrcIsKill)) |
4561 | 13 | .addImm(AArch64::sub_32); |
4562 | 13 | SrcReg = ResultReg; |
4563 | 13 | } |
4564 | 66 | // Conservatively clear all kill flags from all uses, because we are |
4565 | 66 | // replacing a sign-/zero-extend instruction at IR level with a nop at MI |
4566 | 66 | // level. The result of the instruction at IR level might have been |
4567 | 66 | // trivially dead, which is now no longer true.
4568 | 66 | unsigned UseReg = lookUpRegForValue(I); |
4569 | 66 | if (UseReg) |
4570 | 66 | MRI.clearKillFlags(UseReg); |
4571 | 66 | |
4572 | 66 | updateValueMap(I, SrcReg); |
4573 | 66 | return true; |
4574 | 66 | } |
4575 | 132 | } |
4576 | 132 | |
4577 | 132 | unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt); |
4578 | 132 | if (!ResultReg) |
4579 | 0 | return false; |
4580 | 132 | |
4581 | 132 | updateValueMap(I, ResultReg); |
4582 | 132 | return true; |
4583 | 132 | } |
4584 | | |
4585 | 8 | bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) { |
4586 | 8 | EVT DestEVT = TLI.getValueType(DL, I->getType(), true); |
4587 | 8 | if (!DestEVT.isSimple()) |
4588 | 0 | return false; |
4589 | 8 | |
4590 | 8 | MVT DestVT = DestEVT.getSimpleVT(); |
4591 | 8 | if (DestVT != MVT::i64 && DestVT != MVT::i32)
4592 | 0 | return false; |
4593 | 8 | |
4594 | 8 | unsigned DivOpc; |
4595 | 8 | bool Is64bit = (DestVT == MVT::i64); |
4596 | 8 | switch (ISDOpcode) { |
4597 | 8 | default: |
4598 | 0 | return false; |
4599 | 8 | case ISD::SREM: |
4600 | 4 | DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4601 | 4 | break;
4602 | 8 | case ISD::UREM:
4603 | 4 | DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4604 | 4 | break;
4605 | 8 | }
4606 | 8 | unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4607 | 8 | unsigned Src0Reg = getRegForValue(I->getOperand(0)); |
4608 | 8 | if (!Src0Reg) |
4609 | 0 | return false; |
4610 | 8 | bool Src0IsKill = hasTrivialKill(I->getOperand(0)); |
4611 | 8 | |
4612 | 8 | unsigned Src1Reg = getRegForValue(I->getOperand(1)); |
4613 | 8 | if (!Src1Reg) |
4614 | 0 | return false; |
4615 | 8 | bool Src1IsKill = hasTrivialKill(I->getOperand(1)); |
4616 | 8 | |
4617 | 8 | const TargetRegisterClass *RC = |
4618 | 8 | (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4619 | 8 | unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false, |
4620 | 8 | Src1Reg, /*IsKill=*/false); |
4621 | 8 | assert(QuotReg && "Unexpected DIV instruction emission failure."); |
4622 | 8 | // The remainder is computed as numerator - (quotient * denominator) using the |
4623 | 8 | // MSUB instruction. |
4624 | 8 | unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true, |
4625 | 8 | Src1Reg, Src1IsKill, Src0Reg, |
4626 | 8 | Src0IsKill); |
4627 | 8 | updateValueMap(I, ResultReg); |
4628 | 8 | return true; |
4629 | 8 | } |
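
The MSUB operand order matches the identity in the comment above: MSUB Xd, Xn, Xm, Xa computes Xa - Xn*Xm, so passing (quotient, denominator, numerator) yields the remainder, including the round-toward-zero behavior for negative operands. A quick host-side check (msubModel is an illustrative helper):

    #include <cassert>
    #include <cstdint>

    // MSUBXrrr Xd, Xn, Xm, Xa = Xa - Xn * Xm
    static int64_t msubModel(int64_t quot, int64_t denom, int64_t num) {
      return num - quot * denom;
    }

    int main() {
      int64_t Num = -7, Denom = 3;
      int64_t Quot = Num / Denom;                         // SDIV truncates: -2
      assert(msubModel(Quot, Denom, Num) == Num % Denom); // remainder -1
    }
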
4630 | | |
4631 | 10 | bool AArch64FastISel::selectMul(const Instruction *I) { |
4632 | 10 | MVT VT; |
4633 | 10 | if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) |
4634 | 0 | return false; |
4635 | 10 | |
4636 | 10 | if (VT.isVector()) |
4637 | 0 | return selectBinaryOp(I, ISD::MUL); |
4638 | 10 | |
4639 | 10 | const Value *Src0 = I->getOperand(0); |
4640 | 10 | const Value *Src1 = I->getOperand(1); |
4641 | 10 | if (const auto *C = dyn_cast<ConstantInt>(Src0)) |
4642 | 0 | if (C->getValue().isPowerOf2()) |
4643 | 0 | std::swap(Src0, Src1); |
4644 | 10 | |
4645 | 10 | // Try to simplify to a shift instruction. |
4646 | 10 | if (const auto *C = dyn_cast<ConstantInt>(Src1)) |
4647 | 4 | if (C->getValue().isPowerOf2()) { |
4648 | 2 | uint64_t ShiftVal = C->getValue().logBase2(); |
4649 | 2 | MVT SrcVT = VT; |
4650 | 2 | bool IsZExt = true; |
4651 | 2 | if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) { |
4652 | 0 | if (!isIntExtFree(ZExt)) { |
4653 | 0 | MVT VT; |
4654 | 0 | if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) { |
4655 | 0 | SrcVT = VT; |
4656 | 0 | IsZExt = true; |
4657 | 0 | Src0 = ZExt->getOperand(0); |
4658 | 0 | } |
4659 | 0 | } |
4660 | 2 | } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) { |
4661 | 0 | if (!isIntExtFree(SExt)) { |
4662 | 0 | MVT VT; |
4663 | 0 | if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) { |
4664 | 0 | SrcVT = VT; |
4665 | 0 | IsZExt = false; |
4666 | 0 | Src0 = SExt->getOperand(0); |
4667 | 0 | } |
4668 | 0 | } |
4669 | 0 | } |
4670 | 2 | |
4671 | 2 | unsigned Src0Reg = getRegForValue(Src0); |
4672 | 2 | if (!Src0Reg) |
4673 | 0 | return false; |
4674 | 2 | bool Src0IsKill = hasTrivialKill(Src0); |
4675 | 2 | |
4676 | 2 | unsigned ResultReg = |
4677 | 2 | emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt); |
4678 | 2 | |
4679 | 2 | if (ResultReg) { |
4680 | 2 | updateValueMap(I, ResultReg); |
4681 | 2 | return true; |
4682 | 2 | } |
4683 | 8 | } |
4684 | 8 | |
4685 | 8 | unsigned Src0Reg = getRegForValue(I->getOperand(0)); |
4686 | 8 | if (!Src0Reg) |
4687 | 0 | return false; |
4688 | 8 | bool Src0IsKill = hasTrivialKill(I->getOperand(0)); |
4689 | 8 | |
4690 | 8 | unsigned Src1Reg = getRegForValue(I->getOperand(1)); |
4691 | 8 | if (!Src1Reg) |
4692 | 0 | return false; |
4693 | 8 | bool Src1IsKill = hasTrivialKill(I->getOperand(1)); |
4694 | 8 | |
4695 | 8 | unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill); |
4696 | 8 | |
4697 | 8 | if (!ResultReg) |
4698 | 0 | return false; |
4699 | 8 | |
4700 | 8 | updateValueMap(I, ResultReg); |
4701 | 8 | return true; |
4702 | 8 | } |
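
The strength reduction attempted first relies on x * 2^k == x << k, with k recovered as logBase2 of the constant operand. A minimal check of that identity (using a portable loop in place of APInt::logBase2):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t X = 0x1234, C = 16; // C must be a power of two
      unsigned ShiftVal = 0;
      while ((1ull << ShiftVal) != C) // stand-in for APInt::logBase2
        ++ShiftVal;
      assert((X * C) == (X << ShiftVal));
    }
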
4703 | | |
4704 | 98 | bool AArch64FastISel::selectShift(const Instruction *I) { |
4705 | 98 | MVT RetVT; |
4706 | 98 | if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true)) |
4707 | 0 | return false; |
4708 | 98 | |
4709 | 98 | if (RetVT.isVector()) |
4710 | 0 | return selectOperator(I, I->getOpcode()); |
4711 | 98 | |
4712 | 98 | if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) { |
4713 | 86 | unsigned ResultReg = 0; |
4714 | 86 | uint64_t ShiftVal = C->getZExtValue(); |
4715 | 86 | MVT SrcVT = RetVT; |
4716 | 86 | bool IsZExt = I->getOpcode() != Instruction::AShr; |
4717 | 86 | const Value *Op0 = I->getOperand(0); |
4718 | 86 | if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) { |
4719 | 22 | if (!isIntExtFree(ZExt)) { |
4720 | 22 | MVT TmpVT; |
4721 | 22 | if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) { |
4722 | 22 | SrcVT = TmpVT; |
4723 | 22 | IsZExt = true; |
4724 | 22 | Op0 = ZExt->getOperand(0); |
4725 | 22 | } |
4726 | 22 | } |
4727 | 64 | } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) { |
4728 | 19 | if (!isIntExtFree(SExt)) { |
4729 | 19 | MVT TmpVT; |
4730 | 19 | if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) { |
4731 | 19 | SrcVT = TmpVT; |
4732 | 19 | IsZExt = false; |
4733 | 19 | Op0 = SExt->getOperand(0); |
4734 | 19 | } |
4735 | 19 | } |
4736 | 19 | } |
4737 | 86 | |
4738 | 86 | unsigned Op0Reg = getRegForValue(Op0); |
4739 | 86 | if (!Op0Reg) |
4740 | 0 | return false; |
4741 | 86 | bool Op0IsKill = hasTrivialKill(Op0); |
4742 | 86 | |
4743 | 86 | switch (I->getOpcode()) { |
4744 | 86 | default: llvm_unreachable("Unexpected instruction.");
4745 | 86 | case Instruction::Shl: |
4746 | 48 | ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt); |
4747 | 48 | break; |
4748 | 86 | case Instruction::AShr: |
4749 | 18 | ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt); |
4750 | 18 | break; |
4751 | 86 | case Instruction::LShr: |
4752 | 20 | ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt); |
4753 | 20 | break; |
4754 | 86 | } |
4755 | 86 | if (!ResultReg) |
4756 | 14 | return false; |
4757 | 72 | |
4758 | 72 | updateValueMap(I, ResultReg); |
4759 | 72 | return true; |
4760 | 72 | } |
4761 | 12 | |
4762 | 12 | unsigned Op0Reg = getRegForValue(I->getOperand(0)); |
4763 | 12 | if (!Op0Reg) |
4764 | 0 | return false; |
4765 | 12 | bool Op0IsKill = hasTrivialKill(I->getOperand(0)); |
4766 | 12 | |
4767 | 12 | unsigned Op1Reg = getRegForValue(I->getOperand(1)); |
4768 | 12 | if (!Op1Reg) |
4769 | 0 | return false; |
4770 | 12 | bool Op1IsKill = hasTrivialKill(I->getOperand(1)); |
4771 | 12 | |
4772 | 12 | unsigned ResultReg = 0; |
4773 | 12 | switch (I->getOpcode()) { |
4774 | 12 | default: llvm_unreachable("Unexpected instruction.");
4775 | 12 | case Instruction::Shl: |
4776 | 4 | ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill); |
4777 | 4 | break; |
4778 | 12 | case Instruction::AShr: |
4779 | 4 | ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill); |
4780 | 4 | break; |
4781 | 12 | case Instruction::LShr: |
4782 | 4 | ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill); |
4783 | 4 | break; |
4784 | 12 | } |
4785 | 12 | |
4786 | 12 | if (!ResultReg) |
4787 | 0 | return false; |
4788 | 12 | |
4789 | 12 | updateValueMap(I, ResultReg); |
4790 | 12 | return true; |
4791 | 12 | } |
4792 | | |
4793 | 23 | bool AArch64FastISel::selectBitCast(const Instruction *I) { |
4794 | 23 | MVT RetVT, SrcVT; |
4795 | 23 | |
4796 | 23 | if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT)) |
4797 | 0 | return false; |
4798 | 23 | if (!isTypeLegal(I->getType(), RetVT)) |
4799 | 0 | return false; |
4800 | 23 | |
4801 | 23 | unsigned Opc; |
4802 | 23 | if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4803 | 1 | Opc = AArch64::FMOVWSr;
4804 | 22 | else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4805 | 3 | Opc = AArch64::FMOVXDr;
4806 | 19 | else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4807 | 1 | Opc = AArch64::FMOVSWr;
4808 | 18 | else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4809 | 3 | Opc = AArch64::FMOVDXr; |
4810 | 15 | else |
4811 | 15 | return false; |
4812 | 8 | |
4813 | 8 | const TargetRegisterClass *RC = nullptr; |
4814 | 8 | switch (RetVT.SimpleTy) { |
4815 | 8 | default: llvm_unreachable("Unexpected value type.");
4816 | 8 | case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4817 | 8 | case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4818 | 8 | case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4819 | 8 | case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4820 | 8 | } |
4821 | 8 | unsigned Op0Reg = getRegForValue(I->getOperand(0)); |
4822 | 8 | if (!Op0Reg) |
4823 | 0 | return false; |
4824 | 8 | bool Op0IsKill = hasTrivialKill(I->getOperand(0)); |
4825 | 8 | unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill); |
4826 | 8 | |
4827 | 8 | if (!ResultReg) |
4828 | 0 | return false; |
4829 | 8 | |
4830 | 8 | updateValueMap(I, ResultReg); |
4831 | 8 | return true; |
4832 | 8 | } |
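
The four FMOV forms move raw bits between the GPR and FPR files without any conversion, i.e. they implement the IR bitcast directly. The host-side equivalent is a memcpy of the representation (std::bit_cast in C++20):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      float f = 1.0f;
      uint32_t bits;
      std::memcpy(&bits, &f, sizeof(bits)); // what FMOVSWr produces
      assert(bits == 0x3f800000u);
    }
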
4833 | | |
4834 | 4 | bool AArch64FastISel::selectFRem(const Instruction *I) { |
4835 | 4 | MVT RetVT; |
4836 | 4 | if (!isTypeLegal(I->getType(), RetVT)) |
4837 | 0 | return false; |
4838 | 4 | |
4839 | 4 | RTLIB::Libcall LC; |
4840 | 4 | switch (RetVT.SimpleTy) { |
4841 | 4 | default: |
4842 | 0 | return false; |
4843 | 4 | case MVT::f32: |
4844 | 2 | LC = RTLIB::REM_F32; |
4845 | 2 | break; |
4846 | 4 | case MVT::f64: |
4847 | 2 | LC = RTLIB::REM_F64; |
4848 | 2 | break; |
4849 | 4 | } |
4850 | 4 | |
4851 | 4 | ArgListTy Args; |
4852 | 4 | Args.reserve(I->getNumOperands()); |
4853 | 4 | |
4854 | 4 | // Populate the argument list. |
4855 | 8 | for (auto &Arg : I->operands()) { |
4856 | 8 | ArgListEntry Entry; |
4857 | 8 | Entry.Val = Arg; |
4858 | 8 | Entry.Ty = Arg->getType(); |
4859 | 8 | Args.push_back(Entry); |
4860 | 8 | } |
4861 | 4 | |
4862 | 4 | CallLoweringInfo CLI; |
4863 | 4 | MCContext &Ctx = MF->getContext(); |
4864 | 4 | CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(), |
4865 | 4 | TLI.getLibcallName(LC), std::move(Args)); |
4866 | 4 | if (!lowerCallTo(CLI)) |
4867 | 0 | return false; |
4868 | 4 | updateValueMap(I, CLI.ResultReg); |
4869 | 4 | return true; |
4870 | 4 | } |
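
frem has no AArch64 instruction, so it becomes a libcall to fmodf/fmod via RTLIB::REM_F32/REM_F64. Like frem, fmod keeps the sign of the numerator:

    #include <cassert>
    #include <cmath>

    int main() {
      assert(std::fmod(5.5, 2.0) == 1.5);
      assert(std::fmod(-5.5, 2.0) == -1.5); // sign follows the numerator
    }
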
4871 | | |
4872 | 26 | bool AArch64FastISel::selectSDiv(const Instruction *I) { |
4873 | 26 | MVT VT; |
4874 | 26 | if (!isTypeLegal(I->getType(), VT)) |
4875 | 0 | return false; |
4876 | 26 | |
4877 | 26 | if (!isa<ConstantInt>(I->getOperand(1))) |
4878 | 12 | return selectBinaryOp(I, ISD::SDIV); |
4879 | 14 | |
4880 | 14 | const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue(); |
4881 | 14 | if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4882 | 14 | !(C.isPowerOf2() || (-C).isPowerOf2()))
4883 | 0 | return selectBinaryOp(I, ISD::SDIV); |
4884 | 14 | |
4885 | 14 | unsigned Lg2 = C.countTrailingZeros(); |
4886 | 14 | unsigned Src0Reg = getRegForValue(I->getOperand(0)); |
4887 | 14 | if (!Src0Reg) |
4888 | 0 | return false; |
4889 | 14 | bool Src0IsKill = hasTrivialKill(I->getOperand(0)); |
4890 | 14 | |
4891 | 14 | if (cast<BinaryOperator>(I)->isExact()) { |
4892 | 3 | unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2); |
4893 | 3 | if (!ResultReg) |
4894 | 0 | return false; |
4895 | 3 | updateValueMap(I, ResultReg); |
4896 | 3 | return true; |
4897 | 3 | } |
4898 | 11 | |
4899 | 11 | int64_t Pow2MinusOne = (1ULL << Lg2) - 1; |
4900 | 11 | unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne); |
4901 | 11 | if (!AddReg) |
4902 | 0 | return false; |
4903 | 11 | |
4904 | 11 | // (Src0 < 0) ? Pow2 - 1 : 0; |
4905 | 11 | if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0)) |
4906 | 0 | return false; |
4907 | 11 | |
4908 | 11 | unsigned SelectOpc; |
4909 | 11 | const TargetRegisterClass *RC; |
4910 | 11 | if (VT == MVT::i64) { |
4911 | 6 | SelectOpc = AArch64::CSELXr; |
4912 | 6 | RC = &AArch64::GPR64RegClass; |
4913 | 6 | } else { |
4914 | 5 | SelectOpc = AArch64::CSELWr; |
4915 | 5 | RC = &AArch64::GPR32RegClass; |
4916 | 5 | } |
4917 | 11 | unsigned SelectReg = |
4918 | 11 | fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg, |
4919 | 11 | Src0IsKill, AArch64CC::LT); |
4920 | 11 | if (!SelectReg) |
4921 | 0 | return false; |
4922 | 11 | |
4923 | 11 | // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also |
4924 | 11 | // negate the result. |
4925 | 11 | unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4926 | 11 | unsigned ResultReg; |
4927 | 11 | if (C.isNegative()) |
4928 | 4 | ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true, |
4929 | 4 | SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2); |
4930 | 7 | else |
4931 | 7 | ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2); |
4932 | 11 | |
4933 | 11 | if (!ResultReg) |
4934 | 0 | return false; |
4935 | 11 | |
4936 | 11 | updateValueMap(I, ResultReg); |
4937 | 11 | return true; |
4938 | 11 | } |
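
The emitted sequence implements standard round-toward-zero division by a power of two: bias negative numerators by 2^k - 1 (ADD, then CMP #0 with CSEL on LT), shift arithmetically by k, and for a negative divisor subtract the shifted value from the zero register (the ASR is folded into the SUB operand). A host-side sketch for small k (sdivPow2 is an illustrative model):

    #include <cassert>
    #include <cstdint>

    static int32_t sdivPow2(int32_t x, unsigned lg2, bool negDivisor) {
      int32_t biased = x + ((1 << lg2) - 1); // emitAdd_ri_
      int32_t sel = (x < 0) ? biased : x;    // emitICmp_ri + CSEL (LT)
      int32_t q = sel >> lg2;                // emitASR_ri (or ASR folded in SUB)
      return negDivisor ? -q : q;            // SUB from the zero register
    }

    int main() {
      assert(sdivPow2(-7, 2, false) == -7 / 4); // both truncate toward zero: -1
      assert(sdivPow2(7, 2, true) == 7 / -4);   // -1
    }
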
4939 | | |
4940 | | /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We |
4941 | | /// have to duplicate it for AArch64, because otherwise we would fail during the |
4942 | | /// sign-extend emission. |
4943 | 6 | std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) { |
4944 | 6 | unsigned IdxN = getRegForValue(Idx); |
4945 | 6 | if (IdxN == 0) |
4946 | 0 | // Unhandled operand. Halt "fast" selection and bail. |
4947 | 0 | return std::pair<unsigned, bool>(0, false); |
4948 | 6 | |
4949 | 6 | bool IdxNIsKill = hasTrivialKill(Idx); |
4950 | 6 | |
4951 | 6 | // If the index is smaller or larger than intptr_t, truncate or extend it. |
4952 | 6 | MVT PtrVT = TLI.getPointerTy(DL); |
4953 | 6 | EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false); |
4954 | 6 | if (IdxVT.bitsLT(PtrVT)) { |
4955 | 1 | IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false); |
4956 | 1 | IdxNIsKill = true; |
4957 | 5 | } else if (IdxVT.bitsGT(PtrVT)) |
4958 | 5 | llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4959 | 6 | return std::pair<unsigned, bool>(IdxN, IdxNIsKill); |
4960 | 6 | } |
4961 | | |
4962 | | /// This is mostly a copy of the existing FastISel GEP code, but we have to |
4963 | | /// duplicate it for AArch64, because otherwise we would bail out even for |
4964 | | /// simple cases. This is because the standard fastEmit functions don't cover |
4965 | | /// MUL at all and ADD is lowered very inefficiently.
4966 | 21 | bool AArch64FastISel::selectGetElementPtr(const Instruction *I) { |
4967 | 21 | unsigned N = getRegForValue(I->getOperand(0)); |
4968 | 21 | if (!N) |
4969 | 0 | return false; |
4970 | 21 | bool NIsKill = hasTrivialKill(I->getOperand(0)); |
4971 | 21 | |
4972 | 21 | // Keep a running tab of the total offset to coalesce multiple N = N + Offset |
4973 | 21 | // into a single N = N + TotalOffset. |
4974 | 21 | uint64_t TotalOffs = 0; |
4975 | 21 | MVT VT = TLI.getPointerTy(DL); |
4976 | 21 | for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I); |
4977 | 47 | GTI != E; ++GTI26 ) { |
4978 | 26 | const Value *Idx = GTI.getOperand(); |
4979 | 26 | if (auto *StTy = GTI.getStructTypeOrNull()) { |
4980 | 4 | unsigned Field = cast<ConstantInt>(Idx)->getZExtValue(); |
4981 | 4 | // N = N + Offset |
4982 | 4 | if (Field) |
4983 | 2 | TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field); |
4984 | 22 | } else { |
4985 | 22 | Type *Ty = GTI.getIndexedType(); |
4986 | 22 | |
4987 | 22 | // If this is a constant subscript, handle it quickly. |
4988 | 22 | if (const auto *CI = dyn_cast<ConstantInt>(Idx)) { |
4989 | 16 | if (CI->isZero()) |
4990 | 5 | continue; |
4991 | 11 | // N = N + Offset |
4992 | 11 | TotalOffs += |
4993 | 11 | DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue(); |
4994 | 11 | continue; |
4995 | 11 | } |
4996 | 6 | if (TotalOffs) { |
4997 | 0 | N = emitAdd_ri_(VT, N, NIsKill, TotalOffs); |
4998 | 0 | if (!N) |
4999 | 0 | return false; |
5000 | 0 | NIsKill = true; |
5001 | 0 | TotalOffs = 0; |
5002 | 0 | } |
5003 | 6 | |
5004 | 6 | // N = N + Idx * ElementSize; |
5005 | 6 | uint64_t ElementSize = DL.getTypeAllocSize(Ty); |
5006 | 6 | std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx); |
5007 | 6 | unsigned IdxN = Pair.first; |
5008 | 6 | bool IdxNIsKill = Pair.second; |
5009 | 6 | if (!IdxN) |
5010 | 0 | return false; |
5011 | 6 | |
5012 | 6 | if (ElementSize != 1) { |
5013 | 4 | unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize); |
5014 | 4 | if (!C) |
5015 | 0 | return false; |
5016 | 4 | IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true); |
5017 | 4 | if (!IdxN) |
5018 | 0 | return false; |
5019 | 4 | IdxNIsKill = true; |
5020 | 4 | } |
5021 | 6 | N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill); |
5022 | 6 | if (!N) |
5023 | 0 | return false; |
5024 | 6 | } |
5025 | 26 | } |
5026 | 21 | if (TotalOffs) { |
5027 | 13 | N = emitAdd_ri_(VT, N, NIsKill, TotalOffs); |
5028 | 13 | if (!N) |
5029 | 0 | return false; |
5030 | 21 | } |
5031 | 21 | updateValueMap(I, N); |
5032 | 21 | return true; |
5033 | 21 | } |
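
All constant indices fold into the single running byte offset TotalOffs, so a chain like "array element 2, field 1" costs one ADD rather than one per index. Equivalent host arithmetic for a hypothetical struct (the struct and names are illustrative only):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    struct S { int32_t a; int64_t b; }; // 16 bytes with the usual AArch64 layout

    int main() {
      S arr[4];
      // GEP arr, 2, field 1  ==>  base + 2*sizeof(S) + offsetof(S, b)
      uintptr_t base = reinterpret_cast<uintptr_t>(arr);
      uintptr_t totalOffs = 2 * sizeof(S) + offsetof(S, b);
      assert(base + totalOffs == reinterpret_cast<uintptr_t>(&arr[2].b));
    }
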
5034 | | |
5035 | 3 | bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) { |
5036 | 3 | assert(TM.getOptLevel() == CodeGenOpt::None && |
5037 | 3 | "cmpxchg survived AtomicExpand at optlevel > -O0"); |
5038 | 3 | |
5039 | 3 | auto *RetPairTy = cast<StructType>(I->getType()); |
5040 | 3 | Type *RetTy = RetPairTy->getTypeAtIndex(0U); |
5041 | 3 | assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) && |
5042 | 3 | "cmpxchg has a non-i1 status result"); |
5043 | 3 | |
5044 | 3 | MVT VT; |
5045 | 3 | if (!isTypeLegal(RetTy, VT)) |
5046 | 0 | return false; |
5047 | 3 | |
5048 | 3 | const TargetRegisterClass *ResRC; |
5049 | 3 | unsigned Opc, CmpOpc; |
5050 | 3 | // This only supports i32/i64, because i8/i16 aren't legal, and the generic |
5051 | 3 | // extractvalue selection doesn't support that. |
5052 | 3 | if (VT == MVT::i32) { |
5053 | 2 | Opc = AArch64::CMP_SWAP_32; |
5054 | 2 | CmpOpc = AArch64::SUBSWrs; |
5055 | 2 | ResRC = &AArch64::GPR32RegClass; |
5056 | 3 | } else if (VT == MVT::i64) {
5057 | 1 | Opc = AArch64::CMP_SWAP_64; |
5058 | 1 | CmpOpc = AArch64::SUBSXrs; |
5059 | 1 | ResRC = &AArch64::GPR64RegClass; |
5060 | 1 | } else { |
5061 | 0 | return false; |
5062 | 0 | } |
5063 | 3 | |
5064 | 3 | const MCInstrDesc &II = TII.get(Opc); |
5065 | 3 | |
5066 | 3 | const unsigned AddrReg = constrainOperandRegClass( |
5067 | 3 | II, getRegForValue(I->getPointerOperand()), II.getNumDefs()); |
5068 | 3 | const unsigned DesiredReg = constrainOperandRegClass( |
5069 | 3 | II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1); |
5070 | 3 | const unsigned NewReg = constrainOperandRegClass( |
5071 | 3 | II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2); |
5072 | 3 | |
5073 | 3 | const unsigned ResultReg1 = createResultReg(ResRC); |
5074 | 3 | const unsigned ResultReg2 = createResultReg(&AArch64::GPR32RegClass); |
5075 | 3 | const unsigned ScratchReg = createResultReg(&AArch64::GPR32RegClass); |
5076 | 3 | |
5077 | 3 | // FIXME: MachineMemOperand doesn't support cmpxchg yet. |
5078 | 3 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) |
5079 | 3 | .addDef(ResultReg1) |
5080 | 3 | .addDef(ScratchReg) |
5081 | 3 | .addUse(AddrReg) |
5082 | 3 | .addUse(DesiredReg) |
5083 | 3 | .addUse(NewReg); |
5084 | 3 | |
5085 | 3 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc)) |
5086 | 3 | .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5087 | 3 | .addUse(ResultReg1) |
5088 | 3 | .addUse(DesiredReg) |
5089 | 3 | .addImm(0); |
5090 | 3 | |
5091 | 3 | BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr)) |
5092 | 3 | .addDef(ResultReg2) |
5093 | 3 | .addUse(AArch64::WZR) |
5094 | 3 | .addUse(AArch64::WZR) |
5095 | 3 | .addImm(AArch64CC::NE); |
5096 | 3 | |
5097 | 3 | assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers."); |
5098 | 3 | updateValueMap(I, ResultReg1, 2); |
5099 | 3 | return true; |
5100 | 3 | } |
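
The i1 status result is rebuilt from the loaded old value: SUBS compares it against the expected value, and CSINC Wd, WZR, WZR, NE produces WZR (0) when the flags say not-equal and WZR + 1 (1) otherwise. A sketch of that mapping (cmpxchgStatus is an illustrative helper):

    #include <cassert>
    #include <cstdint>

    static uint32_t cmpxchgStatus(uint64_t oldVal, uint64_t desired) {
      bool ne = (oldVal != desired); // flags set by the SUBS compare
      return ne ? 0u : 1u;           // CSINC wzr, wzr, NE
    }

    int main() {
      assert(cmpxchgStatus(5, 5) == 1); // values matched: swap happened
      assert(cmpxchgStatus(5, 6) == 0); // compare failed: no swap
    }
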
5101 | | |
5102 | 4.03k | bool AArch64FastISel::fastSelectInstruction(const Instruction *I) { |
5103 | 4.03k | switch (I->getOpcode()) { |
5104 | 4.03k | default: |
5105 | 603 | break; |
5106 | 4.03k | case Instruction::Add: |
5107 | 284 | case Instruction::Sub: |
5108 | 284 | return selectAddSub(I); |
5109 | 284 | case Instruction::Mul: |
5110 | 10 | return selectMul(I); |
5111 | 284 | case Instruction::SDiv: |
5112 | 26 | return selectSDiv(I); |
5113 | 284 | case Instruction::SRem: |
5114 | 4 | if (!selectBinaryOp(I, ISD::SREM)) |
5115 | 4 | return selectRem(I, ISD::SREM); |
5116 | 0 | return true; |
5117 | 4 | case Instruction::URem: |
5118 | 4 | if (!selectBinaryOp(I, ISD::UREM)) |
5119 | 4 | return selectRem(I, ISD::UREM); |
5120 | 0 | return true; |
5121 | 98 | case Instruction::Shl: |
5122 | 98 | case Instruction::LShr: |
5123 | 98 | case Instruction::AShr: |
5124 | 98 | return selectShift(I); |
5125 | 98 | case Instruction::And: |
5126 | 91 | case Instruction::Or: |
5127 | 91 | case Instruction::Xor: |
5128 | 91 | return selectLogicalOp(I); |
5129 | 287 | case Instruction::Br: |
5130 | 287 | return selectBranch(I); |
5131 | 91 | case Instruction::IndirectBr: |
5132 | 1 | return selectIndirectBr(I); |
5133 | 91 | case Instruction::BitCast: |
5134 | 56 | if (!FastISel::selectBitCast(I)) |
5135 | 23 | return selectBitCast(I); |
5136 | 33 | return true; |
5137 | 33 | case Instruction::FPToSI: |
5138 | 4 | if (!selectCast(I, ISD::FP_TO_SINT)) |
5139 | 1 | return selectFPToInt(I, /*Signed=*/true); |
5140 | 3 | return true; |
5141 | 6 | case Instruction::FPToUI: |
5142 | 6 | return selectFPToInt(I, /*Signed=*/false); |
5143 | 206 | case Instruction::ZExt: |
5144 | 206 | case Instruction::SExt: |
5145 | 206 | return selectIntExt(I); |
5146 | 206 | case Instruction::Trunc: |
5147 | 26 | if (!selectCast(I, ISD::TRUNCATE)) |
5148 | 14 | return selectTrunc(I); |
5149 | 12 | return true; |
5150 | 12 | case Instruction::FPExt: |
5151 | 5 | return selectFPExt(I); |
5152 | 12 | case Instruction::FPTrunc: |
5153 | 2 | return selectFPTrunc(I); |
5154 | 17 | case Instruction::SIToFP: |
5155 | 17 | if (!selectCast(I, ISD::SINT_TO_FP)) |
5156 | 9 | return selectIntToFP(I, /*Signed=*/true); |
5157 | 8 | return true; |
5158 | 13 | case Instruction::UIToFP: |
5159 | 13 | return selectIntToFP(I, /*Signed=*/false); |
5160 | 381 | case Instruction::Load: |
5161 | 381 | return selectLoad(I); |
5162 | 427 | case Instruction::Store: |
5163 | 427 | return selectStore(I); |
5164 | 57 | case Instruction::FCmp: |
5165 | 57 | case Instruction::ICmp: |
5166 | 57 | return selectCmp(I); |
5167 | 57 | case Instruction::Select: |
5168 | 53 | return selectSelect(I); |
5169 | 1.34k | case Instruction::Ret: |
5170 | 1.34k | return selectRet(I); |
5171 | 57 | case Instruction::FRem: |
5172 | 4 | return selectFRem(I); |
5173 | 57 | case Instruction::GetElementPtr: |
5174 | 21 | return selectGetElementPtr(I); |
5175 | 57 | case Instruction::AtomicCmpXchg: |
5176 | 3 | return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I)); |
5177 | 603 | } |
5178 | 603 | |
5179 | 603 | // fall-back to target-independent instruction selection. |
5180 | 603 | return selectOperator(I, I->getOpcode()); |
5181 | 603 | } |
5182 | | |
5183 | | namespace llvm { |
5184 | | |
5185 | | FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo, |
5186 | 1.26k | const TargetLibraryInfo *LibInfo) { |
5187 | 1.26k | return new AArch64FastISel(FuncInfo, LibInfo); |
5188 | 1.26k | } |
5189 | | |
5190 | | } // end namespace llvm |