/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
Line | Count | Source |
1 | | //===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This file contains the Base ARM implementation of the TargetInstrInfo class. |
10 | | // |
11 | | //===----------------------------------------------------------------------===// |
12 | | |
13 | | #include "ARMBaseInstrInfo.h" |
14 | | #include "ARMBaseRegisterInfo.h" |
15 | | #include "ARMConstantPoolValue.h" |
16 | | #include "ARMFeatures.h" |
17 | | #include "ARMHazardRecognizer.h" |
18 | | #include "ARMMachineFunctionInfo.h" |
19 | | #include "ARMSubtarget.h" |
20 | | #include "MCTargetDesc/ARMAddressingModes.h" |
21 | | #include "MCTargetDesc/ARMBaseInfo.h" |
22 | | #include "llvm/ADT/DenseMap.h" |
23 | | #include "llvm/ADT/STLExtras.h" |
24 | | #include "llvm/ADT/SmallSet.h" |
25 | | #include "llvm/ADT/SmallVector.h" |
26 | | #include "llvm/ADT/Triple.h" |
27 | | #include "llvm/CodeGen/LiveVariables.h" |
28 | | #include "llvm/CodeGen/MachineBasicBlock.h" |
29 | | #include "llvm/CodeGen/MachineConstantPool.h" |
30 | | #include "llvm/CodeGen/MachineFrameInfo.h" |
31 | | #include "llvm/CodeGen/MachineFunction.h" |
32 | | #include "llvm/CodeGen/MachineInstr.h" |
33 | | #include "llvm/CodeGen/MachineInstrBuilder.h" |
34 | | #include "llvm/CodeGen/MachineMemOperand.h" |
35 | | #include "llvm/CodeGen/MachineOperand.h" |
36 | | #include "llvm/CodeGen/MachineRegisterInfo.h" |
37 | | #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" |
38 | | #include "llvm/CodeGen/SelectionDAGNodes.h" |
39 | | #include "llvm/CodeGen/TargetInstrInfo.h" |
40 | | #include "llvm/CodeGen/TargetRegisterInfo.h" |
41 | | #include "llvm/CodeGen/TargetSchedule.h" |
42 | | #include "llvm/IR/Attributes.h" |
43 | | #include "llvm/IR/Constants.h" |
44 | | #include "llvm/IR/DebugLoc.h" |
45 | | #include "llvm/IR/Function.h" |
46 | | #include "llvm/IR/GlobalValue.h" |
47 | | #include "llvm/MC/MCAsmInfo.h" |
48 | | #include "llvm/MC/MCInstrDesc.h" |
49 | | #include "llvm/MC/MCInstrItineraries.h" |
50 | | #include "llvm/Support/BranchProbability.h" |
51 | | #include "llvm/Support/Casting.h" |
52 | | #include "llvm/Support/CommandLine.h" |
53 | | #include "llvm/Support/Compiler.h" |
54 | | #include "llvm/Support/Debug.h" |
55 | | #include "llvm/Support/ErrorHandling.h" |
56 | | #include "llvm/Support/raw_ostream.h" |
57 | | #include "llvm/Target/TargetMachine.h" |
58 | | #include <algorithm> |
59 | | #include <cassert> |
60 | | #include <cstdint> |
61 | | #include <iterator> |
62 | | #include <new> |
63 | | #include <utility> |
64 | | #include <vector> |
65 | | |
66 | | using namespace llvm; |
67 | | |
68 | | #define DEBUG_TYPE "arm-instrinfo" |
69 | | |
70 | | #define GET_INSTRINFO_CTOR_DTOR |
71 | | #include "ARMGenInstrInfo.inc" |
72 | | |
73 | | static cl::opt<bool> |
74 | | EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden, |
75 | | cl::desc("Enable ARM 2-addr to 3-addr conv")); |
76 | | |
77 | | /// ARM_MLxEntry - Record information about MLA / MLS instructions. |
78 | | struct ARM_MLxEntry { |
79 | | uint16_t MLxOpc; // MLA / MLS opcode |
80 | | uint16_t MulOpc; // Expanded multiplication opcode |
81 | | uint16_t AddSubOpc; // Expanded add / sub opcode |
82 | | bool NegAcc; // True if the acc is negated before the add / sub. |
83 | | bool HasLane; // True if instruction has an extra "lane" operand. |
84 | | }; |
85 | | |
86 | | static const ARM_MLxEntry ARM_MLxTable[] = { |
87 | | // MLxOpc, MulOpc, AddSubOpc, NegAcc, HasLane |
88 | | // fp scalar ops |
89 | | { ARM::VMLAS, ARM::VMULS, ARM::VADDS, false, false }, |
90 | | { ARM::VMLSS, ARM::VMULS, ARM::VSUBS, false, false }, |
91 | | { ARM::VMLAD, ARM::VMULD, ARM::VADDD, false, false }, |
92 | | { ARM::VMLSD, ARM::VMULD, ARM::VSUBD, false, false }, |
93 | | { ARM::VNMLAS, ARM::VNMULS, ARM::VSUBS, true, false }, |
94 | | { ARM::VNMLSS, ARM::VMULS, ARM::VSUBS, true, false }, |
95 | | { ARM::VNMLAD, ARM::VNMULD, ARM::VSUBD, true, false }, |
96 | | { ARM::VNMLSD, ARM::VMULD, ARM::VSUBD, true, false }, |
97 | | |
98 | | // fp SIMD ops |
99 | | { ARM::VMLAfd, ARM::VMULfd, ARM::VADDfd, false, false }, |
100 | | { ARM::VMLSfd, ARM::VMULfd, ARM::VSUBfd, false, false }, |
101 | | { ARM::VMLAfq, ARM::VMULfq, ARM::VADDfq, false, false }, |
102 | | { ARM::VMLSfq, ARM::VMULfq, ARM::VSUBfq, false, false }, |
103 | | { ARM::VMLAslfd, ARM::VMULslfd, ARM::VADDfd, false, true }, |
104 | | { ARM::VMLSslfd, ARM::VMULslfd, ARM::VSUBfd, false, true }, |
105 | | { ARM::VMLAslfq, ARM::VMULslfq, ARM::VADDfq, false, true }, |
106 | | { ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true }, |
107 | | }; |
108 | | |
109 | | ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI) |
110 | | : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP), |
111 | 7.46k | Subtarget(STI) { |
112 | 126k | for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) { |
113 | 119k | if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second) |
114 | 119k | llvm_unreachable("Duplicated entries?"); |
115 | 119k | MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc); |
116 | 119k | MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc); |
117 | 119k | } |
118 | 7.46k | } |
119 | | |
120 | | // Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl |
121 | | // currently defaults to no prepass hazard recognizer. |
122 | | ScheduleHazardRecognizer * |
123 | | ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, |
124 | 64.5k | const ScheduleDAG *DAG) const { |
125 | 64.5k | if (usePreRAHazardRecognizer()) { |
126 | 64.5k | const InstrItineraryData *II = |
127 | 64.5k | static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData(); |
128 | 64.5k | return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched"); |
129 | 64.5k | } |
130 | 0 | return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG); |
131 | 0 | } |
132 | | |
133 | | ScheduleHazardRecognizer *ARMBaseInstrInfo:: |
134 | | CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, |
135 | 18.8k | const ScheduleDAG *DAG) const { |
136 | 18.8k | if (Subtarget.isThumb2() || Subtarget.hasVFP2Base()) |
137 | 17.0k | return (ScheduleHazardRecognizer *)new ARMHazardRecognizer(II, DAG); |
138 | 1.78k | return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG); |
139 | 1.78k | } |
140 | | |
141 | | MachineInstr *ARMBaseInstrInfo::convertToThreeAddress( |
142 | 0 | MachineFunction::iterator &MFI, MachineInstr &MI, LiveVariables *LV) const { |
143 | 0 | // FIXME: Thumb2 support. |
144 | 0 | |
145 | 0 | if (!EnableARM3Addr) |
146 | 0 | return nullptr; |
147 | 0 | |
148 | 0 | MachineFunction &MF = *MI.getParent()->getParent(); |
149 | 0 | uint64_t TSFlags = MI.getDesc().TSFlags; |
150 | 0 | bool isPre = false; |
151 | 0 | switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) { |
152 | 0 | default: return nullptr; |
153 | 0 | case ARMII::IndexModePre: |
154 | 0 | isPre = true; |
155 | 0 | break; |
156 | 0 | case ARMII::IndexModePost: |
157 | 0 | break; |
158 | 0 | } |
159 | 0 | |
160 | 0 | // Try splitting an indexed load/store to an un-indexed one plus an add/sub |
161 | 0 | // operation. |
162 | 0 | unsigned MemOpc = getUnindexedOpcode(MI.getOpcode()); |
163 | 0 | if (MemOpc == 0) |
164 | 0 | return nullptr; |
165 | 0 | |
166 | 0 | MachineInstr *UpdateMI = nullptr; |
167 | 0 | MachineInstr *MemMI = nullptr; |
168 | 0 | unsigned AddrMode = (TSFlags & ARMII::AddrModeMask); |
169 | 0 | const MCInstrDesc &MCID = MI.getDesc(); |
170 | 0 | unsigned NumOps = MCID.getNumOperands(); |
171 | 0 | bool isLoad = !MI.mayStore(); |
172 | 0 | const MachineOperand &WB = isLoad ? MI.getOperand(1) : MI.getOperand(0); |
173 | 0 | const MachineOperand &Base = MI.getOperand(2); |
174 | 0 | const MachineOperand &Offset = MI.getOperand(NumOps - 3); |
175 | 0 | unsigned WBReg = WB.getReg(); |
176 | 0 | unsigned BaseReg = Base.getReg(); |
177 | 0 | unsigned OffReg = Offset.getReg(); |
178 | 0 | unsigned OffImm = MI.getOperand(NumOps - 2).getImm(); |
179 | 0 | ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI.getOperand(NumOps - 1).getImm(); |
180 | 0 | switch (AddrMode) { |
181 | 0 | default: llvm_unreachable("Unknown indexed op!"); |
182 | 0 | case ARMII::AddrMode2: { |
183 | 0 | bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub; |
184 | 0 | unsigned Amt = ARM_AM::getAM2Offset(OffImm); |
185 | 0 | if (OffReg == 0) { |
186 | 0 | if (ARM_AM::getSOImmVal(Amt) == -1) |
187 | 0 | // Can't encode it in a so_imm operand. This transformation will |
188 | 0 | // add more than 1 instruction. Abandon! |
189 | 0 | return nullptr; |
190 | 0 | UpdateMI = BuildMI(MF, MI.getDebugLoc(), |
191 | 0 | get(isSub ? ARM::SUBri : ARM::ADDri), WBReg) |
192 | 0 | .addReg(BaseReg) |
193 | 0 | .addImm(Amt) |
194 | 0 | .add(predOps(Pred)) |
195 | 0 | .add(condCodeOp()); |
196 | 0 | } else if (Amt != 0) { |
197 | 0 | ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm); |
198 | 0 | unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt); |
199 | 0 | UpdateMI = BuildMI(MF, MI.getDebugLoc(), |
200 | 0 | get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg) |
201 | 0 | .addReg(BaseReg) |
202 | 0 | .addReg(OffReg) |
203 | 0 | .addReg(0) |
204 | 0 | .addImm(SOOpc) |
205 | 0 | .add(predOps(Pred)) |
206 | 0 | .add(condCodeOp()); |
207 | 0 | } else |
208 | 0 | UpdateMI = BuildMI(MF, MI.getDebugLoc(), |
209 | 0 | get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg) |
210 | 0 | .addReg(BaseReg) |
211 | 0 | .addReg(OffReg) |
212 | 0 | .add(predOps(Pred)) |
213 | 0 | .add(condCodeOp()); |
214 | 0 | break; |
215 | 0 | } |
216 | 0 | case ARMII::AddrMode3 : { |
217 | 0 | bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub; |
218 | 0 | unsigned Amt = ARM_AM::getAM3Offset(OffImm); |
219 | 0 | if (OffReg == 0) |
220 | 0 | // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand. |
221 | 0 | UpdateMI = BuildMI(MF, MI.getDebugLoc(), |
222 | 0 | get(isSub ? ARM::SUBri : ARM::ADDri), WBReg) |
223 | 0 | .addReg(BaseReg) |
224 | 0 | .addImm(Amt) |
225 | 0 | .add(predOps(Pred)) |
226 | 0 | .add(condCodeOp()); |
227 | 0 | else |
228 | 0 | UpdateMI = BuildMI(MF, MI.getDebugLoc(), |
229 | 0 | get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg) |
230 | 0 | .addReg(BaseReg) |
231 | 0 | .addReg(OffReg) |
232 | 0 | .add(predOps(Pred)) |
233 | 0 | .add(condCodeOp()); |
234 | 0 | break; |
235 | 0 | } |
236 | 0 | } |
237 | 0 | |
238 | 0 | std::vector<MachineInstr*> NewMIs; |
239 | 0 | if (isPre) { |
240 | 0 | if (isLoad) |
241 | 0 | MemMI = |
242 | 0 | BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg()) |
243 | 0 | .addReg(WBReg) |
244 | 0 | .addImm(0) |
245 | 0 | .addImm(Pred); |
246 | 0 | else |
247 | 0 | MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc)) |
248 | 0 | .addReg(MI.getOperand(1).getReg()) |
249 | 0 | .addReg(WBReg) |
250 | 0 | .addReg(0) |
251 | 0 | .addImm(0) |
252 | 0 | .addImm(Pred); |
253 | 0 | NewMIs.push_back(MemMI); |
254 | 0 | NewMIs.push_back(UpdateMI); |
255 | 0 | } else { |
256 | 0 | if (isLoad) |
257 | 0 | MemMI = |
258 | 0 | BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg()) |
259 | 0 | .addReg(BaseReg) |
260 | 0 | .addImm(0) |
261 | 0 | .addImm(Pred); |
262 | 0 | else |
263 | 0 | MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc)) |
264 | 0 | .addReg(MI.getOperand(1).getReg()) |
265 | 0 | .addReg(BaseReg) |
266 | 0 | .addReg(0) |
267 | 0 | .addImm(0) |
268 | 0 | .addImm(Pred); |
269 | 0 | if (WB.isDead()) |
270 | 0 | UpdateMI->getOperand(0).setIsDead(); |
271 | 0 | NewMIs.push_back(UpdateMI); |
272 | 0 | NewMIs.push_back(MemMI); |
273 | 0 | } |
274 | 0 | |
275 | 0 | // Transfer LiveVariables states, kill / dead info. |
276 | 0 | if (LV) { |
277 | 0 | for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { |
278 | 0 | MachineOperand &MO = MI.getOperand(i); |
279 | 0 | if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) { |
280 | 0 | unsigned Reg = MO.getReg(); |
281 | 0 | |
282 | 0 | LiveVariables::VarInfo &VI = LV->getVarInfo(Reg); |
283 | 0 | if (MO.isDef()) { |
284 | 0 | MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI; |
285 | 0 | if (MO.isDead()) |
286 | 0 | LV->addVirtualRegisterDead(Reg, *NewMI); |
287 | 0 | } |
288 | 0 | if (MO.isUse() && MO.isKill()) { |
289 | 0 | for (unsigned j = 0; j < 2; ++j) { |
290 | 0 | // Look at the two new MI's in reverse order. |
291 | 0 | MachineInstr *NewMI = NewMIs[j]; |
292 | 0 | if (!NewMI->readsRegister(Reg)) |
293 | 0 | continue; |
294 | 0 | LV->addVirtualRegisterKilled(Reg, *NewMI); |
295 | 0 | if (VI.removeKill(MI)) |
296 | 0 | VI.Kills.push_back(NewMI); |
297 | 0 | break; |
298 | 0 | } |
299 | 0 | } |
300 | 0 | } |
301 | 0 | } |
302 | 0 | } |
303 | 0 | |
304 | 0 | MachineBasicBlock::iterator MBBI = MI.getIterator(); |
305 | 0 | MFI->insert(MBBI, NewMIs[1]); |
306 | 0 | MFI->insert(MBBI, NewMIs[0]); |
307 | 0 | return NewMIs[0]; |
308 | 0 | } |
309 | | |
310 | | // Branch analysis. |
311 | | bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB, |
312 | | MachineBasicBlock *&TBB, |
313 | | MachineBasicBlock *&FBB, |
314 | | SmallVectorImpl<MachineOperand> &Cond, |
315 | 3.34M | bool AllowModify) const { |
316 | 3.34M | TBB = nullptr; |
317 | 3.34M | FBB = nullptr; |
318 | 3.34M | |
319 | 3.34M | MachineBasicBlock::iterator I = MBB.end(); |
320 | 3.34M | if (I == MBB.begin()) |
321 | 14.7k | return false; // Empty blocks are easy. |
322 | 3.33M | --I; |
323 | 3.33M | |
324 | 3.33M | // Walk backwards from the end of the basic block until the branch is |
325 | 3.33M | // analyzed or we give up. |
326 | 5.72M | while (isPredicated(*I) || I->isTerminator() || I->isDebugValue()) { |
327 | 2.88M | // Flag to be raised on unanalyzeable instructions. This is useful in cases |
328 | 2.88M | // where we want to clean up on the end of the basic block before we bail |
329 | 2.88M | // out. |
330 | 2.88M | bool CantAnalyze = false; |
331 | 2.88M | |
332 | 2.88M | // Skip over DEBUG values and predicated nonterminators. |
333 | 3.27M | while (I->isDebugInstr() || !I->isTerminator()) { |
334 | 457k | if (I == MBB.begin()) |
335 | 62.3k | return false; |
336 | 395k | --I; |
337 | 395k | } |
338 | 2.88M | |
339 | 2.88M | if (isIndirectBranchOpcode(I->getOpcode()) || |
340 | 2.81M | isJumpTableBranchOpcode(I->getOpcode())) { |
341 | 20.0k | // Indirect branches and jump tables can't be analyzed, but we still want |
342 | 20.0k | // to clean up any instructions at the tail of the basic block. |
343 | 20.0k | CantAnalyze = true; |
344 | 2.79M | } else if (isUncondBranchOpcode(I->getOpcode())) { |
345 | 545k | TBB = I->getOperand(0).getMBB(); |
346 | 2.25M | } else if (isCondBranchOpcode(I->getOpcode())) { |
347 | 1.86M | // Bail out if we encounter multiple conditional branches. |
348 | 1.86M | if (!Cond.empty()) |
349 | 5.62k | return true; |
350 | 1.86M | |
351 | 1.86M | assert(!FBB && "FBB should have been null."); |
352 | 1.86M | FBB = TBB; |
353 | 1.86M | TBB = I->getOperand(0).getMBB(); |
354 | 1.86M | Cond.push_back(I->getOperand(1)); |
355 | 1.86M | Cond.push_back(I->getOperand(2)); |
356 | 1.86M | } else if (386k I->isReturn()386k ) { |
357 | 378k | // Returns can't be analyzed, but we should run cleanup. |
358 | 378k | CantAnalyze = !isPredicated(*I); |
359 | 378k | } else { |
360 | 8.31k | // We encountered other unrecognized terminator. Bail out immediately. |
361 | 8.31k | return true; |
362 | 8.31k | } |
363 | 2.80M | |
364 | 2.80M | // Cleanup code - to be run for unpredicated unconditional branches and |
365 | 2.80M | // returns. |
366 | 2.80M | if (!isPredicated(*I) && |
367 | 2.80M | (isUncondBranchOpcode(I->getOpcode()) || |
368 | 936k | isIndirectBranchOpcode(I->getOpcode()) || |
369 | 936k | isJumpTableBranchOpcode(I->getOpcode()) || |
370 | 936k | I->isReturn())) { |
371 | 936k | // Forget any previous condition branch information - it no longer applies. |
372 | 936k | Cond.clear(); |
373 | 936k | FBB = nullptr; |
374 | 936k | |
375 | 936k | // If we can modify the function, delete everything below this |
376 | 936k | // unconditional branch. |
377 | 936k | if (AllowModify) { |
378 | 567k | MachineBasicBlock::iterator DI = std::next(I); |
379 | 567k | while (DI != MBB.end()) { |
380 | 6 | MachineInstr &InstToDelete = *DI; |
381 | 6 | ++DI; |
382 | 6 | InstToDelete.eraseFromParent(); |
383 | 6 | } |
384 | 567k | } |
385 | 936k | } |
386 | 2.80M | |
387 | 2.80M | if (CantAnalyze) |
388 | 391k | return true; |
389 | 2.41M | |
390 | 2.41M | if (I == MBB.begin()) |
391 | 25.7k | return false; |
392 | 2.38M | |
393 | 2.38M | --I; |
394 | 2.38M | } |
395 | 3.33M | |
396 | 3.33M | // We made it past the terminators without bailing out - we must have |
397 | 3.33M | // analyzed this branch successfully. |
398 | 3.33M | return false; |
399 | 3.33M | } |
400 | | |
401 | | unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB, |
402 | 388k | int *BytesRemoved) const { |
403 | 388k | assert(!BytesRemoved && "code size not handled"); |
404 | 388k | |
405 | 388k | MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); |
406 | 388k | if (I == MBB.end()) |
407 | 2 | return 0; |
408 | 388k | |
409 | 388k | if (!isUncondBranchOpcode(I->getOpcode()) && |
410 | 388k | !isCondBranchOpcode(I->getOpcode())) |
411 | 8.29k | return 0; |
412 | 379k | |
413 | 379k | // Remove the branch. |
414 | 379k | I->eraseFromParent(); |
415 | 379k | |
416 | 379k | I = MBB.end(); |
417 | 379k | |
418 | 379k | if (I == MBB.begin()) return 1; |
419 | 367k | --I; |
420 | 367k | if (!isCondBranchOpcode(I->getOpcode())) |
421 | 301k | return 1; |
422 | 66.1k | |
423 | 66.1k | // Remove the branch. |
424 | 66.1k | I->eraseFromParent(); |
425 | 66.1k | return 2; |
426 | 66.1k | } |
427 | | |
428 | | unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB, |
429 | | MachineBasicBlock *TBB, |
430 | | MachineBasicBlock *FBB, |
431 | | ArrayRef<MachineOperand> Cond, |
432 | | const DebugLoc &DL, |
433 | 384k | int *BytesAdded) const { |
434 | 384k | assert(!BytesAdded && "code size not handled"); |
435 | 384k | ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>(); |
436 | 384k | int BOpc = !AFI->isThumbFunction() |
437 | 384k | ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB); |
438 | 384k | int BccOpc = !AFI->isThumbFunction() |
439 | 384k | ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc); |
440 | 384k | bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function(); |
441 | 384k | |
442 | 384k | // Shouldn't be a fall through. |
443 | 384k | assert(TBB && "insertBranch must not be told to insert a fallthrough"); |
444 | 384k | assert((Cond.size() == 2 || Cond.size() == 0) && |
445 | 384k | "ARM branch conditions have two components!"); |
446 | 384k | |
447 | 384k | // For conditional branches, we use addOperand to preserve CPSR flags. |
448 | 384k | |
449 | 384k | if (!FBB) { |
450 | 375k | if (Cond.empty()) { // Unconditional branch? |
451 | 77.4k | if (isThumb) |
452 | 76.7k | BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).add(predOps(ARMCC::AL)); |
453 | 714 | else |
454 | 714 | BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB); |
455 | 77.4k | } else |
456 | 297k | BuildMI(&MBB, DL, get(BccOpc)) |
457 | 297k | .addMBB(TBB) |
458 | 297k | .addImm(Cond[0].getImm()) |
459 | 297k | .add(Cond[1]); |
460 | 375k | return 1; |
461 | 375k | } |
462 | 9.38k | |
463 | 9.38k | // Two-way conditional branch. |
464 | 9.38k | BuildMI(&MBB, DL, get(BccOpc)) |
465 | 9.38k | .addMBB(TBB) |
466 | 9.38k | .addImm(Cond[0].getImm()) |
467 | 9.38k | .add(Cond[1]); |
468 | 9.38k | if (isThumb) |
469 | 9.31k | BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(ARMCC::AL)); |
470 | 74 | else |
471 | 74 | BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB); |
472 | 9.38k | return 2; |
473 | 9.38k | } |
474 | | |
475 | | bool ARMBaseInstrInfo:: |
476 | 382k | reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { |
477 | 382k | ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm(); |
478 | 382k | Cond[0].setImm(ARMCC::getOppositeCondition(CC)); |
479 | 382k | return false; |
480 | 382k | } |
481 | | |
482 | 10.5M | bool ARMBaseInstrInfo::isPredicated(const MachineInstr &MI) const { |
483 | 10.5M | if (MI.isBundle()) { |
484 | 22.0k | MachineBasicBlock::const_instr_iterator I = MI.getIterator(); |
485 | 22.0k | MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); |
486 | 44.0k | while (++I != E && I->isInsideBundle()) { |
487 | 44.0k | int PIdx = I->findFirstPredOperandIdx(); |
488 | 44.0k | int PIdx = I->findFirstPredOperandIdx(); |
489 | 44.0k | if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL) |
490 | 44.0k | } |
491 | 22.0k | return false; |
492 | 10.4M | } |
493 | 10.4M | |
494 | 10.4M | int PIdx = MI.findFirstPredOperandIdx(); |
495 | 10.4M | return PIdx != -1 && MI.getOperand(PIdx).getImm() != ARMCC::AL; |
496 | 10.4M | } |
497 | | |
498 | | bool ARMBaseInstrInfo::PredicateInstruction( |
499 | 9.04k | MachineInstr &MI, ArrayRef<MachineOperand> Pred) const { |
500 | 9.04k | unsigned Opc = MI.getOpcode(); |
501 | 9.04k | if (isUncondBranchOpcode(Opc)) { |
502 | 0 | MI.setDesc(get(getMatchingCondBranchOpcode(Opc))); |
503 | 0 | MachineInstrBuilder(*MI.getParent()->getParent(), MI) |
504 | 0 | .addImm(Pred[0].getImm()) |
505 | 0 | .addReg(Pred[1].getReg()); |
506 | 0 | return true; |
507 | 0 | } |
508 | 9.04k | |
509 | 9.04k | int PIdx = MI.findFirstPredOperandIdx(); |
510 | 9.04k | if (PIdx != -1) { |
511 | 9.04k | MachineOperand &PMO = MI.getOperand(PIdx); |
512 | 9.04k | PMO.setImm(Pred[0].getImm()); |
513 | 9.04k | MI.getOperand(PIdx+1).setReg(Pred[1].getReg()); |
514 | 9.04k | return true; |
515 | 9.04k | } |
516 | 0 | return false; |
517 | 0 | } |
518 | | |
519 | | bool ARMBaseInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1, |
520 | 4.05k | ArrayRef<MachineOperand> Pred2) const { |
521 | 4.05k | if (Pred1.size() > 2 || Pred2.size() > 2) |
522 | 0 | return false; |
523 | 4.05k | |
524 | 4.05k | ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm(); |
525 | 4.05k | ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm(); |
526 | 4.05k | if (CC1 == CC2) |
527 | 1.34k | return true; |
528 | 2.71k | |
529 | 2.71k | switch (CC1) { |
530 | 2.71k | default: |
531 | 2.27k | return false; |
532 | 2.71k | case ARMCC::AL: |
533 | 0 | return true; |
534 | 2.71k | case ARMCC::HS: |
535 | 216 | return CC2 == ARMCC::HI; |
536 | 2.71k | case ARMCC::LS: |
537 | 222 | return CC2 == ARMCC::LO || CC2 == ARMCC::EQ; |
538 | 2.71k | case ARMCC::GE: |
539 | 5 | return CC2 == ARMCC::GT; |
540 | 2.71k | case ARMCC::LE: |
541 | 2 | return CC2 == ARMCC::LT; |
542 | 2.71k | } |
543 | 2.71k | } |
544 | | |
545 | | bool ARMBaseInstrInfo::DefinesPredicate( |
546 | 289k | MachineInstr &MI, std::vector<MachineOperand> &Pred) const { |
547 | 289k | bool Found = false; |
548 | 1.80M | for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { |
549 | 1.51M | const MachineOperand &MO = MI.getOperand(i); |
550 | 1.51M | if ((MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) || |
551 | 1.51M | (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)) { |
552 | 52.3k | Pred.push_back(MO); |
553 | 52.3k | Found = true; |
554 | 52.3k | } |
555 | 1.51M | } |
556 | 289k | |
557 | 289k | return Found; |
558 | 289k | } |
559 | | |
560 | 88 | bool ARMBaseInstrInfo::isCPSRDefined(const MachineInstr &MI) { |
561 | 88 | for (const auto &MO : MI.operands()) |
562 | 488 | if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead()) |
563 | 9 | return true; |
564 | 88 | return false; |
565 | 88 | } |
566 | | |
567 | | bool ARMBaseInstrInfo::isAddrMode3OpImm(const MachineInstr &MI, |
568 | 0 | unsigned Op) const { |
569 | 0 | const MachineOperand &Offset = MI.getOperand(Op + 1); |
570 | 0 | return Offset.getReg() != 0; |
571 | 0 | } |
572 | | |
573 | | // Load with negative register offset requires additional 1cyc and +I unit |
574 | | // for Cortex A57 |
575 | | bool ARMBaseInstrInfo::isAddrMode3OpMinusReg(const MachineInstr &MI, |
576 | 0 | unsigned Op) const { |
577 | 0 | const MachineOperand &Offset = MI.getOperand(Op + 1); |
578 | 0 | const MachineOperand &Opc = MI.getOperand(Op + 2); |
579 | 0 | assert(Opc.isImm()); |
580 | 0 | assert(Offset.isReg()); |
581 | 0 | int64_t OpcImm = Opc.getImm(); |
582 | 0 |
|
583 | 0 | bool isSub = ARM_AM::getAM3Op(OpcImm) == ARM_AM::sub; |
584 | 0 | return (isSub && Offset.getReg() != 0); |
585 | 0 | } |
586 | | |
587 | | bool ARMBaseInstrInfo::isLdstScaledReg(const MachineInstr &MI, |
588 | 0 | unsigned Op) const { |
589 | 0 | const MachineOperand &Opc = MI.getOperand(Op + 2); |
590 | 0 | unsigned OffImm = Opc.getImm(); |
591 | 0 | return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift; |
592 | 0 | } |
593 | | |
594 | | // Load, scaled register offset, not plus LSL2 |
595 | | bool ARMBaseInstrInfo::isLdstScaledRegNotPlusLsl2(const MachineInstr &MI, |
596 | 3 | unsigned Op) const { |
597 | 3 | const MachineOperand &Opc = MI.getOperand(Op + 2); |
598 | 3 | unsigned OffImm = Opc.getImm(); |
599 | 3 | |
600 | 3 | bool isAdd = ARM_AM::getAM2Op(OffImm) == ARM_AM::add; |
601 | 3 | unsigned Amt = ARM_AM::getAM2Offset(OffImm); |
602 | 3 | ARM_AM::ShiftOpc ShiftOpc = ARM_AM::getAM2ShiftOpc(OffImm); |
603 | 3 | if (ShiftOpc == ARM_AM::no_shift) return false; // not scaled |
604 | 3 | bool SimpleScaled = (isAdd && ShiftOpc == ARM_AM::lsl && Amt == 2); |
605 | 3 | return !SimpleScaled; |
606 | 3 | } |
607 | | |
608 | | // Minus reg for ldstso addr mode |
609 | | bool ARMBaseInstrInfo::isLdstSoMinusReg(const MachineInstr &MI, |
610 | 3 | unsigned Op) const { |
611 | 3 | unsigned OffImm = MI.getOperand(Op + 2).getImm(); |
612 | 3 | return ARM_AM::getAM2Op(OffImm) == ARM_AM::sub; |
613 | 3 | } |
614 | | |
615 | | // Load, scaled register offset |
616 | | bool ARMBaseInstrInfo::isAm2ScaledReg(const MachineInstr &MI, |
617 | 0 | unsigned Op) const { |
618 | 0 | unsigned OffImm = MI.getOperand(Op + 2).getImm(); |
619 | 0 | return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift; |
620 | 0 | } |
621 | | |
622 | 263k | static bool isEligibleForITBlock(const MachineInstr *MI) { |
623 | 263k | switch (MI->getOpcode()) { |
624 | 263k | default: return true; |
625 | 263k | case ARM::tADC: // ADC (register) T1 |
626 | 88 | case ARM::tADDi3: // ADD (immediate) T1 |
627 | 88 | case ARM::tADDi8: // ADD (immediate) T2 |
628 | 88 | case ARM::tADDrr: // ADD (register) T1 |
629 | 88 | case ARM::tAND: // AND (register) T1 |
630 | 88 | case ARM::tASRri: // ASR (immediate) T1 |
631 | 88 | case ARM::tASRrr: // ASR (register) T1 |
632 | 88 | case ARM::tBIC: // BIC (register) T1 |
633 | 88 | case ARM::tEOR: // EOR (register) T1 |
634 | 88 | case ARM::tLSLri: // LSL (immediate) T1 |
635 | 88 | case ARM::tLSLrr: // LSL (register) T1 |
636 | 88 | case ARM::tLSRri: // LSR (immediate) T1 |
637 | 88 | case ARM::tLSRrr: // LSR (register) T1 |
638 | 88 | case ARM::tMUL: // MUL T1 |
639 | 88 | case ARM::tMVN: // MVN (register) T1 |
640 | 88 | case ARM::tORR: // ORR (register) T1 |
641 | 88 | case ARM::tROR: // ROR (register) T1 |
642 | 88 | case ARM::tRSB: // RSB (immediate) T1 |
643 | 88 | case ARM::tSBC: // SBC (register) T1 |
644 | 88 | case ARM::tSUBi3: // SUB (immediate) T1 |
645 | 88 | case ARM::tSUBi8: // SUB (immediate) T2 |
646 | 88 | case ARM::tSUBrr: // SUB (register) T1 |
647 | 88 | return !ARMBaseInstrInfo::isCPSRDefined(*MI); |
648 | 263k | } |
649 | 263k | } |
650 | | |
651 | | /// isPredicable - Return true if the specified instruction can be predicated. |
652 | | /// By default, this returns true for every instruction with a |
653 | | /// PredicateOperand. |
654 | 292k | bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const { |
655 | 292k | if (!MI.isPredicable()) |
656 | 28.8k | return false; |
657 | 263k | |
658 | 263k | if (MI.isBundle()) |
659 | 36 | return false; |
660 | 263k | |
661 | 263k | if (!isEligibleForITBlock(&MI)) |
662 | 9 | return false; |
663 | 263k | |
664 | 263k | const ARMFunctionInfo *AFI = |
665 | 263k | MI.getParent()->getParent()->getInfo<ARMFunctionInfo>(); |
666 | 263k | |
667 | 263k | // Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM. |
668 | 263k | // In their ARM encoding, they can't be encoded in a conditional form. |
669 | 263k | if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) |
670 | 2.97k | return false; |
671 | 260k | |
672 | 260k | if (AFI->isThumb2Function()) { |
673 | 239k | if (getSubtarget().restrictIT()) |
674 | 1.13k | return isV8EligibleForIT(&MI); |
675 | 259k | } |
676 | 259k | |
677 | 259k | return true; |
678 | 259k | } |
679 | | |
680 | | namespace llvm { |
681 | | |
682 | 144 | template <> bool IsCPSRDead<MachineInstr>(const MachineInstr *MI) { |
683 | 939 | for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { |
684 | 795 | const MachineOperand &MO = MI->getOperand(i); |
685 | 795 | if (!MO.isReg() || MO.isUndef() || MO.isUse()) |
686 | 507 | continue; |
687 | 288 | if (MO.getReg() != ARM::CPSR) |
688 | 144 | continue; |
689 | 144 | if (!MO.isDead()) |
690 | 0 | return false; |
691 | 144 | } |
692 | 144 | // all definitions of CPSR are dead |
693 | 144 | return true; |
694 | 144 | } |
695 | | |
696 | | } // end namespace llvm |
697 | | |
698 | | /// GetInstSize - Return the size of the specified MachineInstr. |
699 | | /// |
700 | 2.59M | unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { |
701 | 2.59M | const MachineBasicBlock &MBB = *MI.getParent(); |
702 | 2.59M | const MachineFunction *MF = MBB.getParent(); |
703 | 2.59M | const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo(); |
704 | 2.59M | |
705 | 2.59M | const MCInstrDesc &MCID = MI.getDesc(); |
706 | 2.59M | if (MCID.getSize()) |
707 | 1.99M | return MCID.getSize(); |
708 | 600k | |
709 | 600k | switch (MI.getOpcode()) { |
710 | 600k | default: |
711 | 292k | // pseudo-instruction sizes are zero. |
712 | 292k | return 0; |
713 | 600k | case TargetOpcode::BUNDLE: |
714 | 1 | return getInstBundleLength(MI); |
715 | 600k | case ARM::MOVi16_ga_pcrel: |
716 | 111k | case ARM::MOVTi16_ga_pcrel: |
717 | 111k | case ARM::t2MOVi16_ga_pcrel: |
718 | 111k | case ARM::t2MOVTi16_ga_pcrel: |
719 | 111k | return 4; |
720 | 111k | case ARM::MOVi32imm: |
721 | 13 | case ARM::t2MOVi32imm: |
722 | 13 | return 8; |
723 | 31.7k | case ARM::CONSTPOOL_ENTRY: |
724 | 31.7k | case ARM::JUMPTABLE_INSTS: |
725 | 31.7k | case ARM::JUMPTABLE_ADDRS: |
726 | 31.7k | case ARM::JUMPTABLE_TBB: |
727 | 31.7k | case ARM::JUMPTABLE_TBH: |
728 | 31.7k | // If this machine instr is a constant pool entry, its size is recorded as |
729 | 31.7k | // operand #2. |
730 | 31.7k | return MI.getOperand(2).getImm(); |
731 | 31.7k | case ARM::Int_eh_sjlj_longjmp: |
732 | 4 | return 16; |
733 | 31.7k | case ARM::tInt_eh_sjlj_longjmp: |
734 | 2 | return 10; |
735 | 31.7k | case ARM::tInt_WIN_eh_sjlj_longjmp: |
736 | 3 | return 12; |
737 | 31.7k | case ARM::Int_eh_sjlj_setjmp: |
738 | 7 | case ARM::Int_eh_sjlj_setjmp_nofp: |
739 | 7 | return 20; |
740 | 13 | case ARM::tInt_eh_sjlj_setjmp: |
741 | 13 | case ARM::t2Int_eh_sjlj_setjmp: |
742 | 13 | case ARM::t2Int_eh_sjlj_setjmp_nofp: |
743 | 13 | return 12; |
744 | 52 | case ARM::SPACE: |
745 | 52 | return MI.getOperand(1).getImm(); |
746 | 164k | case ARM::INLINEASM: |
747 | 164k | case ARM::INLINEASM_BR: { |
748 | 164k | // If this machine instr is an inline asm, measure it. |
749 | 164k | unsigned Size = getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI); |
750 | 164k | if (!MF->getInfo<ARMFunctionInfo>()->isThumbFunction()) |
751 | 284 | Size = alignTo(Size, 4); |
752 | 164k | return Size; |
753 | 164k | } |
754 | 600k | } |
755 | 600k | } |
756 | | |
757 | 1 | unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr &MI) const { |
758 | 1 | unsigned Size = 0; |
759 | 1 | MachineBasicBlock::const_instr_iterator I = MI.getIterator(); |
760 | 1 | MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); |
761 | 3 | while (++I != E && I->isInsideBundle()) { |
762 | 2 | assert(!I->isBundle() && "No nested bundle!"); |
763 | 2 | Size += getInstSizeInBytes(*I); |
764 | 2 | } |
765 | 1 | return Size; |
766 | 1 | } |
767 | | |
768 | | void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB, |
769 | | MachineBasicBlock::iterator I, |
770 | | unsigned DestReg, bool KillSrc, |
771 | 4 | const ARMSubtarget &Subtarget) const { |
772 | 4 | unsigned Opc = Subtarget.isThumb() |
773 | 4 | ? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR) |
774 | 4 | : ARM::MRS; |
775 | 4 | |
776 | 4 | MachineInstrBuilder MIB = |
777 | 4 | BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg); |
778 | 4 | |
779 | 4 | // There is only 1 A/R class MRS instruction, and it always refers to |
780 | 4 | // APSR. However, there are lots of other possibilities on M-class cores. |
781 | 4 | if (Subtarget.isMClass()) |
782 | 1 | MIB.addImm(0x800); |
783 | 4 | |
784 | 4 | MIB.add(predOps(ARMCC::AL)) |
785 | 4 | .addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc)); |
786 | 4 | } |
787 | | |
788 | | void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB, |
789 | | MachineBasicBlock::iterator I, |
790 | | unsigned SrcReg, bool KillSrc, |
791 | 4 | const ARMSubtarget &Subtarget) const { |
792 | 4 | unsigned Opc = Subtarget.isThumb() |
793 | 4 | ? (Subtarget.isMClass() ? ARM::t2MSR_M : ARM::t2MSR_AR) |
794 | 4 | : ARM::MSR; |
795 | 4 | |
796 | 4 | MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc)); |
797 | 4 | |
798 | 4 | if (Subtarget.isMClass()) |
799 | 1 | MIB.addImm(0x800); |
800 | 3 | else |
801 | 3 | MIB.addImm(8); |
802 | 4 | |
803 | 4 | MIB.addReg(SrcReg, getKillRegState(KillSrc)) |
804 | 4 | .add(predOps(ARMCC::AL)) |
805 | 4 | .addReg(ARM::CPSR, RegState::Implicit | RegState::Define); |
806 | 4 | } |
807 | | |
808 | 294 | void llvm::addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB) { |
809 | 294 | MIB.addImm(ARMVCC::None); |
810 | 294 | MIB.addReg(0); |
811 | 294 | } |
812 | | |
813 | | void llvm::addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB, |
814 | 294 | unsigned DestReg) { |
815 | 294 | addUnpredicatedMveVpredNOp(MIB); |
816 | 294 | MIB.addReg(DestReg, RegState::Undef); |
817 | 294 | } |
818 | | |
819 | 0 | void llvm::addPredicatedMveVpredNOp(MachineInstrBuilder &MIB, unsigned Cond) { |
820 | 0 | MIB.addImm(Cond); |
821 | 0 | MIB.addReg(ARM::VPR, RegState::Implicit); |
822 | 0 | } |
823 | | |
824 | | void llvm::addPredicatedMveVpredROp(MachineInstrBuilder &MIB, |
825 | 0 | unsigned Cond, unsigned Inactive) { |
826 | 0 | addPredicatedMveVpredNOp(MIB, Cond); |
827 | 0 | MIB.addReg(Inactive); |
828 | 0 | } |
829 | | |
830 | | void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, |
831 | | MachineBasicBlock::iterator I, |
832 | | const DebugLoc &DL, unsigned DestReg, |
833 | 10.7k | unsigned SrcReg, bool KillSrc) const { |
834 | 10.7k | bool GPRDest = ARM::GPRRegClass.contains(DestReg); |
835 | 10.7k | bool GPRSrc = ARM::GPRRegClass.contains(SrcReg); |
836 | 10.7k | |
837 | 10.7k | if (GPRDest && GPRSrc) { |
838 | 4.76k | BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg) |
839 | 4.76k | .addReg(SrcReg, getKillRegState(KillSrc)) |
840 | 4.76k | .add(predOps(ARMCC::AL)) |
841 | 4.76k | .add(condCodeOp()); |
842 | 4.76k | return; |
843 | 4.76k | } |
844 | 5.95k | |
845 | 5.95k | bool SPRDest = ARM::SPRRegClass.contains(DestReg); |
846 | 5.95k | bool SPRSrc = ARM::SPRRegClass.contains(SrcReg); |
847 | 5.95k | |
848 | 5.95k | unsigned Opc = 0; |
849 | 5.95k | if (SPRDest && SPRSrc) |
850 | 1.40k | Opc = ARM::VMOVS; |
851 | 4.55k | else if (GPRDest && SPRSrc) |
852 | 1.05k | Opc = ARM::VMOVRS; |
853 | 3.49k | else if (SPRDest && GPRSrc) |
854 | 847 | Opc = ARM::VMOVSR; |
855 | 2.64k | else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.hasFP64()) |
856 | 1.39k | Opc = ARM::VMOVD; |
857 | 1.25k | else if (ARM::QPRRegClass.contains(DestReg, SrcReg)) |
858 | 800 | Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR; |
859 | 5.95k | |
860 | 5.95k | if (Opc) { |
861 | 5.50k | MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg); |
862 | 5.50k | MIB.addReg(SrcReg, getKillRegState(KillSrc)); |
863 | 5.50k | if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR) |
864 | 800 | MIB.addReg(SrcReg, getKillRegState(KillSrc)); |
865 | 5.50k | if (Opc == ARM::MVE_VORR) |
866 | 294 | addUnpredicatedMveVpredROp(MIB, DestReg); |
867 | 5.20k | else |
868 | 5.20k | MIB.add(predOps(ARMCC::AL)); |
869 | 5.50k | return; |
870 | 5.50k | } |
871 | 454 | |
872 | 454 | // Handle register classes that require multiple instructions. |
873 | 454 | unsigned BeginIdx = 0; |
874 | 454 | unsigned SubRegs = 0; |
875 | 454 | int Spacing = 1; |
876 | 454 | |
877 | 454 | // Use VORRq when possible. |
878 | 454 | if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) { |
879 | 0 | Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR; |
880 | 0 | BeginIdx = ARM::qsub_0; |
881 | 0 | SubRegs = 2; |
882 | 454 | } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) { |
883 | 5 | Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR; |
884 | 5 | BeginIdx = ARM::qsub_0; |
885 | 5 | SubRegs = 4; |
886 | 5 | // Fall back to VMOVD. |
887 | 449 | } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) { |
888 | 3 | Opc = ARM::VMOVD; |
889 | 3 | BeginIdx = ARM::dsub_0; |
890 | 3 | SubRegs = 2; |
891 | 446 | } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) { |
892 | 0 | Opc = ARM::VMOVD; |
893 | 0 | BeginIdx = ARM::dsub_0; |
894 | 0 | SubRegs = 3; |
895 | 446 | } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) { |
896 | 0 | Opc = ARM::VMOVD; |
897 | 0 | BeginIdx = ARM::dsub_0; |
898 | 0 | SubRegs = 4; |
899 | 446 | } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) { |
900 | 2 | Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr; |
901 | 2 | BeginIdx = ARM::gsub_0; |
902 | 2 | SubRegs = 2; |
903 | 444 | } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) { |
904 | 0 | Opc = ARM::VMOVD; |
905 | 0 | BeginIdx = ARM::dsub_0; |
906 | 0 | SubRegs = 2; |
907 | 0 | Spacing = 2; |
908 | 444 | } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) { |
909 | 0 | Opc = ARM::VMOVD; |
910 | 0 | BeginIdx = ARM::dsub_0; |
911 | 0 | SubRegs = 3; |
912 | 0 | Spacing = 2; |
913 | 444 | } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) { |
914 | 0 | Opc = ARM::VMOVD; |
915 | 0 | BeginIdx = ARM::dsub_0; |
916 | 0 | SubRegs = 4; |
917 | 0 | Spacing = 2; |
918 | 444 | } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && |
919 | 444 | !Subtarget.hasFP64()436 ) { |
920 | 436 | Opc = ARM::VMOVS; |
921 | 4 | } else if (DestReg == ARM::VPR) { |
922 | 436 | SubRegs = 2; |
923 | 436 | } else if (8 SrcReg == ARM::CPSR8 ) { |
924 | 4 | copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget); |
925 | 4 | return; |
926 | 4 | } else if (DestReg == ARM::CPSR) { |
927 | 4 | copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget); |
928 | 4 | return; |
929 | 4 | } else if (0 DestReg == ARM::VPR0 ) { |
930 | 0 | assert(ARM::GPRRegClass.contains(SrcReg)); |
931 | 0 | BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_P0), DestReg) |
932 | 0 | .addReg(SrcReg, getKillRegState(KillSrc)) |
933 | 0 | .add(predOps(ARMCC::AL)); |
934 | 0 | return; |
935 | 0 | } else if (SrcReg == ARM::VPR) { |
936 | 0 | assert(ARM::GPRRegClass.contains(DestReg)); |
937 | 0 | BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_P0), DestReg) |
938 | 0 | .addReg(SrcReg, getKillRegState(KillSrc)) |
939 | 0 | .add(predOps(ARMCC::AL)); |
940 | 0 | return; |
941 | 0 | } else if (DestReg == ARM::FPSCR_NZCV) { |
942 | 0 | assert(ARM::GPRRegClass.contains(SrcReg)); |
943 | 0 | BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_FPSCR_NZCVQC), DestReg) |
944 | 0 | .addReg(SrcReg, getKillRegState(KillSrc)) |
945 | 0 | .add(predOps(ARMCC::AL)); |
946 | 0 | return; |
947 | 0 | } else if (SrcReg == ARM::FPSCR_NZCV) { |
948 | 0 | assert(ARM::GPRRegClass.contains(DestReg)); |
949 | 0 | BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_FPSCR_NZCVQC), DestReg) |
950 | 0 | .addReg(SrcReg, getKillRegState(KillSrc)) |
951 | 0 | .add(predOps(ARMCC::AL)); |
952 | 0 | return; |
953 | 0 | } |
954 | 446 | |
955 | 446 | assert(Opc && "Impossible reg-to-reg copy"); |
956 | 446 | |
957 | 446 | const TargetRegisterInfo *TRI = &getRegisterInfo(); |
958 | 446 | MachineInstrBuilder Mov; |
959 | 446 | |
960 | 446 | // Copy register tuples backward when the first Dest reg overlaps with SrcReg. |
961 | 446 | if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) { |
962 | 0 | BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing); |
963 | 0 | Spacing = -Spacing; |
964 | 0 | } |
965 | | #ifndef NDEBUG |
966 | | SmallSet<unsigned, 4> DstRegs; |
967 | | #endif |
968 | 1.34k | for (unsigned i = 0; i != SubRegs; ++i) { |
969 | 902 | unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing); |
970 | 902 | unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing); |
971 | 902 | assert(Dst && Src && "Bad sub-register"); |
972 | | #ifndef NDEBUG |
973 | | assert(!DstRegs.count(Src) && "destructive vector copy"); |
974 | | DstRegs.insert(Dst); |
975 | | #endif |
976 | | Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src); |
977 | 902 | // VORR (NEON or MVE) takes two source operands. |
978 | 902 | if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR) { |
979 | 20 | Mov.addReg(Src); |
980 | 20 | } |
981 | 902 | // MVE VORR takes predicate operands in place of an ordinary condition. |
982 | 902 | if (Opc == ARM::MVE_VORR) |
983 | 0 | addUnpredicatedMveVpredROp(Mov, Dst); |
984 | 902 | else |
985 | 902 | Mov = Mov.add(predOps(ARMCC::AL)); |
986 | 902 | // MOVr can set CC. |
987 | 902 | if (Opc == ARM::MOVr) |
988 | 2 | Mov = Mov.add(condCodeOp()); |
989 | 902 | } |
990 | 446 | // Add implicit super-register defs and kills to the last instruction. |
991 | 446 | Mov->addRegisterDefined(DestReg, TRI); |
992 | 446 | if (KillSrc) |
993 | 143 | Mov->addRegisterKilled(SrcReg, TRI); |
994 | 446 | } |
995 | | |
996 | | bool ARMBaseInstrInfo::isCopyInstrImpl(const MachineInstr &MI, |
997 | | const MachineOperand *&Src, |
998 | 159 | const MachineOperand *&Dest) const { |
999 | 159 | // VMOVRRD is also a copy instruction but it requires |
1000 | 159 | // special way of handling. It is more complex copy version |
1001 | 159 | // and since that we are not considering it. For recognition |
1002 | 159 | // of such instruction isExtractSubregLike MI interface fuction |
1003 | 159 | // could be used. |
1004 | 159 | // VORRq is considered as a move only if two inputs are |
1005 | 159 | // the same register. |
1006 | 159 | if (!MI.isMoveReg() || |
1007 | 159 | (MI.getOpcode() == ARM::VORRq && |
1008 | 22 | MI.getOperand(1).getReg() != MI.getOperand(2).getReg())) |
1009 | 137 | return false; |
1010 | 22 | Dest = &MI.getOperand(0); |
1011 | 22 | Src = &MI.getOperand(1); |
1012 | 22 | return true; |
1013 | 22 | } |
1014 | | |
1015 | | const MachineInstrBuilder & |
1016 | | ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg, |
1017 | | unsigned SubIdx, unsigned State, |
1018 | 64 | const TargetRegisterInfo *TRI) const { |
1019 | 64 | if (!SubIdx) |
1020 | 0 | return MIB.addReg(Reg, State); |
1021 | 64 | |
1022 | 64 | if (TargetRegisterInfo::isPhysicalRegister(Reg)) |
1023 | 2 | return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State); |
1024 | 62 | return MIB.addReg(Reg, State, SubIdx); |
1025 | 62 | } |
1026 | | |
1027 | | void ARMBaseInstrInfo:: |
1028 | | storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, |
1029 | | unsigned SrcReg, bool isKill, int FI, |
1030 | | const TargetRegisterClass *RC, |
1031 | 2.26k | const TargetRegisterInfo *TRI) const { |
1032 | 2.26k | MachineFunction &MF = *MBB.getParent(); |
1033 | 2.26k | MachineFrameInfo &MFI = MF.getFrameInfo(); |
1034 | 2.26k | unsigned Align = MFI.getObjectAlignment(FI); |
1035 | 2.26k | |
1036 | 2.26k | MachineMemOperand *MMO = MF.getMachineMemOperand( |
1037 | 2.26k | MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore, |
1038 | 2.26k | MFI.getObjectSize(FI), Align); |
1039 | 2.26k | |
1040 | 2.26k | switch (TRI->getSpillSize(*RC)) { |
1041 | 2.26k | case 2: |
1042 | 2 | if (ARM::HPRRegClass.hasSubClassEq(RC)) { |
1043 | 2 | BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRH)) |
1044 | 2 | .addReg(SrcReg, getKillRegState(isKill)) |
1045 | 2 | .addFrameIndex(FI) |
1046 | 2 | .addImm(0) |
1047 | 2 | .addMemOperand(MMO) |
1048 | 2 | .add(predOps(ARMCC::AL)); |
1049 | 2 | } else |
1050 | 2 | llvm_unreachable("Unknown reg class!"); |
1051 | 2 | break; |
1052 | 1.67k | case 4: |
1053 | 1.67k | if (ARM::GPRRegClass.hasSubClassEq(RC)) { |
1054 | 1.37k | BuildMI(MBB, I, DebugLoc(), get(ARM::STRi12)) |
1055 | 1.37k | .addReg(SrcReg, getKillRegState(isKill)) |
1056 | 1.37k | .addFrameIndex(FI) |
1057 | 1.37k | .addImm(0) |
1058 | 1.37k | .addMemOperand(MMO) |
1059 | 1.37k | .add(predOps(ARMCC::AL)); |
1060 | 1.37k | } else if (ARM::SPRRegClass.hasSubClassEq(RC)) { |
1061 | 295 | BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRS)) |
1062 | 295 | .addReg(SrcReg, getKillRegState(isKill)) |
1063 | 295 | .addFrameIndex(FI) |
1064 | 295 | .addImm(0) |
1065 | 295 | .addMemOperand(MMO) |
1066 | 295 | .add(predOps(ARMCC::AL)); |
1067 | 295 | } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) { |
1068 | 0 | BuildMI(MBB, I, DebugLoc(), get(ARM::VSTR_P0_off)) |
1069 | 0 | .addReg(SrcReg, getKillRegState(isKill)) |
1070 | 0 | .addFrameIndex(FI) |
1071 | 0 | .addImm(0) |
1072 | 0 | .addMemOperand(MMO) |
1073 | 0 | .add(predOps(ARMCC::AL)); |
1074 | 0 | } else |
1075 | 0 | llvm_unreachable("Unknown reg class!"); |
1076 | 1.67k | break; |
1077 | 1.67k | case 8: |
1078 | 284 | if (ARM::DPRRegClass.hasSubClassEq(RC)) { |
1079 | 279 | BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRD)) |
1080 | 279 | .addReg(SrcReg, getKillRegState(isKill)) |
1081 | 279 | .addFrameIndex(FI) |
1082 | 279 | .addImm(0) |
1083 | 279 | .addMemOperand(MMO) |
1084 | 279 | .add(predOps(ARMCC::AL)); |
1085 | 279 | } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) { |
1086 | 5 | if (Subtarget.hasV5TEOps()) { |
1087 | 3 | MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STRD)); |
1088 | 3 | AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI); |
1089 | 3 | AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI); |
1090 | 3 | MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO) |
1091 | 3 | .add(predOps(ARMCC::AL)); |
1092 | 3 | } else { |
1093 | 2 | // Fallback to STM instruction, which has existed since the dawn of |
1094 | 2 | // time. |
1095 | 2 | MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STMIA)) |
1096 | 2 | .addFrameIndex(FI) |
1097 | 2 | .addMemOperand(MMO) |
1098 | 2 | .add(predOps(ARMCC::AL)); |
1099 | 2 | AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI); |
1100 | 2 | AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI); |
1101 | 2 | } |
1102 | 5 | } else |
1103 | 5 | llvm_unreachable("Unknown reg class!"); |
1104 | 284 | break; |
1105 | 304 | case 16: |
1106 | 304 | if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) { |
1107 | 304 | // Use aligned spills if the stack can be realigned. |
1108 | 304 | if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { |
1109 | 303 | BuildMI(MBB, I, DebugLoc(), get(ARM::VST1q64)) |
1110 | 303 | .addFrameIndex(FI) |
1111 | 303 | .addImm(16) |
1112 | 303 | .addReg(SrcReg, getKillRegState(isKill)) |
1113 | 303 | .addMemOperand(MMO) |
1114 | 303 | .add(predOps(ARMCC::AL)); |
1115 | 303 | } else { |
1116 | 1 | BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMQIA)) |
1117 | 1 | .addReg(SrcReg, getKillRegState(isKill)) |
1118 | 1 | .addFrameIndex(FI) |
1119 | 1 | .addMemOperand(MMO) |
1120 | 1 | .add(predOps(ARMCC::AL)); |
1121 | 1 | } |
1122 | 304 | } else if (ARM::QPRRegClass.hasSubClassEq(RC) && |
1123 | 0 | Subtarget.hasMVEIntegerOps()) { |
1124 | 0 | auto MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::MVE_VSTRWU32)); |
1125 | 0 | MIB.addReg(SrcReg, getKillRegState(isKill)) |
1126 | 0 | .addFrameIndex(FI) |
1127 | 0 | .addImm(0) |
1128 | 0 | .addMemOperand(MMO); |
1129 | 0 | addUnpredicatedMveVpredNOp(MIB); |
1130 | 0 | } else |
1131 | 0 | llvm_unreachable("Unknown reg class!"); |
1132 | 304 | break; |
1133 | 304 | case 24: |
1134 | 1 | if (ARM::DTripleRegClass.hasSubClassEq(RC)) { |
1135 | 1 | // Use aligned spills if the stack can be realigned. |
1136 | 1 | if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { |
1137 | 0 | BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64TPseudo)) |
1138 | 0 | .addFrameIndex(FI) |
1139 | 0 | .addImm(16) |
1140 | 0 | .addReg(SrcReg, getKillRegState(isKill)) |
1141 | 0 | .addMemOperand(MMO) |
1142 | 0 | .add(predOps(ARMCC::AL)); |
1143 | 1 | } else { |
1144 | 1 | MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), |
1145 | 1 | get(ARM::VSTMDIA)) |
1146 | 1 | .addFrameIndex(FI) |
1147 | 1 | .add(predOps(ARMCC::AL)) |
1148 | 1 | .addMemOperand(MMO); |
1149 | 1 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); |
1150 | 1 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); |
1151 | 1 | AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); |
1152 | 1 | } |
1153 | 1 | } else |
1154 | 1 | llvm_unreachable("Unknown reg class!"); |
1155 | 1 | break; |
1156 | 1 | case 32: |
1157 | 0 | if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) { |
1158 | 0 | if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { |
1159 | 0 | // FIXME: It's possible to only store part of the QQ register if the |
1160 | 0 | // spilled def has a sub-register index. |
1161 | 0 | BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64QPseudo)) |
1162 | 0 | .addFrameIndex(FI) |
1163 | 0 | .addImm(16) |
1164 | 0 | .addReg(SrcReg, getKillRegState(isKill)) |
1165 | 0 | .addMemOperand(MMO) |
1166 | 0 | .add(predOps(ARMCC::AL)); |
1167 | 0 | } else { |
1168 | 0 | MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), |
1169 | 0 | get(ARM::VSTMDIA)) |
1170 | 0 | .addFrameIndex(FI) |
1171 | 0 | .add(predOps(ARMCC::AL)) |
1172 | 0 | .addMemOperand(MMO); |
1173 | 0 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); |
1174 | 0 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); |
1175 | 0 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); |
1176 | 0 | AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI); |
1177 | 0 | } |
1178 | 0 | } else |
1179 | 0 | llvm_unreachable("Unknown reg class!"); |
1180 | 0 | break; |
1181 | 2 | case 64: |
1182 | 2 | if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) { |
1183 | 2 | MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMDIA)) |
1184 | 2 | .addFrameIndex(FI) |
1185 | 2 | .add(predOps(ARMCC::AL)) |
1186 | 2 | .addMemOperand(MMO); |
1187 | 2 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); |
1188 | 2 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); |
1189 | 2 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); |
1190 | 2 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI); |
1191 | 2 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI); |
1192 | 2 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI); |
1193 | 2 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI); |
1194 | 2 | AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI); |
1195 | 2 | } else |
1196 | 2 | llvm_unreachable("Unknown reg class!"); |
1197 | 2 | break; |
1198 | 2 | default: |
1199 | 0 | llvm_unreachable("Unknown reg class!"); |
1200 | 2.26k | } |
1201 | 2.26k | } |
1202 | | |
1203 | | unsigned ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI, |
1204 | 136k | int &FrameIndex) const { |
1205 | 136k | switch (MI.getOpcode()) { |
1206 | 136k | default: break; |
1207 | 136k | case ARM::STRrs: |
1208 | 348 | case ARM::t2STRs: // FIXME: don't use t2STRs to access frame. |
1209 | 348 | if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() && |
1210 | 348 | MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 && |
1211 | 348 | MI.getOperand(3).getImm() == 0) { |
1212 | 0 | FrameIndex = MI.getOperand(1).getIndex(); |
1213 | 0 | return MI.getOperand(0).getReg(); |
1214 | 0 | } |
1215 | 348 | break; |
1216 | 7.28k | case ARM::STRi12: |
1217 | 7.28k | case ARM::t2STRi12: |
1218 | 7.28k | case ARM::tSTRspi: |
1219 | 7.28k | case ARM::VSTRD: |
1220 | 7.28k | case ARM::VSTRS: |
1221 | 7.28k | if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() && |
1222 | 7.28k | MI.getOperand(2).getImm() == 0) { |
1223 | 1.93k | FrameIndex = MI.getOperand(1).getIndex(); |
1224 | 1.93k | return MI.getOperand(0).getReg(); |
1225 | 1.93k | } |
1226 | 5.35k | break; |
1227 | 5.35k | case ARM::VSTR_P0_off: |
1228 | 0 | if (MI.getOperand(0).isFI() && MI.getOperand(1).isImm() && |
1229 | 0 | MI.getOperand(1).getImm() == 0) { |
1230 | 0 | FrameIndex = MI.getOperand(0).getIndex(); |
1231 | 0 | return ARM::P0; |
1232 | 0 | } |
1233 | 0 | break; |
1234 | 76 | case ARM::VST1q64: |
1235 | 76 | case ARM::VST1d64TPseudo: |
1236 | 76 | case ARM::VST1d64QPseudo: |
1237 | 76 | if (MI.getOperand(0).isFI() && MI.getOperand(2).getSubReg() == 0) { |
1238 | 0 | FrameIndex = MI.getOperand(0).getIndex(); |
1239 | 0 | return MI.getOperand(2).getReg(); |
1240 | 0 | } |
1241 | 76 | break; |
1242 | 76 | case ARM::VSTMQIA: |
1243 | 0 | if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) { |
1244 | 0 | FrameIndex = MI.getOperand(1).getIndex(); |
1245 | 0 | return MI.getOperand(0).getReg(); |
1246 | 0 | } |
1247 | 0 | break; |
1248 | 134k | } |
1249 | 134k | |
1250 | 134k | return 0; |
1251 | 134k | } |
1252 | | |
1253 | | unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI, |
1254 | 162k | int &FrameIndex) const { |
1255 | 162k | SmallVector<const MachineMemOperand *, 1> Accesses; |
1256 | 162k | if (MI.mayStore() && hasStoreToStackSlot(MI, Accesses) && |
1257 | 162k | Accesses.size() == 1) { |
1258 | 3.15k | FrameIndex = |
1259 | 3.15k | cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue()) |
1260 | 3.15k | ->getFrameIndex(); |
1261 | 3.15k | return true; |
1262 | 3.15k | } |
1263 | 159k | return false; |
1264 | 159k | } |
1265 | | |
1266 | | void ARMBaseInstrInfo:: |
1267 | | loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, |
1268 | | unsigned DestReg, int FI, |
1269 | | const TargetRegisterClass *RC, |
1270 | 2.34k | const TargetRegisterInfo *TRI) const { |
1271 | 2.34k | DebugLoc DL; |
1272 | 2.34k | if (I != MBB.end()) DL = I->getDebugLoc(); |
1273 | 2.34k | MachineFunction &MF = *MBB.getParent(); |
1274 | 2.34k | MachineFrameInfo &MFI = MF.getFrameInfo(); |
1275 | 2.34k | unsigned Align = MFI.getObjectAlignment(FI); |
1276 | 2.34k | MachineMemOperand *MMO = MF.getMachineMemOperand( |
1277 | 2.34k | MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad, |
1278 | 2.34k | MFI.getObjectSize(FI), Align); |
1279 | 2.34k | |
1280 | 2.34k | switch (TRI->getSpillSize(*RC)) { |
1281 | 2.34k | case 2: |
1282 | 2 | if (ARM::HPRRegClass.hasSubClassEq(RC)) { |
1283 | 2 | BuildMI(MBB, I, DL, get(ARM::VLDRH), DestReg) |
1284 | 2 | .addFrameIndex(FI) |
1285 | 2 | .addImm(0) |
1286 | 2 | .addMemOperand(MMO) |
1287 | 2 | .add(predOps(ARMCC::AL)); |
1288 | 2 | } else |
1289 | 2 | llvm_unreachable("Unknown reg class!"); |
1290 | 2 | break; |
1291 | 1.43k | case 4: |
1292 | 1.43k | if (ARM::GPRRegClass.hasSubClassEq(RC)) { |
1293 | 1.19k | BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg) |
1294 | 1.19k | .addFrameIndex(FI) |
1295 | 1.19k | .addImm(0) |
1296 | 1.19k | .addMemOperand(MMO) |
1297 | 1.19k | .add(predOps(ARMCC::AL)); |
1298 | 1.19k | } else if (ARM::SPRRegClass.hasSubClassEq(RC)) { |
1299 | 240 | BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg) |
1300 | 240 | .addFrameIndex(FI) |
1301 | 240 | .addImm(0) |
1302 | 240 | .addMemOperand(MMO) |
1303 | 240 | .add(predOps(ARMCC::AL)); |
1304 | 240 | } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) { |
1305 | 0 | BuildMI(MBB, I, DL, get(ARM::VLDR_P0_off), DestReg) |
1306 | 0 | .addFrameIndex(FI) |
1307 | 0 | .addImm(0) |
1308 | 0 | .addMemOperand(MMO) |
1309 | 0 | .add(predOps(ARMCC::AL)); |
1310 | 0 | } else |
1311 | 0 | llvm_unreachable("Unknown reg class!"); |
1312 | 1.43k | break; |
1313 | 1.43k | case 8: |
1314 | 565 | if (ARM::DPRRegClass.hasSubClassEq(RC)) { |
1315 | 561 | BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg) |
1316 | 561 | .addFrameIndex(FI) |
1317 | 561 | .addImm(0) |
1318 | 561 | .addMemOperand(MMO) |
1319 | 561 | .add(predOps(ARMCC::AL)); |
1320 | 561 | } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) { |
1321 | 4 | MachineInstrBuilder MIB; |
1322 | 4 | |
1323 | 4 | if (Subtarget.hasV5TEOps()) { |
1324 | 2 | MIB = BuildMI(MBB, I, DL, get(ARM::LDRD)); |
1325 | 2 | AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI); |
1326 | 2 | AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI); |
1327 | 2 | MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO) |
1328 | 2 | .add(predOps(ARMCC::AL)); |
1329 | 2 | } else { |
1330 | 2 | // Fall back to the LDM instruction, which has existed since the |
1331 | 2 | // dawn of time. |
1332 | 2 | MIB = BuildMI(MBB, I, DL, get(ARM::LDMIA)) |
1333 | 2 | .addFrameIndex(FI) |
1334 | 2 | .addMemOperand(MMO) |
1335 | 2 | .add(predOps(ARMCC::AL)); |
1336 | 2 | MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI); |
1337 | 2 | MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI); |
1338 | 2 | } |
1339 | 4 | |
1340 | 4 | if (TargetRegisterInfo::isPhysicalRegister(DestReg)) |
1341 | 0 | MIB.addReg(DestReg, RegState::ImplicitDefine); |
1342 | 4 | } else |
1343 | 4 | llvm_unreachable("Unknown reg class!"); |
1344 | 565 | break; |
1345 | 565 | case 16: |
1346 | 344 | if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) { |
1347 | 344 | if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { |
1348 | 343 | BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg) |
1349 | 343 | .addFrameIndex(FI) |
1350 | 343 | .addImm(16) |
1351 | 343 | .addMemOperand(MMO) |
1352 | 343 | .add(predOps(ARMCC::AL)); |
1353 | 343 | } else { |
1354 | 1 | BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg) |
1355 | 1 | .addFrameIndex(FI) |
1356 | 1 | .addMemOperand(MMO) |
1357 | 1 | .add(predOps(ARMCC::AL)); |
1358 | 1 | } |
1359 | 344 | } else if (ARM::QPRRegClass.hasSubClassEq(RC) && |
1360 | 0 | Subtarget.hasMVEIntegerOps()) { |
1361 | 0 | auto MIB = BuildMI(MBB, I, DL, get(ARM::MVE_VLDRWU32), DestReg); |
1362 | 0 | MIB.addFrameIndex(FI) |
1363 | 0 | .addImm(0) |
1364 | 0 | .addMemOperand(MMO); |
1365 | 0 | addUnpredicatedMveVpredNOp(MIB); |
1366 | 0 | } else |
1367 | 0 | llvm_unreachable("Unknown reg class!"); |
1368 | 344 | break; |
1369 | 344 | case 24: |
1370 | 1 | if (ARM::DTripleRegClass.hasSubClassEq(RC)) { |
1371 | 1 | if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { |
1372 | 0 | BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg) |
1373 | 0 | .addFrameIndex(FI) |
1374 | 0 | .addImm(16) |
1375 | 0 | .addMemOperand(MMO) |
1376 | 0 | .add(predOps(ARMCC::AL)); |
1377 | 1 | } else { |
1378 | 1 | MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) |
1379 | 1 | .addFrameIndex(FI) |
1380 | 1 | .addMemOperand(MMO) |
1381 | 1 | .add(predOps(ARMCC::AL)); |
1382 | 1 | MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI); |
1383 | 1 | MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI); |
1384 | 1 | MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI); |
1385 | 1 | if (TargetRegisterInfo::isPhysicalRegister(DestReg)) |
1386 | 0 | MIB.addReg(DestReg, RegState::ImplicitDefine); |
1387 | 1 | } |
1388 | 1 | } else |
1389 | 1 | llvm_unreachable("Unknown reg class!"); |
1390 | 1 | break; |
1391 | 1 | case 32: |
1392 | 0 | if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) { |
1393 | 0 | if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { |
1394 | 0 | BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg) |
1395 | 0 | .addFrameIndex(FI) |
1396 | 0 | .addImm(16) |
1397 | 0 | .addMemOperand(MMO) |
1398 | 0 | .add(predOps(ARMCC::AL)); |
1399 | 0 | } else { |
1400 | 0 | MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) |
1401 | 0 | .addFrameIndex(FI) |
1402 | 0 | .add(predOps(ARMCC::AL)) |
1403 | 0 | .addMemOperand(MMO); |
1404 | 0 | MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI); |
1405 | 0 | MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI); |
1406 | 0 | MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI); |
1407 | 0 | MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI); |
1408 | 0 | if (TargetRegisterInfo::isPhysicalRegister(DestReg)) |
1409 | 0 | MIB.addReg(DestReg, RegState::ImplicitDefine); |
1410 | 0 | } |
1411 | 0 | } else |
1412 | 0 | llvm_unreachable("Unknown reg class!"); |
1413 | 0 | break; |
1414 | 2 | case 64: |
1415 | 2 | if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) { |
1416 | 2 | MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) |
1417 | 2 | .addFrameIndex(FI) |
1418 | 2 | .add(predOps(ARMCC::AL)) |
1419 | 2 | .addMemOperand(MMO); |
1420 | 2 | MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI); |
1421 | 2 | MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI); |
1422 | 2 | MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI); |
1423 | 2 | MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI); |
1424 | 2 | MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI); |
1425 | 2 | MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI); |
1426 | 2 | MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI); |
1427 | 2 | MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI); |
1428 | 2 | if (TargetRegisterInfo::isPhysicalRegister(DestReg)) |
1429 | 0 | MIB.addReg(DestReg, RegState::ImplicitDefine); |
1430 | 2 | } else |
1431 | 2 | llvm_unreachable("Unknown reg class!"); |
1432 | 2 | break; |
1433 | 2 | default: |
1434 | 0 | llvm_unreachable("Unknown regclass!"); |
1435 | 2.34k | } |
1436 | 2.34k | } |
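The reload opcode above is selected purely from the register class's spill size, and the 16-byte-and-wider cases additionally prefer an aligned NEON load only when the spill slot is at least 16-byte aligned and the stack can be realigned, falling back to a VLDM form otherwise. A minimal standalone sketch of that alignment decision (stand-in enum values, not the real ARM opcodes):

enum QRegReloadKind { AlignedVLD1q64, FallbackVLDMQIA };

// Mirror of the 16-byte case above: the 128-bit aligned load is only usable
// when the slot is sufficiently aligned and the frame can be realigned.
QRegReloadKind pickQRegReload(unsigned SlotAlignBytes, bool CanRealignStack) {
  return (SlotAlignBytes >= 16 && CanRealignStack) ? AlignedVLD1q64
                                                   : FallbackVLDMQIA;
}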
1437 | | |
1438 | | unsigned ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, |
1439 | 432k | int &FrameIndex) const { |
1440 | 432k | switch (MI.getOpcode()) { |
1441 | 432k | default: break; |
1442 | 432k | case ARM::LDRrs: |
1443 | 2.97k | case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame. |
1444 | 2.97k | if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() && |
1445 | 2.97k | MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 && |
1446 | 2.97k | MI.getOperand(3).getImm() == 0) { |
1447 | 0 | FrameIndex = MI.getOperand(1).getIndex(); |
1448 | 0 | return MI.getOperand(0).getReg(); |
1449 | 0 | } |
1450 | 2.97k | break; |
1451 | 56.2k | case ARM::LDRi12: |
1452 | 56.2k | case ARM::t2LDRi12: |
1453 | 56.2k | case ARM::tLDRspi: |
1454 | 56.2k | case ARM::VLDRD: |
1455 | 56.2k | case ARM::VLDRS: |
1456 | 56.2k | if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() && |
1457 | 56.2k | MI.getOperand(2).getImm() == 0) { |
1458 | 19.8k | FrameIndex = MI.getOperand(1).getIndex(); |
1459 | 19.8k | return MI.getOperand(0).getReg(); |
1460 | 19.8k | } |
1461 | 36.3k | break; |
1462 | 36.3k | case ARM::VLDR_P0_off: |
1463 | 0 | if (MI.getOperand(0).isFI() && MI.getOperand(1).isImm() && |
1464 | 0 | MI.getOperand(1).getImm() == 0) { |
1465 | 0 | FrameIndex = MI.getOperand(0).getIndex(); |
1466 | 0 | return ARM::P0; |
1467 | 0 | } |
1468 | 0 | break; |
1469 | 211 | case ARM::VLD1q64: |
1470 | 211 | case ARM::VLD1d8TPseudo: |
1471 | 211 | case ARM::VLD1d16TPseudo: |
1472 | 211 | case ARM::VLD1d32TPseudo: |
1473 | 211 | case ARM::VLD1d64TPseudo: |
1474 | 211 | case ARM::VLD1d8QPseudo: |
1475 | 211 | case ARM::VLD1d16QPseudo: |
1476 | 211 | case ARM::VLD1d32QPseudo: |
1477 | 211 | case ARM::VLD1d64QPseudo: |
1478 | 211 | if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) { |
1479 | 2 | FrameIndex = MI.getOperand(1).getIndex(); |
1480 | 2 | return MI.getOperand(0).getReg(); |
1481 | 2 | } |
1482 | 209 | break; |
1483 | 209 | case ARM::VLDMQIA: |
1484 | 0 | if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) { |
1485 | 0 | FrameIndex = MI.getOperand(1).getIndex(); |
1486 | 0 | return MI.getOperand(0).getReg(); |
1487 | 0 | } |
1488 | 0 | break; |
1489 | 412k | } |
1490 | 412k | |
1491 | 412k | return 0; |
1492 | 412k | } |
1493 | | |
1494 | | unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI, |
1495 | 167k | int &FrameIndex) const { |
1496 | 167k | SmallVector<const MachineMemOperand *, 1> Accesses; |
1497 | 167k | if (MI.mayLoad() && hasLoadFromStackSlot(MI, Accesses) && |
1498 | 167k | Accesses.size() == 1) { |
1499 | 4.65k | FrameIndex = |
1500 | 4.65k | cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue()) |
1501 | 4.65k | ->getFrameIndex(); |
1502 | 4.65k | return true; |
1503 | 4.65k | } |
1504 | 162k | return false; |
1505 | 162k | } |
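Both PostFE hooks above identify spill-slot accesses after frame elimination not by opcode but by the instruction's single machine memory operand pointing at a fixed stack slot. A hedged sketch of that pattern, assuming the usual LLVM headers (the helper name is illustrative):

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/Casting.h"

// Returns true and sets FI when MI has exactly one memory operand and that
// operand refers to a fixed stack object.
static bool singleFixedStackAccess(const llvm::MachineInstr &MI, int &FI) {
  if (!MI.hasOneMemOperand())
    return false;
  const llvm::MachineMemOperand *MMO = *MI.memoperands_begin();
  if (const auto *FS = llvm::dyn_cast_or_null<llvm::FixedStackPseudoSourceValue>(
          MMO->getPseudoValue())) {
    FI = FS->getFrameIndex();
    return true;
  }
  return false;
}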
1506 | | |
1507 | | /// Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMIA_UPD |
1508 | | /// depending on whether the result is used. |
1509 | 54 | void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const { |
1510 | 54 | bool isThumb1 = Subtarget.isThumb1Only(); |
1511 | 54 | bool isThumb2 = Subtarget.isThumb2(); |
1512 | 54 | const ARMBaseInstrInfo *TII = Subtarget.getInstrInfo(); |
1513 | 54 | |
1514 | 54 | DebugLoc dl = MI->getDebugLoc(); |
1515 | 54 | MachineBasicBlock *BB = MI->getParent(); |
1516 | 54 | |
1517 | 54 | MachineInstrBuilder LDM, STM; |
1518 | 54 | if (isThumb1 || !MI->getOperand(1).isDead()) { |
1519 | 39 | MachineOperand LDWb(MI->getOperand(1)); |
1520 | 39 | LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD |
1521 | 39 | : isThumb1 ? ARM::tLDMIA_UPD |
1522 | 33 | : ARM::LDMIA_UPD)) |
1523 | 39 | .add(LDWb); |
1524 | 39 | } else { |
1525 | 15 | LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA)); |
1526 | 15 | } |
1527 | 54 | |
1528 | 54 | if (isThumb1 || !MI->getOperand(0).isDead()) { |
1529 | 39 | MachineOperand STWb(MI->getOperand(0)); |
1530 | 39 | STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD |
1531 | 39 | : isThumb1 ? ARM::tSTMIA_UPD |
1532 | 33 | : ARM::STMIA_UPD)) |
1533 | 39 | .add(STWb); |
1534 | 39 | } else { |
1535 | 15 | STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA : ARM::STMIA)); |
1536 | 15 | } |
1537 | 54 | |
1538 | 54 | MachineOperand LDBase(MI->getOperand(3)); |
1539 | 54 | LDM.add(LDBase).add(predOps(ARMCC::AL)); |
1540 | 54 | |
1541 | 54 | MachineOperand STBase(MI->getOperand(2)); |
1542 | 54 | STM.add(STBase).add(predOps(ARMCC::AL)); |
1543 | 54 | |
1544 | 54 | // Sort the scratch registers into ascending order. |
1545 | 54 | const TargetRegisterInfo &TRI = getRegisterInfo(); |
1546 | 54 | SmallVector<unsigned, 6> ScratchRegs; |
1547 | 277 | for(unsigned I = 5; I < MI->getNumOperands(); ++I) |
1548 | 223 | ScratchRegs.push_back(MI->getOperand(I).getReg()); |
1549 | 54 | llvm::sort(ScratchRegs, |
1550 | 215 | [&TRI](const unsigned &Reg1, const unsigned &Reg2) -> bool { |
1551 | 215 | return TRI.getEncodingValue(Reg1) < |
1552 | 215 | TRI.getEncodingValue(Reg2); |
1553 | 215 | }); |
1554 | 54 | |
1555 | 223 | for (const auto &Reg : ScratchRegs) { |
1556 | 223 | LDM.addReg(Reg, RegState::Define); |
1557 | 223 | STM.addReg(Reg, RegState::Kill); |
1558 | 223 | } |
1559 | 54 | |
1560 | 54 | BB->erase(MI); |
1561 | 54 | } |
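The expansion above keeps the scratch registers sorted by hardware encoding, matching the order in which a load/store-multiple transfers its register list. A small standalone sketch of that sort (plain structs stand in for registers; this is not the TargetRegisterInfo API):

#include <algorithm>
#include <vector>

struct ScratchReg { unsigned Reg; unsigned Encoding; };

// Ascending-encoding order, as the llvm::sort call in expandMEMCPY enforces.
void sortScratchRegs(std::vector<ScratchReg> &Regs) {
  std::sort(Regs.begin(), Regs.end(),
            [](const ScratchReg &A, const ScratchReg &B) {
              return A.Encoding < B.Encoding;
            });
}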
1562 | | |
1563 | 221k | bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { |
1564 | 221k | if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) { |
1565 | 160 | assert(getSubtarget().getTargetTriple().isOSBinFormatMachO() && |
1566 | 160 | "LOAD_STACK_GUARD currently supported only for MachO."); |
1567 | 160 | expandLoadStackGuard(MI); |
1568 | 160 | MI.getParent()->erase(MI); |
1569 | 160 | return true; |
1570 | 160 | } |
1571 | 221k | |
1572 | 221k | if (MI.getOpcode() == ARM::MEMCPY) { |
1573 | 54 | expandMEMCPY(MI); |
1574 | 54 | return true; |
1575 | 54 | } |
1576 | 221k | |
1577 | 221k | // This hook gets to expand COPY instructions before they become |
1578 | 221k | // copyPhysReg() calls. Look for VMOVS instructions that can legally be |
1579 | 221k | // widened to VMOVD. We prefer the VMOVD when possible because it may be |
1580 | 221k | // changed into a VORR that can go down the NEON pipeline. |
1581 | 221k | if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || !Subtarget.hasFP64()) |
1582 | 152k | return false; |
1583 | 68.1k | |
1584 | 68.1k | // Look for a copy between even S-registers. That is where we keep floats |
1585 | 68.1k | // when using NEON v2f32 instructions for f32 arithmetic. |
1586 | 68.1k | unsigned DstRegS = MI.getOperand(0).getReg(); |
1587 | 68.1k | unsigned SrcRegS = MI.getOperand(1).getReg(); |
1588 | 68.1k | if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS)) |
1589 | 66.4k | return false; |
1590 | 1.74k | |
1591 | 1.74k | const TargetRegisterInfo *TRI = &getRegisterInfo(); |
1592 | 1.74k | unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0, |
1593 | 1.74k | &ARM::DPRRegClass); |
1594 | 1.74k | unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0, |
1595 | 1.74k | &ARM::DPRRegClass); |
1596 | 1.74k | if (!DstRegD || !SrcRegD) |
1597 | 522 | return false; |
1598 | 1.22k | |
1599 | 1.22k | // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only |
1600 | 1.22k | // legal if the COPY already defines the full DstRegD, and it isn't a |
1601 | 1.22k | // sub-register insertion. |
1602 | 1.22k | if (!MI.definesRegister(DstRegD, TRI) || MI.readsRegister(DstRegD, TRI)) |
1603 | 1.12k | return false; |
1604 | 103 | |
1605 | 103 | // A dead copy shouldn't show up here, but reject it just in case. |
1606 | 103 | if (MI.getOperand(0).isDead()) |
1607 | 0 | return false; |
1608 | 103 | |
1609 | 103 | // All clear, widen the COPY. |
1610 | 103 | LLVM_DEBUG(dbgs() << "widening: " << MI); |
1611 | 103 | MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI); |
1612 | 103 | |
1613 | 103 | // Get rid of the old implicit-def of DstRegD. Leave it if it defines a Q-reg |
1614 | 103 | // or some other super-register. |
1615 | 103 | int ImpDefIdx = MI.findRegisterDefOperandIdx(DstRegD); |
1616 | 103 | if (ImpDefIdx != -1) |
1617 | 82 | MI.RemoveOperand(ImpDefIdx); |
1618 | 103 | |
1619 | 103 | // Change the opcode and operands. |
1620 | 103 | MI.setDesc(get(ARM::VMOVD)); |
1621 | 103 | MI.getOperand(0).setReg(DstRegD); |
1622 | 103 | MI.getOperand(1).setReg(SrcRegD); |
1623 | 103 | MIB.add(predOps(ARMCC::AL)); |
1624 | 103 | |
1625 | 103 | // We are now reading SrcRegD instead of SrcRegS. This may upset the |
1626 | 103 | // register scavenger and machine verifier, so we need to indicate that we |
1627 | 103 | // are reading an undefined value from SrcRegD, but a proper value from |
1628 | 103 | // SrcRegS. |
1629 | 103 | MI.getOperand(1).setIsUndef(); |
1630 | 103 | MIB.addReg(SrcRegS, RegState::Implicit); |
1631 | 103 | |
1632 | 103 | // SrcRegD may actually contain an unrelated value in the ssub_1 |
1633 | 103 | // sub-register. Don't kill it. Only kill the ssub_0 sub-register. |
1634 | 103 | if (MI.getOperand(1).isKill()) { |
1635 | 21 | MI.getOperand(1).setIsKill(false); |
1636 | 21 | MI.addRegisterKilled(SrcRegS, TRI, true); |
1637 | 21 | } |
1638 | 103 | |
1639 | 103 | LLVM_DEBUG(dbgs() << "replaced by: " << MI); |
1640 | 103 | return true; |
1641 | 103 | } |
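The widening above can only fire when both S registers are the low (ssub_0) lane of a D register, which is why getMatchingSuperReg may return 0 and the copy is then left alone. A rough standalone model of that legality check (illustrative only; the real check goes through TargetRegisterInfo):

// Sn lives in D(n/2) and is the low (ssub_0) lane exactly when n is even, so
// a VMOVS copy can be widened to VMOVD only for an even/even register pair.
bool canWidenSRegCopy(unsigned DstS, unsigned SrcS) {
  return (DstS % 2 == 0) && (SrcS % 2 == 0);
}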
1642 | | |
1643 | | /// Create a copy of a const pool value. Update CPI to the new index and return |
1644 | | /// the label UID. |
1645 | 0 | static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) { |
1646 | 0 | MachineConstantPool *MCP = MF.getConstantPool(); |
1647 | 0 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); |
1648 | 0 |
|
1649 | 0 | const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI]; |
1650 | 0 | assert(MCPE.isMachineConstantPoolEntry() && |
1651 | 0 | "Expecting a machine constantpool entry!"); |
1652 | 0 | ARMConstantPoolValue *ACPV = |
1653 | 0 | static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal); |
1654 | 0 | |
1655 | 0 | unsigned PCLabelId = AFI->createPICLabelUId(); |
1656 | 0 | ARMConstantPoolValue *NewCPV = nullptr; |
1657 | 0 | |
1658 | 0 | // FIXME: The below assumes PIC relocation model and that the function |
1659 | 0 | // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and |
1660 | 0 | // zero for non-PIC in ARM or Thumb. The callers are all Thumb LDR |
1661 | 0 | // instructions, so that's probably OK, but is PIC always correct when |
1662 | 0 | // we get here? |
1663 | 0 | if (ACPV->isGlobalValue()) |
1664 | 0 | NewCPV = ARMConstantPoolConstant::Create( |
1665 | 0 | cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId, ARMCP::CPValue, |
1666 | 0 | 4, ACPV->getModifier(), ACPV->mustAddCurrentAddress()); |
1667 | 0 | else if (ACPV->isExtSymbol()) |
1668 | 0 | NewCPV = ARMConstantPoolSymbol:: |
1669 | 0 | Create(MF.getFunction().getContext(), |
1670 | 0 | cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4); |
1671 | 0 | else if (ACPV->isBlockAddress()) |
1672 | 0 | NewCPV = ARMConstantPoolConstant:: |
1673 | 0 | Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId, |
1674 | 0 | ARMCP::CPBlockAddress, 4); |
1675 | 0 | else if (ACPV->isLSDA()) |
1676 | 0 | NewCPV = ARMConstantPoolConstant::Create(&MF.getFunction(), PCLabelId, |
1677 | 0 | ARMCP::CPLSDA, 4); |
1678 | 0 | else if (ACPV->isMachineBasicBlock()) |
1679 | 0 | NewCPV = ARMConstantPoolMBB:: |
1680 | 0 | Create(MF.getFunction().getContext(), |
1681 | 0 | cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4); |
1682 | 0 | else |
1683 | 0 | llvm_unreachable("Unexpected ARM constantpool value type!!"); |
1684 | 0 | CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment()); |
1685 | 0 | return PCLabelId; |
1686 | 0 | } |
1687 | | |
1688 | | void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB, |
1689 | | MachineBasicBlock::iterator I, |
1690 | | unsigned DestReg, unsigned SubIdx, |
1691 | | const MachineInstr &Orig, |
1692 | 27.4k | const TargetRegisterInfo &TRI) const { |
1693 | 27.4k | unsigned Opcode = Orig.getOpcode(); |
1694 | 27.4k | switch (Opcode) { |
1695 | 27.4k | default: { |
1696 | 27.4k | MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig); |
1697 | 27.4k | MI->substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI); |
1698 | 27.4k | MBB.insert(I, MI); |
1699 | 27.4k | break; |
1700 | 27.4k | } |
1701 | 27.4k | case ARM::tLDRpci_pic: |
1702 | 0 | case ARM::t2LDRpci_pic: { |
1703 | 0 | MachineFunction &MF = *MBB.getParent(); |
1704 | 0 | unsigned CPI = Orig.getOperand(1).getIndex(); |
1705 | 0 | unsigned PCLabelId = duplicateCPV(MF, CPI); |
1706 | 0 | BuildMI(MBB, I, Orig.getDebugLoc(), get(Opcode), DestReg) |
1707 | 0 | .addConstantPoolIndex(CPI) |
1708 | 0 | .addImm(PCLabelId) |
1709 | 0 | .cloneMemRefs(Orig); |
1710 | 0 | break; |
1711 | 0 | } |
1712 | 27.4k | } |
1713 | 27.4k | } |
1714 | | |
1715 | | MachineInstr & |
1716 | | ARMBaseInstrInfo::duplicate(MachineBasicBlock &MBB, |
1717 | | MachineBasicBlock::iterator InsertBefore, |
1718 | 19.2k | const MachineInstr &Orig) const { |
1719 | 19.2k | MachineInstr &Cloned = TargetInstrInfo::duplicate(MBB, InsertBefore, Orig); |
1720 | 19.2k | MachineBasicBlock::instr_iterator I = Cloned.getIterator(); |
1721 | 19.4k | for (;;) { |
1722 | 19.4k | switch (I->getOpcode()) { |
1723 | 19.4k | case ARM::tLDRpci_pic: |
1724 | 0 | case ARM::t2LDRpci_pic: { |
1725 | 0 | MachineFunction &MF = *MBB.getParent(); |
1726 | 0 | unsigned CPI = I->getOperand(1).getIndex(); |
1727 | 0 | unsigned PCLabelId = duplicateCPV(MF, CPI); |
1728 | 0 | I->getOperand(1).setIndex(CPI); |
1729 | 0 | I->getOperand(2).setImm(PCLabelId); |
1730 | 0 | break; |
1731 | 19.4k | } |
1732 | 19.4k | } |
1733 | 19.4k | if (!I->isBundledWithSucc()) |
1734 | 19.2k | break; |
1735 | 193 | ++I; |
1736 | 193 | } |
1737 | 19.2k | return Cloned; |
1738 | 19.2k | } |
1739 | | |
1740 | | bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0, |
1741 | | const MachineInstr &MI1, |
1742 | 14.1k | const MachineRegisterInfo *MRI) const { |
1743 | 14.1k | unsigned Opcode = MI0.getOpcode(); |
1744 | 14.1k | if (Opcode == ARM::t2LDRpci || |
1745 | 14.1k | Opcode == ARM::t2LDRpci_pic || |
1746 | 14.1k | Opcode == ARM::tLDRpci || |
1747 | 14.1k | Opcode == ARM::tLDRpci_pic || |
1748 | 14.1k | Opcode == ARM::LDRLIT_ga_pcrel || |
1749 | 14.1k | Opcode == ARM::LDRLIT_ga_pcrel_ldr || |
1750 | 14.1k | Opcode == ARM::tLDRLIT_ga_pcrel || |
1751 | 14.1k | Opcode == ARM::MOV_ga_pcrel || |
1752 | 14.1k | Opcode == ARM::MOV_ga_pcrel_ldr || |
1753 | 14.1k | Opcode == ARM::t2MOV_ga_pcrel) { |
1754 | 6.70k | if (MI1.getOpcode() != Opcode) |
1755 | 0 | return false; |
1756 | 6.70k | if (MI0.getNumOperands() != MI1.getNumOperands()) |
1757 | 0 | return false; |
1758 | 6.70k | |
1759 | 6.70k | const MachineOperand &MO0 = MI0.getOperand(1); |
1760 | 6.70k | const MachineOperand &MO1 = MI1.getOperand(1); |
1761 | 6.70k | if (MO0.getOffset() != MO1.getOffset()) |
1762 | 0 | return false; |
1763 | 6.70k | |
1764 | 6.70k | if (Opcode == ARM::LDRLIT_ga_pcrel || |
1765 | 6.70k | Opcode == ARM::LDRLIT_ga_pcrel_ldr || |
1766 | 6.70k | Opcode == ARM::tLDRLIT_ga_pcrel || |
1767 | 6.70k | Opcode == ARM::MOV_ga_pcrel || |
1768 | 6.70k | Opcode == ARM::MOV_ga_pcrel_ldr || |
1769 | 6.70k | Opcode == ARM::t2MOV_ga_pcrel) |
1770 | 6.70k | // Ignore the PC labels. |
1771 | 6.70k | return MO0.getGlobal() == MO1.getGlobal(); |
1772 | 7 | |
1773 | 7 | const MachineFunction *MF = MI0.getParent()->getParent(); |
1774 | 7 | const MachineConstantPool *MCP = MF->getConstantPool(); |
1775 | 7 | int CPI0 = MO0.getIndex(); |
1776 | 7 | int CPI1 = MO1.getIndex(); |
1777 | 7 | const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0]; |
1778 | 7 | const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1]; |
1779 | 7 | bool isARMCP0 = MCPE0.isMachineConstantPoolEntry(); |
1780 | 7 | bool isARMCP1 = MCPE1.isMachineConstantPoolEntry(); |
1781 | 7 | if (isARMCP0 && isARMCP1) { |
1782 | 1 | ARMConstantPoolValue *ACPV0 = |
1783 | 1 | static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal); |
1784 | 1 | ARMConstantPoolValue *ACPV1 = |
1785 | 1 | static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal); |
1786 | 1 | return ACPV0->hasSameValue(ACPV1); |
1787 | 6 | } else if (!isARMCP0 && !isARMCP1) { |
1788 | 6 | return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal; |
1789 | 6 | } |
1790 | 0 | return false; |
1791 | 7.46k | } else if (Opcode == ARM::PICLDR) { |
1792 | 0 | if (MI1.getOpcode() != Opcode) |
1793 | 0 | return false; |
1794 | 0 | if (MI0.getNumOperands() != MI1.getNumOperands()) |
1795 | 0 | return false; |
1796 | 0 | |
1797 | 0 | unsigned Addr0 = MI0.getOperand(1).getReg(); |
1798 | 0 | unsigned Addr1 = MI1.getOperand(1).getReg(); |
1799 | 0 | if (Addr0 != Addr1) { |
1800 | 0 | if (!MRI || |
1801 | 0 | !TargetRegisterInfo::isVirtualRegister(Addr0) || |
1802 | 0 | !TargetRegisterInfo::isVirtualRegister(Addr1)) |
1803 | 0 | return false; |
1804 | 0 | |
1805 | 0 | // This assumes SSA form. |
1806 | 0 | MachineInstr *Def0 = MRI->getVRegDef(Addr0); |
1807 | 0 | MachineInstr *Def1 = MRI->getVRegDef(Addr1); |
1808 | 0 | // Check if the loaded values, e.g. a constant pool entry of a global |
1809 | 0 | // address, are the same. |
1810 | 0 | if (!produceSameValue(*Def0, *Def1, MRI)) |
1811 | 0 | return false; |
1812 | 0 | } |
1813 | 0 | |
1814 | 0 | for (unsigned i = 3, e = MI0.getNumOperands(); i != e; ++i) { |
1815 | 0 | // %12 = PICLDR %11, 0, 14, %noreg |
1816 | 0 | const MachineOperand &MO0 = MI0.getOperand(i); |
1817 | 0 | const MachineOperand &MO1 = MI1.getOperand(i); |
1818 | 0 | if (!MO0.isIdenticalTo(MO1)) |
1819 | 0 | return false; |
1820 | 0 | } |
1821 | 0 | return true; |
1822 | 7.46k | } |
1823 | 7.46k | |
1824 | 7.46k | return MI0.isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs); |
1825 | 7.46k | } |
1826 | | |
1827 | | /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to |
1828 | | /// determine if two loads are loading from the same base address. It should |
1829 | | /// only return true if the base pointers are the same and the only difference |
1830 | | /// between the two addresses is the offset. It also returns the offsets by |
1831 | | /// reference. |
1832 | | /// |
1833 | | /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched |
1834 | | /// is permanently disabled. |
1835 | | bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, |
1836 | | int64_t &Offset1, |
1837 | 328k | int64_t &Offset2) const { |
1838 | 328k | // Don't worry about Thumb: just ARM and Thumb2. |
1839 | 328k | if (Subtarget.isThumb1Only()) return false; |
1840 | 300k | |
1841 | 300k | if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode()) |
1842 | 144k | return false; |
1843 | 155k | |
1844 | 155k | switch (Load1->getMachineOpcode()) { |
1845 | 155k | default: |
1846 | 27.6k | return false; |
1847 | 155k | case ARM::LDRi12: |
1848 | 128k | case ARM::LDRBi12: |
1849 | 128k | case ARM::LDRD: |
1850 | 128k | case ARM::LDRH: |
1851 | 128k | case ARM::LDRSB: |
1852 | 128k | case ARM::LDRSH: |
1853 | 128k | case ARM::VLDRD: |
1854 | 128k | case ARM::VLDRS: |
1855 | 128k | case ARM::t2LDRi8: |
1856 | 128k | case ARM::t2LDRBi8: |
1857 | 128k | case ARM::t2LDRDi8: |
1858 | 128k | case ARM::t2LDRSHi8: |
1859 | 128k | case ARM::t2LDRi12: |
1860 | 128k | case ARM::t2LDRBi12: |
1861 | 128k | case ARM::t2LDRSHi12: |
1862 | 128k | break; |
1863 | 128k | } |
1864 | 128k | |
1865 | 128k | switch (Load2->getMachineOpcode()) { |
1866 | 128k | default: |
1867 | 23.6k | return false; |
1868 | 128k | case ARM::LDRi12: |
1869 | 104k | case ARM::LDRBi12: |
1870 | 104k | case ARM::LDRD: |
1871 | 104k | case ARM::LDRH: |
1872 | 104k | case ARM::LDRSB: |
1873 | 104k | case ARM::LDRSH: |
1874 | 104k | case ARM::VLDRD: |
1875 | 104k | case ARM::VLDRS: |
1876 | 104k | case ARM::t2LDRi8: |
1877 | 104k | case ARM::t2LDRBi8: |
1878 | 104k | case ARM::t2LDRSHi8: |
1879 | 104k | case ARM::t2LDRi12: |
1880 | 104k | case ARM::t2LDRBi12: |
1881 | 104k | case ARM::t2LDRSHi12: |
1882 | 104k | break; |
1883 | 104k | } |
1884 | 104k | |
1885 | 104k | // Check if base addresses and chain operands match. |
1886 | 104k | if (Load1->getOperand(0) != Load2->getOperand(0) || |
1887 | 104k | Load1->getOperand(4) != Load2->getOperand(4)) |
1888 | 80.2k | return false; |
1889 | 24.1k | |
1890 | 24.1k | // Index should be Reg0. |
1891 | 24.1k | if (Load1->getOperand(3) != Load2->getOperand(3)) |
1892 | 0 | return false; |
1893 | 24.1k | |
1894 | 24.1k | // Determine the offsets. |
1895 | 24.1k | if (isa<ConstantSDNode>(Load1->getOperand(1)) && |
1896 | 24.1k | isa<ConstantSDNode>(Load2->getOperand(1))) { |
1897 | 24.0k | Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue(); |
1898 | 24.0k | Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue(); |
1899 | 24.0k | return true; |
1900 | 24.0k | } |
1901 | 84 | |
1902 | 84 | return false; |
1903 | 84 | } |
1904 | | |
1905 | | /// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to |
1906 | | /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should |
1907 | | /// be scheduled together. On some targets, if two loads are loading from |
1908 | | /// addresses in the same cache line, it's better if they are scheduled |
1909 | | /// together. This function takes two integers that represent the load offsets |
1910 | | /// from the common base address. It returns true if it decides it's desirable |
1911 | | /// to schedule the two loads together. "NumLoads" is the number of loads that |
1912 | | /// have already been scheduled after Load1. |
1913 | | /// |
1914 | | /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched |
1915 | | /// is permanently disabled. |
1916 | | bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, |
1917 | | int64_t Offset1, int64_t Offset2, |
1918 | 9.30k | unsigned NumLoads) const { |
1919 | 9.30k | // Don't worry about Thumb: just ARM and Thumb2. |
1920 | 9.30k | if (Subtarget.isThumb1Only()) return false0 ; |
1921 | 9.30k | |
1922 | 9.30k | assert(Offset2 > Offset1); |
1923 | 9.30k | |
1924 | 9.30k | if ((Offset2 - Offset1) / 8 > 64) |
1925 | 78 | return false; |
1926 | 9.22k | |
1927 | 9.22k | // Check if the machine opcodes are different. If they are different |
1928 | 9.22k | // then we consider them to not be of the same base address, |
1929 | 9.22k | // EXCEPT in the case of Thumb2 byte loads where one is LDRBi8 and the other LDRBi12. |
1930 | 9.22k | // In this case, they are considered to be the same because they are different |
1931 | 9.22k | // encoding forms of the same basic instruction. |
1932 | 9.22k | if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) && |
1933 | 9.22k | !((Load1->getMachineOpcode() == ARM::t2LDRBi8 && |
1934 | 636 | Load2->getMachineOpcode() == ARM::t2LDRBi12) || |
1935 | 636 | (Load1->getMachineOpcode() == ARM::t2LDRBi12 && |
1936 | 633 | Load2->getMachineOpcode() == ARM::t2LDRBi8))) |
1937 | 633 | return false; // FIXME: overly conservative? |
1938 | 8.59k | |
1939 | 8.59k | // Four loads in a row should be sufficient. |
1940 | 8.59k | if (NumLoads >= 3) |
1941 | 819 | return false; |
1942 | 7.77k | |
1943 | 7.77k | return true; |
1944 | 7.77k | } |
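Restating the pairing heuristic above in one place: the loads must not be too far apart, their opcodes must match (with the t2LDRBi8/t2LDRBi12 encoding pair treated as equal), and fewer than three loads may already have been scheduled after the first. A simplified standalone model (plain unsigned opcodes stand in for the real ones):

#include <cstdint>

bool shouldPairLoadsModel(unsigned Opc1, unsigned Opc2, int64_t Off1,
                          int64_t Off2, unsigned NumLoads,
                          unsigned t2LDRBi8, unsigned t2LDRBi12) {
  if ((Off2 - Off1) / 8 > 64)
    return false;                              // too far apart
  bool SameByteLoadPair = (Opc1 == t2LDRBi8 && Opc2 == t2LDRBi12) ||
                          (Opc1 == t2LDRBi12 && Opc2 == t2LDRBi8);
  if (Opc1 != Opc2 && !SameByteLoadPair)
    return false;                              // different instructions
  return NumLoads < 3;                         // four loads in a row suffice
}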
1945 | | |
1946 | | bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI, |
1947 | | const MachineBasicBlock *MBB, |
1948 | 716k | const MachineFunction &MF) const { |
1949 | 716k | // Debug info is never a scheduling boundary. It's necessary to be explicit |
1950 | 716k | // due to the special treatment of IT instructions below, otherwise a |
1951 | 716k | // dbg_value followed by an IT will result in the IT instruction being |
1952 | 716k | // considered a scheduling hazard, which is wrong. It should be the actual |
1953 | 716k | // instruction preceding the dbg_value instruction(s), just like it is |
1954 | 716k | // when debug info is not present. |
1955 | 716k | if (MI.isDebugInstr()) |
1956 | 141 | return false; |
1957 | 716k | |
1958 | 716k | // Terminators and labels can't be scheduled around. |
1959 | 716k | if (MI.isTerminator() || MI.isPosition()625k ) |
1960 | 140k | return true; |
1961 | 575k | |
1962 | 575k | // Treat the start of the IT block as a scheduling boundary, but schedule |
1963 | 575k | // t2IT along with all instructions following it. |
1964 | 575k | // FIXME: This is a big hammer. But the alternative is to add all potential |
1965 | 575k | // true and anti dependencies to IT block instructions as implicit operands |
1966 | 575k | // to the t2IT instruction. The added compile time and complexity does not |
1967 | 575k | // seem worth it. |
1968 | 575k | MachineBasicBlock::const_iterator I = MI; |
1969 | 575k | // Make sure to skip any debug instructions |
1970 | 575k | while (++I != MBB->end() && I->isDebugInstr()) |
1971 | 58 | ; |
1972 | 575k | if (I != MBB->end() && I->getOpcode() == ARM::t2IT) |
1973 | 0 | return true; |
1974 | 575k | |
1975 | 575k | // Don't attempt to schedule around any instruction that defines |
1976 | 575k | // a stack-oriented pointer, as it's unlikely to be profitable. This |
1977 | 575k | // saves compile time, because it doesn't require every single |
1978 | 575k | // stack slot reference to depend on the instruction that does the |
1979 | 575k | // modification. |
1980 | 575k | // Calls don't actually change the stack pointer, even if they have imp-defs. |
1981 | 575k | // No ARM calling conventions change the stack pointer. (X86 calling |
1982 | 575k | // conventions sometimes do). |
1983 | 575k | if (!MI.isCall() && MI.definesRegister(ARM::SP)) |
1984 | 55.7k | return true; |
1985 | 520k | |
1986 | 520k | return false; |
1987 | 520k | } |
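Collecting the rules above: debug instructions are never boundaries; terminators and labels always are; an instruction whose next non-debug neighbour is a t2IT is one, so the IT block is scheduled as a unit; and any non-call that writes SP is one. A minimal sketch over a hypothetical flattened flag struct (not the MachineInstr API):

struct MIFlagsView {
  bool IsDebug, IsTerminator, IsLabel, IsCall, DefinesSP, NextNonDbgIsT2IT;
};

bool isSchedulingBoundaryModel(const MIFlagsView &MI) {
  if (MI.IsDebug)
    return false;                      // debug info never blocks scheduling
  if (MI.IsTerminator || MI.IsLabel)
    return true;                       // can't move anything across these
  if (MI.NextNonDbgIsT2IT)
    return true;                       // keep the whole IT block together
  return !MI.IsCall && MI.DefinesSP;   // SP writers fence stack references
}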
1988 | | |
1989 | | bool ARMBaseInstrInfo:: |
1990 | | isProfitableToIfCvt(MachineBasicBlock &MBB, |
1991 | | unsigned NumCycles, unsigned ExtraPredCycles, |
1992 | 26.9k | BranchProbability Probability) const { |
1993 | 26.9k | if (!NumCycles) |
1994 | 0 | return false; |
1995 | 26.9k | |
1996 | 26.9k | // If we are optimizing for size, see if the branch in the predecessor can be |
1997 | 26.9k | // lowered to cbn?z by the constant island lowering pass, and return false if |
1998 | 26.9k | // so. This results in a shorter instruction sequence. |
1999 | 26.9k | if (MBB.getParent()->getFunction().hasOptSize()) { |
2000 | 1.84k | MachineBasicBlock *Pred = *MBB.pred_begin(); |
2001 | 1.84k | if (!Pred->empty()) { |
2002 | 1.84k | MachineInstr *LastMI = &*Pred->rbegin(); |
2003 | 1.84k | if (LastMI->getOpcode() == ARM::t2Bcc) { |
2004 | 1.78k | const TargetRegisterInfo *TRI = &getRegisterInfo(); |
2005 | 1.78k | MachineInstr *CmpMI = findCMPToFoldIntoCBZ(LastMI, TRI); |
2006 | 1.78k | if (CmpMI) |
2007 | 481 | return false; |
2008 | 26.5k | } |
2009 | 1.84k | } |
2010 | 1.84k | } |
2011 | 26.5k | return isProfitableToIfCvt(MBB, NumCycles, ExtraPredCycles, |
2012 | 26.5k | MBB, 0, 0, Probability); |
2013 | 26.5k | } |
2014 | | |
2015 | | bool ARMBaseInstrInfo:: |
2016 | | isProfitableToIfCvt(MachineBasicBlock &TBB, |
2017 | | unsigned TCycles, unsigned TExtra, |
2018 | | MachineBasicBlock &FBB, |
2019 | | unsigned FCycles, unsigned FExtra, |
2020 | 27.3k | BranchProbability Probability) const { |
2021 | 27.3k | if (!TCycles) |
2022 | 0 | return false; |
2023 | 27.3k | |
2024 | 27.3k | // In Thumb code we often end up trading one branch for an IT block, and |
2025 | 27.3k | // if we are cloning, the extra instructions can increase code size. Prevent |
2026 | 27.3k | // blocks with multiple predecessors from being if-converted to avoid this |
2027 | 27.3k | // cloning. |
2028 | 27.3k | if (Subtarget.isThumb2() && TBB.getParent()->getFunction().hasMinSize()) { |
2029 | 1.38k | if (TBB.pred_size() != 1 || FBB.pred_size() != 1) |
2030 | 194 | return false; |
2031 | 27.1k | } |
2032 | 27.1k | |
2033 | 27.1k | // Attempt to estimate the relative costs of predication versus branching. |
2034 | 27.1k | // Here we scale up each component of UnpredCost to avoid precision issue when |
2035 | 27.1k | // scaling TCycles/FCycles by Probability. |
2036 | 27.1k | const unsigned ScalingUpFactor = 1024; |
2037 | 27.1k | |
2038 | 27.1k | unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor; |
2039 | 27.1k | unsigned UnpredCost; |
2040 | 27.1k | if (!Subtarget.hasBranchPredictor()) { |
2041 | 770 | // When we don't have a branch predictor it's always cheaper to not take a |
2042 | 770 | // branch than take it, so we have to take that into account. |
2043 | 770 | unsigned NotTakenBranchCost = 1; |
2044 | 770 | unsigned TakenBranchCost = Subtarget.getMispredictionPenalty(); |
2045 | 770 | unsigned TUnpredCycles, FUnpredCycles; |
2046 | 770 | if (!FCycles) { |
2047 | 708 | // Triangle: TBB is the fallthrough |
2048 | 708 | TUnpredCycles = TCycles + NotTakenBranchCost; |
2049 | 708 | FUnpredCycles = TakenBranchCost; |
2050 | 708 | } else { |
2051 | 62 | // Diamond: TBB is the block that is branched to, FBB is the fallthrough |
2052 | 62 | TUnpredCycles = TCycles + TakenBranchCost; |
2053 | 62 | FUnpredCycles = FCycles + NotTakenBranchCost; |
2054 | 62 | // The branch at the end of FBB will disappear when it's predicated, so |
2055 | 62 | // discount it from PredCost. |
2056 | 62 | PredCost -= 1 * ScalingUpFactor; |
2057 | 62 | } |
2058 | 770 | // The total cost is the cost of each path scaled by its probability. |
2059 | 770 | unsigned TUnpredCost = Probability.scale(TUnpredCycles * ScalingUpFactor); |
2060 | 770 | unsigned FUnpredCost = Probability.getCompl().scale(FUnpredCycles * ScalingUpFactor); |
2061 | 770 | UnpredCost = TUnpredCost + FUnpredCost; |
2062 | 770 | // When predicating, assume that the first IT can be folded away but later |
2063 | 770 | // ones cost one cycle each. |
2064 | 770 | if (Subtarget.isThumb2() && TCycles + FCycles > 4) { |
2065 | 213 | PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor; |
2066 | 213 | } |
2067 | 26.3k | } else { |
2068 | 26.3k | unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor); |
2069 | 26.3k | unsigned FUnpredCost = |
2070 | 26.3k | Probability.getCompl().scale(FCycles * ScalingUpFactor); |
2071 | 26.3k | UnpredCost = TUnpredCost + FUnpredCost; |
2072 | 26.3k | UnpredCost += 1 * ScalingUpFactor; // The branch itself |
2073 | 26.3k | UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10; |
2074 | 26.3k | } |
2075 | 27.1k | |
2076 | 27.1k | return PredCost <= UnpredCost; |
2077 | 27.1k | } |
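A standalone numeric sketch of the branch-predictor path of the comparison above, with the probability as a plain double rather than a BranchProbability (illustrative model, not the LLVM API):

// Simplified model of the "has branch predictor" case above.
bool profitableToIfCvtModel(unsigned TCycles, unsigned TExtra, unsigned FCycles,
                            unsigned FExtra, double ProbTaken,
                            unsigned MispredictPenalty) {
  const unsigned Scale = 1024;                      // same role as ScalingUpFactor
  double PredCost = double(TCycles + FCycles + TExtra + FExtra) * Scale;
  double UnpredCost = ProbTaken * TCycles * Scale +
                      (1.0 - ProbTaken) * FCycles * Scale;
  UnpredCost += 1.0 * Scale;                        // the branch itself
  UnpredCost += MispredictPenalty * Scale / 10.0;   // amortised misprediction
  return PredCost <= UnpredCost;
}
// Example: TCycles=2, FCycles=0, no extras, ProbTaken=0.5, penalty=8 gives
// PredCost = 2048 and UnpredCost = 1024 + 1024 + 819.2 = 2867.2, so
// predication wins.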
2078 | | |
2079 | | bool |
2080 | | ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB, |
2081 | 219 | MachineBasicBlock &FMBB) const { |
2082 | 219 | // Reduce false anti-dependencies to let the target's out-of-order execution |
2083 | 219 | // engine do its thing. |
2084 | 219 | return Subtarget.isProfitableToUnpredicate(); |
2085 | 219 | } |
2086 | | |
2087 | | /// getInstrPredicate - If instruction is predicated, returns its predicate |
2088 | | /// condition, otherwise returns AL. It also returns the condition code |
2089 | | /// register by reference. |
2090 | | ARMCC::CondCodes llvm::getInstrPredicate(const MachineInstr &MI, |
2091 | 1.07M | unsigned &PredReg) { |
2092 | 1.07M | int PIdx = MI.findFirstPredOperandIdx(); |
2093 | 1.07M | if (PIdx == -1) { |
2094 | 150k | PredReg = 0; |
2095 | 150k | return ARMCC::AL; |
2096 | 150k | } |
2097 | 927k | |
2098 | 927k | PredReg = MI.getOperand(PIdx+1).getReg(); |
2099 | 927k | return (ARMCC::CondCodes)MI.getOperand(PIdx).getImm(); |
2100 | 927k | } |
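A hedged usage sketch of the helper above: a transform that only applies to unconditionally executed instructions can use it as a cheap filter (assumes the ARM backend headers; the helper name is illustrative):

#include "ARMBaseInstrInfo.h"

// True when MI either has no predicate operands or is predicated with AL.
static bool isUnpredicated(const llvm::MachineInstr &MI) {
  unsigned PredReg = 0;
  return llvm::getInstrPredicate(MI, PredReg) == ARMCC::AL;
}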
2101 | | |
2102 | 0 | unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) { |
2103 | 0 | if (Opc == ARM::B) |
2104 | 0 | return ARM::Bcc; |
2105 | 0 | if (Opc == ARM::tB) |
2106 | 0 | return ARM::tBcc; |
2107 | 0 | if (Opc == ARM::t2B) |
2108 | 0 | return ARM::t2Bcc; |
2109 | 0 | |
2110 | 0 | llvm_unreachable("Unknown unconditional branch opcode!"); |
2111 | 0 | } |
2112 | | |
2113 | | MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI, |
2114 | | bool NewMI, |
2115 | | unsigned OpIdx1, |
2116 | 46.7k | unsigned OpIdx2) const { |
2117 | 46.7k | switch (MI.getOpcode()) { |
2118 | 46.7k | case ARM::MOVCCr: |
2119 | 8.27k | case ARM::t2MOVCCr: { |
2120 | 8.27k | // MOVCC can be commuted by inverting the condition. |
2121 | 8.27k | unsigned PredReg = 0; |
2122 | 8.27k | ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg); |
2123 | 8.27k | // MOVCC AL can't be inverted. Shouldn't happen. |
2124 | 8.27k | if (CC == ARMCC::AL || PredReg != ARM::CPSR) |
2125 | 0 | return nullptr; |
2126 | 8.27k | MachineInstr *CommutedMI = |
2127 | 8.27k | TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); |
2128 | 8.27k | if (!CommutedMI) |
2129 | 0 | return nullptr; |
2130 | 8.27k | // After swapping the MOVCC operands, also invert the condition. |
2131 | 8.27k | CommutedMI->getOperand(CommutedMI->findFirstPredOperandIdx()) |
2132 | 8.27k | .setImm(ARMCC::getOppositeCondition(CC)); |
2133 | 8.27k | return CommutedMI; |
2134 | 8.27k | } |
2135 | 38.4k | } |
2136 | 38.4k | return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); |
2137 | 38.4k | } |
2138 | | |
2139 | | /// Identify instructions that can be folded into a MOVCC instruction, and |
2140 | | /// return the defining instruction. |
2141 | | MachineInstr * |
2142 | | ARMBaseInstrInfo::canFoldIntoMOVCC(unsigned Reg, const MachineRegisterInfo &MRI, |
2143 | 6.62k | const TargetInstrInfo *TII) const { |
2144 | 6.62k | if (!TargetRegisterInfo::isVirtualRegister(Reg)) |
2145 | 0 | return nullptr; |
2146 | 6.62k | if (!MRI.hasOneNonDBGUse(Reg)) |
2147 | 3.70k | return nullptr; |
2148 | 2.91k | MachineInstr *MI = MRI.getVRegDef(Reg); |
2149 | 2.91k | if (!MI) |
2150 | 0 | return nullptr; |
2151 | 2.91k | // Check if MI can be predicated and folded into the MOVCC. |
2152 | 2.91k | if (!isPredicable(*MI)) |
2153 | 637 | return nullptr; |
2154 | 2.28k | // Check if MI has any non-dead defs or physreg uses. This also detects |
2155 | 2.28k | // predicated instructions which will be reading CPSR. |
2156 | 10.8k | for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { |
2157 | 9.21k | const MachineOperand &MO = MI->getOperand(i); |
2158 | 9.21k | // Reject frame index operands, PEI can't handle the predicated pseudos. |
2159 | 9.21k | if (MO.isFI() || MO.isCPI() || MO.isJTI()) |
2160 | 117 | return nullptr; |
2161 | 9.09k | if (!MO.isReg()) |
2162 | 3.09k | continue; |
2163 | 6.00k | // MI can't have any tied operands, that would conflict with predication. |
2164 | 6.00k | if (MO.isTied()) |
2165 | 258 | return nullptr; |
2166 | 5.75k | if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) |
2167 | 232 | return nullptr; |
2169 | 5.51k | if (MO.isDef() && !MO.isDead()) |
2169 | 58 | return nullptr; |
2170 | 5.51k | } |
2171 | 2.28k | bool DontMoveAcrossStores = true; |
2172 | 1.61k | if (!MI->isSafeToMove(/* AliasAnalysis = */ nullptr, DontMoveAcrossStores)) |
2173 | 45 | return nullptr; |
2174 | 1.57k | return MI; |
2175 | 1.57k | } |
2176 | | |
2177 | | bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr &MI, |
2178 | | SmallVectorImpl<MachineOperand> &Cond, |
2179 | | unsigned &TrueOp, unsigned &FalseOp, |
2180 | 3.86k | bool &Optimizable) const { |
2181 | 3.86k | assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) && |
2182 | 3.86k | "Unknown select instruction"); |
2183 | 3.86k | // MOVCC operands: |
2184 | 3.86k | // 0: Def. |
2185 | 3.86k | // 1: True use. |
2186 | 3.86k | // 2: False use. |
2187 | 3.86k | // 3: Condition code. |
2188 | 3.86k | // 4: CPSR use. |
2189 | 3.86k | TrueOp = 1; |
2190 | 3.86k | FalseOp = 2; |
2191 | 3.86k | Cond.push_back(MI.getOperand(3)); |
2192 | 3.86k | Cond.push_back(MI.getOperand(4)); |
2193 | 3.86k | // We can always fold a def. |
2194 | 3.86k | Optimizable = true; |
2195 | 3.86k | return false; |
2196 | 3.86k | } |
2197 | | |
2198 | | MachineInstr * |
2199 | | ARMBaseInstrInfo::optimizeSelect(MachineInstr &MI, |
2200 | | SmallPtrSetImpl<MachineInstr *> &SeenMIs, |
2201 | 3.86k | bool PreferFalse) const { |
2202 | 3.86k | assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) && |
2203 | 3.86k | "Unknown select instruction"); |
2204 | 3.86k | MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); |
2205 | 3.86k | MachineInstr *DefMI = canFoldIntoMOVCC(MI.getOperand(2).getReg(), MRI, this); |
2206 | 3.86k | bool Invert = !DefMI; |
2207 | 3.86k | if (!DefMI) |
2208 | 2.76k | DefMI = canFoldIntoMOVCC(MI.getOperand(1).getReg(), MRI, this); |
2209 | 3.86k | if (!DefMI) |
2210 | 2.29k | return nullptr; |
2211 | 1.57k | |
2212 | 1.57k | // Find new register class to use. |
2213 | 1.57k | MachineOperand FalseReg = MI.getOperand(Invert ? 2 : 1); |
2214 | 1.57k | unsigned DestReg = MI.getOperand(0).getReg(); |
2215 | 1.57k | const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg()); |
2216 | 1.57k | if (!MRI.constrainRegClass(DestReg, PreviousClass)) |
2217 | 0 | return nullptr; |
2218 | 1.57k | |
2219 | 1.57k | // Create a new predicated version of DefMI. |
2220 | 1.57k | // Rfalse is the first use. |
2221 | 1.57k | MachineInstrBuilder NewMI = |
2222 | 1.57k | BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), DefMI->getDesc(), DestReg); |
2223 | 1.57k | |
2224 | 1.57k | // Copy all the DefMI operands, excluding its (null) predicate. |
2225 | 1.57k | const MCInstrDesc &DefDesc = DefMI->getDesc(); |
2226 | 1.57k | for (unsigned i = 1, e = DefDesc.getNumOperands(); |
2227 | 4.37k | i != e && !DefDesc.OpInfo[i].isPredicate(); ++i2.80k ) |
2228 | 2.80k | NewMI.add(DefMI->getOperand(i)); |
2229 | 1.57k | |
2230 | 1.57k | unsigned CondCode = MI.getOperand(3).getImm(); |
2231 | 1.57k | if (Invert) |
2232 | 474 | NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode))); |
2233 | 1.09k | else |
2234 | 1.09k | NewMI.addImm(CondCode); |
2235 | 1.57k | NewMI.add(MI.getOperand(4)); |
2236 | 1.57k | |
2237 | 1.57k | // DefMI is not the -S version that sets CPSR, so add an optional %noreg. |
2238 | 1.57k | if (NewMI->hasOptionalDef()) |
2239 | 1.44k | NewMI.add(condCodeOp()); |
2240 | 1.57k | |
2241 | 1.57k | // The output register value when the predicate is false is an implicit |
2242 | 1.57k | // register operand tied to the first def. |
2243 | 1.57k | // The tie makes the register allocator ensure the FalseReg is allocated the |
2244 | 1.57k | // same register as operand 0. |
2245 | 1.57k | FalseReg.setImplicit(); |
2246 | 1.57k | NewMI.add(FalseReg); |
2247 | 1.57k | NewMI->tieOperands(0, NewMI->getNumOperands() - 1); |
2248 | 1.57k | |
2249 | 1.57k | // Update SeenMIs set: register newly created MI and erase removed DefMI. |
2250 | 1.57k | SeenMIs.insert(NewMI); |
2251 | 1.57k | SeenMIs.erase(DefMI); |
2252 | 1.57k | |
2253 | 1.57k | // If MI is inside a loop, and DefMI is outside the loop, then kill flags on |
2254 | 1.57k | // DefMI would be invalid when transferred inside the loop. Checking for a |
2255 | 1.57k | // loop is expensive, but at least remove kill flags if they are in different |
2256 | 1.57k | // BBs. |
2257 | 1.57k | if (DefMI->getParent() != MI.getParent()) |
2258 | 77 | NewMI->clearKillInfo(); |
2259 | 1.57k | |
2260 | 1.57k | // The caller will erase MI, but not DefMI. |
2261 | 1.57k | DefMI->eraseFromParent(); |
2262 | 1.57k | return NewMI; |
2263 | 1.57k | } |
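Roughly, the rewrite above replaces a MOVCC whose chosen input is produced by a single-use, predicable instruction (as vetted by canFoldIntoMOVCC) with a predicated copy of that instruction; the untaken value is appended as an implicit operand tied to the def so the register allocator gives both the same register. A hand-written, operand-order-elided illustration, written as comments because the exact operand layout varies by opcode:

//   %t = ADDri %x, 1, <AL>, ...            ; sole user is the MOVCC below
//   %d = MOVCCr %f, %t, <cond>, $cpsr
// becomes a single instruction predicated on <cond>:
//   %d = ADDri %x, 1, <cond>, $cpsr, ..., implicit %f   ; %f tied to %d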
2264 | | |
2265 | | /// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the |
2266 | | /// instruction is encoded with an 'S' bit is determined by the optional CPSR |
2267 | | /// def operand. |
2268 | | /// |
2269 | | /// This will go away once we can teach tblgen how to set the optional CPSR def |
2270 | | /// operand itself. |
2271 | | struct AddSubFlagsOpcodePair { |
2272 | | uint16_t PseudoOpc; |
2273 | | uint16_t MachineOpc; |
2274 | | }; |
2275 | | |
2276 | | static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = { |
2277 | | {ARM::ADDSri, ARM::ADDri}, |
2278 | | {ARM::ADDSrr, ARM::ADDrr}, |
2279 | | {ARM::ADDSrsi, ARM::ADDrsi}, |
2280 | | {ARM::ADDSrsr, ARM::ADDrsr}, |
2281 | | |
2282 | | {ARM::SUBSri, ARM::SUBri}, |
2283 | | {ARM::SUBSrr, ARM::SUBrr}, |
2284 | | {ARM::SUBSrsi, ARM::SUBrsi}, |
2285 | | {ARM::SUBSrsr, ARM::SUBrsr}, |
2286 | | |
2287 | | {ARM::RSBSri, ARM::RSBri}, |
2288 | | {ARM::RSBSrsi, ARM::RSBrsi}, |
2289 | | {ARM::RSBSrsr, ARM::RSBrsr}, |
2290 | | |
2291 | | {ARM::tADDSi3, ARM::tADDi3}, |
2292 | | {ARM::tADDSi8, ARM::tADDi8}, |
2293 | | {ARM::tADDSrr, ARM::tADDrr}, |
2294 | | {ARM::tADCS, ARM::tADC}, |
2295 | | |
2296 | | {ARM::tSUBSi3, ARM::tSUBi3}, |
2297 | | {ARM::tSUBSi8, ARM::tSUBi8}, |
2298 | | {ARM::tSUBSrr, ARM::tSUBrr}, |
2299 | | {ARM::tSBCS, ARM::tSBC}, |
2300 | | {ARM::tRSBS, ARM::tRSB}, |
2301 | | |
2302 | | {ARM::t2ADDSri, ARM::t2ADDri}, |
2303 | | {ARM::t2ADDSrr, ARM::t2ADDrr}, |
2304 | | {ARM::t2ADDSrs, ARM::t2ADDrs}, |
2305 | | |
2306 | | {ARM::t2SUBSri, ARM::t2SUBri}, |
2307 | | {ARM::t2SUBSrr, ARM::t2SUBrr}, |
2308 | | {ARM::t2SUBSrs, ARM::t2SUBrs}, |
2309 | | |
2310 | | {ARM::t2RSBSri, ARM::t2RSBri}, |
2311 | | {ARM::t2RSBSrs, ARM::t2RSBrs}, |
2312 | | }; |
2313 | | |
2314 | 1.80M | unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) { |
2315 | 52.4M | for (unsigned i = 0, e = array_lengthof(AddSubFlagsOpcodeMap); i != e; ++i) |
2316 | 50.6M | if (OldOpc == AddSubFlagsOpcodeMap[i].PseudoOpc) |
2317 | 3.94k | return AddSubFlagsOpcodeMap[i].MachineOpc; |
2318 | 1.80M | return 0; |
2319 | 1.80M | } |
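The lookup above is a linear scan over the static pseudo-to-real opcode table; given the call count visible in the coverage column, a keyed map would be a possible alternative, though the table is small and the scan is simple. A standalone model of the walk (generic pairs stand in for the real table):

#include <cstdint>
#include <utility>

// Return the second element of the matching pair, or 0 if OldOpc is not a
// flag-setting pseudo from the table.
unsigned convertViaTable(unsigned OldOpc,
                         const std::pair<uint16_t, uint16_t> *Map, unsigned N) {
  for (unsigned i = 0; i != N; ++i)
    if (OldOpc == Map[i].first)
      return Map[i].second;
  return 0;
}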
2320 | | |
2321 | | void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB, |
2322 | | MachineBasicBlock::iterator &MBBI, |
2323 | | const DebugLoc &dl, unsigned DestReg, |
2324 | | unsigned BaseReg, int NumBytes, |
2325 | | ARMCC::CondCodes Pred, unsigned PredReg, |
2326 | | const ARMBaseInstrInfo &TII, |
2327 | 2.88k | unsigned MIFlags) { |
2328 | 2.88k | if (NumBytes == 0 && DestReg != BaseReg311 ) { |
2329 | 311 | BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg) |
2330 | 311 | .addReg(BaseReg, RegState::Kill) |
2331 | 311 | .add(predOps(Pred, PredReg)) |
2332 | 311 | .add(condCodeOp()) |
2333 | 311 | .setMIFlags(MIFlags); |
2334 | 311 | return; |
2335 | 311 | } |
2336 | 2.57k | |
2337 | 2.57k | bool isSub = NumBytes < 0; |
2338 | 2.57k | if (isSub) NumBytes = -NumBytes; |
2339 | 2.57k | |
2340 | 5.20k | while (NumBytes) { |
2341 | 2.63k | unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes); |
2342 | 2.63k | unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt); |
2343 | 2.63k | assert(ThisVal && "Didn't extract field correctly"); |
2344 | 2.63k | |
2345 | 2.63k | // We will handle these bits from offset, clear them. |
2346 | 2.63k | NumBytes &= ~ThisVal; |
2347 | 2.63k | |
2348 | 2.63k | assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?"); |
2349 | 2.63k | |
2350 | 2.63k | // Build the new ADD / SUB. |
2351 | 2.63k | unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri; |
2352 | 2.63k | BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) |
2353 | 2.63k | .addReg(BaseReg, RegState::Kill) |
2354 | 2.63k | .addImm(ThisVal) |
2355 | 2.63k | .add(predOps(Pred, PredReg)) |
2356 | 2.63k | .add(condCodeOp()) |
2357 | 2.63k | .setMIFlags(MIFlags); |
2358 | 2.63k | BaseReg = DestReg; |
2359 | 2.63k | } |
2360 | 2.57k | } |
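The loop above materializes an arbitrary byte offset as a chain of ADDri/SUBri instructions, peeling off one ARM modified-immediate chunk (an 8-bit value rotated by an even amount) per instruction. A simplified standalone sketch of that peeling, ignoring the wrap-around immediates the real ARM_AM helpers also handle:

#include <cstdint>
#include <vector>

// Split Bytes into chunks, each encodable as an ARM modified immediate.
// Simplified: assumes each chunk's set bits fit in one non-wrapping 8-bit
// window, which holds for typical stack offsets.
std::vector<uint32_t> splitIntoAddChunks(uint32_t Bytes) {
  std::vector<uint32_t> Chunks;
  while (Bytes) {
    unsigned TZ = 0;
    while (((Bytes >> TZ) & 1) == 0)
      ++TZ;
    TZ &= ~1u;                               // rotate amounts must be even
    uint32_t Chunk = Bytes & (0xFFu << TZ);  // next 8-bit window of set bits
    Chunks.push_back(Chunk);
    Bytes &= ~Chunk;
  }
  return Chunks;
}
// e.g. splitIntoAddChunks(0x10004) yields {0x4, 0x10000}, i.e. two ADDs.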
2361 | | |
2362 | | bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, |
2363 | | MachineFunction &MF, MachineInstr *MI, |
2364 | 13.8k | unsigned NumBytes) { |
2365 | 13.8k | // This optimisation potentially adds lots of load and store |
2366 | 13.8k | // micro-operations, it's only really a great benefit to code-size. |
2367 | 13.8k | if (!Subtarget.hasMinSize()) |
2368 | 13.1k | return false; |
2369 | 682 | |
2370 | 682 | // If only one register is pushed/popped, LLVM can use an LDR/STR |
2371 | 682 | // instead. We can't modify those so make sure we're dealing with an |
2372 | 682 | // instruction we understand. |
2373 | 682 | bool IsPop = isPopOpcode(MI->getOpcode()); |
2374 | 682 | bool IsPush = isPushOpcode(MI->getOpcode()); |
2375 | 682 | if (!IsPush && !IsPop) |
2376 | 4 | return false; |
2377 | 678 | |
2378 | 678 | bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD || |
2379 | 678 | MI->getOpcode() == ARM::VLDMDIA_UPD; |
2380 | 678 | bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH || |
2381 | 678 | MI->getOpcode() == ARM::tPOP || |
2382 | 678 | MI->getOpcode() == ARM::tPOP_RET; |
2383 | 678 | |
2384 | 678 | assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP && |
2385 | 678 | MI->getOperand(1).getReg() == ARM::SP)) && |
2386 | 678 | "trying to fold sp update into non-sp-updating push/pop"); |
2387 | 678 | |
2388 | 678 | // The VFP push & pop act on D-registers, so we can only correctly fold in |
2389 | 678 | // an adjustment that is a multiple of 8 bytes. Similarly, rN is 4 bytes. |
2390 | 678 | // Don't try if this is violated. |
2391 | 678 | if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0) |
2392 | 2 | return false; |
2393 | 676 | |
2394 | 676 | // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+ |
2395 | 676 | // pred) so the list starts at 4. Thumb1 starts after the predicate. |
2396 | 676 | int RegListIdx = IsT1PushPop ? 2 : 4; |
2397 | 676 | |
2398 | 676 | // Calculate the space we'll need in terms of registers. |
2399 | 676 | unsigned RegsNeeded; |
2400 | 676 | const TargetRegisterClass *RegClass; |
2401 | 676 | if (IsVFPPushPop) { |
2402 | 28 | RegsNeeded = NumBytes / 8; |
2403 | 28 | RegClass = &ARM::DPRRegClass; |
2404 | 648 | } else { |
2405 | 648 | RegsNeeded = NumBytes / 4; |
2406 | 648 | RegClass = &ARM::GPRRegClass; |
2407 | 648 | } |
2408 | 676 | |
2409 | 676 | // We're going to have to strip all list operands off before |
2410 | 676 | // re-adding them since the order matters, so save the existing ones |
2411 | 676 | // for later. |
2412 | 676 | SmallVector<MachineOperand, 4> RegList; |
2413 | 676 | |
2414 | 676 | // We're also going to need the first register transferred by this |
2415 | 676 | // instruction, which won't necessarily be the first register in the list. |
2416 | 676 | unsigned FirstRegEnc = -1; |
2417 | 676 | |
2418 | 676 | const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo(); |
2419 | 4.60k | for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) { |
2420 | 3.93k | MachineOperand &MO = MI->getOperand(i); |
2421 | 3.93k | RegList.push_back(MO); |
2422 | 3.93k | |
2423 | 3.93k | if (MO.isReg() && TRI->getEncodingValue(MO.getReg()) < FirstRegEnc) |
2424 | 2.24k | FirstRegEnc = TRI->getEncodingValue(MO.getReg()); |
2425 | 3.93k | } |
2426 | 676 | |
2427 | 676 | const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF); |
2428 | 676 | |
2429 | 676 | // Now try to find enough space in the reglist to allocate NumBytes. |
2430 | 2.08k | for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded; |
2431 | 1.40k | --CurRegEnc) { |
2432 | 1.40k | unsigned CurReg = RegClass->getRegister(CurRegEnc); |
2433 | 1.40k | if (IsT1PushPop && CurReg > ARM::R7) |
2434 | 10 | continue; |
2435 | 1.39k | if (!IsPop) { |
2436 | 849 | // Pushing any register is completely harmless, mark the register involved |
2437 | 849 | // as undef since we don't care about its value and must not restore it |
2438 | 849 | // during stack unwinding. |
2439 | 849 | RegList.push_back(MachineOperand::CreateReg(CurReg, false, false, |
2440 | 849 | false, false, true)); |
2441 | 849 | --RegsNeeded; |
2442 | 849 | continue; |
2443 | 849 | } |
2444 | 550 | |
2445 | 550 | // However, we can only pop an extra register if it's not live. For |
2446 | 550 | // registers live within the function we might clobber a return value |
2447 | 550 | // register; the other way a register can be live here is if it's |
2448 | 550 | // callee-saved. |
2449 | 550 | if (isCalleeSavedRegister(CurReg, CSRegs) || |
2450 | 550 | MI->getParent()->computeRegisterLiveness(TRI, CurReg, MI) != |
2451 | 355 | MachineBasicBlock::LQR_Dead) { |
2452 | 217 | // VFP pops don't allow holes in the register list, so any skip is fatal |
2453 | 217 | // for our transformation. GPR pops do, so we should just keep looking. |
2454 | 217 | if (IsVFPPushPop) |
2455 | 4 | return false; |
2456 | 213 | else |
2457 | 213 | continue; |
2458 | 333 | } |
2459 | 333 | |
2460 | 333 | // Mark the unimportant registers as <def,dead> in the POP. |
2461 | 333 | RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false, |
2462 | 333 | true)); |
2463 | 333 | --RegsNeeded; |
2464 | 333 | } |
2465 | 676 | |
2466 | 676 | if (RegsNeeded > 0) |
2467 | 273 | return false; |
2468 | 399 | |
2469 | 399 | // Finally we know we can profitably perform the optimisation so go |
2470 | 399 | // ahead: strip all existing registers off and add them back again |
2471 | 399 | // in the right order. |
2472 | 2.43k | for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) |
2473 | 2.03k | MI->RemoveOperand(i); |
2474 | 399 | |
2475 | 399 | // Add the complete list back in. |
2476 | 399 | MachineInstrBuilder MIB(MF, &*MI); |
2477 | 3.03k | for (int i = RegList.size() - 1; i >= 0; --i) |
2478 | 2.63k | MIB.add(RegList[i]); |
2479 | 399 | |
2480 | 399 | return true; |
2481 | 399 | } |
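For intuition, a hand-written illustration of the fold at minimum size (not taken from the source; the particular scratch registers are arbitrary and must be dead at the pop):

//   push {r4, lr}                push {r2, r3, r4, lr}   ; r2/r3 pushed undef,
//   sub  sp, sp, #8        ==>                           ; reserving the 8 bytes
//   ...                          ...
//   add  sp, sp, #8
//   pop  {r4, pc}                pop  {r2, r3, r4, pc}   ; r2/r3 popped as dead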
2482 | | |
2483 | | bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, |
2484 | | unsigned FrameReg, int &Offset, |
2485 | 8.02k | const ARMBaseInstrInfo &TII) { |
2486 | 8.02k | unsigned Opcode = MI.getOpcode(); |
2487 | 8.02k | const MCInstrDesc &Desc = MI.getDesc(); |
2488 | 8.02k | unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); |
2489 | 8.02k | bool isSub = false; |
2490 | 8.02k | |
2491 | 8.02k | // Memory operands in inline assembly always use AddrMode2. |
2492 | 8.02k | if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR) |
2493 | 0 | AddrMode = ARMII::AddrMode2; |
2494 | 8.02k | |
2495 | 8.02k | if (Opcode == ARM::ADDri) { |
2496 | 1.31k | Offset += MI.getOperand(FrameRegIdx+1).getImm(); |
2497 | 1.31k | if (Offset == 0) { |
2498 | 209 | // Turn it into a move. |
2499 | 209 | MI.setDesc(TII.get(ARM::MOVr)); |
2500 | 209 | MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); |
2501 | 209 | MI.RemoveOperand(FrameRegIdx+1); |
2502 | 209 | Offset = 0; |
2503 | 209 | return true; |
2504 | 1.10k | } else if (Offset < 0) { |
2505 | 52 | Offset = -Offset; |
2506 | 52 | isSub = true; |
2507 | 52 | MI.setDesc(TII.get(ARM::SUBri)); |
2508 | 52 | } |
2509 | 1.31k | |
2510 | 1.31k | // Common case: small offset, fits into instruction. |
2511 | 1.31k | if (ARM_AM::getSOImmVal(Offset) != -1) { |
2512 | 789 | // Replace the FrameIndex with sp / fp |
2513 | 789 | MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); |
2514 | 789 | MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset); |
2515 | 789 | Offset = 0; |
2516 | 789 | return true; |
2517 | 789 | } |
2518 | 314 | |
2519 | 314 | // Otherwise, pull as much of the immediate into this ADDri/SUBri
2520 | 314 | // as possible. |
2521 | 314 | unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset); |
2522 | 314 | unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt); |
2523 | 314 | |
2524 | 314 | // We will handle these bits from offset, clear them. |
2525 | 314 | Offset &= ~ThisImmVal; |
2526 | 314 | |
2527 | 314 | // Get the properly encoded SOImmVal field. |
2528 | 314 | assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 && |
2529 | 314 | "Bit extraction didn't work?"); |
2530 | 314 | MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal); |
2531 | 6.71k | } else { |
2532 | 6.71k | unsigned ImmIdx = 0; |
2533 | 6.71k | int InstrOffs = 0; |
2534 | 6.71k | unsigned NumBits = 0; |
2535 | 6.71k | unsigned Scale = 1; |
2536 | 6.71k | switch (AddrMode) { |
2537 | 6.71k | case ARMII::AddrMode_i12: |
2538 | 5.97k | ImmIdx = FrameRegIdx + 1; |
2539 | 5.97k | InstrOffs = MI.getOperand(ImmIdx).getImm(); |
2540 | 5.97k | NumBits = 12; |
2541 | 5.97k | break; |
2542 | 6.71k | case ARMII::AddrMode2: |
2543 | 0 | ImmIdx = FrameRegIdx+2; |
2544 | 0 | InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm()); |
2545 | 0 | if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) |
2546 | 0 | InstrOffs *= -1; |
2547 | 0 | NumBits = 12; |
2548 | 0 | break; |
2549 | 6.71k | case ARMII::AddrMode3: |
2550 | 100 | ImmIdx = FrameRegIdx+2; |
2551 | 100 | InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm()); |
2552 | 100 | if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) |
2553 | 0 | InstrOffs *= -1; |
2554 | 100 | NumBits = 8; |
2555 | 100 | break; |
2556 | 6.71k | case ARMII::AddrMode4: |
2557 | 36 | case ARMII::AddrMode6: |
2558 | 36 | // Can't fold any offset even if it's zero. |
2559 | 36 | return false; |
2560 | 579 | case ARMII::AddrMode5: |
2561 | 579 | ImmIdx = FrameRegIdx+1; |
2562 | 579 | InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm()); |
2563 | 579 | if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) |
2564 | 0 | InstrOffs *= -1; |
2565 | 579 | NumBits = 8; |
2566 | 579 | Scale = 4; |
2567 | 579 | break; |
2568 | 36 | case ARMII::AddrMode5FP16: |
2569 | 25 | ImmIdx = FrameRegIdx+1; |
2570 | 25 | InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm()); |
2571 | 25 | if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) |
2572 | 0 | InstrOffs *= -1; |
2573 | 25 | NumBits = 8; |
2574 | 25 | Scale = 2; |
2575 | 25 | break; |
2576 | 36 | case ARMII::AddrModeT2_i7: |
2577 | 0 | case ARMII::AddrModeT2_i7s2: |
2578 | 0 | case ARMII::AddrModeT2_i7s4: |
2579 | 0 | ImmIdx = FrameRegIdx+1; |
2580 | 0 | InstrOffs = MI.getOperand(ImmIdx).getImm(); |
2581 | 0 | NumBits = 7; |
2582 | 0 | Scale = (AddrMode == ARMII::AddrModeT2_i7s2 ? 2 : |
2583 | 0 | AddrMode == ARMII::AddrModeT2_i7s4 ? 4 : 1); |
2584 | 0 | break; |
2585 | 0 | default: |
2586 | 0 | llvm_unreachable("Unsupported addressing mode!"); |
2587 | 6.67k | } |
2588 | 6.67k | |
2589 | 6.67k | Offset += InstrOffs * Scale; |
2590 | 6.67k | assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!"); |
2591 | 6.67k | if (Offset < 0) { |
2592 | 181 | Offset = -Offset; |
2593 | 181 | isSub = true; |
2594 | 181 | } |
2595 | 6.67k | |
2596 | 6.67k | // Attempt to fold the address computation if the opcode has offset bits
2597 | 6.67k | if (NumBits > 0) { |
2598 | 6.67k | // Common case: small offset, fits into instruction. |
2599 | 6.67k | MachineOperand &ImmOp = MI.getOperand(ImmIdx); |
2600 | 6.67k | int ImmedOffset = Offset / Scale; |
2601 | 6.67k | unsigned Mask = (1 << NumBits) - 1; |
2602 | 6.67k | if ((unsigned)Offset <= Mask * Scale) { |
2603 | 6.63k | // Replace the FrameIndex with sp |
2604 | 6.63k | MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); |
2605 | 6.63k | // FIXME: When addrmode2 goes away, this will simplify (like the |
2606 | 6.63k | // T2 version), as the LDR.i12 versions don't need the encoding |
2607 | 6.63k | // tricks for the offset value. |
2608 | 6.63k | if (isSub) { |
2609 | 180 | if (AddrMode == ARMII::AddrMode_i12) |
2610 | 133 | ImmedOffset = -ImmedOffset; |
2611 | 47 | else |
2612 | 47 | ImmedOffset |= 1 << NumBits; |
2613 | 180 | } |
2614 | 6.63k | ImmOp.ChangeToImmediate(ImmedOffset); |
2615 | 6.63k | Offset = 0; |
2616 | 6.63k | return true; |
2617 | 6.63k | } |
2618 | 47 | |
2619 | 47 | // Otherwise, it didn't fit. Pull in what we can to simplify the immediate.
2620 | 47 | ImmedOffset = ImmedOffset & Mask; |
2621 | 47 | if (isSub) { |
2622 | 1 | if (AddrMode == ARMII::AddrMode_i12) |
2623 | 1 | ImmedOffset = -ImmedOffset; |
2624 | 0 | else |
2625 | 0 | ImmedOffset |= 1 << NumBits; |
2626 | 1 | } |
2627 | 47 | ImmOp.ChangeToImmediate(ImmedOffset); |
2628 | 47 | Offset &= ~(Mask*Scale); |
2629 | 47 | } |
2630 | 6.67k | } |
2631 | 8.02k | |
2632 | 8.02k | Offset = (isSub) 361 ? -Offset2 : Offset359 ; |
2633 | 361 | return Offset == 0; |
2634 | 8.02k | } |
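The ADDri branch above peels off as much of the frame offset as fits in a single ARM so_imm field (an 8-bit value rotated right by an even amount) and leaves the remainder in Offset. A minimal standalone sketch of that split follows; pickSOImmRotate is a simplified, invented stand-in for ARM_AM::getSOImmValRotate, not the real routine:

    #include <cstdint>
    #include <cstdio>

    // Rotate a 32-bit value right by Amt bits (Amt in [0, 31]).
    static uint32_t rotr32(uint32_t Val, unsigned Amt) {
      return Amt ? (Val >> Amt) | (Val << (32 - Amt)) : Val;
    }

    // Simplified stand-in for ARM_AM::getSOImmValRotate: pick an even rotation
    // whose 8-bit window covers the lowest set bit of Imm (Imm != 0).
    static unsigned pickSOImmRotate(uint32_t Imm) {
      uint32_t LowBit = Imm & ~(Imm - 1);
      for (unsigned Rot = 0; Rot < 32; Rot += 2)
        if (rotr32(0xFFu, Rot) & LowBit)
          return Rot;
      return 0;
    }

    int main() {
      uint32_t Offset = 0x1004;                             // too wide for one so_imm
      unsigned RotAmt = pickSOImmRotate(Offset);
      uint32_t ThisImmVal = Offset & rotr32(0xFFu, RotAmt); // encodable chunk
      Offset &= ~ThisImmVal;                                // remainder stays pending
      std::printf("fold 0x%x now, 0x%x left over\n",
                  (unsigned)ThisImmVal, (unsigned)Offset);  // fold 0x4, 0x1000 left
    }

In the listing above the leftover value is handed back through Offset, and the rewrite only counts as complete once that remainder reaches zero.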
2635 | | |
2636 | | /// analyzeCompare - For a comparison instruction, return the source registers |
2637 | | /// in SrcReg and SrcReg2 if it has two register operands, and the value it
2638 | | /// compares against in CmpValue. Return true if the comparison instruction |
2639 | | /// can be analyzed. |
2640 | | bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, |
2641 | | unsigned &SrcReg2, int &CmpMask, |
2642 | 883k | int &CmpValue) const { |
2643 | 883k | switch (MI.getOpcode()) { |
2644 | 883k | default: break712k ; |
2645 | 883k | case ARM::CMPri: |
2646 | 135k | case ARM::t2CMPri: |
2647 | 135k | case ARM::tCMPi8: |
2648 | 135k | SrcReg = MI.getOperand(0).getReg(); |
2649 | 135k | SrcReg2 = 0; |
2650 | 135k | CmpMask = ~0; |
2651 | 135k | CmpValue = MI.getOperand(1).getImm(); |
2652 | 135k | return true; |
2653 | 135k | case ARM::CMPrr: |
2654 | 36.2k | case ARM::t2CMPrr: |
2655 | 36.2k | case ARM::tCMPr: |
2656 | 36.2k | SrcReg = MI.getOperand(0).getReg(); |
2657 | 36.2k | SrcReg2 = MI.getOperand(1).getReg(); |
2658 | 36.2k | CmpMask = ~0; |
2659 | 36.2k | CmpValue = 0; |
2660 | 36.2k | return true; |
2661 | 36.2k | case ARM::TSTri: |
2662 | 317 | case ARM::t2TSTri: |
2663 | 317 | SrcReg = MI.getOperand(0).getReg(); |
2664 | 317 | SrcReg2 = 0; |
2665 | 317 | CmpMask = MI.getOperand(1).getImm(); |
2666 | 317 | CmpValue = 0; |
2667 | 317 | return true; |
2668 | 712k | } |
2669 | 712k | |
2670 | 712k | return false; |
2671 | 712k | } |
2672 | | |
2673 | | /// isSuitableForMask - Identify a suitable 'and' instruction that |
2674 | | /// operates on the given source register and applies the same mask |
2675 | | /// as a 'tst' instruction. Provide a limited look-through for copies. |
2676 | | /// When successful, MI will hold the found instruction. |
2677 | | static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg, |
2678 | 133 | int CmpMask, bool CommonUse) { |
2679 | 133 | switch (MI->getOpcode()) { |
2680 | 133 | case ARM::ANDri: |
2681 | 2 | case ARM::t2ANDri: |
2682 | 2 | if (CmpMask != MI->getOperand(2).getImm()) |
2683 | 0 | return false; |
2684 | 2 | if (SrcReg == MI->getOperand(CommonUse ? 1 : 00 ).getReg()) |
2685 | 2 | return true; |
2686 | 0 | break; |
2687 | 131 | } |
2688 | 131 | |
2689 | 131 | return false; |
2690 | 131 | } |
2691 | | |
2692 | | /// getSwappedCondition - assume the flags are set by MI(a,b), return |
2693 | | /// the condition code if we modify the instructions such that flags are |
2694 | | /// set by MI(b,a). |
2695 | 9 | inline static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC) { |
2696 | 9 | switch (CC) { |
2697 | 9 | default: return ARMCC::AL0 ; |
2698 | 9 | case ARMCC::EQ: return ARMCC::EQ0 ; |
2699 | 9 | case ARMCC::NE: return ARMCC::NE0 ; |
2700 | 9 | case ARMCC::HS: return ARMCC::LS0 ; |
2701 | 9 | case ARMCC::LO: return ARMCC::HI; |
2702 | 9 | case ARMCC::HI: return ARMCC::LO0 ; |
2703 | 9 | case ARMCC::LS: return ARMCC::HS0 ; |
2704 | 9 | case ARMCC::GE: return ARMCC::LE0 ; |
2705 | 9 | case ARMCC::LT: return ARMCC::GT0 ; |
2706 | 9 | case ARMCC::GT: return ARMCC::LT0 ; |
2707 | 9 | case ARMCC::LE: return ARMCC::GE0 ; |
2708 | 9 | } |
2709 | 9 | } |
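This table is what keeps the swapped-operand case of the compare peephole correct: once the flags come from SUBS(b, a) instead of CMP(a, b), every ordering test has to be mirrored. A tiny standalone check of one entry (plain C++, not LLVM API): the LO test on CMP(a, b) must become HI once the operands are reversed.

    #include <cstdint>
    #include <cassert>

    int main() {
      for (uint32_t a = 0; a < 64; ++a)
        for (uint32_t b = 0; b < 64; ++b) {
          bool loOnCmpAB = a < b;          // LO (C clear) after CMP(a, b)
          bool hiOnSubBA = b > a;          // HI (C set, Z clear) after SUBS(x, b, a)
          assert(loOnCmpAB == hiOnSubBA);  // matches the LO -> HI entry above
        }
      return 0;
    }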
2710 | | |
2711 | | /// getCmpToAddCondition - assume the flags are set by CMP(a,b), return |
2712 | | /// the condition code if we modify the instructions such that flags are |
2713 | | /// set by ADD(a,b,X). |
2714 | 143 | inline static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC) { |
2715 | 143 | switch (CC) { |
2716 | 143 | default: return ARMCC::AL18 ; |
2717 | 143 | case ARMCC::HS: return ARMCC::LO85 ; |
2718 | 143 | case ARMCC::LO: return ARMCC::HS30 ; |
2719 | 143 | case ARMCC::VS: return ARMCC::VS2 ; |
2720 | 143 | case ARMCC::VC: return ARMCC::VC8 ; |
2721 | 143 | } |
2722 | 143 | } |
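Only the carry and overflow conditions appear in this table because only they can be recovered from the ADD's flags: if r0 = r1 + X, then CMP(r0, r1) sets carry exactly when the ADD did not carry out, so an HS user of the CMP flags must test LO on the ADD flags (and vice versa), while Z- and N-based tests fall to the default AL, which makes the caller give up. A standalone brute-force check of the carry equivalence (ad-hoc code, not LLVM API):

    #include <cstdint>
    #include <cassert>

    int main() {
      for (uint64_t r1 = 0; r1 < 300; r1 += 7)
        for (uint64_t x = 0; x <= 0xffffffffull; x += 0x0fffffffull) {
          uint32_t r0 = static_cast<uint32_t>(r1 + x); // r0 = r1 + X (mod 2^32)
          bool cmpCarry = r0 >= r1;                    // HS after CMP(r0, r1)
          bool addCarry = ((r1 + x) >> 32) != 0;       // carry out of the ADD
          assert(cmpCarry == !addCarry);               // HS <-> LO, as in the table
        }
      return 0;
    }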
2723 | | |
2724 | | /// isRedundantFlagInstr - check whether the first instruction, whose only |
2725 | | /// purpose is to update flags, can be made redundant. |
2726 | | /// CMPrr can be made redundant by SUBrr if the operands are the same. |
2727 | | /// CMPri can be made redundant by SUBri if the operands are the same. |
2728 | | /// CMPrr(r0, r1) can be made redundant by ADDr[ri](r0, r1, X). |
2729 | | /// This function can be extended later on. |
2730 | | inline static bool isRedundantFlagInstr(const MachineInstr *CmpI, |
2731 | | unsigned SrcReg, unsigned SrcReg2, |
2732 | | int ImmValue, const MachineInstr *OI, |
2733 | 161k | bool &IsThumb1) { |
2734 | 161k | if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr160k ) && |
2735 | 161k | (42.9k OI->getOpcode() == ARM::SUBrr42.9k || OI->getOpcode() == ARM::t2SUBrr42.8k ) && |
2736 | 161k | (415 (415 OI->getOperand(1).getReg() == SrcReg415 && |
2737 | 415 | OI->getOperand(2).getReg() == SrcReg2117 ) || |
2738 | 415 | (298 OI->getOperand(1).getReg() == SrcReg2298 && |
2739 | 298 | OI->getOperand(2).getReg() == SrcReg32 ))) { |
2740 | 149 | IsThumb1 = false; |
2741 | 149 | return true; |
2742 | 149 | } |
2743 | 161k | |
2744 | 161k | if (CmpI->getOpcode() == ARM::tCMPr && OI->getOpcode() == ARM::tSUBrr721 && |
2745 | 161k | (4 (4 OI->getOperand(2).getReg() == SrcReg4 && |
2746 | 4 | OI->getOperand(3).getReg() == SrcReg23 ) || |
2747 | 4 | (1 OI->getOperand(2).getReg() == SrcReg21 && |
2748 | 4 | OI->getOperand(3).getReg() == SrcReg1 ))) { |
2749 | 4 | IsThumb1 = true; |
2750 | 4 | return true; |
2751 | 4 | } |
2752 | 161k | |
2753 | 161k | if ((CmpI->getOpcode() == ARM::CMPri || CmpI->getOpcode() == ARM::t2CMPri159k ) && |
2754 | 161k | (114k OI->getOpcode() == ARM::SUBri114k || OI->getOpcode() == ARM::t2SUBri114k ) && |
2755 | 161k | OI->getOperand(1).getReg() == SrcReg2.66k && |
2756 | 161k | OI->getOperand(2).getImm() == ImmValue300 ) { |
2757 | 31 | IsThumb1 = false; |
2758 | 31 | return true; |
2759 | 31 | } |
2760 | 161k | |
2761 | 161k | if (CmpI->getOpcode() == ARM::tCMPi8 && |
2762 | 161k | (2.94k OI->getOpcode() == ARM::tSUBi82.94k || OI->getOpcode() == ARM::tSUBi32.93k ) && |
2763 | 161k | OI->getOperand(2).getReg() == SrcReg178 && |
2764 | 161k | OI->getOperand(3).getImm() == ImmValue2 ) { |
2765 | 2 | IsThumb1 = true; |
2766 | 2 | return true; |
2767 | 2 | } |
2768 | 161k | |
2769 | 161k | if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr160k ) && |
2770 | 161k | (42.7k OI->getOpcode() == ARM::ADDrr42.7k || OI->getOpcode() == ARM::t2ADDrr42.7k || |
2771 | 42.7k | OI->getOpcode() == ARM::ADDri40.7k || OI->getOpcode() == ARM::t2ADDri40.7k ) && |
2772 | 161k | OI->getOperand(0).isReg()5.60k && OI->getOperand(1).isReg()5.60k && |
2773 | 161k | OI->getOperand(0).getReg() == SrcReg5.44k && |
2774 | 161k | OI->getOperand(1).getReg() == SrcReg23.27k ) { |
2775 | 214 | IsThumb1 = false; |
2776 | 214 | return true; |
2777 | 214 | } |
2778 | 161k | |
2779 | 161k | if (CmpI->getOpcode() == ARM::tCMPr && |
2780 | 161k | (717 OI->getOpcode() == ARM::tADDi3717 || OI->getOpcode() == ARM::tADDi8618 || |
2781 | 717 | OI->getOpcode() == ARM::tADDrr612 ) && |
2782 | 161k | OI->getOperand(0).getReg() == SrcReg153 && |
2783 | 161k | OI->getOperand(2).getReg() == SrcReg265 ) { |
2784 | 14 | IsThumb1 = true; |
2785 | 14 | return true; |
2786 | 14 | } |
2787 | 161k | |
2788 | 161k | return false; |
2789 | 161k | } |
2790 | | |
2791 | 27.0k | static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) { |
2792 | 27.0k | switch (MI->getOpcode()) { |
2793 | 27.0k | default: return false22.9k ; |
2794 | 27.0k | case ARM::tLSLri: |
2795 | 244 | case ARM::tLSRri: |
2796 | 244 | case ARM::tLSLrr: |
2797 | 244 | case ARM::tLSRrr: |
2798 | 244 | case ARM::tSUBrr: |
2799 | 244 | case ARM::tADDrr: |
2800 | 244 | case ARM::tADDi3: |
2801 | 244 | case ARM::tADDi8: |
2802 | 244 | case ARM::tSUBi3: |
2803 | 244 | case ARM::tSUBi8: |
2804 | 244 | case ARM::tMUL: |
2805 | 244 | case ARM::tADC: |
2806 | 244 | case ARM::tSBC: |
2807 | 244 | case ARM::tRSB: |
2808 | 244 | case ARM::tAND: |
2809 | 244 | case ARM::tORR: |
2810 | 244 | case ARM::tEOR: |
2811 | 244 | case ARM::tBIC: |
2812 | 244 | case ARM::tMVN: |
2813 | 244 | case ARM::tASRri: |
2814 | 244 | case ARM::tASRrr: |
2815 | 244 | case ARM::tROR: |
2816 | 244 | IsThumb1 = true; |
2817 | 244 | LLVM_FALLTHROUGH; |
2818 | 4.09k | case ARM::RSBrr: |
2819 | 4.09k | case ARM::RSBri: |
2820 | 4.09k | case ARM::RSCrr: |
2821 | 4.09k | case ARM::RSCri: |
2822 | 4.09k | case ARM::ADDrr: |
2823 | 4.09k | case ARM::ADDri: |
2824 | 4.09k | case ARM::ADCrr: |
2825 | 4.09k | case ARM::ADCri: |
2826 | 4.09k | case ARM::SUBrr: |
2827 | 4.09k | case ARM::SUBri: |
2828 | 4.09k | case ARM::SBCrr: |
2829 | 4.09k | case ARM::SBCri: |
2830 | 4.09k | case ARM::t2RSBri: |
2831 | 4.09k | case ARM::t2ADDrr: |
2832 | 4.09k | case ARM::t2ADDri: |
2833 | 4.09k | case ARM::t2ADCrr: |
2834 | 4.09k | case ARM::t2ADCri: |
2835 | 4.09k | case ARM::t2SUBrr: |
2836 | 4.09k | case ARM::t2SUBri: |
2837 | 4.09k | case ARM::t2SBCrr: |
2838 | 4.09k | case ARM::t2SBCri: |
2839 | 4.09k | case ARM::ANDrr: |
2840 | 4.09k | case ARM::ANDri: |
2841 | 4.09k | case ARM::t2ANDrr: |
2842 | 4.09k | case ARM::t2ANDri: |
2843 | 4.09k | case ARM::ORRrr: |
2844 | 4.09k | case ARM::ORRri: |
2845 | 4.09k | case ARM::t2ORRrr: |
2846 | 4.09k | case ARM::t2ORRri: |
2847 | 4.09k | case ARM::EORrr: |
2848 | 4.09k | case ARM::EORri: |
2849 | 4.09k | case ARM::t2EORrr: |
2850 | 4.09k | case ARM::t2EORri: |
2851 | 4.09k | case ARM::t2LSRri: |
2852 | 4.09k | case ARM::t2LSRrr: |
2853 | 4.09k | case ARM::t2LSLri: |
2854 | 4.09k | case ARM::t2LSLrr: |
2855 | 4.09k | return true; |
2856 | 27.0k | } |
2857 | 27.0k | } |
2858 | | |
2859 | | /// optimizeCompareInstr - Convert the instruction supplying the argument to the |
2860 | | /// comparison into one that sets the zero bit in the flags register; |
2861 | | /// Remove a redundant Compare instruction if an earlier instruction can set the |
2862 | | /// flags in the same way as Compare. |
2863 | | /// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two |
2864 | | /// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the |
2865 | | /// condition code of instructions which use the flags. |
2866 | | bool ARMBaseInstrInfo::optimizeCompareInstr( |
2867 | | MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask, |
2868 | 49.8k | int CmpValue, const MachineRegisterInfo *MRI) const { |
2869 | 49.8k | // Get the unique definition of SrcReg. |
2870 | 49.8k | MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg); |
2871 | 49.8k | if (!MI) return false0 ; |
2872 | 49.8k | |
2873 | 49.8k | // Masked compares sometimes use the same register as the corresponding 'and'. |
2874 | 49.8k | if (CmpMask != ~0) { |
2875 | 63 | if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(*MI)0 ) { |
2876 | 63 | MI = nullptr; |
2877 | 63 | for (MachineRegisterInfo::use_instr_iterator |
2878 | 63 | UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end(); |
2879 | 199 | UI != UE; ++UI136 ) { |
2880 | 138 | if (UI->getParent() != CmpInstr.getParent()) |
2881 | 68 | continue; |
2882 | 70 | MachineInstr *PotentialAND = &*UI; |
2883 | 70 | if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) || |
2884 | 70 | isPredicated(*PotentialAND)2 ) |
2885 | 68 | continue; |
2886 | 2 | MI = PotentialAND; |
2887 | 2 | break; |
2888 | 2 | } |
2889 | 63 | if (!MI) return false61 ; |
2890 | 49.8k | } |
2891 | 63 | } |
2892 | 49.8k | |
2893 | 49.8k | // Get ready to iterate backward from CmpInstr. |
2894 | 49.8k | MachineBasicBlock::iterator I = CmpInstr, E = MI, |
2895 | 49.8k | B = CmpInstr.getParent()->begin(); |
2896 | 49.8k | |
2897 | 49.8k | // Early exit if CmpInstr is at the beginning of the BB. |
2898 | 49.8k | if (I == B) return false5.25k ; |
2899 | 44.5k | |
2900 | 44.5k | // There are two possible candidates which can be changed to set CPSR: |
2901 | 44.5k | // One is MI, the other is a SUB or ADD instruction. |
2902 | 44.5k | // For CMPrr(r1,r2), we are looking for SUB(r1,r2), SUB(r2,r1), or |
2903 | 44.5k | // ADDr[ri](r1, r2, X). |
2904 | 44.5k | // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue). |
2905 | 44.5k | MachineInstr *SubAdd = nullptr; |
2906 | 44.5k | if (SrcReg2 != 0) |
2907 | 9.75k | // MI is not a candidate for CMPrr. |
2908 | 9.75k | MI = nullptr; |
2909 | 34.8k | else if (MI->getParent() != CmpInstr.getParent() || CmpValue != 032.2k ) { |
2910 | 7.72k | // Conservatively refuse to convert an instruction which isn't in the same |
2911 | 7.72k | // BB as the comparison. |
2912 | 7.72k | // For CMPri w/ CmpValue != 0, a SubAdd may still be a candidate. |
2913 | 7.72k | // Thus we cannot return here. |
2914 | 7.72k | if (CmpInstr.getOpcode() == ARM::CMPri || |
2915 | 7.72k | CmpInstr.getOpcode() == ARM::t2CMPri7.57k || |
2916 | 7.72k | CmpInstr.getOpcode() == ARM::tCMPi8243 ) |
2917 | 7.72k | MI = nullptr; |
2918 | 0 | else |
2919 | 0 | return false; |
2920 | 44.5k | } |
2921 | 44.5k | |
2922 | 44.5k | bool IsThumb1 = false; |
2923 | 44.5k | if (MI && !isOptimizeCompareCandidate(MI, IsThumb1)27.0k ) |
2924 | 22.9k | return false; |
2925 | 21.5k | |
2926 | 21.5k | // We also want to do this peephole for cases like this: if (a*b == 0), |
2927 | 21.5k | // and optimise away the CMP instruction from the generated code sequence: |
2928 | 21.5k | // MULS, MOVS, MOVS, CMP. Here the MOVS instructions load the boolean values |
2929 | 21.5k | // resulting from the select instruction, but these MOVS instructions for |
2930 | 21.5k | // Thumb1 (V6M) are flag setting and are thus preventing this optimisation. |
2931 | 21.5k | // However, if we only have MOVS instructions in between the CMP and the |
2932 | 21.5k | // other instruction (the MULS in this example), then the CPSR is dead so we |
2933 | 21.5k | // can safely reorder the sequence into: MOVS, MOVS, MULS, CMP. We do this |
2934 | 21.5k | // reordering and then continue the analysis hoping we can eliminate the |
2935 | 21.5k | // CMP. This peephole works on the vregs, so is still in SSA form. As a |
2936 | 21.5k | // consequence, the movs won't redefine/kill the MUL operands which would |
2937 | 21.5k | // make this reordering illegal. |
2938 | 21.5k | const TargetRegisterInfo *TRI = &getRegisterInfo(); |
2939 | 21.5k | if (MI && IsThumb14.09k ) { |
2940 | 244 | --I; |
2941 | 244 | if (I != E && !MI->readsRegister(ARM::CPSR, TRI)29 ) { |
2942 | 25 | bool CanReorder = true; |
2943 | 29 | for (; I != E; --I4 ) { |
2944 | 26 | if (I->getOpcode() != ARM::tMOVi8) { |
2945 | 22 | CanReorder = false; |
2946 | 22 | break; |
2947 | 22 | } |
2948 | 26 | } |
2949 | 25 | if (CanReorder) { |
2950 | 3 | MI = MI->removeFromParent(); |
2951 | 3 | E = CmpInstr; |
2952 | 3 | CmpInstr.getParent()->insert(E, MI); |
2953 | 3 | } |
2954 | 25 | } |
2955 | 244 | I = CmpInstr; |
2956 | 244 | E = MI; |
2957 | 244 | } |
2958 | 21.5k | |
2959 | 21.5k | // Check that CPSR isn't set between the comparison instruction and the one we |
2960 | 21.5k | // want to change. At the same time, search for SubAdd. |
2961 | 21.5k | bool SubAddIsThumb1 = false; |
2962 | 39.8k | do { |
2963 | 39.8k | const MachineInstr &Instr = *--I; |
2964 | 39.8k | |
2965 | 39.8k | // Check whether CmpInstr can be made redundant by the current instruction. |
2966 | 39.8k | if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &Instr, |
2967 | 39.8k | SubAddIsThumb1)) { |
2968 | 218 | SubAdd = &*I; |
2969 | 218 | break; |
2970 | 218 | } |
2971 | 39.5k | |
2972 | 39.5k | // Allow E (which was initially MI) to be SubAdd but do not search before E. |
2973 | 39.5k | if (I == E) |
2974 | 16.5k | break; |
2975 | 23.0k | |
2976 | 23.0k | if (Instr.modifiesRegister(ARM::CPSR, TRI) || |
2977 | 23.0k | Instr.readsRegister(ARM::CPSR, TRI)22.2k ) |
2978 | 1.32k | // This instruction modifies or uses CPSR after the one we want to |
2979 | 1.32k | // change. We can't do this transformation. |
2980 | 1.32k | return false; |
2981 | 21.7k | |
2982 | 21.7k | if (I == B) { |
2983 | 3.46k | // In some cases, we scan the use-list of an instruction for an AND; |
2984 | 3.46k | // that AND is in the same BB, but may not be scheduled before the |
2985 | 3.46k | // corresponding TST. In that case, bail out. |
2986 | 3.46k | // |
2987 | 3.46k | // FIXME: We could try to reschedule the AND. |
2988 | 3.46k | return false; |
2989 | 3.46k | } |
2990 | 18.2k | } while (true); |
2991 | 21.5k | |
2992 | 21.5k | // Return false if no candidates exist. |
2993 | 21.5k | if (16.7k !MI16.7k && !SubAdd12.8k ) |
2994 | 12.5k | return false; |
2995 | 4.20k | |
2996 | 4.20k | // If we found a SubAdd, use it as it will be closer to the CMP |
2997 | 4.20k | if (SubAdd) { |
2998 | 218 | MI = SubAdd; |
2999 | 218 | IsThumb1 = SubAddIsThumb1; |
3000 | 218 | } |
3001 | 4.20k | |
3002 | 4.20k | // We can't use a predicated instruction - it doesn't always write the flags. |
3003 | 4.20k | if (isPredicated(*MI)) |
3004 | 7 | return false; |
3005 | 4.19k | |
3006 | 4.19k | // Scan forward for the use of CPSR |
3007 | 4.19k | // When checking against MI: if it's a conditional code that requires |
3008 | 4.19k | // checking of the V bit or C bit, then this is not safe to do. |
3009 | 4.19k | // It is safe to remove CmpInstr if CPSR is redefined or killed. |
3010 | 4.19k | // If we are done with the basic block, we need to check whether CPSR is |
3011 | 4.19k | // live-out. |
3012 | 4.19k | SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4> |
3013 | 4.19k | OperandsToUpdate; |
3014 | 4.19k | bool isSafe = false; |
3015 | 4.19k | I = CmpInstr; |
3016 | 4.19k | E = CmpInstr.getParent()->end(); |
3017 | 13.7k | while (!isSafe && ++I != E13.3k ) { |
3018 | 9.62k | const MachineInstr &Instr = *I; |
3019 | 9.62k | for (unsigned IO = 0, EO = Instr.getNumOperands(); |
3020 | 42.8k | !isSafe && IO != EO; ++IO33.2k ) { |
3021 | 33.7k | const MachineOperand &MO = Instr.getOperand(IO); |
3022 | 33.7k | if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)34 ) { |
3023 | 34 | isSafe = true; |
3024 | 34 | break; |
3025 | 34 | } |
3026 | 33.6k | if (!MO.isReg() || MO.getReg() != ARM::CPSR17.2k ) |
3027 | 28.9k | continue; |
3028 | 4.71k | if (MO.isDef()) { |
3029 | 324 | isSafe = true; |
3030 | 324 | break; |
3031 | 324 | } |
3032 | 4.38k | // The condition code is the operand immediately before CPSR, except for VSELs.
3033 | 4.38k | ARMCC::CondCodes CC; |
3034 | 4.38k | bool IsInstrVSel = true; |
3035 | 4.38k | switch (Instr.getOpcode()) { |
3036 | 4.38k | default: |
3037 | 4.38k | IsInstrVSel = false; |
3038 | 4.38k | CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm(); |
3039 | 4.38k | break; |
3040 | 4.38k | case ARM::VSELEQD: |
3041 | 4 | case ARM::VSELEQS: |
3042 | 4 | CC = ARMCC::EQ; |
3043 | 4 | break; |
3044 | 4 | case ARM::VSELGTD: |
3045 | 0 | case ARM::VSELGTS: |
3046 | 0 | CC = ARMCC::GT; |
3047 | 0 | break; |
3048 | 0 | case ARM::VSELGED: |
3049 | 0 | case ARM::VSELGES: |
3050 | 0 | CC = ARMCC::GE; |
3051 | 0 | break; |
3052 | 0 | case ARM::VSELVSS: |
3053 | 0 | case ARM::VSELVSD: |
3054 | 0 | CC = ARMCC::VS; |
3055 | 0 | break; |
3056 | 4.38k | } |
3057 | 4.38k | |
3058 | 4.38k | if (SubAdd) { |
3059 | 222 | // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based |
3060 | 222 | // on CMP needs to be updated to be based on SUB. |
3061 | 222 | // If we have ADD(r1, r2, X) and CMP(r1, r2), the condition code also |
3062 | 222 | // needs to be modified. |
3063 | 222 | // Push the condition code operands to OperandsToUpdate. |
3064 | 222 | // If it is safe to remove CmpInstr, the condition code of these |
3065 | 222 | // operands will be modified. |
3066 | 222 | unsigned Opc = SubAdd->getOpcode(); |
3067 | 222 | bool IsSub = Opc == ARM::SUBrr || Opc == ARM::t2SUBrr200 || |
3068 | 222 | Opc == ARM::SUBri164 || Opc == ARM::t2SUBri161 || |
3069 | 222 | Opc == ARM::tSUBrr148 || Opc == ARM::tSUBi3145 || |
3070 | 222 | Opc == ARM::tSUBi8144 ; |
3071 | 222 | unsigned OpI = Opc != ARM::tSUBrr ? 1219 : 23 ; |
3072 | 222 | if (!IsSub || |
3073 | 222 | (79 SrcReg2 != 079 && SubAdd->getOperand(OpI).getReg() == SrcReg261 && |
3074 | 152 | SubAdd->getOperand(OpI + 1).getReg() == SrcReg9 )) { |
3075 | 152 | // VSel doesn't support condition code update. |
3076 | 152 | if (IsInstrVSel) |
3077 | 0 | return false; |
3078 | 152 | // Ensure we can swap the condition. |
3079 | 152 | ARMCC::CondCodes NewCC = (IsSub ? getSwappedCondition(CC)9 : getCmpToAddCondition(CC)143 ); |
3080 | 152 | if (NewCC == ARMCC::AL) |
3081 | 18 | return false; |
3082 | 134 | OperandsToUpdate.push_back( |
3083 | 134 | std::make_pair(&((*I).getOperand(IO - 1)), NewCC)); |
3084 | 134 | } |
3085 | 4.16k | } else { |
3086 | 4.16k | // No SubAdd, so this is x = <op> y, z; cmp x, 0. |
3087 | 4.16k | switch (CC) { |
3088 | 4.16k | case ARMCC::EQ: // Z |
3089 | 4.08k | case ARMCC::NE: // Z |
3090 | 4.08k | case ARMCC::MI: // N |
3091 | 4.08k | case ARMCC::PL: // N |
3092 | 4.08k | case ARMCC::AL: // none |
3093 | 4.08k | // CPSR can be used multiple times, we should continue. |
3094 | 4.08k | break; |
3095 | 4.08k | case ARMCC::HS: // C |
3096 | 77 | case ARMCC::LO: // C |
3097 | 77 | case ARMCC::VS: // V |
3098 | 77 | case ARMCC::VC: // V |
3099 | 77 | case ARMCC::HI: // C Z |
3100 | 77 | case ARMCC::LS: // C Z |
3101 | 77 | case ARMCC::GE: // N V |
3102 | 77 | case ARMCC::LT: // N V |
3103 | 77 | case ARMCC::GT: // Z N V |
3104 | 77 | case ARMCC::LE: // Z N V |
3105 | 77 | // The instruction uses the V bit or C bit which is not safe. |
3106 | 77 | return false; |
3107 | 4.16k | } |
3108 | 4.16k | } |
3109 | 4.38k | } |
3110 | 9.62k | } |
3111 | 4.19k | |
3112 | 4.19k | // If CPSR is neither killed nor re-defined, we should check whether it is
3113 | 4.19k | // live-out. If it is live-out, do not optimize. |
3114 | 4.19k | if (4.10k !isSafe4.10k ) { |
3115 | 3.74k | MachineBasicBlock *MBB = CmpInstr.getParent(); |
3116 | 3.74k | for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), |
3117 | 10.7k | SE = MBB->succ_end(); SI != SE; ++SI7.01k ) |
3118 | 7.01k | if ((*SI)->isLiveIn(ARM::CPSR)) |
3119 | 0 | return false; |
3120 | 3.74k | } |
3121 | 4.10k | |
3122 | 4.10k | // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always |
3123 | 4.10k | // set CPSR so this is represented as an explicit output) |
3124 | 4.10k | if (!IsThumb1) { |
3125 | 3.86k | MI->getOperand(5).setReg(ARM::CPSR); |
3126 | 3.86k | MI->getOperand(5).setIsDef(true); |
3127 | 3.86k | } |
3128 | 4.10k | assert(!isPredicated(*MI) && "Can't use flags from predicated instruction"); |
3129 | 4.10k | CmpInstr.eraseFromParent(); |
3130 | 4.10k | |
3131 | 4.10k | // Modify the condition code of operands in OperandsToUpdate. |
3132 | 4.10k | // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to |
3133 | 4.10k | // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc. |
3134 | 4.23k | for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++134 ) |
3135 | 134 | OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second); |
3136 | 4.10k | |
3137 | 4.10k | MI->clearRegisterDeads(ARM::CPSR); |
3138 | 4.10k | |
3139 | 4.10k | return true; |
3140 | 4.10k | } |
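At the source level the effect of this routine is simply to let the flag-setting form of an existing arithmetic instruction stand in for the explicit compare. A hypothetical fragment of the kind it targets; the instruction names in the comments are illustrative, not output from an actual compilation:

    // Before the peephole (conceptually):
    //     sub   r0, r1, r2     ; plain SUB, CPSR untouched
    //     cmp   r1, r2         ; recomputes r1 - r2 just to set the flags
    //     movlt r0, #0         ; conditional use of the flags
    // After optimizeCompareInstr:
    //     subs  r0, r1, r2     ; the SUB itself now defines CPSR, CMP is erased
    //     movlt r0, #0
    int clampedDiff(int a, int b) {
      int d = a - b;            // the SUB above
      return a >= b ? d : 0;    // the compare that becomes redundant
    }

Because the SUB and the CMP use the operands in the same order, the flags are identical and no condition code needs to be rewritten; the swapped-operand and ADD cases go through the tables shown earlier instead.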
3141 | | |
3142 | 1.06M | bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const { |
3143 | 1.06M | // Do not sink MI if it might be used to optimize a redundant compare. |
3144 | 1.06M | // We heuristically only look at the instruction immediately following MI to |
3145 | 1.06M | // avoid potentially searching the entire basic block. |
3146 | 1.06M | if (isPredicated(MI)) |
3147 | 126k | return true; |
3148 | 938k | MachineBasicBlock::const_iterator Next = &MI; |
3149 | 938k | ++Next; |
3150 | 938k | unsigned SrcReg, SrcReg2; |
3151 | 938k | int CmpMask, CmpValue; |
3152 | 938k | bool IsThumb1; |
3153 | 938k | if (Next != MI.getParent()->end() && |
3154 | 938k | analyzeCompare(*Next, SrcReg, SrcReg2, CmpMask, CmpValue)831k && |
3155 | 938k | isRedundantFlagInstr(&*Next, SrcReg, SrcReg2, CmpValue, &MI, IsThumb1)121k ) |
3156 | 196 | return false; |
3157 | 938k | return true; |
3158 | 938k | } |
3159 | | |
3160 | | bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, |
3161 | | unsigned Reg, |
3162 | 71.0k | MachineRegisterInfo *MRI) const { |
3163 | 71.0k | // Fold large immediates into add, sub, or, xor. |
3164 | 71.0k | unsigned DefOpc = DefMI.getOpcode(); |
3165 | 71.0k | if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm69.5k ) |
3166 | 68.6k | return false; |
3167 | 2.38k | if (!DefMI.getOperand(1).isImm()) |
3168 | 901 | // Could be t2MOVi32imm @xx |
3169 | 901 | return false; |
3170 | 1.48k | |
3171 | 1.48k | if (!MRI->hasOneNonDBGUse(Reg)) |
3172 | 404 | return false; |
3173 | 1.07k | |
3174 | 1.07k | const MCInstrDesc &DefMCID = DefMI.getDesc(); |
3175 | 1.07k | if (DefMCID.hasOptionalDef()) { |
3176 | 0 | unsigned NumOps = DefMCID.getNumOperands(); |
3177 | 0 | const MachineOperand &MO = DefMI.getOperand(NumOps - 1); |
3178 | 0 | if (MO.getReg() == ARM::CPSR && !MO.isDead()) |
3179 | 0 | // If DefMI defines CPSR and it is not dead, it's obviously not safe |
3180 | 0 | // to delete DefMI. |
3181 | 0 | return false; |
3182 | 1.07k | } |
3183 | 1.07k | |
3184 | 1.07k | const MCInstrDesc &UseMCID = UseMI.getDesc(); |
3185 | 1.07k | if (UseMCID.hasOptionalDef()) { |
3186 | 520 | unsigned NumOps = UseMCID.getNumOperands(); |
3187 | 520 | if (UseMI.getOperand(NumOps - 1).getReg() == ARM::CPSR) |
3188 | 33 | // If the instruction sets the flag, do not attempt this optimization |
3189 | 33 | // since it may change the semantics of the code. |
3190 | 33 | return false; |
3191 | 1.04k | } |
3192 | 1.04k | |
3193 | 1.04k | unsigned UseOpc = UseMI.getOpcode(); |
3194 | 1.04k | unsigned NewUseOpc = 0; |
3195 | 1.04k | uint32_t ImmVal = (uint32_t)DefMI.getOperand(1).getImm(); |
3196 | 1.04k | uint32_t SOImmValV1 = 0, SOImmValV2 = 0; |
3197 | 1.04k | bool Commute = false; |
3198 | 1.04k | switch (UseOpc) { |
3199 | 1.04k | default: return false743 ; |
3200 | 1.04k | case ARM::SUBrr: |
3201 | 300 | case ARM::ADDrr: |
3202 | 300 | case ARM::ORRrr: |
3203 | 300 | case ARM::EORrr: |
3204 | 300 | case ARM::t2SUBrr: |
3205 | 300 | case ARM::t2ADDrr: |
3206 | 300 | case ARM::t2ORRrr: |
3207 | 300 | case ARM::t2EORrr: { |
3208 | 300 | Commute = UseMI.getOperand(2).getReg() != Reg; |
3209 | 300 | switch (UseOpc) { |
3210 | 300 | default: break0 ; |
3211 | 300 | case ARM::ADDrr: |
3212 | 9 | case ARM::SUBrr: |
3213 | 9 | if (UseOpc == ARM::SUBrr && Commute2 ) |
3214 | 0 | return false; |
3215 | 9 | |
3216 | 9 | // ADD/SUB are special because they're essentially the same operation, so |
3217 | 9 | // we can handle a larger range of immediates. |
3218 | 9 | if (ARM_AM::isSOImmTwoPartVal(ImmVal)) |
3219 | 3 | NewUseOpc = UseOpc == ARM::ADDrr ? ARM::ADDri2 : ARM::SUBri1 ; |
3220 | 6 | else if (ARM_AM::isSOImmTwoPartVal(-ImmVal)) { |
3221 | 2 | ImmVal = -ImmVal; |
3222 | 2 | NewUseOpc = UseOpc == ARM::ADDrr ? ARM::SUBri1 : ARM::ADDri1 ; |
3223 | 2 | } else |
3224 | 4 | return false; |
3225 | 5 | SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal); |
3226 | 5 | SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal); |
3227 | 5 | break; |
3228 | 5 | case ARM::ORRrr: |
3229 | 2 | case ARM::EORrr: |
3230 | 2 | if (!ARM_AM::isSOImmTwoPartVal(ImmVal)) |
3231 | 1 | return false; |
3232 | 1 | SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal); |
3233 | 1 | SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal); |
3234 | 1 | switch (UseOpc) { |
3235 | 1 | default: break0 ; |
3236 | 1 | case ARM::ORRrr: NewUseOpc = ARM::ORRri; break; |
3237 | 1 | case ARM::EORrr: NewUseOpc = ARM::EORri; break0 ; |
3238 | 1 | } |
3239 | 1 | break; |
3240 | 279 | case ARM::t2ADDrr: |
3241 | 279 | case ARM::t2SUBrr: |
3242 | 279 | if (UseOpc == ARM::t2SUBrr && Commute3 ) |
3243 | 1 | return false; |
3244 | 278 | |
3245 | 278 | // ADD/SUB are special because they're essentially the same operation, so |
3246 | 278 | // we can handle a larger range of immediates. |
3247 | 278 | if (ARM_AM::isT2SOImmTwoPartVal(ImmVal)) |
3248 | 261 | NewUseOpc = UseOpc == ARM::t2ADDrr ? ARM::t2ADDri260 : ARM::t2SUBri1 ; |
3249 | 17 | else if (ARM_AM::isT2SOImmTwoPartVal(-ImmVal)) { |
3250 | 2 | ImmVal = -ImmVal; |
3251 | 2 | NewUseOpc = UseOpc == ARM::t2ADDrr ? ARM::t2SUBri1 : ARM::t2ADDri1 ; |
3252 | 2 | } else |
3253 | 15 | return false; |
3254 | 263 | SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal); |
3255 | 263 | SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal); |
3256 | 263 | break; |
3257 | 263 | case ARM::t2ORRrr: |
3258 | 10 | case ARM::t2EORrr: |
3259 | 10 | if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal)) |
3260 | 4 | return false; |
3261 | 6 | SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal); |
3262 | 6 | SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal); |
3263 | 6 | switch (UseOpc) { |
3264 | 6 | default: break0 ; |
3265 | 6 | case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break; |
3266 | 6 | case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break0 ; |
3267 | 6 | } |
3268 | 6 | break; |
3269 | 300 | } |
3270 | 300 | } |
3271 | 275 | } |
3272 | 275 | |
3273 | 275 | unsigned OpIdx = Commute ? 20 : 1; |
3274 | 275 | unsigned Reg1 = UseMI.getOperand(OpIdx).getReg(); |
3275 | 275 | bool isKill = UseMI.getOperand(OpIdx).isKill(); |
3276 | 275 | unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg)); |
3277 | 275 | BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(NewUseOpc), |
3278 | 275 | NewReg) |
3279 | 275 | .addReg(Reg1, getKillRegState(isKill)) |
3280 | 275 | .addImm(SOImmValV1) |
3281 | 275 | .add(predOps(ARMCC::AL)) |
3282 | 275 | .add(condCodeOp()); |
3283 | 275 | UseMI.setDesc(get(NewUseOpc)); |
3284 | 275 | UseMI.getOperand(1).setReg(NewReg); |
3285 | 275 | UseMI.getOperand(1).setIsKill(); |
3286 | 275 | UseMI.getOperand(2).ChangeToImmediate(SOImmValV2); |
3287 | 275 | DefMI.eraseFromParent(); |
3288 | 275 | return true; |
3289 | 275 | } |
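FoldImmediate leans on the two-part SO-immediate property: a 32-bit constant that does not fit one shifter operand may still be the sum of two that do, so a MOVi32imm feeding an ADDrr can become two ADDri instructions (the second being the rewritten use). A standalone sketch of the splitting idea; the helpers are simplified, invented stand-ins for ARM_AM::isSOImmTwoPartVal and ARM_AM::getSOImmTwoPart{First,Second}, and the real routines are stricter:

    #include <cstdint>
    #include <cstdio>

    static uint32_t rotr32(uint32_t V, unsigned Amt) {
      return Amt ? (V >> Amt) | (V << (32 - Amt)) : V;
    }

    // True if V fits a single ARM so_imm (8 bits rotated right by an even amount).
    static bool isSOImm(uint32_t V) {
      for (unsigned Rot = 0; Rot < 32; Rot += 2)
        if ((V & ~rotr32(0xFFu, Rot)) == 0)
          return true;
      return false;
    }

    // Try V = First + Second with both halves individually encodable.
    static bool splitTwoPart(uint32_t V, uint32_t &First, uint32_t &Second) {
      for (unsigned Rot = 0; Rot < 32; Rot += 2) {
        First = V & rotr32(0xFFu, Rot);
        Second = V & ~rotr32(0xFFu, Rot);
        if (First && Second && isSOImm(First) && isSOImm(Second))
          return true;
      }
      return false;
    }

    int main() {
      uint32_t First = 0, Second = 0;
      if (splitTwoPart(0x00FF00FFu, First, Second))     // r = a + 0x00FF00FF
        std::printf("add r, a, #0x%x ; add r, r, #0x%x\n",
                    (unsigned)First, (unsigned)Second); // #0xff ; #0xff0000
    }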
3290 | | |
3291 | | static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, |
3292 | 0 | const MachineInstr &MI) { |
3293 | 0 | switch (MI.getOpcode()) { |
3294 | 0 | default: { |
3295 | 0 | const MCInstrDesc &Desc = MI.getDesc(); |
3296 | 0 | int UOps = ItinData->getNumMicroOps(Desc.getSchedClass()); |
3297 | 0 | assert(UOps >= 0 && "bad # UOps"); |
3298 | 0 | return UOps; |
3299 | 0 | } |
3300 | 0 |
3301 | 0 | case ARM::LDRrs: |
3302 | 0 | case ARM::LDRBrs: |
3303 | 0 | case ARM::STRrs: |
3304 | 0 | case ARM::STRBrs: { |
3305 | 0 | unsigned ShOpVal = MI.getOperand(3).getImm(); |
3306 | 0 | bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; |
3307 | 0 | unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); |
3308 | 0 | if (!isSub && |
3309 | 0 | (ShImm == 0 || |
3310 | 0 | ((ShImm == 1 || ShImm == 2 || ShImm == 3) && |
3311 | 0 | ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) |
3312 | 0 | return 1; |
3313 | 0 | return 2; |
3314 | 0 | } |
3315 | 0 |
3316 | 0 | case ARM::LDRH: |
3317 | 0 | case ARM::STRH: { |
3318 | 0 | if (!MI.getOperand(2).getReg()) |
3319 | 0 | return 1; |
3320 | 0 | |
3321 | 0 | unsigned ShOpVal = MI.getOperand(3).getImm(); |
3322 | 0 | bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; |
3323 | 0 | unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); |
3324 | 0 | if (!isSub && |
3325 | 0 | (ShImm == 0 || |
3326 | 0 | ((ShImm == 1 || ShImm == 2 || ShImm == 3) && |
3327 | 0 | ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) |
3328 | 0 | return 1; |
3329 | 0 | return 2; |
3330 | 0 | } |
3331 | 0 |
3332 | 0 | case ARM::LDRSB: |
3333 | 0 | case ARM::LDRSH: |
3334 | 0 | return (ARM_AM::getAM3Op(MI.getOperand(3).getImm()) == ARM_AM::sub) ? 3 : 2; |
3335 | 0 |
3336 | 0 | case ARM::LDRSB_POST: |
3337 | 0 | case ARM::LDRSH_POST: { |
3338 | 0 | unsigned Rt = MI.getOperand(0).getReg(); |
3339 | 0 | unsigned Rm = MI.getOperand(3).getReg(); |
3340 | 0 | return (Rt == Rm) ? 4 : 3; |
3341 | 0 | } |
3342 | 0 |
3343 | 0 | case ARM::LDR_PRE_REG: |
3344 | 0 | case ARM::LDRB_PRE_REG: { |
3345 | 0 | unsigned Rt = MI.getOperand(0).getReg(); |
3346 | 0 | unsigned Rm = MI.getOperand(3).getReg(); |
3347 | 0 | if (Rt == Rm) |
3348 | 0 | return 3; |
3349 | 0 | unsigned ShOpVal = MI.getOperand(4).getImm(); |
3350 | 0 | bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; |
3351 | 0 | unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); |
3352 | 0 | if (!isSub && |
3353 | 0 | (ShImm == 0 || |
3354 | 0 | ((ShImm == 1 || ShImm == 2 || ShImm == 3) && |
3355 | 0 | ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) |
3356 | 0 | return 2; |
3357 | 0 | return 3; |
3358 | 0 | } |
3359 | 0 |
3360 | 0 | case ARM::STR_PRE_REG: |
3361 | 0 | case ARM::STRB_PRE_REG: { |
3362 | 0 | unsigned ShOpVal = MI.getOperand(4).getImm(); |
3363 | 0 | bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; |
3364 | 0 | unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); |
3365 | 0 | if (!isSub && |
3366 | 0 | (ShImm == 0 || |
3367 | 0 | ((ShImm == 1 || ShImm == 2 || ShImm == 3) && |
3368 | 0 | ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) |
3369 | 0 | return 2; |
3370 | 0 | return 3; |
3371 | 0 | } |
3372 | 0 |
3373 | 0 | case ARM::LDRH_PRE: |
3374 | 0 | case ARM::STRH_PRE: { |
3375 | 0 | unsigned Rt = MI.getOperand(0).getReg(); |
3376 | 0 | unsigned Rm = MI.getOperand(3).getReg(); |
3377 | 0 | if (!Rm) |
3378 | 0 | return 2; |
3379 | 0 | if (Rt == Rm) |
3380 | 0 | return 3; |
3381 | 0 | return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 3 : 2; |
3382 | 0 | } |
3383 | 0 |
3384 | 0 | case ARM::LDR_POST_REG: |
3385 | 0 | case ARM::LDRB_POST_REG: |
3386 | 0 | case ARM::LDRH_POST: { |
3387 | 0 | unsigned Rt = MI.getOperand(0).getReg(); |
3388 | 0 | unsigned Rm = MI.getOperand(3).getReg(); |
3389 | 0 | return (Rt == Rm) ? 3 : 2; |
3390 | 0 | } |
3391 | 0 |
3392 | 0 | case ARM::LDR_PRE_IMM: |
3393 | 0 | case ARM::LDRB_PRE_IMM: |
3394 | 0 | case ARM::LDR_POST_IMM: |
3395 | 0 | case ARM::LDRB_POST_IMM: |
3396 | 0 | case ARM::STRB_POST_IMM: |
3397 | 0 | case ARM::STRB_POST_REG: |
3398 | 0 | case ARM::STRB_PRE_IMM: |
3399 | 0 | case ARM::STRH_POST: |
3400 | 0 | case ARM::STR_POST_IMM: |
3401 | 0 | case ARM::STR_POST_REG: |
3402 | 0 | case ARM::STR_PRE_IMM: |
3403 | 0 | return 2; |
3404 | 0 |
3405 | 0 | case ARM::LDRSB_PRE: |
3406 | 0 | case ARM::LDRSH_PRE: { |
3407 | 0 | unsigned Rm = MI.getOperand(3).getReg(); |
3408 | 0 | if (Rm == 0) |
3409 | 0 | return 3; |
3410 | 0 | unsigned Rt = MI.getOperand(0).getReg(); |
3411 | 0 | if (Rt == Rm) |
3412 | 0 | return 4; |
3413 | 0 | unsigned ShOpVal = MI.getOperand(4).getImm(); |
3414 | 0 | bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; |
3415 | 0 | unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); |
3416 | 0 | if (!isSub && |
3417 | 0 | (ShImm == 0 || |
3418 | 0 | ((ShImm == 1 || ShImm == 2 || ShImm == 3) && |
3419 | 0 | ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) |
3420 | 0 | return 3; |
3421 | 0 | return 4; |
3422 | 0 | } |
3423 | 0 |
3424 | 0 | case ARM::LDRD: { |
3425 | 0 | unsigned Rt = MI.getOperand(0).getReg(); |
3426 | 0 | unsigned Rn = MI.getOperand(2).getReg(); |
3427 | 0 | unsigned Rm = MI.getOperand(3).getReg(); |
3428 | 0 | if (Rm) |
3429 | 0 | return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4 |
3430 | 0 | : 3; |
3431 | 0 | return (Rt == Rn) ? 3 : 2; |
3432 | 0 | } |
3433 | 0 |
3434 | 0 | case ARM::STRD: { |
3435 | 0 | unsigned Rm = MI.getOperand(3).getReg(); |
3436 | 0 | if (Rm) |
3437 | 0 | return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4 |
3438 | 0 | : 3; |
3439 | 0 | return 2; |
3440 | 0 | } |
3441 | 0 |
3442 | 0 | case ARM::LDRD_POST: |
3443 | 0 | case ARM::t2LDRD_POST: |
3444 | 0 | return 3; |
3445 | 0 |
3446 | 0 | case ARM::STRD_POST: |
3447 | 0 | case ARM::t2STRD_POST: |
3448 | 0 | return 4; |
3449 | 0 |
3450 | 0 | case ARM::LDRD_PRE: { |
3451 | 0 | unsigned Rt = MI.getOperand(0).getReg(); |
3452 | 0 | unsigned Rn = MI.getOperand(3).getReg(); |
3453 | 0 | unsigned Rm = MI.getOperand(4).getReg(); |
3454 | 0 | if (Rm) |
3455 | 0 | return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5 |
3456 | 0 | : 4; |
3457 | 0 | return (Rt == Rn) ? 4 : 3; |
3458 | 0 | } |
3459 | 0 |
3460 | 0 | case ARM::t2LDRD_PRE: { |
3461 | 0 | unsigned Rt = MI.getOperand(0).getReg(); |
3462 | 0 | unsigned Rn = MI.getOperand(3).getReg(); |
3463 | 0 | return (Rt == Rn) ? 4 : 3; |
3464 | 0 | } |
3465 | 0 |
3466 | 0 | case ARM::STRD_PRE: { |
3467 | 0 | unsigned Rm = MI.getOperand(4).getReg(); |
3468 | 0 | if (Rm) |
3469 | 0 | return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5 |
3470 | 0 | : 4; |
3471 | 0 | return 3; |
3472 | 0 | } |
3473 | 0 |
3474 | 0 | case ARM::t2STRD_PRE: |
3475 | 0 | return 3; |
3476 | 0 |
3477 | 0 | case ARM::t2LDR_POST: |
3478 | 0 | case ARM::t2LDRB_POST: |
3479 | 0 | case ARM::t2LDRB_PRE: |
3480 | 0 | case ARM::t2LDRSBi12: |
3481 | 0 | case ARM::t2LDRSBi8: |
3482 | 0 | case ARM::t2LDRSBpci: |
3483 | 0 | case ARM::t2LDRSBs: |
3484 | 0 | case ARM::t2LDRH_POST: |
3485 | 0 | case ARM::t2LDRH_PRE: |
3486 | 0 | case ARM::t2LDRSBT: |
3487 | 0 | case ARM::t2LDRSB_POST: |
3488 | 0 | case ARM::t2LDRSB_PRE: |
3489 | 0 | case ARM::t2LDRSH_POST: |
3490 | 0 | case ARM::t2LDRSH_PRE: |
3491 | 0 | case ARM::t2LDRSHi12: |
3492 | 0 | case ARM::t2LDRSHi8: |
3493 | 0 | case ARM::t2LDRSHpci: |
3494 | 0 | case ARM::t2LDRSHs: |
3495 | 0 | return 2; |
3496 | 0 |
3497 | 0 | case ARM::t2LDRDi8: { |
3498 | 0 | unsigned Rt = MI.getOperand(0).getReg(); |
3499 | 0 | unsigned Rn = MI.getOperand(2).getReg(); |
3500 | 0 | return (Rt == Rn) ? 3 : 2; |
3501 | 0 | } |
3502 | 0 |
3503 | 0 | case ARM::t2STRB_POST: |
3504 | 0 | case ARM::t2STRB_PRE: |
3505 | 0 | case ARM::t2STRBs: |
3506 | 0 | case ARM::t2STRDi8: |
3507 | 0 | case ARM::t2STRH_POST: |
3508 | 0 | case ARM::t2STRH_PRE: |
3509 | 0 | case ARM::t2STRHs: |
3510 | 0 | case ARM::t2STR_POST: |
3511 | 0 | case ARM::t2STR_PRE: |
3512 | 0 | case ARM::t2STRs: |
3513 | 0 | return 2; |
3514 | 0 | } |
3515 | 0 | } |
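Every reg+reg load/store case above repeats the same test on the addressing-mode-2 shifter operand: the cheaper uop count only applies when the offset register is added (not subtracted) and is either unshifted or shifted left by 1, 2 or 3. A small refactoring sketch that names that predicate; ARM_AM::getAM2Op, getAM2Offset and getAM2ShiftOpc are the decoders already used above, while the wrapper name itself is invented:

    #include "MCTargetDesc/ARMAddressingModes.h"
    using namespace llvm;

    // True when an AM2 shifter operand describes the "fast" reg+reg form used
    // in the uop counts above: an added offset register with lsl #0..#3.
    static bool isCheapAM2ShiftOp(unsigned ShOpVal) {
      bool IsSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
      return !IsSub &&
             (ShImm == 0 ||
              ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
               ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl));
    }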
3516 | | |
3517 | | // Return the number of 32-bit words loaded by LDM or stored by STM. If this |
3518 | | // can't be easily determined return 0 (missing MachineMemOperand). |
3519 | | // |
3520 | | // FIXME: The current MachineInstr design does not support relying on machine |
3521 | | // mem operands to determine the width of a memory access. Instead, we expect |
3522 | | // the target to provide this information based on the instruction opcode and |
3523 | | // operands. However, using MachineMemOperand is the best solution now for |
3524 | | // two reasons: |
3525 | | // |
3526 | | // 1) getNumMicroOps tries to infer LDM memory width from the total number of MI |
3527 | | // operands. This is much more dangerous than using the MachineMemOperand |
3528 | | // sizes because CodeGen passes can insert/remove optional machine operands. In |
3529 | | // fact, it's totally incorrect for preRA passes and appears to be wrong for |
3530 | | // postRA passes as well. |
3531 | | // |
3532 | | // 2) getNumLDMAddresses is only used by the scheduling machine model and any |
3533 | | // machine model that calls this should handle the unknown (zero size) case. |
3534 | | // |
3535 | | // Long term, we should require a target hook that verifies MachineMemOperand |
3536 | | // sizes during MC lowering. That target hook should be local to MC lowering |
3537 | | // because we can't ensure that it is aware of other MI forms. Doing this will |
3538 | | // ensure that MachineMemOperands are correctly propagated through all passes. |
3539 | 65.9k | unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr &MI) const { |
3540 | 65.9k | unsigned Size = 0; |
3541 | 65.9k | for (MachineInstr::mmo_iterator I = MI.memoperands_begin(), |
3542 | 65.9k | E = MI.memoperands_end(); |
3543 | 66.7k | I != E; ++I848 ) { |
3544 | 848 | Size += (*I)->getSize(); |
3545 | 848 | } |
3546 | 65.9k | // FIXME: The scheduler currently can't handle values larger than 16. But |
3547 | 65.9k | // the values can actually go up to 32 for floating-point load/store |
3548 | 65.9k | // multiple (VLDMIA etc.). Also, the way this code is reasoning about memory |
3549 | 65.9k | // operations isn't right; we could end up with "extra" memory operands for |
3550 | 65.9k | // various reasons, like tail merge merging two memory operations. |
3551 | 65.9k | return std::min(Size / 4, 16U); |
3552 | 65.9k | } |
3553 | | |
3554 | | static unsigned getNumMicroOpsSingleIssuePlusExtras(unsigned Opc, |
3555 | 0 | unsigned NumRegs) { |
3556 | 0 | unsigned UOps = 1 + NumRegs; // 1 for address computation. |
3557 | 0 | switch (Opc) { |
3558 | 0 | default: |
3559 | 0 | break; |
3560 | 0 | case ARM::VLDMDIA_UPD: |
3561 | 0 | case ARM::VLDMDDB_UPD: |
3562 | 0 | case ARM::VLDMSIA_UPD: |
3563 | 0 | case ARM::VLDMSDB_UPD: |
3564 | 0 | case ARM::VSTMDIA_UPD: |
3565 | 0 | case ARM::VSTMDDB_UPD: |
3566 | 0 | case ARM::VSTMSIA_UPD: |
3567 | 0 | case ARM::VSTMSDB_UPD: |
3568 | 0 | case ARM::LDMIA_UPD: |
3569 | 0 | case ARM::LDMDA_UPD: |
3570 | 0 | case ARM::LDMDB_UPD: |
3571 | 0 | case ARM::LDMIB_UPD: |
3572 | 0 | case ARM::STMIA_UPD: |
3573 | 0 | case ARM::STMDA_UPD: |
3574 | 0 | case ARM::STMDB_UPD: |
3575 | 0 | case ARM::STMIB_UPD: |
3576 | 0 | case ARM::tLDMIA_UPD: |
3577 | 0 | case ARM::tSTMIA_UPD: |
3578 | 0 | case ARM::t2LDMIA_UPD: |
3579 | 0 | case ARM::t2LDMDB_UPD: |
3580 | 0 | case ARM::t2STMIA_UPD: |
3581 | 0 | case ARM::t2STMDB_UPD: |
3582 | 0 | ++UOps; // One for base register writeback. |
3583 | 0 | break; |
3584 | 0 | case ARM::LDMIA_RET: |
3585 | 0 | case ARM::tPOP_RET: |
3586 | 0 | case ARM::t2LDMIA_RET: |
3587 | 0 | UOps += 2; // One for base reg wb, one for write to pc. |
3588 | 0 | break; |
3589 | 0 | } |
3590 | 0 | return UOps; |
3591 | 0 | } |
3592 | | |
3593 | | unsigned ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, |
3594 | 14.1k | const MachineInstr &MI) const { |
3595 | 14.1k | if (!ItinData || ItinData->isEmpty()) |
3596 | 0 | return 1; |
3597 | 14.1k | |
3598 | 14.1k | const MCInstrDesc &Desc = MI.getDesc(); |
3599 | 14.1k | unsigned Class = Desc.getSchedClass(); |
3600 | 14.1k | int ItinUOps = ItinData->getNumMicroOps(Class); |
3601 | 14.1k | if (ItinUOps >= 0) { |
3602 | 0 | if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore())) |
3603 | 0 | return getNumMicroOpsSwiftLdSt(ItinData, MI); |
3604 | 0 | |
3605 | 0 | return ItinUOps; |
3606 | 0 | } |
3607 | 14.1k | |
3608 | 14.1k | unsigned Opc = MI.getOpcode(); |
3609 | 14.1k | switch (Opc) { |
3610 | 14.1k | default: |
3611 | 0 | llvm_unreachable("Unexpected multi-uops instruction!"); |
3612 | 14.1k | case ARM::VLDMQIA: |
3613 | 0 | case ARM::VSTMQIA: |
3614 | 0 | return 2; |
3615 | 0 |
3616 | 0 | // The number of uOps for load / store multiple is determined by the number
3617 | 0 | // of registers.
3618 | 0 | // |
3619 | 0 | // On Cortex-A8, each pair of register loads / stores can be scheduled on the |
3620 | 0 | // same cycle. The scheduling for the first load / store must be done |
3621 | 0 | // separately by assuming the address is not 64-bit aligned. |
3622 | 0 | // |
3623 | 0 | // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address |
3624 | 0 | // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON |
3625 | 0 | // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1. |
3626 | 230 | case ARM::VLDMDIA: |
3627 | 230 | case ARM::VLDMDIA_UPD: |
3628 | 230 | case ARM::VLDMDDB_UPD: |
3629 | 230 | case ARM::VLDMSIA: |
3630 | 230 | case ARM::VLDMSIA_UPD: |
3631 | 230 | case ARM::VLDMSDB_UPD: |
3632 | 230 | case ARM::VSTMDIA: |
3633 | 230 | case ARM::VSTMDIA_UPD: |
3634 | 230 | case ARM::VSTMDDB_UPD: |
3635 | 230 | case ARM::VSTMSIA: |
3636 | 230 | case ARM::VSTMSIA_UPD: |
3637 | 230 | case ARM::VSTMSDB_UPD: { |
3638 | 230 | unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands(); |
3639 | 230 | return (NumRegs / 2) + (NumRegs % 2) + 1; |
3640 | 230 | } |
3641 | 230 | |
3642 | 13.9k | case ARM::LDMIA_RET: |
3643 | 13.9k | case ARM::LDMIA: |
3644 | 13.9k | case ARM::LDMDA: |
3645 | 13.9k | case ARM::LDMDB: |
3646 | 13.9k | case ARM::LDMIB: |
3647 | 13.9k | case ARM::LDMIA_UPD: |
3648 | 13.9k | case ARM::LDMDA_UPD: |
3649 | 13.9k | case ARM::LDMDB_UPD: |
3650 | 13.9k | case ARM::LDMIB_UPD: |
3651 | 13.9k | case ARM::STMIA: |
3652 | 13.9k | case ARM::STMDA: |
3653 | 13.9k | case ARM::STMDB: |
3654 | 13.9k | case ARM::STMIB: |
3655 | 13.9k | case ARM::STMIA_UPD: |
3656 | 13.9k | case ARM::STMDA_UPD: |
3657 | 13.9k | case ARM::STMDB_UPD: |
3658 | 13.9k | case ARM::STMIB_UPD: |
3659 | 13.9k | case ARM::tLDMIA: |
3660 | 13.9k | case ARM::tLDMIA_UPD: |
3661 | 13.9k | case ARM::tSTMIA_UPD: |
3662 | 13.9k | case ARM::tPOP_RET: |
3663 | 13.9k | case ARM::tPOP: |
3664 | 13.9k | case ARM::tPUSH: |
3665 | 13.9k | case ARM::t2LDMIA_RET: |
3666 | 13.9k | case ARM::t2LDMIA: |
3667 | 13.9k | case ARM::t2LDMDB: |
3668 | 13.9k | case ARM::t2LDMIA_UPD: |
3669 | 13.9k | case ARM::t2LDMDB_UPD: |
3670 | 13.9k | case ARM::t2STMIA: |
3671 | 13.9k | case ARM::t2STMDB: |
3672 | 13.9k | case ARM::t2STMIA_UPD: |
3673 | 13.9k | case ARM::t2STMDB_UPD: { |
3674 | 13.9k | unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands() + 1; |
3675 | 13.9k | switch (Subtarget.getLdStMultipleTiming()) { |
3676 | 13.9k | case ARMSubtarget::SingleIssuePlusExtras: |
3677 | 0 | return getNumMicroOpsSingleIssuePlusExtras(Opc, NumRegs); |
3678 | 13.9k | case ARMSubtarget::SingleIssue: |
3679 | 8.84k | // Assume the worst. |
3680 | 8.84k | return NumRegs; |
3681 | 13.9k | case ARMSubtarget::DoubleIssue: { |
3682 | 5.05k | if (NumRegs < 4) |
3683 | 1.79k | return 2; |
3684 | 3.25k | // 4 registers would be issued: 2, 2. |
3685 | 3.25k | // 5 registers would be issued: 2, 2, 1. |
3686 | 3.25k | unsigned UOps = (NumRegs / 2); |
3687 | 3.25k | if (NumRegs % 2) |
3688 | 2.10k | ++UOps; |
3689 | 3.25k | return UOps; |
3690 | 3.25k | } |
3691 | 3.25k | case ARMSubtarget::DoubleIssueCheckUnalignedAccess: { |
3692 | 67 | unsigned UOps = (NumRegs / 2); |
3693 | 67 | // If there is an odd number of registers or if it's not 64-bit aligned,
3694 | 67 | // then it takes an extra AGU (Address Generation Unit) cycle. |
3695 | 67 | if ((NumRegs % 2) || !MI.hasOneMemOperand()51 || |
3696 | 67 | (*MI.memoperands_begin())->getAlignment() < 80 ) |
3697 | 67 | ++UOps; |
3698 | 67 | return UOps; |
3699 | 0 | } |
3700 | 13.9k | } |
3701 | 13.9k | } |
3702 | 0 | } |
3703 | 0 | llvm_unreachable("Didn't find the number of microops"); |
3704 | 0 | } |
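For the load/store-multiple opcodes the uop count above is a pure function of the register-list length and the subtarget's issue style. A standalone restatement of the DoubleIssue branch with a few sample values; the helper name is invented, but the arithmetic is exactly the code above:

    #include <cstdio>

    // Mirrors the DoubleIssue case above: registers issue in pairs, with a
    // floor of 2 uops for short lists.
    static unsigned ldmUOpsDoubleIssue(unsigned NumRegs) {
      if (NumRegs < 4)
        return 2;
      return NumRegs / 2 + NumRegs % 2;   // 4 -> 2, 5 -> 3, 6 -> 3, 7 -> 4
    }

    int main() {
      for (unsigned N = 1; N <= 7; ++N)
        std::printf("%u regs -> %u uops\n", N, ldmUOpsDoubleIssue(N));
    }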
3705 | | |
3706 | | int |
3707 | | ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData, |
3708 | | const MCInstrDesc &DefMCID, |
3709 | | unsigned DefClass, |
3710 | 40 | unsigned DefIdx, unsigned DefAlign) const { |
3711 | 40 | int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1; |
3712 | 40 | if (RegNo <= 0) |
3713 | 0 | // Def is the address writeback. |
3714 | 0 | return ItinData->getOperandCycle(DefClass, DefIdx); |
3715 | 40 | |
3716 | 40 | int DefCycle; |
3717 | 40 | if (Subtarget.isCortexA8() || Subtarget.isCortexA7()35 ) { |
3718 | 5 | // (regno / 2) + (regno % 2) + 1 |
3719 | 5 | DefCycle = RegNo / 2 + 1; |
3720 | 5 | if (RegNo % 2) |
3721 | 5 | ++DefCycle; |
3722 | 35 | } else if (Subtarget.isLikeA9() || Subtarget.isSwift()31 ) { |
3723 | 4 | DefCycle = RegNo; |
3724 | 4 | bool isSLoad = false; |
3725 | 4 | |
3726 | 4 | switch (DefMCID.getOpcode()) { |
3727 | 4 | default: break; |
3728 | 4 | case ARM::VLDMSIA: |
3729 | 0 | case ARM::VLDMSIA_UPD: |
3730 | 0 | case ARM::VLDMSDB_UPD: |
3731 | 0 | isSLoad = true; |
3732 | 0 | break; |
3733 | 4 | } |
3734 | 4 | |
3735 | 4 | // If there is an odd number of 'S' registers or if it's not 64-bit aligned,
3736 | 4 | // then it takes an extra cycle. |
3737 | 4 | if ((isSLoad && (RegNo % 2)0 ) || DefAlign < 8) |
3738 | 2 | ++DefCycle; |
3739 | 31 | } else { |
3740 | 31 | // Assume the worst. |
3741 | 31 | DefCycle = RegNo + 2; |
3742 | 31 | } |
3743 | 40 | |
3744 | 40 | return DefCycle; |
3745 | 40 | } |
3746 | | |
3747 | 0 | bool ARMBaseInstrInfo::isLDMBaseRegInList(const MachineInstr &MI) const { |
3748 | 0 | unsigned BaseReg = MI.getOperand(0).getReg(); |
3749 | 0 | for (unsigned i = 1, sz = MI.getNumOperands(); i < sz; ++i) { |
3750 | 0 | const auto &Op = MI.getOperand(i); |
3751 | 0 | if (Op.isReg() && Op.getReg() == BaseReg) |
3752 | 0 | return true; |
3753 | 0 | } |
3754 | 0 | return false; |
3755 | 0 | } |
3756 | | unsigned |
3757 | 7 | ARMBaseInstrInfo::getLDMVariableDefsSize(const MachineInstr &MI) const { |
3758 | 7 | // ins GPR:$Rn, $p (2xOp), reglist:$regs, variable_ops |
3759 | 7 | // (outs GPR:$wb), (ins GPR:$Rn, $p (2xOp), reglist:$regs, variable_ops) |
3760 | 7 | return MI.getNumOperands() + 1 - MI.getDesc().getNumOperands(); |
3761 | 7 | } |
3762 | | |
3763 | | int |
3764 | | ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData, |
3765 | | const MCInstrDesc &DefMCID, |
3766 | | unsigned DefClass, |
3767 | 124 | unsigned DefIdx, unsigned DefAlign) const { |
3768 | 124 | int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1; |
3769 | 124 | if (RegNo <= 0) |
3770 | 0 | // Def is the address writeback. |
3771 | 0 | return ItinData->getOperandCycle(DefClass, DefIdx); |
3772 | 124 | |
3773 | 124 | int DefCycle; |
3774 | 124 | if (Subtarget.isCortexA8() || Subtarget.isCortexA7()121 ) { |
3775 | 18 | // 4 registers would be issued: 1, 2, 1. |
3776 | 18 | // 5 registers would be issued: 1, 2, 2. |
3777 | 18 | DefCycle = RegNo / 2; |
3778 | 18 | if (DefCycle < 1) |
3779 | 18 | DefCycle = 1; |
3780 | 18 | // Result latency is issue cycle + 2: E2. |
3781 | 18 | DefCycle += 2; |
3782 | 106 | } else if (Subtarget.isLikeA9() || Subtarget.isSwift()103 ) { |
3783 | 3 | DefCycle = (RegNo / 2); |
3784 | 3 | // If there is an odd number of registers or if it's not 64-bit aligned,
3785 | 3 | // then it takes an extra AGU (Address Generation Unit) cycle. |
3786 | 3 | if ((RegNo % 2) || DefAlign < 80 ) |
3787 | 3 | ++DefCycle; |
3788 | 3 | // Result latency is AGU cycles + 2. |
3789 | 3 | DefCycle += 2; |
3790 | 103 | } else { |
3791 | 103 | // Assume the worst. |
3792 | 103 | DefCycle = RegNo + 2; |
3793 | 103 | } |
3794 | 124 | |
3795 | 124 | return DefCycle; |
3796 | 124 | } |
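The three branches above reduce to small closed forms: Cortex-A8/A7 gives max(RegNo/2, 1) + 2, the A9-like/Swift case gives RegNo/2 (plus one for an odd register count or an under-aligned base) + 2, and anything else pessimistically assumes RegNo + 2. A compact restatement for reference; the helper is illustrative only, not LLVM API:

    #include <algorithm>

    static int ldmDefCycle(bool IsA8OrA7, bool IsA9OrSwift, int RegNo,
                           unsigned DefAlign) {
      if (IsA8OrA7)
        return std::max(RegNo / 2, 1) + 2;   // result ready at issue + 2 (E2)
      if (IsA9OrSwift) {
        int Cycle = RegNo / 2;
        if ((RegNo % 2) || DefAlign < 8)     // extra AGU cycle
          ++Cycle;
        return Cycle + 2;
      }
      return RegNo + 2;                      // assume the worst
    }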
3797 | | |
3798 | | int |
3799 | | ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData, |
3800 | | const MCInstrDesc &UseMCID, |
3801 | | unsigned UseClass, |
3802 | 1 | unsigned UseIdx, unsigned UseAlign) const { |
3803 | 1 | int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1; |
3804 | 1 | if (RegNo <= 0) |
3805 | 0 | return ItinData->getOperandCycle(UseClass, UseIdx); |
3806 | 1 | |
3807 | 1 | int UseCycle; |
3808 | 1 | if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) { |
3809 | 0 | // (regno / 2) + (regno % 2) + 1 |
3810 | 0 | UseCycle = RegNo / 2 + 1; |
3811 | 0 | if (RegNo % 2) |
3812 | 0 | ++UseCycle; |
3813 | 1 | } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { |
3814 | 0 | UseCycle = RegNo; |
3815 | 0 | bool isSStore = false; |
3816 | 0 |
3817 | 0 | switch (UseMCID.getOpcode()) { |
3818 | 0 | default: break; |
3819 | 0 | case ARM::VSTMSIA: |
3820 | 0 | case ARM::VSTMSIA_UPD: |
3821 | 0 | case ARM::VSTMSDB_UPD: |
3822 | 0 | isSStore = true; |
3823 | 0 | break; |
3824 | 0 | } |
3825 | 0 | |
3826 | 0 | // If there is an odd number of 'S' registers or if it's not 64-bit aligned,
3827 | 0 | // then it takes an extra cycle. |
3828 | 0 | if ((isSStore && (RegNo % 2)) || UseAlign < 8) |
3829 | 0 | ++UseCycle; |
3830 | 1 | } else { |
3831 | 1 | // Assume the worst. |
3832 | 1 | UseCycle = RegNo + 2; |
3833 | 1 | } |
3834 | 1 | |
3835 | 1 | return UseCycle; |
3836 | 1 | } |
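 | | // Worked example (annotation added in this report, not upstream source):
 | | // storing five 'S' registers with a VSTM on Cortex-A8/A7 gives
 | | // UseCycle = 5 / 2 + 1 = 3, plus 1 for the odd register count -> 4,
 | | // matching the (regno / 2) + (regno % 2) + 1 note above; the worst-case
 | | // fallback is RegNo + 2.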
3837 | | |
3838 | | int |
3839 | | ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData, |
3840 | | const MCInstrDesc &UseMCID, |
3841 | | unsigned UseClass, |
3842 | 63 | unsigned UseIdx, unsigned UseAlign) const { |
3843 | 63 | int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1; |
3844 | 63 | if (RegNo <= 0) |
3845 | 0 | return ItinData->getOperandCycle(UseClass, UseIdx); |
3846 | 63 | |
3847 | 63 | int UseCycle; |
3848 | 63 | if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) { |
3849 | 3 | UseCycle = RegNo / 2; |
3850 | 3 | if (UseCycle < 2) |
3851 | 3 | UseCycle = 2; |
3852 | 3 | // Read in E3. |
3853 | 3 | UseCycle += 2; |
3854 | 60 | } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { |
3855 | 0 | UseCycle = (RegNo / 2); |
3856 | 0 | // If there is an odd number of registers, or the access is not 64-bit
3857 | 0 | // aligned, it takes an extra AGU (Address Generation Unit) cycle.
3858 | 0 | if ((RegNo % 2) || UseAlign < 8) |
3859 | 0 | ++UseCycle; |
3860 | 60 | } else { |
3861 | 60 | // Assume the worst. |
3862 | 60 | UseCycle = 1; |
3863 | 60 | } |
3864 | 63 | return UseCycle; |
3865 | 63 | } |
3866 | | |
3867 | | int |
3868 | | ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, |
3869 | | const MCInstrDesc &DefMCID, |
3870 | | unsigned DefIdx, unsigned DefAlign, |
3871 | | const MCInstrDesc &UseMCID, |
3872 | 320k | unsigned UseIdx, unsigned UseAlign) const { |
3873 | 320k | unsigned DefClass = DefMCID.getSchedClass(); |
3874 | 320k | unsigned UseClass = UseMCID.getSchedClass(); |
3875 | 320k | |
3876 | 320k | if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
3877 | 318k | return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx); |
3878 | 1.97k | |
3879 | 1.97k | // This may be a def / use of a variable_ops instruction, the operand |
3880 | 1.97k | // latency might be determinable dynamically. Let the target try to |
3881 | 1.97k | // figure it out. |
3882 | 1.97k | int DefCycle = -1; |
3883 | 1.97k | bool LdmBypass = false; |
3884 | 1.97k | switch (DefMCID.getOpcode()) { |
3885 | 1.97k | default: |
3886 | 1.81k | DefCycle = ItinData->getOperandCycle(DefClass, DefIdx); |
3887 | 1.81k | break; |
3888 | 1.97k | |
3889 | 1.97k | case ARM::VLDMDIA: |
3890 | 40 | case ARM::VLDMDIA_UPD: |
3891 | 40 | case ARM::VLDMDDB_UPD: |
3892 | 40 | case ARM::VLDMSIA: |
3893 | 40 | case ARM::VLDMSIA_UPD: |
3894 | 40 | case ARM::VLDMSDB_UPD: |
3895 | 40 | DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign); |
3896 | 40 | break; |
3897 | 40 | |
3898 | 124 | case ARM::LDMIA_RET: |
3899 | 124 | case ARM::LDMIA: |
3900 | 124 | case ARM::LDMDA: |
3901 | 124 | case ARM::LDMDB: |
3902 | 124 | case ARM::LDMIB: |
3903 | 124 | case ARM::LDMIA_UPD: |
3904 | 124 | case ARM::LDMDA_UPD: |
3905 | 124 | case ARM::LDMDB_UPD: |
3906 | 124 | case ARM::LDMIB_UPD: |
3907 | 124 | case ARM::tLDMIA: |
3908 | 124 | case ARM::tLDMIA_UPD: |
3909 | 124 | case ARM::tPUSH: |
3910 | 124 | case ARM::t2LDMIA_RET: |
3911 | 124 | case ARM::t2LDMIA: |
3912 | 124 | case ARM::t2LDMDB: |
3913 | 124 | case ARM::t2LDMIA_UPD: |
3914 | 124 | case ARM::t2LDMDB_UPD: |
3915 | 124 | LdmBypass = true; |
3916 | 124 | DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign); |
3917 | 124 | break; |
3918 | 1.97k | } |
3919 | 1.97k | |
3920 | 1.97k | if (DefCycle == -1) |
3921 | 56 | // We can't seem to determine the result latency of the def, assume it's 2. |
3922 | 56 | DefCycle = 2; |
3923 | 1.97k | |
3924 | 1.97k | int UseCycle = -1; |
3925 | 1.97k | switch (UseMCID.getOpcode()) { |
3926 | 1.97k | default: |
3927 | 1.91k | UseCycle = ItinData->getOperandCycle(UseClass, UseIdx); |
3928 | 1.91k | break; |
3929 | 1.97k | |
3930 | 1.97k | case ARM::VSTMDIA: |
3931 | 1 | case ARM::VSTMDIA_UPD: |
3932 | 1 | case ARM::VSTMDDB_UPD: |
3933 | 1 | case ARM::VSTMSIA: |
3934 | 1 | case ARM::VSTMSIA_UPD: |
3935 | 1 | case ARM::VSTMSDB_UPD: |
3936 | 1 | UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign); |
3937 | 1 | break; |
3938 | 1 | |
3939 | 63 | case ARM::STMIA: |
3940 | 63 | case ARM::STMDA: |
3941 | 63 | case ARM::STMDB: |
3942 | 63 | case ARM::STMIB: |
3943 | 63 | case ARM::STMIA_UPD: |
3944 | 63 | case ARM::STMDA_UPD: |
3945 | 63 | case ARM::STMDB_UPD: |
3946 | 63 | case ARM::STMIB_UPD: |
3947 | 63 | case ARM::tSTMIA_UPD: |
3948 | 63 | case ARM::tPOP_RET: |
3949 | 63 | case ARM::tPOP: |
3950 | 63 | case ARM::t2STMIA: |
3951 | 63 | case ARM::t2STMDB: |
3952 | 63 | case ARM::t2STMIA_UPD: |
3953 | 63 | case ARM::t2STMDB_UPD: |
3954 | 63 | UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign); |
3955 | 63 | break; |
3956 | 1.97k | } |
3957 | 1.97k | |
3958 | 1.97k | if (UseCycle == -1) |
3959 | 1.79k | // Assume it's read in the first stage. |
3960 | 1.79k | UseCycle = 1; |
3961 | 1.97k | |
3962 | 1.97k | UseCycle = DefCycle - UseCycle + 1; |
3963 | 1.97k | if (UseCycle > 0) { |
3964 | 1.97k | if (LdmBypass) { |
3965 | 121 | // It's a variable_ops instruction so we can't use DefIdx here. Just use |
3966 | 121 | // first def operand. |
3967 | 121 | if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1, |
3968 | 121 | UseClass, UseIdx)) |
3969 | 0 | --UseCycle; |
3970 | 1.85k | } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx, |
3971 | 1.85k | UseClass, UseIdx)) { |
3972 | 0 | --UseCycle; |
3973 | 0 | } |
3974 | 1.97k | } |
3975 | 1.97k | |
3976 | 1.97k | return UseCycle; |
3977 | 1.97k | } |
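 | | // Illustration (annotation added in this report, not upstream source): if
 | | // getLDMDefCycle reports a def ready in cycle 3 and the consumer reads in
 | | // its first stage (UseCycle == 1), the operand latency is 3 - 1 + 1 = 3;
 | | // when the itinerary records pipeline forwarding between the two scheduling
 | | // classes, one more cycle is shaved off.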
3978 | | |
3979 | | static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI, |
3980 | | const MachineInstr *MI, unsigned Reg, |
3981 | 0 | unsigned &DefIdx, unsigned &Dist) { |
3982 | 0 | Dist = 0; |
3983 | 0 |
3984 | 0 | MachineBasicBlock::const_iterator I = MI; ++I; |
3985 | 0 | MachineBasicBlock::const_instr_iterator II = std::prev(I.getInstrIterator()); |
3986 | 0 | assert(II->isInsideBundle() && "Empty bundle?"); |
3987 | 0 |
3988 | 0 | int Idx = -1; |
3989 | 0 | while (II->isInsideBundle()) { |
3990 | 0 | Idx = II->findRegisterDefOperandIdx(Reg, false, true, TRI); |
3991 | 0 | if (Idx != -1) |
3992 | 0 | break; |
3993 | 0 | --II; |
3994 | 0 | ++Dist; |
3995 | 0 | } |
3996 | 0 |
3997 | 0 | assert(Idx != -1 && "Cannot find bundled definition!"); |
3998 | 0 | DefIdx = Idx; |
3999 | 0 | return &*II; |
4000 | 0 | } |
4001 | | |
4002 | | static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI, |
4003 | | const MachineInstr &MI, unsigned Reg, |
4004 | 0 | unsigned &UseIdx, unsigned &Dist) { |
4005 | 0 | Dist = 0; |
4006 | 0 |
4007 | 0 | MachineBasicBlock::const_instr_iterator II = ++MI.getIterator(); |
4008 | 0 | assert(II->isInsideBundle() && "Empty bundle?"); |
4009 | 0 | MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); |
4010 | 0 |
4011 | 0 | // FIXME: This doesn't properly handle multiple uses. |
4012 | 0 | int Idx = -1; |
4013 | 0 | while (II != E && II->isInsideBundle()) { |
4014 | 0 | Idx = II->findRegisterUseOperandIdx(Reg, false, TRI); |
4015 | 0 | if (Idx != -1) |
4016 | 0 | break; |
4017 | 0 | if (II->getOpcode() != ARM::t2IT) |
4018 | 0 | ++Dist; |
4019 | 0 | ++II; |
4020 | 0 | } |
4021 | 0 |
4022 | 0 | if (Idx == -1) { |
4023 | 0 | Dist = 0; |
4024 | 0 | return nullptr; |
4025 | 0 | } |
4026 | 0 | |
4027 | 0 | UseIdx = Idx; |
4028 | 0 | return &*II; |
4029 | 0 | } |
4030 | | |
4031 | | /// Return the number of cycles to add to (or subtract from) the static |
4032 | | /// itinerary based on the def opcode and alignment. The caller will ensure that |
4033 | | /// adjusted latency is at least one cycle. |
4034 | | static int adjustDefLatency(const ARMSubtarget &Subtarget, |
4035 | | const MachineInstr &DefMI, |
4036 | 765k | const MCInstrDesc &DefMCID, unsigned DefAlign) { |
4037 | 765k | int Adjust = 0; |
4038 | 765k | if (Subtarget.isCortexA8() || Subtarget.isLikeA9() || Subtarget.isCortexA7()) {
4039 | 300k | // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] |
4040 | 300k | // variants are one cycle cheaper. |
4041 | 300k | switch (DefMCID.getOpcode()) { |
4042 | 300k | default: break;
4043 | 300k | case ARM::LDRrs: |
4044 | 57 | case ARM::LDRBrs: { |
4045 | 57 | unsigned ShOpVal = DefMI.getOperand(3).getImm(); |
4046 | 57 | unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); |
4047 | 57 | if (ShImm == 0 || |
4048 | 57 | (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4049 | 41 | --Adjust; |
4050 | 57 | break; |
4051 | 57 | } |
4052 | 1.99k | case ARM::t2LDRs: |
4053 | 1.99k | case ARM::t2LDRBs: |
4054 | 1.99k | case ARM::t2LDRHs: |
4055 | 1.99k | case ARM::t2LDRSHs: { |
4056 | 1.99k | // Thumb2 mode: lsl only. |
4057 | 1.99k | unsigned ShAmt = DefMI.getOperand(3).getImm(); |
4058 | 1.99k | if (ShAmt == 0 || ShAmt == 2)
4059 | 1.91k | --Adjust; |
4060 | 1.99k | break; |
4061 | 464k | } |
4062 | 464k | } |
4063 | 464k | } else if (Subtarget.isSwift()) { |
4064 | 74 | // FIXME: Properly handle all of the latency adjustments for address |
4065 | 74 | // writeback. |
4066 | 74 | switch (DefMCID.getOpcode()) { |
4067 | 74 | default: break;
4068 | 74 | case ARM::LDRrs: |
4069 | 0 | case ARM::LDRBrs: { |
4070 | 0 | unsigned ShOpVal = DefMI.getOperand(3).getImm(); |
4071 | 0 | bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; |
4072 | 0 | unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); |
4073 | 0 | if (!isSub && |
4074 | 0 | (ShImm == 0 || |
4075 | 0 | ((ShImm == 1 || ShImm == 2 || ShImm == 3) && |
4076 | 0 | ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) |
4077 | 0 | Adjust -= 2; |
4078 | 0 | else if (!isSub && |
4079 | 0 | ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr) |
4080 | 0 | --Adjust; |
4081 | 0 | break; |
4082 | 0 | } |
4083 | 1 | case ARM::t2LDRs: |
4084 | 1 | case ARM::t2LDRBs: |
4085 | 1 | case ARM::t2LDRHs: |
4086 | 1 | case ARM::t2LDRSHs: { |
4087 | 1 | // Thumb2 mode: lsl only. |
4088 | 1 | unsigned ShAmt = DefMI.getOperand(3).getImm(); |
4089 | 1 | if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3)
4090 | 1 | Adjust -= 2; |
4091 | 1 | break; |
4092 | 765k | } |
4093 | 74 | } |
4094 | 74 | } |
4095 | 765k | |
4096 | 765k | if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) {
4097 | 3.45k | switch (DefMCID.getOpcode()) { |
4098 | 3.45k | default: break;
4099 | 3.45k | case ARM::VLD1q8: |
4100 | 60 | case ARM::VLD1q16: |
4101 | 60 | case ARM::VLD1q32: |
4102 | 60 | case ARM::VLD1q64: |
4103 | 60 | case ARM::VLD1q8wb_fixed: |
4104 | 60 | case ARM::VLD1q16wb_fixed: |
4105 | 60 | case ARM::VLD1q32wb_fixed: |
4106 | 60 | case ARM::VLD1q64wb_fixed: |
4107 | 60 | case ARM::VLD1q8wb_register: |
4108 | 60 | case ARM::VLD1q16wb_register: |
4109 | 60 | case ARM::VLD1q32wb_register: |
4110 | 60 | case ARM::VLD1q64wb_register: |
4111 | 60 | case ARM::VLD2d8: |
4112 | 60 | case ARM::VLD2d16: |
4113 | 60 | case ARM::VLD2d32: |
4114 | 60 | case ARM::VLD2q8: |
4115 | 60 | case ARM::VLD2q16: |
4116 | 60 | case ARM::VLD2q32: |
4117 | 60 | case ARM::VLD2d8wb_fixed: |
4118 | 60 | case ARM::VLD2d16wb_fixed: |
4119 | 60 | case ARM::VLD2d32wb_fixed: |
4120 | 60 | case ARM::VLD2q8wb_fixed: |
4121 | 60 | case ARM::VLD2q16wb_fixed: |
4122 | 60 | case ARM::VLD2q32wb_fixed: |
4123 | 60 | case ARM::VLD2d8wb_register: |
4124 | 60 | case ARM::VLD2d16wb_register: |
4125 | 60 | case ARM::VLD2d32wb_register: |
4126 | 60 | case ARM::VLD2q8wb_register: |
4127 | 60 | case ARM::VLD2q16wb_register: |
4128 | 60 | case ARM::VLD2q32wb_register: |
4129 | 60 | case ARM::VLD3d8: |
4130 | 60 | case ARM::VLD3d16: |
4131 | 60 | case ARM::VLD3d32: |
4132 | 60 | case ARM::VLD1d64T: |
4133 | 60 | case ARM::VLD3d8_UPD: |
4134 | 60 | case ARM::VLD3d16_UPD: |
4135 | 60 | case ARM::VLD3d32_UPD: |
4136 | 60 | case ARM::VLD1d64Twb_fixed: |
4137 | 60 | case ARM::VLD1d64Twb_register: |
4138 | 60 | case ARM::VLD3q8_UPD: |
4139 | 60 | case ARM::VLD3q16_UPD: |
4140 | 60 | case ARM::VLD3q32_UPD: |
4141 | 60 | case ARM::VLD4d8: |
4142 | 60 | case ARM::VLD4d16: |
4143 | 60 | case ARM::VLD4d32: |
4144 | 60 | case ARM::VLD1d64Q: |
4145 | 60 | case ARM::VLD4d8_UPD: |
4146 | 60 | case ARM::VLD4d16_UPD: |
4147 | 60 | case ARM::VLD4d32_UPD: |
4148 | 60 | case ARM::VLD1d64Qwb_fixed: |
4149 | 60 | case ARM::VLD1d64Qwb_register: |
4150 | 60 | case ARM::VLD4q8_UPD: |
4151 | 60 | case ARM::VLD4q16_UPD: |
4152 | 60 | case ARM::VLD4q32_UPD: |
4153 | 60 | case ARM::VLD1DUPq8: |
4154 | 60 | case ARM::VLD1DUPq16: |
4155 | 60 | case ARM::VLD1DUPq32: |
4156 | 60 | case ARM::VLD1DUPq8wb_fixed: |
4157 | 60 | case ARM::VLD1DUPq16wb_fixed: |
4158 | 60 | case ARM::VLD1DUPq32wb_fixed: |
4159 | 60 | case ARM::VLD1DUPq8wb_register: |
4160 | 60 | case ARM::VLD1DUPq16wb_register: |
4161 | 60 | case ARM::VLD1DUPq32wb_register: |
4162 | 60 | case ARM::VLD2DUPd8: |
4163 | 60 | case ARM::VLD2DUPd16: |
4164 | 60 | case ARM::VLD2DUPd32: |
4165 | 60 | case ARM::VLD2DUPd8wb_fixed: |
4166 | 60 | case ARM::VLD2DUPd16wb_fixed: |
4167 | 60 | case ARM::VLD2DUPd32wb_fixed: |
4168 | 60 | case ARM::VLD2DUPd8wb_register: |
4169 | 60 | case ARM::VLD2DUPd16wb_register: |
4170 | 60 | case ARM::VLD2DUPd32wb_register: |
4171 | 60 | case ARM::VLD4DUPd8: |
4172 | 60 | case ARM::VLD4DUPd16: |
4173 | 60 | case ARM::VLD4DUPd32: |
4174 | 60 | case ARM::VLD4DUPd8_UPD: |
4175 | 60 | case ARM::VLD4DUPd16_UPD: |
4176 | 60 | case ARM::VLD4DUPd32_UPD: |
4177 | 60 | case ARM::VLD1LNd8: |
4178 | 60 | case ARM::VLD1LNd16: |
4179 | 60 | case ARM::VLD1LNd32: |
4180 | 60 | case ARM::VLD1LNd8_UPD: |
4181 | 60 | case ARM::VLD1LNd16_UPD: |
4182 | 60 | case ARM::VLD1LNd32_UPD: |
4183 | 60 | case ARM::VLD2LNd8: |
4184 | 60 | case ARM::VLD2LNd16: |
4185 | 60 | case ARM::VLD2LNd32: |
4186 | 60 | case ARM::VLD2LNq16: |
4187 | 60 | case ARM::VLD2LNq32: |
4188 | 60 | case ARM::VLD2LNd8_UPD: |
4189 | 60 | case ARM::VLD2LNd16_UPD: |
4190 | 60 | case ARM::VLD2LNd32_UPD: |
4191 | 60 | case ARM::VLD2LNq16_UPD: |
4192 | 60 | case ARM::VLD2LNq32_UPD: |
4193 | 60 | case ARM::VLD4LNd8: |
4194 | 60 | case ARM::VLD4LNd16: |
4195 | 60 | case ARM::VLD4LNd32: |
4196 | 60 | case ARM::VLD4LNq16: |
4197 | 60 | case ARM::VLD4LNq32: |
4198 | 60 | case ARM::VLD4LNd8_UPD: |
4199 | 60 | case ARM::VLD4LNd16_UPD: |
4200 | 60 | case ARM::VLD4LNd32_UPD: |
4201 | 60 | case ARM::VLD4LNq16_UPD: |
4202 | 60 | case ARM::VLD4LNq32_UPD: |
4203 | 60 | // If the address is not 64-bit aligned, the latencies of these |
4204 | 60 | // instructions increase by one.
4205 | 60 | ++Adjust; |
4206 | 60 | break; |
4207 | 765k | } |
4208 | 765k | } |
4209 | 765k | return Adjust; |
4210 | 765k | } |
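 | | // Illustration (annotation added in this report, not upstream source): a
 | | // t2LDRs whose shift amount is 0 or 2 on a Cortex-A8/A9/A7-class core
 | | // returns -1 here, a Swift LDRrs with a small left shift and no subtract
 | | // addressing returns -2, and a VLDn through a pointer aligned below 8 bytes
 | | // returns +1 when the subtarget checks VLDn access alignment; the callers
 | | // guard against the adjustment driving the final latency negative.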
4211 | | |
4212 | | int ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, |
4213 | | const MachineInstr &DefMI, |
4214 | | unsigned DefIdx, |
4215 | | const MachineInstr &UseMI, |
4216 | 187k | unsigned UseIdx) const { |
4217 | 187k | // No operand latency. The caller may fall back to getInstrLatency. |
4218 | 187k | if (!ItinData || ItinData->isEmpty()) |
4219 | 0 | return -1; |
4220 | 187k | |
4221 | 187k | const MachineOperand &DefMO = DefMI.getOperand(DefIdx); |
4222 | 187k | unsigned Reg = DefMO.getReg(); |
4223 | 187k | |
4224 | 187k | const MachineInstr *ResolvedDefMI = &DefMI; |
4225 | 187k | unsigned DefAdj = 0; |
4226 | 187k | if (DefMI.isBundle()) |
4227 | 0 | ResolvedDefMI = |
4228 | 0 | getBundledDefMI(&getRegisterInfo(), &DefMI, Reg, DefIdx, DefAdj); |
4229 | 187k | if (ResolvedDefMI->isCopyLike() || ResolvedDefMI->isInsertSubreg() ||
4230 | 187k | ResolvedDefMI->isRegSequence() || ResolvedDefMI->isImplicitDef()) {
4231 | 121 | return 1; |
4232 | 121 | } |
4233 | 186k | |
4234 | 186k | const MachineInstr *ResolvedUseMI = &UseMI; |
4235 | 186k | unsigned UseAdj = 0; |
4236 | 186k | if (UseMI.isBundle()) { |
4237 | 0 | ResolvedUseMI = |
4238 | 0 | getBundledUseMI(&getRegisterInfo(), UseMI, Reg, UseIdx, UseAdj); |
4239 | 0 | if (!ResolvedUseMI) |
4240 | 0 | return -1; |
4241 | 186k | } |
4242 | 186k | |
4243 | 186k | return getOperandLatencyImpl( |
4244 | 186k | ItinData, *ResolvedDefMI, DefIdx, ResolvedDefMI->getDesc(), DefAdj, DefMO, |
4245 | 186k | Reg, *ResolvedUseMI, UseIdx, ResolvedUseMI->getDesc(), UseAdj); |
4246 | 186k | } |
4247 | | |
4248 | | int ARMBaseInstrInfo::getOperandLatencyImpl( |
4249 | | const InstrItineraryData *ItinData, const MachineInstr &DefMI, |
4250 | | unsigned DefIdx, const MCInstrDesc &DefMCID, unsigned DefAdj, |
4251 | | const MachineOperand &DefMO, unsigned Reg, const MachineInstr &UseMI, |
4252 | 186k | unsigned UseIdx, const MCInstrDesc &UseMCID, unsigned UseAdj) const { |
4253 | 186k | if (Reg == ARM::CPSR) { |
4254 | 2.85k | if (DefMI.getOpcode() == ARM::FMSTAT) { |
4255 | 386 | // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?) |
4256 | 386 | return Subtarget.isLikeA9() ? 14 : 20;
4257 | 386 | } |
4258 | 2.46k | |
4259 | 2.46k | // CPSR set and branch can be paired in the same cycle. |
4260 | 2.46k | if (UseMI.isBranch()) |
4261 | 0 | return 0; |
4262 | 2.46k | |
4263 | 2.46k | // Otherwise it takes the instruction latency (generally one). |
4264 | 2.46k | unsigned Latency = getInstrLatency(ItinData, DefMI); |
4265 | 2.46k | |
4266 | 2.46k | // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to |
4267 | 2.46k | // its uses. Instructions which are otherwise scheduled between them may |
4268 | 2.46k | // incur a code size penalty (not able to use the CPSR setting 16-bit |
4269 | 2.46k | // instructions). |
4270 | 2.46k | if (Latency > 0 && Subtarget.isThumb2()) {
4271 | 1.39k | const MachineFunction *MF = DefMI.getParent()->getParent(); |
4272 | 1.39k | // FIXME: Use Function::hasOptSize(). |
4273 | 1.39k | if (MF->getFunction().hasFnAttribute(Attribute::OptimizeForSize)) |
4274 | 328 | --Latency; |
4275 | 1.39k | } |
4276 | 2.46k | return Latency; |
4277 | 2.46k | } |
4278 | 184k | |
4279 | 184k | if (DefMO.isImplicit() || UseMI.getOperand(UseIdx).isImplicit())
4280 | 997 | return -1; |
4281 | 183k | |
4282 | 183k | unsigned DefAlign = DefMI.hasOneMemOperand() |
4283 | 183k | ? (*DefMI.memoperands_begin())->getAlignment()
4284 | 183k | : 0;
4285 | 183k | unsigned UseAlign = UseMI.hasOneMemOperand() |
4286 | 183k | ? (*UseMI.memoperands_begin())->getAlignment()
4287 | 183k | : 0;
4288 | 183k | |
4289 | 183k | // Get the itinerary's latency if possible, and handle variable_ops. |
4290 | 183k | int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, UseMCID, |
4291 | 183k | UseIdx, UseAlign); |
4292 | 183k | // Unable to find operand latency. The caller may resort to getInstrLatency. |
4293 | 183k | if (Latency < 0) |
4294 | 50.0k | return Latency; |
4295 | 133k | |
4296 | 133k | // Adjust for IT block position. |
4297 | 133k | int Adj = DefAdj + UseAdj; |
4298 | 133k | |
4299 | 133k | // Adjust for dynamic def-side opcode variants not captured by the itinerary. |
4300 | 133k | Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign); |
4301 | 133k | if (Adj >= 0 || (int)Latency > -Adj) {
4302 | 133k | return Latency + Adj; |
4303 | 133k | } |
4304 | 0 | // Return the itinerary latency, which may be zero but not less than zero. |
4305 | 0 | return Latency; |
4306 | 0 | } |
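 | | // Illustration (annotation added in this report, not upstream source): an
 | | // FMSTAT feeding a CPSR read is reported as a 14-cycle (A9-like) or
 | | // 20-cycle stall; an ordinary flag-setting Thumb2 def in a function marked
 | | // OptimizeForSize gets its reported latency reduced by one so the scheduler
 | | // keeps it next to its use and the 16-bit encodings stay available.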
4307 | | |
4308 | | int |
4309 | | ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, |
4310 | | SDNode *DefNode, unsigned DefIdx, |
4311 | 395k | SDNode *UseNode, unsigned UseIdx) const { |
4312 | 395k | if (!DefNode->isMachineOpcode()) |
4313 | 158k | return 1; |
4314 | 236k | |
4315 | 236k | const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode()); |
4316 | 236k | |
4317 | 236k | if (isZeroCost(DefMCID.Opcode)) |
4318 | 10.7k | return 0; |
4319 | 225k | |
4320 | 225k | if (!ItinData || ItinData->isEmpty()) |
4321 | 912 | return DefMCID.mayLoad() ? 3 : 1;
4322 | 224k | |
4323 | 224k | if (!UseNode->isMachineOpcode()) { |
4324 | 87.1k | int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx); |
4325 | 87.1k | int Adj = Subtarget.getPreISelOperandLatencyAdjustment(); |
4326 | 87.1k | int Threshold = 1 + Adj; |
4327 | 87.1k | return Latency <= Threshold ? 1 : Latency - Adj;
4328 | 87.1k | } |
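 | | // Illustration (annotation added in this report, not upstream source):
 | | // with a pre-ISel adjustment of, say, 2, the threshold is 3, so an
 | | // itinerary cycle of 3 is folded down to 1 for the non-machine use, while a
 | | // 5-cycle def is reported as 5 - 2 = 3.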
4329 | 137k | |
4330 | 137k | const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode()); |
4331 | 137k | const MachineSDNode *DefMN = dyn_cast<MachineSDNode>(DefNode); |
4332 | 137k | unsigned DefAlign = !DefMN->memoperands_empty() |
4333 | 137k | ? (*DefMN->memoperands_begin())->getAlignment() : 0;
4334 | 137k | const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode); |
4335 | 137k | unsigned UseAlign = !UseMN->memoperands_empty() |
4336 | 137k | ? (*UseMN->memoperands_begin())->getAlignment() : 0;
4337 | 137k | int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, |
4338 | 137k | UseMCID, UseIdx, UseAlign); |
4339 | 137k | |
4340 | 137k | if (Latency > 1 && |
4341 | 137k | (Subtarget.isCortexA8() || Subtarget.isLikeA9() ||
4342 | 56.6k | Subtarget.isCortexA7())) {
4343 | 22.8k | // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] |
4344 | 22.8k | // variants are one cycle cheaper. |
4345 | 22.8k | switch (DefMCID.getOpcode()) { |
4346 | 22.8k | default: break;
4347 | 22.8k | case ARM::LDRrs: |
4348 | 9 | case ARM::LDRBrs: { |
4349 | 9 | unsigned ShOpVal = |
4350 | 9 | cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue(); |
4351 | 9 | unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); |
4352 | 9 | if (ShImm == 0 || |
4353 | 9 | (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4354 | 9 | --Latency; |
4355 | 9 | break; |
4356 | 9 | } |
4357 | 662 | case ARM::t2LDRs: |
4358 | 662 | case ARM::t2LDRBs: |
4359 | 662 | case ARM::t2LDRHs: |
4360 | 662 | case ARM::t2LDRSHs: { |
4361 | 662 | // Thumb2 mode: lsl only. |
4362 | 662 | unsigned ShAmt = |
4363 | 662 | cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue(); |
4364 | 662 | if (ShAmt == 0 || ShAmt == 2)
4365 | 633 | --Latency; |
4366 | 662 | break; |
4367 | 114k | } |
4368 | 114k | } |
4369 | 114k | } else if (DefIdx == 0 && Latency > 2 && Subtarget.isSwift()) {
4370 | 0 | // FIXME: Properly handle all of the latency adjustments for address |
4371 | 0 | // writeback. |
4372 | 0 | switch (DefMCID.getOpcode()) { |
4373 | 0 | default: break; |
4374 | 0 | case ARM::LDRrs: |
4375 | 0 | case ARM::LDRBrs: { |
4376 | 0 | unsigned ShOpVal = |
4377 | 0 | cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue(); |
4378 | 0 | unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); |
4379 | 0 | if (ShImm == 0 || |
4380 | 0 | ((ShImm == 1 || ShImm == 2 || ShImm == 3) && |
4381 | 0 | ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)) |
4382 | 0 | Latency -= 2; |
4383 | 0 | else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr) |
4384 | 0 | --Latency; |
4385 | 0 | break; |
4386 | 0 | } |
4387 | 0 | case ARM::t2LDRs: |
4388 | 0 | case ARM::t2LDRBs: |
4389 | 0 | case ARM::t2LDRHs: |
4390 | 0 | case ARM::t2LDRSHs: |
4391 | 0 | // Thumb2 mode: lsl 0-3 only. |
4392 | 0 | Latency -= 2; |
4393 | 0 | break; |
4394 | 137k | } |
4395 | 137k | } |
4396 | 137k | |
4397 | 137k | if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment())
4398 | 902 | switch (DefMCID.getOpcode()) { |
4399 | 902 | default: break;
4400 | 902 | case ARM::VLD1q8: |
4401 | 19 | case ARM::VLD1q16: |
4402 | 19 | case ARM::VLD1q32: |
4403 | 19 | case ARM::VLD1q64: |
4404 | 19 | case ARM::VLD1q8wb_register: |
4405 | 19 | case ARM::VLD1q16wb_register: |
4406 | 19 | case ARM::VLD1q32wb_register: |
4407 | 19 | case ARM::VLD1q64wb_register: |
4408 | 19 | case ARM::VLD1q8wb_fixed: |
4409 | 19 | case ARM::VLD1q16wb_fixed: |
4410 | 19 | case ARM::VLD1q32wb_fixed: |
4411 | 19 | case ARM::VLD1q64wb_fixed: |
4412 | 19 | case ARM::VLD2d8: |
4413 | 19 | case ARM::VLD2d16: |
4414 | 19 | case ARM::VLD2d32: |
4415 | 19 | case ARM::VLD2q8Pseudo: |
4416 | 19 | case ARM::VLD2q16Pseudo: |
4417 | 19 | case ARM::VLD2q32Pseudo: |
4418 | 19 | case ARM::VLD2d8wb_fixed: |
4419 | 19 | case ARM::VLD2d16wb_fixed: |
4420 | 19 | case ARM::VLD2d32wb_fixed: |
4421 | 19 | case ARM::VLD2q8PseudoWB_fixed: |
4422 | 19 | case ARM::VLD2q16PseudoWB_fixed: |
4423 | 19 | case ARM::VLD2q32PseudoWB_fixed: |
4424 | 19 | case ARM::VLD2d8wb_register: |
4425 | 19 | case ARM::VLD2d16wb_register: |
4426 | 19 | case ARM::VLD2d32wb_register: |
4427 | 19 | case ARM::VLD2q8PseudoWB_register: |
4428 | 19 | case ARM::VLD2q16PseudoWB_register: |
4429 | 19 | case ARM::VLD2q32PseudoWB_register: |
4430 | 19 | case ARM::VLD3d8Pseudo: |
4431 | 19 | case ARM::VLD3d16Pseudo: |
4432 | 19 | case ARM::VLD3d32Pseudo: |
4433 | 19 | case ARM::VLD1d8TPseudo: |
4434 | 19 | case ARM::VLD1d16TPseudo: |
4435 | 19 | case ARM::VLD1d32TPseudo: |
4436 | 19 | case ARM::VLD1d64TPseudo: |
4437 | 19 | case ARM::VLD1d64TPseudoWB_fixed: |
4438 | 19 | case ARM::VLD1d64TPseudoWB_register: |
4439 | 19 | case ARM::VLD3d8Pseudo_UPD: |
4440 | 19 | case ARM::VLD3d16Pseudo_UPD: |
4441 | 19 | case ARM::VLD3d32Pseudo_UPD: |
4442 | 19 | case ARM::VLD3q8Pseudo_UPD: |
4443 | 19 | case ARM::VLD3q16Pseudo_UPD: |
4444 | 19 | case ARM::VLD3q32Pseudo_UPD: |
4445 | 19 | case ARM::VLD3q8oddPseudo: |
4446 | 19 | case ARM::VLD3q16oddPseudo: |
4447 | 19 | case ARM::VLD3q32oddPseudo: |
4448 | 19 | case ARM::VLD3q8oddPseudo_UPD: |
4449 | 19 | case ARM::VLD3q16oddPseudo_UPD: |
4450 | 19 | case ARM::VLD3q32oddPseudo_UPD: |
4451 | 19 | case ARM::VLD4d8Pseudo: |
4452 | 19 | case ARM::VLD4d16Pseudo: |
4453 | 19 | case ARM::VLD4d32Pseudo: |
4454 | 19 | case ARM::VLD1d8QPseudo: |
4455 | 19 | case ARM::VLD1d16QPseudo: |
4456 | 19 | case ARM::VLD1d32QPseudo: |
4457 | 19 | case ARM::VLD1d64QPseudo: |
4458 | 19 | case ARM::VLD1d64QPseudoWB_fixed: |
4459 | 19 | case ARM::VLD1d64QPseudoWB_register: |
4460 | 19 | case ARM::VLD1q8HighQPseudo: |
4461 | 19 | case ARM::VLD1q8LowQPseudo_UPD: |
4462 | 19 | case ARM::VLD1q8HighTPseudo: |
4463 | 19 | case ARM::VLD1q8LowTPseudo_UPD: |
4464 | 19 | case ARM::VLD1q16HighQPseudo: |
4465 | 19 | case ARM::VLD1q16LowQPseudo_UPD: |
4466 | 19 | case ARM::VLD1q16HighTPseudo: |
4467 | 19 | case ARM::VLD1q16LowTPseudo_UPD: |
4468 | 19 | case ARM::VLD1q32HighQPseudo: |
4469 | 19 | case ARM::VLD1q32LowQPseudo_UPD: |
4470 | 19 | case ARM::VLD1q32HighTPseudo: |
4471 | 19 | case ARM::VLD1q32LowTPseudo_UPD: |
4472 | 19 | case ARM::VLD1q64HighQPseudo: |
4473 | 19 | case ARM::VLD1q64LowQPseudo_UPD: |
4474 | 19 | case ARM::VLD1q64HighTPseudo: |
4475 | 19 | case ARM::VLD1q64LowTPseudo_UPD: |
4476 | 19 | case ARM::VLD4d8Pseudo_UPD: |
4477 | 19 | case ARM::VLD4d16Pseudo_UPD: |
4478 | 19 | case ARM::VLD4d32Pseudo_UPD: |
4479 | 19 | case ARM::VLD4q8Pseudo_UPD: |
4480 | 19 | case ARM::VLD4q16Pseudo_UPD: |
4481 | 19 | case ARM::VLD4q32Pseudo_UPD: |
4482 | 19 | case ARM::VLD4q8oddPseudo: |
4483 | 19 | case ARM::VLD4q16oddPseudo: |
4484 | 19 | case ARM::VLD4q32oddPseudo: |
4485 | 19 | case ARM::VLD4q8oddPseudo_UPD: |
4486 | 19 | case ARM::VLD4q16oddPseudo_UPD: |
4487 | 19 | case ARM::VLD4q32oddPseudo_UPD: |
4488 | 19 | case ARM::VLD1DUPq8: |
4489 | 19 | case ARM::VLD1DUPq16: |
4490 | 19 | case ARM::VLD1DUPq32: |
4491 | 19 | case ARM::VLD1DUPq8wb_fixed: |
4492 | 19 | case ARM::VLD1DUPq16wb_fixed: |
4493 | 19 | case ARM::VLD1DUPq32wb_fixed: |
4494 | 19 | case ARM::VLD1DUPq8wb_register: |
4495 | 19 | case ARM::VLD1DUPq16wb_register: |
4496 | 19 | case ARM::VLD1DUPq32wb_register: |
4497 | 19 | case ARM::VLD2DUPd8: |
4498 | 19 | case ARM::VLD2DUPd16: |
4499 | 19 | case ARM::VLD2DUPd32: |
4500 | 19 | case ARM::VLD2DUPd8wb_fixed: |
4501 | 19 | case ARM::VLD2DUPd16wb_fixed: |
4502 | 19 | case ARM::VLD2DUPd32wb_fixed: |
4503 | 19 | case ARM::VLD2DUPd8wb_register: |
4504 | 19 | case ARM::VLD2DUPd16wb_register: |
4505 | 19 | case ARM::VLD2DUPd32wb_register: |
4506 | 19 | case ARM::VLD2DUPq8EvenPseudo: |
4507 | 19 | case ARM::VLD2DUPq8OddPseudo: |
4508 | 19 | case ARM::VLD2DUPq16EvenPseudo: |
4509 | 19 | case ARM::VLD2DUPq16OddPseudo: |
4510 | 19 | case ARM::VLD2DUPq32EvenPseudo: |
4511 | 19 | case ARM::VLD2DUPq32OddPseudo: |
4512 | 19 | case ARM::VLD3DUPq8EvenPseudo: |
4513 | 19 | case ARM::VLD3DUPq8OddPseudo: |
4514 | 19 | case ARM::VLD3DUPq16EvenPseudo: |
4515 | 19 | case ARM::VLD3DUPq16OddPseudo: |
4516 | 19 | case ARM::VLD3DUPq32EvenPseudo: |
4517 | 19 | case ARM::VLD3DUPq32OddPseudo: |
4518 | 19 | case ARM::VLD4DUPd8Pseudo: |
4519 | 19 | case ARM::VLD4DUPd16Pseudo: |
4520 | 19 | case ARM::VLD4DUPd32Pseudo: |
4521 | 19 | case ARM::VLD4DUPd8Pseudo_UPD: |
4522 | 19 | case ARM::VLD4DUPd16Pseudo_UPD: |
4523 | 19 | case ARM::VLD4DUPd32Pseudo_UPD: |
4524 | 19 | case ARM::VLD4DUPq8EvenPseudo: |
4525 | 19 | case ARM::VLD4DUPq8OddPseudo: |
4526 | 19 | case ARM::VLD4DUPq16EvenPseudo: |
4527 | 19 | case ARM::VLD4DUPq16OddPseudo: |
4528 | 19 | case ARM::VLD4DUPq32EvenPseudo: |
4529 | 19 | case ARM::VLD4DUPq32OddPseudo: |
4530 | 19 | case ARM::VLD1LNq8Pseudo: |
4531 | 19 | case ARM::VLD1LNq16Pseudo: |
4532 | 19 | case ARM::VLD1LNq32Pseudo: |
4533 | 19 | case ARM::VLD1LNq8Pseudo_UPD: |
4534 | 19 | case ARM::VLD1LNq16Pseudo_UPD: |
4535 | 19 | case ARM::VLD1LNq32Pseudo_UPD: |
4536 | 19 | case ARM::VLD2LNd8Pseudo: |
4537 | 19 | case ARM::VLD2LNd16Pseudo: |
4538 | 19 | case ARM::VLD2LNd32Pseudo: |
4539 | 19 | case ARM::VLD2LNq16Pseudo: |
4540 | 19 | case ARM::VLD2LNq32Pseudo: |
4541 | 19 | case ARM::VLD2LNd8Pseudo_UPD: |
4542 | 19 | case ARM::VLD2LNd16Pseudo_UPD: |
4543 | 19 | case ARM::VLD2LNd32Pseudo_UPD: |
4544 | 19 | case ARM::VLD2LNq16Pseudo_UPD: |
4545 | 19 | case ARM::VLD2LNq32Pseudo_UPD: |
4546 | 19 | case ARM::VLD4LNd8Pseudo: |
4547 | 19 | case ARM::VLD4LNd16Pseudo: |
4548 | 19 | case ARM::VLD4LNd32Pseudo: |
4549 | 19 | case ARM::VLD4LNq16Pseudo: |
4550 | 19 | case ARM::VLD4LNq32Pseudo: |
4551 | 19 | case ARM::VLD4LNd8Pseudo_UPD: |
4552 | 19 | case ARM::VLD4LNd16Pseudo_UPD: |
4553 | 19 | case ARM::VLD4LNd32Pseudo_UPD: |
4554 | 19 | case ARM::VLD4LNq16Pseudo_UPD: |
4555 | 19 | case ARM::VLD4LNq32Pseudo_UPD: |
4556 | 19 | // If the address is not 64-bit aligned, the latencies of these |
4557 | 19 | // instructions increase by one.
4558 | 19 | ++Latency; |
4559 | 19 | break; |
4560 | 137k | } |
4561 | 137k | |
4562 | 137k | return Latency; |
4563 | 137k | } |
4564 | | |
4565 | 305k | unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const { |
4566 | 305k | if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() || |
4567 | 305k | MI.isImplicitDef()) |
4568 | 19 | return 0; |
4569 | 305k | |
4570 | 305k | if (MI.isBundle()) |
4571 | 37 | return 0; |
4572 | 305k | |
4573 | 305k | const MCInstrDesc &MCID = MI.getDesc(); |
4574 | 305k | |
4575 | 305k | if (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4576 | 284k | !Subtarget.cheapPredicableCPSRDef())) {
4577 | 55.6k | // When predicated, CPSR is an additional source operand for CPSR updating |
4578 | 55.6k | // instructions, this apparently increases their latencies. |
4579 | 55.6k | return 1; |
4580 | 55.6k | } |
4581 | 249k | return 0; |
4582 | 249k | } |
4583 | | |
4584 | | unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, |
4585 | | const MachineInstr &MI, |
4586 | 653k | unsigned *PredCost) const { |
4587 | 653k | if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4588 | 653k | MI.isImplicitDef())
4589 | 96 | return 1; |
4590 | 653k | |
4591 | 653k | // An instruction scheduler typically runs on unbundled instructions, however |
4592 | 653k | // other passes may query the latency of a bundled instruction. |
4593 | 653k | if (MI.isBundle()) { |
4594 | 6.62k | unsigned Latency = 0; |
4595 | 6.62k | MachineBasicBlock::const_instr_iterator I = MI.getIterator(); |
4596 | 6.62k | MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); |
4597 | 21.8k | while (++I != E && I->isInsideBundle()) {
4598 | 15.2k | if (I->getOpcode() != ARM::t2IT) |
4599 | 8.72k | Latency += getInstrLatency(ItinData, *I, PredCost); |
4600 | 15.2k | } |
4601 | 6.62k | return Latency; |
4602 | 6.62k | } |
4603 | 646k | |
4604 | 646k | const MCInstrDesc &MCID = MI.getDesc(); |
4605 | 646k | if (PredCost && (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4606 | 0 | !Subtarget.cheapPredicableCPSRDef()))) { |
4607 | 0 | // When predicated, CPSR is an additional source operand for CPSR updating |
4608 | 0 | // instructions, this apparently increases their latencies. |
4609 | 0 | *PredCost = 1; |
4610 | 0 | } |
4611 | 646k | // Be sure to call getStageLatency for an empty itinerary in case it has a |
4612 | 646k | // valid MinLatency property. |
4613 | 646k | if (!ItinData) |
4614 | 0 | return MI.mayLoad() ? 3 : 1; |
4615 | 646k | |
4616 | 646k | unsigned Class = MCID.getSchedClass(); |
4617 | 646k | |
4618 | 646k | // For instructions with variable uops, use uops as latency. |
4619 | 646k | if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0)
4620 | 14.1k | return getNumMicroOps(ItinData, MI); |
4621 | 632k | |
4622 | 632k | // For the common case, fall back on the itinerary's latency. |
4623 | 632k | unsigned Latency = ItinData->getStageLatency(Class); |
4624 | 632k | |
4625 | 632k | // Adjust for dynamic def-side opcode variants not captured by the itinerary. |
4626 | 632k | unsigned DefAlign = |
4627 | 632k | MI.hasOneMemOperand() ? (*MI.memoperands_begin())->getAlignment() : 0;
4628 | 632k | int Adj = adjustDefLatency(Subtarget, MI, MCID, DefAlign); |
4629 | 632k | if (Adj >= 0 || (int)Latency > -Adj) {
4630 | 632k | return Latency + Adj; |
4631 | 632k | } |
4632 | 32 | return Latency; |
4633 | 32 | } |
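 | | // Illustration (annotation added in this report, not upstream source): for
 | | // an IT bundle containing a t2IT and two single-cycle predicated
 | | // instructions, the t2IT is skipped and the bundle's latency is the sum of
 | | // the remaining two, i.e. 2; unbundled instructions fall through to the
 | | // itinerary stage latency plus the same adjustDefLatency tweak used for
 | | // operand latencies.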
4634 | | |
4635 | | int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, |
4636 | 440k | SDNode *Node) const { |
4637 | 440k | if (!Node->isMachineOpcode()) |
4638 | 0 | return 1; |
4639 | 440k | |
4640 | 440k | if (!ItinData || ItinData->isEmpty()) |
4641 | 0 | return 1; |
4642 | 440k | |
4643 | 440k | unsigned Opcode = Node->getMachineOpcode(); |
4644 | 440k | switch (Opcode) { |
4645 | 440k | default: |
4646 | 440k | return ItinData->getStageLatency(get(Opcode).getSchedClass()); |
4647 | 440k | case ARM::VLDMQIA: |
4648 | 2 | case ARM::VSTMQIA: |
4649 | 2 | return 2; |
4650 | 440k | } |
4651 | 440k | } |
4652 | | |
4653 | | bool ARMBaseInstrInfo::hasHighOperandLatency(const TargetSchedModel &SchedModel, |
4654 | | const MachineRegisterInfo *MRI, |
4655 | | const MachineInstr &DefMI, |
4656 | | unsigned DefIdx, |
4657 | | const MachineInstr &UseMI, |
4658 | 897 | unsigned UseIdx) const { |
4659 | 897 | unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask; |
4660 | 897 | unsigned UDomain = UseMI.getDesc().TSFlags & ARMII::DomainMask; |
4661 | 897 | if (Subtarget.nonpipelinedVFP() && |
4662 | 897 | (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
4663 | 0 | return true; |
4664 | 897 | |
4665 | 897 | // Hoist VFP / NEON instructions with 4 or higher latency. |
4666 | 897 | unsigned Latency = |
4667 | 897 | SchedModel.computeOperandLatency(&DefMI, DefIdx, &UseMI, UseIdx); |
4668 | 897 | if (Latency <= 3) |
4669 | 878 | return false; |
4670 | 19 | return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON || |
4671 | 19 | UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
4672 | 19 | } |
4673 | | |
4674 | | bool ARMBaseInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel, |
4675 | | const MachineInstr &DefMI, |
4676 | 4.42k | unsigned DefIdx) const { |
4677 | 4.42k | const InstrItineraryData *ItinData = SchedModel.getInstrItineraries(); |
4678 | 4.42k | if (!ItinData || ItinData->isEmpty())
4679 | 1.32k | return false; |
4680 | 3.09k | |
4681 | 3.09k | unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask; |
4682 | 3.09k | if (DDomain == ARMII::DomainGeneral) { |
4683 | 3.05k | unsigned DefClass = DefMI.getDesc().getSchedClass(); |
4684 | 3.05k | int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx); |
4685 | 3.05k | return (DefCycle != -1 && DefCycle <= 2);
4686 | 3.05k | } |
4687 | 46 | return false; |
4688 | 46 | } |
4689 | | |
4690 | | bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI, |
4691 | 1.80M | StringRef &ErrInfo) const { |
4692 | 1.80M | if (convertAddSubFlagsOpcode(MI.getOpcode())) { |
4693 | 0 | ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG"; |
4694 | 0 | return false; |
4695 | 0 | } |
4696 | 1.80M | if (MI.getOpcode() == ARM::tMOVr && !Subtarget.hasV6Ops()) {
4697 | 3.08k | // Make sure we don't generate a lo-lo mov that isn't supported. |
4698 | 3.08k | if (!ARM::hGPRRegClass.contains(MI.getOperand(0).getReg()) && |
4699 | 3.08k | !ARM::hGPRRegClass.contains(MI.getOperand(1).getReg())) {
4700 | 1 | ErrInfo = "Non-flag-setting Thumb1 mov is v6-only"; |
4701 | 1 | return false; |
4702 | 1 | } |
4703 | 1.80M | } |
4704 | 1.80M | if (MI.getOpcode() == ARM::tPUSH || |
4705 | 1.80M | MI.getOpcode() == ARM::tPOP ||
4706 | 1.80M | MI.getOpcode() == ARM::tPOP_RET) {
4707 | 83.9k | for (int i = 2, e = MI.getNumOperands(); i < e; ++i) {
4708 | 68.3k | if (MI.getOperand(i).isImplicit() || |
4709 | 68.3k | !MI.getOperand(i).isReg())
4710 | 28.5k | continue; |
4711 | 39.7k | unsigned Reg = MI.getOperand(i).getReg(); |
4712 | 39.7k | if (Reg < ARM::R0 || Reg > ARM::R7) {
4713 | 12.8k | if (!(MI.getOpcode() == ARM::tPUSH && Reg == ARM::LR) &&
4714 | 12.8k | !(MI.getOpcode() == ARM::tPOP_RET && Reg == ARM::PC)) {
4715 | 1 | ErrInfo = "Unsupported register in Thumb1 push/pop"; |
4716 | 1 | return false; |
4717 | 1 | } |
4718 | 12.8k | } |
4719 | 39.7k | } |
4720 | 15.5k | } |
4721 | 1.80M | return true;
4722 | 1.80M | } |
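 | | // Illustration (annotation added in this report, not upstream source):
 | | // tPUSH {r4, r5, lr} and tPOP_RET {r4, pc} verify cleanly, while tPUSH {r8}
 | | // trips the "Unsupported register in Thumb1 push/pop" error and a lo-to-lo
 | | // tMOVr on a pre-v6 subtarget trips the "Non-flag-setting Thumb1 mov is
 | | // v6-only" one.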
4723 | | |
4724 | | // LoadStackGuard has so far only been implemented for MachO. Different code |
4725 | | // sequence is needed for other targets. |
4726 | | void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI, |
4727 | | unsigned LoadImmOpc, |
4728 | 150 | unsigned LoadOpc) const { |
4729 | 150 | assert(!Subtarget.isROPI() && !Subtarget.isRWPI() && |
4730 | 150 | "ROPI/RWPI not currently supported with stack guard"); |
4731 | 150 | |
4732 | 150 | MachineBasicBlock &MBB = *MI->getParent(); |
4733 | 150 | DebugLoc DL = MI->getDebugLoc(); |
4734 | 150 | unsigned Reg = MI->getOperand(0).getReg(); |
4735 | 150 | const GlobalValue *GV = |
4736 | 150 | cast<GlobalValue>((*MI->memoperands_begin())->getValue()); |
4737 | 150 | MachineInstrBuilder MIB; |
4738 | 150 | |
4739 | 150 | BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg) |
4740 | 150 | .addGlobalAddress(GV, 0, ARMII::MO_NONLAZY); |
4741 | 150 | |
4742 | 150 | if (Subtarget.isGVIndirectSymbol(GV)) { |
4743 | 142 | MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg); |
4744 | 142 | MIB.addReg(Reg, RegState::Kill).addImm(0); |
4745 | 142 | auto Flags = MachineMemOperand::MOLoad | |
4746 | 142 | MachineMemOperand::MODereferenceable | |
4747 | 142 | MachineMemOperand::MOInvariant; |
4748 | 142 | MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand( |
4749 | 142 | MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, 4); |
4750 | 142 | MIB.addMemOperand(MMO).add(predOps(ARMCC::AL)); |
4751 | 142 | } |
4752 | 150 | |
4753 | 150 | MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg); |
4754 | 150 | MIB.addReg(Reg, RegState::Kill) |
4755 | 150 | .addImm(0) |
4756 | 150 | .cloneMemRefs(*MI) |
4757 | 150 | .add(predOps(ARMCC::AL)); |
4758 | 150 | } |
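 | | // Sketch (annotation added in this report, not upstream source) of the
 | | // MachO sequence built above for an indirect, non-lazy guard symbol such as
 | | // __stack_chk_guard:
 | | // Reg = LoadImmOpc @guard(MO_NONLAZY)  ; materialize the symbol address
 | | // Reg = LoadOpc [Reg, #0]              ; load the real address via the GOT
 | | // Reg = LoadOpc [Reg, #0]              ; load the guard value itself
 | | // A directly referenced guard skips the middle GOT load.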
4759 | | |
4760 | | bool |
4761 | | ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc, |
4762 | | unsigned &AddSubOpc, |
4763 | 563 | bool &NegAcc, bool &HasLane) const { |
4764 | 563 | DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode); |
4765 | 563 | if (I == MLxEntryMap.end()) |
4766 | 547 | return false; |
4767 | 16 | |
4768 | 16 | const ARM_MLxEntry &Entry = ARM_MLxTable[I->second]; |
4769 | 16 | MulOpc = Entry.MulOpc; |
4770 | 16 | AddSubOpc = Entry.AddSubOpc; |
4771 | 16 | NegAcc = Entry.NegAcc; |
4772 | 16 | HasLane = Entry.HasLane; |
4773 | 16 | return true; |
4774 | 16 | } |
4775 | | |
4776 | | //===----------------------------------------------------------------------===// |
4777 | | // Execution domains. |
4778 | | //===----------------------------------------------------------------------===// |
4779 | | // |
4780 | | // Some instructions go down the NEON pipeline, some go down the VFP pipeline, |
4781 | | // and some can go down both. The vmov instructions go down the VFP pipeline, |
4782 | | // but they can be changed to vorr equivalents that are executed by the NEON |
4783 | | // pipeline. |
4784 | | // |
4785 | | // We use the following execution domain numbering: |
4786 | | // |
4787 | | enum ARMExeDomain { |
4788 | | ExeGeneric = 0, |
4789 | | ExeVFP = 1, |
4790 | | ExeNEON = 2 |
4791 | | }; |
4792 | | |
4793 | | // |
4794 | | // Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h |
4795 | | // |
4796 | | std::pair<uint16_t, uint16_t> |
4797 | 657k | ARMBaseInstrInfo::getExecutionDomain(const MachineInstr &MI) const { |
4798 | 657k | // If we don't have access to NEON instructions then we won't be able |
4799 | 657k | // to swizzle anything to the NEON domain. Check to make sure. |
4800 | 657k | if (Subtarget.hasNEON()) { |
4801 | 566k | // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON |
4802 | 566k | // if they are not predicated. |
4803 | 566k | if (MI.getOpcode() == ARM::VMOVD && !isPredicated(MI))
4804 | 616 | return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON)); |
4805 | 565k | |
4806 | 565k | // CortexA9 is particularly picky about mixing the two and wants these |
4807 | 565k | // converted. |
4808 | 565k | if (Subtarget.useNEONForFPMovs() && !isPredicated(MI) &&
4809 | 565k | (MI.getOpcode() == ARM::VMOVRS || MI.getOpcode() == ARM::VMOVSR ||
4810 | 1.46k | MI.getOpcode() == ARM::VMOVS))
4811 | 153 | return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON)); |
4812 | 656k | } |
4813 | 656k | // No other instructions can be swizzled, so just determine their domain. |
4814 | 656k | unsigned Domain = MI.getDesc().TSFlags & ARMII::DomainMask; |
4815 | 656k | |
4816 | 656k | if (Domain & ARMII::DomainNEON) |
4817 | 26.5k | return std::make_pair(ExeNEON, 0); |
4818 | 629k | |
4819 | 629k | // Certain instructions can go either way on Cortex-A8. |
4820 | 629k | // Treat them as NEON instructions. |
4821 | 629k | if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8())
4822 | 128 | return std::make_pair(ExeNEON, 0); |
4823 | 629k | |
4824 | 629k | if (Domain & ARMII::DomainVFP) |
4825 | 10.0k | return std::make_pair(ExeVFP, 0); |
4826 | 619k | |
4827 | 619k | return std::make_pair(ExeGeneric, 0); |
4828 | 619k | } |
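 | | // Illustration (annotation added in this report, not upstream source): an
 | | // unpredicated VMOVD comes back as (ExeVFP, (1 << ExeVFP) | (1 << ExeNEON)),
 | | // i.e. currently VFP but swizzlable to either pipeline, whereas a plain
 | | // integer ADD reports (ExeGeneric, 0) and is never converted.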
4829 | | |
4830 | | static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI, |
4831 | 69 | unsigned SReg, unsigned &Lane) { |
4832 | 69 | unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass); |
4833 | 69 | Lane = 0; |
4834 | 69 | |
4835 | 69 | if (DReg != ARM::NoRegister) |
4836 | 45 | return DReg; |
4837 | 24 | |
4838 | 24 | Lane = 1; |
4839 | 24 | DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass); |
4840 | 24 | |
4841 | 24 | assert(DReg && "S-register with no D super-register?"); |
4842 | 24 | return DReg; |
4843 | 24 | } |
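 | | // Illustration (annotation added in this report, not upstream source): S0
 | | // maps to (D0, lane 0), S1 to (D0, lane 1), and S2 to (D1, lane 0),
 | | // mirroring the ssub_0/ssub_1 sub-register layout queried above.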
4844 | | |
4845 | | /// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane, |
4846 | | /// set ImplicitSReg to a register number that must be marked as implicit-use or |
4847 | | /// zero if no register needs to be defined as implicit-use. |
4848 | | /// |
4849 | | /// If the function cannot determine if an SPR should be marked implicit use or |
4850 | | /// not, it returns false. |
4851 | | /// |
4852 | | /// This function handles cases where an instruction is being modified from taking |
4853 | | /// an SPR to a DPR[Lane]. A use of the DPR is being added, which may conflict |
4854 | | /// with an earlier def of an SPR corresponding to DPR[Lane^1] (i.e. the other |
4855 | | /// lane of the DPR). |
4856 | | /// |
4857 | | /// If the other SPR is defined, an implicit-use of it should be added. Otherwise
4858 | | /// (including the case where the DPR itself is defined), it should not.
4859 | | /// |
4860 | | static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI, |
4861 | | MachineInstr &MI, unsigned DReg, |
4862 | 21 | unsigned Lane, unsigned &ImplicitSReg) { |
4863 | 21 | // If the DPR is defined or used already, the other SPR lane will be chained |
4864 | 21 | // correctly, so there is nothing to be done. |
4865 | 21 | if (MI.definesRegister(DReg, TRI) || MI.readsRegister(DReg, TRI)) {
4866 | 21 | ImplicitSReg = 0; |
4867 | 21 | return true; |
4868 | 21 | } |
4869 | 0 | |
4870 | 0 | // Otherwise we need to go searching to see if the SPR is set explicitly. |
4871 | 0 | ImplicitSReg = TRI->getSubReg(DReg, |
4872 | 0 | (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1); |
4873 | 0 | MachineBasicBlock::LivenessQueryResult LQR = |
4874 | 0 | MI.getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI); |
4875 | 0 |
4876 | 0 | if (LQR == MachineBasicBlock::LQR_Live) |
4877 | 0 | return true; |
4878 | 0 | else if (LQR == MachineBasicBlock::LQR_Unknown) |
4879 | 0 | return false; |
4880 | 0 | |
4881 | 0 | // If the register is known not to be live, there is no need to add an |
4882 | 0 | // implicit-use. |
4883 | 0 | ImplicitSReg = 0; |
4884 | 0 | return true; |
4885 | 0 | } |
4886 | | |
4887 | | void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI, |
4888 | 769 | unsigned Domain) const { |
4889 | 769 | unsigned DstReg, SrcReg, DReg; |
4890 | 769 | unsigned Lane; |
4891 | 769 | MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI); |
4892 | 769 | const TargetRegisterInfo *TRI = &getRegisterInfo(); |
4893 | 769 | switch (MI.getOpcode()) { |
4894 | 769 | default: |
4895 | 0 | llvm_unreachable("cannot handle opcode!"); |
4896 | 769 | break0 ; |
4897 | 769 | case ARM::VMOVD: |
4898 | 616 | if (Domain != ExeNEON) |
4899 | 270 | break; |
4900 | 346 | |
4901 | 346 | // Zap the predicate operands. |
4902 | 346 | assert(!isPredicated(MI) && "Cannot predicate a VORRd"); |
4903 | 346 | |
4904 | 346 | // Make sure we've got NEON instructions. |
4905 | 346 | assert(Subtarget.hasNEON() && "VORRd requires NEON"); |
4906 | 346 | |
4907 | 346 | // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits) |
4908 | 346 | DstReg = MI.getOperand(0).getReg(); |
4909 | 346 | SrcReg = MI.getOperand(1).getReg(); |
4910 | 346 | |
4911 | 1.73k | for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
4912 | 1.38k | MI.RemoveOperand(i - 1); |
4913 | 346 | |
4914 | 346 | // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits) |
4915 | 346 | MI.setDesc(get(ARM::VORRd)); |
4916 | 346 | MIB.addReg(DstReg, RegState::Define) |
4917 | 346 | .addReg(SrcReg) |
4918 | 346 | .addReg(SrcReg) |
4919 | 346 | .add(predOps(ARMCC::AL)); |
4920 | 346 | break; |
4921 | 346 | case ARM::VMOVRS: |
4922 | 59 | if (Domain != ExeNEON) |
4923 | 22 | break; |
4924 | 37 | assert(!isPredicated(MI) && "Cannot predicate a VGETLN"); |
4925 | 37 | |
4926 | 37 | // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits) |
4927 | 37 | DstReg = MI.getOperand(0).getReg(); |
4928 | 37 | SrcReg = MI.getOperand(1).getReg(); |
4929 | 37 | |
4930 | 185 | for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
4931 | 148 | MI.RemoveOperand(i - 1); |
4932 | 37 | |
4933 | 37 | DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane); |
4934 | 37 | |
4935 | 37 | // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps) |
4936 | 37 | // Note that DSrc has been widened and the other lane may be undef, which |
4937 | 37 | // contaminates the entire register. |
4938 | 37 | MI.setDesc(get(ARM::VGETLNi32)); |
4939 | 37 | MIB.addReg(DstReg, RegState::Define) |
4940 | 37 | .addReg(DReg, RegState::Undef) |
4941 | 37 | .addImm(Lane) |
4942 | 37 | .add(predOps(ARMCC::AL)); |
4943 | 37 | |
4944 | 37 | // The old source should be an implicit use, otherwise we might think it |
4945 | 37 | // was dead before here. |
4946 | 37 | MIB.addReg(SrcReg, RegState::Implicit); |
4947 | 37 | break; |
4948 | 80 | case ARM::VMOVSR: { |
4949 | 80 | if (Domain != ExeNEON) |
4950 | 70 | break; |
4951 | 10 | assert(!isPredicated(MI) && "Cannot predicate a VSETLN"); |
4952 | 10 | |
4953 | 10 | // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits) |
4954 | 10 | DstReg = MI.getOperand(0).getReg(); |
4955 | 10 | SrcReg = MI.getOperand(1).getReg(); |
4956 | 10 | |
4957 | 10 | DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane); |
4958 | 10 | |
4959 | 10 | unsigned ImplicitSReg; |
4960 | 10 | if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg)) |
4961 | 0 | break; |
4962 | 10 | |
4963 | 50 | for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
4964 | 40 | MI.RemoveOperand(i - 1); |
4965 | 10 | |
4966 | 10 | // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps) |
4967 | 10 | // Again DDst may be undefined at the beginning of this instruction. |
4968 | 10 | MI.setDesc(get(ARM::VSETLNi32)); |
4969 | 10 | MIB.addReg(DReg, RegState::Define) |
4970 | 10 | .addReg(DReg, getUndefRegState(!MI.readsRegister(DReg, TRI))) |
4971 | 10 | .addReg(SrcReg) |
4972 | 10 | .addImm(Lane) |
4973 | 10 | .add(predOps(ARMCC::AL)); |
4974 | 10 | |
4975 | 10 | // The narrower destination must be marked as set to keep previous chains |
4976 | 10 | // in place. |
4977 | 10 | MIB.addReg(DstReg, RegState::Define | RegState::Implicit); |
4978 | 10 | if (ImplicitSReg != 0) |
4979 | 0 | MIB.addReg(ImplicitSReg, RegState::Implicit); |
4980 | 10 | break; |
4981 | 10 | } |
4982 | 14 | case ARM::VMOVS: { |
4983 | 14 | if (Domain != ExeNEON) |
4984 | 3 | break; |
4985 | 11 | |
4986 | 11 | // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits) |
4987 | 11 | DstReg = MI.getOperand(0).getReg(); |
4988 | 11 | SrcReg = MI.getOperand(1).getReg(); |
4989 | 11 | |
4990 | 11 | unsigned DstLane = 0, SrcLane = 0, DDst, DSrc; |
4991 | 11 | DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane); |
4992 | 11 | DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane); |
4993 | 11 | |
4994 | 11 | unsigned ImplicitSReg; |
4995 | 11 | if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg)) |
4996 | 0 | break; |
4997 | 11 | |
4998 | 55 | for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
4999 | 44 | MI.RemoveOperand(i - 1); |
5000 | 11 | |
5001 | 11 | if (DSrc == DDst) { |
5002 | 1 | // Destination can be: |
5003 | 1 | // %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits) |
5004 | 1 | MI.setDesc(get(ARM::VDUPLN32d)); |
5005 | 1 | MIB.addReg(DDst, RegState::Define) |
5006 | 1 | .addReg(DDst, getUndefRegState(!MI.readsRegister(DDst, TRI))) |
5007 | 1 | .addImm(SrcLane) |
5008 | 1 | .add(predOps(ARMCC::AL)); |
5009 | 1 | |
5010 | 1 | // Neither the source nor the destination is naturally represented any
5011 | 1 | // more, so add them in manually. |
5012 | 1 | MIB.addReg(DstReg, RegState::Implicit | RegState::Define); |
5013 | 1 | MIB.addReg(SrcReg, RegState::Implicit); |
5014 | 1 | if (ImplicitSReg != 0) |
5015 | 0 | MIB.addReg(ImplicitSReg, RegState::Implicit); |
5016 | 1 | break; |
5017 | 1 | } |
5018 | 10 | |
5019 | 10 | // In general there's no single instruction that can perform an S <-> S |
5020 | 10 | // move in NEON space, but a pair of VEXT instructions *can* do the |
5021 | 10 | // job. It turns out that the VEXTs needed will only use DSrc once, with |
5022 | 10 | // the position based purely on the combination of lane-0 and lane-1 |
5023 | 10 | // involved. For example |
5024 | 10 | // vmov s0, s2 -> vext.32 d0, d0, d1, #1 vext.32 d0, d0, d0, #1 |
5025 | 10 | // vmov s1, s3 -> vext.32 d0, d1, d0, #1 vext.32 d0, d0, d0, #1 |
5026 | 10 | // vmov s0, s3 -> vext.32 d0, d0, d0, #1 vext.32 d0, d1, d0, #1 |
5027 | 10 | // vmov s1, s2 -> vext.32 d0, d0, d0, #1 vext.32 d0, d0, d1, #1 |
5028 | 10 | // |
5029 | 10 | // Pattern of the MachineInstrs is: |
5030 | 10 | // %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits) |
5031 | 10 | MachineInstrBuilder NewMIB; |
5032 | 10 | NewMIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::VEXTd32), |
5033 | 10 | DDst); |
5034 | 10 | |
5035 | 10 | // On the first instruction, both DSrc and DDst may be undef if present. |
5036 | 10 | // Specifically when the original instruction didn't have them as an |
5037 | 10 | // <imp-use>. |
5038 | 10 | unsigned CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst;
5039 | 10 | bool CurUndef = !MI.readsRegister(CurReg, TRI); |
5040 | 10 | NewMIB.addReg(CurReg, getUndefRegState(CurUndef)); |
5041 | 10 | |
5042 | 10 | CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst;
5043 | 10 | CurUndef = !MI.readsRegister(CurReg, TRI); |
5044 | 10 | NewMIB.addReg(CurReg, getUndefRegState(CurUndef)) |
5045 | 10 | .addImm(1) |
5046 | 10 | .add(predOps(ARMCC::AL)); |
5047 | 10 | |
5048 | 10 | if (SrcLane == DstLane) |
5049 | 7 | NewMIB.addReg(SrcReg, RegState::Implicit); |
5050 | 10 | |
5051 | 10 | MI.setDesc(get(ARM::VEXTd32)); |
5052 | 10 | MIB.addReg(DDst, RegState::Define); |
5053 | 10 | |
5054 | 10 | // On the second instruction, DDst has definitely been defined above, so |
5055 | 10 | // it is not undef. DSrc, if present, can be undef as above. |
5056 | 10 | CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst;
5057 | 10 | CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
5058 | 10 | MIB.addReg(CurReg, getUndefRegState(CurUndef)); |
5059 | 10 | |
5060 | 10 | CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst;
5061 | 10 | CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
5062 | 10 | MIB.addReg(CurReg, getUndefRegState(CurUndef)) |
5063 | 10 | .addImm(1) |
5064 | 10 | .add(predOps(ARMCC::AL)); |
5065 | 10 | |
5066 | 10 | if (SrcLane != DstLane) |
5067 | 3 | MIB.addReg(SrcReg, RegState::Implicit); |
5068 | 10 | |
5069 | 10 | // As before, the original destination is no longer represented, add it |
5070 | 10 | // implicitly. |
5071 | 10 | MIB.addReg(DstReg, RegState::Define | RegState::Implicit); |
5072 | 10 | if (ImplicitSReg != 0) |
5073 | 0 | MIB.addReg(ImplicitSReg, RegState::Implicit); |
5074 | 10 | break; |
5075 | 10 | } |
5076 | 769 | } |
5077 | 769 | } |
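 | | // Illustration (annotation added in this report, not upstream source):
 | | // swizzling "vmov s0, s1" lands in the DSrc == DDst case and becomes a
 | | // single VDUPLN32d d0, d0, #1, while "vmov s0, s2" crosses D-registers and
 | | // is rewritten into the VEXTd32 pair described in the comment above.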
5078 | | |
5079 | | //===----------------------------------------------------------------------===// |
5080 | | // Partial register updates |
5081 | | //===----------------------------------------------------------------------===// |
5082 | | // |
5083 | | // Swift renames NEON registers with 64-bit granularity. That means any |
5084 | | // instruction writing an S-reg implicitly reads the containing D-reg. The |
5085 | | // problem is mostly avoided by translating f32 operations to v2f32 operations |
5086 | | // on D-registers, but f32 loads are still a problem. |
5087 | | // |
5088 | | // These instructions can load an f32 into a NEON register: |
5089 | | // |
5090 | | // VLDRS - Only writes S, partial D update. |
5091 | | // VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops. |
5092 | | // VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops. |
5093 | | // |
5094 | | // FCONSTD can be used as a dependency-breaking instruction. |
5095 | | unsigned ARMBaseInstrInfo::getPartialRegUpdateClearance( |
5096 | | const MachineInstr &MI, unsigned OpNum, |
5097 | 495k | const TargetRegisterInfo *TRI) const { |
5098 | 495k | auto PartialUpdateClearance = Subtarget.getPartialUpdateClearance(); |
5099 | 495k | if (!PartialUpdateClearance) |
5100 | 384k | return 0; |
5101 | 111k | |
5102 | 111k | assert(TRI && "Need TRI instance"); |
5103 | 111k | |
5104 | 111k | const MachineOperand &MO = MI.getOperand(OpNum); |
5105 | 111k | if (MO.readsReg()) |
5106 | 0 | return 0; |
5107 | 111k | unsigned Reg = MO.getReg(); |
5108 | 111k | int UseOp = -1; |
5109 | 111k | |
5110 | 111k | switch (MI.getOpcode()) { |
5111 | 111k | // Normal instructions writing only an S-register. |
5112 | 111k | case ARM::VLDRS: |
5113 | 125 | case ARM::FCONSTS: |
5114 | 125 | case ARM::VMOVSR: |
5115 | 125 | case ARM::VMOVv8i8: |
5116 | 125 | case ARM::VMOVv4i16: |
5117 | 125 | case ARM::VMOVv2i32: |
5118 | 125 | case ARM::VMOVv2f32: |
5119 | 125 | case ARM::VMOVv1i64: |
5120 | 125 | UseOp = MI.findRegisterUseOperandIdx(Reg, false, TRI); |
5121 | 125 | break; |
5122 | 125 | |
5123 | 125 | // Explicitly reads the dependency. |
5124 | 125 | case ARM::VLD1LNd32: |
5125 | 24 | UseOp = 3; |
5126 | 24 | break; |
5127 | 111k | default: |
5128 | 111k | return 0; |
5129 | 149 | } |
5130 | 149 | |
5131 | 149 | // If this instruction actually reads a value from Reg, there is no unwanted |
5132 | 149 | // dependency. |
5133 | 149 | if (UseOp != -1 && MI.getOperand(UseOp).readsReg())
5134 | 1 | return 0; |
5135 | 148 | |
5136 | 148 | // We must be able to clobber the whole D-reg. |
5137 | 148 | if (TargetRegisterInfo::isVirtualRegister(Reg)) { |
5138 | 0 | // Virtual register must be a def undef foo:ssub_0 operand. |
5139 | 0 | if (!MO.getSubReg() || MI.readsVirtualRegister(Reg)) |
5140 | 0 | return 0; |
5141 | 148 | } else if (ARM::SPRRegClass.contains(Reg)) { |
5142 | 30 | // Physical register: MI must define the full D-reg. |
5143 | 30 | unsigned DReg = TRI->getMatchingSuperReg(Reg, ARM::ssub_0, |
5144 | 30 | &ARM::DPRRegClass); |
5145 | 30 | if (!DReg || !MI.definesRegister(DReg, TRI)) |
5146 | 19 | return 0; |
5147 | 129 | } |
5148 | 129 | |
5149 | 129 | // MI has an unwanted D-register dependency. |
5150 | 129 | // Avoid defs in the previous N instructions.
5151 | 129 | return PartialUpdateClearance; |
5152 | 129 | } |
5153 | | |
5154 | | // Break a partial register dependency after getPartialRegUpdateClearance |
5155 | | // returned non-zero. |
5156 | | void ARMBaseInstrInfo::breakPartialRegDependency( |
5157 | 32 | MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const { |
5158 | 32 | assert(OpNum < MI.getDesc().getNumDefs() && "OpNum is not a def"); |
5159 | 32 | assert(TRI && "Need TRI instance"); |
5160 | 32 | |
5161 | 32 | const MachineOperand &MO = MI.getOperand(OpNum); |
5162 | 32 | unsigned Reg = MO.getReg(); |
5163 | 32 | assert(TargetRegisterInfo::isPhysicalRegister(Reg) && |
5164 | 32 | "Can't break virtual register dependencies."); |
5165 | 32 | unsigned DReg = Reg; |
5166 | 32 | |
5167 | 32 | // If MI defines an S-reg, find the corresponding D super-register. |
5168 | 32 | if (ARM::SPRRegClass.contains(Reg)) { |
5169 | 2 | DReg = ARM::D0 + (Reg - ARM::S0) / 2; |
5170 | 2 | assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken"); |
5171 | 2 | } |
5172 | 32 | |
5173 | 32 | assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps"); |
5174 | 32 | assert(MI.definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg"); |
5175 | 32 | |
5176 | 32 | // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines |
5177 | 32 | // the full D-register by loading the same value to both lanes. The |
5178 | 32 | // instruction is micro-coded with 2 uops, so don't do this until we can |
5179 | 32 | // properly schedule micro-coded instructions. The dispatcher stalls cause |
5180 | 32 | // too big regressions. |
5181 | 32 | |
5182 | 32 | // Insert the dependency-breaking FCONSTD before MI. |
5183 | 32 | // 96 is the encoding of 0.5, but the actual value doesn't matter here. |
5184 | 32 | BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::FCONSTD), DReg) |
5185 | 32 | .addImm(96) |
5186 | 32 | .add(predOps(ARMCC::AL)); |
5187 | 32 | MI.addRegisterKilled(DReg, TRI, true); |
5188 | 32 | } |
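 | | // A minimal before/after sketch (hand-written for illustration; the
 | | // registers and address below are assumptions, not taken from a real
 | | // compilation). On Swift the VLDRS only writes s2, so the core treats it
 | | // as a read-modify-write of the containing d1 and stalls on the previous
 | | // writer of d1:
 | | //
 | | //   vldr     s2, [r0]        @ partial write of d1 -> false dependency
 | | //
 | | // After breakPartialRegDependency(), d1 is fully defined first and the
 | | // load no longer depends on d1's old value:
 | | //
 | | //   vmov.f64 d1, #0.5        @ FCONSTD, immediate encoding 96
 | | //   vldr     s2, [r0]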
5189 | | |
5190 | 16 | bool ARMBaseInstrInfo::hasNOP() const { |
5191 | 16 | return Subtarget.getFeatureBits()[ARM::HasV6KOps]; |
5192 | 16 | } |
5193 | | |
5194 | 19.0k | bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const { |
5195 | 19.0k | if (MI->getNumOperands() < 4) |
5196 | 0 | return true; |
5197 | 19.0k | unsigned ShOpVal = MI->getOperand(3).getImm(); |
5198 | 19.0k | unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal); |
5199 | 19.0k | // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1. |
5200 | 19.0k | if ((ShImm == 1 && ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsr) ||
5201 | 19.0k | ((ShImm == 1 || ShImm == 2) &&
5202 | 18.8k | ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsl))
5203 | 7.21k | return true; |
5204 | 11.8k | |
5205 | 11.8k | return false; |
5206 | 11.8k | } |
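 | | // Hand-picked examples of the check above (not from the source; shown
 | | // only for illustration):
 | | //
 | | //   add r0, r1, r2, lsl #2   -> true  (ShImm == 2, shift op == lsl)
 | | //   add r0, r1, r2, lsr #1   -> true  (ShImm == 1, shift op == lsr)
 | | //   add r0, r1, r2, lsl #3   -> false (shift amount not 1 or 2)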
5207 | | |
5208 | | bool ARMBaseInstrInfo::getRegSequenceLikeInputs( |
5209 | | const MachineInstr &MI, unsigned DefIdx, |
5210 | 2.70k | SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const { |
5211 | 2.70k | assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index"); |
5212 | 2.70k | assert(MI.isRegSequenceLike() && "Invalid kind of instruction"); |
5213 | 2.70k | |
5214 | 2.70k | switch (MI.getOpcode()) { |
5215 | 2.70k | case ARM::VMOVDRR: |
5216 | 2.70k | // dX = VMOVDRR rY, rZ |
5217 | 2.70k | // is the same as: |
5218 | 2.70k | // dX = REG_SEQUENCE rY, ssub_0, rZ, ssub_1 |
5219 | 2.70k | // Populate the InputRegs accordingly. |
5220 | 2.70k | // rY |
5221 | 2.70k | const MachineOperand *MOReg = &MI.getOperand(1); |
5222 | 2.70k | if (!MOReg->isUndef()) |
5223 | 2.70k | InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(), |
5224 | 2.70k | MOReg->getSubReg(), ARM::ssub_0)); |
5225 | 2.70k | // rZ |
5226 | 2.70k | MOReg = &MI.getOperand(2); |
5227 | 2.70k | if (!MOReg->isUndef()) |
5228 | 2.70k | InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(), |
5229 | 2.70k | MOReg->getSubReg(), ARM::ssub_1)); |
5230 | 2.70k | return true; |
5231 | 0 | } |
5232 | 0 | llvm_unreachable("Target dependent opcode missing"); |
5233 | 0 | } |
5234 | | |
5235 | | bool ARMBaseInstrInfo::getExtractSubregLikeInputs( |
5236 | | const MachineInstr &MI, unsigned DefIdx, |
5237 | 3.94k | RegSubRegPairAndIdx &InputReg) const { |
5238 | 3.94k | assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index"); |
5239 | 3.94k | assert(MI.isExtractSubregLike() && "Invalid kind of instruction"); |
5240 | 3.94k | |
5241 | 3.94k | switch (MI.getOpcode()) { |
5242 | 3.94k | case ARM::VMOVRRD: |
5243 | 3.94k | // rX, rY = VMOVRRD dZ |
5244 | 3.94k | // is the same as: |
5245 | 3.94k | // rX = EXTRACT_SUBREG dZ, ssub_0 |
5246 | 3.94k | // rY = EXTRACT_SUBREG dZ, ssub_1 |
5247 | 3.94k | const MachineOperand &MOReg = MI.getOperand(2); |
5248 | 3.94k | if (MOReg.isUndef()) |
5249 | 0 | return false; |
5250 | 3.94k | InputReg.Reg = MOReg.getReg(); |
5251 | 3.94k | InputReg.SubReg = MOReg.getSubReg(); |
5252 | 3.94k | InputReg.SubIdx = DefIdx == 0 ? ARM::ssub_0 : ARM::ssub_1;
5253 | 3.94k | return true; |
5254 | 0 | } |
5255 | 0 | llvm_unreachable("Target dependent opcode missing"); |
5256 | 0 | } |
5257 | | |
5258 | | bool ARMBaseInstrInfo::getInsertSubregLikeInputs( |
5259 | | const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg, |
5260 | 245 | RegSubRegPairAndIdx &InsertedReg) const { |
5261 | 245 | assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index"); |
5262 | 245 | assert(MI.isInsertSubregLike() && "Invalid kind of instruction"); |
5263 | 245 | |
5264 | 245 | switch (MI.getOpcode()) { |
5265 | 245 | case ARM::VSETLNi32: |
5266 | 245 | // dX = VSETLNi32 dY, rZ, imm |
5267 | 245 | const MachineOperand &MOBaseReg = MI.getOperand(1); |
5268 | 245 | const MachineOperand &MOInsertedReg = MI.getOperand(2); |
5269 | 245 | if (MOInsertedReg.isUndef()) |
5270 | 0 | return false; |
5271 | 245 | const MachineOperand &MOIndex = MI.getOperand(3); |
5272 | 245 | BaseReg.Reg = MOBaseReg.getReg(); |
5273 | 245 | BaseReg.SubReg = MOBaseReg.getSubReg(); |
5274 | 245 | |
5275 | 245 | InsertedReg.Reg = MOInsertedReg.getReg(); |
5276 | 245 | InsertedReg.SubReg = MOInsertedReg.getSubReg(); |
5277 | 245 | InsertedReg.SubIdx = MOIndex.getImm() == 0 ? ARM::ssub_0 : ARM::ssub_1;
5278 | 245 | return true; |
5279 | 0 | } |
5280 | 0 | llvm_unreachable("Target dependent opcode missing"); |
5281 | 0 | } |
5282 | | |
5283 | | std::pair<unsigned, unsigned> |
5284 | 276 | ARMBaseInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { |
5285 | 276 | const unsigned Mask = ARMII::MO_OPTION_MASK; |
5286 | 276 | return std::make_pair(TF & Mask, TF & ~Mask); |
5287 | 276 | } |
5288 | | |
5289 | | ArrayRef<std::pair<unsigned, const char *>> |
5290 | 248 | ARMBaseInstrInfo::getSerializableDirectMachineOperandTargetFlags() const { |
5291 | 248 | using namespace ARMII; |
5292 | 248 | |
5293 | 248 | static const std::pair<unsigned, const char *> TargetFlags[] = { |
5294 | 248 | {MO_LO16, "arm-lo16"}, {MO_HI16, "arm-hi16"}}; |
5295 | 248 | return makeArrayRef(TargetFlags); |
5296 | 248 | } |
5297 | | |
5298 | | ArrayRef<std::pair<unsigned, const char *>> |
5299 | 34 | ARMBaseInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const { |
5300 | 34 | using namespace ARMII; |
5301 | 34 | |
5302 | 34 | static const std::pair<unsigned, const char *> TargetFlags[] = { |
5303 | 34 | {MO_COFFSTUB, "arm-coffstub"}, |
5304 | 34 | {MO_GOT, "arm-got"}, |
5305 | 34 | {MO_SBREL, "arm-sbrel"}, |
5306 | 34 | {MO_DLLIMPORT, "arm-dllimport"}, |
5307 | 34 | {MO_SECREL, "arm-secrel"}, |
5308 | 34 | {MO_NONLAZY, "arm-nonlazy"}}; |
5309 | 34 | return makeArrayRef(TargetFlags); |
5310 | 34 | } |
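 | | // These names are the ones the MIR printer/parser uses for operand
 | | // target flags; an operand would be serialized roughly as
 | | // "target-flags(arm-lo16) @sym" (the exact MIR syntax is an assumption
 | | // here, sketched only as an approximation).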
5311 | | |
5312 | | bool llvm::registerDefinedBetween(unsigned Reg, |
5313 | | MachineBasicBlock::iterator From, |
5314 | | MachineBasicBlock::iterator To, |
5315 | 11.9k | const TargetRegisterInfo *TRI) { |
5316 | 13.1k | for (auto I = From; I != To; ++I)
5317 | 1.28k | if (I->modifiesRegister(Reg, TRI)) |
5318 | 70 | return true; |
5319 | 11.9k | return false;
5320 | 11.9k | } |
5321 | | |
5322 | | MachineInstr *llvm::findCMPToFoldIntoCBZ(MachineInstr *Br, |
5323 | 18.8k | const TargetRegisterInfo *TRI) { |
5324 | 18.8k | // Search backwards for the instruction that defines CPSR. This may or
5325 | 18.8k | // may not be a CMP; we check that after this loop. If we find another
5326 | 18.8k | // instruction that reads CPSR, we return nullptr.
5327 | 18.8k | MachineBasicBlock::iterator CmpMI = Br; |
5328 | 21.1k | while (CmpMI != Br->getParent()->begin()) { |
5329 | 20.9k | --CmpMI; |
5330 | 20.9k | if (CmpMI->modifiesRegister(ARM::CPSR, TRI)) |
5331 | 18.6k | break; |
5332 | 2.28k | if (CmpMI->readsRegister(ARM::CPSR, TRI)) |
5333 | 14 | break; |
5334 | 2.28k | } |
5335 | 18.8k | |
5336 | 18.8k | // Check that this inst is a CMP r[0-7], #0 and that the register |
5337 | 18.8k | // is not redefined between the cmp and the br. |
5338 | 18.8k | if (CmpMI->getOpcode() != ARM::tCMPi8 && CmpMI->getOpcode() != ARM::t2CMPri)
5339 | 4.11k | return nullptr; |
5340 | 14.7k | unsigned Reg = CmpMI->getOperand(0).getReg(); |
5341 | 14.7k | unsigned PredReg = 0; |
5342 | 14.7k | ARMCC::CondCodes Pred = getInstrPredicate(*CmpMI, PredReg); |
5343 | 14.7k | if (Pred != ARMCC::AL || CmpMI->getOperand(1).getImm() != 0)
5344 | 2.12k | return nullptr; |
5345 | 12.6k | if (!isARMLowRegister(Reg)) |
5346 | 736 | return nullptr; |
5347 | 11.9k | if (registerDefinedBetween(Reg, CmpMI->getNextNode(), Br, TRI)) |
5348 | 70 | return nullptr; |
5349 | 11.8k | |
5350 | 11.8k | return &*CmpMI; |
5351 | 11.8k | } |
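 | | // Illustrative use (a hand-written sketch; the register and block names
 | | // are made up): callers use the returned CMP to fold a compare-against-
 | | // zero of a low register into a CBZ/CBNZ, e.g.
 | | //
 | | //   tCMPi8 $r3, 0, 14, $noreg, implicit-def $cpsr
 | | //   tBcc %bb.2, 0 /* eq */, killed $cpsr
 | | //
 | | // can become
 | | //
 | | //   tCBZ $r3, %bb.2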