/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // This file contains the Base ARM implementation of the TargetInstrInfo class. |
11 | | // |
12 | | //===----------------------------------------------------------------------===// |
13 | | |
14 | | #include "ARMBaseInstrInfo.h" |
15 | | #include "ARMBaseRegisterInfo.h" |
16 | | #include "ARMConstantPoolValue.h" |
17 | | #include "ARMFeatures.h" |
18 | | #include "ARMHazardRecognizer.h" |
19 | | #include "ARMMachineFunctionInfo.h" |
20 | | #include "ARMSubtarget.h" |
21 | | #include "MCTargetDesc/ARMAddressingModes.h" |
22 | | #include "MCTargetDesc/ARMBaseInfo.h" |
23 | | #include "llvm/ADT/DenseMap.h" |
24 | | #include "llvm/ADT/STLExtras.h" |
25 | | #include "llvm/ADT/SmallSet.h" |
26 | | #include "llvm/ADT/SmallVector.h" |
27 | | #include "llvm/ADT/Triple.h" |
28 | | #include "llvm/CodeGen/LiveVariables.h" |
29 | | #include "llvm/CodeGen/MachineBasicBlock.h" |
30 | | #include "llvm/CodeGen/MachineConstantPool.h" |
31 | | #include "llvm/CodeGen/MachineFrameInfo.h" |
32 | | #include "llvm/CodeGen/MachineFunction.h" |
33 | | #include "llvm/CodeGen/MachineInstr.h" |
34 | | #include "llvm/CodeGen/MachineInstrBuilder.h" |
35 | | #include "llvm/CodeGen/MachineMemOperand.h" |
36 | | #include "llvm/CodeGen/MachineOperand.h" |
37 | | #include "llvm/CodeGen/MachineRegisterInfo.h" |
38 | | #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" |
39 | | #include "llvm/CodeGen/SelectionDAGNodes.h" |
40 | | #include "llvm/CodeGen/TargetSchedule.h" |
41 | | #include "llvm/IR/Attributes.h" |
42 | | #include "llvm/IR/Constants.h" |
43 | | #include "llvm/IR/DebugLoc.h" |
44 | | #include "llvm/IR/Function.h" |
45 | | #include "llvm/IR/GlobalValue.h" |
46 | | #include "llvm/MC/MCAsmInfo.h" |
47 | | #include "llvm/MC/MCInstrDesc.h" |
48 | | #include "llvm/MC/MCInstrItineraries.h" |
49 | | #include "llvm/Support/BranchProbability.h" |
50 | | #include "llvm/Support/Casting.h" |
51 | | #include "llvm/Support/CommandLine.h" |
52 | | #include "llvm/Support/Compiler.h" |
53 | | #include "llvm/Support/Debug.h" |
54 | | #include "llvm/Support/ErrorHandling.h" |
55 | | #include "llvm/Support/raw_ostream.h" |
56 | | #include "llvm/Target/TargetInstrInfo.h" |
57 | | #include "llvm/Target/TargetMachine.h" |
58 | | #include "llvm/Target/TargetRegisterInfo.h" |
59 | | #include <algorithm> |
60 | | #include <cassert> |
61 | | #include <cstdint> |
62 | | #include <iterator> |
63 | | #include <new> |
64 | | #include <utility> |
65 | | #include <vector> |
66 | | |
67 | | using namespace llvm; |
68 | | |
69 | | #define DEBUG_TYPE "arm-instrinfo" |
70 | | |
71 | | #define GET_INSTRINFO_CTOR_DTOR |
72 | | #include "ARMGenInstrInfo.inc" |
73 | | |
74 | | static cl::opt<bool> |
75 | | EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden, |
76 | | cl::desc("Enable ARM 2-addr to 3-addr conv")); |
77 | | |
78 | | /// ARM_MLxEntry - Record information about MLA / MLS instructions. |
79 | | struct ARM_MLxEntry { |
80 | | uint16_t MLxOpc; // MLA / MLS opcode |
81 | | uint16_t MulOpc; // Expanded multiplication opcode |
82 | | uint16_t AddSubOpc; // Expanded add / sub opcode |
83 | | bool NegAcc; // True if the acc is negated before the add / sub. |
84 | | bool HasLane; // True if instruction has an extra "lane" operand. |
85 | | }; |
86 | | |
87 | | static const ARM_MLxEntry ARM_MLxTable[] = { |
88 | | // MLxOpc, MulOpc, AddSubOpc, NegAcc, HasLane |
89 | | // fp scalar ops |
90 | | { ARM::VMLAS, ARM::VMULS, ARM::VADDS, false, false }, |
91 | | { ARM::VMLSS, ARM::VMULS, ARM::VSUBS, false, false }, |
92 | | { ARM::VMLAD, ARM::VMULD, ARM::VADDD, false, false }, |
93 | | { ARM::VMLSD, ARM::VMULD, ARM::VSUBD, false, false }, |
94 | | { ARM::VNMLAS, ARM::VNMULS, ARM::VSUBS, true, false }, |
95 | | { ARM::VNMLSS, ARM::VMULS, ARM::VSUBS, true, false }, |
96 | | { ARM::VNMLAD, ARM::VNMULD, ARM::VSUBD, true, false }, |
97 | | { ARM::VNMLSD, ARM::VMULD, ARM::VSUBD, true, false }, |
98 | | |
99 | | // fp SIMD ops |
100 | | { ARM::VMLAfd, ARM::VMULfd, ARM::VADDfd, false, false }, |
101 | | { ARM::VMLSfd, ARM::VMULfd, ARM::VSUBfd, false, false }, |
102 | | { ARM::VMLAfq, ARM::VMULfq, ARM::VADDfq, false, false }, |
103 | | { ARM::VMLSfq, ARM::VMULfq, ARM::VSUBfq, false, false }, |
104 | | { ARM::VMLAslfd, ARM::VMULslfd, ARM::VADDfd, false, true }, |
105 | | { ARM::VMLSslfd, ARM::VMULslfd, ARM::VSUBfd, false, true }, |
106 | | { ARM::VMLAslfq, ARM::VMULslfq, ARM::VADDfq, false, true }, |
107 | | { ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true }, |
108 | | }; |
109 | | |
110 | | ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI) |
111 | | : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP), |
112 | 6.10k | Subtarget(STI) { |
113 | 103k | for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) { |
114 | 97.6k | if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second) |
115 | 0 | llvm_unreachable("Duplicated entries?"); |
116 | 97.6k | MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc); |
117 | 97.6k | MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc); |
118 | 97.6k | } |
119 | 6.10k | } |
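// Editor's note: a minimal sketch (not part of the covered file) of how the
// table built in the constructor above is meant to be consulted; the lookup
// below assumes access to the MLxEntryMap member populated there.
//
//   DenseMap<unsigned, unsigned>::const_iterator It = MLxEntryMap.find(Opcode);
//   if (It != MLxEntryMap.end()) {
//     const ARM_MLxEntry &E = ARM_MLxTable[It->second];
//     // E.MulOpc and E.AddSubOpc give the two-instruction expansion of the
//     // fused multiply-accumulate; E.NegAcc says whether the accumulator is
//     // negated before the add/sub, E.HasLane whether a lane operand follows.
//   }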
120 | | |
121 | | // Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl |
122 | | // currently defaults to no prepass hazard recognizer. |
123 | | ScheduleHazardRecognizer * |
124 | | ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, |
125 | 34.7k | const ScheduleDAG *DAG) const { |
126 | 34.7k | if (usePreRAHazardRecognizer()) { |
127 | 34.7k | const InstrItineraryData *II = |
128 | 34.7k | static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData(); |
129 | 34.7k | return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched"); |
130 | 34.7k | } |
131 | 0 | return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG); |
132 | 0 | } |
133 | | |
134 | | ScheduleHazardRecognizer *ARMBaseInstrInfo:: |
135 | | CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, |
136 | 12.5k | const ScheduleDAG *DAG) const { |
137 | 12.5k | if (Subtarget.isThumb2() || Subtarget.hasVFP2()) |
138 | 11.1k | return (ScheduleHazardRecognizer *)new ARMHazardRecognizer(II, DAG); |
139 | 1.46k | return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG); |
140 | 1.46k | } |
141 | | |
142 | | MachineInstr *ARMBaseInstrInfo::convertToThreeAddress( |
143 | 0 | MachineFunction::iterator &MFI, MachineInstr &MI, LiveVariables *LV) const { |
144 | 0 | // FIXME: Thumb2 support. |
145 | 0 | |
146 | 0 | if (!EnableARM3Addr) |
147 | 0 | return nullptr; |
148 | 0 | |
149 | 0 | MachineFunction &MF = *MI.getParent()->getParent(); |
150 | 0 | uint64_t TSFlags = MI.getDesc().TSFlags; |
151 | 0 | bool isPre = false; |
152 | 0 | switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) { |
153 | 0 | default: return nullptr; |
154 | 0 | case ARMII::IndexModePre: |
155 | 0 | isPre = true; |
156 | 0 | break; |
157 | 0 | case ARMII::IndexModePost: |
158 | 0 | break; |
159 | 0 | } |
160 | 0 | |
161 | 0 | // Try splitting an indexed load/store to an un-indexed one plus an add/sub |
162 | 0 | // operation. |
163 | 0 | unsigned MemOpc = getUnindexedOpcode(MI.getOpcode()); |
164 | 0 | if (MemOpc == 0) |
165 | 0 | return nullptr; |
166 | 0 |
|
167 | 0 | MachineInstr *UpdateMI = nullptr; |
168 | 0 | MachineInstr *MemMI = nullptr; |
169 | 0 | unsigned AddrMode = (TSFlags & ARMII::AddrModeMask); |
170 | 0 | const MCInstrDesc &MCID = MI.getDesc(); |
171 | 0 | unsigned NumOps = MCID.getNumOperands(); |
172 | 0 | bool isLoad = !MI.mayStore(); |
173 | 0 | const MachineOperand &WB = isLoad ? MI.getOperand(1) : MI.getOperand(0); |
174 | 0 | const MachineOperand &Base = MI.getOperand(2); |
175 | 0 | const MachineOperand &Offset = MI.getOperand(NumOps - 3); |
176 | 0 | unsigned WBReg = WB.getReg(); |
177 | 0 | unsigned BaseReg = Base.getReg(); |
178 | 0 | unsigned OffReg = Offset.getReg(); |
179 | 0 | unsigned OffImm = MI.getOperand(NumOps - 2).getImm(); |
180 | 0 | ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI.getOperand(NumOps - 1).getImm(); |
181 | 0 | switch (AddrMode) { |
182 | 0 | default: llvm_unreachable("Unknown indexed op!"); |
183 | 0 | case ARMII::AddrMode2: { |
184 | 0 | bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub; |
185 | 0 | unsigned Amt = ARM_AM::getAM2Offset(OffImm); |
186 | 0 | if (OffReg == 0) { |
187 | 0 | if (ARM_AM::getSOImmVal(Amt) == -1) |
188 | 0 | // Can't encode it in a so_imm operand. This transformation will |
189 | 0 | // add more than 1 instruction. Abandon! |
190 | 0 | return nullptr; |
191 | 0 | UpdateMI = BuildMI(MF, MI.getDebugLoc(), |
192 | 0 | get(isSub ? ARM::SUBri : ARM::ADDri), WBReg) |
193 | 0 | .addReg(BaseReg) |
194 | 0 | .addImm(Amt) |
195 | 0 | .add(predOps(Pred)) |
196 | 0 | .add(condCodeOp()); |
197 | 0 | } else if (Amt != 0) { |
198 | 0 | ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm); |
199 | 0 | unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt); |
200 | 0 | UpdateMI = BuildMI(MF, MI.getDebugLoc(), |
201 | 0 | get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg) |
202 | 0 | .addReg(BaseReg) |
203 | 0 | .addReg(OffReg) |
204 | 0 | .addReg(0) |
205 | 0 | .addImm(SOOpc) |
206 | 0 | .add(predOps(Pred)) |
207 | 0 | .add(condCodeOp()); |
208 | 0 | } else |
209 | 0 | UpdateMI = BuildMI(MF, MI.getDebugLoc(), |
210 | 0 | get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg) |
211 | 0 | .addReg(BaseReg) |
212 | 0 | .addReg(OffReg) |
213 | 0 | .add(predOps(Pred)) |
214 | 0 | .add(condCodeOp()); |
215 | 0 | break; |
216 | 0 | } |
217 | 0 | case ARMII::AddrMode3 : { |
218 | 0 | bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub; |
219 | 0 | unsigned Amt = ARM_AM::getAM3Offset(OffImm); |
220 | 0 | if (OffReg == 0) |
221 | 0 | // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand. |
222 | 0 | UpdateMI = BuildMI(MF, MI.getDebugLoc(), |
223 | 0 | get(isSub ? ARM::SUBri : ARM::ADDri), WBReg) |
224 | 0 | .addReg(BaseReg) |
225 | 0 | .addImm(Amt) |
226 | 0 | .add(predOps(Pred)) |
227 | 0 | .add(condCodeOp()); |
228 | 0 | else |
229 | 0 | UpdateMI = BuildMI(MF, MI.getDebugLoc(), |
230 | 0 | get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg) |
231 | 0 | .addReg(BaseReg) |
232 | 0 | .addReg(OffReg) |
233 | 0 | .add(predOps(Pred)) |
234 | 0 | .add(condCodeOp()); |
235 | 0 | break; |
236 | 0 | } |
237 | 0 | } |
238 | 0 | |
239 | 0 | std::vector<MachineInstr*> NewMIs; |
240 | 0 | if (isPre) { |
241 | 0 | if (isLoad) |
242 | 0 | MemMI = |
243 | 0 | BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg()) |
244 | 0 | .addReg(WBReg) |
245 | 0 | .addImm(0) |
246 | 0 | .addImm(Pred); |
247 | 0 | else |
248 | 0 | MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc)) |
249 | 0 | .addReg(MI.getOperand(1).getReg()) |
250 | 0 | .addReg(WBReg) |
251 | 0 | .addReg(0) |
252 | 0 | .addImm(0) |
253 | 0 | .addImm(Pred); |
254 | 0 | NewMIs.push_back(MemMI); |
255 | 0 | NewMIs.push_back(UpdateMI); |
256 | 0 | } else { |
257 | 0 | if (isLoad) |
258 | 0 | MemMI = |
259 | 0 | BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg()) |
260 | 0 | .addReg(BaseReg) |
261 | 0 | .addImm(0) |
262 | 0 | .addImm(Pred); |
263 | 0 | else |
264 | 0 | MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc)) |
265 | 0 | .addReg(MI.getOperand(1).getReg()) |
266 | 0 | .addReg(BaseReg) |
267 | 0 | .addReg(0) |
268 | 0 | .addImm(0) |
269 | 0 | .addImm(Pred); |
270 | 0 | if (WB.isDead()) |
271 | 0 | UpdateMI->getOperand(0).setIsDead(); |
272 | 0 | NewMIs.push_back(UpdateMI); |
273 | 0 | NewMIs.push_back(MemMI); |
274 | 0 | } |
275 | 0 | |
276 | 0 | // Transfer LiveVariables states, kill / dead info. |
277 | 0 | if (LV) { |
278 | 0 | for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { |
279 | 0 | MachineOperand &MO = MI.getOperand(i); |
280 | 0 | if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) { |
281 | 0 | unsigned Reg = MO.getReg(); |
282 | 0 | |
283 | 0 | LiveVariables::VarInfo &VI = LV->getVarInfo(Reg); |
284 | 0 | if (MO.isDef()) { |
285 | 0 | MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI; |
286 | 0 | if (MO.isDead()) |
287 | 0 | LV->addVirtualRegisterDead(Reg, *NewMI); |
288 | 0 | } |
289 | 0 | if (MO.isUse() && MO.isKill()) { |
290 | 0 | for (unsigned j = 0; j < 2; ++j) { |
291 | 0 | // Look at the two new MI's in reverse order. |
292 | 0 | MachineInstr *NewMI = NewMIs[j]; |
293 | 0 | if (!NewMI->readsRegister(Reg)) |
294 | 0 | continue; |
295 | 0 | LV->addVirtualRegisterKilled(Reg, *NewMI); |
296 | 0 | if (VI.removeKill(MI)) |
297 | 0 | VI.Kills.push_back(NewMI); |
298 | 0 | break; |
299 | 0 | } |
300 | 0 | } |
301 | 0 | } |
302 | 0 | } |
303 | 0 | } |
304 | 0 | |
305 | 0 | MachineBasicBlock::iterator MBBI = MI.getIterator(); |
306 | 0 | MFI->insert(MBBI, NewMIs[1]); |
307 | 0 | MFI->insert(MBBI, NewMIs[0]); |
308 | 0 | return NewMIs[0]; |
309 | 0 | } |
310 | | |
311 | | // Branch analysis. |
312 | | bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB, |
313 | | MachineBasicBlock *&TBB, |
314 | | MachineBasicBlock *&FBB, |
315 | | SmallVectorImpl<MachineOperand> &Cond, |
316 | 1.73M | bool AllowModify) const { |
317 | 1.73M | TBB = nullptr; |
318 | 1.73M | FBB = nullptr; |
319 | 1.73M | |
320 | 1.73M | MachineBasicBlock::iterator I = MBB.end(); |
321 | 1.73M | if (I == MBB.begin()) |
322 | 11.8k | return false; // Empty blocks are easy. |
323 | 1.72M | --I; |
324 | 1.72M | |
325 | 1.72M | // Walk backwards from the end of the basic block until the branch is |
326 | 1.72M | // analyzed or we give up. |
327 | 2.91M | while (isPredicated(*I) || I->isTerminator() || I->isDebugValue()) { |
328 | 1.48M | // Flag to be raised on unanalyzeable instructions. This is useful in cases |
329 | 1.48M | // where we want to clean up on the end of the basic block before we bail |
330 | 1.48M | // out. |
331 | 1.48M | bool CantAnalyze = false; |
332 | 1.48M | |
333 | 1.48M | // Skip over DEBUG values and predicated nonterminators. |
334 | 1.73M | while (I->isDebugValue() || !I->isTerminator()) { |
335 | 279k | if (I == MBB.begin()) |
336 | 36.5k | return false; |
337 | 242k | --I; |
338 | 242k | } |
339 | 1.48M | |
340 | 1.45M | if (isIndirectBranchOpcode(I->getOpcode()) || |
341 | 1.45M | isJumpTableBranchOpcode(I->getOpcode())) { |
342 | 24.9k | // Indirect branches and jump tables can't be analyzed, but we still want |
343 | 24.9k | // to clean up any instructions at the tail of the basic block. |
344 | 24.9k | CantAnalyze = true; |
345 | 1.45M | } else if (isUncondBranchOpcode(I->getOpcode())) { |
346 | 321k | TBB = I->getOperand(0).getMBB(); |
347 | 1.42M | } else if (isCondBranchOpcode(I->getOpcode())) { |
348 | 882k | // Bail out if we encounter multiple conditional branches. |
349 | 882k | if (!Cond.empty()) |
350 | 6.97k | return true; |
351 | 875k | |
352 | 882k | assert(!FBB && "FBB should have been null."); |
353 | 875k | FBB = TBB; |
354 | 875k | TBB = I->getOperand(0).getMBB(); |
355 | 875k | Cond.push_back(I->getOperand(1)); |
356 | 875k | Cond.push_back(I->getOperand(2)); |
357 | 1.10M | } else if (I->isReturn()) { |
358 | 217k | // Returns can't be analyzed, but we should run cleanup. |
359 | 217k | CantAnalyze = !isPredicated(*I); |
360 | 221k | } else { |
361 | 3.71k | // We encountered other unrecognized terminator. Bail out immediately. |
362 | 3.71k | return true; |
363 | 3.71k | } |
364 | 1.43M | |
365 | 1.43M | // Cleanup code - to be run for unpredicated unconditional branches and |
366 | 1.43M | // returns. |
367 | 1.43M | if (!isPredicated(*I) && |
368 | 557k | (isUncondBranchOpcode(I->getOpcode()) || |
369 | 236k | isIndirectBranchOpcode(I->getOpcode()) || |
370 | 235k | isJumpTableBranchOpcode(I->getOpcode()) || |
371 | 1.43M | I->isReturn())) { |
372 | 557k | // Forget any previous condition branch information - it no longer applies. |
373 | 557k | Cond.clear(); |
374 | 557k | FBB = nullptr; |
375 | 557k | |
376 | 557k | // If we can modify the function, delete everything below this |
377 | 557k | // unconditional branch. |
378 | 557k | if (AllowModify) { |
379 | 339k | MachineBasicBlock::iterator DI = std::next(I); |
380 | 339k | while (DI != MBB.end()) { |
381 | 6 | MachineInstr &InstToDelete = *DI; |
382 | 6 | ++DI; |
383 | 6 | InstToDelete.eraseFromParent(); |
384 | 6 | } |
385 | 339k | } |
386 | 557k | } |
387 | 1.43M | |
388 | 1.43M | if (CantAnalyze) |
389 | 236k | return true; |
390 | 1.20M | |
391 | 1.20M | if (I == MBB.begin()) |
392 | 16.9k | return false; |
393 | 1.18M | |
394 | 1.18M | --I; |
395 | 1.18M | } |
396 | 1.72M | |
397 | 1.72M | // We made it past the terminators without bailing out - we must have |
398 | 1.72M | // analyzed this branch successfully. |
399 | 1.42M | return false; |
400 | 1.73M | } |
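// Editor's note: an illustrative sketch (assuming the standard
// TargetInstrInfo contract, not taken from this file) of what a caller sees
// after the analysis above succeeds on a block ending in a conditional
// branch followed by an unconditional one:
//
//   MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
//   SmallVector<MachineOperand, 4> Cond;
//   if (!TII->analyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify=*/false)) {
//     // TBB is the conditional target, FBB the unconditional one, and Cond
//     // holds the two operands pushed above: the ARMCC condition-code
//     // immediate and the CPSR register operand.
//   }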
401 | | |
402 | | unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB, |
403 | 180k | int *BytesRemoved) const { |
404 | 180k | assert(!BytesRemoved && "code size not handled"); |
405 | 180k | |
406 | 180k | MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); |
407 | 180k | if (I == MBB.end()) |
408 | 2 | return 0; |
409 | 180k | |
410 | 180k | if (!isUncondBranchOpcode(I->getOpcode()) && |
411 | 105k | !isCondBranchOpcode(I->getOpcode())) |
412 | 4.65k | return 0; |
413 | 175k | |
414 | 175k | // Remove the branch. |
415 | 175k | I->eraseFromParent(); |
416 | 175k | |
417 | 175k | I = MBB.end(); |
418 | 175k | |
419 | 175k | if (I == MBB.begin()) return 1; |
420 | 167k | --I; |
421 | 167k | if (!isCondBranchOpcode(I->getOpcode())) |
422 | 134k | return 1; |
423 | 32.7k | |
424 | 32.7k | // Remove the branch. |
425 | 32.7k | I->eraseFromParent(); |
426 | 32.7k | return 2; |
427 | 32.7k | } |
428 | | |
429 | | unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB, |
430 | | MachineBasicBlock *TBB, |
431 | | MachineBasicBlock *FBB, |
432 | | ArrayRef<MachineOperand> Cond, |
433 | | const DebugLoc &DL, |
434 | 177k | int *BytesAdded) const { |
435 | 177k | assert(!BytesAdded && "code size not handled"); |
436 | 177k | ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>(); |
437 | 177k | int BOpc = !AFI->isThumbFunction() |
438 | 177k | ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB); |
439 | 177k | int BccOpc = !AFI->isThumbFunction() |
440 | 177k | ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc); |
441 | 3.04k | bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function(); |
442 | 177k | |
443 | 177k | // Shouldn't be a fall through. |
444 | 177k | assert(TBB && "insertBranch must not be told to insert a fallthrough"); |
445 | 177k | assert((Cond.size() == 2 || Cond.size() == 0) && |
446 | 177k | "ARM branch conditions have two components!"); |
447 | 177k | |
448 | 177k | // For conditional branches, we use addOperand to preserve CPSR flags. |
449 | 177k | |
450 | 177k | if (!FBB) { |
451 | 172k | if (Cond.empty()) { // Unconditional branch? |
452 | 47.3k | if (isThumb) |
453 | 46.7k | BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).add(predOps(ARMCC::AL)); |
454 | 47.3k | else |
455 | 636 | BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB); |
456 | 47.3k | } else |
457 | 125k | BuildMI(&MBB, DL, get(BccOpc)) |
458 | 125k | .addMBB(TBB) |
459 | 125k | .addImm(Cond[0].getImm()) |
460 | 125k | .add(Cond[1]); |
461 | 172k | return 1; |
462 | 172k | } |
463 | 5.42k | |
464 | 5.42k | // Two-way conditional branch. |
465 | 5.42k | BuildMI(&MBB, DL, get(BccOpc)) |
466 | 5.42k | .addMBB(TBB) |
467 | 5.42k | .addImm(Cond[0].getImm()) |
468 | 5.42k | .add(Cond[1]); |
469 | 5.42k | if (isThumb) |
470 | 5.35k | BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(ARMCC::AL)); |
471 | 5.42k | else |
472 | 62 | BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB); |
473 | 177k | return 2; |
474 | 177k | } |
475 | | |
476 | | bool ARMBaseInstrInfo:: |
477 | 159k | reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { |
478 | 159k | ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm(); |
479 | 159k | Cond[0].setImm(ARMCC::getOppositeCondition(CC)); |
480 | 159k | return false; |
481 | 159k | } |
482 | | |
483 | 4.87M | bool ARMBaseInstrInfo::isPredicated(const MachineInstr &MI) const { |
484 | 4.87M | if (MI.isBundle()) { |
485 | 13.1k | MachineBasicBlock::const_instr_iterator I = MI.getIterator(); |
486 | 13.1k | MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); |
487 | 26.3k | while (++I != E && I->isInsideBundle()) { |
488 | 26.3k | int PIdx = I->findFirstPredOperandIdx(); |
489 | 26.3k | int PIdx = I->findFirstPredOperandIdx(); wait
490 | 13.1k | return true; |
491 | 26.3k | } |
492 | 28 | return false; |
493 | 4.85M | } |
494 | 4.85M | |
495 | 4.85M | int PIdx = MI.findFirstPredOperandIdx(); |
496 | 4.63M | return PIdx != -1 && MI.getOperand(PIdx).getImm() != ARMCC::AL; |
497 | 4.87M | } |
498 | | |
499 | | bool ARMBaseInstrInfo::PredicateInstruction( |
500 | 5.20k | MachineInstr &MI, ArrayRef<MachineOperand> Pred) const { |
501 | 5.20k | unsigned Opc = MI.getOpcode(); |
502 | 5.20k | if (isUncondBranchOpcode(Opc)) { |
503 | 0 | MI.setDesc(get(getMatchingCondBranchOpcode(Opc))); |
504 | 0 | MachineInstrBuilder(*MI.getParent()->getParent(), MI) |
505 | 0 | .addImm(Pred[0].getImm()) |
506 | 0 | .addReg(Pred[1].getReg()); |
507 | 0 | return true; |
508 | 0 | } |
509 | 5.20k | |
510 | 5.20k | int PIdx = MI.findFirstPredOperandIdx(); |
511 | 5.20k | if (PIdx != -1) { |
512 | 5.20k | MachineOperand &PMO = MI.getOperand(PIdx); |
513 | 5.20k | PMO.setImm(Pred[0].getImm()); |
514 | 5.20k | MI.getOperand(PIdx+1).setReg(Pred[1].getReg()); |
515 | 5.20k | return true; |
516 | 5.20k | } |
517 | 0 | return false; |
518 | 0 | } |
519 | | |
520 | | bool ARMBaseInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1, |
521 | 1.20k | ArrayRef<MachineOperand> Pred2) const { |
522 | 1.20k | if (Pred1.size() > 2 || Pred2.size() > 2) |
523 | 0 | return false; |
524 | 1.20k | |
525 | 1.20k | ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm(); |
526 | 1.20k | ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm(); |
527 | 1.20k | if (CC1 == CC2) |
528 | 556 | return true; |
529 | 646 | |
530 | 646 | switch (CC1) { |
531 | 414 | default: |
532 | 414 | return false; |
533 | 0 | case ARMCC::AL: |
534 | 0 | return true; |
535 | 147 | case ARMCC::HS: |
536 | 147 | return CC2 == ARMCC::HI; |
537 | 76 | case ARMCC::LS: |
538 | 70 | return CC2 == ARMCC::LO || CC2 == ARMCC::EQ; |
539 | 7 | case ARMCC::GE: |
540 | 7 | return CC2 == ARMCC::GT; |
541 | 2 | case ARMCC::LE: |
542 | 2 | return CC2 == ARMCC::LT; |
543 | 0 | } |
544 | 0 | } |
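// Editor's note: a worked example of the subsumption relation implemented
// above (added for clarity, not present in the original source).  With
// Pred1 = {ARMCC::HS, CPSR} and Pred2 = {ARMCC::HI, CPSR} the switch returns
// true: anything executed only when "unsigned higher" holds would also
// execute under "unsigned higher or same".  Querying the reverse direction
// falls into the default case and returns false.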
545 | | |
546 | | bool ARMBaseInstrInfo::DefinesPredicate( |
547 | 147k | MachineInstr &MI, std::vector<MachineOperand> &Pred) const { |
548 | 147k | bool Found = false; |
549 | 899k | for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { |
550 | 751k | const MachineOperand &MO = MI.getOperand(i); |
551 | 751k | if ((MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) || |
552 | 751k | (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)) { |
553 | 25.3k | Pred.push_back(MO); |
554 | 25.3k | Found = true; |
555 | 25.3k | } |
556 | 751k | } |
557 | 147k | |
558 | 147k | return Found; |
559 | 147k | } |
560 | | |
561 | 39 | bool ARMBaseInstrInfo::isCPSRDefined(const MachineInstr &MI) { |
562 | 39 | for (const auto &MO : MI.operands()) |
563 | 210 | if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead()) |
564 | 5 | return true; |
565 | 34 | return false; |
566 | 34 | } |
567 | | |
568 | | bool ARMBaseInstrInfo::isAddrMode3OpImm(const MachineInstr &MI, |
569 | 0 | unsigned Op) const { |
570 | 0 | const MachineOperand &Offset = MI.getOperand(Op + 1); |
571 | 0 | return Offset.getReg() != 0; |
572 | 0 | } |
573 | | |
574 | | // Load with negative register offset requires additional 1cyc and +I unit |
575 | | // for Cortex A57 |
576 | | bool ARMBaseInstrInfo::isAddrMode3OpMinusReg(const MachineInstr &MI, |
577 | 0 | unsigned Op) const { |
578 | 0 | const MachineOperand &Offset = MI.getOperand(Op + 1); |
579 | 0 | const MachineOperand &Opc = MI.getOperand(Op + 2); |
580 | 0 | assert(Opc.isImm()); |
581 | 0 | assert(Offset.isReg()); |
582 | 0 | int64_t OpcImm = Opc.getImm(); |
583 | 0 |
|
584 | 0 | bool isSub = ARM_AM::getAM3Op(OpcImm) == ARM_AM::sub; |
585 | 0 | return (isSub && Offset.getReg() != 0); |
586 | 0 | } |
587 | | |
588 | | bool ARMBaseInstrInfo::isLdstScaledReg(const MachineInstr &MI, |
589 | 0 | unsigned Op) const { |
590 | 0 | const MachineOperand &Opc = MI.getOperand(Op + 2); |
591 | 0 | unsigned OffImm = Opc.getImm(); |
592 | 0 | return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift; |
593 | 0 | } |
594 | | |
595 | | // Load, scaled register offset, not plus LSL2 |
596 | | bool ARMBaseInstrInfo::isLdstScaledRegNotPlusLsl2(const MachineInstr &MI, |
597 | 3 | unsigned Op) const { |
598 | 3 | const MachineOperand &Opc = MI.getOperand(Op + 2); |
599 | 3 | unsigned OffImm = Opc.getImm(); |
600 | 3 | |
601 | 3 | bool isAdd = ARM_AM::getAM2Op(OffImm) == ARM_AM::add; |
602 | 3 | unsigned Amt = ARM_AM::getAM2Offset(OffImm); |
603 | 3 | ARM_AM::ShiftOpc ShiftOpc = ARM_AM::getAM2ShiftOpc(OffImm); |
604 | 3 | if (ShiftOpc == ARM_AM::no_shift) return false; // not scaled |
605 | 3 | bool SimpleScaled = (isAdd && ShiftOpc == ARM_AM::lsl && Amt == 2); |
606 | 3 | return !SimpleScaled; |
607 | 3 | } |
608 | | |
609 | | // Minus reg for ldstso addr mode |
610 | | bool ARMBaseInstrInfo::isLdstSoMinusReg(const MachineInstr &MI, |
611 | 3 | unsigned Op) const { |
612 | 3 | unsigned OffImm = MI.getOperand(Op + 2).getImm(); |
613 | 3 | return ARM_AM::getAM2Op(OffImm) == ARM_AM::sub; |
614 | 3 | } |
615 | | |
616 | | // Load, scaled register offset |
617 | | bool ARMBaseInstrInfo::isAm2ScaledReg(const MachineInstr &MI, |
618 | 0 | unsigned Op) const { |
619 | 0 | unsigned OffImm = MI.getOperand(Op + 2).getImm(); |
620 | 0 | return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift; |
621 | 0 | } |
622 | | |
623 | 135k | static bool isEligibleForITBlock(const MachineInstr *MI) { |
624 | 135k | switch (MI->getOpcode()) { |
625 | 135k | default: return true; |
626 | 39 | case ARM::tADC: // ADC (register) T1 |
627 | 39 | case ARM::tADDi3: // ADD (immediate) T1 |
628 | 39 | case ARM::tADDi8: // ADD (immediate) T2 |
629 | 39 | case ARM::tADDrr: // ADD (register) T1 |
630 | 39 | case ARM::tAND: // AND (register) T1 |
631 | 39 | case ARM::tASRri: // ASR (immediate) T1 |
632 | 39 | case ARM::tASRrr: // ASR (register) T1 |
633 | 39 | case ARM::tBIC: // BIC (register) T1 |
634 | 39 | case ARM::tEOR: // EOR (register) T1 |
635 | 39 | case ARM::tLSLri: // LSL (immediate) T1 |
636 | 39 | case ARM::tLSLrr: // LSL (register) T1 |
637 | 39 | case ARM::tLSRri: // LSR (immediate) T1 |
638 | 39 | case ARM::tLSRrr: // LSR (register) T1 |
639 | 39 | case ARM::tMUL: // MUL T1 |
640 | 39 | case ARM::tMVN: // MVN (register) T1 |
641 | 39 | case ARM::tORR: // ORR (register) T1 |
642 | 39 | case ARM::tROR: // ROR (register) T1 |
643 | 39 | case ARM::tRSB: // RSB (immediate) T1 |
644 | 39 | case ARM::tSBC: // SBC (register) T1 |
645 | 39 | case ARM::tSUBi3: // SUB (immediate) T1 |
646 | 39 | case ARM::tSUBi8: // SUB (immediate) T2 |
647 | 39 | case ARM::tSUBrr: // SUB (register) T1 |
648 | 39 | return !ARMBaseInstrInfo::isCPSRDefined(*MI); |
649 | 0 | } |
650 | 0 | } |
651 | | |
652 | | /// isPredicable - Return true if the specified instruction can be predicated. |
653 | | /// By default, this returns true for every instruction with a |
654 | | /// PredicateOperand. |
655 | 147k | bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const { |
656 | 147k | if (!MI.isPredicable()) |
657 | 12.1k | return false; |
658 | 135k | |
659 | 135k | if (MI.isBundle()) |
660 | 26 | return false; |
661 | 135k | |
662 | 135k | if (!isEligibleForITBlock(&MI)) |
663 | 5 | return false; |
664 | 135k | |
665 | 135k | const ARMFunctionInfo *AFI = |
666 | 135k | MI.getParent()->getParent()->getInfo<ARMFunctionInfo>(); |
667 | 135k | |
668 | 135k | // Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM. |
669 | 135k | // In their ARM encoding, they can't be encoded in a conditional form. |
670 | 135k | if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) |
671 | 2.55k | return false; |
672 | 132k | |
673 | 132k | if (AFI->isThumb2Function()) { |
674 | 115k | if (getSubtarget().restrictIT()) |
675 | 700 | return isV8EligibleForIT(&MI); |
676 | 132k | } |
677 | 132k | |
678 | 132k | return true; |
679 | 132k | } |
680 | | |
681 | | namespace llvm { |
682 | | |
683 | 64 | template <> bool IsCPSRDead<MachineInstr>(const MachineInstr *MI) { |
684 | 414 | for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { |
685 | 350 | const MachineOperand &MO = MI->getOperand(i); |
686 | 350 | if (!MO.isReg() || MO.isUndef() || MO.isUse()) |
687 | 222 | continue; |
688 | 128 | if (MO.getReg() != ARM::CPSR) |
689 | 64 | continue; |
690 | 64 | if (!MO.isDead()) |
691 | 0 | return false; |
692 | 350 | } |
693 | 64 | // all definitions of CPSR are dead |
694 | 64 | return true; |
695 | 64 | } |
696 | | |
697 | | } // end namespace llvm |
698 | | |
699 | | /// GetInstSize - Return the size of the specified MachineInstr. |
700 | | /// |
701 | 1.67M | unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { |
702 | 1.67M | const MachineBasicBlock &MBB = *MI.getParent(); |
703 | 1.67M | const MachineFunction *MF = MBB.getParent(); |
704 | 1.67M | const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo(); |
705 | 1.67M | |
706 | 1.67M | const MCInstrDesc &MCID = MI.getDesc(); |
707 | 1.67M | if (MCID.getSize()) |
708 | 1.21M | return MCID.getSize(); |
709 | 456k | |
710 | 456k | // If this machine instr is an inline asm, measure it. |
711 | 456k | if (MI.getOpcode() == ARM::INLINEASM) |
712 | 198k | return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI); |
713 | 257k | unsigned Opc = MI.getOpcode(); |
714 | 257k | switch (Opc) { |
715 | 192k | default: |
716 | 192k | // pseudo-instruction sizes are zero. |
717 | 192k | return 0; |
718 | 0 | case TargetOpcode::BUNDLE: |
719 | 0 | return getInstBundleLength(MI); |
720 | 30.3k | case ARM::MOVi16_ga_pcrel: |
721 | 30.3k | case ARM::MOVTi16_ga_pcrel: |
722 | 30.3k | case ARM::t2MOVi16_ga_pcrel: |
723 | 30.3k | case ARM::t2MOVTi16_ga_pcrel: |
724 | 30.3k | return 4; |
725 | 7 | case ARM::MOVi32imm: |
726 | 7 | case ARM::t2MOVi32imm: |
727 | 7 | return 8; |
728 | 35.2k | case ARM::CONSTPOOL_ENTRY: |
729 | 35.2k | case ARM::JUMPTABLE_INSTS: |
730 | 35.2k | case ARM::JUMPTABLE_ADDRS: |
731 | 35.2k | case ARM::JUMPTABLE_TBB: |
732 | 35.2k | case ARM::JUMPTABLE_TBH: |
733 | 35.2k | // If this machine instr is a constant pool entry, its size is recorded as |
734 | 35.2k | // operand #2. |
735 | 35.2k | return MI.getOperand(2).getImm(); |
736 | 4 | case ARM::Int_eh_sjlj_longjmp: |
737 | 4 | return 16; |
738 | 2 | case ARM::tInt_eh_sjlj_longjmp: |
739 | 2 | return 10; |
740 | 3 | case ARM::tInt_WIN_eh_sjlj_longjmp: |
741 | 3 | return 12; |
742 | 7 | case ARM::Int_eh_sjlj_setjmp: |
743 | 7 | case ARM::Int_eh_sjlj_setjmp_nofp: |
744 | 7 | return 20; |
745 | 13 | case ARM::tInt_eh_sjlj_setjmp: |
746 | 13 | case ARM::t2Int_eh_sjlj_setjmp: |
747 | 13 | case ARM::t2Int_eh_sjlj_setjmp_nofp: |
748 | 13 | return 12; |
749 | 26 | case ARM::SPACE: |
750 | 26 | return MI.getOperand(1).getImm(); |
751 | 0 | } |
752 | 0 | } |
753 | | |
754 | 0 | unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr &MI) const { |
755 | 0 | unsigned Size = 0; |
756 | 0 | MachineBasicBlock::const_instr_iterator I = MI.getIterator(); |
757 | 0 | MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); |
758 | 0 | while (++I != E && 0 I->isInsideBundle()0 ) { |
759 | 0 | assert(!I->isBundle() && "No nested bundle!"); |
760 | 0 | Size += getInstSizeInBytes(*I); |
761 | 0 | } |
762 | 0 | return Size; |
763 | 0 | } |
764 | | |
765 | | void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB, |
766 | | MachineBasicBlock::iterator I, |
767 | | unsigned DestReg, bool KillSrc, |
768 | 4 | const ARMSubtarget &Subtarget) const { |
769 | 4 | unsigned Opc = Subtarget.isThumb() |
770 | 2 | ? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR) |
771 | 2 | : ARM::MRS; |
772 | 4 | |
773 | 4 | MachineInstrBuilder MIB = |
774 | 4 | BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg); |
775 | 4 | |
776 | 4 | // There is only 1 A/R class MRS instruction, and it always refers to |
777 | 4 | // APSR. However, there are lots of other possibilities on M-class cores. |
778 | 4 | if (Subtarget.isMClass()) |
779 | 1 | MIB.addImm(0x800); |
780 | 4 | |
781 | 4 | MIB.add(predOps(ARMCC::AL)) |
782 | 4 | .addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc)); |
783 | 4 | } |
784 | | |
785 | | void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB, |
786 | | MachineBasicBlock::iterator I, |
787 | | unsigned SrcReg, bool KillSrc, |
788 | 4 | const ARMSubtarget &Subtarget) const { |
789 | 4 | unsigned Opc = Subtarget.isThumb() |
790 | 2 | ? (Subtarget.isMClass() ? ARM::t2MSR_M : ARM::t2MSR_AR) |
791 | 2 | : ARM::MSR; |
792 | 4 | |
793 | 4 | MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc)); |
794 | 4 | |
795 | 4 | if (Subtarget.isMClass()) |
796 | 1 | MIB.addImm(0x800); |
797 | 4 | else |
798 | 3 | MIB.addImm(8); |
799 | 4 | |
800 | 4 | MIB.addReg(SrcReg, getKillRegState(KillSrc)) |
801 | 4 | .add(predOps(ARMCC::AL)) |
802 | 4 | .addReg(ARM::CPSR, RegState::Implicit | RegState::Define); |
803 | 4 | } |
804 | | |
805 | | void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, |
806 | | MachineBasicBlock::iterator I, |
807 | | const DebugLoc &DL, unsigned DestReg, |
808 | 8.85k | unsigned SrcReg, bool KillSrc) const { |
809 | 8.85k | bool GPRDest = ARM::GPRRegClass.contains(DestReg); |
810 | 8.85k | bool GPRSrc = ARM::GPRRegClass.contains(SrcReg); |
811 | 8.85k | |
812 | 8.85k | if (GPRDest && GPRSrc) { |
813 | 4.58k | BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg) |
814 | 4.58k | .addReg(SrcReg, getKillRegState(KillSrc)) |
815 | 4.58k | .add(predOps(ARMCC::AL)) |
816 | 4.58k | .add(condCodeOp()); |
817 | 4.58k | return; |
818 | 4.58k | } |
819 | 4.27k | |
820 | 4.27k | bool SPRDest = ARM::SPRRegClass.contains(DestReg); |
821 | 4.27k | bool SPRSrc = ARM::SPRRegClass.contains(SrcReg); |
822 | 4.27k | |
823 | 4.27k | unsigned Opc = 0; |
824 | 4.27k | if (SPRDest && SPRSrc) |
825 | 1.64k | Opc = ARM::VMOVS; |
826 | 2.62k | else if (GPRDest && SPRSrc) |
827 | 256 | Opc = ARM::VMOVRS; |
828 | 2.36k | else if (SPRDest && GPRSrc) |
829 | 215 | Opc = ARM::VMOVSR; |
830 | 2.15k | else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && !Subtarget.isFPOnlySP()) |
831 | 1.63k | Opc = ARM::VMOVD; |
832 | 516 | else if (ARM::QPRRegClass.contains(DestReg, SrcReg)) |
833 | 450 | Opc = ARM::VORRq; |
834 | 4.27k | |
835 | 4.27k | if (Opc) { |
836 | 4.20k | MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg); |
837 | 4.20k | MIB.addReg(SrcReg, getKillRegState(KillSrc)); |
838 | 4.20k | if (Opc == ARM::VORRq) |
839 | 450 | MIB.addReg(SrcReg, getKillRegState(KillSrc)); |
840 | 4.20k | MIB.add(predOps(ARMCC::AL)); |
841 | 4.20k | return; |
842 | 4.20k | } |
843 | 66 | |
844 | 66 | // Handle register classes that require multiple instructions. |
845 | 66 | unsigned BeginIdx = 0; |
846 | 66 | unsigned SubRegs = 0; |
847 | 66 | int Spacing = 1; |
848 | 66 | |
849 | 66 | // Use VORRq when possible. |
850 | 66 | if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) { |
851 | 0 | Opc = ARM::VORRq; |
852 | 0 | BeginIdx = ARM::qsub_0; |
853 | 0 | SubRegs = 2; |
854 | 66 | } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) { |
855 | 5 | Opc = ARM::VORRq; |
856 | 5 | BeginIdx = ARM::qsub_0; |
857 | 5 | SubRegs = 4; |
858 | 5 | // Fall back to VMOVD. |
859 | 66 | } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) { |
860 | 4 | Opc = ARM::VMOVD; |
861 | 4 | BeginIdx = ARM::dsub_0; |
862 | 4 | SubRegs = 2; |
863 | 61 | } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) { |
864 | 0 | Opc = ARM::VMOVD; |
865 | 0 | BeginIdx = ARM::dsub_0; |
866 | 0 | SubRegs = 3; |
867 | 57 | } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) { |
868 | 0 | Opc = ARM::VMOVD; |
869 | 0 | BeginIdx = ARM::dsub_0; |
870 | 0 | SubRegs = 4; |
871 | 57 | } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) { |
872 | 2 | Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr; |
873 | 2 | BeginIdx = ARM::gsub_0; |
874 | 2 | SubRegs = 2; |
875 | 57 | } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) { |
876 | 0 | Opc = ARM::VMOVD; |
877 | 0 | BeginIdx = ARM::dsub_0; |
878 | 0 | SubRegs = 2; |
879 | 0 | Spacing = 2; |
880 | 55 | } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) { |
881 | 0 | Opc = ARM::VMOVD; |
882 | 0 | BeginIdx = ARM::dsub_0; |
883 | 0 | SubRegs = 3; |
884 | 0 | Spacing = 2; |
885 | 55 | } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) { |
886 | 0 | Opc = ARM::VMOVD; |
887 | 0 | BeginIdx = ARM::dsub_0; |
888 | 0 | SubRegs = 4; |
889 | 0 | Spacing = 2; |
890 | 55 | } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.isFPOnlySP()) { |
891 | 47 | Opc = ARM::VMOVS; |
892 | 47 | BeginIdx = ARM::ssub_0; |
893 | 47 | SubRegs = 2; |
894 | 55 | } else if (SrcReg == ARM::CPSR) { |
895 | 4 | copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget); |
896 | 4 | return; |
897 | 4 | } else if (DestReg == ARM::CPSR) { |
898 | 4 | copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget); |
899 | 4 | return; |
900 | 4 | } |
901 | 58 | |
902 | 66 | assert(Opc && "Impossible reg-to-reg copy"); |
903 | 58 | |
904 | 58 | const TargetRegisterInfo *TRI = &getRegisterInfo(); |
905 | 58 | MachineInstrBuilder Mov; |
906 | 58 | |
907 | 58 | // Copy register tuples backward when the first Dest reg overlaps with SrcReg. |
908 | 58 | if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) { |
909 | 0 | BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing); |
910 | 0 | Spacing = -Spacing; |
911 | 0 | } |
912 | | #ifndef NDEBUG |
913 | | SmallSet<unsigned, 4> DstRegs; |
914 | | #endif |
915 | 184 | for (unsigned i = 0; i != SubRegs; ++i) { |
916 | 126 | unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing); |
917 | 126 | unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing); |
918 | 126 | assert(Dst && Src && "Bad sub-register"); |
919 | | #ifndef NDEBUG |
920 | | assert(!DstRegs.count(Src) && "destructive vector copy"); |
921 | | DstRegs.insert(Dst); |
922 | | #endif |
923 | | Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src); |
924 | 126 | // VORR takes two source operands. |
925 | 126 | if (Opc == ARM::VORRq) |
926 | 20 | Mov.addReg(Src); |
927 | 126 | Mov = Mov.add(predOps(ARMCC::AL)); |
928 | 126 | // MOVr can set CC. |
929 | 126 | if (Opc == ARM::MOVr) |
930 | 2 | Mov = Mov.add(condCodeOp()); |
931 | 126 | } |
932 | 58 | // Add implicit super-register defs and kills to the last instruction. |
933 | 58 | Mov->addRegisterDefined(DestReg, TRI); |
934 | 58 | if (KillSrc) |
935 | 21 | Mov->addRegisterKilled(SrcReg, TRI); |
936 | 8.85k | } |
937 | | |
938 | | const MachineInstrBuilder & |
939 | | ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg, |
940 | | unsigned SubIdx, unsigned State, |
941 | 70 | const TargetRegisterInfo *TRI) const { |
942 | 70 | if (!SubIdx) |
943 | 0 | return MIB.addReg(Reg, State); |
944 | 70 | |
945 | 70 | if (TargetRegisterInfo::isPhysicalRegister(Reg)) |
946 | 8 | return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State); |
947 | 62 | return MIB.addReg(Reg, State, SubIdx); |
948 | 62 | } |
949 | | |
950 | | void ARMBaseInstrInfo:: |
951 | | storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, |
952 | | unsigned SrcReg, bool isKill, int FI, |
953 | | const TargetRegisterClass *RC, |
954 | 2.26k | const TargetRegisterInfo *TRI) const { |
955 | 2.26k | DebugLoc DL; |
956 | 2.26k | if (I != MBB.end()) DL = I->getDebugLoc(); |
957 | 2.26k | MachineFunction &MF = *MBB.getParent(); |
958 | 2.26k | MachineFrameInfo &MFI = MF.getFrameInfo(); |
959 | 2.26k | unsigned Align = MFI.getObjectAlignment(FI); |
960 | 2.26k | |
961 | 2.26k | MachineMemOperand *MMO = MF.getMachineMemOperand( |
962 | 2.26k | MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore, |
963 | 2.26k | MFI.getObjectSize(FI), Align); |
964 | 2.26k | |
965 | 2.26k | switch (TRI->getSpillSize(*RC)) { |
966 | 1.67k | case 4: |
967 | 1.67k | if (ARM::GPRRegClass.hasSubClassEq(RC)) { |
968 | 1.54k | BuildMI(MBB, I, DL, get(ARM::STRi12)) |
969 | 1.54k | .addReg(SrcReg, getKillRegState(isKill)) |
970 | 1.54k | .addFrameIndex(FI) |
971 | 1.54k | .addImm(0) |
972 | 1.54k | .addMemOperand(MMO) |
973 | 1.54k | .add(predOps(ARMCC::AL)); |
974 | 1.67k | } else if (ARM::SPRRegClass.hasSubClassEq(RC)) { |
975 | 129 | BuildMI(MBB, I, DL, get(ARM::VSTRS)) |
976 | 129 | .addReg(SrcReg, getKillRegState(isKill)) |
977 | 129 | .addFrameIndex(FI) |
978 | 129 | .addImm(0) |
979 | 129 | .addMemOperand(MMO) |
980 | 129 | .add(predOps(ARMCC::AL)); |
981 | 129 | } else |
982 | 0 | llvm_unreachable("Unknown reg class!"); |
983 | 1.67k | break; |
984 | 269 | case 8: |
985 | 269 | if (ARM::DPRRegClass.hasSubClassEq(RC)) { |
986 | 262 | BuildMI(MBB, I, DL, get(ARM::VSTRD)) |
987 | 262 | .addReg(SrcReg, getKillRegState(isKill)) |
988 | 262 | .addFrameIndex(FI) |
989 | 262 | .addImm(0) |
990 | 262 | .addMemOperand(MMO) |
991 | 262 | .add(predOps(ARMCC::AL)); |
992 | 269 | } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) { |
993 | 7 | if (Subtarget.hasV5TEOps()) { |
994 | 5 | MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STRD)); |
995 | 5 | AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI); |
996 | 5 | AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI); |
997 | 5 | MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO) |
998 | 5 | .add(predOps(ARMCC::AL)); |
999 | 7 | } else { |
1000 | 2 | // Fallback to STM instruction, which has existed since the dawn of |
1001 | 2 | // time. |
1002 | 2 | MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STMIA)) |
1003 | 2 | .addFrameIndex(FI) |
1004 | 2 | .addMemOperand(MMO) |
1005 | 2 | .add(predOps(ARMCC::AL)); |
1006 | 2 | AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI); |
1007 | 2 | AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI); |
1008 | 2 | } |
1009 | 7 | } else |
1010 | 0 | llvm_unreachable("Unknown reg class!"); |
1011 | 269 | break; |
1012 | 315 | case 16: |
1013 | 315 | if (ARM::DPairRegClass.hasSubClassEq(RC)) { |
1014 | 315 | // Use aligned spills if the stack can be realigned. |
1015 | 315 | if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { |
1016 | 308 | BuildMI(MBB, I, DL, get(ARM::VST1q64)) |
1017 | 308 | .addFrameIndex(FI) |
1018 | 308 | .addImm(16) |
1019 | 308 | .addReg(SrcReg, getKillRegState(isKill)) |
1020 | 308 | .addMemOperand(MMO) |
1021 | 308 | .add(predOps(ARMCC::AL)); |
1022 | 315 | } else { |
1023 | 7 | BuildMI(MBB, I, DL, get(ARM::VSTMQIA)) |
1024 | 7 | .addReg(SrcReg, getKillRegState(isKill)) |
1025 | 7 | .addFrameIndex(FI) |
1026 | 7 | .addMemOperand(MMO) |
1027 | 7 | .add(predOps(ARMCC::AL)); |
1028 | 7 | } |
1029 | 315 | } else |
1030 | 0 | llvm_unreachable("Unknown reg class!"); |
1031 | 315 | break; |
1032 | 1 | case 24: |
1033 | 1 | if (ARM::DTripleRegClass.hasSubClassEq(RC)) { |
1034 | 1 | // Use aligned spills if the stack can be realigned. |
1035 | 1 | if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { |
1036 | 0 | BuildMI(MBB, I, DL, get(ARM::VST1d64TPseudo)) |
1037 | 0 | .addFrameIndex(FI) |
1038 | 0 | .addImm(16) |
1039 | 0 | .addReg(SrcReg, getKillRegState(isKill)) |
1040 | 0 | .addMemOperand(MMO) |
1041 | 0 | .add(predOps(ARMCC::AL)); |
1042 | 1 | } else { |
1043 | 1 | MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA)) |
1044 | 1 | .addFrameIndex(FI) |
1045 | 1 | .add(predOps(ARMCC::AL)) |
1046 | 1 | .addMemOperand(MMO); |
1047 | 1 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); |
1048 | 1 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); |
1049 | 1 | AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); |
1050 | 1 | } |
1051 | 1 | } else |
1052 | 0 | llvm_unreachable("Unknown reg class!"); |
1053 | 1 | break; |
1054 | 0 | case 32: |
1055 | 0 | if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) { |
1056 | 0 | if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { |
1057 | 0 | // FIXME: It's possible to only store part of the QQ register if the |
1058 | 0 | // spilled def has a sub-register index. |
1059 | 0 | BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo)) |
1060 | 0 | .addFrameIndex(FI) |
1061 | 0 | .addImm(16) |
1062 | 0 | .addReg(SrcReg, getKillRegState(isKill)) |
1063 | 0 | .addMemOperand(MMO) |
1064 | 0 | .add(predOps(ARMCC::AL)); |
1065 | 0 | } else { |
1066 | 0 | MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA)) |
1067 | 0 | .addFrameIndex(FI) |
1068 | 0 | .add(predOps(ARMCC::AL)) |
1069 | 0 | .addMemOperand(MMO); |
1070 | 0 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); |
1071 | 0 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); |
1072 | 0 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); |
1073 | 0 | AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI); |
1074 | 0 | } |
1075 | 0 | } else |
1076 | 0 | llvm_unreachable("Unknown reg class!"); |
1077 | 0 | break; |
1078 | 2 | case 64: |
1079 | 2 | if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) { |
1080 | 2 | MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA)) |
1081 | 2 | .addFrameIndex(FI) |
1082 | 2 | .add(predOps(ARMCC::AL)) |
1083 | 2 | .addMemOperand(MMO); |
1084 | 2 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); |
1085 | 2 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); |
1086 | 2 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); |
1087 | 2 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI); |
1088 | 2 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI); |
1089 | 2 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI); |
1090 | 2 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI); |
1091 | 2 | AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI); |
1092 | 2 | } else |
1093 | 0 | llvm_unreachable("Unknown reg class!"); |
1094 | 2 | break; |
1095 | 0 | default: |
1096 | 0 | llvm_unreachable("Unknown reg class!"); |
1097 | 2.26k | } |
1098 | 2.26k | } |
1099 | | |
1100 | | unsigned ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI, |
1101 | 83.0k | int &FrameIndex) const { |
1102 | 83.0k | switch (MI.getOpcode()) { |
1103 | 79.4k | default: break; |
1104 | 217 | case ARM::STRrs: |
1105 | 217 | case ARM::t2STRs: // FIXME: don't use t2STRs to access frame. |
1106 | 217 | if (MI.getOperand(1).isFI() && 217 MI.getOperand(2).isReg()0 && |
1107 | 217 | MI.getOperand(3).isImm()0 && MI.getOperand(2).getReg() == 00 && |
1108 | 217 | MI.getOperand(3).getImm() == 00 ) { |
1109 | 0 | FrameIndex = MI.getOperand(1).getIndex(); |
1110 | 0 | return MI.getOperand(0).getReg(); |
1111 | 0 | } |
1112 | 217 | break; |
1113 | 3.37k | case ARM::STRi12: |
1114 | 3.37k | case ARM::t2STRi12: |
1115 | 3.37k | case ARM::tSTRspi: |
1116 | 3.37k | case ARM::VSTRD: |
1117 | 3.37k | case ARM::VSTRS: |
1118 | 3.37k | if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() && |
1119 | 3.37k | MI.getOperand(2).getImm() == 0) { |
1120 | 1.08k | FrameIndex = MI.getOperand(1).getIndex(); |
1121 | 1.08k | return MI.getOperand(0).getReg(); |
1122 | 1.08k | } |
1123 | 2.28k | break; |
1124 | 75 | case ARM::VST1q64: |
1125 | 75 | case ARM::VST1d64TPseudo: |
1126 | 75 | case ARM::VST1d64QPseudo: |
1127 | 75 | if (MI.getOperand(0).isFI() && MI.getOperand(2).getSubReg() == 0) { |
1128 | 0 | FrameIndex = MI.getOperand(0).getIndex(); |
1129 | 0 | return MI.getOperand(2).getReg(); |
1130 | 0 | } |
1131 | 75 | break; |
1132 | 0 | case ARM::VSTMQIA: |
1133 | 0 | if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) { |
1134 | 0 | FrameIndex = MI.getOperand(1).getIndex(); |
1135 | 0 | return MI.getOperand(0).getReg(); |
1136 | 0 | } |
1137 | 0 | break; |
1138 | 82.0k | } |
1139 | 82.0k | |
1140 | 82.0k | return 0; |
1141 | 82.0k | } |
1142 | | |
1143 | | unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI, |
1144 | 116k | int &FrameIndex) const { |
1145 | 116k | const MachineMemOperand *Dummy; |
1146 | 13.6k | return MI.mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex); |
1147 | 116k | } |
1148 | | |
1149 | | void ARMBaseInstrInfo:: |
1150 | | loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, |
1151 | | unsigned DestReg, int FI, |
1152 | | const TargetRegisterClass *RC, |
1153 | 1.99k | const TargetRegisterInfo *TRI) const { |
1154 | 1.99k | DebugLoc DL; |
1155 | 1.99k | if (I != MBB.end()) DL = I->getDebugLoc(); |
1156 | 1.99k | MachineFunction &MF = *MBB.getParent(); |
1157 | 1.99k | MachineFrameInfo &MFI = MF.getFrameInfo(); |
1158 | 1.99k | unsigned Align = MFI.getObjectAlignment(FI); |
1159 | 1.99k | MachineMemOperand *MMO = MF.getMachineMemOperand( |
1160 | 1.99k | MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad, |
1161 | 1.99k | MFI.getObjectSize(FI), Align); |
1162 | 1.99k | |
1163 | 1.99k | switch (TRI->getSpillSize(*RC)) { |
1164 | 1.20k | case 4: |
1165 | 1.20k | if (ARM::GPRRegClass.hasSubClassEq(RC)) { |
1166 | 1.18k | BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg) |
1167 | 1.18k | .addFrameIndex(FI) |
1168 | 1.18k | .addImm(0) |
1169 | 1.18k | .addMemOperand(MMO) |
1170 | 1.18k | .add(predOps(ARMCC::AL)); |
1171 | 1.18k | |
1172 | 1.20k | } else if (ARM::SPRRegClass.hasSubClassEq(RC)) { |
1173 | 28 | BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg) |
1174 | 28 | .addFrameIndex(FI) |
1175 | 28 | .addImm(0) |
1176 | 28 | .addMemOperand(MMO) |
1177 | 28 | .add(predOps(ARMCC::AL)); |
1178 | 28 | } else |
1179 | 0 | llvm_unreachable("Unknown reg class!"); |
1180 | 1.20k | break; |
1181 | 479 | case 8: |
1182 | 479 | if (ARM::DPRRegClass.hasSubClassEq(RC)) { |
1183 | 475 | BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg) |
1184 | 475 | .addFrameIndex(FI) |
1185 | 475 | .addImm(0) |
1186 | 475 | .addMemOperand(MMO) |
1187 | 475 | .add(predOps(ARMCC::AL)); |
1188 | 479 | } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) { |
1189 | 4 | MachineInstrBuilder MIB; |
1190 | 4 | |
1191 | 4 | if (Subtarget.hasV5TEOps()) { |
1192 | 2 | MIB = BuildMI(MBB, I, DL, get(ARM::LDRD)); |
1193 | 2 | AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI); |
1194 | 2 | AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI); |
1195 | 2 | MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO) |
1196 | 2 | .add(predOps(ARMCC::AL)); |
1197 | 4 | } else { |
1198 | 2 | // Fallback to LDM instruction, which has existed since the dawn of |
1199 | 2 | // time. |
1200 | 2 | MIB = BuildMI(MBB, I, DL, get(ARM::LDMIA)) |
1201 | 2 | .addFrameIndex(FI) |
1202 | 2 | .addMemOperand(MMO) |
1203 | 2 | .add(predOps(ARMCC::AL)); |
1204 | 2 | MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI); |
1205 | 2 | MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI); |
1206 | 2 | } |
1207 | 4 | |
1208 | 4 | if (TargetRegisterInfo::isPhysicalRegister(DestReg)) |
1209 | 0 | MIB.addReg(DestReg, RegState::ImplicitDefine); |
1210 | 4 | } else |
1211 | 0 | llvm_unreachable("Unknown reg class!"); |
1212 | 479 | break; |
1213 | 300 | case 16: |
1214 | 300 | if (ARM::DPairRegClass.hasSubClassEq(RC)) { |
1215 | 300 | if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { |
1216 | 298 | BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg) |
1217 | 298 | .addFrameIndex(FI) |
1218 | 298 | .addImm(16) |
1219 | 298 | .addMemOperand(MMO) |
1220 | 298 | .add(predOps(ARMCC::AL)); |
1221 | 300 | } else { |
1222 | 2 | BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg) |
1223 | 2 | .addFrameIndex(FI) |
1224 | 2 | .addMemOperand(MMO) |
1225 | 2 | .add(predOps(ARMCC::AL)); |
1226 | 2 | } |
1227 | 300 | } else |
1228 | 0 | llvm_unreachable("Unknown reg class!"); |
1229 | 300 | break; |
1230 | 1 | case 24: |
1231 | 1 | if (ARM::DTripleRegClass.hasSubClassEq(RC)) { |
1232 | 1 | if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { |
1233 | 0 | BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg) |
1234 | 0 | .addFrameIndex(FI) |
1235 | 0 | .addImm(16) |
1236 | 0 | .addMemOperand(MMO) |
1237 | 0 | .add(predOps(ARMCC::AL)); |
1238 | 1 | } else { |
1239 | 1 | MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) |
1240 | 1 | .addFrameIndex(FI) |
1241 | 1 | .addMemOperand(MMO) |
1242 | 1 | .add(predOps(ARMCC::AL)); |
1243 | 1 | MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI); |
1244 | 1 | MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI); |
1245 | 1 | MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI); |
1246 | 1 | if (TargetRegisterInfo::isPhysicalRegister(DestReg)) |
1247 | 0 | MIB.addReg(DestReg, RegState::ImplicitDefine); |
1248 | 1 | } |
1249 | 1 | } else |
1250 | 0 | llvm_unreachable("Unknown reg class!"); |
1251 | 1 | break; |
1252 | 0 | case 32: |
1253 | 0 | if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) { |
1254 | 0 | if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { |
1255 | 0 | BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg) |
1256 | 0 | .addFrameIndex(FI) |
1257 | 0 | .addImm(16) |
1258 | 0 | .addMemOperand(MMO) |
1259 | 0 | .add(predOps(ARMCC::AL)); |
1260 | 0 | } else { |
1261 | 0 | MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) |
1262 | 0 | .addFrameIndex(FI) |
1263 | 0 | .add(predOps(ARMCC::AL)) |
1264 | 0 | .addMemOperand(MMO); |
1265 | 0 | MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI); |
1266 | 0 | MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI); |
1267 | 0 | MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI); |
1268 | 0 | MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI); |
1269 | 0 | if (TargetRegisterInfo::isPhysicalRegister(DestReg)) |
1270 | 0 | MIB.addReg(DestReg, RegState::ImplicitDefine); |
1271 | 0 | } |
1272 | 0 | } else |
1273 | 0 | llvm_unreachable("Unknown reg class!"); |
1274 | 0 | break; |
1275 | 2 | case 64: |
1276 | 2 | if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) { |
1277 | 2 | MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) |
1278 | 2 | .addFrameIndex(FI) |
1279 | 2 | .add(predOps(ARMCC::AL)) |
1280 | 2 | .addMemOperand(MMO); |
1281 | 2 | MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI); |
1282 | 2 | MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI); |
1283 | 2 | MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI); |
1284 | 2 | MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI); |
1285 | 2 | MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI); |
1286 | 2 | MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI); |
1287 | 2 | MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI); |
1288 | 2 | MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI); |
1289 | 2 | if (TargetRegisterInfo::isPhysicalRegister(DestReg)) |
1290 | 0 | MIB.addReg(DestReg, RegState::ImplicitDefine); |
1291 | 2 | } else |
1292 | 0 | llvm_unreachable("Unknown reg class!"); |
1293 | 2 | break; |
1294 | 0 | default: |
1295 | 0 | llvm_unreachable("Unknown regclass!"); |
1296 | 1.99k | } |
1297 | 1.99k | } |
1298 | | |
1299 | | unsigned ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, |
1300 | 217k | int &FrameIndex) const { |
1301 | 217k | switch (MI.getOpcode()) { |
1302 | 189k | default: break; |
1303 | 1.59k | case ARM::LDRrs: |
1304 | 1.59k | case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame. |
1305 | 1.59k | if (MI.getOperand(1).isFI() && 1.59k MI.getOperand(2).isReg()0 && |
1306 | 1.59k | MI.getOperand(3).isImm()0 && MI.getOperand(2).getReg() == 00 && |
1307 | 1.59k | MI.getOperand(3).getImm() == 00 ) { |
1308 | 0 | FrameIndex = MI.getOperand(1).getIndex(); |
1309 | 0 | return MI.getOperand(0).getReg(); |
1310 | 0 | } |
1311 | 1.59k | break; |
1312 | 26.8k | case ARM::LDRi12: |
1313 | 26.8k | case ARM::t2LDRi12: |
1314 | 26.8k | case ARM::tLDRspi: |
1315 | 26.8k | case ARM::VLDRD: |
1316 | 26.8k | case ARM::VLDRS: |
1317 | 26.8k | if (MI.getOperand(1).isFI() && 26.8k MI.getOperand(2).isImm()13.8k && |
1318 | 26.8k | MI.getOperand(2).getImm() == 013.8k ) { |
1319 | 11.4k | FrameIndex = MI.getOperand(1).getIndex(); |
1320 | 11.4k | return MI.getOperand(0).getReg(); |
1321 | 11.4k | } |
1322 | 15.3k | break; |
1323 | 134 | case ARM::VLD1q64: |
1324 | 134 | case ARM::VLD1d64TPseudo: |
1325 | 134 | case ARM::VLD1d64QPseudo: |
1326 | 134 | if (MI.getOperand(1).isFI() && 134 MI.getOperand(0).getSubReg() == 02 ) { |
1327 | 2 | FrameIndex = MI.getOperand(1).getIndex(); |
1328 | 2 | return MI.getOperand(0).getReg(); |
1329 | 2 | } |
1330 | 132 | break; |
1331 | 0 | case ARM::VLDMQIA: |
1332 | 0 | if (MI.getOperand(1).isFI() && 0 MI.getOperand(0).getSubReg() == 00 ) { |
1333 | 0 | FrameIndex = MI.getOperand(1).getIndex(); |
1334 | 0 | return MI.getOperand(0).getReg(); |
1335 | 0 | } |
1336 | 0 | break; |
1337 | 206k | } |
1338 | 206k | |
1339 | 206k | return 0; |
1340 | 206k | } |
1341 | | |
1342 | | unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI, |
1343 | 119k | int &FrameIndex) const { |
1344 | 119k | const MachineMemOperand *Dummy; |
1345 | 18.4k | return MI.mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex); |
1346 | 119k | } |
1347 | | |
1348 | | /// \brief Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMIA_UPD |
1349 | | /// depending on whether the result is used. |
1350 | 56 | void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const { |
1351 | 56 | bool isThumb1 = Subtarget.isThumb1Only(); |
1352 | 56 | bool isThumb2 = Subtarget.isThumb2(); |
1353 | 56 | const ARMBaseInstrInfo *TII = Subtarget.getInstrInfo(); |
1354 | 56 | |
1355 | 56 | DebugLoc dl = MI->getDebugLoc(); |
1356 | 56 | MachineBasicBlock *BB = MI->getParent(); |
1357 | 56 | |
1358 | 56 | MachineInstrBuilder LDM, STM; |
1359 | 56 | if (isThumb1 || 56 !MI->getOperand(1).isDead()28 ) { |
1360 | 7 | LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD |
1361 | 34 | : isThumb1 ? 34 ARM::tLDMIA_UPD28 |
1362 | 6 | : ARM::LDMIA_UPD)) |
1363 | 41 | .add(MI->getOperand(1)); |
1364 | 56 | } else { |
1365 | 15 | LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA7 : ARM::LDMIA8 )); |
1366 | 15 | } |
1367 | 56 | |
1368 | 56 | if (isThumb1 || 56 !MI->getOperand(0).isDead()28 ) { |
1369 | 7 | STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD |
1370 | 34 | : isThumb1 ? 34 ARM::tSTMIA_UPD28 |
1371 | 6 | : ARM::STMIA_UPD)) |
1372 | 41 | .add(MI->getOperand(0)); |
1373 | 56 | } else { |
1374 | 15 | STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA7 : ARM::STMIA8 )); |
1375 | 15 | } |
1376 | 56 | |
1377 | 56 | LDM.add(MI->getOperand(3)).add(predOps(ARMCC::AL)); |
1378 | 56 | STM.add(MI->getOperand(2)).add(predOps(ARMCC::AL)); |
1379 | 56 | |
1380 | 56 | // Sort the scratch registers into ascending order. |
1381 | 56 | const TargetRegisterInfo &TRI = getRegisterInfo(); |
1382 | 56 | SmallVector<unsigned, 6> ScratchRegs; |
1383 | 285 | for(unsigned I = 5; I < MI->getNumOperands()285 ; ++I229 ) |
1384 | 229 | ScratchRegs.push_back(MI->getOperand(I).getReg()); |
1385 | 56 | std::sort(ScratchRegs.begin(), ScratchRegs.end(), |
1386 | 56 | [&TRI](const unsigned &Reg1, |
1387 | 227 | const unsigned &Reg2) -> bool { |
1388 | 227 | return TRI.getEncodingValue(Reg1) < |
1389 | 227 | TRI.getEncodingValue(Reg2); |
1390 | 227 | }); |
1391 | 56 | |
1392 | 229 | for (const auto &Reg : ScratchRegs) { |
1393 | 229 | LDM.addReg(Reg, RegState::Define); |
1394 | 229 | STM.addReg(Reg, RegState::Kill); |
1395 | 229 | } |
1396 | 56 | |
1397 | 56 | BB->erase(MI); |
1398 | 56 | } |
1399 | | |
1400 | 111k | bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { |
1401 | 111k | if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD111k ) { |
1402 | 156 | assert(getSubtarget().getTargetTriple().isOSBinFormatMachO() && |
1403 | 156 | "LOAD_STACK_GUARD currently supported only for MachO."); |
1404 | 156 | expandLoadStackGuard(MI); |
1405 | 156 | MI.getParent()->erase(MI); |
1406 | 156 | return true; |
1407 | 156 | } |
1408 | 111k | |
1409 | 111k | if (111k MI.getOpcode() == ARM::MEMCPY111k ) { |
1410 | 56 | expandMEMCPY(MI); |
1411 | 56 | return true; |
1412 | 56 | } |
1413 | 111k | |
1414 | 111k | // This hook gets to expand COPY instructions before they become |
1415 | 111k | // copyPhysReg() calls. Look for VMOVS instructions that can legally be |
1416 | 111k | // widened to VMOVD. We prefer the VMOVD when possible because it may be |
1417 | 111k | // changed into a VORR that can go down the NEON pipeline. |
1418 | 111k | if (111k !MI.isCopy() || 111k Subtarget.dontWidenVMOVS()34.8k || Subtarget.isFPOnlySP()34.8k ) |
1419 | 79.1k | return false; |
1420 | 32.4k | |
1421 | 32.4k | // Look for a copy between even S-registers. That is where we keep floats |
1422 | 32.4k | // when using NEON v2f32 instructions for f32 arithmetic. |
1423 | 32.4k | unsigned DstRegS = MI.getOperand(0).getReg(); |
1424 | 32.4k | unsigned SrcRegS = MI.getOperand(1).getReg(); |
1425 | 32.4k | if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS)) |
1426 | 30.7k | return false; |
1427 | 1.69k | |
1428 | 1.69k | const TargetRegisterInfo *TRI = &getRegisterInfo(); |
1429 | 1.69k | unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0, |
1430 | 1.69k | &ARM::DPRRegClass); |
1431 | 1.69k | unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0, |
1432 | 1.69k | &ARM::DPRRegClass); |
1433 | 1.69k | if (!DstRegD || 1.69k !SrcRegD1.42k ) |
1434 | 397 | return false; |
1435 | 1.29k | |
1436 | 1.29k | // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only |
1437 | 1.29k | // legal if the COPY already defines the full DstRegD, and it isn't a |
1438 | 1.29k | // sub-register insertion. |
1439 | 1.29k | if (1.29k !MI.definesRegister(DstRegD, TRI) || 1.29k MI.readsRegister(DstRegD, TRI)111 ) |
1440 | 1.19k | return false; |
1441 | 105 | |
1442 | 105 | // A dead copy shouldn't show up here, but reject it just in case. |
1443 | 105 | if (105 MI.getOperand(0).isDead()105 ) |
1444 | 0 | return false; |
1445 | 105 | |
1446 | 105 | // All clear, widen the COPY. |
1447 | 105 | DEBUG105 (dbgs() << "widening: " << MI); |
1448 | 105 | MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI); |
1449 | 105 | |
1450 | 105 | // Get rid of the old <imp-def> of DstRegD. Leave it if it defines a Q-reg |
1451 | 105 | // or some other super-register. |
1452 | 105 | int ImpDefIdx = MI.findRegisterDefOperandIdx(DstRegD); |
1453 | 105 | if (ImpDefIdx != -1) |
1454 | 80 | MI.RemoveOperand(ImpDefIdx); |
1455 | 105 | |
1456 | 105 | // Change the opcode and operands. |
1457 | 105 | MI.setDesc(get(ARM::VMOVD)); |
1458 | 105 | MI.getOperand(0).setReg(DstRegD); |
1459 | 105 | MI.getOperand(1).setReg(SrcRegD); |
1460 | 105 | MIB.add(predOps(ARMCC::AL)); |
1461 | 105 | |
1462 | 105 | // We are now reading SrcRegD instead of SrcRegS. This may upset the |
1463 | 105 | // register scavenger and machine verifier, so we need to indicate that we |
1464 | 105 | // are reading an undefined value from SrcRegD, but a proper value from |
1465 | 105 | // SrcRegS. |
1466 | 105 | MI.getOperand(1).setIsUndef(); |
1467 | 105 | MIB.addReg(SrcRegS, RegState::Implicit); |
1468 | 105 | |
1469 | 105 | // SrcRegD may actually contain an unrelated value in the ssub_1 |
1470 | 105 | // sub-register. Don't kill it. Only kill the ssub_0 sub-register. |
1471 | 105 | if (MI.getOperand(1).isKill()105 ) { |
1472 | 25 | MI.getOperand(1).setIsKill(false); |
1473 | 25 | MI.addRegisterKilled(SrcRegS, TRI, true); |
1474 | 25 | } |
1475 | 105 | |
1476 | 105 | DEBUG(dbgs() << "replaced by: " << MI); |
1477 | 111k | return true; |
1478 | 111k | } |
1479 | | |
1480 | | /// Create a copy of a const pool value. Update CPI to the new index and return |
1481 | | /// the label UID. |
1482 | 0 | static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) { |
1483 | 0 | MachineConstantPool *MCP = MF.getConstantPool(); |
1484 | 0 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); |
1485 | 0 | |
1486 | 0 | const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI]; |
1487 | 0 | assert(MCPE.isMachineConstantPoolEntry() && |
1488 | 0 | "Expecting a machine constantpool entry!"); |
1489 | 0 | ARMConstantPoolValue *ACPV = |
1490 | 0 | static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal); |
1491 | 0 | |
1492 | 0 | unsigned PCLabelId = AFI->createPICLabelUId(); |
1493 | 0 | ARMConstantPoolValue *NewCPV = nullptr; |
1494 | 0 | |
1495 | 0 | // FIXME: The below assumes PIC relocation model and that the function |
1496 | 0 | // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and |
1497 | 0 | // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR |
1498 | 0 | // instructions, so that's probably OK, but is PIC always correct when |
1499 | 0 | // we get here? |
1500 | 0 | if (ACPV->isGlobalValue()) |
1501 | 0 | NewCPV = ARMConstantPoolConstant::Create( |
1502 | 0 | cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId, ARMCP::CPValue, |
1503 | 0 | 4, ACPV->getModifier(), ACPV->mustAddCurrentAddress()); |
1504 | 0 | else if (0 ACPV->isExtSymbol()0 ) |
1505 | 0 | NewCPV = ARMConstantPoolSymbol:: |
1506 | 0 | Create(MF.getFunction()->getContext(), |
1507 | 0 | cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4); |
1508 | 0 | else if (0 ACPV->isBlockAddress()0 ) |
1509 | 0 | NewCPV = ARMConstantPoolConstant:: |
1510 | 0 | Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId, |
1511 | 0 | ARMCP::CPBlockAddress, 4); |
1512 | 0 | else if (0 ACPV->isLSDA()0 ) |
1513 | 0 | NewCPV = ARMConstantPoolConstant::Create(MF.getFunction(), PCLabelId, |
1514 | 0 | ARMCP::CPLSDA, 4); |
1515 | 0 | else if (0 ACPV->isMachineBasicBlock()0 ) |
1516 | 0 | NewCPV = ARMConstantPoolMBB:: |
1517 | 0 | Create(MF.getFunction()->getContext(), |
1518 | 0 | cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4); |
1519 | 0 | else |
1520 | 0 | llvm_unreachable("Unexpected ARM constantpool value type!!"); |
1521 | 0 | CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment()); |
1522 | 0 | return PCLabelId; |
1523 | 0 | } |
1524 | | |
1525 | | void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB, |
1526 | | MachineBasicBlock::iterator I, |
1527 | | unsigned DestReg, unsigned SubIdx, |
1528 | | const MachineInstr &Orig, |
1529 | 12.8k | const TargetRegisterInfo &TRI) const { |
1530 | 12.8k | unsigned Opcode = Orig.getOpcode(); |
1531 | 12.8k | switch (Opcode) { |
1532 | 12.8k | default: { |
1533 | 12.8k | MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig); |
1534 | 12.8k | MI->substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI); |
1535 | 12.8k | MBB.insert(I, MI); |
1536 | 12.8k | break; |
1537 | 12.8k | } |
1538 | 0 | case ARM::tLDRpci_pic: |
1539 | 0 | case ARM::t2LDRpci_pic: { |
1540 | 0 | MachineFunction &MF = *MBB.getParent(); |
1541 | 0 | unsigned CPI = Orig.getOperand(1).getIndex(); |
1542 | 0 | unsigned PCLabelId = duplicateCPV(MF, CPI); |
1543 | 0 | MachineInstrBuilder MIB = |
1544 | 0 | BuildMI(MBB, I, Orig.getDebugLoc(), get(Opcode), DestReg) |
1545 | 0 | .addConstantPoolIndex(CPI) |
1546 | 0 | .addImm(PCLabelId); |
1547 | 0 | MIB->setMemRefs(Orig.memoperands_begin(), Orig.memoperands_end()); |
1548 | 0 | break; |
1549 | 12.8k | } |
1550 | 12.8k | } |
1551 | 12.8k | } |
1552 | | |
1553 | | MachineInstr & |
1554 | | ARMBaseInstrInfo::duplicate(MachineBasicBlock &MBB, |
1555 | | MachineBasicBlock::iterator InsertBefore, |
1556 | 9.22k | const MachineInstr &Orig) const { |
1557 | 9.22k | MachineInstr &Cloned = TargetInstrInfo::duplicate(MBB, InsertBefore, Orig); |
1558 | 9.22k | MachineBasicBlock::instr_iterator I = Cloned.getIterator(); |
1559 | 9.31k | for (;;) { |
1560 | 9.31k | switch (I->getOpcode()) { |
1561 | 0 | case ARM::tLDRpci_pic: |
1562 | 0 | case ARM::t2LDRpci_pic: { |
1563 | 0 | MachineFunction &MF = *MBB.getParent(); |
1564 | 0 | unsigned CPI = I->getOperand(1).getIndex(); |
1565 | 0 | unsigned PCLabelId = duplicateCPV(MF, CPI); |
1566 | 0 | I->getOperand(1).setIndex(CPI); |
1567 | 0 | I->getOperand(2).setImm(PCLabelId); |
1568 | 0 | break; |
1569 | 9.31k | } |
1570 | 9.31k | } |
1571 | 9.31k | if (9.31k !I->isBundledWithSucc()9.31k ) |
1572 | 9.22k | break; |
1573 | 90 | ++I; |
1574 | 90 | } |
1575 | 9.22k | return Cloned; |
1576 | 9.22k | } |
1577 | | |
1578 | | bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0, |
1579 | | const MachineInstr &MI1, |
1580 | 6.09k | const MachineRegisterInfo *MRI) const { |
1581 | 6.09k | unsigned Opcode = MI0.getOpcode(); |
1582 | 6.09k | if (Opcode == ARM::t2LDRpci || |
1583 | 6.09k | Opcode == ARM::t2LDRpci_pic || |
1584 | 6.09k | Opcode == ARM::tLDRpci || |
1585 | 6.08k | Opcode == ARM::tLDRpci_pic || |
1586 | 6.08k | Opcode == ARM::LDRLIT_ga_pcrel || |
1587 | 6.08k | Opcode == ARM::LDRLIT_ga_pcrel_ldr || |
1588 | 6.08k | Opcode == ARM::tLDRLIT_ga_pcrel || |
1589 | 6.07k | Opcode == ARM::MOV_ga_pcrel || |
1590 | 6.07k | Opcode == ARM::MOV_ga_pcrel_ldr || |
1591 | 6.09k | Opcode == ARM::t2MOV_ga_pcrel6.06k ) { |
1592 | 3.55k | if (MI1.getOpcode() != Opcode) |
1593 | 0 | return false; |
1594 | 3.55k | if (3.55k MI0.getNumOperands() != MI1.getNumOperands()3.55k ) |
1595 | 0 | return false; |
1596 | 3.55k | |
1597 | 3.55k | const MachineOperand &MO0 = MI0.getOperand(1); |
1598 | 3.55k | const MachineOperand &MO1 = MI1.getOperand(1); |
1599 | 3.55k | if (MO0.getOffset() != MO1.getOffset()) |
1600 | 0 | return false; |
1601 | 3.55k | |
1602 | 3.55k | if (3.55k Opcode == ARM::LDRLIT_ga_pcrel || |
1603 | 3.55k | Opcode == ARM::LDRLIT_ga_pcrel_ldr || |
1604 | 3.55k | Opcode == ARM::tLDRLIT_ga_pcrel || |
1605 | 3.54k | Opcode == ARM::MOV_ga_pcrel || |
1606 | 3.54k | Opcode == ARM::MOV_ga_pcrel_ldr || |
1607 | 3.54k | Opcode == ARM::t2MOV_ga_pcrel) |
1608 | 3.55k | // Ignore the PC labels. |
1609 | 3.55k | return MO0.getGlobal() == MO1.getGlobal(); |
1610 | 7 | |
1611 | 7 | const MachineFunction *MF = MI0.getParent()->getParent(); |
1612 | 7 | const MachineConstantPool *MCP = MF->getConstantPool(); |
1613 | 7 | int CPI0 = MO0.getIndex(); |
1614 | 7 | int CPI1 = MO1.getIndex(); |
1615 | 7 | const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0]; |
1616 | 7 | const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1]; |
1617 | 7 | bool isARMCP0 = MCPE0.isMachineConstantPoolEntry(); |
1618 | 7 | bool isARMCP1 = MCPE1.isMachineConstantPoolEntry(); |
1619 | 7 | if (isARMCP0 && 7 isARMCP11 ) { |
1620 | 1 | ARMConstantPoolValue *ACPV0 = |
1621 | 1 | static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal); |
1622 | 1 | ARMConstantPoolValue *ACPV1 = |
1623 | 1 | static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal); |
1624 | 1 | return ACPV0->hasSameValue(ACPV1); |
1625 | 6 | } else if (6 !isARMCP0 && 6 !isARMCP16 ) { |
1626 | 6 | return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal; |
1627 | 6 | } |
1628 | 0 | return false; |
1629 | 2.53k | } else if (2.53k Opcode == ARM::PICLDR2.53k ) { |
1630 | 0 | if (MI1.getOpcode() != Opcode) |
1631 | 0 | return false; |
1632 | 0 | if (0 MI0.getNumOperands() != MI1.getNumOperands()0 ) |
1633 | 0 | return false; |
1634 | 0 | |
1635 | 0 | unsigned Addr0 = MI0.getOperand(1).getReg(); |
1636 | 0 | unsigned Addr1 = MI1.getOperand(1).getReg(); |
1637 | 0 | if (Addr0 != Addr10 ) { |
1638 | 0 | if (!MRI || |
1639 | 0 | !TargetRegisterInfo::isVirtualRegister(Addr0) || |
1640 | 0 | !TargetRegisterInfo::isVirtualRegister(Addr1)) |
1641 | 0 | return false; |
1642 | 0 | |
1643 | 0 | // This assumes SSA form. |
1644 | 0 | MachineInstr *Def0 = MRI->getVRegDef(Addr0); |
1645 | 0 | MachineInstr *Def1 = MRI->getVRegDef(Addr1); |
1646 | 0 | // Check if the loaded value, e.g. a constantpool of a global address, are |
1647 | 0 | // the same. |
1648 | 0 | if (!produceSameValue(*Def0, *Def1, MRI)) |
1649 | 0 | return false; |
1650 | 0 | } |
1651 | 0 | |
1652 | 0 | for (unsigned i = 3, e = MI0.getNumOperands(); 0 i != e0 ; ++i0 ) { |
1653 | 0 | // %vreg12<def> = PICLDR %vreg11, 0, pred:14, pred:%noreg |
1654 | 0 | const MachineOperand &MO0 = MI0.getOperand(i); |
1655 | 0 | const MachineOperand &MO1 = MI1.getOperand(i); |
1656 | 0 | if (!MO0.isIdenticalTo(MO1)) |
1657 | 0 | return false; |
1658 | 0 | } |
1659 | 0 | return true; |
1660 | 2.53k | } |
1661 | 2.53k | |
1662 | 2.53k | return MI0.isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs); |
1663 | 2.53k | } |
1664 | | |
1665 | | /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to |
1666 | | /// determine if two loads are loading from the same base address. It should |
1667 | | /// only return true if the base pointers are the same and the only difference |
1668 | | /// between the two addresses is the offset. It also returns the offsets by |
1669 | | /// reference. |
1670 | | /// |
1671 | | /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched |
1672 | | /// is permanently disabled. |
1673 | | bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, |
1674 | | int64_t &Offset1, |
1675 | 228k | int64_t &Offset2) const { |
1676 | 228k | // Don't worry about Thumb: just ARM and Thumb2. |
1677 | 228k | if (Subtarget.isThumb1Only()228k ) return false23.4k ; |
1678 | 205k | |
1679 | 205k | if (205k !Load1->isMachineOpcode() || 205k !Load2->isMachineOpcode()205k ) |
1680 | 87.0k | return false; |
1681 | 118k | |
1682 | 118k | switch (Load1->getMachineOpcode()) { |
1683 | 24.4k | default: |
1684 | 24.4k | return false; |
1685 | 93.8k | case ARM::LDRi12: |
1686 | 93.8k | case ARM::LDRBi12: |
1687 | 93.8k | case ARM::LDRD: |
1688 | 93.8k | case ARM::LDRH: |
1689 | 93.8k | case ARM::LDRSB: |
1690 | 93.8k | case ARM::LDRSH: |
1691 | 93.8k | case ARM::VLDRD: |
1692 | 93.8k | case ARM::VLDRS: |
1693 | 93.8k | case ARM::t2LDRi8: |
1694 | 93.8k | case ARM::t2LDRBi8: |
1695 | 93.8k | case ARM::t2LDRDi8: |
1696 | 93.8k | case ARM::t2LDRSHi8: |
1697 | 93.8k | case ARM::t2LDRi12: |
1698 | 93.8k | case ARM::t2LDRBi12: |
1699 | 93.8k | case ARM::t2LDRSHi12: |
1700 | 93.8k | break; |
1701 | 93.8k | } |
1702 | 93.8k | |
1703 | 93.8k | switch (Load2->getMachineOpcode()) { |
1704 | 12.2k | default: |
1705 | 12.2k | return false; |
1706 | 81.6k | case ARM::LDRi12: |
1707 | 81.6k | case ARM::LDRBi12: |
1708 | 81.6k | case ARM::LDRD: |
1709 | 81.6k | case ARM::LDRH: |
1710 | 81.6k | case ARM::LDRSB: |
1711 | 81.6k | case ARM::LDRSH: |
1712 | 81.6k | case ARM::VLDRD: |
1713 | 81.6k | case ARM::VLDRS: |
1714 | 81.6k | case ARM::t2LDRi8: |
1715 | 81.6k | case ARM::t2LDRBi8: |
1716 | 81.6k | case ARM::t2LDRSHi8: |
1717 | 81.6k | case ARM::t2LDRi12: |
1718 | 81.6k | case ARM::t2LDRBi12: |
1719 | 81.6k | case ARM::t2LDRSHi12: |
1720 | 81.6k | break; |
1721 | 81.6k | } |
1722 | 81.6k | |
1723 | 81.6k | // Check if base addresses and chain operands match. |
1724 | 81.6k | if (81.6k Load1->getOperand(0) != Load2->getOperand(0) || |
1725 | 19.0k | Load1->getOperand(4) != Load2->getOperand(4)) |
1726 | 62.5k | return false; |
1727 | 19.0k | |
1728 | 19.0k | // Index should be Reg0. |
1729 | 19.0k | if (19.0k Load1->getOperand(3) != Load2->getOperand(3)19.0k ) |
1730 | 0 | return false; |
1731 | 19.0k | |
1732 | 19.0k | // Determine the offsets. |
1733 | 19.0k | if (19.0k isa<ConstantSDNode>(Load1->getOperand(1)) && |
1734 | 19.0k | isa<ConstantSDNode>(Load2->getOperand(1))18.8k ) { |
1735 | 18.8k | Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue(); |
1736 | 18.8k | Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue(); |
1737 | 18.8k | return true; |
1738 | 18.8k | } |
1739 | 132 | |
1740 | 132 | return false; |
1741 | 132 | } |
1742 | | |
1743 | | /// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to |
1744 | | /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should |
1745 | | /// be scheduled together. On some targets, if two loads are loading from |
1746 | | /// addresses in the same cache line, it's better if they are scheduled |
1747 | | /// together. This function takes two integers that represent the load offsets |
1748 | | /// from the common base address. It returns true if it decides it's desirable |
1749 | | /// to schedule the two loads together. "NumLoads" is the number of loads that |
1750 | | /// have already been scheduled after Load1. |
1751 | | /// |
1752 | | /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched |
1753 | | /// is permanently disabled. |
1754 | | bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, |
1755 | | int64_t Offset1, int64_t Offset2, |
1756 | 5.61k | unsigned NumLoads) const { |
1757 | 5.61k | // Don't worry about Thumb: just ARM and Thumb2. |
1758 | 5.61k | if (Subtarget.isThumb1Only()5.61k ) return false0 ; |
1759 | 5.61k | |
1760 | 5.61k | assert(Offset2 > Offset1); |
1761 | 5.61k | |
1762 | 5.61k | if ((Offset2 - Offset1) / 8 > 64) |
1763 | 0 | return false; |
1764 | 5.61k | |
1765 | 5.61k | // Check if the machine opcodes are different. If they are different |
1766 | 5.61k | // then we consider them to not be of the same base address, |
1767 | 5.61k | // EXCEPT in the case of Thumb2 byte loads where one is LDRBi8 and the other LDRBi12. |
1768 | 5.61k | // In this case, they are considered to be the same because they are different |
1769 | 5.61k | // encoding forms of the same basic instruction. |
1770 | 5.61k | if (5.61k (Load1->getMachineOpcode() != Load2->getMachineOpcode()) && |
1771 | 125 | !((Load1->getMachineOpcode() == ARM::t2LDRBi8 && |
1772 | 3 | Load2->getMachineOpcode() == ARM::t2LDRBi12) || |
1773 | 122 | (Load1->getMachineOpcode() == ARM::t2LDRBi12 && |
1774 | 122 | Load2->getMachineOpcode() == ARM::t2LDRBi8))) |
1775 | 122 | return false; // FIXME: overly conservative? |
1776 | 5.49k | |
1777 | 5.49k | // Four loads in a row should be sufficient. |
1778 | 5.49k | if (5.49k NumLoads >= 35.49k ) |
1779 | 641 | return false; |
1780 | 4.85k | |
1781 | 4.85k | return true; |
1782 | 4.85k | } |
1783 | | |
1784 | | bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI, |
1785 | | const MachineBasicBlock *MBB, |
1786 | 325k | const MachineFunction &MF) const { |
1787 | 325k | // Debug info is never a scheduling boundary. It's necessary to be explicit |
1788 | 325k | // due to the special treatment of IT instructions below, otherwise a |
1789 | 325k | // dbg_value followed by an IT will result in the IT instruction being |
1790 | 325k | // considered a scheduling hazard, which is wrong. It should be the actual |
1791 | 325k | // instruction preceding the dbg_value instruction(s), just like it is |
1792 | 325k | // when debug info is not present. |
1793 | 325k | if (MI.isDebugValue()) |
1794 | 116 | return false; |
1795 | 325k | |
1796 | 325k | // Terminators and labels can't be scheduled around. |
1797 | 325k | if (325k MI.isTerminator() || 325k MI.isPosition()279k ) |
1798 | 72.1k | return true; |
1799 | 253k | |
1800 | 253k | // Treat the start of the IT block as a scheduling boundary, but schedule |
1801 | 253k | // t2IT along with all instructions following it. |
1802 | 253k | // FIXME: This is a big hammer. But the alternative is to add all potential |
1803 | 253k | // true and anti dependencies to IT block instructions as implicit operands |
1804 | 253k | // to the t2IT instruction. The added compile time and complexity does not |
1805 | 253k | // seem worth it. |
1806 | 253k | MachineBasicBlock::const_iterator I = MI; |
1807 | 253k | // Make sure to skip any dbg_value instructions |
1808 | 253k | while (++I != MBB->end() && 253k I->isDebugValue()243k ) |
1809 | 42 | ; |
1810 | 253k | if (I != MBB->end() && 253k I->getOpcode() == ARM::t2IT243k ) |
1811 | 0 | return true; |
1812 | 253k | |
1813 | 253k | // Don't attempt to schedule around any instruction that defines |
1814 | 253k | // a stack-oriented pointer, as it's unlikely to be profitable. This |
1815 | 253k | // saves compile time, because it doesn't require every single |
1816 | 253k | // stack slot reference to depend on the instruction that does the |
1817 | 253k | // modification. |
1818 | 253k | // Calls don't actually change the stack pointer, even if they have imp-defs. |
1819 | 253k | // No ARM calling conventions change the stack pointer. (X86 calling |
1820 | 253k | // conventions sometimes do). |
1821 | 253k | if (253k !MI.isCall() && 253k MI.definesRegister(ARM::SP)253k ) |
1822 | 20.2k | return true; |
1823 | 232k | |
1824 | 232k | return false; |
1825 | 232k | } |
1826 | | |
1827 | | bool ARMBaseInstrInfo:: |
1828 | | isProfitableToIfCvt(MachineBasicBlock &MBB, |
1829 | | unsigned NumCycles, unsigned ExtraPredCycles, |
1830 | 11.0k | BranchProbability Probability) const { |
1831 | 11.0k | if (!NumCycles) |
1832 | 0 | return false; |
1833 | 11.0k | |
1834 | 11.0k | // If we are optimizing for size, see if the branch in the predecessor can be |
1835 | 11.0k | // lowered to cbn?z by the constant island lowering pass, and return false if |
1836 | 11.0k | // so. This results in a shorter instruction sequence. |
1837 | 11.0k | if (11.0k MBB.getParent()->getFunction()->optForSize()11.0k ) { |
1838 | 1.92k | MachineBasicBlock *Pred = *MBB.pred_begin(); |
1839 | 1.92k | if (!Pred->empty()1.92k ) { |
1840 | 1.92k | MachineInstr *LastMI = &*Pred->rbegin(); |
1841 | 1.92k | if (LastMI->getOpcode() == ARM::t2Bcc1.92k ) { |
1842 | 1.90k | MachineBasicBlock::iterator CmpMI = LastMI; |
1843 | 1.90k | if (CmpMI != Pred->begin()1.90k ) { |
1844 | 1.84k | --CmpMI; |
1845 | 1.84k | if (CmpMI->getOpcode() == ARM::tCMPi8 || |
1846 | 1.84k | CmpMI->getOpcode() == ARM::t2CMPri1.81k ) { |
1847 | 983 | unsigned Reg = CmpMI->getOperand(0).getReg(); |
1848 | 983 | unsigned PredReg = 0; |
1849 | 983 | ARMCC::CondCodes P = getInstrPredicate(*CmpMI, PredReg); |
1850 | 983 | if (P == ARMCC::AL && 983 CmpMI->getOperand(1).getImm() == 0981 && |
1851 | 643 | isARMLowRegister(Reg)) |
1852 | 567 | return false; |
1853 | 10.4k | } |
1854 | 1.84k | } |
1855 | 1.90k | } |
1856 | 1.92k | } |
1857 | 1.92k | } |
1858 | 10.4k | return isProfitableToIfCvt(MBB, NumCycles, ExtraPredCycles, |
1859 | 10.4k | MBB, 0, 0, Probability); |
1860 | 10.4k | } |
1861 | | |
1862 | | bool ARMBaseInstrInfo:: |
1863 | | isProfitableToIfCvt(MachineBasicBlock &TBB, |
1864 | | unsigned TCycles, unsigned TExtra, |
1865 | | MachineBasicBlock &FBB, |
1866 | | unsigned FCycles, unsigned FExtra, |
1867 | 10.8k | BranchProbability Probability) const { |
1868 | 10.8k | if (!TCycles) |
1869 | 0 | return false; |
1870 | 10.8k | |
1871 | 10.8k | // Attempt to estimate the relative costs of predication versus branching. |
1872 | 10.8k | // Here we scale up each component of UnpredCost to avoid precision issues when |
1873 | 10.8k | // scaling TCycles/FCycles by Probability. |
1874 | 10.8k | const unsigned ScalingUpFactor = 1024; |
1875 | 10.8k | |
1876 | 10.8k | unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor; |
1877 | 10.8k | unsigned UnpredCost; |
1878 | 10.8k | if (!Subtarget.hasBranchPredictor()10.8k ) { |
1879 | 822 | // When we don't have a branch predictor it's always cheaper to not take a |
1880 | 822 | // branch than take it, so we have to take that into account. |
1881 | 822 | unsigned NotTakenBranchCost = 1; |
1882 | 822 | unsigned TakenBranchCost = Subtarget.getMispredictionPenalty(); |
1883 | 822 | unsigned TUnpredCycles, FUnpredCycles; |
1884 | 822 | if (!FCycles822 ) { |
1885 | 767 | // Triangle: TBB is the fallthrough |
1886 | 767 | TUnpredCycles = TCycles + NotTakenBranchCost; |
1887 | 767 | FUnpredCycles = TakenBranchCost; |
1888 | 822 | } else { |
1889 | 55 | // Diamond: TBB is the block that is branched to, FBB is the fallthrough |
1890 | 55 | TUnpredCycles = TCycles + TakenBranchCost; |
1891 | 55 | FUnpredCycles = FCycles + NotTakenBranchCost; |
1892 | 55 | // The branch at the end of FBB will disappear when it's predicated, so |
1893 | 55 | // discount it from PredCost. |
1894 | 55 | PredCost -= 1 * ScalingUpFactor; |
1895 | 55 | } |
1896 | 822 | // The total cost is the cost of each path scaled by their probabilities. |
1897 | 822 | unsigned TUnpredCost = Probability.scale(TUnpredCycles * ScalingUpFactor); |
1898 | 822 | unsigned FUnpredCost = Probability.getCompl().scale(FUnpredCycles * ScalingUpFactor); |
1899 | 822 | UnpredCost = TUnpredCost + FUnpredCost; |
1900 | 822 | // When predicating, assume that the first IT can be folded away but later |
1901 | 822 | // ones cost one cycle each. |
1902 | 822 | if (Subtarget.isThumb2() && 822 TCycles + FCycles > 4822 ) { |
1903 | 168 | PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor; |
1904 | 168 | } |
1905 | 10.8k | } else { |
1906 | 10.0k | unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor); |
1907 | 10.0k | unsigned FUnpredCost = |
1908 | 10.0k | Probability.getCompl().scale(FCycles * ScalingUpFactor); |
1909 | 10.0k | UnpredCost = TUnpredCost + FUnpredCost; |
1910 | 10.0k | UnpredCost += 1 * ScalingUpFactor; // The branch itself |
1911 | 10.0k | UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10; |
1912 | 10.0k | } |
1913 | 10.8k | |
1914 | 10.8k | return PredCost <= UnpredCost; |
1915 | 10.8k | } |
1916 | | |
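A rough standalone sketch of the no-branch-predictor, triangle-shaped case of the comparison above, with a plain double standing in for BranchProbability and made-up cycle counts; it only illustrates the arithmetic, not the real LLVM interfaces.

#include <cstdio>

int main() {
  // Hypothetical inputs: a triangle whose true block costs 3 cycles plus one
  // extra cycle of predication overhead, reached with probability 0.5, on a
  // core whose misprediction penalty is 4 cycles.
  const unsigned ScalingUpFactor = 1024;
  unsigned TCycles = 3, FCycles = 0, TExtra = 1, FExtra = 0;
  double Probability = 0.5;
  unsigned MispredictionPenalty = 4;

  unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor;

  // No-branch-predictor, triangle shape (FCycles == 0): falling through into
  // the true block costs its cycles plus a not-taken branch; skipping it
  // costs a taken branch.
  unsigned NotTakenBranchCost = 1;
  unsigned TUnpredCycles = TCycles + NotTakenBranchCost;
  unsigned FUnpredCycles = MispredictionPenalty;
  unsigned UnpredCost =
      (unsigned)(Probability * TUnpredCycles * ScalingUpFactor) +
      (unsigned)((1.0 - Probability) * FUnpredCycles * ScalingUpFactor);

  std::printf("PredCost=%u UnpredCost=%u -> %s\n", PredCost, UnpredCost,
              PredCost <= UnpredCost ? "if-convert" : "keep the branch");
  return 0;
}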
1917 | | bool |
1918 | | ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB, |
1919 | 150 | MachineBasicBlock &FMBB) const { |
1920 | 150 | // Reduce false anti-dependencies to let the target's out-of-order execution |
1921 | 150 | // engine do its thing. |
1922 | 150 | return Subtarget.isProfitableToUnpredicate(); |
1923 | 150 | } |
1924 | | |
1925 | | /// getInstrPredicate - If the instruction is predicated, returns its predicate |
1926 | | /// condition, otherwise returns AL. It also returns the condition code |
1927 | | /// register by reference. |
1928 | | ARMCC::CondCodes llvm::getInstrPredicate(const MachineInstr &MI, |
1929 | 457k | unsigned &PredReg) { |
1930 | 457k | int PIdx = MI.findFirstPredOperandIdx(); |
1931 | 457k | if (PIdx == -1457k ) { |
1932 | 60.5k | PredReg = 0; |
1933 | 60.5k | return ARMCC::AL; |
1934 | 60.5k | } |
1935 | 396k | |
1936 | 396k | PredReg = MI.getOperand(PIdx+1).getReg(); |
1937 | 396k | return (ARMCC::CondCodes)MI.getOperand(PIdx).getImm(); |
1938 | 396k | } |
1939 | | |
1940 | 0 | unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) { |
1941 | 0 | if (Opc == ARM::B) |
1942 | 0 | return ARM::Bcc; |
1943 | 0 | if (0 Opc == ARM::tB0 ) |
1944 | 0 | return ARM::tBcc; |
1945 | 0 | if (0 Opc == ARM::t2B0 ) |
1946 | 0 | return ARM::t2Bcc; |
1947 | 0 | |
1948 | 0 | llvm_unreachable0 ("Unknown unconditional branch opcode!"); |
1949 | 0 | } |
1950 | | |
1951 | | MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI, |
1952 | | bool NewMI, |
1953 | | unsigned OpIdx1, |
1954 | 36.3k | unsigned OpIdx2) const { |
1955 | 36.3k | switch (MI.getOpcode()) { |
1956 | 4.88k | case ARM::MOVCCr: |
1957 | 4.88k | case ARM::t2MOVCCr: { |
1958 | 4.88k | // MOVCC can be commuted by inverting the condition. |
1959 | 4.88k | unsigned PredReg = 0; |
1960 | 4.88k | ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg); |
1961 | 4.88k | // MOVCC AL can't be inverted. Shouldn't happen. |
1962 | 4.88k | if (CC == ARMCC::AL || 4.88k PredReg != ARM::CPSR4.88k ) |
1963 | 0 | return nullptr; |
1964 | 4.88k | MachineInstr *CommutedMI = |
1965 | 4.88k | TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); |
1966 | 4.88k | if (!CommutedMI) |
1967 | 0 | return nullptr; |
1968 | 4.88k | // After swapping the MOVCC operands, also invert the condition. |
1969 | 4.88k | CommutedMI->getOperand(CommutedMI->findFirstPredOperandIdx()) |
1970 | 4.88k | .setImm(ARMCC::getOppositeCondition(CC)); |
1971 | 4.88k | return CommutedMI; |
1972 | 4.88k | } |
1973 | 31.4k | } |
1974 | 31.4k | return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); |
1975 | 31.4k | } |
1976 | | |
1977 | | /// Identify instructions that can be folded into a MOVCC instruction, and |
1978 | | /// return the defining instruction. |
1979 | | static MachineInstr *canFoldIntoMOVCC(unsigned Reg, |
1980 | | const MachineRegisterInfo &MRI, |
1981 | 3.56k | const TargetInstrInfo *TII) { |
1982 | 3.56k | if (!TargetRegisterInfo::isVirtualRegister(Reg)) |
1983 | 0 | return nullptr; |
1984 | 3.56k | if (3.56k !MRI.hasOneNonDBGUse(Reg)3.56k ) |
1985 | 1.75k | return nullptr; |
1986 | 1.80k | MachineInstr *MI = MRI.getVRegDef(Reg); |
1987 | 1.80k | if (!MI) |
1988 | 0 | return nullptr; |
1989 | 1.80k | // MI is folded into the MOVCC by predicating it. |
1990 | 1.80k | if (1.80k !MI->isPredicable()1.80k ) |
1991 | 245 | return nullptr; |
1992 | 1.56k | // Check if MI has any non-dead defs or physreg uses. This also detects |
1993 | 1.56k | // predicated instructions which will be reading CPSR. |
1994 | 8.10k | for (unsigned i = 1, e = MI->getNumOperands(); 1.56k i != e8.10k ; ++i6.54k ) { |
1995 | 6.85k | const MachineOperand &MO = MI->getOperand(i); |
1996 | 6.85k | // Reject frame index operands, PEI can't handle the predicated pseudos. |
1997 | 6.85k | if (MO.isFI() || 6.85k MO.isCPI()6.83k || MO.isJTI()6.80k ) |
1998 | 51 | return nullptr; |
1999 | 6.80k | if (6.80k !MO.isReg()6.80k ) |
2000 | 2.05k | continue; |
2001 | 4.74k | // MI can't have any tied operands, that would conflict with predication. |
2002 | 4.74k | if (4.74k MO.isTied()4.74k ) |
2003 | 91 | return nullptr; |
2004 | 4.65k | if (4.65k TargetRegisterInfo::isPhysicalRegister(MO.getReg())4.65k ) |
2005 | 149 | return nullptr; |
2006 | 4.50k | if (4.50k MO.isDef() && 4.50k !MO.isDead()24 ) |
2007 | 24 | return nullptr; |
2008 | 6.85k | } |
2009 | 1.24k | bool DontMoveAcrossStores = true; |
2010 | 1.24k | if (!MI->isSafeToMove(/* AliasAnalysis = */ nullptr, DontMoveAcrossStores)) |
2011 | 18 | return nullptr; |
2012 | 1.23k | return MI; |
2013 | 1.23k | } |
2014 | | |
2015 | | bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr &MI, |
2016 | | SmallVectorImpl<MachineOperand> &Cond, |
2017 | | unsigned &TrueOp, unsigned &FalseOp, |
2018 | 2.27k | bool &Optimizable) const { |
2019 | 2.27k | assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) && |
2020 | 2.27k | "Unknown select instruction"); |
2021 | 2.27k | // MOVCC operands: |
2022 | 2.27k | // 0: Def. |
2023 | 2.27k | // 1: True use. |
2024 | 2.27k | // 2: False use. |
2025 | 2.27k | // 3: Condition code. |
2026 | 2.27k | // 4: CPSR use. |
2027 | 2.27k | TrueOp = 1; |
2028 | 2.27k | FalseOp = 2; |
2029 | 2.27k | Cond.push_back(MI.getOperand(3)); |
2030 | 2.27k | Cond.push_back(MI.getOperand(4)); |
2031 | 2.27k | // We can always fold a def. |
2032 | 2.27k | Optimizable = true; |
2033 | 2.27k | return false; |
2034 | 2.27k | } |
2035 | | |
2036 | | MachineInstr * |
2037 | | ARMBaseInstrInfo::optimizeSelect(MachineInstr &MI, |
2038 | | SmallPtrSetImpl<MachineInstr *> &SeenMIs, |
2039 | 2.27k | bool PreferFalse) const { |
2040 | 2.27k | assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) && |
2041 | 2.27k | "Unknown select instruction"); |
2042 | 2.27k | MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); |
2043 | 2.27k | MachineInstr *DefMI = canFoldIntoMOVCC(MI.getOperand(2).getReg(), MRI, this); |
2044 | 2.27k | bool Invert = !DefMI; |
2045 | 2.27k | if (!DefMI) |
2046 | 1.29k | DefMI = canFoldIntoMOVCC(MI.getOperand(1).getReg(), MRI, this); |
2047 | 2.27k | if (!DefMI) |
2048 | 1.03k | return nullptr; |
2049 | 1.23k | |
2050 | 1.23k | // Find new register class to use. |
2051 | 1.23k | MachineOperand FalseReg = MI.getOperand(Invert ? 1.23k 2253 : 1978 ); |
2052 | 1.23k | unsigned DestReg = MI.getOperand(0).getReg(); |
2053 | 1.23k | const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg()); |
2054 | 1.23k | if (!MRI.constrainRegClass(DestReg, PreviousClass)) |
2055 | 0 | return nullptr; |
2056 | 1.23k | |
2057 | 1.23k | // Create a new predicated version of DefMI. |
2058 | 1.23k | // Rfalse is the first use. |
2059 | 1.23k | MachineInstrBuilder NewMI = |
2060 | 1.23k | BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), DefMI->getDesc(), DestReg); |
2061 | 1.23k | |
2062 | 1.23k | // Copy all the DefMI operands, excluding its (null) predicate. |
2063 | 1.23k | const MCInstrDesc &DefDesc = DefMI->getDesc(); |
2064 | 1.23k | for (unsigned i = 1, e = DefDesc.getNumOperands(); |
2065 | 3.45k | i != e && 3.45k !DefDesc.OpInfo[i].isPredicate()3.45k ; ++i2.22k ) |
2066 | 2.22k | NewMI.add(DefMI->getOperand(i)); |
2067 | 1.23k | |
2068 | 1.23k | unsigned CondCode = MI.getOperand(3).getImm(); |
2069 | 1.23k | if (Invert) |
2070 | 253 | NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode))); |
2071 | 1.23k | else |
2072 | 978 | NewMI.addImm(CondCode); |
2073 | 1.23k | NewMI.add(MI.getOperand(4)); |
2074 | 1.23k | |
2075 | 1.23k | // DefMI is not the -S version that sets CPSR, so add an optional %noreg. |
2076 | 1.23k | if (NewMI->hasOptionalDef()) |
2077 | 1.11k | NewMI.add(condCodeOp()); |
2078 | 1.23k | |
2079 | 1.23k | // The output register value when the predicate is false is an implicit |
2080 | 1.23k | // register operand tied to the first def. |
2081 | 1.23k | // The tie makes the register allocator ensure the FalseReg is allocated the |
2082 | 1.23k | // same register as operand 0. |
2083 | 1.23k | FalseReg.setImplicit(); |
2084 | 1.23k | NewMI.add(FalseReg); |
2085 | 1.23k | NewMI->tieOperands(0, NewMI->getNumOperands() - 1); |
2086 | 1.23k | |
2087 | 1.23k | // Update SeenMIs set: register newly created MI and erase removed DefMI. |
2088 | 1.23k | SeenMIs.insert(NewMI); |
2089 | 1.23k | SeenMIs.erase(DefMI); |
2090 | 1.23k | |
2091 | 1.23k | // If MI is inside a loop, and DefMI is outside the loop, then kill flags on |
2092 | 1.23k | // DefMI would be invalid when transferred inside the loop. Checking for a |
2093 | 1.23k | // loop is expensive, but at least remove kill flags if they are in different |
2094 | 1.23k | // BBs. |
2095 | 1.23k | if (DefMI->getParent() != MI.getParent()) |
2096 | 51 | NewMI->clearKillInfo(); |
2097 | 2.27k | |
2098 | 2.27k | // The caller will erase MI, but not DefMI. |
2099 | 2.27k | DefMI->eraseFromParent(); |
2100 | 2.27k | return NewMI; |
2101 | 2.27k | } |
2102 | | |
2103 | | /// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the |
2104 | | /// instruction is encoded with an 'S' bit is determined by the optional CPSR |
2105 | | /// def operand. |
2106 | | /// |
2107 | | /// This will go away once we can teach tblgen how to set the optional CPSR def |
2108 | | /// operand itself. |
2109 | | struct AddSubFlagsOpcodePair { |
2110 | | uint16_t PseudoOpc; |
2111 | | uint16_t MachineOpc; |
2112 | | }; |
2113 | | |
2114 | | static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = { |
2115 | | {ARM::ADDSri, ARM::ADDri}, |
2116 | | {ARM::ADDSrr, ARM::ADDrr}, |
2117 | | {ARM::ADDSrsi, ARM::ADDrsi}, |
2118 | | {ARM::ADDSrsr, ARM::ADDrsr}, |
2119 | | |
2120 | | {ARM::SUBSri, ARM::SUBri}, |
2121 | | {ARM::SUBSrr, ARM::SUBrr}, |
2122 | | {ARM::SUBSrsi, ARM::SUBrsi}, |
2123 | | {ARM::SUBSrsr, ARM::SUBrsr}, |
2124 | | |
2125 | | {ARM::RSBSri, ARM::RSBri}, |
2126 | | {ARM::RSBSrsi, ARM::RSBrsi}, |
2127 | | {ARM::RSBSrsr, ARM::RSBrsr}, |
2128 | | |
2129 | | {ARM::tADDSi3, ARM::tADDi3}, |
2130 | | {ARM::tADDSi8, ARM::tADDi8}, |
2131 | | {ARM::tADDSrr, ARM::tADDrr}, |
2132 | | {ARM::tADCS, ARM::tADC}, |
2133 | | |
2134 | | {ARM::tSUBSi3, ARM::tSUBi3}, |
2135 | | {ARM::tSUBSi8, ARM::tSUBi8}, |
2136 | | {ARM::tSUBSrr, ARM::tSUBrr}, |
2137 | | {ARM::tSBCS, ARM::tSBC}, |
2138 | | |
2139 | | {ARM::t2ADDSri, ARM::t2ADDri}, |
2140 | | {ARM::t2ADDSrr, ARM::t2ADDrr}, |
2141 | | {ARM::t2ADDSrs, ARM::t2ADDrs}, |
2142 | | |
2143 | | {ARM::t2SUBSri, ARM::t2SUBri}, |
2144 | | {ARM::t2SUBSrr, ARM::t2SUBrr}, |
2145 | | {ARM::t2SUBSrs, ARM::t2SUBrs}, |
2146 | | |
2147 | | {ARM::t2RSBSri, ARM::t2RSBri}, |
2148 | | {ARM::t2RSBSrs, ARM::t2RSBrs}, |
2149 | | }; |
2150 | | |
2151 | 1.20M | unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) { |
2152 | 33.8M | for (unsigned i = 0, e = array_lengthof(AddSubFlagsOpcodeMap); i != e33.8M ; ++i32.6M ) |
2153 | 32.6M | if (32.6M OldOpc == AddSubFlagsOpcodeMap[i].PseudoOpc32.6M ) |
2154 | 2.82k | return AddSubFlagsOpcodeMap[i].MachineOpc; |
2155 | 1.20M | return 0; |
2156 | 1.20M | } |
2157 | | |
2158 | | void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB, |
2159 | | MachineBasicBlock::iterator &MBBI, |
2160 | | const DebugLoc &dl, unsigned DestReg, |
2161 | | unsigned BaseReg, int NumBytes, |
2162 | | ARMCC::CondCodes Pred, unsigned PredReg, |
2163 | | const ARMBaseInstrInfo &TII, |
2164 | 2.80k | unsigned MIFlags) { |
2165 | 2.80k | if (NumBytes == 0 && 2.80k DestReg != BaseReg301 ) { |
2166 | 301 | BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg) |
2167 | 301 | .addReg(BaseReg, RegState::Kill) |
2168 | 301 | .add(predOps(Pred, PredReg)) |
2169 | 301 | .add(condCodeOp()) |
2170 | 301 | .setMIFlags(MIFlags); |
2171 | 301 | return; |
2172 | 301 | } |
2173 | 2.50k | |
2174 | 2.50k | bool isSub = NumBytes < 0; |
2175 | 2.50k | if (isSub2.50k ) NumBytes = -NumBytes1.15k ; |
2176 | 2.50k | |
2177 | 5.06k | while (NumBytes5.06k ) { |
2178 | 2.55k | unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes); |
2179 | 2.55k | unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt); |
2180 | 2.55k | assert(ThisVal && "Didn't extract field correctly"); |
2181 | 2.55k | |
2182 | 2.55k | // We will handle these bits from offset, clear them. |
2183 | 2.55k | NumBytes &= ~ThisVal; |
2184 | 2.55k | |
2185 | 2.55k | assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?"); |
2186 | 2.55k | |
2187 | 2.55k | // Build the new ADD / SUB. |
2188 | 2.55k | unsigned Opc = isSub ? ARM::SUBri1.19k : ARM::ADDri1.36k ; |
2189 | 2.55k | BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) |
2190 | 2.55k | .addReg(BaseReg, RegState::Kill) |
2191 | 2.55k | .addImm(ThisVal) |
2192 | 2.55k | .add(predOps(Pred, PredReg)) |
2193 | 2.55k | .add(condCodeOp()) |
2194 | 2.55k | .setMIFlags(MIFlags); |
2195 | 2.55k | BaseReg = DestReg; |
2196 | 2.55k | } |
2197 | 2.80k | } |
2198 | | |
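The loop above peels the byte count apart so that each piece is a legal ARM modified immediate and becomes one ADD or SUB. A simplified standalone sketch of that splitting follows (nextChunk is a made-up helper rather than ARM_AM, and the wrap-around form of SO-immediates is ignored):

#include <cstdio>

// Simplified splitting of a byte count into "8-bit value at an even bit
// position" chunks, the same shape as the loop above.
static unsigned nextChunk(unsigned Bytes) {
  if ((Bytes & ~0xFFu) == 0)
    return Bytes;                            // already fits in 8 bits
  unsigned TZ = __builtin_ctz(Bytes) & ~1u;  // rotations are even only
  return Bytes & (0xFFu << TZ);
}

int main() {
  unsigned NumBytes = 0x12345;               // e.g. a large stack adjustment
  while (NumBytes) {
    unsigned ThisVal = nextChunk(NumBytes);
    std::printf("add/sub #0x%x\n", ThisVal); // 0x45, 0x2300, 0x10000
    NumBytes &= ~ThisVal;
  }
  return 0;
}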
2199 | | bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, |
2200 | | MachineFunction &MF, MachineInstr *MI, |
2201 | 6.16k | unsigned NumBytes) { |
2202 | 6.16k | // This optimisation potentially adds lots of load and store |
2203 | 6.16k | // micro-operations, it's only really a great benefit to code-size. |
2204 | 6.16k | if (!MF.getFunction()->optForMinSize()) |
2205 | 5.52k | return false; |
2206 | 635 | |
2207 | 635 | // If only one register is pushed/popped, LLVM can use an LDR/STR |
2208 | 635 | // instead. We can't modify those so make sure we're dealing with an |
2209 | 635 | // instruction we understand. |
2210 | 635 | bool IsPop = isPopOpcode(MI->getOpcode()); |
2211 | 635 | bool IsPush = isPushOpcode(MI->getOpcode()); |
2212 | 635 | if (!IsPush && 635 !IsPop316 ) |
2213 | 4 | return false; |
2214 | 631 | |
2215 | 631 | bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD || |
2216 | 617 | MI->getOpcode() == ARM::VLDMDIA_UPD; |
2217 | 631 | bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH || |
2218 | 463 | MI->getOpcode() == ARM::tPOP || |
2219 | 461 | MI->getOpcode() == ARM::tPOP_RET; |
2220 | 631 | |
2221 | 631 | assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP && |
2222 | 631 | MI->getOperand(1).getReg() == ARM::SP)) && |
2223 | 631 | "trying to fold sp update into non-sp-updating push/pop"); |
2224 | 631 | |
2225 | 631 | // The VFP push & pop act on D-registers, so we can only correctly fold in |
2226 | 631 | // an adjustment that is a multiple of 8 bytes. Similarly, rN is 4 bytes. Don't try |
2227 | 631 | // if this is violated. |
2228 | 631 | if (NumBytes % (IsVFPPushPop ? 631 828 : 4603 ) != 0) |
2229 | 2 | return false; |
2230 | 629 | |
2231 | 629 | // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+ |
2232 | 629 | // pred) so the list starts at 4. Thumb1 starts after the predicate. |
2233 | 629 | int RegListIdx = IsT1PushPop ? 629 2337 : 4292 ; |
2234 | 629 | |
2235 | 629 | // Calculate the space we'll need in terms of registers. |
2236 | 629 | unsigned RegsNeeded; |
2237 | 629 | const TargetRegisterClass *RegClass; |
2238 | 629 | if (IsVFPPushPop629 ) { |
2239 | 26 | RegsNeeded = NumBytes / 8; |
2240 | 26 | RegClass = &ARM::DPRRegClass; |
2241 | 629 | } else { |
2242 | 603 | RegsNeeded = NumBytes / 4; |
2243 | 603 | RegClass = &ARM::GPRRegClass; |
2244 | 603 | } |
2245 | 629 | |
2246 | 629 | // We're going to have to strip all list operands off before |
2247 | 629 | // re-adding them since the order matters, so save the existing ones |
2248 | 629 | // for later. |
2249 | 629 | SmallVector<MachineOperand, 4> RegList; |
2250 | 629 | |
2251 | 629 | // We're also going to need the first register transferred by this |
2252 | 629 | // instruction, which won't necessarily be the first register in the list. |
2253 | 629 | unsigned FirstRegEnc = -1; |
2254 | 629 | |
2255 | 629 | const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo(); |
2256 | 4.31k | for (int i = MI->getNumOperands() - 1; i >= RegListIdx4.31k ; --i3.68k ) { |
2257 | 3.68k | MachineOperand &MO = MI->getOperand(i); |
2258 | 3.68k | RegList.push_back(MO); |
2259 | 3.68k | |
2260 | 3.68k | if (MO.isReg() && 3.68k TRI->getEncodingValue(MO.getReg()) < FirstRegEnc3.68k ) |
2261 | 2.04k | FirstRegEnc = TRI->getEncodingValue(MO.getReg()); |
2262 | 3.68k | } |
2263 | 629 | |
2264 | 629 | const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF); |
2265 | 629 | |
2266 | 629 | // Now try to find enough space in the reglist to allocate NumBytes. |
2267 | 1.83k | for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && 1.83k RegsNeeded1.43k ; |
2268 | 1.20k | --CurRegEnc1.20k ) { |
2269 | 1.21k | unsigned CurReg = RegClass->getRegister(CurRegEnc); |
2270 | 1.21k | if (!IsPop1.21k ) { |
2271 | 782 | // Pushing any register is completely harmless, mark the |
2272 | 782 | // register involved as undef since we don't care about it in |
2273 | 782 | // the slightest. |
2274 | 782 | RegList.push_back(MachineOperand::CreateReg(CurReg, false, false, |
2275 | 782 | false, false, true)); |
2276 | 782 | --RegsNeeded; |
2277 | 782 | continue; |
2278 | 782 | } |
2279 | 431 | |
2280 | 431 | // However, we can only pop an extra register if it's not live. For |
2281 | 431 | // registers live within the function we might clobber a return value |
2282 | 431 | // register; the other way a register can be live here is if it's |
2283 | 431 | // callee-saved. |
2284 | 431 | if (431 isCalleeSavedRegister(CurReg, CSRegs) || |
2285 | 306 | MI->getParent()->computeRegisterLiveness(TRI, CurReg, MI) != |
2286 | 431 | MachineBasicBlock::LQR_Dead) { |
2287 | 145 | // VFP pops don't allow holes in the register list, so any skip is fatal |
2288 | 145 | // for our transformation. GPR pops do, so we should just keep looking. |
2289 | 145 | if (IsVFPPushPop) |
2290 | 8 | return false; |
2291 | 145 | else |
2292 | 137 | continue; |
2293 | 286 | } |
2294 | 286 | |
2295 | 286 | // Mark the unimportant registers as <def,dead> in the POP. |
2296 | 286 | RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false, |
2297 | 286 | true)); |
2298 | 286 | --RegsNeeded; |
2299 | 286 | } |
2300 | 629 | |
2301 | 621 | if (621 RegsNeeded > 0621 ) |
2302 | 262 | return false; |
2303 | 359 | |
2304 | 359 | // Finally we know we can profitably perform the optimisation so go |
2305 | 359 | // ahead: strip all existing registers off and add them back again |
2306 | 359 | // in the right order. |
2307 | 2.22k | for (int i = MI->getNumOperands() - 1; 359 i >= RegListIdx2.22k ; --i1.86k ) |
2308 | 1.86k | MI->RemoveOperand(i); |
2309 | 359 | |
2310 | 359 | // Add the complete list back in. |
2311 | 359 | MachineInstrBuilder MIB(MF, &*MI); |
2312 | 2.65k | for (int i = RegList.size() - 1; i >= 02.65k ; --i2.29k ) |
2313 | 2.29k | MIB.add(RegList[i]); |
2314 | 6.16k | |
2315 | 6.16k | return true; |
2316 | 6.16k | } |
2317 | | |
2318 | | bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, |
2319 | | unsigned FrameReg, int &Offset, |
2320 | 7.78k | const ARMBaseInstrInfo &TII) { |
2321 | 7.78k | unsigned Opcode = MI.getOpcode(); |
2322 | 7.78k | const MCInstrDesc &Desc = MI.getDesc(); |
2323 | 7.78k | unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); |
2324 | 7.78k | bool isSub = false; |
2325 | 7.78k | |
2326 | 7.78k | // Memory operands in inline assembly always use AddrMode2. |
2327 | 7.78k | if (Opcode == ARM::INLINEASM) |
2328 | 0 | AddrMode = ARMII::AddrMode2; |
2329 | 7.78k | |
2330 | 7.78k | if (Opcode == ARM::ADDri7.78k ) { |
2331 | 1.22k | Offset += MI.getOperand(FrameRegIdx+1).getImm(); |
2332 | 1.22k | if (Offset == 01.22k ) { |
2333 | 158 | // Turn it into a move. |
2334 | 158 | MI.setDesc(TII.get(ARM::MOVr)); |
2335 | 158 | MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); |
2336 | 158 | MI.RemoveOperand(FrameRegIdx+1); |
2337 | 158 | Offset = 0; |
2338 | 158 | return true; |
2339 | 1.07k | } else if (1.07k Offset < 01.07k ) { |
2340 | 51 | Offset = -Offset; |
2341 | 51 | isSub = true; |
2342 | 51 | MI.setDesc(TII.get(ARM::SUBri)); |
2343 | 51 | } |
2344 | 1.22k | |
2345 | 1.22k | // Common case: small offset, fits into instruction. |
2346 | 1.07k | if (1.07k ARM_AM::getSOImmVal(Offset) != -11.07k ) { |
2347 | 749 | // Replace the FrameIndex with sp / fp |
2348 | 749 | MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); |
2349 | 749 | MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset); |
2350 | 749 | Offset = 0; |
2351 | 749 | return true; |
2352 | 749 | } |
2353 | 322 | |
2354 | 322 | // Otherwise, pull as much of the immediate into this ADDri/SUBri |
2355 | 322 | // as possible. |
2356 | 322 | unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset); |
2357 | 322 | unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt); |
2358 | 322 | |
2359 | 322 | // We will handle these bits from offset, clear them. |
2360 | 322 | Offset &= ~ThisImmVal; |
2361 | 322 | |
2362 | 322 | // Get the properly encoded SOImmVal field. |
2363 | 322 | assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 && |
2364 | 322 | "Bit extraction didn't work?"); |
2365 | 322 | MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal); |
2366 | 7.78k | } else { |
2367 | 6.55k | unsigned ImmIdx = 0; |
2368 | 6.55k | int InstrOffs = 0; |
2369 | 6.55k | unsigned NumBits = 0; |
2370 | 6.55k | unsigned Scale = 1; |
2371 | 6.55k | switch (AddrMode) { |
2372 | 5.93k | case ARMII::AddrMode_i12: |
2373 | 5.93k | ImmIdx = FrameRegIdx + 1; |
2374 | 5.93k | InstrOffs = MI.getOperand(ImmIdx).getImm(); |
2375 | 5.93k | NumBits = 12; |
2376 | 5.93k | break; |
2377 | 0 | case ARMII::AddrMode2: |
2378 | 0 | ImmIdx = FrameRegIdx+2; |
2379 | 0 | InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm()); |
2380 | 0 | if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) |
2381 | 0 | InstrOffs *= -1; |
2382 | 0 | NumBits = 12; |
2383 | 0 | break; |
2384 | 61 | case ARMII::AddrMode3: |
2385 | 61 | ImmIdx = FrameRegIdx+2; |
2386 | 61 | InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm()); |
2387 | 61 | if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) |
2388 | 0 | InstrOffs *= -1; |
2389 | 61 | NumBits = 8; |
2390 | 61 | break; |
2391 | 29 | case ARMII::AddrMode4: |
2392 | 29 | case ARMII::AddrMode6: |
2393 | 29 | // Can't fold any offset even if it's zero. |
2394 | 29 | return false; |
2395 | 530 | case ARMII::AddrMode5: |
2396 | 530 | ImmIdx = FrameRegIdx+1; |
2397 | 530 | InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm()); |
2398 | 530 | if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) |
2399 | 0 | InstrOffs *= -1; |
2400 | 530 | NumBits = 8; |
2401 | 530 | Scale = 4; |
2402 | 530 | break; |
2403 | 0 | default: |
2404 | 0 | llvm_unreachable("Unsupported addressing mode!"); |
2405 | 6.52k | } |
2406 | 6.52k | |
2407 | 6.52k | Offset += InstrOffs * Scale; |
2408 | 6.52k | assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!"); |
2409 | 6.52k | if (Offset < 06.52k ) { |
2410 | 218 | Offset = -Offset; |
2411 | 218 | isSub = true; |
2412 | 218 | } |
2413 | 6.52k | |
2414 | 6.52k | // Attempt to fold the address computation if the opcode has offset bits |
2415 | 6.52k | if (NumBits > 06.52k ) { |
2416 | 6.52k | // Common case: small offset, fits into instruction. |
2417 | 6.52k | MachineOperand &ImmOp = MI.getOperand(ImmIdx); |
2418 | 6.52k | int ImmedOffset = Offset / Scale; |
2419 | 6.52k | unsigned Mask = (1 << NumBits) - 1; |
2420 | 6.52k | if ((unsigned)Offset <= Mask * Scale6.52k ) { |
2421 | 6.48k | // Replace the FrameIndex with sp |
2422 | 6.48k | MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); |
2423 | 6.48k | // FIXME: When addrmode2 goes away, this will simplify (like the |
2424 | 6.48k | // T2 version), as the LDR.i12 versions don't need the encoding |
2425 | 6.48k | // tricks for the offset value. |
2426 | 6.48k | if (isSub6.48k ) { |
2427 | 218 | if (AddrMode == ARMII::AddrMode_i12) |
2428 | 172 | ImmedOffset = -ImmedOffset; |
2429 | 218 | else |
2430 | 46 | ImmedOffset |= 1 << NumBits; |
2431 | 218 | } |
2432 | 6.48k | ImmOp.ChangeToImmediate(ImmedOffset); |
2433 | 6.48k | Offset = 0; |
2434 | 6.48k | return true; |
2435 | 6.48k | } |
2436 | 46 | |
2437 | 46 | // Otherwise, it didn't fit. Pull in what we can to simplify the immed. |
2438 | 46 | ImmedOffset = ImmedOffset & Mask; |
2439 | 46 | if (isSub46 ) { |
2440 | 0 | if (AddrMode == ARMII::AddrMode_i12) |
2441 | 0 | ImmedOffset = -ImmedOffset; |
2442 | 0 | else |
2443 | 0 | ImmedOffset |= 1 << NumBits; |
2444 | 0 | } |
2445 | 6.52k | ImmOp.ChangeToImmediate(ImmedOffset); |
2446 | 6.52k | Offset &= ~(Mask*Scale); |
2447 | 6.52k | } |
2448 | 6.55k | } |
2449 | 7.78k | |
2450 | 368 | Offset = (isSub) ? 368 -Offset1 : Offset367 ; |
2451 | 368 | return Offset == 0; |
2452 | 7.78k | } |
2453 | | |
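A standalone sketch of the offset-splitting arithmetic above for an immediate field of NumBits bits scaled by Scale; splitOffset is a made-up helper, not the LLVM routine, and the 8-bit, scale-4 numbers in main correspond to the AddrMode5 (VLDR/VSTR) case handled above.

#include <cstdio>

// Split Offset into the part that fits the encoded immediate field and the
// leftover that still needs its own ADD, mirroring the masking logic above.
static void splitOffset(int Offset, unsigned NumBits, unsigned Scale) {
  unsigned Mask = (1u << NumBits) - 1;
  int ImmedOffset = Offset / (int)Scale;
  if ((unsigned)Offset <= Mask * Scale) {
    std::printf("offset %d fits directly (encoded field %d)\n", Offset,
                ImmedOffset);
    return;
  }
  int Folded = (ImmedOffset & (int)Mask) * (int)Scale; // goes into the insn
  int Leftover = Offset & ~(int)(Mask * Scale);        // needs a separate ADD
  std::printf("offset %d: fold %d, leftover %d\n", Offset, Folded, Leftover);
}

int main() {
  splitOffset(1020, 8, 4); // largest directly encodable AddrMode5 offset
  splitOffset(1028, 8, 4); // fold 4, leave 1024 for a separate add
  return 0;
}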
2454 | | /// analyzeCompare - For a comparison instruction, return the source registers |
2455 | | /// in SrcReg and SrcReg2 if it has two register operands, and the value it |
2456 | | /// compares against in CmpValue. Return true if the comparison instruction |
2457 | | /// can be analyzed. |
2458 | | bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, |
2459 | | unsigned &SrcReg2, int &CmpMask, |
2460 | 24.9k | int &CmpValue) const { |
2461 | 24.9k | switch (MI.getOpcode()) { |
2462 | 1.17k | default: break; |
2463 | 19.2k | case ARM::CMPri: |
2464 | 19.2k | case ARM::t2CMPri: |
2465 | 19.2k | case ARM::tCMPi8: |
2466 | 19.2k | SrcReg = MI.getOperand(0).getReg(); |
2467 | 19.2k | SrcReg2 = 0; |
2468 | 19.2k | CmpMask = ~0; |
2469 | 19.2k | CmpValue = MI.getOperand(1).getImm(); |
2470 | 19.2k | return true; |
2471 | 4.23k | case ARM::CMPrr: |
2472 | 4.23k | case ARM::t2CMPrr: |
2473 | 4.23k | SrcReg = MI.getOperand(0).getReg(); |
2474 | 4.23k | SrcReg2 = MI.getOperand(1).getReg(); |
2475 | 4.23k | CmpMask = ~0; |
2476 | 4.23k | CmpValue = 0; |
2477 | 4.23k | return true; |
2478 | 227 | case ARM::TSTri: |
2479 | 227 | case ARM::t2TSTri: |
2480 | 227 | SrcReg = MI.getOperand(0).getReg(); |
2481 | 227 | SrcReg2 = 0; |
2482 | 227 | CmpMask = MI.getOperand(1).getImm(); |
2483 | 227 | CmpValue = 0; |
2484 | 227 | return true; |
2485 | 1.17k | } |
2486 | 1.17k | |
2487 | 1.17k | return false; |
2488 | 1.17k | } |
2489 | | |
2490 | | /// isSuitableForMask - Identify a suitable 'and' instruction that |
2491 | | /// operates on the given source register and applies the same mask |
2492 | | /// as a 'tst' instruction. Provide a limited look-through for copies. |
2493 | | /// When successful, MI will hold the found instruction. |
2494 | | static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg, |
2495 | 574 | int CmpMask, bool CommonUse) { |
2496 | 574 | switch (MI->getOpcode()) { |
2497 | 12 | case ARM::ANDri: |
2498 | 12 | case ARM::t2ANDri: |
2499 | 12 | if (CmpMask != MI->getOperand(2).getImm()) |
2500 | 12 | return false; |
2501 | 0 | if (0 SrcReg == MI->getOperand(CommonUse ? 0 10 : 00 ).getReg()) |
2502 | 0 | return true; |
2503 | 0 | break; |
2504 | 562 | } |
2505 | 562 | |
2506 | 562 | return false; |
2507 | 562 | } |
2508 | | |
2509 | | /// getSwappedCondition - assume the flags are set by MI(a,b), return |
2510 | | /// the condition code if we modify the instructions such that flags are |
2511 | | /// set by MI(b,a). |
2512 | 44 | inline static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC) { |
2513 | 44 | switch (CC) { |
2514 | 0 | default: return ARMCC::AL; |
2515 | 0 | case ARMCC::EQ: return ARMCC::EQ; |
2516 | 3 | case ARMCC::NE: return ARMCC::NE; |
2517 | 0 | case ARMCC::HS: return ARMCC::LS; |
2518 | 15 | case ARMCC::LO: return ARMCC::HI; |
2519 | 6 | case ARMCC::HI: return ARMCC::LO; |
2520 | 0 | case ARMCC::LS: return ARMCC::HS; |
2521 | 0 | case ARMCC::GE: return ARMCC::LE; |
2522 | 6 | case ARMCC::LT: return ARMCC::GT; |
2523 | 14 | case ARMCC::GT: return ARMCC::LT; |
2524 | 0 | case ARMCC::LE: return ARMCC::GE; |
2525 | 0 | } |
2526 | 0 | } |
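The same swap table as a standalone, checkable sketch (CC below is a simplified stand-in for ARMCC::CondCodes): if the flags were produced by cmp(a, b) but will now be produced by cmp(b, a), every consumer's condition must be mirrored, while EQ and NE are unaffected.

    #include <cassert>

    enum class CC { EQ, NE, HS, LO, HI, LS, GE, LT, GT, LE, AL };

    // Mirror a condition across a swap of the compared operands:
    // unsigned < becomes unsigned >, signed >= becomes signed <=, etc.
    CC swapped(CC C) {
      switch (C) {
      case CC::EQ: return CC::EQ;
      case CC::NE: return CC::NE;
      case CC::HS: return CC::LS;
      case CC::LO: return CC::HI;
      case CC::HI: return CC::LO;
      case CC::LS: return CC::HS;
      case CC::GE: return CC::LE;
      case CC::LT: return CC::GT;
      case CC::GT: return CC::LT;
      case CC::LE: return CC::GE;
      default:     return CC::AL;
      }
    }

    int main() {
      assert(swapped(CC::LT) == CC::GT);          // a < b  iff  b > a
      assert(swapped(swapped(CC::HI)) == CC::HI); // the map is an involution
    }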
2527 | | |
2528 | | /// isRedundantFlagInstr - check whether the first instruction, whose only |
2529 | | /// purpose is to update flags, can be made redundant. |
2530 | | /// CMPrr can be made redundant by SUBrr if the operands are the same. |
2531 | | /// CMPri can be made redundant by SUBri if the operands are the same. |
2532 | | /// This function can be extended later on. |
2533 | | inline static bool isRedundantFlagInstr(MachineInstr *CmpI, unsigned SrcReg, |
2534 | | unsigned SrcReg2, int ImmValue, |
2535 | 12.7k | MachineInstr *OI) { |
2536 | 12.7k | if ((CmpI->getOpcode() == ARM::CMPrr || |
2537 | 12.6k | CmpI->getOpcode() == ARM::t2CMPrr) && |
2538 | 4.57k | (OI->getOpcode() == ARM::SUBrr || |
2539 | 4.57k | OI->getOpcode() == ARM::t2SUBrr) && |
2540 | 72 | ((OI->getOperand(1).getReg() == SrcReg && |
2541 | 15 | OI->getOperand(2).getReg() == SrcReg2) || |
2542 | 57 | (OI->getOperand(1).getReg() == SrcReg2 && |
2543 | 57 | OI->getOperand(2).getReg() == SrcReg))) |
2544 | 34 | return true; |
2545 | 12.7k | |
2546 | 12.7k | if (12.7k (CmpI->getOpcode() == ARM::CMPri || |
2547 | 12.5k | CmpI->getOpcode() == ARM::t2CMPri) && |
2548 | 8.19k | (OI->getOpcode() == ARM::SUBri || |
2549 | 8.19k | OI->getOpcode() == ARM::t2SUBri) && |
2550 | 137 | OI->getOperand(1).getReg() == SrcReg && |
2551 | 45 | OI->getOperand(2).getImm() == ImmValue) |
2552 | 6 | return true; |
2553 | 12.7k | return false; |
2554 | 12.7k | } |
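For example, with the pair described above: '%3 = SUBrr %1, %2' followed later in the block by 'CMPrr %1, %2' adds no new information once the SUB is made flag-setting, so the compare is a removal candidate; with 'CMPrr %2, %1' the operands are swapped, which is why optimizeCompareInstr below combines this check with getSwappedCondition when it rewrites the flag consumers.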
2555 | | |
2556 | 12.2k | static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) { |
2557 | 12.2k | switch (MI->getOpcode()) { |
2558 | 9.01k | default: return false; |
2559 | 125 | case ARM::tLSLri: |
2560 | 125 | case ARM::tLSRri: |
2561 | 125 | case ARM::tLSLrr: |
2562 | 125 | case ARM::tLSRrr: |
2563 | 125 | case ARM::tSUBrr: |
2564 | 125 | case ARM::tADDrr: |
2565 | 125 | case ARM::tADDi3: |
2566 | 125 | case ARM::tADDi8: |
2567 | 125 | case ARM::tSUBi3: |
2568 | 125 | case ARM::tSUBi8: |
2569 | 125 | case ARM::tMUL: |
2570 | 125 | IsThumb1 = true; |
2571 | 125 | LLVM_FALLTHROUGH; |
2572 | 3.27k | case ARM::RSBrr: |
2573 | 3.27k | case ARM::RSBri: |
2574 | 3.27k | case ARM::RSCrr: |
2575 | 3.27k | case ARM::RSCri: |
2576 | 3.27k | case ARM::ADDrr: |
2577 | 3.27k | case ARM::ADDri: |
2578 | 3.27k | case ARM::ADCrr: |
2579 | 3.27k | case ARM::ADCri: |
2580 | 3.27k | case ARM::SUBrr: |
2581 | 3.27k | case ARM::SUBri: |
2582 | 3.27k | case ARM::SBCrr: |
2583 | 3.27k | case ARM::SBCri: |
2584 | 3.27k | case ARM::t2RSBri: |
2585 | 3.27k | case ARM::t2ADDrr: |
2586 | 3.27k | case ARM::t2ADDri: |
2587 | 3.27k | case ARM::t2ADCrr: |
2588 | 3.27k | case ARM::t2ADCri: |
2589 | 3.27k | case ARM::t2SUBrr: |
2590 | 3.27k | case ARM::t2SUBri: |
2591 | 3.27k | case ARM::t2SBCrr: |
2592 | 3.27k | case ARM::t2SBCri: |
2593 | 3.27k | case ARM::ANDrr: |
2594 | 3.27k | case ARM::ANDri: |
2595 | 3.27k | case ARM::t2ANDrr: |
2596 | 3.27k | case ARM::t2ANDri: |
2597 | 3.27k | case ARM::ORRrr: |
2598 | 3.27k | case ARM::ORRri: |
2599 | 3.27k | case ARM::t2ORRrr: |
2600 | 3.27k | case ARM::t2ORRri: |
2601 | 3.27k | case ARM::EORrr: |
2602 | 3.27k | case ARM::EORri: |
2603 | 3.27k | case ARM::t2EORrr: |
2604 | 3.27k | case ARM::t2EORri: |
2605 | 3.27k | case ARM::t2LSRri: |
2606 | 3.27k | case ARM::t2LSRrr: |
2607 | 3.27k | case ARM::t2LSLri: |
2608 | 3.27k | case ARM::t2LSLrr: |
2609 | 3.27k | return true; |
2610 | 0 | } |
2611 | 0 | } |
2612 | | |
2613 | | /// optimizeCompareInstr - Convert the instruction supplying the argument to the |
2614 | | /// comparison into one that sets the zero bit in the flags register; |
2615 | | /// Remove a redundant Compare instruction if an earlier instruction can set the |
2616 | | /// flags in the same way as Compare. |
2617 | | /// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two |
2618 | | /// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the |
2619 | | /// condition code of instructions which use the flags. |
2620 | | bool ARMBaseInstrInfo::optimizeCompareInstr( |
2621 | | MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask, |
2622 | 23.7k | int CmpValue, const MachineRegisterInfo *MRI) const { |
2623 | 23.7k | // Get the unique definition of SrcReg. |
2624 | 23.7k | MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg); |
2625 | 23.7k | if (!MI23.7k ) return false0 ; |
2626 | 23.7k | |
2627 | 23.7k | // Masked compares sometimes use the same register as the corresponding 'and'. |
2628 | 23.7k | if (23.7k CmpMask != ~023.7k ) { |
2629 | 227 | if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || 227 isPredicated(*MI)0 ) { |
2630 | 227 | MI = nullptr; |
2631 | 227 | for (MachineRegisterInfo::use_instr_iterator |
2632 | 227 | UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end(); |
2633 | 855 | UI != UE855 ; ++UI628 ) { |
2634 | 628 | if (UI->getParent() != CmpInstr.getParent()) |
2635 | 281 | continue; |
2636 | 347 | MachineInstr *PotentialAND = &*UI; |
2637 | 347 | if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) || |
2638 | 0 | isPredicated(*PotentialAND)) |
2639 | 347 | continue; |
2640 | 0 | MI = PotentialAND; |
2641 | 0 | break; |
2642 | 0 | } |
2643 | 227 | if (!MI227 ) return false227 ; |
2644 | 23.4k | } |
2645 | 227 | } |
2646 | 23.4k | |
2647 | 23.4k | // Get ready to iterate backward from CmpInstr. |
2648 | 23.4k | MachineBasicBlock::iterator I = CmpInstr, E = MI, |
2649 | 23.4k | B = CmpInstr.getParent()->begin(); |
2650 | 23.4k | |
2651 | 23.4k | // Early exit if CmpInstr is at the beginning of the BB. |
2652 | 23.4k | if (I == B23.4k ) return false2.61k ; |
2653 | 20.8k | |
2654 | 20.8k | // There are two possible candidates which can be changed to set CPSR: |
2655 | 20.8k | // One is MI, the other is a SUB instruction. |
2656 | 20.8k | // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1). |
2657 | 20.8k | // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue). |
2658 | 20.8k | MachineInstr *Sub = nullptr; |
2659 | 20.8k | if (SrcReg2 != 0) |
2660 | 20.8k | // MI is not a candidate for CMPrr. |
2661 | 3.71k | MI = nullptr; |
2662 | 17.1k | else if (17.1k MI->getParent() != CmpInstr.getParent() || 17.1k CmpValue != 015.6k ) { |
2663 | 4.88k | // Conservatively refuse to convert an instruction which isn't in the same |
2664 | 4.88k | // BB as the comparison. |
2665 | 4.88k | // For CMPri w/ CmpValue != 0, a Sub may still be a candidate. |
2666 | 4.88k | // Thus we cannot return here. |
2667 | 4.88k | if (CmpInstr.getOpcode() == ARM::CMPri || |
2668 | 4.75k | CmpInstr.getOpcode() == ARM::t2CMPri) |
2669 | 4.50k | MI = nullptr; |
2670 | 4.88k | else |
2671 | 381 | return false; |
2672 | 20.5k | } |
2673 | 20.5k | |
2674 | 20.5k | bool IsThumb1 = false; |
2675 | 20.5k | if (MI && 20.5k !isOptimizeCompareCandidate(MI, IsThumb1)12.2k ) |
2676 | 9.01k | return false; |
2677 | 11.4k | |
2678 | 11.4k | // We also want to do this peephole for cases like this: if (a*b == 0), |
2679 | 11.4k | // and optimise away the CMP instruction from the generated code sequence: |
2680 | 11.4k | // MULS, MOVS, MOVS, CMP. Here the MOVS instructions load the boolean values |
2681 | 11.4k | // resulting from the select instruction, but these MOVS instructions for |
2682 | 11.4k | // Thumb1 (V6M) are flag setting and are thus preventing this optimisation. |
2683 | 11.4k | // However, if we only have MOVS instructions in between the CMP and the |
2684 | 11.4k | // other instruction (the MULS in this example), then the CPSR is dead so we |
2685 | 11.4k | // can safely reorder the sequence into: MOVS, MOVS, MULS, CMP. We do this |
2686 | 11.4k | // reordering and then continue the analysis hoping we can eliminate the |
2687 | 11.4k | // CMP. This peephole works on the vregs, so is still in SSA form. As a |
2688 | 11.4k | // consequence, the movs won't redefine/kill the MUL operands which would |
2689 | 11.4k | // make this reordering illegal. |
2690 | 11.4k | if (11.4k MI && 11.4k IsThumb13.27k ) { |
2691 | 125 | --I; |
2692 | 125 | bool CanReorder = true; |
2693 | 125 | const bool HasStmts = I != E; |
2694 | 158 | for (; I != E158 ; --I33 ) { |
2695 | 82 | if (I->getOpcode() != ARM::tMOVi882 ) { |
2696 | 49 | CanReorder = false; |
2697 | 49 | break; |
2698 | 49 | } |
2699 | 82 | } |
2700 | 125 | if (HasStmts && 125 CanReorder68 ) { |
2701 | 19 | MI = MI->removeFromParent(); |
2702 | 19 | E = CmpInstr; |
2703 | 19 | CmpInstr.getParent()->insert(E, MI); |
2704 | 19 | } |
2705 | 125 | I = CmpInstr; |
2706 | 125 | E = MI; |
2707 | 125 | } |
2708 | 11.4k | |
2709 | 11.4k | // Check that CPSR isn't set between the comparison instruction and the one we |
2710 | 11.4k | // want to change. At the same time, search for Sub. |
2711 | 11.4k | const TargetRegisterInfo *TRI = &getRegisterInfo(); |
2712 | 11.4k | --I; |
2713 | 22.4k | for (; I != E22.4k ; --I10.9k ) { |
2714 | 13.6k | const MachineInstr &Instr = *I; |
2715 | 13.6k | |
2716 | 13.6k | if (Instr.modifiesRegister(ARM::CPSR, TRI) || |
2717 | 13.3k | Instr.readsRegister(ARM::CPSR, TRI)) |
2718 | 13.6k | // This instruction modifies or uses CPSR after the one we want to |
2719 | 13.6k | // change. We can't do this transformation. |
2720 | 854 | return false; |
2721 | 12.7k | |
2722 | 12.7k | // Check whether CmpInstr can be made redundant by the current instruction. |
2723 | 12.7k | if (12.7k isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &*I)12.7k ) { |
2724 | 40 | Sub = &*I; |
2725 | 40 | break; |
2726 | 40 | } |
2727 | 12.7k | |
2728 | 12.7k | if (12.7k I == B12.7k ) |
2729 | 12.7k | // The 'and' is below the comparison instruction. |
2730 | 1.78k | return false; |
2731 | 13.6k | } |
2732 | 11.4k | |
2733 | 11.4k | // Return false if no candidates exist. |
2734 | 8.85k | if (8.85k !MI && 8.85k !Sub5.95k ) |
2735 | 5.91k | return false; |
2736 | 2.94k | |
2737 | 2.94k | // The single candidate is called MI. |
2738 | 2.94k | if (2.94k !MI2.94k ) MI = Sub40 ; |
2739 | 2.94k | |
2740 | 2.94k | // We can't use a predicated instruction - it doesn't always write the flags. |
2741 | 2.94k | if (isPredicated(*MI)) |
2742 | 4 | return false; |
2743 | 2.94k | |
2744 | 2.94k | // Scan forward for the use of CPSR |
2745 | 2.94k | // When checking against MI: if it's a conditional code that requires |
2746 | 2.94k | // checking of the V bit or C bit, then this is not safe to do. |
2747 | 2.94k | // It is safe to remove CmpInstr if CPSR is redefined or killed. |
2748 | 2.94k | // If we are done with the basic block, we need to check whether CPSR is |
2749 | 2.94k | // live-out. |
2750 | 2.94k | SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4> |
2751 | 2.94k | OperandsToUpdate; |
2752 | 2.94k | bool isSafe = false; |
2753 | 2.94k | I = CmpInstr; |
2754 | 2.94k | E = CmpInstr.getParent()->end(); |
2755 | 8.14k | while (!isSafe && 8.14k ++I != E7.99k ) { |
2756 | 5.69k | const MachineInstr &Instr = *I; |
2757 | 5.69k | for (unsigned IO = 0, EO = Instr.getNumOperands(); |
2758 | 24.6k | !isSafe && 24.6k IO != EO24.6k ; ++IO18.9k ) { |
2759 | 19.6k | const MachineOperand &MO = Instr.getOperand(IO); |
2760 | 19.6k | if (MO.isRegMask() && 19.6k MO.clobbersPhysReg(ARM::CPSR)11 ) { |
2761 | 11 | isSafe = true; |
2762 | 11 | break; |
2763 | 11 | } |
2764 | 19.6k | if (19.6k !MO.isReg() || 19.6k MO.getReg() != ARM::CPSR9.86k ) |
2765 | 16.4k | continue; |
2766 | 3.11k | if (3.11k MO.isDef()3.11k ) { |
2767 | 141 | isSafe = true; |
2768 | 141 | break; |
2769 | 141 | } |
2770 | 2.97k | // Condition code is after the operand before CPSR except for VSELs. |
2771 | 2.97k | ARMCC::CondCodes CC; |
2772 | 2.97k | bool IsInstrVSel = true; |
2773 | 2.97k | switch (Instr.getOpcode()) { |
2774 | 2.97k | default: |
2775 | 2.97k | IsInstrVSel = false; |
2776 | 2.97k | CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm(); |
2777 | 2.97k | break; |
2778 | 4 | case ARM::VSELEQD: |
2779 | 4 | case ARM::VSELEQS: |
2780 | 4 | CC = ARMCC::EQ; |
2781 | 4 | break; |
2782 | 0 | case ARM::VSELGTD: |
2783 | 0 | case ARM::VSELGTS: |
2784 | 0 | CC = ARMCC::GT; |
2785 | 0 | break; |
2786 | 0 | case ARM::VSELGED: |
2787 | 0 | case ARM::VSELGES: |
2788 | 0 | CC = ARMCC::GE; |
2789 | 0 | break; |
2790 | 0 | case ARM::VSELVSS: |
2791 | 0 | case ARM::VSELVSD: |
2792 | 0 | CC = ARMCC::VS; |
2793 | 0 | break; |
2794 | 2.97k | } |
2795 | 2.97k | |
2796 | 2.97k | if (2.97k Sub2.97k ) { |
2797 | 44 | ARMCC::CondCodes NewCC = getSwappedCondition(CC); |
2798 | 44 | if (NewCC == ARMCC::AL) |
2799 | 0 | return false; |
2800 | 44 | // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based |
2801 | 44 | // on CMP needs to be updated to be based on SUB. |
2802 | 44 | // Push the condition code operands to OperandsToUpdate. |
2803 | 44 | // If it is safe to remove CmpInstr, the condition code of these |
2804 | 44 | // operands will be modified. |
2805 | 44 | if (44 SrcReg2 != 0 && 44 Sub->getOperand(1).getReg() == SrcReg238 && |
2806 | 44 | Sub->getOperand(2).getReg() == SrcReg21 ) { |
2807 | 21 | // VSel doesn't support condition code update. |
2808 | 21 | if (IsInstrVSel) |
2809 | 0 | return false; |
2810 | 21 | OperandsToUpdate.push_back( |
2811 | 21 | std::make_pair(&((*I).getOperand(IO - 1)), NewCC)); |
2812 | 21 | } |
2813 | 2.97k | } else { |
2814 | 2.93k | // No Sub, so this is x = <op> y, z; cmp x, 0. |
2815 | 2.93k | switch (CC) { |
2816 | 2.44k | case ARMCC::EQ: // Z |
2817 | 2.44k | case ARMCC::NE: // Z |
2818 | 2.44k | case ARMCC::MI: // N |
2819 | 2.44k | case ARMCC::PL: // N |
2820 | 2.44k | case ARMCC::AL: // none |
2821 | 2.44k | // CPSR can be used multiple times, we should continue. |
2822 | 2.44k | break; |
2823 | 486 | case ARMCC::HS: // C |
2824 | 486 | case ARMCC::LO: // C |
2825 | 486 | case ARMCC::VS: // V |
2826 | 486 | case ARMCC::VC: // V |
2827 | 486 | case ARMCC::HI: // C Z |
2828 | 486 | case ARMCC::LS: // C Z |
2829 | 486 | case ARMCC::GE: // N V |
2830 | 486 | case ARMCC::LT: // N V |
2831 | 486 | case ARMCC::GT: // Z N V |
2832 | 486 | case ARMCC::LE: // Z N V |
2833 | 486 | // The instruction uses the V bit or C bit which is not safe. |
2834 | 486 | return false; |
2835 | 2.93k | } |
2836 | 2.93k | } |
2837 | 19.6k | } |
2838 | 5.69k | } |
2839 | 2.94k | |
2840 | 2.94k | // If CPSR is not killed nor re-defined, we should check whether it is |
2841 | 2.94k | // live-out. If it is live-out, do not optimize. |
2842 | 2.45k | if (2.45k !isSafe2.45k ) { |
2843 | 2.30k | MachineBasicBlock *MBB = CmpInstr.getParent(); |
2844 | 2.30k | for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), |
2845 | 6.57k | SE = MBB->succ_end(); SI != SE6.57k ; ++SI4.27k ) |
2846 | 4.27k | if (4.27k (*SI)->isLiveIn(ARM::CPSR)4.27k ) |
2847 | 3 | return false; |
2848 | 2.30k | } |
2849 | 2.45k | |
2850 | 2.45k | // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always |
2851 | 2.45k | // set CPSR so this is represented as an explicit output) |
2852 | 2.45k | if (2.45k !IsThumb12.45k ) { |
2853 | 2.39k | MI->getOperand(5).setReg(ARM::CPSR); |
2854 | 2.39k | MI->getOperand(5).setIsDef(true); |
2855 | 2.39k | } |
2856 | 2.45k | assert(!isPredicated(*MI) && "Can't use flags from predicated instruction"); |
2857 | 2.45k | CmpInstr.eraseFromParent(); |
2858 | 2.45k | |
2859 | 2.45k | // Modify the condition code of operands in OperandsToUpdate. |
2860 | 2.45k | // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to |
2861 | 2.45k | // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc. |
2862 | 2.47k | for (unsigned i = 0, e = OperandsToUpdate.size(); i < e2.47k ; i++18 ) |
2863 | 18 | OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second); |
2864 | 2.45k | |
2865 | 2.45k | return true; |
2866 | 23.7k | } |
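Schematically, with predicate operands omitted, the peephole rewrites

    %2 = SUBrr %0, %1        ; CPSR operand left unset
    CMPrr %0, %1             ; defines CPSR
    ... conditional uses of CPSR ...

into the same SUBrr with its optional CPSR definition switched on (operand 5 becomes a def of CPSR) and the CMPrr erased. If the compare had been CMPrr %1, %0 instead, each CPSR consumer's condition code is additionally rewritten through getSwappedCondition. In the Thumb1 case the candidate (for instance a tMUL) is first sunk past any intervening tMOVi8 instructions, as described in the comment above, so that its flags reach the compare unclobbered.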
2867 | | |
2868 | | bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, |
2869 | | unsigned Reg, |
2870 | 28.7k | MachineRegisterInfo *MRI) const { |
2871 | 28.7k | // Fold large immediates into add, sub, or, xor. |
2872 | 28.7k | unsigned DefOpc = DefMI.getOpcode(); |
2873 | 28.7k | if (DefOpc != ARM::t2MOVi32imm && 28.7k DefOpc != ARM::MOVi32imm27.6k ) |
2874 | 27.0k | return false; |
2875 | 1.62k | if (1.62k !DefMI.getOperand(1).isImm()1.62k ) |
2876 | 1.62k | // Could be t2MOVi32imm <ga:xx> |
2877 | 692 | return false; |
2878 | 933 | |
2879 | 933 | if (933 !MRI->hasOneNonDBGUse(Reg)933 ) |
2880 | 376 | return false; |
2881 | 557 | |
2882 | 557 | const MCInstrDesc &DefMCID = DefMI.getDesc(); |
2883 | 557 | if (DefMCID.hasOptionalDef()557 ) { |
2884 | 0 | unsigned NumOps = DefMCID.getNumOperands(); |
2885 | 0 | const MachineOperand &MO = DefMI.getOperand(NumOps - 1); |
2886 | 0 | if (MO.getReg() == ARM::CPSR && 0 !MO.isDead()0 ) |
2887 | 0 | // If DefMI defines CPSR and it is not dead, it's obviously not safe |
2888 | 0 | // to delete DefMI. |
2889 | 0 | return false; |
2890 | 557 | } |
2891 | 557 | |
2892 | 557 | const MCInstrDesc &UseMCID = UseMI.getDesc(); |
2893 | 557 | if (UseMCID.hasOptionalDef()557 ) { |
2894 | 204 | unsigned NumOps = UseMCID.getNumOperands(); |
2895 | 204 | if (UseMI.getOperand(NumOps - 1).getReg() == ARM::CPSR) |
2896 | 204 | // If the instruction sets the flag, do not attempt this optimization |
2897 | 204 | // since it may change the semantics of the code. |
2898 | 33 | return false; |
2899 | 524 | } |
2900 | 524 | |
2901 | 524 | unsigned UseOpc = UseMI.getOpcode(); |
2902 | 524 | unsigned NewUseOpc = 0; |
2903 | 524 | uint32_t ImmVal = (uint32_t)DefMI.getOperand(1).getImm(); |
2904 | 524 | uint32_t SOImmValV1 = 0, SOImmValV2 = 0; |
2905 | 524 | bool Commute = false; |
2906 | 524 | switch (UseOpc) { |
2907 | 408 | default: return false; |
2908 | 116 | case ARM::SUBrr: |
2909 | 116 | case ARM::ADDrr: |
2910 | 116 | case ARM::ORRrr: |
2911 | 116 | case ARM::EORrr: |
2912 | 116 | case ARM::t2SUBrr: |
2913 | 116 | case ARM::t2ADDrr: |
2914 | 116 | case ARM::t2ORRrr: |
2915 | 116 | case ARM::t2EORrr: { |
2916 | 116 | Commute = UseMI.getOperand(2).getReg() != Reg; |
2917 | 116 | switch (UseOpc) { |
2918 | 0 | default: break; |
2919 | 5 | case ARM::ADDrr: |
2920 | 5 | case ARM::SUBrr: |
2921 | 5 | if (UseOpc == ARM::SUBrr && 5 Commute2 ) |
2922 | 0 | return false; |
2923 | 5 | |
2924 | 5 | // ADD/SUB are special because they're essentially the same operation, so |
2925 | 5 | // we can handle a larger range of immediates. |
2926 | 5 | if (5 ARM_AM::isSOImmTwoPartVal(ImmVal)5 ) |
2927 | 3 | NewUseOpc = UseOpc == ARM::ADDrr ? 3 ARM::ADDri2 : ARM::SUBri1 ; |
2928 | 2 | else if (2 ARM_AM::isSOImmTwoPartVal(-ImmVal)2 ) { |
2929 | 2 | ImmVal = -ImmVal; |
2930 | 2 | NewUseOpc = UseOpc == ARM::ADDrr ? ARM::SUBri1 : ARM::ADDri1 ; |
2931 | 2 | } else |
2932 | 0 | return false; |
2933 | 5 | SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal); |
2934 | 5 | SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal); |
2935 | 5 | break; |
2936 | 2 | case ARM::ORRrr: |
2937 | 2 | case ARM::EORrr: |
2938 | 2 | if (!ARM_AM::isSOImmTwoPartVal(ImmVal)) |
2939 | 1 | return false; |
2940 | 1 | SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal); |
2941 | 1 | SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal); |
2942 | 1 | switch (UseOpc) { |
2943 | 0 | default: break; |
2944 | 1 | case ARM::ORRrr: NewUseOpc = ARM::ORRri; break; |
2945 | 0 | case ARM::EORrr: NewUseOpc = ARM::EORri; break; |
2946 | 1 | } |
2947 | 1 | break; |
2948 | 102 | case ARM::t2ADDrr: |
2949 | 102 | case ARM::t2SUBrr: |
2950 | 102 | if (UseOpc == ARM::t2SUBrr && 102 Commute3 ) |
2951 | 1 | return false; |
2952 | 101 | |
2953 | 101 | // ADD/SUB are special because they're essentially the same operation, so |
2954 | 101 | // we can handle a larger range of immediates. |
2955 | 101 | if (101 ARM_AM::isT2SOImmTwoPartVal(ImmVal)101 ) |
2956 | 90 | NewUseOpc = UseOpc == ARM::t2ADDrr ? 90 ARM::t2ADDri89 : ARM::t2SUBri1 ; |
2957 | 11 | else if (11 ARM_AM::isT2SOImmTwoPartVal(-ImmVal)11 ) { |
2958 | 2 | ImmVal = -ImmVal; |
2959 | 2 | NewUseOpc = UseOpc == ARM::t2ADDrr ? ARM::t2SUBri1 : ARM::t2ADDri1 ; |
2960 | 2 | } else |
2961 | 9 | return false; |
2962 | 92 | SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal); |
2963 | 92 | SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal); |
2964 | 92 | break; |
2965 | 7 | case ARM::t2ORRrr: |
2966 | 7 | case ARM::t2EORrr: |
2967 | 7 | if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal)) |
2968 | 4 | return false; |
2969 | 3 | SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal); |
2970 | 3 | SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal); |
2971 | 3 | switch (UseOpc) { |
2972 | 0 | default: break; |
2973 | 3 | case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break; |
2974 | 0 | case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break; |
2975 | 3 | } |
2976 | 3 | break; |
2977 | 116 | } |
2978 | 116 | } |
2979 | 101 | } |
2980 | 101 | |
2981 | 101 | unsigned OpIdx = Commute ? 101 20 : 1101 ; |
2982 | 28.7k | unsigned Reg1 = UseMI.getOperand(OpIdx).getReg(); |
2983 | 28.7k | bool isKill = UseMI.getOperand(OpIdx).isKill(); |
2984 | 28.7k | unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg)); |
2985 | 28.7k | BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(NewUseOpc), |
2986 | 28.7k | NewReg) |
2987 | 28.7k | .addReg(Reg1, getKillRegState(isKill)) |
2988 | 28.7k | .addImm(SOImmValV1) |
2989 | 28.7k | .add(predOps(ARMCC::AL)) |
2990 | 28.7k | .add(condCodeOp()); |
2991 | 28.7k | UseMI.setDesc(get(NewUseOpc)); |
2992 | 28.7k | UseMI.getOperand(1).setReg(NewReg); |
2993 | 28.7k | UseMI.getOperand(1).setIsKill(); |
2994 | 28.7k | UseMI.getOperand(2).ChangeToImmediate(SOImmValV2); |
2995 | 28.7k | DefMI.eraseFromParent(); |
2996 | 28.7k | return true; |
2997 | 28.7k | } |
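The interesting case above is an immediate that is not a single ARM modified immediate but can be split into two. A standalone sketch of that idea, under the simplifying assumption that the two parts occupy disjoint rotated byte windows (ARM_AM::isSOImmTwoPartVal and getSOImmTwoPart* are the authoritative helpers; this is only an illustration):

    #include <cassert>
    #include <cstdint>

    static uint32_t rotr32(uint32_t V, unsigned Amt) {
      return Amt ? (V >> Amt) | (V << (32 - Amt)) : V;
    }

    // An ARM "modified immediate" is an 8-bit value rotated right by an even
    // amount, i.e. rotating V left by some even amount yields a value <= 0xFF.
    static bool isSOImm(uint32_t V) {
      for (unsigned R = 0; R < 32; R += 2)
        if ((uint32_t)((V << R) | (R ? V >> (32 - R) : 0)) <= 0xFFu)
          return true;
      return false;
    }

    // Simplified two-part split: carve out one rotated byte window and require
    // the remainder to be a modified immediate on its own.
    static bool splitSOImm(uint32_t V, uint32_t &Lo, uint32_t &Hi) {
      for (unsigned R = 0; R < 32; R += 2) {
        uint32_t Window = rotr32(0xFFu, R);
        Lo = V & Window;
        Hi = V & ~Window;
        if (Lo && Hi && isSOImm(Hi))
          return true;
      }
      return false;
    }

    int main() {
      uint32_t C = 0x00FFFF00u;       // not encodable as one modified immediate
      assert(!isSOImm(C));
      uint32_t Lo, Hi;
      assert(splitSOImm(C, Lo, Hi) && Lo + Hi == C);
      // So  add r0, r1, #C  can be folded as  add t, r1, #Lo ; add r0, t, #Hi,
      // which is the shape FoldImmediate produces when the defining MOVi32imm
      // has a single use.
    }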
2998 | | |
2999 | | static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, |
3000 | 0 | const MachineInstr &MI) { |
3001 | 0 | switch (MI.getOpcode()) { |
3002 | 0 | default: { |
3003 | 0 | const MCInstrDesc &Desc = MI.getDesc(); |
3004 | 0 | int UOps = ItinData->getNumMicroOps(Desc.getSchedClass()); |
3005 | 0 | assert(UOps >= 0 && "bad # UOps"); |
3006 | 0 | return UOps; |
3007 | 0 | } |
3008 | 0 | |
3009 | 0 | case ARM::LDRrs: |
3010 | 0 | case ARM::LDRBrs: |
3011 | 0 | case ARM::STRrs: |
3012 | 0 | case ARM::STRBrs: { |
3013 | 0 | unsigned ShOpVal = MI.getOperand(3).getImm(); |
3014 | 0 | bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; |
3015 | 0 | unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); |
3016 | 0 | if (!isSub && |
3017 | 0 | (ShImm == 0 || |
3018 | 0 | ((ShImm == 1 || 0 ShImm == 20 || ShImm == 30 ) && |
3019 | 0 | ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) |
3020 | 0 | return 1; |
3021 | 0 | return 2; |
3022 | 0 | } |
3023 | 0 | |
3024 | 0 | case ARM::LDRH: |
3025 | 0 | case ARM::STRH: { |
3026 | 0 | if (!MI.getOperand(2).getReg()) |
3027 | 0 | return 1; |
3028 | 0 | |
3029 | 0 | unsigned ShOpVal = MI.getOperand(3).getImm(); |
3030 | 0 | bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; |
3031 | 0 | unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); |
3032 | 0 | if (!isSub && |
3033 | 0 | (ShImm == 0 || |
3034 | 0 | ((ShImm == 1 || 0 ShImm == 20 || ShImm == 30 ) && |
3035 | 0 | ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) |
3036 | 0 | return 1; |
3037 | 0 | return 2; |
3038 | 0 | } |
3039 | 0 | |
3040 | 0 | case ARM::LDRSB: |
3041 | 0 | case ARM::LDRSH: |
3042 | 0 | return (ARM_AM::getAM3Op(MI.getOperand(3).getImm()) == ARM_AM::sub) ? 30 : 20 ; |
3043 | 0 | |
3044 | 0 | case ARM::LDRSB_POST: |
3045 | 0 | case ARM::LDRSH_POST: { |
3046 | 0 | unsigned Rt = MI.getOperand(0).getReg(); |
3047 | 0 | unsigned Rm = MI.getOperand(3).getReg(); |
3048 | 0 | return (Rt == Rm) ? 40 : 30 ; |
3049 | 0 | } |
3050 | 0 | |
3051 | 0 | case ARM::LDR_PRE_REG: |
3052 | 0 | case ARM::LDRB_PRE_REG: { |
3053 | 0 | unsigned Rt = MI.getOperand(0).getReg(); |
3054 | 0 | unsigned Rm = MI.getOperand(3).getReg(); |
3055 | 0 | if (Rt == Rm) |
3056 | 0 | return 3; |
3057 | 0 | unsigned ShOpVal = MI.getOperand(4).getImm(); |
3058 | 0 | bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; |
3059 | 0 | unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); |
3060 | 0 | if (!isSub && |
3061 | 0 | (ShImm == 0 || |
3062 | 0 | ((ShImm == 1 || 0 ShImm == 20 || ShImm == 30 ) && |
3063 | 0 | ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) |
3064 | 0 | return 2; |
3065 | 0 | return 3; |
3066 | 0 | } |
3067 | 0 | |
3068 | 0 | case ARM::STR_PRE_REG: |
3069 | 0 | case ARM::STRB_PRE_REG: { |
3070 | 0 | unsigned ShOpVal = MI.getOperand(4).getImm(); |
3071 | 0 | bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; |
3072 | 0 | unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); |
3073 | 0 | if (!isSub && |
3074 | 0 | (ShImm == 0 || |
3075 | 0 | ((ShImm == 1 || 0 ShImm == 20 || ShImm == 30 ) && |
3076 | 0 | ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) |
3077 | 0 | return 2; |
3078 | 0 | return 3; |
3079 | 0 | } |
3080 | 0 | |
3081 | 0 | case ARM::LDRH_PRE: |
3082 | 0 | case ARM::STRH_PRE: { |
3083 | 0 | unsigned Rt = MI.getOperand(0).getReg(); |
3084 | 0 | unsigned Rm = MI.getOperand(3).getReg(); |
3085 | 0 | if (!Rm) |
3086 | 0 | return 2; |
3087 | 0 | if (0 Rt == Rm0 ) |
3088 | 0 | return 3; |
3089 | 0 | return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 0 30 : 20 ; |
3090 | 0 | } |
3091 | 0 | |
3092 | 0 | case ARM::LDR_POST_REG: |
3093 | 0 | case ARM::LDRB_POST_REG: |
3094 | 0 | case ARM::LDRH_POST: { |
3095 | 0 | unsigned Rt = MI.getOperand(0).getReg(); |
3096 | 0 | unsigned Rm = MI.getOperand(3).getReg(); |
3097 | 0 | return (Rt == Rm) ? 30 : 20 ; |
3098 | 0 | } |
3099 | 0 | |
3100 | 0 | case ARM::LDR_PRE_IMM: |
3101 | 0 | case ARM::LDRB_PRE_IMM: |
3102 | 0 | case ARM::LDR_POST_IMM: |
3103 | 0 | case ARM::LDRB_POST_IMM: |
3104 | 0 | case ARM::STRB_POST_IMM: |
3105 | 0 | case ARM::STRB_POST_REG: |
3106 | 0 | case ARM::STRB_PRE_IMM: |
3107 | 0 | case ARM::STRH_POST: |
3108 | 0 | case ARM::STR_POST_IMM: |
3109 | 0 | case ARM::STR_POST_REG: |
3110 | 0 | case ARM::STR_PRE_IMM: |
3111 | 0 | return 2; |
3112 | 0 | |
3113 | 0 | case ARM::LDRSB_PRE: |
3114 | 0 | case ARM::LDRSH_PRE: { |
3115 | 0 | unsigned Rm = MI.getOperand(3).getReg(); |
3116 | 0 | if (Rm == 0) |
3117 | 0 | return 3; |
3118 | 0 | unsigned Rt = MI.getOperand(0).getReg(); |
3119 | 0 | if (Rt == Rm) |
3120 | 0 | return 4; |
3121 | 0 | unsigned ShOpVal = MI.getOperand(4).getImm(); |
3122 | 0 | bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; |
3123 | 0 | unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); |
3124 | 0 | if (!isSub && |
3125 | 0 | (ShImm == 0 || |
3126 | 0 | ((ShImm == 1 || 0 ShImm == 20 || ShImm == 30 ) && |
3127 | 0 | ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) |
3128 | 0 | return 3; |
3129 | 0 | return 4; |
3130 | 0 | } |
3131 | 0 | |
3132 | 0 | case ARM::LDRD: { |
3133 | 0 | unsigned Rt = MI.getOperand(0).getReg(); |
3134 | 0 | unsigned Rn = MI.getOperand(2).getReg(); |
3135 | 0 | unsigned Rm = MI.getOperand(3).getReg(); |
3136 | 0 | if (Rm) |
3137 | 0 | return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 0 40 |
3138 | 0 | : 3; |
3139 | 0 | return (Rt == Rn) ? 0 30 : 20 ; |
3140 | 0 | } |
3141 | 0 | |
3142 | 0 | case ARM::STRD: { |
3143 | 0 | unsigned Rm = MI.getOperand(3).getReg(); |
3144 | 0 | if (Rm) |
3145 | 0 | return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 0 40 |
3146 | 0 | : 3; |
3147 | 0 | return 2; |
3148 | 0 | } |
3149 | 0 | |
3150 | 0 | case ARM::LDRD_POST: |
3151 | 0 | case ARM::t2LDRD_POST: |
3152 | 0 | return 3; |
3153 | 0 | |
3154 | 0 | case ARM::STRD_POST: |
3155 | 0 | case ARM::t2STRD_POST: |
3156 | 0 | return 4; |
3157 | 0 | |
3158 | 0 | case ARM::LDRD_PRE: { |
3159 | 0 | unsigned Rt = MI.getOperand(0).getReg(); |
3160 | 0 | unsigned Rn = MI.getOperand(3).getReg(); |
3161 | 0 | unsigned Rm = MI.getOperand(4).getReg(); |
3162 | 0 | if (Rm) |
3163 | 0 | return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 0 50 |
3164 | 0 | : 4; |
3165 | 0 | return (Rt == Rn) ? 0 40 : 30 ; |
3166 | 0 | } |
3167 | 0 | |
3168 | 0 | case ARM::t2LDRD_PRE: { |
3169 | 0 | unsigned Rt = MI.getOperand(0).getReg(); |
3170 | 0 | unsigned Rn = MI.getOperand(3).getReg(); |
3171 | 0 | return (Rt == Rn) ? 40 : 30 ; |
3172 | 0 | } |
3173 | 0 | |
3174 | 0 | case ARM::STRD_PRE: { |
3175 | 0 | unsigned Rm = MI.getOperand(4).getReg(); |
3176 | 0 | if (Rm) |
3177 | 0 | return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 0 50 |
3178 | 0 | : 4; |
3179 | 0 | return 3; |
3180 | 0 | } |
3181 | 0 | |
3182 | 0 | case ARM::t2STRD_PRE: |
3183 | 0 | return 3; |
3184 | 0 | |
3185 | 0 | case ARM::t2LDR_POST: |
3186 | 0 | case ARM::t2LDRB_POST: |
3187 | 0 | case ARM::t2LDRB_PRE: |
3188 | 0 | case ARM::t2LDRSBi12: |
3189 | 0 | case ARM::t2LDRSBi8: |
3190 | 0 | case ARM::t2LDRSBpci: |
3191 | 0 | case ARM::t2LDRSBs: |
3192 | 0 | case ARM::t2LDRH_POST: |
3193 | 0 | case ARM::t2LDRH_PRE: |
3194 | 0 | case ARM::t2LDRSBT: |
3195 | 0 | case ARM::t2LDRSB_POST: |
3196 | 0 | case ARM::t2LDRSB_PRE: |
3197 | 0 | case ARM::t2LDRSH_POST: |
3198 | 0 | case ARM::t2LDRSH_PRE: |
3199 | 0 | case ARM::t2LDRSHi12: |
3200 | 0 | case ARM::t2LDRSHi8: |
3201 | 0 | case ARM::t2LDRSHpci: |
3202 | 0 | case ARM::t2LDRSHs: |
3203 | 0 | return 2; |
3204 | 0 | |
3205 | 0 | case ARM::t2LDRDi8: { |
3206 | 0 | unsigned Rt = MI.getOperand(0).getReg(); |
3207 | 0 | unsigned Rn = MI.getOperand(2).getReg(); |
3208 | 0 | return (Rt == Rn) ? 30 : 20 ; |
3209 | 0 | } |
3210 | 0 | |
3211 | 0 | case ARM::t2STRB_POST: |
3212 | 0 | case ARM::t2STRB_PRE: |
3213 | 0 | case ARM::t2STRBs: |
3214 | 0 | case ARM::t2STRDi8: |
3215 | 0 | case ARM::t2STRH_POST: |
3216 | 0 | case ARM::t2STRH_PRE: |
3217 | 0 | case ARM::t2STRHs: |
3218 | 0 | case ARM::t2STR_POST: |
3219 | 0 | case ARM::t2STR_PRE: |
3220 | 0 | case ARM::t2STRs: |
3221 | 0 | return 2; |
3222 | 0 | } |
3223 | 0 | } |
3224 | | |
3225 | | // Return the number of 32-bit words loaded by LDM or stored by STM. If this |
3226 | | // can't be easily determined return 0 (missing MachineMemOperand). |
3227 | | // |
3228 | | // FIXME: The current MachineInstr design does not support relying on machine |
3229 | | // mem operands to determine the width of a memory access. Instead, we expect |
3230 | | // the target to provide this information based on the instruction opcode and |
3231 | | // operands. However, using MachineMemOperand is the best solution now for |
3232 | | // two reasons: |
3233 | | // |
3234 | | // 1) getNumMicroOps tries to infer LDM memory width from the total number of MI |
3235 | | // operands. This is much more dangerous than using the MachineMemOperand |
3236 | | // sizes because CodeGen passes can insert/remove optional machine operands. In |
3237 | | // fact, it's totally incorrect for preRA passes and appears to be wrong for |
3238 | | // postRA passes as well. |
3239 | | // |
3240 | | // 2) getNumLDMAddresses is only used by the scheduling machine model and any |
3241 | | // machine model that calls this should handle the unknown (zero size) case. |
3242 | | // |
3243 | | // Long term, we should require a target hook that verifies MachineMemOperand |
3244 | | // sizes during MC lowering. That target hook should be local to MC lowering |
3245 | | // because we can't ensure that it is aware of other MI forms. Doing this will |
3246 | | // ensure that MachineMemOperands are correctly propagated through all passes. |
3247 | 26.3k | unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr &MI) const { |
3248 | 26.3k | unsigned Size = 0; |
3249 | 26.3k | for (MachineInstr::mmo_iterator I = MI.memoperands_begin(), |
3250 | 26.3k | E = MI.memoperands_end(); |
3251 | 26.3k | I != E26.3k ; ++I36 ) { |
3252 | 36 | Size += (*I)->getSize(); |
3253 | 36 | } |
3254 | 26.3k | return Size / 4; |
3255 | 26.3k | } |
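A tiny stand-in for the computation above (the vector of sizes plays the role of the memoperand list; it is not the LLVM API): an LDM/STM of N core registers normally carries N * 4 bytes of memoperands, and a missing list yields 0, the "unknown" value the scheduling model is expected to tolerate.

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Sum the access sizes in bytes and report the number of 32-bit words.
    static unsigned numLDMAddresses(const std::vector<uint64_t> &MemOpSizes) {
      uint64_t Size = 0;
      for (uint64_t S : MemOpSizes)
        Size += S;
      return static_cast<unsigned>(Size / 4);
    }

    int main() {
      assert(numLDMAddresses({4, 4, 4, 4}) == 4); // e.g. an LDMIA of 4 registers
      assert(numLDMAddresses({}) == 0);           // missing memoperands -> unknown
    }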
3256 | | |
3257 | | static unsigned getNumMicroOpsSingleIssuePlusExtras(unsigned Opc, |
3258 | 0 | unsigned NumRegs) { |
3259 | 0 | unsigned UOps = 1 + NumRegs; // 1 for address computation. |
3260 | 0 | switch (Opc) { |
3261 | 0 | default: |
3262 | 0 | break; |
3263 | 0 | case ARM::VLDMDIA_UPD: |
3264 | 0 | case ARM::VLDMDDB_UPD: |
3265 | 0 | case ARM::VLDMSIA_UPD: |
3266 | 0 | case ARM::VLDMSDB_UPD: |
3267 | 0 | case ARM::VSTMDIA_UPD: |
3268 | 0 | case ARM::VSTMDDB_UPD: |
3269 | 0 | case ARM::VSTMSIA_UPD: |
3270 | 0 | case ARM::VSTMSDB_UPD: |
3271 | 0 | case ARM::LDMIA_UPD: |
3272 | 0 | case ARM::LDMDA_UPD: |
3273 | 0 | case ARM::LDMDB_UPD: |
3274 | 0 | case ARM::LDMIB_UPD: |
3275 | 0 | case ARM::STMIA_UPD: |
3276 | 0 | case ARM::STMDA_UPD: |
3277 | 0 | case ARM::STMDB_UPD: |
3278 | 0 | case ARM::STMIB_UPD: |
3279 | 0 | case ARM::tLDMIA_UPD: |
3280 | 0 | case ARM::tSTMIA_UPD: |
3281 | 0 | case ARM::t2LDMIA_UPD: |
3282 | 0 | case ARM::t2LDMDB_UPD: |
3283 | 0 | case ARM::t2STMIA_UPD: |
3284 | 0 | case ARM::t2STMDB_UPD: |
3285 | 0 | ++UOps; // One for base register writeback. |
3286 | 0 | break; |
3287 | 0 | case ARM::LDMIA_RET: |
3288 | 0 | case ARM::tPOP_RET: |
3289 | 0 | case ARM::t2LDMIA_RET: |
3290 | 0 | UOps += 2; // One for base reg wb, one for write to pc. |
3291 | 0 | break; |
3292 | 0 | } |
3293 | 0 | return UOps; |
3294 | 0 | } |
3295 | | |
3296 | | unsigned ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, |
3297 | 7.99k | const MachineInstr &MI) const { |
3298 | 7.99k | if (!ItinData || 7.99k ItinData->isEmpty()7.99k ) |
3299 | 0 | return 1; |
3300 | 7.99k | |
3301 | 7.99k | const MCInstrDesc &Desc = MI.getDesc(); |
3302 | 7.99k | unsigned Class = Desc.getSchedClass(); |
3303 | 7.99k | int ItinUOps = ItinData->getNumMicroOps(Class); |
3304 | 7.99k | if (ItinUOps >= 07.99k ) { |
3305 | 0 | if (Subtarget.isSwift() && 0 (Desc.mayLoad() || 0 Desc.mayStore()0 )) |
3306 | 0 | return getNumMicroOpsSwiftLdSt(ItinData, MI); |
3307 | 0 | |
3308 | 0 | return ItinUOps; |
3309 | 0 | } |
3310 | 7.99k | |
3311 | 7.99k | unsigned Opc = MI.getOpcode(); |
3312 | 7.99k | switch (Opc) { |
3313 | 0 | default: |
3314 | 0 | llvm_unreachable("Unexpected multi-uops instruction!"); |
3315 | 0 | case ARM::VLDMQIA: |
3316 | 0 | case ARM::VSTMQIA: |
3317 | 0 | return 2; |
3318 | 0 | |
3319 | 0 | // The number of uOps for load / store multiple is determined by the number of |
3320 | 0 | // registers. |
3321 | 0 | // |
3322 | 0 | // On Cortex-A8, each pair of register loads / stores can be scheduled on the |
3323 | 0 | // same cycle. The scheduling for the first load / store must be done |
3324 | 0 | // separately by assuming the address is not 64-bit aligned. |
3325 | 0 | // |
3326 | 0 | // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address |
3327 | 0 | // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON |
3328 | 0 | // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1. |
3329 | 374 | case ARM::VLDMDIA: |
3330 | 374 | case ARM::VLDMDIA_UPD: |
3331 | 374 | case ARM::VLDMDDB_UPD: |
3332 | 374 | case ARM::VLDMSIA: |
3333 | 374 | case ARM::VLDMSIA_UPD: |
3334 | 374 | case ARM::VLDMSDB_UPD: |
3335 | 374 | case ARM::VSTMDIA: |
3336 | 374 | case ARM::VSTMDIA_UPD: |
3337 | 374 | case ARM::VSTMDDB_UPD: |
3338 | 374 | case ARM::VSTMSIA: |
3339 | 374 | case ARM::VSTMSIA_UPD: |
3340 | 374 | case ARM::VSTMSDB_UPD: { |
3341 | 374 | unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands(); |
3342 | 374 | return (NumRegs / 2) + (NumRegs % 2) + 1; |
3343 | 374 | } |
3344 | 374 | |
3345 | 7.62k | case ARM::LDMIA_RET: |
3346 | 7.62k | case ARM::LDMIA: |
3347 | 7.62k | case ARM::LDMDA: |
3348 | 7.62k | case ARM::LDMDB: |
3349 | 7.62k | case ARM::LDMIB: |
3350 | 7.62k | case ARM::LDMIA_UPD: |
3351 | 7.62k | case ARM::LDMDA_UPD: |
3352 | 7.62k | case ARM::LDMDB_UPD: |
3353 | 7.62k | case ARM::LDMIB_UPD: |
3354 | 7.62k | case ARM::STMIA: |
3355 | 7.62k | case ARM::STMDA: |
3356 | 7.62k | case ARM::STMDB: |
3357 | 7.62k | case ARM::STMIB: |
3358 | 7.62k | case ARM::STMIA_UPD: |
3359 | 7.62k | case ARM::STMDA_UPD: |
3360 | 7.62k | case ARM::STMDB_UPD: |
3361 | 7.62k | case ARM::STMIB_UPD: |
3362 | 7.62k | case ARM::tLDMIA: |
3363 | 7.62k | case ARM::tLDMIA_UPD: |
3364 | 7.62k | case ARM::tSTMIA_UPD: |
3365 | 7.62k | case ARM::tPOP_RET: |
3366 | 7.62k | case ARM::tPOP: |
3367 | 7.62k | case ARM::tPUSH: |
3368 | 7.62k | case ARM::t2LDMIA_RET: |
3369 | 7.62k | case ARM::t2LDMIA: |
3370 | 7.62k | case ARM::t2LDMDB: |
3371 | 7.62k | case ARM::t2LDMIA_UPD: |
3372 | 7.62k | case ARM::t2LDMDB_UPD: |
3373 | 7.62k | case ARM::t2STMIA: |
3374 | 7.62k | case ARM::t2STMDB: |
3375 | 7.62k | case ARM::t2STMIA_UPD: |
3376 | 7.62k | case ARM::t2STMDB_UPD: { |
3377 | 7.62k | unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands() + 1; |
3378 | 7.62k | switch (Subtarget.getLdStMultipleTiming()) { |
3379 | 0 | case ARMSubtarget::SingleIssuePlusExtras: |
3380 | 0 | return getNumMicroOpsSingleIssuePlusExtras(Opc, NumRegs); |
3381 | 5.47k | case ARMSubtarget::SingleIssue: |
3382 | 5.47k | // Assume the worst. |
3383 | 5.47k | return NumRegs; |
3384 | 2.08k | case ARMSubtarget::DoubleIssue: { |
3385 | 2.08k | if (NumRegs < 4) |
3386 | 734 | return 2; |
3387 | 1.35k | // 4 registers would be issued: 2, 2. |
3388 | 1.35k | // 5 registers would be issued: 2, 2, 1. |
3389 | 1.35k | unsigned UOps = (NumRegs / 2); |
3390 | 1.35k | if (NumRegs % 2) |
3391 | 802 | ++UOps; |
3392 | 1.35k | return UOps; |
3393 | 1.35k | } |
3394 | 63 | case ARMSubtarget::DoubleIssueCheckUnalignedAccess: { |
3395 | 63 | unsigned UOps = (NumRegs / 2); |
3396 | 63 | // If there is an odd number of registers or if it's not 64-bit aligned, |
3397 | 63 | // then it takes an extra AGU (Address Generation Unit) cycle. |
3398 | 63 | if ((NumRegs % 2) || 63 !MI.hasOneMemOperand()49 || |
3399 | 0 | (*MI.memoperands_begin())->getAlignment() < 8) |
3400 | 63 | ++UOps; |
3401 | 63 | return UOps; |
3402 | 0 | } |
3403 | 0 | } |
3404 | 0 | } |
3405 | 0 | } |
3406 | 0 | llvm_unreachable0 ("Didn't find the number of microops"); |
3407 | 0 | } |
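A worked instance of the Cortex-A9-style formulas quoted in the comment above (illustrative only; the real code additionally clamps small register counts and consults the subtarget's load/store-multiple timing mode):

    #include <cassert>

    static unsigned uopsCoreLdStMultiple(unsigned NumRegs) { // LDM / STM
      return NumRegs / 2 + NumRegs % 2;
    }
    static unsigned uopsVfpLdStMultiple(unsigned NumRegs) {  // VLDM / VSTM
      return NumRegs / 2 + NumRegs % 2 + 1;
    }

    int main() {
      assert(uopsCoreLdStMultiple(4) == 2); // 4 registers issue as 2, 2
      assert(uopsCoreLdStMultiple(5) == 3); // 5 registers issue as 2, 2, 1
      assert(uopsVfpLdStMultiple(4) == 3);  // plus one for the VFP/NEON case
    }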
3408 | | |
3409 | | int |
3410 | | ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData, |
3411 | | const MCInstrDesc &DefMCID, |
3412 | | unsigned DefClass, |
3413 | 221 | unsigned DefIdx, unsigned DefAlign) const { |
3414 | 221 | int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1; |
3415 | 221 | if (RegNo <= 0) |
3416 | 221 | // Def is the address writeback. |
3417 | 0 | return ItinData->getOperandCycle(DefClass, DefIdx); |
3418 | 221 | |
3419 | 221 | int DefCycle; |
3420 | 221 | if (Subtarget.isCortexA8() || 221 Subtarget.isCortexA7()202 ) { |
3421 | 19 | // (regno / 2) + (regno % 2) + 1 |
3422 | 19 | DefCycle = RegNo / 2 + 1; |
3423 | 19 | if (RegNo % 2) |
3424 | 8 | ++DefCycle; |
3425 | 221 | } else if (202 Subtarget.isLikeA9() || 202 Subtarget.isSwift()193 ) { |
3426 | 9 | DefCycle = RegNo; |
3427 | 9 | bool isSLoad = false; |
3428 | 9 | |
3429 | 9 | switch (DefMCID.getOpcode()) { |
3430 | 9 | default: break; |
3431 | 0 | case ARM::VLDMSIA: |
3432 | 0 | case ARM::VLDMSIA_UPD: |
3433 | 0 | case ARM::VLDMSDB_UPD: |
3434 | 0 | isSLoad = true; |
3435 | 0 | break; |
3436 | 9 | } |
3437 | 9 | |
3438 | 9 | // If there is an odd number of 'S' registers or if it's not 64-bit aligned, |
3439 | 9 | // then it takes an extra cycle. |
3440 | 9 | if (9 (isSLoad && 9 (RegNo % 2)0 ) || DefAlign < 89 ) |
3441 | 0 | ++DefCycle; |
3442 | 202 | } else { |
3443 | 193 | // Assume the worst. |
3444 | 193 | DefCycle = RegNo + 2; |
3445 | 193 | } |
3446 | 221 | |
3447 | 221 | return DefCycle; |
3448 | 221 | } |
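As a worked instance of the Cortex-A8/A7 branch above: the fifth D register produced by a VLDMDIA (RegNo == 5) gets DefCycle = 5/2 + 1 = 3, plus one more because the register number is odd, so its result is modelled as ready in cycle 4; on cores other than A8/A7, A9-like or Swift, the conservative RegNo + 2 = 7 is used instead.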
3449 | | |
3450 | 0 | bool ARMBaseInstrInfo::isLDMBaseRegInList(const MachineInstr &MI) const { |
3451 | 0 | unsigned BaseReg = MI.getOperand(0).getReg(); |
3452 | 0 | for (unsigned i = 1, sz = MI.getNumOperands(); i < sz0 ; ++i0 ) { |
3453 | 0 | const auto &Op = MI.getOperand(i); |
3454 | 0 | if (Op.isReg() && 0 Op.getReg() == BaseReg0 ) |
3455 | 0 | return true; |
3456 | 0 | } |
3457 | 0 | return false; |
3458 | 0 | } |
3459 | | unsigned |
3460 | 7 | ARMBaseInstrInfo::getLDMVariableDefsSize(const MachineInstr &MI) const { |
3461 | 7 | // ins GPR:$Rn, pred:$p (2xOp), reglist:$regs, variable_ops |
3462 | 7 | // (outs GPR:$wb), (ins GPR:$Rn, pred:$p (2xOp), reglist:$regs, variable_ops) |
3463 | 7 | return MI.getNumOperands() + 1 - MI.getDesc().getNumOperands(); |
3464 | 7 | } |
3465 | | |
3466 | | int |
3467 | | ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData, |
3468 | | const MCInstrDesc &DefMCID, |
3469 | | unsigned DefClass, |
3470 | 412 | unsigned DefIdx, unsigned DefAlign) const { |
3471 | 412 | int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1; |
3472 | 412 | if (RegNo <= 0) |
3473 | 412 | // Def is the address writeback. |
3474 | 0 | return ItinData->getOperandCycle(DefClass, DefIdx); |
3475 | 412 | |
3476 | 412 | int DefCycle; |
3477 | 412 | if (Subtarget.isCortexA8() || 412 Subtarget.isCortexA7()406 ) { |
3478 | 22 | // 4 registers would be issued: 1, 2, 1. |
3479 | 22 | // 5 registers would be issued: 1, 2, 2. |
3480 | 22 | DefCycle = RegNo / 2; |
3481 | 22 | if (DefCycle < 1) |
3482 | 6 | DefCycle = 1; |
3483 | 22 | // Result latency is issue cycle + 2: E2. |
3484 | 22 | DefCycle += 2; |
3485 | 412 | } else if (390 Subtarget.isLikeA9() || 390 Subtarget.isSwift()384 ) { |
3486 | 6 | DefCycle = (RegNo / 2); |
3487 | 6 | // If there is an odd number of registers or if it's not 64-bit aligned, |
3488 | 6 | // then it takes an extra AGU (Address Generation Unit) cycle. |
3489 | 6 | if ((RegNo % 2) || 6 DefAlign < 83 ) |
3490 | 6 | ++DefCycle; |
3491 | 6 | // Result latency is AGU cycles + 2. |
3492 | 6 | DefCycle += 2; |
3493 | 390 | } else { |
3494 | 384 | // Assume the worst. |
3495 | 384 | DefCycle = RegNo + 2; |
3496 | 384 | } |
3497 | 412 | |
3498 | 412 | return DefCycle; |
3499 | 412 | } |
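Concretely, on Cortex-A8/A7 the comment's "4 registers would be issued: 1, 2, 1" means the fourth register loaded by an LDMIA (RegNo == 4) gets DefCycle = max(4/2, 1) + 2 = 4, while on an A9-like core an odd register number or a base alignment below 8 bytes adds one AGU cycle before the fixed +2.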
3500 | | |
3501 | | int |
3502 | | ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData, |
3503 | | const MCInstrDesc &UseMCID, |
3504 | | unsigned UseClass, |
3505 | 19 | unsigned UseIdx, unsigned UseAlign) const { |
3506 | 19 | int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1; |
3507 | 19 | if (RegNo <= 0) |
3508 | 0 | return ItinData->getOperandCycle(UseClass, UseIdx); |
3509 | 19 | |
3510 | 19 | int UseCycle; |
3511 | 19 | if (Subtarget.isCortexA8() || 19 Subtarget.isCortexA7()6 ) { |
3512 | 13 | // (regno / 2) + (regno % 2) + 1 |
3513 | 13 | UseCycle = RegNo / 2 + 1; |
3514 | 13 | if (RegNo % 2) |
3515 | 5 | ++UseCycle; |
3516 | 19 | } else if (6 Subtarget.isLikeA9() || 6 Subtarget.isSwift()3 ) { |
3517 | 3 | UseCycle = RegNo; |
3518 | 3 | bool isSStore = false; |
3519 | 3 | |
3520 | 3 | switch (UseMCID.getOpcode()) { |
3521 | 3 | default: break; |
3522 | 0 | case ARM::VSTMSIA: |
3523 | 0 | case ARM::VSTMSIA_UPD: |
3524 | 0 | case ARM::VSTMSDB_UPD: |
3525 | 0 | isSStore = true; |
3526 | 0 | break; |
3527 | 3 | } |
3528 | 3 | |
3529 | 3 | // If there is an odd number of 'S' registers or if it's not 64-bit aligned, |
3530 | 3 | // then it takes an extra cycle. |
3531 | 3 | if (3 (isSStore && 3 (RegNo % 2)0 ) || UseAlign < 83 ) |
3532 | 0 | ++UseCycle; |
3533 | 6 | } else { |
3534 | 3 | // Assume the worst. |
3535 | 3 | UseCycle = RegNo + 2; |
3536 | 3 | } |
3537 | 19 | |
3538 | 19 | return UseCycle; |
3539 | 19 | } |
3540 | | |
3541 | | int |
3542 | | ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData, |
3543 | | const MCInstrDesc &UseMCID, |
3544 | | unsigned UseClass, |
3545 | 474 | unsigned UseIdx, unsigned UseAlign) const { |
3546 | 474 | int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1; |
3547 | 474 | if (RegNo <= 0) |
3548 | 0 | return ItinData->getOperandCycle(UseClass, UseIdx); |
3549 | 474 | |
3550 | 474 | int UseCycle; |
3551 | 474 | if (Subtarget.isCortexA8() || 474 Subtarget.isCortexA7()461 ) { |
3552 | 164 | UseCycle = RegNo / 2; |
3553 | 164 | if (UseCycle < 2) |
3554 | 159 | UseCycle = 2; |
3555 | 164 | // Read in E3. |
3556 | 164 | UseCycle += 2; |
3557 | 474 | } else if (310 Subtarget.isLikeA9() || 310 Subtarget.isSwift()310 ) { |
3558 | 0 | UseCycle = (RegNo / 2); |
3559 | 0 | // If there is an odd number of registers or if it's not 64-bit aligned, |
3560 | 0 | // then it takes an extra AGU (Address Generation Unit) cycle. |
3561 | 0 | if ((RegNo % 2) || 0 UseAlign < 80 ) |
3562 | 0 | ++UseCycle; |
3563 | 310 | } else { |
3564 | 310 | // Assume the worst. |
3565 | 310 | UseCycle = 1; |
3566 | 310 | } |
3567 | 474 | return UseCycle; |
3568 | 474 | } |
3569 | | |
3570 | | int |
3571 | | ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, |
3572 | | const MCInstrDesc &DefMCID, |
3573 | | unsigned DefIdx, unsigned DefAlign, |
3574 | | const MCInstrDesc &UseMCID, |
3575 | 157k | unsigned UseIdx, unsigned UseAlign) const { |
3576 | 157k | unsigned DefClass = DefMCID.getSchedClass(); |
3577 | 157k | unsigned UseClass = UseMCID.getSchedClass(); |
3578 | 157k | |
3579 | 157k | if (DefIdx < DefMCID.getNumDefs() && 157k UseIdx < UseMCID.getNumOperands()156k ) |
3580 | 154k | return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx); |
3581 | 2.41k | |
3582 | 2.41k | // This may be a def / use of a variable_ops instruction, the operand |
3583 | 2.41k | // latency might be determinable dynamically. Let the target try to |
3584 | 2.41k | // figure it out. |
3585 | 2.41k | int DefCycle = -1; |
3586 | 2.41k | bool LdmBypass = false; |
3587 | 2.41k | switch (DefMCID.getOpcode()) { |
3588 | 1.77k | default: |
3589 | 1.77k | DefCycle = ItinData->getOperandCycle(DefClass, DefIdx); |
3590 | 1.77k | break; |
3591 | 2.41k | |
3592 | 221 | case ARM::VLDMDIA: |
3593 | 221 | case ARM::VLDMDIA_UPD: |
3594 | 221 | case ARM::VLDMDDB_UPD: |
3595 | 221 | case ARM::VLDMSIA: |
3596 | 221 | case ARM::VLDMSIA_UPD: |
3597 | 221 | case ARM::VLDMSDB_UPD: |
3598 | 221 | DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign); |
3599 | 221 | break; |
3600 | 221 | |
3601 | 412 | case ARM::LDMIA_RET: |
3602 | 412 | case ARM::LDMIA: |
3603 | 412 | case ARM::LDMDA: |
3604 | 412 | case ARM::LDMDB: |
3605 | 412 | case ARM::LDMIB: |
3606 | 412 | case ARM::LDMIA_UPD: |
3607 | 412 | case ARM::LDMDA_UPD: |
3608 | 412 | case ARM::LDMDB_UPD: |
3609 | 412 | case ARM::LDMIB_UPD: |
3610 | 412 | case ARM::tLDMIA: |
3611 | 412 | case ARM::tLDMIA_UPD: |
3612 | 412 | case ARM::tPUSH: |
3613 | 412 | case ARM::t2LDMIA_RET: |
3614 | 412 | case ARM::t2LDMIA: |
3615 | 412 | case ARM::t2LDMDB: |
3616 | 412 | case ARM::t2LDMIA_UPD: |
3617 | 412 | case ARM::t2LDMDB_UPD: |
3618 | 412 | LdmBypass = true; |
3619 | 412 | DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign); |
3620 | 412 | break; |
3621 | 2.41k | } |
3622 | 2.41k | |
3623 | 2.41k | if (2.41k DefCycle == -12.41k ) |
3624 | 2.41k | // We can't seem to determine the result latency of the def, assume it's 2. |
3625 | 198 | DefCycle = 2; |
3626 | 2.41k | |
3627 | 2.41k | int UseCycle = -1; |
3628 | 2.41k | switch (UseMCID.getOpcode()) { |
3629 | 1.91k | default: |
3630 | 1.91k | UseCycle = ItinData->getOperandCycle(UseClass, UseIdx); |
3631 | 1.91k | break; |
3632 | 2.41k | |
3633 | 19 | case ARM::VSTMDIA: |
3634 | 19 | case ARM::VSTMDIA_UPD: |
3635 | 19 | case ARM::VSTMDDB_UPD: |
3636 | 19 | case ARM::VSTMSIA: |
3637 | 19 | case ARM::VSTMSIA_UPD: |
3638 | 19 | case ARM::VSTMSDB_UPD: |
3639 | 19 | UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign); |
3640 | 19 | break; |
3641 | 19 | |
3642 | 474 | case ARM::STMIA: |
3643 | 474 | case ARM::STMDA: |
3644 | 474 | case ARM::STMDB: |
3645 | 474 | case ARM::STMIB: |
3646 | 474 | case ARM::STMIA_UPD: |
3647 | 474 | case ARM::STMDA_UPD: |
3648 | 474 | case ARM::STMDB_UPD: |
3649 | 474 | case ARM::STMIB_UPD: |
3650 | 474 | case ARM::tSTMIA_UPD: |
3651 | 474 | case ARM::tPOP_RET: |
3652 | 474 | case ARM::tPOP: |
3653 | 474 | case ARM::t2STMIA: |
3654 | 474 | case ARM::t2STMDB: |
3655 | 474 | case ARM::t2STMIA_UPD: |
3656 | 474 | case ARM::t2STMDB_UPD: |
3657 | 474 | UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign); |
3658 | 474 | break; |
3659 | 2.41k | } |
3660 | 2.41k | |
3661 | 2.41k | if (2.41k UseCycle == -12.41k ) |
3662 | 2.41k | // Assume it's read in the first stage. |
3663 | 1.41k | UseCycle = 1; |
3664 | 2.41k | |
3665 | 2.41k | UseCycle = DefCycle - UseCycle + 1; |
3666 | 2.41k | if (UseCycle > 02.41k ) { |
3667 | 2.16k | if (LdmBypass2.16k ) { |
3668 | 410 | // It's a variable_ops instruction so we can't use DefIdx here. Just use |
3669 | 410 | // first def operand. |
3670 | 410 | if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1, |
3671 | 410 | UseClass, UseIdx)) |
3672 | 0 | --UseCycle; |
3673 | 2.16k | } else if (1.75k ItinData->hasPipelineForwarding(DefClass, DefIdx, |
3674 | 1.75k | UseClass, UseIdx)) { |
3675 | 0 | --UseCycle; |
3676 | 0 | } |
3677 | 2.16k | } |
3678 | 157k | |
3679 | 157k | return UseCycle; |
3680 | 157k | } |
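The core of the calculation above is a def-ready cycle minus a use-read cycle. A minimal numeric sketch of that combination (illustrative; the real code also falls back to default cycles and queries the itinerary for pipeline forwarding):

    #include <cassert>

    // DefCycle: pipeline stage in which the defining instruction produces its
    // result (e.g. 4 for a value available in E4). UseReadCycle: stage in which
    // the consumer reads the operand (assumed 1 when the itinerary is silent).
    static int operandLatency(int DefCycle, int UseReadCycle, bool HasForwarding) {
      int Latency = DefCycle - UseReadCycle + 1;
      if (Latency > 0 && HasForwarding)
        --Latency; // a bypass between the two stages saves one cycle
      return Latency;
    }

    int main() {
      assert(operandLatency(4, 1, false) == 4);
      assert(operandLatency(4, 1, true) == 3);
      assert(operandLatency(2, 3, false) == 0); // consumer reads late enough
    }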
3681 | | |
3682 | | static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI, |
3683 | | const MachineInstr *MI, unsigned Reg, |
3684 | 1.99k | unsigned &DefIdx, unsigned &Dist) { |
3685 | 1.99k | Dist = 0; |
3686 | 1.99k | |
3687 | 1.99k | MachineBasicBlock::const_iterator I = MI; ++I; |
3688 | 1.99k | MachineBasicBlock::const_instr_iterator II = std::prev(I.getInstrIterator()); |
3689 | 1.99k | assert(II->isInsideBundle() && "Empty bundle?"); |
3690 | 1.99k | |
3691 | 1.99k | int Idx = -1; |
3692 | 2.07k | while (II->isInsideBundle()2.07k ) { |
3693 | 2.07k | Idx = II->findRegisterDefOperandIdx(Reg, false, true, TRI); |
3694 | 2.07k | if (Idx != -1) |
3695 | 1.99k | break; |
3696 | 85 | --II; |
3697 | 85 | ++Dist; |
3698 | 85 | } |
3699 | 1.99k | |
3700 | 1.99k | assert(Idx != -1 && "Cannot find bundled definition!"); |
3701 | 1.99k | DefIdx = Idx; |
3702 | 1.99k | return &*II; |
3703 | 1.99k | } |
3704 | | |
3705 | | static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI, |
3706 | | const MachineInstr &MI, unsigned Reg, |
3707 | 6.59k | unsigned &UseIdx, unsigned &Dist) { |
3708 | 6.59k | Dist = 0; |
3709 | 6.59k | |
3710 | 6.59k | MachineBasicBlock::const_instr_iterator II = ++MI.getIterator(); |
3711 | 6.59k | assert(II->isInsideBundle() && "Empty bundle?"); |
3712 | 6.59k | MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); |
3713 | 6.59k | |
3714 | 6.59k | // FIXME: This doesn't properly handle multiple uses. |
3715 | 6.59k | int Idx = -1; |
3716 | 13.5k | while (II != E && 13.5k II->isInsideBundle()13.5k ) { |
3717 | 13.3k | Idx = II->findRegisterUseOperandIdx(Reg, false, TRI); |
3718 | 13.3k | if (Idx != -1) |
3719 | 6.47k | break; |
3720 | 6.92k | if (6.92k II->getOpcode() != ARM::t2IT6.92k ) |
3721 | 330 | ++Dist; |
3722 | 13.3k | ++II; |
3723 | 13.3k | } |
3724 | 6.59k | |
3725 | 6.59k | if (Idx == -16.59k ) { |
3726 | 112 | Dist = 0; |
3727 | 112 | return nullptr; |
3728 | 112 | } |
3729 | 6.47k | |
3730 | 6.47k | UseIdx = Idx; |
3731 | 6.47k | return &*II; |
3732 | 6.47k | } |
3733 | | |
3734 | | /// Return the number of cycles to add to (or subtract from) the static |
3735 | | /// itinerary based on the def opcode and alignment. The caller will ensure that |
3736 | | /// adjusted latency is at least one cycle. |
3737 | | static int adjustDefLatency(const ARMSubtarget &Subtarget, |
3738 | | const MachineInstr &DefMI, |
3739 | 407k | const MCInstrDesc &DefMCID, unsigned DefAlign) { |
3740 | 407k | int Adjust = 0; |
3741 | 407k | if (Subtarget.isCortexA8() || 407k Subtarget.isLikeA9()390k || Subtarget.isCortexA7()385k ) { |
3742 | 137k | // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] |
3743 | 137k | // variants are one cycle cheaper. |
3744 | 137k | switch (DefMCID.getOpcode()) { |
3745 | 136k | default: break; |
3746 | 58 | case ARM::LDRrs: |
3747 | 58 | case ARM::LDRBrs: { |
3748 | 58 | unsigned ShOpVal = DefMI.getOperand(3).getImm(); |
3749 | 58 | unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); |
3750 | 58 | if (ShImm == 0 || |
3751 | 43 | (ShImm == 2 && 43 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl27 )) |
3752 | 42 | --Adjust; |
3753 | 58 | break; |
3754 | 58 | } |
3755 | 1.16k | case ARM::t2LDRs: |
3756 | 1.16k | case ARM::t2LDRBs: |
3757 | 1.16k | case ARM::t2LDRHs: |
3758 | 1.16k | case ARM::t2LDRSHs: { |
3759 | 1.16k | // Thumb2 mode: lsl only. |
3760 | 1.16k | unsigned ShAmt = DefMI.getOperand(3).getImm(); |
3761 | 1.16k | if (ShAmt == 0 || 1.16k ShAmt == 2533 ) |
3762 | 1.09k | --Adjust; |
3763 | 1.16k | break; |
3764 | 407k | } |
3765 | 137k | } |
3766 | 269k | } else if (269k Subtarget.isSwift()269k ) { |
3767 | 46 | // FIXME: Properly handle all of the latency adjustments for address |
3768 | 46 | // writeback. |
3769 | 46 | switch (DefMCID.getOpcode()) { |
3770 | 46 | default: break; |
3771 | 0 | case ARM::LDRrs: |
3772 | 0 | case ARM::LDRBrs: { |
3773 | 0 | unsigned ShOpVal = DefMI.getOperand(3).getImm(); |
3774 | 0 | bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; |
3775 | 0 | unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); |
3776 | 0 | if (!isSub && |
3777 | 0 | (ShImm == 0 || |
3778 | 0 | ((ShImm == 1 || 0 ShImm == 20 || ShImm == 30 ) && |
3779 | 0 | ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) |
3780 | 0 | Adjust -= 2; |
3781 | 0 | else if (0 !isSub && |
3782 | 0 | ShImm == 10 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr0 ) |
3783 | 0 | --Adjust; |
3784 | 0 | break; |
3785 | 0 | } |
3786 | 0 | case ARM::t2LDRs: |
3787 | 0 | case ARM::t2LDRBs: |
3788 | 0 | case ARM::t2LDRHs: |
3789 | 0 | case ARM::t2LDRSHs: { |
3790 | 0 | // Thumb2 mode: lsl only. |
3791 | 0 | unsigned ShAmt = DefMI.getOperand(3).getImm(); |
3792 | 0 | if (ShAmt == 0 || 0 ShAmt == 10 || ShAmt == 20 || ShAmt == 30 ) |
3793 | 0 | Adjust -= 2; |
3794 | 0 | break; |
3795 | 407k | } |
3796 | 269k | } |
3797 | 269k | } |
3798 | 407k | |
3799 | 407k | if (407k DefAlign < 8 && 407k Subtarget.checkVLDnAccessAlignment()386k ) { |
3800 | 4.32k | switch (DefMCID.getOpcode()) { |
3801 | 4.09k | default: break; |
3802 | 235 | case ARM::VLD1q8: |
3803 | 235 | case ARM::VLD1q16: |
3804 | 235 | case ARM::VLD1q32: |
3805 | 235 | case ARM::VLD1q64: |
3806 | 235 | case ARM::VLD1q8wb_fixed: |
3807 | 235 | case ARM::VLD1q16wb_fixed: |
3808 | 235 | case ARM::VLD1q32wb_fixed: |
3809 | 235 | case ARM::VLD1q64wb_fixed: |
3810 | 235 | case ARM::VLD1q8wb_register: |
3811 | 235 | case ARM::VLD1q16wb_register: |
3812 | 235 | case ARM::VLD1q32wb_register: |
3813 | 235 | case ARM::VLD1q64wb_register: |
3814 | 235 | case ARM::VLD2d8: |
3815 | 235 | case ARM::VLD2d16: |
3816 | 235 | case ARM::VLD2d32: |
3817 | 235 | case ARM::VLD2q8: |
3818 | 235 | case ARM::VLD2q16: |
3819 | 235 | case ARM::VLD2q32: |
3820 | 235 | case ARM::VLD2d8wb_fixed: |
3821 | 235 | case ARM::VLD2d16wb_fixed: |
3822 | 235 | case ARM::VLD2d32wb_fixed: |
3823 | 235 | case ARM::VLD2q8wb_fixed: |
3824 | 235 | case ARM::VLD2q16wb_fixed: |
3825 | 235 | case ARM::VLD2q32wb_fixed: |
3826 | 235 | case ARM::VLD2d8wb_register: |
3827 | 235 | case ARM::VLD2d16wb_register: |
3828 | 235 | case ARM::VLD2d32wb_register: |
3829 | 235 | case ARM::VLD2q8wb_register: |
3830 | 235 | case ARM::VLD2q16wb_register: |
3831 | 235 | case ARM::VLD2q32wb_register: |
3832 | 235 | case ARM::VLD3d8: |
3833 | 235 | case ARM::VLD3d16: |
3834 | 235 | case ARM::VLD3d32: |
3835 | 235 | case ARM::VLD1d64T: |
3836 | 235 | case ARM::VLD3d8_UPD: |
3837 | 235 | case ARM::VLD3d16_UPD: |
3838 | 235 | case ARM::VLD3d32_UPD: |
3839 | 235 | case ARM::VLD1d64Twb_fixed: |
3840 | 235 | case ARM::VLD1d64Twb_register: |
3841 | 235 | case ARM::VLD3q8_UPD: |
3842 | 235 | case ARM::VLD3q16_UPD: |
3843 | 235 | case ARM::VLD3q32_UPD: |
3844 | 235 | case ARM::VLD4d8: |
3845 | 235 | case ARM::VLD4d16: |
3846 | 235 | case ARM::VLD4d32: |
3847 | 235 | case ARM::VLD1d64Q: |
3848 | 235 | case ARM::VLD4d8_UPD: |
3849 | 235 | case ARM::VLD4d16_UPD: |
3850 | 235 | case ARM::VLD4d32_UPD: |
3851 | 235 | case ARM::VLD1d64Qwb_fixed: |
3852 | 235 | case ARM::VLD1d64Qwb_register: |
3853 | 235 | case ARM::VLD4q8_UPD: |
3854 | 235 | case ARM::VLD4q16_UPD: |
3855 | 235 | case ARM::VLD4q32_UPD: |
3856 | 235 | case ARM::VLD1DUPq8: |
3857 | 235 | case ARM::VLD1DUPq16: |
3858 | 235 | case ARM::VLD1DUPq32: |
3859 | 235 | case ARM::VLD1DUPq8wb_fixed: |
3860 | 235 | case ARM::VLD1DUPq16wb_fixed: |
3861 | 235 | case ARM::VLD1DUPq32wb_fixed: |
3862 | 235 | case ARM::VLD1DUPq8wb_register: |
3863 | 235 | case ARM::VLD1DUPq16wb_register: |
3864 | 235 | case ARM::VLD1DUPq32wb_register: |
3865 | 235 | case ARM::VLD2DUPd8: |
3866 | 235 | case ARM::VLD2DUPd16: |
3867 | 235 | case ARM::VLD2DUPd32: |
3868 | 235 | case ARM::VLD2DUPd8wb_fixed: |
3869 | 235 | case ARM::VLD2DUPd16wb_fixed: |
3870 | 235 | case ARM::VLD2DUPd32wb_fixed: |
3871 | 235 | case ARM::VLD2DUPd8wb_register: |
3872 | 235 | case ARM::VLD2DUPd16wb_register: |
3873 | 235 | case ARM::VLD2DUPd32wb_register: |
3874 | 235 | case ARM::VLD4DUPd8: |
3875 | 235 | case ARM::VLD4DUPd16: |
3876 | 235 | case ARM::VLD4DUPd32: |
3877 | 235 | case ARM::VLD4DUPd8_UPD: |
3878 | 235 | case ARM::VLD4DUPd16_UPD: |
3879 | 235 | case ARM::VLD4DUPd32_UPD: |
3880 | 235 | case ARM::VLD1LNd8: |
3881 | 235 | case ARM::VLD1LNd16: |
3882 | 235 | case ARM::VLD1LNd32: |
3883 | 235 | case ARM::VLD1LNd8_UPD: |
3884 | 235 | case ARM::VLD1LNd16_UPD: |
3885 | 235 | case ARM::VLD1LNd32_UPD: |
3886 | 235 | case ARM::VLD2LNd8: |
3887 | 235 | case ARM::VLD2LNd16: |
3888 | 235 | case ARM::VLD2LNd32: |
3889 | 235 | case ARM::VLD2LNq16: |
3890 | 235 | case ARM::VLD2LNq32: |
3891 | 235 | case ARM::VLD2LNd8_UPD: |
3892 | 235 | case ARM::VLD2LNd16_UPD: |
3893 | 235 | case ARM::VLD2LNd32_UPD: |
3894 | 235 | case ARM::VLD2LNq16_UPD: |
3895 | 235 | case ARM::VLD2LNq32_UPD: |
3896 | 235 | case ARM::VLD4LNd8: |
3897 | 235 | case ARM::VLD4LNd16: |
3898 | 235 | case ARM::VLD4LNd32: |
3899 | 235 | case ARM::VLD4LNq16: |
3900 | 235 | case ARM::VLD4LNq32: |
3901 | 235 | case ARM::VLD4LNd8_UPD: |
3902 | 235 | case ARM::VLD4LNd16_UPD: |
3903 | 235 | case ARM::VLD4LNd32_UPD: |
3904 | 235 | case ARM::VLD4LNq16_UPD: |
3905 | 235 | case ARM::VLD4LNq32_UPD: |
3906 | 235 | // If the address is not 64-bit aligned, the latencies of these |
3907 | 235 | // instructions increase by one. |
3908 | 235 | ++Adjust; |
3909 | 235 | break; |
3910 | 407k | } |
3911 | 407k | } |
3912 | 407k | return Adjust; |
3913 | 407k | } |
3914 | | |
3915 | | int ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, |
3916 | | const MachineInstr &DefMI, |
3917 | | unsigned DefIdx, |
3918 | | const MachineInstr &UseMI, |
3919 | 116k | unsigned UseIdx) const { |
3920 | 116k | // No operand latency. The caller may fall back to getInstrLatency. |
3921 | 116k | if (!ItinData || ItinData->isEmpty()) |
3922 | 0 | return -1; |
3923 | 116k | |
3924 | 116k | const MachineOperand &DefMO = DefMI.getOperand(DefIdx); |
3925 | 116k | unsigned Reg = DefMO.getReg(); |
3926 | 116k | |
3927 | 116k | const MachineInstr *ResolvedDefMI = &DefMI; |
3928 | 116k | unsigned DefAdj = 0; |
3929 | 116k | if (DefMI.isBundle()) |
3930 | 1.99k | ResolvedDefMI = |
3931 | 1.99k | getBundledDefMI(&getRegisterInfo(), &DefMI, Reg, DefIdx, DefAdj); |
3932 | 116k | if (ResolvedDefMI->isCopyLike() || ResolvedDefMI->isInsertSubreg() || |
3933 | 116k | ResolvedDefMI->isRegSequence() || ResolvedDefMI->isImplicitDef()) { |
3934 | 60 | return 1; |
3935 | 60 | } |
3936 | 116k | |
3937 | 116k | const MachineInstr *ResolvedUseMI = &UseMI; |
3938 | 116k | unsigned UseAdj = 0; |
3939 | 116k | if (UseMI.isBundle()) { |
3940 | 6.59k | ResolvedUseMI = |
3941 | 6.59k | getBundledUseMI(&getRegisterInfo(), UseMI, Reg, UseIdx, UseAdj); |
3942 | 6.59k | if (!ResolvedUseMI) |
3943 | 112 | return -1; |
3944 | 116k | } |
3945 | 116k | |
3946 | 116k | return getOperandLatencyImpl( |
3947 | 116k | ItinData, *ResolvedDefMI, DefIdx, ResolvedDefMI->getDesc(), DefAdj, DefMO, |
3948 | 116k | Reg, *ResolvedUseMI, UseIdx, ResolvedUseMI->getDesc(), UseAdj); |
3949 | 116k | } |
3950 | | |
3951 | | int ARMBaseInstrInfo::getOperandLatencyImpl( |
3952 | | const InstrItineraryData *ItinData, const MachineInstr &DefMI, |
3953 | | unsigned DefIdx, const MCInstrDesc &DefMCID, unsigned DefAdj, |
3954 | | const MachineOperand &DefMO, unsigned Reg, const MachineInstr &UseMI, |
3955 | 116k | unsigned UseIdx, const MCInstrDesc &UseMCID, unsigned UseAdj) const { |
3956 | 116k | if (Reg == ARM::CPSR) { |
3957 | 5.46k | if (DefMI.getOpcode() == ARM::FMSTAT) { |
3958 | 580 | // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?) |
3959 | 580 | return Subtarget.isLikeA9() ? 1 : 20; |
3960 | 580 | } |
3961 | 4.88k | |
3962 | 4.88k | // CPSR set and branch can be paired in the same cycle. |
3963 | 4.88k | if (UseMI.isBranch()) |
3964 | 0 | return 0; |
3965 | 4.88k | |
3966 | 4.88k | // Otherwise it takes the instruction latency (generally one). |
3967 | 4.88k | unsigned Latency = getInstrLatency(ItinData, DefMI); |
3968 | 4.88k | |
3969 | 4.88k | // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to |
3970 | 4.88k | // its uses. Instructions which are otherwise scheduled between them may |
3971 | 4.88k | // incur a code size penalty (not able to use the CPSR setting 16-bit |
3972 | 4.88k | // instructions). |
3973 | 4.88k | if (Latency > 0 && Subtarget.isThumb2()) { |
3974 | 4.04k | const MachineFunction *MF = DefMI.getParent()->getParent(); |
3975 | 4.04k | // FIXME: Use Function::optForSize(). |
3976 | 4.04k | if (MF->getFunction()->hasFnAttribute(Attribute::OptimizeForSize)) |
3977 | 947 | --Latency; |
3978 | 4.04k | } |
3979 | 5.46k | return Latency; |
3980 | 5.46k | } |
3981 | 110k | |
3982 | 110k | if (DefMO.isImplicit() || UseMI.getOperand(UseIdx).isImplicit()) |
3983 | 18.6k | return -1; |
3984 | 91.9k | |
3985 | 91.9k | unsigned DefAlign = DefMI.hasOneMemOperand() |
3986 | 20.2k | ? (*DefMI.memoperands_begin())->getAlignment() |
3987 | 71.6k | : 0; |
3988 | 91.9k | unsigned UseAlign = UseMI.hasOneMemOperand() |
3989 | 15.8k | ? (*UseMI.memoperands_begin())->getAlignment() |
3990 | 76.0k | : 0; |
3991 | 91.9k | |
3992 | 91.9k | // Get the itinerary's latency if possible, and handle variable_ops. |
3993 | 91.9k | int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, UseMCID, |
3994 | 91.9k | UseIdx, UseAlign); |
3995 | 91.9k | // Unable to find operand latency. The caller may resort to getInstrLatency. |
3996 | 91.9k | if (Latency < 0) |
3997 | 17.7k | return Latency; |
3998 | 74.2k | |
3999 | 74.2k | // Adjust for IT block position. |
4000 | 74.2k | int Adj = DefAdj + UseAdj; |
4001 | 74.2k | |
4002 | 74.2k | // Adjust for dynamic def-side opcode variants not captured by the itinerary. |
4003 | 74.2k | Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign); |
4004 | 74.2k | if (Adj >= 0 || (int)Latency > -Adj) { |
4005 | 74.2k | return Latency + Adj; |
4006 | 74.2k | } |
4007 | 0 | // Return the itinerary latency, which may be zero but not less than zero. |
4008 | 0 | return Latency; |
4009 | 0 | } |
4010 | | |
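The tail of getOperandLatencyImpl above folds the bundle adjustments (DefAdj, UseAdj) and the def-side variant adjustment into the itinerary latency, but only when the sum stays non-negative. A minimal sketch of that clamping rule, assuming a free-standing helper (the name is made up for illustration and is not part of this file):

    // Combine an itinerary latency with a signed adjustment; if applying the
    // adjustment would drive the result below zero, keep the raw latency.
    static int combineLatencyWithAdjustment(int Latency, int Adj) {
      if (Adj >= 0 || Latency > -Adj)
        return Latency + Adj;
      return Latency;
    }
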
4011 | | int |
4012 | | ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, |
4013 | | SDNode *DefNode, unsigned DefIdx, |
4014 | 204k | SDNode *UseNode, unsigned UseIdx) const { |
4015 | 204k | if (!DefNode->isMachineOpcode()) |
4016 | 78.3k | return 1; |
4017 | 125k | |
4018 | 125k | const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode()); |
4019 | 125k | |
4020 | 125k | if (isZeroCost(DefMCID.Opcode)) |
4021 | 7.00k | return 0; |
4022 | 118k | |
4023 | 118k | if (!ItinData || ItinData->isEmpty()) |
4024 | 7.71k | return DefMCID.mayLoad() ? 3 : 1; |
4025 | 110k | |
4026 | 110k | if (!UseNode->isMachineOpcode()) { |
4027 | 45.8k | int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx); |
4028 | 45.8k | int Adj = Subtarget.getPreISelOperandLatencyAdjustment(); |
4029 | 45.8k | int Threshold = 1 + Adj; |
4030 | 45.8k | return Latency <= Threshold ? 1 : Latency - Adj; |
4031 | 45.8k | } |
4032 | 65.1k | |
4033 | 65.1k | const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode()); |
4034 | 65.1k | const MachineSDNode *DefMN = dyn_cast<MachineSDNode>(DefNode); |
4035 | 65.1k | unsigned DefAlign = !DefMN->memoperands_empty() |
4036 | 65.1k | ? (*DefMN->memoperands_begin())->getAlignment() : 0; |
4037 | 65.1k | const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode); |
4038 | 65.1k | unsigned UseAlign = !UseMN->memoperands_empty() |
4039 | 65.1k | ? (*UseMN->memoperands_begin())->getAlignment() : 0; |
4040 | 65.1k | int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, |
4041 | 65.1k | UseMCID, UseIdx, UseAlign); |
4042 | 65.1k | |
4043 | 65.1k | if (Latency > 1 && |
4044 | 27.5k | (Subtarget.isCortexA8() || Subtarget.isLikeA9() || |
4045 | 65.1k | Subtarget.isCortexA7())) { |
4046 | 9.47k | // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] |
4047 | 9.47k | // variants are one cycle cheaper. |
4048 | 9.47k | switch (DefMCID.getOpcode()) { |
4049 | 9.09k | default: break; |
4050 | 9 | case ARM::LDRrs: |
4051 | 9 | case ARM::LDRBrs: { |
4052 | 9 | unsigned ShOpVal = |
4053 | 9 | cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue(); |
4054 | 9 | unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); |
4055 | 9 | if (ShImm == 0 || |
4056 | 7 | (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)) |
4057 | 9 | --Latency; |
4058 | 9 | break; |
4059 | 9 | } |
4060 | 374 | case ARM::t2LDRs: |
4061 | 374 | case ARM::t2LDRBs: |
4062 | 374 | case ARM::t2LDRHs: |
4063 | 374 | case ARM::t2LDRSHs: { |
4064 | 374 | // Thumb2 mode: lsl only. |
4065 | 374 | unsigned ShAmt = |
4066 | 374 | cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue(); |
4067 | 374 | if (ShAmt == 0 || ShAmt == 2) |
4068 | 349 | --Latency; |
4069 | 374 | break; |
4070 | 65.1k | } |
4071 | 9.47k | } |
4072 | 55.6k | } else if (DefIdx == 0 && Latency > 2 && Subtarget.isSwift()) { |
4073 | 0 | // FIXME: Properly handle all of the latency adjustments for address |
4074 | 0 | // writeback. |
4075 | 0 | switch (DefMCID.getOpcode()) { |
4076 | 0 | default: break; |
4077 | 0 | case ARM::LDRrs: |
4078 | 0 | case ARM::LDRBrs: { |
4079 | 0 | unsigned ShOpVal = |
4080 | 0 | cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue(); |
4081 | 0 | unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); |
4082 | 0 | if (ShImm == 0 || |
4083 | 0 | ((ShImm == 1 || ShImm == 2 || ShImm == 3) && |
4084 | 0 | ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)) |
4085 | 0 | Latency -= 2; |
4086 | 0 | else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr) |
4087 | 0 | --Latency; |
4088 | 0 | break; |
4089 | 0 | } |
4090 | 0 | case ARM::t2LDRs: |
4091 | 0 | case ARM::t2LDRBs: |
4092 | 0 | case ARM::t2LDRHs: |
4093 | 0 | case ARM::t2LDRSHs: |
4094 | 0 | // Thumb2 mode: lsl 0-3 only. |
4095 | 0 | Latency -= 2; |
4096 | 0 | break; |
4097 | 65.1k | } |
4098 | 65.1k | } |
4099 | 65.1k | |
4100 | 65.1k | if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) |
4101 | 981 | switch (DefMCID.getOpcode()) { |
4102 | 962 | default: break; |
4103 | 19 | case ARM::VLD1q8: |
4104 | 19 | case ARM::VLD1q16: |
4105 | 19 | case ARM::VLD1q32: |
4106 | 19 | case ARM::VLD1q64: |
4107 | 19 | case ARM::VLD1q8wb_register: |
4108 | 19 | case ARM::VLD1q16wb_register: |
4109 | 19 | case ARM::VLD1q32wb_register: |
4110 | 19 | case ARM::VLD1q64wb_register: |
4111 | 19 | case ARM::VLD1q8wb_fixed: |
4112 | 19 | case ARM::VLD1q16wb_fixed: |
4113 | 19 | case ARM::VLD1q32wb_fixed: |
4114 | 19 | case ARM::VLD1q64wb_fixed: |
4115 | 19 | case ARM::VLD2d8: |
4116 | 19 | case ARM::VLD2d16: |
4117 | 19 | case ARM::VLD2d32: |
4118 | 19 | case ARM::VLD2q8Pseudo: |
4119 | 19 | case ARM::VLD2q16Pseudo: |
4120 | 19 | case ARM::VLD2q32Pseudo: |
4121 | 19 | case ARM::VLD2d8wb_fixed: |
4122 | 19 | case ARM::VLD2d16wb_fixed: |
4123 | 19 | case ARM::VLD2d32wb_fixed: |
4124 | 19 | case ARM::VLD2q8PseudoWB_fixed: |
4125 | 19 | case ARM::VLD2q16PseudoWB_fixed: |
4126 | 19 | case ARM::VLD2q32PseudoWB_fixed: |
4127 | 19 | case ARM::VLD2d8wb_register: |
4128 | 19 | case ARM::VLD2d16wb_register: |
4129 | 19 | case ARM::VLD2d32wb_register: |
4130 | 19 | case ARM::VLD2q8PseudoWB_register: |
4131 | 19 | case ARM::VLD2q16PseudoWB_register: |
4132 | 19 | case ARM::VLD2q32PseudoWB_register: |
4133 | 19 | case ARM::VLD3d8Pseudo: |
4134 | 19 | case ARM::VLD3d16Pseudo: |
4135 | 19 | case ARM::VLD3d32Pseudo: |
4136 | 19 | case ARM::VLD1d64TPseudo: |
4137 | 19 | case ARM::VLD1d64TPseudoWB_fixed: |
4138 | 19 | case ARM::VLD3d8Pseudo_UPD: |
4139 | 19 | case ARM::VLD3d16Pseudo_UPD: |
4140 | 19 | case ARM::VLD3d32Pseudo_UPD: |
4141 | 19 | case ARM::VLD3q8Pseudo_UPD: |
4142 | 19 | case ARM::VLD3q16Pseudo_UPD: |
4143 | 19 | case ARM::VLD3q32Pseudo_UPD: |
4144 | 19 | case ARM::VLD3q8oddPseudo: |
4145 | 19 | case ARM::VLD3q16oddPseudo: |
4146 | 19 | case ARM::VLD3q32oddPseudo: |
4147 | 19 | case ARM::VLD3q8oddPseudo_UPD: |
4148 | 19 | case ARM::VLD3q16oddPseudo_UPD: |
4149 | 19 | case ARM::VLD3q32oddPseudo_UPD: |
4150 | 19 | case ARM::VLD4d8Pseudo: |
4151 | 19 | case ARM::VLD4d16Pseudo: |
4152 | 19 | case ARM::VLD4d32Pseudo: |
4153 | 19 | case ARM::VLD1d64QPseudo: |
4154 | 19 | case ARM::VLD1d64QPseudoWB_fixed: |
4155 | 19 | case ARM::VLD4d8Pseudo_UPD: |
4156 | 19 | case ARM::VLD4d16Pseudo_UPD: |
4157 | 19 | case ARM::VLD4d32Pseudo_UPD: |
4158 | 19 | case ARM::VLD4q8Pseudo_UPD: |
4159 | 19 | case ARM::VLD4q16Pseudo_UPD: |
4160 | 19 | case ARM::VLD4q32Pseudo_UPD: |
4161 | 19 | case ARM::VLD4q8oddPseudo: |
4162 | 19 | case ARM::VLD4q16oddPseudo: |
4163 | 19 | case ARM::VLD4q32oddPseudo: |
4164 | 19 | case ARM::VLD4q8oddPseudo_UPD: |
4165 | 19 | case ARM::VLD4q16oddPseudo_UPD: |
4166 | 19 | case ARM::VLD4q32oddPseudo_UPD: |
4167 | 19 | case ARM::VLD1DUPq8: |
4168 | 19 | case ARM::VLD1DUPq16: |
4169 | 19 | case ARM::VLD1DUPq32: |
4170 | 19 | case ARM::VLD1DUPq8wb_fixed: |
4171 | 19 | case ARM::VLD1DUPq16wb_fixed: |
4172 | 19 | case ARM::VLD1DUPq32wb_fixed: |
4173 | 19 | case ARM::VLD1DUPq8wb_register: |
4174 | 19 | case ARM::VLD1DUPq16wb_register: |
4175 | 19 | case ARM::VLD1DUPq32wb_register: |
4176 | 19 | case ARM::VLD2DUPd8: |
4177 | 19 | case ARM::VLD2DUPd16: |
4178 | 19 | case ARM::VLD2DUPd32: |
4179 | 19 | case ARM::VLD2DUPd8wb_fixed: |
4180 | 19 | case ARM::VLD2DUPd16wb_fixed: |
4181 | 19 | case ARM::VLD2DUPd32wb_fixed: |
4182 | 19 | case ARM::VLD2DUPd8wb_register: |
4183 | 19 | case ARM::VLD2DUPd16wb_register: |
4184 | 19 | case ARM::VLD2DUPd32wb_register: |
4185 | 19 | case ARM::VLD4DUPd8Pseudo: |
4186 | 19 | case ARM::VLD4DUPd16Pseudo: |
4187 | 19 | case ARM::VLD4DUPd32Pseudo: |
4188 | 19 | case ARM::VLD4DUPd8Pseudo_UPD: |
4189 | 19 | case ARM::VLD4DUPd16Pseudo_UPD: |
4190 | 19 | case ARM::VLD4DUPd32Pseudo_UPD: |
4191 | 19 | case ARM::VLD1LNq8Pseudo: |
4192 | 19 | case ARM::VLD1LNq16Pseudo: |
4193 | 19 | case ARM::VLD1LNq32Pseudo: |
4194 | 19 | case ARM::VLD1LNq8Pseudo_UPD: |
4195 | 19 | case ARM::VLD1LNq16Pseudo_UPD: |
4196 | 19 | case ARM::VLD1LNq32Pseudo_UPD: |
4197 | 19 | case ARM::VLD2LNd8Pseudo: |
4198 | 19 | case ARM::VLD2LNd16Pseudo: |
4199 | 19 | case ARM::VLD2LNd32Pseudo: |
4200 | 19 | case ARM::VLD2LNq16Pseudo: |
4201 | 19 | case ARM::VLD2LNq32Pseudo: |
4202 | 19 | case ARM::VLD2LNd8Pseudo_UPD: |
4203 | 19 | case ARM::VLD2LNd16Pseudo_UPD: |
4204 | 19 | case ARM::VLD2LNd32Pseudo_UPD: |
4205 | 19 | case ARM::VLD2LNq16Pseudo_UPD: |
4206 | 19 | case ARM::VLD2LNq32Pseudo_UPD: |
4207 | 19 | case ARM::VLD4LNd8Pseudo: |
4208 | 19 | case ARM::VLD4LNd16Pseudo: |
4209 | 19 | case ARM::VLD4LNd32Pseudo: |
4210 | 19 | case ARM::VLD4LNq16Pseudo: |
4211 | 19 | case ARM::VLD4LNq32Pseudo: |
4212 | 19 | case ARM::VLD4LNd8Pseudo_UPD: |
4213 | 19 | case ARM::VLD4LNd16Pseudo_UPD: |
4214 | 19 | case ARM::VLD4LNd32Pseudo_UPD: |
4215 | 19 | case ARM::VLD4LNq16Pseudo_UPD: |
4216 | 19 | case ARM::VLD4LNq32Pseudo_UPD: |
4217 | 19 | // If the address is not 64-bit aligned, the latencies of these |
4218 | 19 | // instructions increase by one. |
4219 | 19 | ++Latency; |
4220 | 19 | break; |
4221 | 65.1k | } |
4222 | 65.1k | |
4223 | 65.1k | return Latency; |
4224 | 65.1k | } |
4225 | | |
4226 | 152k | unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const { |
4227 | 152k | if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() || |
4228 | 152k | MI.isImplicitDef()) |
4229 | 10 | return 0; |
4230 | 152k | |
4231 | 152k | if (MI.isBundle()) |
4232 | 27 | return 0; |
4233 | 152k | |
4234 | 152k | const MCInstrDesc &MCID = MI.getDesc(); |
4235 | 152k | |
4236 | 152k | if (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) && |
4237 | 152k | !Subtarget.cheapPredicableCPSRDef())) { |
4238 | 24.9k | // When predicated, CPSR is an additional source operand for CPSR updating |
4239 | 24.9k | // instructions, this apparently increases their latencies. |
4240 | 24.9k | return 1; |
4241 | 24.9k | } |
4242 | 127k | return 0; |
4243 | 127k | } |
4244 | | |
4245 | | unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, |
4246 | | const MachineInstr &MI, |
4247 | 349k | unsigned *PredCost) const { |
4248 | 349k | if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() || |
4249 | 349k | MI.isImplicitDef()) |
4250 | 60 | return 1; |
4251 | 349k | |
4252 | 349k | // An instruction scheduler typically runs on unbundled instructions; however, |
4253 | 349k | // other passes may query the latency of a bundled instruction. |
4254 | 349k | if (MI.isBundle()) { |
4255 | 8.16k | unsigned Latency = 0; |
4256 | 8.16k | MachineBasicBlock::const_instr_iterator I = MI.getIterator(); |
4257 | 8.16k | MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); |
4258 | 26.7k | while (++I != E && I->isInsideBundle()) { |
4259 | 18.6k | if (I->getOpcode() != ARM::t2IT) |
4260 | 10.6k | Latency += getInstrLatency(ItinData, *I, PredCost); |
4261 | 18.6k | } |
4262 | 8.16k | return Latency; |
4263 | 8.16k | } |
4264 | 341k | |
4265 | 341k | const MCInstrDesc &MCID = MI.getDesc(); |
4266 | 341k | if (PredCost && (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) && |
4267 | 341k | !Subtarget.cheapPredicableCPSRDef()))) { |
4268 | 0 | // When predicated, CPSR is an additional source operand for CPSR updating |
4269 | 0 | // instructions, this apparently increases their latencies. |
4270 | 0 | *PredCost = 1; |
4271 | 0 | } |
4272 | 341k | // Be sure to call getStageLatency for an empty itinerary in case it has a |
4273 | 341k | // valid MinLatency property. |
4274 | 341k | if (!ItinData) |
4275 | 0 | return MI.mayLoad() ? 3 : 1; |
4276 | 341k | |
4277 | 341k | unsigned Class = MCID.getSchedClass(); |
4278 | 341k | |
4279 | 341k | // For instructions with variable uops, use uops as latency. |
4280 | 341k | if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0) |
4281 | 7.99k | return getNumMicroOps(ItinData, MI); |
4282 | 333k | |
4283 | 333k | // For the common case, fall back on the itinerary's latency. |
4284 | 333k | unsigned Latency = ItinData->getStageLatency(Class); |
4285 | 333k | |
4286 | 333k | // Adjust for dynamic def-side opcode variants not captured by the itinerary. |
4287 | 333k | unsigned DefAlign = |
4288 | 333k | MI.hasOneMemOperand() ? (*MI.memoperands_begin())->getAlignment() : 0; |
4289 | 333k | int Adj = adjustDefLatency(Subtarget, MI, MCID, DefAlign); |
4290 | 333k | if (Adj >= 0 || (int)Latency > -Adj) { |
4291 | 333k | return Latency + Adj; |
4292 | 333k | } |
4293 | 31 | return Latency; |
4294 | 31 | } |
4295 | | |
4296 | | int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, |
4297 | 194k | SDNode *Node) const { |
4298 | 194k | if (!Node->isMachineOpcode()) |
4299 | 0 | return 1; |
4300 | 194k | |
4301 | 194k | if (!ItinData || ItinData->isEmpty()) |
4302 | 0 | return 1; |
4303 | 194k | |
4304 | 194k | unsigned Opcode = Node->getMachineOpcode(); |
4305 | 194k | switch (Opcode) { |
4306 | 194k | default: |
4307 | 194k | return ItinData->getStageLatency(get(Opcode).getSchedClass()); |
4308 | 2 | case ARM::VLDMQIA: |
4309 | 2 | case ARM::VSTMQIA: |
4310 | 2 | return 2; |
4311 | 0 | } |
4312 | 0 | } |
4313 | | |
4314 | | bool ARMBaseInstrInfo::hasHighOperandLatency(const TargetSchedModel &SchedModel, |
4315 | | const MachineRegisterInfo *MRI, |
4316 | | const MachineInstr &DefMI, |
4317 | | unsigned DefIdx, |
4318 | | const MachineInstr &UseMI, |
4319 | 230 | unsigned UseIdx) const { |
4320 | 230 | unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask; |
4321 | 230 | unsigned UDomain = UseMI.getDesc().TSFlags & ARMII::DomainMask; |
4322 | 230 | if (Subtarget.nonpipelinedVFP() && |
4323 | 25 | (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP)) |
4324 | 0 | return true; |
4325 | 230 | |
4326 | 230 | // Hoist VFP / NEON instructions with 4 or higher latency. |
4327 | 230 | unsigned Latency = |
4328 | 230 | SchedModel.computeOperandLatency(&DefMI, DefIdx, &UseMI, UseIdx); |
4329 | 230 | if (Latency <= 3) |
4330 | 203 | return false; |
4331 | 27 | return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON || |
4332 | 27 | UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON; |
4333 | 230 | } |
4334 | | |
4335 | | bool ARMBaseInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel, |
4336 | | const MachineInstr &DefMI, |
4337 | 1.80k | unsigned DefIdx) const { |
4338 | 1.80k | const InstrItineraryData *ItinData = SchedModel.getInstrItineraries(); |
4339 | 1.80k | if (!ItinData || ItinData->isEmpty()) |
4340 | 470 | return false; |
4341 | 1.33k | |
4342 | 1.33k | unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask; |
4343 | 1.33k | if (DDomain == ARMII::DomainGeneral) { |
4344 | 1.28k | unsigned DefClass = DefMI.getDesc().getSchedClass(); |
4345 | 1.28k | int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx); |
4346 | 1.27k | return (DefCycle != -1 && DefCycle <= 2); |
4347 | 1.28k | } |
4348 | 51 | return false; |
4349 | 51 | } |
4350 | | |
4351 | | bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI, |
4352 | 1.20M | StringRef &ErrInfo) const { |
4353 | 1.20M | if (convertAddSubFlagsOpcode(MI.getOpcode())) { |
4354 | 0 | ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG"; |
4355 | 0 | return false; |
4356 | 0 | } |
4357 | 1.20M | return true; |
4358 | 1.20M | } |
4359 | | |
4360 | | // LoadStackGuard has so far only been implemented for MachO. Different code |
4361 | | // sequence is needed for other targets. |
4362 | | void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI, |
4363 | | unsigned LoadImmOpc, |
4364 | 146 | unsigned LoadOpc) const { |
4365 | 146 | assert(!Subtarget.isROPI() && !Subtarget.isRWPI() && |
4366 | 146 | "ROPI/RWPI not currently supported with stack guard"); |
4367 | 146 | |
4368 | 146 | MachineBasicBlock &MBB = *MI->getParent(); |
4369 | 146 | DebugLoc DL = MI->getDebugLoc(); |
4370 | 146 | unsigned Reg = MI->getOperand(0).getReg(); |
4371 | 146 | const GlobalValue *GV = |
4372 | 146 | cast<GlobalValue>((*MI->memoperands_begin())->getValue()); |
4373 | 146 | MachineInstrBuilder MIB; |
4374 | 146 | |
4375 | 146 | BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg) |
4376 | 146 | .addGlobalAddress(GV, 0, ARMII::MO_NONLAZY); |
4377 | 146 | |
4378 | 146 | if (Subtarget.isGVIndirectSymbol(GV)) { |
4379 | 138 | MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg); |
4380 | 138 | MIB.addReg(Reg, RegState::Kill).addImm(0); |
4381 | 138 | auto Flags = MachineMemOperand::MOLoad | |
4382 | 138 | MachineMemOperand::MODereferenceable | |
4383 | 138 | MachineMemOperand::MOInvariant; |
4384 | 138 | MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand( |
4385 | 138 | MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, 4); |
4386 | 138 | MIB.addMemOperand(MMO).add(predOps(ARMCC::AL)); |
4387 | 138 | } |
4388 | 146 | |
4389 | 146 | MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg); |
4390 | 146 | MIB.addReg(Reg, RegState::Kill) |
4391 | 146 | .addImm(0) |
4392 | 146 | .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()) |
4393 | 146 | .add(predOps(ARMCC::AL)); |
4394 | 146 | } |
4395 | | |
4396 | | bool |
4397 | | ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc, |
4398 | | unsigned &AddSubOpc, |
4399 | 604 | bool &NegAcc, bool &HasLane) const { |
4400 | 604 | DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode); |
4401 | 604 | if (I == MLxEntryMap.end()) |
4402 | 588 | return false; |
4403 | 16 | |
4404 | 16 | const ARM_MLxEntry &Entry = ARM_MLxTable[I->second]; |
4405 | 16 | MulOpc = Entry.MulOpc; |
4406 | 16 | AddSubOpc = Entry.AddSubOpc; |
4407 | 16 | NegAcc = Entry.NegAcc; |
4408 | 16 | HasLane = Entry.HasLane; |
4409 | 16 | return true; |
4410 | 16 | } |
4411 | | |
4412 | | //===----------------------------------------------------------------------===// |
4413 | | // Execution domains. |
4414 | | //===----------------------------------------------------------------------===// |
4415 | | // |
4416 | | // Some instructions go down the NEON pipeline, some go down the VFP pipeline, |
4417 | | // and some can go down both. The vmov instructions go down the VFP pipeline, |
4418 | | // but they can be changed to vorr equivalents that are executed by the NEON |
4419 | | // pipeline. |
4420 | | // |
4421 | | // We use the following execution domain numbering: |
4422 | | // |
4423 | | enum ARMExeDomain { |
4424 | | ExeGeneric = 0, |
4425 | | ExeVFP = 1, |
4426 | | ExeNEON = 2 |
4427 | | }; |
4428 | | |
4429 | | // |
4430 | | // Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h |
4431 | | // |
4432 | | std::pair<uint16_t, uint16_t> |
4433 | 292k | ARMBaseInstrInfo::getExecutionDomain(const MachineInstr &MI) const { |
4434 | 292k | // If we don't have access to NEON instructions then we won't be able |
4435 | 292k | // to swizzle anything to the NEON domain. Check to make sure. |
4436 | 292k | if (Subtarget.hasNEON()) { |
4437 | 224k | // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON |
4438 | 224k | // if they are not predicated. |
4439 | 224k | if (MI.getOpcode() == ARM::VMOVD && !isPredicated(MI)) |
4440 | 733 | return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON)); |
4441 | 224k | |
4442 | 224k | // CortexA9 is particularly picky about mixing the two and wants these |
4443 | 224k | // converted. |
4444 | 224k | if (Subtarget.useNEONForFPMovs() && !isPredicated(MI) && |
4445 | 1.52k | (MI.getOpcode() == ARM::VMOVRS || MI.getOpcode() == ARM::VMOVSR || |
4446 | 1.38k | MI.getOpcode() == ARM::VMOVS)) |
4447 | 150 | return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON)); |
4448 | 291k | } |
4449 | 291k | // No other instructions can be swizzled, so just determine their domain. |
4450 | 291k | unsigned Domain = MI.getDesc().TSFlags & ARMII::DomainMask; |
4451 | 291k | |
4452 | 291k | if (Domain & ARMII::DomainNEON) |
4453 | 19.1k | return std::make_pair(ExeNEON, 0); |
4454 | 272k | |
4455 | 272k | // Certain instructions can go either way on Cortex-A8. |
4456 | 272k | // Treat them as NEON instructions. |
4457 | 272k | if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8()) |
4458 | 124 | return std::make_pair(ExeNEON, 0); |
4459 | 272k | |
4460 | 272k | if (Domain & ARMII::DomainVFP) |
4461 | 6.54k | return std::make_pair(ExeVFP, 0); |
4462 | 265k | |
4463 | 265k | return std::make_pair(ExeGeneric, 0); |
4464 | 265k | } |
4465 | | |
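getExecutionDomain above returns a (current domain, legal-domain bitmask) pair, where a zero mask means the instruction cannot be swizzled. A rough sketch of how a caller might test that pair before asking setExecutionDomain to move a VFP move into the NEON pipeline (illustrative only; canSwizzleToNEON is a hypothetical helper, not an LLVM API):

    // True when the instruction currently executes in the VFP domain and the
    // mask says a NEON encoding is also legal, per the ExeVFP/ExeNEON numbering.
    static bool canSwizzleToNEON(std::pair<uint16_t, uint16_t> DomainInfo) {
      return DomainInfo.first == ExeVFP &&
             (DomainInfo.second & (1 << ExeNEON)) != 0;
    }
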
4466 | | static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI, |
4467 | 71 | unsigned SReg, unsigned &Lane) { |
4468 | 71 | unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass); |
4469 | 71 | Lane = 0; |
4470 | 71 | |
4471 | 71 | if (DReg != ARM::NoRegister) |
4472 | 46 | return DReg; |
4473 | 25 | |
4474 | 25 | Lane = 1; |
4475 | 25 | DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass); |
4476 | 25 | |
4477 | 25 | assert(DReg && "S-register with no D super-register?"); |
4478 | 25 | return DReg; |
4479 | 25 | } |
4480 | | |
4481 | | /// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane, |
4482 | | /// set ImplicitSReg to a register number that must be marked as implicit-use or |
4483 | | /// zero if no register needs to be defined as implicit-use. |
4484 | | /// |
4485 | | /// If the function cannot determine if an SPR should be marked implicit use or |
4486 | | /// not, it returns false. |
4487 | | /// |
4488 | | /// This function handles cases where an instruction is being modified from taking |
4489 | | /// an SPR to a DPR[Lane]. A use of the DPR is being added, which may conflict |
4490 | | /// with an earlier def of an SPR corresponding to DPR[Lane^1] (i.e. the other |
4491 | | /// lane of the DPR). |
4492 | | /// |
4493 | | /// If the other SPR is defined, an implicit-use of it should be added. Else, |
4494 | | /// (including the case where the DPR itself is defined), it should not. |
4495 | | /// |
4496 | | static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI, |
4497 | | MachineInstr &MI, unsigned DReg, |
4498 | 22 | unsigned Lane, unsigned &ImplicitSReg) { |
4499 | 22 | // If the DPR is defined or used already, the other SPR lane will be chained |
4500 | 22 | // correctly, so there is nothing to be done. |
4501 | 22 | if (MI.definesRegister(DReg, TRI) || MI.readsRegister(DReg, TRI)) { |
4502 | 13 | ImplicitSReg = 0; |
4503 | 13 | return true; |
4504 | 13 | } |
4505 | 9 | |
4506 | 9 | // Otherwise we need to go searching to see if the SPR is set explicitly. |
4507 | 9 | ImplicitSReg = TRI->getSubReg(DReg, |
4508 | 9 | (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1); |
4509 | 9 | MachineBasicBlock::LivenessQueryResult LQR = |
4510 | 9 | MI.getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI); |
4511 | 9 | |
4512 | 9 | if (LQR == MachineBasicBlock::LQR_Live) |
4513 | 0 | return true; |
4514 | 9 | else if (LQR == MachineBasicBlock::LQR_Unknown) |
4515 | 0 | return false; |
4516 | 9 | |
4517 | 9 | // If the register is known not to be live, there is no need to add an |
4518 | 9 | // implicit-use. |
4519 | 9 | ImplicitSReg = 0; |
4520 | 9 | return true; |
4521 | 9 | } |
4522 | | |
4523 | | void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI, |
4524 | 883 | unsigned Domain) const { |
4525 | 883 | unsigned DstReg, SrcReg, DReg; |
4526 | 883 | unsigned Lane; |
4527 | 883 | MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI); |
4528 | 883 | const TargetRegisterInfo *TRI = &getRegisterInfo(); |
4529 | 883 | switch (MI.getOpcode()) { |
4530 | 0 | default: |
4531 | 0 | llvm_unreachable("cannot handle opcode!"); |
4532 | 0 | break; |
4533 | 733 | case ARM::VMOVD: |
4534 | 733 | if (Domain != ExeNEON) |
4535 | 297 | break; |
4536 | 436 | |
4537 | 436 | // Zap the predicate operands. |
4538 | 733 | assert(!isPredicated(MI) && "Cannot predicate a VORRd"); |
4539 | 436 | |
4540 | 436 | // Make sure we've got NEON instructions. |
4541 | 436 | assert(Subtarget.hasNEON() && "VORRd requires NEON"); |
4542 | 436 | |
4543 | 436 | // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits) |
4544 | 436 | DstReg = MI.getOperand(0).getReg(); |
4545 | 436 | SrcReg = MI.getOperand(1).getReg(); |
4546 | 436 | |
4547 | 2.18k | for (unsigned i = MI.getDesc().getNumOperands(); i; --i) |
4548 | 1.74k | MI.RemoveOperand(i - 1); |
4549 | 436 | |
4550 | 436 | // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits) |
4551 | 436 | MI.setDesc(get(ARM::VORRd)); |
4552 | 436 | MIB.addReg(DstReg, RegState::Define) |
4553 | 436 | .addReg(SrcReg) |
4554 | 436 | .addReg(SrcReg) |
4555 | 436 | .add(predOps(ARMCC::AL)); |
4556 | 436 | break; |
4557 | 58 | case ARM::VMOVRS: |
4558 | 58 | if (Domain != ExeNEON) |
4559 | 21 | break; |
4560 | 58 | assert(!isPredicated(MI) && "Cannot predicate a VGETLN"); |
4561 | 37 | |
4562 | 37 | // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits) |
4563 | 37 | DstReg = MI.getOperand(0).getReg(); |
4564 | 37 | SrcReg = MI.getOperand(1).getReg(); |
4565 | 37 | |
4566 | 185 | for (unsigned i = MI.getDesc().getNumOperands(); i; --i) |
4567 | 148 | MI.RemoveOperand(i - 1); |
4568 | 37 | |
4569 | 37 | DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane); |
4570 | 37 | |
4571 | 37 | // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps) |
4572 | 37 | // Note that DSrc has been widened and the other lane may be undef, which |
4573 | 37 | // contaminates the entire register. |
4574 | 37 | MI.setDesc(get(ARM::VGETLNi32)); |
4575 | 37 | MIB.addReg(DstReg, RegState::Define) |
4576 | 37 | .addReg(DReg, RegState::Undef) |
4577 | 37 | .addImm(Lane) |
4578 | 37 | .add(predOps(ARMCC::AL)); |
4579 | 37 | |
4580 | 37 | // The old source should be an implicit use, otherwise we might think it |
4581 | 37 | // was dead before here. |
4582 | 37 | MIB.addReg(SrcReg, RegState::Implicit); |
4583 | 37 | break; |
4584 | 77 | case ARM::VMOVSR: { |
4585 | 77 | if (Domain != ExeNEON) |
4586 | 67 | break; |
4587 | 77 | assert(!isPredicated(MI) && "Cannot predicate a VSETLN"); |
4588 | 10 | |
4589 | 10 | // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits) |
4590 | 10 | DstReg = MI.getOperand(0).getReg(); |
4591 | 10 | SrcReg = MI.getOperand(1).getReg(); |
4592 | 10 | |
4593 | 10 | DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane); |
4594 | 10 | |
4595 | 10 | unsigned ImplicitSReg; |
4596 | 10 | if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg)) |
4597 | 0 | break; |
4598 | 10 | |
4599 | 50 | for (unsigned i = MI.getDesc().getNumOperands(); i; --i) |
4600 | 40 | MI.RemoveOperand(i - 1); |
4601 | 10 | |
4602 | 10 | // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps) |
4603 | 10 | // Again DDst may be undefined at the beginning of this instruction. |
4604 | 10 | MI.setDesc(get(ARM::VSETLNi32)); |
4605 | 10 | MIB.addReg(DReg, RegState::Define) |
4606 | 10 | .addReg(DReg, getUndefRegState(!MI.readsRegister(DReg, TRI))) |
4607 | 10 | .addReg(SrcReg) |
4608 | 10 | .addImm(Lane) |
4609 | 10 | .add(predOps(ARMCC::AL)); |
4610 | 10 | |
4611 | 10 | // The narrower destination must be marked as set to keep previous chains |
4612 | 10 | // in place. |
4613 | 10 | MIB.addReg(DstReg, RegState::Define | RegState::Implicit); |
4614 | 10 | if (ImplicitSReg != 0) |
4615 | 0 | MIB.addReg(ImplicitSReg, RegState::Implicit); |
4616 | 10 | break; |
4617 | 10 | } |
4618 | 15 | case ARM::VMOVS: { |
4619 | 15 | if (Domain != ExeNEON) |
4620 | 3 | break; |
4621 | 12 | |
4622 | 12 | // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits) |
4623 | 12 | DstReg = MI.getOperand(0).getReg(); |
4624 | 12 | SrcReg = MI.getOperand(1).getReg(); |
4625 | 12 | |
4626 | 12 | unsigned DstLane = 0, SrcLane = 0, DDst, DSrc; |
4627 | 12 | DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane); |
4628 | 12 | DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane); |
4629 | 12 | |
4630 | 12 | unsigned ImplicitSReg; |
4631 | 12 | if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg)) |
4632 | 0 | break; |
4633 | 12 | |
4634 | 60 | for (unsigned i = MI.getDesc().getNumOperands(); i; --i) |
4635 | 48 | MI.RemoveOperand(i - 1); |
4636 | 12 | |
4637 | 12 | if (DSrc == DDst) { |
4638 | 1 | // Destination can be: |
4639 | 1 | // %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits) |
4640 | 1 | MI.setDesc(get(ARM::VDUPLN32d)); |
4641 | 1 | MIB.addReg(DDst, RegState::Define) |
4642 | 1 | .addReg(DDst, getUndefRegState(!MI.readsRegister(DDst, TRI))) |
4643 | 1 | .addImm(SrcLane) |
4644 | 1 | .add(predOps(ARMCC::AL)); |
4645 | 1 | |
4646 | 1 | // Neither the source nor the destination is naturally represented any |
4647 | 1 | // more, so add them in manually. |
4648 | 1 | MIB.addReg(DstReg, RegState::Implicit | RegState::Define); |
4649 | 1 | MIB.addReg(SrcReg, RegState::Implicit); |
4650 | 1 | if (ImplicitSReg != 0) |
4651 | 0 | MIB.addReg(ImplicitSReg, RegState::Implicit); |
4652 | 1 | break; |
4653 | 1 | } |
4654 | 11 | |
4655 | 11 | // In general there's no single instruction that can perform an S <-> S |
4656 | 11 | // move in NEON space, but a pair of VEXT instructions *can* do the |
4657 | 11 | // job. It turns out that the VEXTs needed will only use DSrc once, with |
4658 | 11 | // the position based purely on the combination of lane-0 and lane-1 |
4659 | 11 | // involved. For example |
4660 | 11 | // vmov s0, s2 -> vext.32 d0, d0, d1, #1 vext.32 d0, d0, d0, #1 |
4661 | 11 | // vmov s1, s3 -> vext.32 d0, d1, d0, #1 vext.32 d0, d0, d0, #1 |
4662 | 11 | // vmov s0, s3 -> vext.32 d0, d0, d0, #1 vext.32 d0, d1, d0, #1 |
4663 | 11 | // vmov s1, s2 -> vext.32 d0, d0, d0, #1 vext.32 d0, d0, d1, #1 |
4664 | 11 | // |
4665 | 11 | // Pattern of the MachineInstrs is: |
4666 | 11 | // %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits) |
4667 | 11 | MachineInstrBuilder NewMIB; |
4668 | 11 | NewMIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::VEXTd32), |
4669 | 11 | DDst); |
4670 | 11 | |
4671 | 11 | // On the first instruction, both DSrc and DDst may be <undef> if present. |
4672 | 11 | // Specifically when the original instruction didn't have them as an |
4673 | 11 | // <imp-use>. |
4674 | 11 | unsigned CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst; |
4675 | 11 | bool CurUndef = !MI.readsRegister(CurReg, TRI); |
4676 | 11 | NewMIB.addReg(CurReg, getUndefRegState(CurUndef)); |
4677 | 11 | |
4678 | 11 | CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst; |
4679 | 11 | CurUndef = !MI.readsRegister(CurReg, TRI); |
4680 | 11 | NewMIB.addReg(CurReg, getUndefRegState(CurUndef)) |
4681 | 11 | .addImm(1) |
4682 | 11 | .add(predOps(ARMCC::AL)); |
4683 | 11 | |
4684 | 11 | if (SrcLane == DstLane) |
4685 | 7 | NewMIB.addReg(SrcReg, RegState::Implicit); |
4686 | 11 | |
4687 | 11 | MI.setDesc(get(ARM::VEXTd32)); |
4688 | 11 | MIB.addReg(DDst, RegState::Define); |
4689 | 11 | |
4690 | 11 | // On the second instruction, DDst has definitely been defined above, so |
4691 | 11 | // it is not <undef>. DSrc, if present, can be <undef> as above. |
4692 | 11 | CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst; |
4693 | 1 | CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI); |
4694 | 11 | MIB.addReg(CurReg, getUndefRegState(CurUndef)); |
4695 | 11 | |
4696 | 11 | CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst; |
4697 | 3 | CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI); |
4698 | 11 | MIB.addReg(CurReg, getUndefRegState(CurUndef)) |
4699 | 11 | .addImm(1) |
4700 | 11 | .add(predOps(ARMCC::AL)); |
4701 | 11 | |
4702 | 11 | if (SrcLane != DstLane) |
4703 | 4 | MIB.addReg(SrcReg, RegState::Implicit); |
4704 | 11 | |
4705 | 11 | // As before, the original destination is no longer represented, add it |
4706 | 11 | // implicitly. |
4707 | 11 | MIB.addReg(DstReg, RegState::Define | RegState::Implicit); |
4708 | 11 | if (ImplicitSReg != 0) |
4709 | 0 | MIB.addReg(ImplicitSReg, RegState::Implicit); |
4710 | 733 | break; |
4711 | 733 | } |
4712 | 883 | } |
4713 | 883 | } |
4714 | | |
4715 | | //===----------------------------------------------------------------------===// |
4716 | | // Partial register updates |
4717 | | //===----------------------------------------------------------------------===// |
4718 | | // |
4719 | | // Swift renames NEON registers with 64-bit granularity. That means any |
4720 | | // instruction writing an S-reg implicitly reads the containing D-reg. The |
4721 | | // problem is mostly avoided by translating f32 operations to v2f32 operations |
4722 | | // on D-registers, but f32 loads are still a problem. |
4723 | | // |
4724 | | // These instructions can load an f32 into a NEON register: |
4725 | | // |
4726 | | // VLDRS - Only writes S, partial D update. |
4727 | | // VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops. |
4728 | | // VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops. |
4729 | | // |
4730 | | // FCONSTD can be used as a dependency-breaking instruction. |
4731 | | unsigned ARMBaseInstrInfo::getPartialRegUpdateClearance( |
4732 | | const MachineInstr &MI, unsigned OpNum, |
4733 | 29.0k | const TargetRegisterInfo *TRI) const { |
4734 | 29.0k | auto PartialUpdateClearance = Subtarget.getPartialUpdateClearance(); |
4735 | 29.0k | if (!PartialUpdateClearance) |
4736 | 26.1k | return 0; |
4737 | 2.87k | |
4738 | 29.0k | assert(TRI && "Need TRI instance"); |
4739 | 2.87k | |
4740 | 2.87k | const MachineOperand &MO = MI.getOperand(OpNum); |
4741 | 2.87k | if (MO.readsReg()) |
4742 | 0 | return 0; |
4743 | 2.87k | unsigned Reg = MO.getReg(); |
4744 | 2.87k | int UseOp = -1; |
4745 | 2.87k | |
4746 | 2.87k | switch (MI.getOpcode()) { |
4747 | 2.87k | // Normal instructions writing only an S-register. |
4748 | 215 | case ARM::VLDRS: |
4749 | 215 | case ARM::FCONSTS: |
4750 | 215 | case ARM::VMOVSR: |
4751 | 215 | case ARM::VMOVv8i8: |
4752 | 215 | case ARM::VMOVv4i16: |
4753 | 215 | case ARM::VMOVv2i32: |
4754 | 215 | case ARM::VMOVv2f32: |
4755 | 215 | case ARM::VMOVv1i64: |
4756 | 215 | UseOp = MI.findRegisterUseOperandIdx(Reg, false, TRI); |
4757 | 215 | break; |
4758 | 215 | |
4759 | 215 | // Explicitly reads the dependency. |
4760 | 20 | case ARM::VLD1LNd32: |
4761 | 20 | UseOp = 3; |
4762 | 20 | break; |
4763 | 2.63k | default: |
4764 | 2.63k | return 0; |
4765 | 235 | } |
4766 | 235 | |
4767 | 235 | // If this instruction actually reads a value from Reg, there is no unwanted |
4768 | 235 | // dependency. |
4769 | 235 | if (UseOp != -1 && MI.getOperand(UseOp).readsReg()) |
4770 | 9 | return 0; |
4771 | 226 | |
4772 | 226 | // We must be able to clobber the whole D-reg. |
4773 | 226 | if (TargetRegisterInfo::isVirtualRegister(Reg)) { |
4774 | 0 | // Virtual register must be a foo:ssub_0<def,undef> operand. |
4775 | 0 | if (!MO.getSubReg() || MI.readsVirtualRegister(Reg)) |
4776 | 0 | return 0; |
4777 | 226 | } else if (ARM::SPRRegClass.contains(Reg)) { |
4778 | 37 | // Physical register: MI must define the full D-reg. |
4779 | 37 | unsigned DReg = TRI->getMatchingSuperReg(Reg, ARM::ssub_0, |
4780 | 37 | &ARM::DPRRegClass); |
4781 | 37 | if (!DReg || !MI.definesRegister(DReg, TRI)) |
4782 | 22 | return 0; |
4783 | 204 | } |
4784 | 204 | |
4785 | 204 | // MI has an unwanted D-register dependency. |
4786 | 204 | // Avoid defs in the previous N instructrions. |
4787 | 204 | // Avoid defs in the previous N instructions. |
4788 | 204 | } |
4789 | | |
4790 | | // Break a partial register dependency after getPartialRegUpdateClearance |
4791 | | // returned non-zero. |
4792 | | void ARMBaseInstrInfo::breakPartialRegDependency( |
4793 | 68 | MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const { |
4794 | 68 | assert(OpNum < MI.getDesc().getNumDefs() && "OpNum is not a def"); |
4795 | 68 | assert(TRI && "Need TRI instance"); |
4796 | 68 | |
4797 | 68 | const MachineOperand &MO = MI.getOperand(OpNum); |
4798 | 68 | unsigned Reg = MO.getReg(); |
4799 | 68 | assert(TargetRegisterInfo::isPhysicalRegister(Reg) && |
4800 | 68 | "Can't break virtual register dependencies."); |
4801 | 68 | unsigned DReg = Reg; |
4802 | 68 | |
4803 | 68 | // If MI defines an S-reg, find the corresponding D super-register. |
4804 | 68 | if (ARM::SPRRegClass.contains(Reg)) { |
4805 | 0 | DReg = ARM::D0 + (Reg - ARM::S0) / 2; |
4806 | 0 | assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken"); |
4807 | 0 | } |
4808 | 68 | |
4809 | 68 | assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps"); |
4810 | 68 | assert(MI.definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg"); |
4811 | 68 | |
4812 | 68 | // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines |
4813 | 68 | // the full D-register by loading the same value to both lanes. The |
4814 | 68 | // instruction is micro-coded with 2 uops, so don't do this until we can |
4815 | 68 | // properly schedule micro-coded instructions. The dispatcher stalls cause |
4816 | 68 | // too big regressions. |
4817 | 68 | |
4818 | 68 | // Insert the dependency-breaking FCONSTD before MI. |
4819 | 68 | // 96 is the encoding of 0.5, but the actual value doesn't matter here. |
4820 | 68 | BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::FCONSTD), DReg) |
4821 | 68 | .addImm(96) |
4822 | 68 | .add(predOps(ARMCC::AL)); |
4823 | 68 | MI.addRegisterKilled(DReg, TRI, true); |
4824 | 68 | } |
4825 | | |
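getPartialRegUpdateClearance and breakPartialRegDependency above are intended to be used as a pair: a pass first asks how much clearance an unwanted partial D-register update needs, and only inserts the dependency-breaking FCONSTD when a recent def of the register is closer than that. A rough sketch of the query-then-break pattern (hypothetical caller; DistanceToLastDef stands in for whatever reaching-def information the calling pass tracks):

    // Query the clearance for operand OpNum of MI and break the false
    // dependency only when the last def is within the clearance window.
    void maybeBreakPartialDep(const ARMBaseInstrInfo &TII, MachineInstr &MI,
                              unsigned OpNum, const TargetRegisterInfo *TRI,
                              unsigned DistanceToLastDef) {
      unsigned Clearance = TII.getPartialRegUpdateClearance(MI, OpNum, TRI);
      if (Clearance == 0)
        return; // no unwanted partial-register dependency here
      if (DistanceToLastDef < Clearance)
        TII.breakPartialRegDependency(MI, OpNum, TRI); // emits FCONSTD before MI
    }
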
4826 | 20 | bool ARMBaseInstrInfo::hasNOP() const { |
4827 | 20 | return Subtarget.getFeatureBits()[ARM::HasV6KOps]; |
4828 | 20 | } |
4829 | | |
4830 | 8.79k | bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const { |
4831 | 8.79k | if (MI->getNumOperands() < 4) |
4832 | 0 | return true; |
4833 | 8.79k | unsigned ShOpVal = MI->getOperand(3).getImm(); |
4834 | 8.79k | unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal); |
4835 | 8.79k | // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1. |
4836 | 8.79k | if ((ShImm == 1 && ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsr) || |
4837 | 8.77k | ((ShImm == 1 || ShImm == 2) && |
4838 | 4.70k | ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsl)) |
4839 | 4.22k | return true; |
4840 | 4.56k | |
4841 | 4.56k | return false; |
4842 | 4.56k | } |
4843 | | |
4844 | | bool ARMBaseInstrInfo::getRegSequenceLikeInputs( |
4845 | | const MachineInstr &MI, unsigned DefIdx, |
4846 | 2.25k | SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const { |
4847 | 2.25k | assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index"); |
4848 | 2.25k | assert(MI.isRegSequenceLike() && "Invalid kind of instruction"); |
4849 | 2.25k | |
4850 | 2.25k | switch (MI.getOpcode()) { |
4851 | 2.25k | case ARM::VMOVDRR: |
4852 | 2.25k | // dX = VMOVDRR rY, rZ |
4853 | 2.25k | // is the same as: |
4854 | 2.25k | // dX = REG_SEQUENCE rY, ssub_0, rZ, ssub_1 |
4855 | 2.25k | // Populate the InputRegs accordingly. |
4856 | 2.25k | // rY |
4857 | 2.25k | const MachineOperand *MOReg = &MI.getOperand(1); |
4858 | 2.25k | InputRegs.push_back( |
4859 | 2.25k | RegSubRegPairAndIdx(MOReg->getReg(), MOReg->getSubReg(), ARM::ssub_0)); |
4860 | 2.25k | // rZ |
4861 | 2.25k | MOReg = &MI.getOperand(2); |
4862 | 2.25k | InputRegs.push_back( |
4863 | 2.25k | RegSubRegPairAndIdx(MOReg->getReg(), MOReg->getSubReg(), ARM::ssub_1)); |
4864 | 2.25k | return true; |
4865 | 0 | } |
4866 | 0 | llvm_unreachable("Target dependent opcode missing"); |
4867 | 0 | } |
4868 | | |
4869 | | bool ARMBaseInstrInfo::getExtractSubregLikeInputs( |
4870 | | const MachineInstr &MI, unsigned DefIdx, |
4871 | 3.59k | RegSubRegPairAndIdx &InputReg) const { |
4872 | 3.59k | assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index"); |
4873 | 3.59k | assert(MI.isExtractSubregLike() && "Invalid kind of instruction"); |
4874 | 3.59k | |
4875 | 3.59k | switch (MI.getOpcode()) { |
4876 | 3.59k | case ARM::VMOVRRD: |
4877 | 3.59k | // rX, rY = VMOVRRD dZ |
4878 | 3.59k | // is the same as: |
4879 | 3.59k | // rX = EXTRACT_SUBREG dZ, ssub_0 |
4880 | 3.59k | // rY = EXTRACT_SUBREG dZ, ssub_1 |
4881 | 3.59k | const MachineOperand &MOReg = MI.getOperand(2); |
4882 | 3.59k | InputReg.Reg = MOReg.getReg(); |
4883 | 3.59k | InputReg.SubReg = MOReg.getSubReg(); |
4884 | 3.59k | InputReg.SubIdx = DefIdx == 0 ? ARM::ssub_0 : ARM::ssub_1; |
4885 | 3.59k | return true; |
4886 | 0 | } |
4887 | 0 | llvm_unreachable("Target dependent opcode missing"); |
4888 | 0 | } |
4889 | | |
4890 | | bool ARMBaseInstrInfo::getInsertSubregLikeInputs( |
4891 | | const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg, |
4892 | 207 | RegSubRegPairAndIdx &InsertedReg) const { |
4893 | 207 | assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index"); |
4894 | 207 | assert(MI.isInsertSubregLike() && "Invalid kind of instruction"); |
4895 | 207 | |
4896 | 207 | switch (MI.getOpcode()) { |
4897 | 207 | case ARM::VSETLNi32: |
4898 | 207 | // dX = VSETLNi32 dY, rZ, imm |
4899 | 207 | const MachineOperand &MOBaseReg = MI.getOperand(1); |
4900 | 207 | const MachineOperand &MOInsertedReg = MI.getOperand(2); |
4901 | 207 | const MachineOperand &MOIndex = MI.getOperand(3); |
4902 | 207 | BaseReg.Reg = MOBaseReg.getReg(); |
4903 | 207 | BaseReg.SubReg = MOBaseReg.getSubReg(); |
4904 | 207 | |
4905 | 207 | InsertedReg.Reg = MOInsertedReg.getReg(); |
4906 | 207 | InsertedReg.SubReg = MOInsertedReg.getSubReg(); |
4907 | 207 | InsertedReg.SubIdx = MOIndex.getImm() == 0 ? ARM::ssub_0 : ARM::ssub_1; |
4908 | 207 | return true; |
4909 | 0 | } |
4910 | 0 | llvm_unreachable("Target dependent opcode missing"); |
4911 | 0 | } |