/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- HexagonHardwareLoops.cpp - Identify and generate hardware loops ----===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // This pass identifies loops where we can generate the Hexagon hardware |
11 | | // loop instruction. The hardware loop can perform loop branches with a |
12 | | // zero-cycle overhead. |
13 | | // |
14 | | // The pattern that defines the induction variable can changed depending on |
15 | | // prior optimizations. For example, the IndVarSimplify phase run by 'opt' |
16 | | // normalizes induction variables, and the Loop Strength Reduction pass |
17 | | // run by 'llc' may also make changes to the induction variable. |
18 | | // The pattern detected by this phase is due to running Strength Reduction. |
19 | | // |
20 | | // Criteria for hardware loops: |
21 | | // - Countable loops (w/ ind. var for a trip count) |
22 | | // - Assumes loops are normalized by IndVarSimplify |
23 | | // - Try inner-most loops first |
24 | | // - No function calls in loops. |
25 | | // |
26 | | //===----------------------------------------------------------------------===// |
27 | | |
28 | | #include "HexagonInstrInfo.h" |
29 | | #include "HexagonSubtarget.h" |
30 | | #include "llvm/ADT/ArrayRef.h" |
31 | | #include "llvm/ADT/STLExtras.h" |
32 | | #include "llvm/ADT/SmallSet.h" |
33 | | #include "llvm/ADT/SmallVector.h" |
34 | | #include "llvm/ADT/Statistic.h" |
35 | | #include "llvm/ADT/StringRef.h" |
36 | | #include "llvm/CodeGen/MachineBasicBlock.h" |
37 | | #include "llvm/CodeGen/MachineDominators.h" |
38 | | #include "llvm/CodeGen/MachineFunction.h" |
39 | | #include "llvm/CodeGen/MachineFunctionPass.h" |
40 | | #include "llvm/CodeGen/MachineInstr.h" |
41 | | #include "llvm/CodeGen/MachineInstrBuilder.h" |
42 | | #include "llvm/CodeGen/MachineLoopInfo.h" |
43 | | #include "llvm/CodeGen/MachineOperand.h" |
44 | | #include "llvm/CodeGen/MachineRegisterInfo.h" |
45 | | #include "llvm/IR/Constants.h" |
46 | | #include "llvm/IR/DebugLoc.h" |
47 | | #include "llvm/Pass.h" |
48 | | #include "llvm/Support/CommandLine.h" |
49 | | #include "llvm/Support/Debug.h" |
50 | | #include "llvm/Support/ErrorHandling.h" |
51 | | #include "llvm/Support/MathExtras.h" |
52 | | #include "llvm/Support/raw_ostream.h" |
53 | | #include "llvm/Target/TargetRegisterInfo.h" |
54 | | #include <cassert> |
55 | | #include <cstdint> |
56 | | #include <cstdlib> |
57 | | #include <iterator> |
58 | | #include <map> |
59 | | #include <set> |
60 | | #include <string> |
61 | | #include <utility> |
62 | | #include <vector> |
63 | | |
64 | | using namespace llvm; |
65 | | |
66 | | #define DEBUG_TYPE "hwloops" |
67 | | |
68 | | #ifndef NDEBUG |
69 | | static cl::opt<int> HWLoopLimit("hexagon-max-hwloop", cl::Hidden, cl::init(-1)); |
70 | | |
71 | | // Option to create preheader only for a specific function. |
72 | | static cl::opt<std::string> PHFn("hexagon-hwloop-phfn", cl::Hidden, |
73 | | cl::init("")); |
74 | | #endif |
75 | | |
76 | | // Option to create a preheader if one doesn't exist. |
77 | | static cl::opt<bool> HWCreatePreheader("hexagon-hwloop-preheader", |
78 | | cl::Hidden, cl::init(true), |
79 | | cl::desc("Add a preheader to a hardware loop if one doesn't exist")); |
80 | | |
81 | | // Turn it off by default. If a preheader block is not created here, the |
82 | | // software pipeliner may be unable to find a block suitable to serve as |
83 | | // a preheader. In that case SWP will not run. |
84 | | static cl::opt<bool> SpecPreheader("hwloop-spec-preheader", cl::init(false), |
85 | | cl::Hidden, cl::ZeroOrMore, cl::desc("Allow speculation of preheader " |
86 | | "instructions")); |
87 | | |
88 | | STATISTIC(NumHWLoops, "Number of loops converted to hardware loops"); |
89 | | |
90 | | namespace llvm { |
91 | | |
92 | | FunctionPass *createHexagonHardwareLoops(); |
93 | | void initializeHexagonHardwareLoopsPass(PassRegistry&); |
94 | | |
95 | | } // end namespace llvm |
96 | | |
97 | | namespace { |
98 | | |
99 | | class CountValue; |
100 | | |
101 | | struct HexagonHardwareLoops : public MachineFunctionPass { |
102 | | MachineLoopInfo *MLI; |
103 | | MachineRegisterInfo *MRI; |
104 | | MachineDominatorTree *MDT; |
105 | | const HexagonInstrInfo *TII; |
106 | | const HexagonRegisterInfo *TRI; |
107 | | #ifndef NDEBUG |
108 | | static int Counter; |
109 | | #endif |
110 | | |
111 | | public: |
112 | | static char ID; |
113 | | |
114 | 405 | HexagonHardwareLoops() : MachineFunctionPass(ID) { |
115 | 405 | initializeHexagonHardwareLoopsPass(*PassRegistry::getPassRegistry()); |
116 | 405 | } |
117 | | |
118 | | bool runOnMachineFunction(MachineFunction &MF) override; |
119 | | |
120 | 401 | StringRef getPassName() const override { return "Hexagon Hardware Loops"; } |
121 | | |
122 | 401 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
123 | 401 | AU.addRequired<MachineDominatorTree>(); |
124 | 401 | AU.addRequired<MachineLoopInfo>(); |
125 | 401 | MachineFunctionPass::getAnalysisUsage(AU); |
126 | 401 | } |
127 | | |
128 | | private: |
129 | | using LoopFeederMap = std::map<unsigned, MachineInstr *>; |
130 | | |
131 | | /// Kinds of comparisons in the compare instructions. |
132 | | struct Comparison { |
133 | | enum Kind { |
134 | | EQ = 0x01, |
135 | | NE = 0x02, |
136 | | L = 0x04, |
137 | | G = 0x08, |
138 | | U = 0x40, |
139 | | LTs = L, |
140 | | LEs = L | EQ, |
141 | | GTs = G, |
142 | | GEs = G | EQ, |
143 | | LTu = L | U, |
144 | | LEu = L | EQ | U, |
145 | | GTu = G | U, |
146 | | GEu = G | EQ | U |
147 | | }; |
148 | | |
149 | 25 | static Kind getSwappedComparison(Kind Cmp) { |
150 | 25 | assert ((!((Cmp & L) && (Cmp & G))) && "Malformed comparison operator"); |
151 | 25 | if ((Cmp & L) || 25 (Cmp & G)24 ) |
152 | 23 | return (Kind)(Cmp ^ (L|G)); |
153 | 2 | return Cmp; |
154 | 2 | } |
155 | | |
156 | 122 | static Kind getNegatedComparison(Kind Cmp) { |
157 | 122 | if ((Cmp & L) || 122 (Cmp & G)122 ) |
158 | 40 | return (Kind)((Cmp ^ (L | G)) ^ EQ); |
159 | 82 | if (82 (Cmp & NE) || 82 (Cmp & EQ)82 ) |
160 | 82 | return (Kind)(Cmp ^ (EQ | NE)); |
161 | 0 | return (Kind)0; |
162 | 0 | } |
163 | | |
164 | 32 | static bool isSigned(Kind Cmp) { |
165 | 29 | return (Cmp & (L | G) && !(Cmp & U)); |
166 | 32 | } |
167 | | |
168 | 3 | static bool isUnsigned(Kind Cmp) { |
169 | 3 | return (Cmp & U); |
170 | 3 | } |
171 | | }; |
172 | | |
173 | | /// \brief Find the register that contains the loop controlling |
174 | | /// induction variable. |
175 | | /// If successful, it will return true and set the \p Reg, \p IVBump |
176 | | /// and \p IVOp arguments. Otherwise it will return false. |
177 | | /// The returned induction register is the register R that follows the |
178 | | /// following induction pattern: |
179 | | /// loop: |
180 | | /// R = phi ..., [ R.next, LatchBlock ] |
181 | | /// R.next = R + #bump |
182 | | /// if (R.next < #N) goto loop |
183 | | /// IVBump is the immediate value added to R, and IVOp is the instruction |
184 | | /// "R.next = R + #bump". |
185 | | bool findInductionRegister(MachineLoop *L, unsigned &Reg, |
186 | | int64_t &IVBump, MachineInstr *&IVOp) const; |
187 | | |
188 | | /// \brief Return the comparison kind for the specified opcode. |
189 | | Comparison::Kind getComparisonKind(unsigned CondOpc, |
190 | | MachineOperand *InitialValue, |
191 | | const MachineOperand *Endvalue, |
192 | | int64_t IVBump) const; |
193 | | |
194 | | /// \brief Analyze the statements in a loop to determine if the loop |
195 | | /// has a computable trip count and, if so, return a value that represents |
196 | | /// the trip count expression. |
197 | | CountValue *getLoopTripCount(MachineLoop *L, |
198 | | SmallVectorImpl<MachineInstr *> &OldInsts); |
199 | | |
200 | | /// \brief Return the expression that represents the number of times |
201 | | /// a loop iterates. The function takes the operands that represent the |
202 | | /// loop start value, loop end value, and induction value. Based upon |
203 | | /// these operands, the function attempts to compute the trip count. |
204 | | /// If the trip count is not directly available (as an immediate value, |
205 | | /// or a register), the function will attempt to insert computation of it |
206 | | /// to the loop's preheader. |
207 | | CountValue *computeCount(MachineLoop *Loop, const MachineOperand *Start, |
208 | | const MachineOperand *End, unsigned IVReg, |
209 | | int64_t IVBump, Comparison::Kind Cmp) const; |
210 | | |
211 | | /// \brief Return true if the instruction is not valid within a hardware |
212 | | /// loop. |
213 | | bool isInvalidLoopOperation(const MachineInstr *MI, |
214 | | bool IsInnerHWLoop) const; |
215 | | |
216 | | /// \brief Return true if the loop contains an instruction that inhibits |
217 | | /// using the hardware loop. |
218 | | bool containsInvalidInstruction(MachineLoop *L, bool IsInnerHWLoop) const; |
219 | | |
220 | | /// \brief Given a loop, check if we can convert it to a hardware loop. |
221 | | /// If so, then perform the conversion and return true. |
222 | | bool convertToHardwareLoop(MachineLoop *L, bool &L0used, bool &L1used); |
223 | | |
224 | | /// \brief Return true if the instruction is now dead. |
225 | | bool isDead(const MachineInstr *MI, |
226 | | SmallVectorImpl<MachineInstr *> &DeadPhis) const; |
227 | | |
228 | | /// \brief Remove the instruction if it is now dead. |
229 | | void removeIfDead(MachineInstr *MI); |
230 | | |
231 | | /// \brief Make sure that the "bump" instruction executes before the |
232 | | /// compare. We need that for the IV fixup, so that the compare |
233 | | /// instruction would not use a bumped value that has not yet been |
234 | | /// defined. If the instructions are out of order, try to reorder them. |
235 | | bool orderBumpCompare(MachineInstr *BumpI, MachineInstr *CmpI); |
236 | | |
237 | | /// \brief Return true if MO and MI pair is visited only once. If visited |
238 | | /// more than once, this indicates there is recursion. In such a case, |
239 | | /// return false. |
240 | | bool isLoopFeeder(MachineLoop *L, MachineBasicBlock *A, MachineInstr *MI, |
241 | | const MachineOperand *MO, |
242 | | LoopFeederMap &LoopFeederPhi) const; |
243 | | |
244 | | /// \brief Return true if the Phi may generate a value that may underflow, |
245 | | /// or may wrap. |
246 | | bool phiMayWrapOrUnderflow(MachineInstr *Phi, const MachineOperand *EndVal, |
247 | | MachineBasicBlock *MBB, MachineLoop *L, |
248 | | LoopFeederMap &LoopFeederPhi) const; |
249 | | |
250 | | /// \brief Return true if the induction variable may underflow an unsigned |
251 | | /// value in the first iteration. |
252 | | bool loopCountMayWrapOrUnderFlow(const MachineOperand *InitVal, |
253 | | const MachineOperand *EndVal, |
254 | | MachineBasicBlock *MBB, MachineLoop *L, |
255 | | LoopFeederMap &LoopFeederPhi) const; |
256 | | |
257 | | /// \brief Check if the given operand has a compile-time known constant |
258 | | /// value. Return true if yes, and false otherwise. When returning true, set |
259 | | /// Val to the corresponding constant value. |
260 | | bool checkForImmediate(const MachineOperand &MO, int64_t &Val) const; |
261 | | |
262 | | /// \brief Check if the operand has a compile-time known constant value. |
263 | 198 | bool isImmediate(const MachineOperand &MO) const { |
264 | 198 | int64_t V; |
265 | 198 | return checkForImmediate(MO, V); |
266 | 198 | } |
267 | | |
268 | | /// \brief Return the immediate for the specified operand. |
269 | 2 | int64_t getImmediate(const MachineOperand &MO) const { |
270 | 2 | int64_t V; |
271 | 2 | if (!checkForImmediate(MO, V)) |
272 | 0 | llvm_unreachable("Invalid operand"); |
273 | 2 | return V; |
274 | 2 | } |
275 | | |
276 | | /// \brief Reset the given machine operand to now refer to a new immediate |
277 | | /// value. Assumes that the operand was already referencing an immediate |
278 | | /// value, either directly, or via a register. |
279 | | void setImmediate(MachineOperand &MO, int64_t Val); |
280 | | |
281 | | /// \brief Fix the data flow of the induction variable. |
282 | | /// The desired flow is: phi ---> bump -+-> comparison-in-latch. |
283 | | /// | |
284 | | /// +-> back to phi |
285 | | /// where "bump" is the increment of the induction variable: |
286 | | /// iv = iv + #const. |
287 | | /// Due to some prior code transformations, the actual flow may look |
288 | | /// like this: |
289 | | /// phi -+-> bump ---> back to phi |
290 | | /// | |
291 | | /// +-> comparison-in-latch (against upper_bound-bump), |
292 | | /// i.e. the comparison that controls the loop execution may be using |
293 | | /// the value of the induction variable from before the increment. |
294 | | /// |
295 | | /// Return true if the loop's flow is the desired one (i.e. it's |
296 | | /// either been fixed, or no fixing was necessary). |
297 | | /// Otherwise, return false. This can happen if the induction variable |
298 | | /// couldn't be identified, or if the value in the latch's comparison |
299 | | /// cannot be adjusted to reflect the post-bump value. |
300 | | bool fixupInductionVariable(MachineLoop *L); |
301 | | |
302 | | /// \brief Given a loop, if it does not have a preheader, create one. |
303 | | /// Return the block that is the preheader. |
304 | | MachineBasicBlock *createPreheaderForLoop(MachineLoop *L); |
305 | | }; |
306 | | |
307 | | char HexagonHardwareLoops::ID = 0; |
308 | | #ifndef NDEBUG |
309 | | int HexagonHardwareLoops::Counter = 0; |
310 | | #endif |
311 | | |
312 | | /// \brief Abstraction for a trip count of a loop. A smaller version |
313 | | /// of the MachineOperand class without the concerns of changing the |
314 | | /// operand representation. |
315 | | class CountValue { |
316 | | public: |
317 | | enum CountValueType { |
318 | | CV_Register, |
319 | | CV_Immediate |
320 | | }; |
321 | | |
322 | | private: |
323 | | CountValueType Kind; |
324 | | union Values { |
325 | | struct { |
326 | | unsigned Reg; |
327 | | unsigned Sub; |
328 | | } R; |
329 | | unsigned ImmVal; |
330 | | } Contents; |
331 | | |
332 | | public: |
333 | 132 | explicit CountValue(CountValueType t, unsigned v, unsigned u = 0) { |
334 | 132 | Kind = t; |
335 | 132 | if (Kind == CV_Register132 ) { |
336 | 88 | Contents.R.Reg = v; |
337 | 88 | Contents.R.Sub = u; |
338 | 132 | } else { |
339 | 44 | Contents.ImmVal = v; |
340 | 44 | } |
341 | 132 | } |
342 | | |
343 | 264 | bool isReg() const { return Kind == CV_Register; } |
344 | 0 | bool isImm() const { return Kind == CV_Immediate; } |
345 | | |
346 | 176 | unsigned getReg() const { |
347 | 176 | assert(isReg() && "Wrong CountValue accessor"); |
348 | 176 | return Contents.R.Reg; |
349 | 176 | } |
350 | | |
351 | 88 | unsigned getSubReg() const { |
352 | 88 | assert(isReg() && "Wrong CountValue accessor"); |
353 | 88 | return Contents.R.Sub; |
354 | 88 | } |
355 | | |
356 | 44 | unsigned getImm() const { |
357 | 44 | assert(isImm() && "Wrong CountValue accessor"); |
358 | 44 | return Contents.ImmVal; |
359 | 44 | } |
360 | | |
361 | 0 | void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr) const { |
362 | 0 | if (isReg()) { OS << PrintReg(Contents.R.Reg, TRI, Contents.R.Sub); } |
363 | 0 | if (isImm()) { OS << Contents.ImmVal; } |
364 | 0 | } |
365 | | }; |
366 | | |
367 | | } // end anonymous namespace |
368 | | |
369 | 405 | INITIALIZE_PASS_BEGIN405 (HexagonHardwareLoops, "hwloops",
|
370 | 405 | "Hexagon Hardware Loops", false, false) |
371 | 405 | INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) |
372 | 405 | INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) |
373 | 405 | INITIALIZE_PASS_END(HexagonHardwareLoops, "hwloops", |
374 | | "Hexagon Hardware Loops", false, false) |
375 | | |
376 | 405 | FunctionPass *llvm::createHexagonHardwareLoops() { |
377 | 405 | return new HexagonHardwareLoops(); |
378 | 405 | } |
379 | | |
380 | 860 | bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) { |
381 | 860 | DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n"); |
382 | 860 | if (skipFunction(*MF.getFunction())) |
383 | 0 | return false; |
384 | 860 | |
385 | 860 | bool Changed = false; |
386 | 860 | |
387 | 860 | MLI = &getAnalysis<MachineLoopInfo>(); |
388 | 860 | MRI = &MF.getRegInfo(); |
389 | 860 | MDT = &getAnalysis<MachineDominatorTree>(); |
390 | 860 | const HexagonSubtarget &HST = MF.getSubtarget<HexagonSubtarget>(); |
391 | 860 | TII = HST.getInstrInfo(); |
392 | 860 | TRI = HST.getRegisterInfo(); |
393 | 860 | |
394 | 860 | for (auto &L : *MLI) |
395 | 194 | if (194 !L->getParentLoop()194 ) { |
396 | 194 | bool L0Used = false; |
397 | 194 | bool L1Used = false; |
398 | 194 | Changed |= convertToHardwareLoop(L, L0Used, L1Used); |
399 | 194 | } |
400 | 860 | |
401 | 860 | return Changed; |
402 | 860 | } |
403 | | |
404 | | bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L, |
405 | | unsigned &Reg, |
406 | | int64_t &IVBump, |
407 | | MachineInstr *&IVOp |
408 | 136 | ) const { |
409 | 136 | MachineBasicBlock *Header = L->getHeader(); |
410 | 136 | MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader); |
411 | 136 | MachineBasicBlock *Latch = L->getLoopLatch(); |
412 | 136 | MachineBasicBlock *ExitingBlock = L->findLoopControlBlock(); |
413 | 136 | if (!Header || 136 !Preheader136 || !Latch136 || !ExitingBlock136 ) |
414 | 0 | return false; |
415 | 136 | |
416 | 136 | // This pair represents an induction register together with an immediate |
417 | 136 | // value that will be added to it in each loop iteration. |
418 | 136 | using RegisterBump = std::pair<unsigned, int64_t>; |
419 | 136 | |
420 | 136 | // Mapping: R.next -> (R, bump), where R, R.next and bump are derived |
421 | 136 | // from an induction operation |
422 | 136 | // R.next = R + bump |
423 | 136 | // where bump is an immediate value. |
424 | 136 | using InductionMap = std::map<unsigned, RegisterBump>; |
425 | 136 | |
426 | 136 | InductionMap IndMap; |
427 | 136 | |
428 | 136 | using instr_iterator = MachineBasicBlock::instr_iterator; |
429 | 136 | |
430 | 136 | for (instr_iterator I = Header->instr_begin(), E = Header->instr_end(); |
431 | 436 | I != E && 436 I->isPHI()436 ; ++I300 ) { |
432 | 300 | MachineInstr *Phi = &*I; |
433 | 300 | |
434 | 300 | // Have a PHI instruction. Get the operand that corresponds to the |
435 | 300 | // latch block, and see if is a result of an addition of form "reg+imm", |
436 | 300 | // where the "reg" is defined by the PHI node we are looking at. |
437 | 900 | for (unsigned i = 1, n = Phi->getNumOperands(); i < n900 ; i += 2600 ) { |
438 | 600 | if (Phi->getOperand(i+1).getMBB() != Latch) |
439 | 300 | continue; |
440 | 300 | |
441 | 300 | unsigned PhiOpReg = Phi->getOperand(i).getReg(); |
442 | 300 | MachineInstr *DI = MRI->getVRegDef(PhiOpReg); |
443 | 300 | |
444 | 300 | if (DI->getDesc().isAdd()300 ) { |
445 | 168 | // If the register operand to the add is the PHI we're looking at, this |
446 | 168 | // meets the induction pattern. |
447 | 168 | unsigned IndReg = DI->getOperand(1).getReg(); |
448 | 168 | MachineOperand &Opnd2 = DI->getOperand(2); |
449 | 168 | int64_t V; |
450 | 168 | if (MRI->getVRegDef(IndReg) == Phi && 168 checkForImmediate(Opnd2, V)167 ) { |
451 | 167 | unsigned UpdReg = DI->getOperand(0).getReg(); |
452 | 167 | IndMap.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V))); |
453 | 167 | } |
454 | 168 | } |
455 | 600 | } // for (i) |
456 | 300 | } // for (instr) |
457 | 136 | |
458 | 136 | SmallVector<MachineOperand,2> Cond; |
459 | 136 | MachineBasicBlock *TB = nullptr, *FB = nullptr; |
460 | 136 | bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false); |
461 | 136 | if (NotAnalyzed) |
462 | 0 | return false; |
463 | 136 | |
464 | 136 | unsigned PredR, PredPos, PredRegFlags; |
465 | 136 | if (!TII->getPredReg(Cond, PredR, PredPos, PredRegFlags)) |
466 | 0 | return false; |
467 | 136 | |
468 | 136 | MachineInstr *PredI = MRI->getVRegDef(PredR); |
469 | 136 | if (!PredI->isCompare()) |
470 | 0 | return false; |
471 | 136 | |
472 | 136 | unsigned CmpReg1 = 0, CmpReg2 = 0; |
473 | 136 | int CmpImm = 0, CmpMask = 0; |
474 | 136 | bool CmpAnalyzed = |
475 | 136 | TII->analyzeCompare(*PredI, CmpReg1, CmpReg2, CmpMask, CmpImm); |
476 | 136 | // Fail if the compare was not analyzed, or it's not comparing a register |
477 | 136 | // with an immediate value. Not checking the mask here, since we handle |
478 | 136 | // the individual compare opcodes (including A4_cmpb*) later on. |
479 | 136 | if (!CmpAnalyzed) |
480 | 0 | return false; |
481 | 136 | |
482 | 136 | // Exactly one of the input registers to the comparison should be among |
483 | 136 | // the induction registers. |
484 | 136 | InductionMap::iterator IndMapEnd = IndMap.end(); |
485 | 136 | InductionMap::iterator F = IndMapEnd; |
486 | 136 | if (CmpReg1 != 0136 ) { |
487 | 136 | InductionMap::iterator F1 = IndMap.find(CmpReg1); |
488 | 136 | if (F1 != IndMapEnd) |
489 | 111 | F = F1; |
490 | 136 | } |
491 | 136 | if (CmpReg2 != 0136 ) { |
492 | 55 | InductionMap::iterator F2 = IndMap.find(CmpReg2); |
493 | 55 | if (F2 != IndMapEnd55 ) { |
494 | 25 | if (F != IndMapEnd) |
495 | 0 | return false; |
496 | 25 | F = F2; |
497 | 25 | } |
498 | 55 | } |
499 | 136 | if (136 F == IndMapEnd136 ) |
500 | 0 | return false; |
501 | 136 | |
502 | 136 | Reg = F->second.first; |
503 | 136 | IVBump = F->second.second; |
504 | 136 | IVOp = MRI->getVRegDef(F->first); |
505 | 136 | return true; |
506 | 136 | } |
507 | | |
508 | | // Return the comparison kind for the specified opcode. |
509 | | HexagonHardwareLoops::Comparison::Kind |
510 | | HexagonHardwareLoops::getComparisonKind(unsigned CondOpc, |
511 | | MachineOperand *InitialValue, |
512 | | const MachineOperand *EndValue, |
513 | 171 | int64_t IVBump) const { |
514 | 171 | Comparison::Kind Cmp = (Comparison::Kind)0; |
515 | 171 | switch (CondOpc) { |
516 | 84 | case Hexagon::C2_cmpeqi: |
517 | 84 | case Hexagon::C2_cmpeq: |
518 | 84 | case Hexagon::C2_cmpeqp: |
519 | 84 | Cmp = Comparison::EQ; |
520 | 84 | break; |
521 | 0 | case Hexagon::C4_cmpneq: |
522 | 0 | case Hexagon::C4_cmpneqi: |
523 | 0 | Cmp = Comparison::NE; |
524 | 0 | break; |
525 | 11 | case Hexagon::C4_cmplte: |
526 | 11 | Cmp = Comparison::LEs; |
527 | 11 | break; |
528 | 0 | case Hexagon::C4_cmplteu: |
529 | 0 | Cmp = Comparison::LEu; |
530 | 0 | break; |
531 | 2 | case Hexagon::C2_cmpgtui: |
532 | 2 | case Hexagon::C2_cmpgtu: |
533 | 2 | case Hexagon::C2_cmpgtup: |
534 | 2 | Cmp = Comparison::GTu; |
535 | 2 | break; |
536 | 74 | case Hexagon::C2_cmpgti: |
537 | 74 | case Hexagon::C2_cmpgt: |
538 | 74 | case Hexagon::C2_cmpgtp: |
539 | 74 | Cmp = Comparison::GTs; |
540 | 74 | break; |
541 | 0 | default: |
542 | 0 | return (Comparison::Kind)0; |
543 | 171 | } |
544 | 171 | return Cmp; |
545 | 171 | } |
546 | | |
547 | | /// \brief Analyze the statements in a loop to determine if the loop has |
548 | | /// a computable trip count and, if so, return a value that represents |
549 | | /// the trip count expression. |
550 | | /// |
551 | | /// This function iterates over the phi nodes in the loop to check for |
552 | | /// induction variable patterns that are used in the calculation for |
553 | | /// the number of time the loop is executed. |
554 | | CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L, |
555 | 137 | SmallVectorImpl<MachineInstr *> &OldInsts) { |
556 | 137 | MachineBasicBlock *TopMBB = L->getTopBlock(); |
557 | 137 | MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin(); |
558 | 137 | assert(PI != TopMBB->pred_end() && |
559 | 137 | "Loop must have more than one incoming edge!"); |
560 | 137 | MachineBasicBlock *Backedge = *PI++; |
561 | 137 | if (PI == TopMBB->pred_end()) // dead loop? |
562 | 1 | return nullptr; |
563 | 136 | MachineBasicBlock *Incoming = *PI++; |
564 | 136 | if (PI != TopMBB->pred_end()) // multiple backedges? |
565 | 0 | return nullptr; |
566 | 136 | |
567 | 136 | // Make sure there is one incoming and one backedge and determine which |
568 | 136 | // is which. |
569 | 136 | if (136 L->contains(Incoming)136 ) { |
570 | 134 | if (L->contains(Backedge)) |
571 | 0 | return nullptr; |
572 | 134 | std::swap(Incoming, Backedge); |
573 | 136 | } else if (2 !L->contains(Backedge)2 ) |
574 | 0 | return nullptr; |
575 | 136 | |
576 | 136 | // Look for the cmp instruction to determine if we can get a useful trip |
577 | 136 | // count. The trip count can be either a register or an immediate. The |
578 | 136 | // location of the value depends upon the type (reg or imm). |
579 | 136 | MachineBasicBlock *ExitingBlock = L->findLoopControlBlock(); |
580 | 136 | if (!ExitingBlock) |
581 | 0 | return nullptr; |
582 | 136 | |
583 | 136 | unsigned IVReg = 0; |
584 | 136 | int64_t IVBump = 0; |
585 | 136 | MachineInstr *IVOp; |
586 | 136 | bool FoundIV = findInductionRegister(L, IVReg, IVBump, IVOp); |
587 | 136 | if (!FoundIV) |
588 | 0 | return nullptr; |
589 | 136 | |
590 | 136 | MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader); |
591 | 136 | |
592 | 136 | MachineOperand *InitialValue = nullptr; |
593 | 136 | MachineInstr *IV_Phi = MRI->getVRegDef(IVReg); |
594 | 136 | MachineBasicBlock *Latch = L->getLoopLatch(); |
595 | 408 | for (unsigned i = 1, n = IV_Phi->getNumOperands(); i < n408 ; i += 2272 ) { |
596 | 272 | MachineBasicBlock *MBB = IV_Phi->getOperand(i+1).getMBB(); |
597 | 272 | if (MBB == Preheader) |
598 | 136 | InitialValue = &IV_Phi->getOperand(i); |
599 | 136 | else if (136 MBB == Latch136 ) |
600 | 136 | IVReg = IV_Phi->getOperand(i).getReg(); // Want IV reg after bump. |
601 | 272 | } |
602 | 136 | if (!InitialValue) |
603 | 0 | return nullptr; |
604 | 136 | |
605 | 136 | SmallVector<MachineOperand,2> Cond; |
606 | 136 | MachineBasicBlock *TB = nullptr, *FB = nullptr; |
607 | 136 | bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false); |
608 | 136 | if (NotAnalyzed) |
609 | 0 | return nullptr; |
610 | 136 | |
611 | 136 | MachineBasicBlock *Header = L->getHeader(); |
612 | 136 | // TB must be non-null. If FB is also non-null, one of them must be |
613 | 136 | // the header. Otherwise, branch to TB could be exiting the loop, and |
614 | 136 | // the fall through can go to the header. |
615 | 136 | assert (TB && "Exit block without a branch?"); |
616 | 136 | if (ExitingBlock != Latch && 136 (TB == Latch || 0 FB == Latch0 )) { |
617 | 0 | MachineBasicBlock *LTB = nullptr, *LFB = nullptr; |
618 | 0 | SmallVector<MachineOperand,2> LCond; |
619 | 0 | bool NotAnalyzed = TII->analyzeBranch(*Latch, LTB, LFB, LCond, false); |
620 | 0 | if (NotAnalyzed) |
621 | 0 | return nullptr; |
622 | 0 | if (0 TB == Latch0 ) |
623 | 0 | TB = (LTB == Header) ? 0 LTB0 : LFB0 ; |
624 | 0 | else |
625 | 0 | FB = (LTB == Header) ? 0 LTB0 : LFB0 ; |
626 | 0 | } |
627 | 136 | assert ((!FB || TB == Header || FB == Header) && "Branches not to header?"); |
628 | 136 | if (!TB || 136 (FB && 136 TB != Header135 && FB != Header6 )) |
629 | 0 | return nullptr; |
630 | 136 | |
631 | 136 | // Branches of form "if (!P) ..." cause HexagonInstrInfo::AnalyzeBranch |
632 | 136 | // to put imm(0), followed by P in the vector Cond. |
633 | 136 | // If TB is not the header, it means that the "not-taken" path must lead |
634 | 136 | // to the header. |
635 | 136 | bool Negated = TII->predOpcodeHasNot(Cond) ^ (TB != Header); |
636 | 136 | unsigned PredReg, PredPos, PredRegFlags; |
637 | 136 | if (!TII->getPredReg(Cond, PredReg, PredPos, PredRegFlags)) |
638 | 0 | return nullptr; |
639 | 136 | MachineInstr *CondI = MRI->getVRegDef(PredReg); |
640 | 136 | unsigned CondOpc = CondI->getOpcode(); |
641 | 136 | |
642 | 136 | unsigned CmpReg1 = 0, CmpReg2 = 0; |
643 | 136 | int Mask = 0, ImmValue = 0; |
644 | 136 | bool AnalyzedCmp = |
645 | 136 | TII->analyzeCompare(*CondI, CmpReg1, CmpReg2, Mask, ImmValue); |
646 | 136 | if (!AnalyzedCmp) |
647 | 0 | return nullptr; |
648 | 136 | |
649 | 136 | // The comparison operator type determines how we compute the loop |
650 | 136 | // trip count. |
651 | 136 | OldInsts.push_back(CondI); |
652 | 136 | OldInsts.push_back(IVOp); |
653 | 136 | |
654 | 136 | // Sadly, the following code gets information based on the position |
655 | 136 | // of the operands in the compare instruction. This has to be done |
656 | 136 | // this way, because the comparisons check for a specific relationship |
657 | 136 | // between the operands (e.g. is-less-than), rather than to find out |
658 | 136 | // what relationship the operands are in (as on PPC). |
659 | 136 | Comparison::Kind Cmp; |
660 | 136 | bool isSwapped = false; |
661 | 136 | const MachineOperand &Op1 = CondI->getOperand(1); |
662 | 136 | const MachineOperand &Op2 = CondI->getOperand(2); |
663 | 136 | const MachineOperand *EndValue = nullptr; |
664 | 136 | |
665 | 136 | if (Op1.isReg()136 ) { |
666 | 136 | if (Op2.isImm() || 136 Op1.getReg() == IVReg55 ) |
667 | 111 | EndValue = &Op2; |
668 | 25 | else { |
669 | 25 | EndValue = &Op1; |
670 | 25 | isSwapped = true; |
671 | 25 | } |
672 | 136 | } |
673 | 136 | |
674 | 136 | if (!EndValue) |
675 | 0 | return nullptr; |
676 | 136 | |
677 | 136 | Cmp = getComparisonKind(CondOpc, InitialValue, EndValue, IVBump); |
678 | 136 | if (!Cmp) |
679 | 0 | return nullptr; |
680 | 136 | if (136 Negated136 ) |
681 | 102 | Cmp = Comparison::getNegatedComparison(Cmp); |
682 | 136 | if (isSwapped) |
683 | 25 | Cmp = Comparison::getSwappedComparison(Cmp); |
684 | 136 | |
685 | 136 | if (InitialValue->isReg()136 ) { |
686 | 136 | unsigned R = InitialValue->getReg(); |
687 | 136 | MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent(); |
688 | 136 | if (!MDT->properlyDominates(DefBB, Header)) |
689 | 0 | return nullptr; |
690 | 136 | OldInsts.push_back(MRI->getVRegDef(R)); |
691 | 136 | } |
692 | 136 | if (136 EndValue->isReg()136 ) { |
693 | 55 | unsigned R = EndValue->getReg(); |
694 | 55 | MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent(); |
695 | 55 | if (!MDT->properlyDominates(DefBB, Header)) |
696 | 1 | return nullptr; |
697 | 54 | OldInsts.push_back(MRI->getVRegDef(R)); |
698 | 54 | } |
699 | 136 | |
700 | 135 | return computeCount(L, InitialValue, EndValue, IVReg, IVBump, Cmp); |
701 | 137 | } |
702 | | |
703 | | /// \brief Helper function that returns the expression that represents the |
704 | | /// number of times a loop iterates. The function takes the operands that |
705 | | /// represent the loop start value, loop end value, and induction value. |
706 | | /// Based upon these operands, the function attempts to compute the trip count. |
707 | | CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, |
708 | | const MachineOperand *Start, |
709 | | const MachineOperand *End, |
710 | | unsigned IVReg, |
711 | | int64_t IVBump, |
712 | 135 | Comparison::Kind Cmp) const { |
713 | 135 | // Cannot handle comparison EQ, i.e. while (A == B). |
714 | 135 | if (Cmp == Comparison::EQ) |
715 | 0 | return nullptr; |
716 | 135 | |
717 | 135 | // Check if either the start or end values are an assignment of an immediate. |
718 | 135 | // If so, use the immediate value rather than the register. |
719 | 135 | if (135 Start->isReg()135 ) { |
720 | 135 | const MachineInstr *StartValInstr = MRI->getVRegDef(Start->getReg()); |
721 | 135 | if (StartValInstr && 135 (StartValInstr->getOpcode() == Hexagon::A2_tfrsi || |
722 | 67 | StartValInstr->getOpcode() == Hexagon::A2_tfrpi)) |
723 | 68 | Start = &StartValInstr->getOperand(1); |
724 | 135 | } |
725 | 135 | if (End->isReg()135 ) { |
726 | 54 | const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg()); |
727 | 54 | if (EndValInstr && 54 (EndValInstr->getOpcode() == Hexagon::A2_tfrsi || |
728 | 38 | EndValInstr->getOpcode() == Hexagon::A2_tfrpi)) |
729 | 16 | End = &EndValInstr->getOperand(1); |
730 | 54 | } |
731 | 135 | |
732 | 135 | if (!Start->isReg() && 135 !Start->isImm()68 ) |
733 | 0 | return nullptr; |
734 | 135 | if (135 !End->isReg() && 135 !End->isImm()97 ) |
735 | 0 | return nullptr; |
736 | 135 | |
737 | 135 | bool CmpLess = Cmp & Comparison::L; |
738 | 135 | bool CmpGreater = Cmp & Comparison::G; |
739 | 135 | bool CmpHasEqual = Cmp & Comparison::EQ; |
740 | 135 | |
741 | 135 | // Avoid certain wrap-arounds. This doesn't detect all wrap-arounds. |
742 | 135 | if (CmpLess && 135 IVBump < 054 ) |
743 | 135 | // Loop going while iv is "less" with the iv value going down. Must wrap. |
744 | 0 | return nullptr; |
745 | 135 | |
746 | 135 | if (135 CmpGreater && 135 IVBump > 02 ) |
747 | 135 | // Loop going while iv is "greater" with the iv value going up. Must wrap. |
748 | 0 | return nullptr; |
749 | 135 | |
750 | 135 | // Phis that may feed into the loop. |
751 | 135 | LoopFeederMap LoopFeederPhi; |
752 | 135 | |
753 | 135 | // Check if the initial value may be zero and can be decremented in the first |
754 | 135 | // iteration. If the value is zero, the endloop instruction will not decrement |
755 | 135 | // the loop counter, so we shouldn't generate a hardware loop in this case. |
756 | 135 | if (loopCountMayWrapOrUnderFlow(Start, End, Loop->getLoopPreheader(), Loop, |
757 | 135 | LoopFeederPhi)) |
758 | 3 | return nullptr; |
759 | 132 | |
760 | 132 | if (132 Start->isImm() && 132 End->isImm()68 ) { |
761 | 44 | // Both, start and end are immediates. |
762 | 44 | int64_t StartV = Start->getImm(); |
763 | 44 | int64_t EndV = End->getImm(); |
764 | 44 | int64_t Dist = EndV - StartV; |
765 | 44 | if (Dist == 0) |
766 | 0 | return nullptr; |
767 | 44 | |
768 | 44 | bool Exact = (Dist % IVBump) == 0; |
769 | 44 | |
770 | 44 | if (Cmp == Comparison::NE44 ) { |
771 | 30 | if (!Exact) |
772 | 0 | return nullptr; |
773 | 30 | if (30 (Dist < 0) ^ (IVBump < 0)30 ) |
774 | 0 | return nullptr; |
775 | 44 | } |
776 | 44 | |
777 | 44 | // For comparisons that include the final value (i.e. include equality |
778 | 44 | // with the final value), we need to increase the distance by 1. |
779 | 44 | if (44 CmpHasEqual44 ) |
780 | 13 | Dist = Dist > 0 ? 13 Dist+113 : Dist-10 ; |
781 | 44 | |
782 | 44 | // For the loop to iterate, CmpLess should imply Dist > 0. Similarly, |
783 | 44 | // CmpGreater should imply Dist < 0. These conditions could actually |
784 | 44 | // fail, for example, in unreachable code (which may still appear to be |
785 | 44 | // reachable in the CFG). |
786 | 44 | if ((CmpLess && 44 Dist < 014 ) || (CmpGreater && 44 Dist > 00 )) |
787 | 0 | return nullptr; |
788 | 44 | |
789 | 44 | // "Normalized" distance, i.e. with the bump set to +-1. |
790 | 44 | int64_t Dist1 = (IVBump > 0) ? 44 (Dist + (IVBump - 1)) / IVBump43 |
791 | 1 | : (-Dist + (-IVBump - 1)) / (-IVBump); |
792 | 44 | assert (Dist1 > 0 && "Fishy thing. Both operands have the same sign."); |
793 | 44 | |
794 | 44 | uint64_t Count = Dist1; |
795 | 44 | |
796 | 44 | if (Count > 0xFFFFFFFFULL) |
797 | 0 | return nullptr; |
798 | 44 | |
799 | 44 | return new CountValue(CountValue::CV_Immediate, Count); |
800 | 44 | } |
801 | 88 | |
802 | 88 | // A general case: Start and End are some values, but the actual |
803 | 88 | // iteration count may not be available. If it is not, insert |
804 | 88 | // a computation of it into the preheader. |
805 | 88 | |
806 | 88 | // If the induction variable bump is not a power of 2, quit. |
807 | 88 | // Othwerise we'd need a general integer division. |
808 | 88 | if (88 !isPowerOf2_64(std::abs(IVBump))88 ) |
809 | 0 | return nullptr; |
810 | 88 | |
811 | 88 | MachineBasicBlock *PH = MLI->findLoopPreheader(Loop, SpecPreheader); |
812 | 88 | assert (PH && "Should have a preheader by now"); |
813 | 88 | MachineBasicBlock::iterator InsertPos = PH->getFirstTerminator(); |
814 | 88 | DebugLoc DL; |
815 | 88 | if (InsertPos != PH->end()) |
816 | 8 | DL = InsertPos->getDebugLoc(); |
817 | 88 | |
818 | 88 | // If Start is an immediate and End is a register, the trip count |
819 | 88 | // will be "reg - imm". Hexagon's "subtract immediate" instruction |
820 | 88 | // is actually "reg + -imm". |
821 | 88 | |
822 | 88 | // If the loop IV is going downwards, i.e. if the bump is negative, |
823 | 88 | // then the iteration count (computed as End-Start) will need to be |
824 | 88 | // negated. To avoid the negation, just swap Start and End. |
825 | 88 | if (IVBump < 088 ) { |
826 | 36 | std::swap(Start, End); |
827 | 36 | IVBump = -IVBump; |
828 | 36 | } |
829 | 88 | // Cmp may now have a wrong direction, e.g. LEs may now be GEs. |
830 | 88 | // Signedness, and "including equality" are preserved. |
831 | 88 | |
832 | 29 | bool RegToImm = Start->isReg() && End->isImm(); // for (reg..imm) |
833 | 29 | bool RegToReg = Start->isReg() && End->isReg(); // for (reg..reg) |
834 | 88 | |
835 | 88 | int64_t StartV = 0, EndV = 0; |
836 | 88 | if (Start->isImm()) |
837 | 59 | StartV = Start->getImm(); |
838 | 88 | if (End->isImm()) |
839 | 15 | EndV = End->getImm(); |
840 | 88 | |
841 | 88 | int64_t AdjV = 0; |
842 | 88 | // To compute the iteration count, we would need this computation: |
843 | 88 | // Count = (End - Start + (IVBump-1)) / IVBump |
844 | 88 | // or, when CmpHasEqual: |
845 | 88 | // Count = (End - Start + (IVBump-1)+1) / IVBump |
846 | 88 | // The "IVBump-1" part is the adjustment (AdjV). We can avoid |
847 | 88 | // generating an instruction specifically to add it if we can adjust |
848 | 88 | // the immediate values for Start or End. |
849 | 88 | |
850 | 88 | if (CmpHasEqual88 ) { |
851 | 21 | // Need to add 1 to the total iteration count. |
852 | 21 | if (Start->isImm()) |
853 | 5 | StartV--; |
854 | 16 | else if (16 End->isImm()16 ) |
855 | 10 | EndV++; |
856 | 16 | else |
857 | 6 | AdjV += 1; |
858 | 21 | } |
859 | 88 | |
860 | 88 | if (Cmp != Comparison::NE88 ) { |
861 | 42 | if (Start->isImm()) |
862 | 19 | StartV -= (IVBump-1); |
863 | 23 | else if (23 End->isImm()23 ) |
864 | 10 | EndV += (IVBump-1); |
865 | 23 | else |
866 | 13 | AdjV += (IVBump-1); |
867 | 42 | } |
868 | 88 | |
869 | 88 | unsigned R = 0, SR = 0; |
870 | 88 | if (Start->isReg()88 ) { |
871 | 29 | R = Start->getReg(); |
872 | 29 | SR = Start->getSubReg(); |
873 | 88 | } else { |
874 | 59 | R = End->getReg(); |
875 | 59 | SR = End->getSubReg(); |
876 | 59 | } |
877 | 88 | const TargetRegisterClass *RC = MRI->getRegClass(R); |
878 | 88 | // Hardware loops cannot handle 64-bit registers. If it's a double |
879 | 88 | // register, it has to have a subregister. |
880 | 88 | if (!SR && 88 RC == &Hexagon::DoubleRegsRegClass88 ) |
881 | 0 | return nullptr; |
882 | 88 | const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass; |
883 | 88 | |
884 | 88 | // Compute DistR (register with the distance between Start and End). |
885 | 88 | unsigned DistR, DistSR; |
886 | 88 | |
887 | 88 | // Avoid special case, where the start value is an imm(0). |
888 | 88 | if (Start->isImm() && 88 StartV == 059 ) { |
889 | 42 | DistR = End->getReg(); |
890 | 42 | DistSR = End->getSubReg(); |
891 | 88 | } else { |
892 | 14 | const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::A2_sub) : |
893 | 32 | (RegToImm ? 32 TII->get(Hexagon::A2_subri)15 : |
894 | 32 | TII->get(Hexagon::A2_addi)); |
895 | 46 | if (RegToReg || 46 RegToImm32 ) { |
896 | 29 | unsigned SubR = MRI->createVirtualRegister(IntRC); |
897 | 29 | MachineInstrBuilder SubIB = |
898 | 29 | BuildMI(*PH, InsertPos, DL, SubD, SubR); |
899 | 29 | |
900 | 29 | if (RegToReg) |
901 | 14 | SubIB.addReg(End->getReg(), 0, End->getSubReg()) |
902 | 14 | .addReg(Start->getReg(), 0, Start->getSubReg()); |
903 | 29 | else |
904 | 15 | SubIB.addImm(EndV) |
905 | 15 | .addReg(Start->getReg(), 0, Start->getSubReg()); |
906 | 29 | DistR = SubR; |
907 | 46 | } else { |
908 | 17 | // If the loop has been unrolled, we should use the original loop count |
909 | 17 | // instead of recalculating the value. This will avoid additional |
910 | 17 | // 'Add' instruction. |
911 | 17 | const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg()); |
912 | 17 | if (EndValInstr->getOpcode() == Hexagon::A2_addi && |
913 | 17 | EndValInstr->getOperand(2).getImm() == StartV1 ) { |
914 | 1 | DistR = EndValInstr->getOperand(1).getReg(); |
915 | 17 | } else { |
916 | 16 | unsigned SubR = MRI->createVirtualRegister(IntRC); |
917 | 16 | MachineInstrBuilder SubIB = |
918 | 16 | BuildMI(*PH, InsertPos, DL, SubD, SubR); |
919 | 16 | SubIB.addReg(End->getReg(), 0, End->getSubReg()) |
920 | 16 | .addImm(-StartV); |
921 | 16 | DistR = SubR; |
922 | 16 | } |
923 | 17 | } |
924 | 46 | DistSR = 0; |
925 | 46 | } |
926 | 88 | |
927 | 88 | // From DistR, compute AdjR (register with the adjusted distance). |
928 | 88 | unsigned AdjR, AdjSR; |
929 | 88 | |
930 | 88 | if (AdjV == 088 ) { |
931 | 77 | AdjR = DistR; |
932 | 77 | AdjSR = DistSR; |
933 | 88 | } else { |
934 | 11 | // Generate CountR = ADD DistR, AdjVal |
935 | 11 | unsigned AddR = MRI->createVirtualRegister(IntRC); |
936 | 11 | MCInstrDesc const &AddD = TII->get(Hexagon::A2_addi); |
937 | 11 | BuildMI(*PH, InsertPos, DL, AddD, AddR) |
938 | 11 | .addReg(DistR, 0, DistSR) |
939 | 11 | .addImm(AdjV); |
940 | 11 | |
941 | 11 | AdjR = AddR; |
942 | 11 | AdjSR = 0; |
943 | 11 | } |
944 | 88 | |
945 | 88 | // From AdjR, compute CountR (register with the final count). |
946 | 88 | unsigned CountR, CountSR; |
947 | 88 | |
948 | 88 | if (IVBump == 188 ) { |
949 | 43 | CountR = AdjR; |
950 | 43 | CountSR = AdjSR; |
951 | 88 | } else { |
952 | 45 | // The IV bump is a power of two. Log_2(IV bump) is the shift amount. |
953 | 45 | unsigned Shift = Log2_32(IVBump); |
954 | 45 | |
955 | 45 | // Generate NormR = LSR DistR, Shift. |
956 | 45 | unsigned LsrR = MRI->createVirtualRegister(IntRC); |
957 | 45 | const MCInstrDesc &LsrD = TII->get(Hexagon::S2_lsr_i_r); |
958 | 45 | BuildMI(*PH, InsertPos, DL, LsrD, LsrR) |
959 | 45 | .addReg(AdjR, 0, AdjSR) |
960 | 45 | .addImm(Shift); |
961 | 45 | |
962 | 45 | CountR = LsrR; |
963 | 45 | CountSR = 0; |
964 | 45 | } |
965 | 135 | |
966 | 135 | return new CountValue(CountValue::CV_Register, CountR, CountSR); |
967 | 135 | } |
968 | | |
969 | | /// \brief Return true if the operation is invalid within hardware loop. |
970 | | bool HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI, |
971 | 2.96k | bool IsInnerHWLoop) const { |
972 | 2.96k | // Call is not allowed because the callee may use a hardware loop except for |
973 | 2.96k | // the case when the call never returns. |
974 | 2.96k | if (MI->getDesc().isCall()) |
975 | 15 | return !TII->doesNotReturn(*MI); |
976 | 2.95k | |
977 | 2.95k | // Check if the instruction defines a hardware loop register. |
978 | 2.95k | using namespace Hexagon; |
979 | 2.95k | |
980 | 2.95k | static const unsigned Regs01[] = { LC0, SA0, LC1, SA1 }; |
981 | 2.95k | static const unsigned Regs1[] = { LC1, SA1 }; |
982 | 2.72k | auto CheckRegs = IsInnerHWLoop ? makeArrayRef(Regs01, array_lengthof(Regs01)) |
983 | 223 | : makeArrayRef(Regs1, array_lengthof(Regs1)); |
984 | 2.95k | for (unsigned R : CheckRegs) |
985 | 11.3k | if (11.3k MI->modifiesRegister(R, TRI)11.3k ) |
986 | 0 | return true; |
987 | 2.95k | |
988 | 2.95k | return false; |
989 | 2.95k | } |
990 | | |
991 | | /// \brief Return true if the loop contains an instruction that inhibits |
992 | | /// the use of the hardware loop instruction. |
993 | | bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L, |
994 | 208 | bool IsInnerHWLoop) const { |
995 | 208 | const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks(); |
996 | 208 | DEBUG(dbgs() << "\nhw_loop head, BB#" << Blocks[0]->getNumber();); |
997 | 473 | for (unsigned i = 0, e = Blocks.size(); i != e473 ; ++i265 ) { |
998 | 278 | MachineBasicBlock *MBB = Blocks[i]; |
999 | 278 | for (MachineBasicBlock::iterator |
1000 | 3.23k | MII = MBB->begin(), E = MBB->end(); MII != E3.23k ; ++MII2.95k ) { |
1001 | 2.96k | const MachineInstr *MI = &*MII; |
1002 | 2.96k | if (isInvalidLoopOperation(MI, IsInnerHWLoop)2.96k ) { |
1003 | 13 | DEBUG(dbgs()<< "\nCannot convert to hw_loop due to:"; MI->dump();); |
1004 | 13 | return true; |
1005 | 13 | } |
1006 | 2.96k | } |
1007 | 278 | } |
1008 | 195 | return false; |
1009 | 208 | } |
1010 | | |
1011 | | /// \brief Returns true if the instruction is dead. This was essentially |
1012 | | /// copied from DeadMachineInstructionElim::isDead, but with special cases |
1013 | | /// for inline asm, physical registers and instructions with side effects |
1014 | | /// removed. |
1015 | | bool HexagonHardwareLoops::isDead(const MachineInstr *MI, |
1016 | 450 | SmallVectorImpl<MachineInstr *> &DeadPhis) const { |
1017 | 450 | // Examine each operand. |
1018 | 1.08k | for (unsigned i = 0, e = MI->getNumOperands(); i != e1.08k ; ++i634 ) { |
1019 | 862 | const MachineOperand &MO = MI->getOperand(i); |
1020 | 862 | if (!MO.isReg() || 862 !MO.isDef()694 ) |
1021 | 412 | continue; |
1022 | 450 | |
1023 | 450 | unsigned Reg = MO.getReg(); |
1024 | 450 | if (MRI->use_nodbg_empty(Reg)) |
1025 | 164 | continue; |
1026 | 286 | |
1027 | 286 | using use_nodbg_iterator = MachineRegisterInfo::use_nodbg_iterator; |
1028 | 286 | |
1029 | 286 | // This instruction has users, but if the only user is the phi node for the |
1030 | 286 | // parent block, and the only use of that phi node is this instruction, then |
1031 | 286 | // this instruction is dead: both it (and the phi node) can be removed. |
1032 | 286 | use_nodbg_iterator I = MRI->use_nodbg_begin(Reg); |
1033 | 286 | use_nodbg_iterator End = MRI->use_nodbg_end(); |
1034 | 286 | if (std::next(I) != End || 286 !I->getParent()->isPHI()188 ) |
1035 | 123 | return false; |
1036 | 163 | |
1037 | 163 | MachineInstr *OnePhi = I->getParent(); |
1038 | 453 | for (unsigned j = 0, f = OnePhi->getNumOperands(); j != f453 ; ++j290 ) { |
1039 | 395 | const MachineOperand &OPO = OnePhi->getOperand(j); |
1040 | 395 | if (!OPO.isReg() || 395 !OPO.isDef()279 ) |
1041 | 232 | continue; |
1042 | 163 | |
1043 | 163 | unsigned OPReg = OPO.getReg(); |
1044 | 163 | use_nodbg_iterator nextJ; |
1045 | 163 | for (use_nodbg_iterator J = MRI->use_nodbg_begin(OPReg); |
1046 | 266 | J != End266 ; J = nextJ103 ) { |
1047 | 208 | nextJ = std::next(J); |
1048 | 208 | MachineOperand &Use = *J; |
1049 | 208 | MachineInstr *UseMI = Use.getParent(); |
1050 | 208 | |
1051 | 208 | // If the phi node has a user that is not MI, bail. |
1052 | 208 | if (MI != UseMI) |
1053 | 105 | return false; |
1054 | 208 | } |
1055 | 395 | } |
1056 | 58 | DeadPhis.push_back(OnePhi); |
1057 | 58 | } |
1058 | 450 | |
1059 | 450 | // If there are no defs with uses, the instruction is dead. |
1060 | 222 | return true; |
1061 | 450 | } |
1062 | | |
1063 | 450 | void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) { |
1064 | 450 | // This procedure was essentially copied from DeadMachineInstructionElim. |
1065 | 450 | |
1066 | 450 | SmallVector<MachineInstr*, 1> DeadPhis; |
1067 | 450 | if (isDead(MI, DeadPhis)450 ) { |
1068 | 222 | DEBUG(dbgs() << "HW looping will remove: " << *MI); |
1069 | 222 | |
1070 | 222 | // It is possible that some DBG_VALUE instructions refer to this |
1071 | 222 | // instruction. Examine each def operand for such references; |
1072 | 222 | // if found, mark the DBG_VALUE as undef (but don't delete it). |
1073 | 856 | for (unsigned i = 0, e = MI->getNumOperands(); i != e856 ; ++i634 ) { |
1074 | 634 | const MachineOperand &MO = MI->getOperand(i); |
1075 | 634 | if (!MO.isReg() || 634 !MO.isDef()466 ) |
1076 | 412 | continue; |
1077 | 222 | unsigned Reg = MO.getReg(); |
1078 | 222 | MachineRegisterInfo::use_iterator nextI; |
1079 | 222 | for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg), |
1080 | 281 | E = MRI->use_end(); I != E281 ; I = nextI59 ) { |
1081 | 59 | nextI = std::next(I); // I is invalidated by the setReg |
1082 | 59 | MachineOperand &Use = *I; |
1083 | 59 | MachineInstr *UseMI = I->getParent(); |
1084 | 59 | if (UseMI == MI) |
1085 | 0 | continue; |
1086 | 59 | if (59 Use.isDebug()59 ) |
1087 | 1 | UseMI->getOperand(0).setReg(0U); |
1088 | 59 | } |
1089 | 634 | } |
1090 | 222 | |
1091 | 222 | MI->eraseFromParent(); |
1092 | 280 | for (unsigned i = 0; i < DeadPhis.size()280 ; ++i58 ) |
1093 | 58 | DeadPhis[i]->eraseFromParent(); |
1094 | 222 | } |
1095 | 450 | } |
1096 | | |
1097 | | /// \brief Check if the loop is a candidate for converting to a hardware |
1098 | | /// loop. If so, then perform the transformation. |
1099 | | /// |
1100 | | /// This function works on innermost loops first. A loop can be converted |
1101 | | /// if it is a counting loop; either a register value or an immediate. |
1102 | | /// |
1103 | | /// The code makes several assumptions about the representation of the loop |
1104 | | /// in llvm. |
1105 | | bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L, |
1106 | | bool &RecL0used, |
1107 | 210 | bool &RecL1used) { |
1108 | 210 | // This is just for sanity. |
1109 | 210 | assert(L->getHeader() && "Loop without a header?"); |
1110 | 210 | |
1111 | 210 | bool Changed = false; |
1112 | 210 | bool L0Used = false; |
1113 | 210 | bool L1Used = false; |
1114 | 210 | |
1115 | 210 | // Process nested loops first. |
1116 | 226 | for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E226 ; ++I16 ) { |
1117 | 16 | Changed |= convertToHardwareLoop(*I, RecL0used, RecL1used); |
1118 | 16 | L0Used |= RecL0used; |
1119 | 16 | L1Used |= RecL1used; |
1120 | 16 | } |
1121 | 210 | |
1122 | 210 | // If a nested loop has been converted, then we can't convert this loop. |
1123 | 210 | if (Changed && 210 L0Used9 && L1Used9 ) |
1124 | 2 | return Changed; |
1125 | 208 | |
1126 | 208 | unsigned LOOP_i; |
1127 | 208 | unsigned LOOP_r; |
1128 | 208 | unsigned ENDLOOP; |
1129 | 208 | |
1130 | 208 | // Flag used to track loopN instruction: |
1131 | 208 | // 1 - Hardware loop is being generated for the inner most loop. |
1132 | 208 | // 0 - Hardware loop is being generated for the outer loop. |
1133 | 208 | unsigned IsInnerHWLoop = 1; |
1134 | 208 | |
1135 | 208 | if (L0Used208 ) { |
1136 | 7 | LOOP_i = Hexagon::J2_loop1i; |
1137 | 7 | LOOP_r = Hexagon::J2_loop1r; |
1138 | 7 | ENDLOOP = Hexagon::ENDLOOP1; |
1139 | 7 | IsInnerHWLoop = 0; |
1140 | 208 | } else { |
1141 | 201 | LOOP_i = Hexagon::J2_loop0i; |
1142 | 201 | LOOP_r = Hexagon::J2_loop0r; |
1143 | 201 | ENDLOOP = Hexagon::ENDLOOP0; |
1144 | 201 | } |
1145 | 208 | |
1146 | | #ifndef NDEBUG |
1147 | | // Stop trying after reaching the limit (if any). |
1148 | | int Limit = HWLoopLimit; |
1149 | | if (Limit >= 0) { |
1150 | | if (Counter >= HWLoopLimit) |
1151 | | return false; |
1152 | | Counter++; |
1153 | | } |
1154 | | #endif |
1155 | | |
1156 | 208 | // Does the loop contain any invalid instructions? |
1157 | 208 | if (containsInvalidInstruction(L, IsInnerHWLoop)) |
1158 | 13 | return false; |
1159 | 195 | |
1160 | 195 | MachineBasicBlock *LastMBB = L->findLoopControlBlock(); |
1161 | 195 | // Don't generate hw loop if the loop has more than one exit. |
1162 | 195 | if (!LastMBB) |
1163 | 13 | return false; |
1164 | 182 | |
1165 | 182 | MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator(); |
1166 | 182 | if (LastI == LastMBB->end()) |
1167 | 0 | return false; |
1168 | 182 | |
1169 | 182 | // Is the induction variable bump feeding the latch condition? |
1170 | 182 | if (182 !fixupInductionVariable(L)182 ) |
1171 | 45 | return false; |
1172 | 137 | |
1173 | 137 | // Ensure the loop has a preheader: the loop instruction will be |
1174 | 137 | // placed there. |
1175 | 137 | MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader); |
1176 | 137 | if (!Preheader137 ) { |
1177 | 0 | Preheader = createPreheaderForLoop(L); |
1178 | 0 | if (!Preheader) |
1179 | 0 | return false; |
1180 | 137 | } |
1181 | 137 | |
1182 | 137 | MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator(); |
1183 | 137 | |
1184 | 137 | SmallVector<MachineInstr*, 2> OldInsts; |
1185 | 137 | // Are we able to determine the trip count for the loop? |
1186 | 137 | CountValue *TripCount = getLoopTripCount(L, OldInsts); |
1187 | 137 | if (!TripCount) |
1188 | 5 | return false; |
1189 | 132 | |
1190 | 132 | // Is the trip count available in the preheader? |
1191 | 132 | if (132 TripCount->isReg()132 ) { |
1192 | 88 | // There will be a use of the register inserted into the preheader, |
1193 | 88 | // so make sure that the register is actually defined at that point. |
1194 | 88 | MachineInstr *TCDef = MRI->getVRegDef(TripCount->getReg()); |
1195 | 88 | MachineBasicBlock *BBDef = TCDef->getParent(); |
1196 | 88 | if (!MDT->dominates(BBDef, Preheader)) |
1197 | 0 | return false; |
1198 | 132 | } |
1199 | 132 | |
1200 | 132 | // Determine the loop start. |
1201 | 132 | MachineBasicBlock *TopBlock = L->getTopBlock(); |
1202 | 132 | MachineBasicBlock *ExitingBlock = L->findLoopControlBlock(); |
1203 | 132 | MachineBasicBlock *LoopStart = nullptr; |
1204 | 132 | if (ExitingBlock != L->getLoopLatch()132 ) { |
1205 | 0 | MachineBasicBlock *TB = nullptr, *FB = nullptr; |
1206 | 0 | SmallVector<MachineOperand, 2> Cond; |
1207 | 0 |
|
1208 | 0 | if (TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false)) |
1209 | 0 | return false; |
1210 | 0 |
|
1211 | 0 | if (0 L->contains(TB)0 ) |
1212 | 0 | LoopStart = TB; |
1213 | 0 | else if (0 L->contains(FB)0 ) |
1214 | 0 | LoopStart = FB; |
1215 | 0 | else |
1216 | 0 | return false; |
1217 | 132 | } |
1218 | 132 | else |
1219 | 132 | LoopStart = TopBlock; |
1220 | 132 | |
1221 | 132 | // Convert the loop to a hardware loop. |
1222 | 132 | DEBUG132 (dbgs() << "Change to hardware loop at "; L->dump()); |
1223 | 132 | DebugLoc DL; |
1224 | 132 | if (InsertPos != Preheader->end()) |
1225 | 21 | DL = InsertPos->getDebugLoc(); |
1226 | 132 | |
1227 | 132 | if (TripCount->isReg()132 ) { |
1228 | 88 | // Create a copy of the loop count register. |
1229 | 88 | unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass); |
1230 | 88 | BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg) |
1231 | 88 | .addReg(TripCount->getReg(), 0, TripCount->getSubReg()); |
1232 | 88 | // Add the Loop instruction to the beginning of the loop. |
1233 | 88 | BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r)).addMBB(LoopStart) |
1234 | 88 | .addReg(CountReg); |
1235 | 132 | } else { |
1236 | 44 | assert(TripCount->isImm() && "Expecting immediate value for trip count"); |
1237 | 44 | // Add the Loop immediate instruction to the beginning of the loop, |
1238 | 44 | // if the immediate fits in the instructions. Otherwise, we need to |
1239 | 44 | // create a new virtual register. |
1240 | 44 | int64_t CountImm = TripCount->getImm(); |
1241 | 44 | if (!TII->isValidOffset(LOOP_i, CountImm, TRI)44 ) { |
1242 | 10 | unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass); |
1243 | 10 | BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::A2_tfrsi), CountReg) |
1244 | 10 | .addImm(CountImm); |
1245 | 10 | BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r)) |
1246 | 10 | .addMBB(LoopStart).addReg(CountReg); |
1247 | 10 | } else |
1248 | 34 | BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_i)) |
1249 | 34 | .addMBB(LoopStart).addImm(CountImm); |
1250 | 44 | } |
1251 | 132 | |
1252 | 132 | // Make sure the loop start always has a reference in the CFG. We need |
1253 | 132 | // to create a BlockAddress operand to get this mechanism to work both the |
1254 | 132 | // MachineBasicBlock and BasicBlock objects need the flag set. |
1255 | 132 | LoopStart->setHasAddressTaken(); |
1256 | 132 | // This line is needed to set the hasAddressTaken flag on the BasicBlock |
1257 | 132 | // object. |
1258 | 132 | BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock())); |
1259 | 132 | |
1260 | 132 | // Replace the loop branch with an endloop instruction. |
1261 | 132 | DebugLoc LastIDL = LastI->getDebugLoc(); |
1262 | 132 | BuildMI(*LastMBB, LastI, LastIDL, TII->get(ENDLOOP)).addMBB(LoopStart); |
1263 | 132 | |
1264 | 132 | // The loop ends with either: |
1265 | 132 | // - a conditional branch followed by an unconditional branch, or |
1266 | 132 | // - a conditional branch to the loop start. |
1267 | 132 | if (LastI->getOpcode() == Hexagon::J2_jumpt || |
1268 | 132 | LastI->getOpcode() == Hexagon::J2_jumpf93 ) { |
1269 | 132 | // Delete one and change/add an uncond. branch to out of the loop. |
1270 | 132 | MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB(); |
1271 | 132 | LastI = LastMBB->erase(LastI); |
1272 | 132 | if (!L->contains(BranchTarget)132 ) { |
1273 | 6 | if (LastI != LastMBB->end()) |
1274 | 6 | LastI = LastMBB->erase(LastI); |
1275 | 6 | SmallVector<MachineOperand, 0> Cond; |
1276 | 6 | TII->insertBranch(*LastMBB, BranchTarget, nullptr, Cond, LastIDL); |
1277 | 6 | } |
1278 | 0 | } else { |
1279 | 0 | // Conditional branch to loop start; just delete it. |
1280 | 0 | LastMBB->erase(LastI); |
1281 | 0 | } |
1282 | 132 | delete TripCount; |
1283 | 132 | |
1284 | 132 | // The induction operation and the comparison may now be |
1285 | 132 | // unneeded. If these are unneeded, then remove them. |
1286 | 582 | for (unsigned i = 0; i < OldInsts.size()582 ; ++i450 ) |
1287 | 450 | removeIfDead(OldInsts[i]); |
1288 | 132 | |
1289 | 132 | ++NumHWLoops; |
1290 | 132 | |
1291 | 132 | // Set RecL1used and RecL0used only after hardware loop has been |
1292 | 132 | // successfully generated. Doing it earlier can cause wrong loop instruction |
1293 | 132 | // to be used. |
1294 | 132 | if (L0Used) // Loop0 was already used. So, the correct loop must be loop1. |
1295 | 4 | RecL1used = true; |
1296 | 132 | else |
1297 | 128 | RecL0used = true; |
1298 | 132 | |
1299 | 132 | return true; |
1300 | 210 | } |
1301 | | |
1302 | | bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI, |
1303 | 3 | MachineInstr *CmpI) { |
1304 | 3 | assert (BumpI != CmpI && "Bump and compare in the same instruction?"); |
1305 | 3 | |
1306 | 3 | MachineBasicBlock *BB = BumpI->getParent(); |
1307 | 3 | if (CmpI->getParent() != BB) |
1308 | 2 | return false; |
1309 | 1 | |
1310 | 1 | using instr_iterator = MachineBasicBlock::instr_iterator; |
1311 | 1 | |
1312 | 1 | // Check if things are in order to begin with. |
1313 | 2 | for (instr_iterator I(BumpI), E = BB->instr_end(); I != E2 ; ++I1 ) |
1314 | 2 | if (2 &*I == CmpI2 ) |
1315 | 1 | return true; |
1316 | 1 | |
1317 | 1 | // Out of order. |
1318 | 0 | unsigned PredR = CmpI->getOperand(0).getReg(); |
1319 | 0 | bool FoundBump = false; |
1320 | 0 | instr_iterator CmpIt = CmpI->getIterator(), NextIt = std::next(CmpIt); |
1321 | 0 | for (instr_iterator I = NextIt, E = BB->instr_end(); I != E0 ; ++I0 ) { |
1322 | 0 | MachineInstr *In = &*I; |
1323 | 0 | for (unsigned i = 0, n = In->getNumOperands(); i < n0 ; ++i0 ) { |
1324 | 0 | MachineOperand &MO = In->getOperand(i); |
1325 | 0 | if (MO.isReg() && 0 MO.isUse()0 ) { |
1326 | 0 | if (MO.getReg() == PredR) // Found an intervening use of PredR. |
1327 | 0 | return false; |
1328 | 0 | } |
1329 | 0 | } |
1330 | 0 |
|
1331 | 0 | if (0 In == BumpI0 ) { |
1332 | 0 | BB->splice(++BumpI->getIterator(), BB, CmpI->getIterator()); |
1333 | 0 | FoundBump = true; |
1334 | 0 | break; |
1335 | 0 | } |
1336 | 0 | } |
1337 | 0 | assert (FoundBump && "Cannot determine instruction order"); |
1338 | 0 | return FoundBump; |
1339 | 3 | } |
1340 | | |
1341 | | /// This function is required to break recursion. Visiting phis in a loop may |
1342 | | /// result in recursion during compilation. We break the recursion by making |
1343 | | /// sure that we visit a MachineOperand and its definition in a |
1344 | | /// MachineInstruction only once. If we attempt to visit more than once, then |
1345 | | /// there is recursion, and will return false. |
1346 | | bool HexagonHardwareLoops::isLoopFeeder(MachineLoop *L, MachineBasicBlock *A, |
1347 | | MachineInstr *MI, |
1348 | | const MachineOperand *MO, |
1349 | 3 | LoopFeederMap &LoopFeederPhi) const { |
1350 | 3 | if (LoopFeederPhi.find(MO->getReg()) == LoopFeederPhi.end()3 ) { |
1351 | 3 | const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks(); |
1352 | 3 | DEBUG(dbgs() << "\nhw_loop head, BB#" << Blocks[0]->getNumber();); |
1353 | 3 | // Ignore all BBs that form Loop. |
1354 | 8 | for (unsigned i = 0, e = Blocks.size(); i != e8 ; ++i5 ) { |
1355 | 5 | MachineBasicBlock *MBB = Blocks[i]; |
1356 | 5 | if (A == MBB) |
1357 | 0 | return false; |
1358 | 5 | } |
1359 | 3 | MachineInstr *Def = MRI->getVRegDef(MO->getReg()); |
1360 | 3 | LoopFeederPhi.insert(std::make_pair(MO->getReg(), Def)); |
1361 | 3 | return true; |
1362 | 3 | } else |
1363 | 3 | // Already visited node. |
1364 | 0 | return false; |
1365 | 0 | } |
1366 | | |
1367 | | /// Return true if a Phi may generate a value that can underflow. |
1368 | | /// This function calls loopCountMayWrapOrUnderFlow for each Phi operand. |
1369 | | bool HexagonHardwareLoops::phiMayWrapOrUnderflow( |
1370 | | MachineInstr *Phi, const MachineOperand *EndVal, MachineBasicBlock *MBB, |
1371 | 2 | MachineLoop *L, LoopFeederMap &LoopFeederPhi) const { |
1372 | 2 | assert(Phi->isPHI() && "Expecting a Phi."); |
1373 | 2 | // Walk through each Phi, and its used operands. Make sure that |
1374 | 2 | // if there is recursion in Phi, we won't generate hardware loops. |
1375 | 3 | for (int i = 1, n = Phi->getNumOperands(); i < n3 ; i += 21 ) |
1376 | 3 | if (3 isLoopFeeder(L, MBB, Phi, &(Phi->getOperand(i)), LoopFeederPhi)3 ) |
1377 | 3 | if (3 loopCountMayWrapOrUnderFlow(&(Phi->getOperand(i)), EndVal, |
1378 | 3 | Phi->getParent(), L, LoopFeederPhi)) |
1379 | 2 | return true; |
1380 | 0 | return false; |
1381 | 2 | } |
1382 | | |
1383 | | /// Return true if the induction variable can underflow in the first iteration. |
1384 | | /// An example, is an initial unsigned value that is 0 and is decrement in the |
1385 | | /// first itertion of a do-while loop. In this case, we cannot generate a |
1386 | | /// hardware loop because the endloop instruction does not decrement the loop |
1387 | | /// counter if it is <= 1. We only need to perform this analysis if the |
1388 | | /// initial value is a register. |
1389 | | /// |
1390 | | /// This function assumes the initial value may underfow unless proven |
1391 | | /// otherwise. If the type is signed, then we don't care because signed |
1392 | | /// underflow is undefined. We attempt to prove the initial value is not |
1393 | | /// zero by perfoming a crude analysis of the loop counter. This function |
1394 | | /// checks if the initial value is used in any comparison prior to the loop |
1395 | | /// and, if so, assumes the comparison is a range check. This is inexact, |
1396 | | /// but will catch the simple cases. |
1397 | | bool HexagonHardwareLoops::loopCountMayWrapOrUnderFlow( |
1398 | | const MachineOperand *InitVal, const MachineOperand *EndVal, |
1399 | | MachineBasicBlock *MBB, MachineLoop *L, |
1400 | 168 | LoopFeederMap &LoopFeederPhi) const { |
1401 | 168 | // Only check register values since they are unknown. |
1402 | 168 | if (!InitVal->isReg()) |
1403 | 68 | return false; |
1404 | 100 | |
1405 | 100 | if (100 !EndVal->isImm()100 ) |
1406 | 14 | return false; |
1407 | 86 | |
1408 | 86 | // A register value that is assigned an immediate is a known value, and it |
1409 | 86 | // won't underflow in the first iteration. |
1410 | 86 | int64_t Imm; |
1411 | 86 | if (checkForImmediate(*InitVal, Imm)) |
1412 | 3 | return (EndVal->getImm() == Imm); |
1413 | 83 | |
1414 | 83 | unsigned Reg = InitVal->getReg(); |
1415 | 83 | |
1416 | 83 | // We don't know the value of a physical register. |
1417 | 83 | if (!TargetRegisterInfo::isVirtualRegister(Reg)) |
1418 | 30 | return true; |
1419 | 53 | |
1420 | 53 | MachineInstr *Def = MRI->getVRegDef(Reg); |
1421 | 53 | if (!Def) |
1422 | 0 | return true; |
1423 | 53 | |
1424 | 53 | // If the initial value is a Phi or copy and the operands may not underflow, |
1425 | 53 | // then the definition cannot be underflow either. |
1426 | 53 | if (53 Def->isPHI() && 53 !phiMayWrapOrUnderflow(Def, EndVal, Def->getParent(), |
1427 | 2 | L, LoopFeederPhi)) |
1428 | 0 | return false; |
1429 | 53 | if (53 Def->isCopy() && 53 !loopCountMayWrapOrUnderFlow(&(Def->getOperand(1)), |
1430 | 30 | EndVal, Def->getParent(), |
1431 | 30 | L, LoopFeederPhi)) |
1432 | 0 | return false; |
1433 | 53 | |
1434 | 53 | // Iterate over the uses of the initial value. If the initial value is used |
1435 | 53 | // in a compare, then we assume this is a range check that ensures the loop |
1436 | 53 | // doesn't underflow. This is not an exact test and should be improved. |
1437 | 53 | for (MachineRegisterInfo::use_instr_nodbg_iterator I = MRI->use_instr_nodbg_begin(Reg), |
1438 | 78 | E = MRI->use_instr_nodbg_end(); I != E78 ; ++I25 ) { |
1439 | 57 | MachineInstr *MI = &*I; |
1440 | 57 | unsigned CmpReg1 = 0, CmpReg2 = 0; |
1441 | 57 | int CmpMask = 0, CmpValue = 0; |
1442 | 57 | |
1443 | 57 | if (!TII->analyzeCompare(*MI, CmpReg1, CmpReg2, CmpMask, CmpValue)) |
1444 | 25 | continue; |
1445 | 32 | |
1446 | 32 | MachineBasicBlock *TBB = nullptr, *FBB = nullptr; |
1447 | 32 | SmallVector<MachineOperand, 2> Cond; |
1448 | 32 | if (TII->analyzeBranch(*MI->getParent(), TBB, FBB, Cond, false)) |
1449 | 0 | continue; |
1450 | 32 | |
1451 | 32 | Comparison::Kind Cmp = |
1452 | 32 | getComparisonKind(MI->getOpcode(), nullptr, nullptr, 0); |
1453 | 32 | if (Cmp == 0) |
1454 | 0 | continue; |
1455 | 32 | if (32 TII->predOpcodeHasNot(Cond) ^ (TBB != MBB)32 ) |
1456 | 20 | Cmp = Comparison::getNegatedComparison(Cmp); |
1457 | 32 | if (CmpReg2 != 0 && 32 CmpReg2 == Reg15 ) |
1458 | 0 | Cmp = Comparison::getSwappedComparison(Cmp); |
1459 | 32 | |
1460 | 32 | // Signed underflow is undefined. |
1461 | 32 | if (Comparison::isSigned(Cmp)) |
1462 | 29 | return false; |
1463 | 3 | |
1464 | 3 | // Check if there is a comparison of the initial value. If the initial value |
1465 | 3 | // is greater than or not equal to another value, then assume this is a |
1466 | 3 | // range check. |
1467 | 3 | if (3 (Cmp & Comparison::G) || 3 Cmp == Comparison::NE3 ) |
1468 | 3 | return false; |
1469 | 57 | } |
1470 | 53 | |
1471 | 53 | // OK - this is a hack that needs to be improved. We really need to analyze |
1472 | 53 | // the instructions performed on the initial value. This works on the simplest |
1473 | 53 | // cases only. |
1474 | 21 | if (21 !Def->isCopy() && 21 !Def->isPHI()20 ) |
1475 | 18 | return false; |
1476 | 3 | |
1477 | 3 | return true; |
1478 | 3 | } |
1479 | | |
1480 | | bool HexagonHardwareLoops::checkForImmediate(const MachineOperand &MO, |
1481 | 713 | int64_t &Val) const { |
1482 | 713 | if (MO.isImm()713 ) { |
1483 | 367 | Val = MO.getImm(); |
1484 | 367 | return true; |
1485 | 367 | } |
1486 | 346 | if (346 !MO.isReg()346 ) |
1487 | 0 | return false; |
1488 | 346 | |
1489 | 346 | // MO is a register. Check whether it is defined as an immediate value, |
1490 | 346 | // and if so, get the value of it in TV. That value will then need to be |
1491 | 346 | // processed to handle potential subregisters in MO. |
1492 | 346 | int64_t TV; |
1493 | 346 | |
1494 | 346 | unsigned R = MO.getReg(); |
1495 | 346 | if (!TargetRegisterInfo::isVirtualRegister(R)) |
1496 | 92 | return false; |
1497 | 254 | MachineInstr *DI = MRI->getVRegDef(R); |
1498 | 254 | unsigned DOpc = DI->getOpcode(); |
1499 | 254 | switch (DOpc) { |
1500 | 81 | case TargetOpcode::COPY: |
1501 | 81 | case Hexagon::A2_tfrsi: |
1502 | 81 | case Hexagon::A2_tfrpi: |
1503 | 81 | case Hexagon::CONST32: |
1504 | 81 | case Hexagon::CONST64: |
1505 | 81 | // Call recursively to avoid an extra check whether operand(1) is |
1506 | 81 | // indeed an immediate (it could be a global address, for example), |
1507 | 81 | // plus we can handle COPY at the same time. |
1508 | 81 | if (!checkForImmediate(DI->getOperand(1), TV)) |
1509 | 62 | return false; |
1510 | 19 | break; |
1511 | 0 | case Hexagon::A2_combineii: |
1512 | 0 | case Hexagon::A4_combineir: |
1513 | 0 | case Hexagon::A4_combineii: |
1514 | 0 | case Hexagon::A4_combineri: |
1515 | 0 | case Hexagon::A2_combinew: { |
1516 | 0 | const MachineOperand &S1 = DI->getOperand(1); |
1517 | 0 | const MachineOperand &S2 = DI->getOperand(2); |
1518 | 0 | int64_t V1, V2; |
1519 | 0 | if (!checkForImmediate(S1, V1) || 0 !checkForImmediate(S2, V2)0 ) |
1520 | 0 | return false; |
1521 | 0 | TV = V2 | (static_cast<uint64_t>(V1) << 32); |
1522 | 0 | break; |
1523 | 0 | } |
1524 | 0 | case TargetOpcode::REG_SEQUENCE: { |
1525 | 0 | const MachineOperand &S1 = DI->getOperand(1); |
1526 | 0 | const MachineOperand &S3 = DI->getOperand(3); |
1527 | 0 | int64_t V1, V3; |
1528 | 0 | if (!checkForImmediate(S1, V1) || 0 !checkForImmediate(S3, V3)0 ) |
1529 | 0 | return false; |
1530 | 0 | unsigned Sub2 = DI->getOperand(2).getImm(); |
1531 | 0 | unsigned Sub4 = DI->getOperand(4).getImm(); |
1532 | 0 | if (Sub2 == Hexagon::isub_lo && 0 Sub4 == Hexagon::isub_hi0 ) |
1533 | 0 | TV = V1 | (V3 << 32); |
1534 | 0 | else if (0 Sub2 == Hexagon::isub_hi && 0 Sub4 == Hexagon::isub_lo0 ) |
1535 | 0 | TV = V3 | (V1 << 32); |
1536 | 0 | else |
1537 | 0 | llvm_unreachable("Unexpected form of REG_SEQUENCE"); |
1538 | 0 | break; |
1539 | 0 | } |
1540 | 0 |
|
1541 | 173 | default: |
1542 | 173 | return false; |
1543 | 19 | } |
1544 | 19 | |
1545 | 19 | // By now, we should have successfully obtained the immediate value defining |
1546 | 19 | // the register referenced in MO. Handle a potential use of a subregister. |
1547 | 19 | switch (MO.getSubReg()) { |
1548 | 0 | case Hexagon::isub_lo: |
1549 | 0 | Val = TV & 0xFFFFFFFFULL; |
1550 | 0 | break; |
1551 | 0 | case Hexagon::isub_hi: |
1552 | 0 | Val = (TV >> 32) & 0xFFFFFFFFULL; |
1553 | 0 | break; |
1554 | 19 | default: |
1555 | 19 | Val = TV; |
1556 | 19 | break; |
1557 | 19 | } |
1558 | 19 | return true; |
1559 | 19 | } |
1560 | | |
1561 | 0 | void HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) { |
1562 | 0 | if (MO.isImm()0 ) { |
1563 | 0 | MO.setImm(Val); |
1564 | 0 | return; |
1565 | 0 | } |
1566 | 0 |
|
1567 | 0 | assert(MO.isReg()); |
1568 | 0 | unsigned R = MO.getReg(); |
1569 | 0 | MachineInstr *DI = MRI->getVRegDef(R); |
1570 | 0 |
|
1571 | 0 | const TargetRegisterClass *RC = MRI->getRegClass(R); |
1572 | 0 | unsigned NewR = MRI->createVirtualRegister(RC); |
1573 | 0 | MachineBasicBlock &B = *DI->getParent(); |
1574 | 0 | DebugLoc DL = DI->getDebugLoc(); |
1575 | 0 | BuildMI(B, DI, DL, TII->get(DI->getOpcode()), NewR).addImm(Val); |
1576 | 0 | MO.setReg(NewR); |
1577 | 0 | } |
1578 | | |
1579 | 2 | static bool isImmValidForOpcode(unsigned CmpOpc, int64_t Imm) { |
1580 | 2 | // These two instructions are not extendable. |
1581 | 2 | if (CmpOpc == Hexagon::A4_cmpbeqi) |
1582 | 0 | return isUInt<8>(Imm); |
1583 | 2 | if (2 CmpOpc == Hexagon::A4_cmpbgti2 ) |
1584 | 0 | return isInt<8>(Imm); |
1585 | 2 | // The rest of the comparison-with-immediate instructions are extendable. |
1586 | 2 | return true; |
1587 | 2 | } |
1588 | | |
1589 | 182 | bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) { |
1590 | 182 | MachineBasicBlock *Header = L->getHeader(); |
1591 | 182 | MachineBasicBlock *Latch = L->getLoopLatch(); |
1592 | 182 | MachineBasicBlock *ExitingBlock = L->findLoopControlBlock(); |
1593 | 182 | |
1594 | 182 | if (!(Header && 182 Latch182 && ExitingBlock182 )) |
1595 | 0 | return false; |
1596 | 182 | |
1597 | 182 | // These data structures follow the same concept as the corresponding |
1598 | 182 | // ones in findInductionRegister (where some comments are). |
1599 | 182 | using RegisterBump = std::pair<unsigned, int64_t>; |
1600 | 182 | using RegisterInduction = std::pair<unsigned, RegisterBump>; |
1601 | 182 | using RegisterInductionSet = std::set<RegisterInduction>; |
1602 | 182 | |
1603 | 182 | // Register candidates for induction variables, with their associated bumps. |
1604 | 182 | RegisterInductionSet IndRegs; |
1605 | 182 | |
1606 | 182 | // Look for induction patterns: |
1607 | 182 | // vreg1 = PHI ..., [ latch, vreg2 ] |
1608 | 182 | // vreg2 = ADD vreg1, imm |
1609 | 182 | using instr_iterator = MachineBasicBlock::instr_iterator; |
1610 | 182 | |
1611 | 182 | for (instr_iterator I = Header->instr_begin(), E = Header->instr_end(); |
1612 | 518 | I != E && 518 I->isPHI()517 ; ++I336 ) { |
1613 | 336 | MachineInstr *Phi = &*I; |
1614 | 336 | |
1615 | 336 | // Have a PHI instruction. |
1616 | 1.00k | for (unsigned i = 1, n = Phi->getNumOperands(); i < n1.00k ; i += 2672 ) { |
1617 | 672 | if (Phi->getOperand(i+1).getMBB() != Latch) |
1618 | 336 | continue; |
1619 | 336 | |
1620 | 336 | unsigned PhiReg = Phi->getOperand(i).getReg(); |
1621 | 336 | MachineInstr *DI = MRI->getVRegDef(PhiReg); |
1622 | 336 | |
1623 | 336 | if (DI->getDesc().isAdd()336 ) { |
1624 | 183 | // If the register operand to the add/sub is the PHI we are looking |
1625 | 183 | // at, this meets the induction pattern. |
1626 | 183 | unsigned IndReg = DI->getOperand(1).getReg(); |
1627 | 183 | MachineOperand &Opnd2 = DI->getOperand(2); |
1628 | 183 | int64_t V; |
1629 | 183 | if (MRI->getVRegDef(IndReg) == Phi && 183 checkForImmediate(Opnd2, V)179 ) { |
1630 | 179 | unsigned UpdReg = DI->getOperand(0).getReg(); |
1631 | 179 | IndRegs.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V))); |
1632 | 179 | } |
1633 | 183 | } |
1634 | 672 | } // for (i) |
1635 | 336 | } // for (instr) |
1636 | 182 | |
1637 | 182 | if (IndRegs.empty()) |
1638 | 39 | return false; |
1639 | 143 | |
1640 | 143 | MachineBasicBlock *TB = nullptr, *FB = nullptr; |
1641 | 143 | SmallVector<MachineOperand,2> Cond; |
1642 | 143 | // AnalyzeBranch returns true if it fails to analyze branch. |
1643 | 143 | bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false); |
1644 | 143 | if (NotAnalyzed || 143 Cond.empty()143 ) |
1645 | 0 | return false; |
1646 | 143 | |
1647 | 143 | if (143 ExitingBlock != Latch && 143 (TB == Latch || 2 FB == Latch1 )) { |
1648 | 2 | MachineBasicBlock *LTB = nullptr, *LFB = nullptr; |
1649 | 2 | SmallVector<MachineOperand,2> LCond; |
1650 | 2 | bool NotAnalyzed = TII->analyzeBranch(*Latch, LTB, LFB, LCond, false); |
1651 | 2 | if (NotAnalyzed) |
1652 | 0 | return false; |
1653 | 2 | |
1654 | 2 | // Since latch is not the exiting block, the latch branch should be an |
1655 | 2 | // unconditional branch to the loop header. |
1656 | 2 | if (2 TB == Latch2 ) |
1657 | 1 | TB = (LTB == Header) ? 1 LTB1 : LFB0 ; |
1658 | 2 | else |
1659 | 1 | FB = (LTB == Header) ? 1 LTB1 : LFB0 ; |
1660 | 2 | } |
1661 | 143 | if (143 TB != Header143 ) { |
1662 | 9 | if (FB != Header9 ) { |
1663 | 0 | // The latch/exit block does not go back to the header. |
1664 | 0 | return false; |
1665 | 0 | } |
1666 | 9 | // FB is the header (i.e., uncond. jump to branch header) |
1667 | 9 | // In this case, the LoopBody -> TB should not be a back edge otherwise |
1668 | 9 | // it could result in an infinite loop after conversion to hw_loop. |
1669 | 9 | // This case can happen when the Latch has two jumps like this: |
1670 | 9 | // Jmp_c OuterLoopHeader <-- TB |
1671 | 9 | // Jmp InnerLoopHeader <-- FB |
1672 | 9 | if (9 MDT->dominates(TB, FB)9 ) |
1673 | 0 | return false; |
1674 | 143 | } |
1675 | 143 | |
1676 | 143 | // Expecting a predicate register as a condition. It won't be a hardware |
1677 | 143 | // predicate register at this point yet, just a vreg. |
1678 | 143 | // HexagonInstrInfo::AnalyzeBranch for negated branches inserts imm(0) |
1679 | 143 | // into Cond, followed by the predicate register. For non-negated branches |
1680 | 143 | // it's just the register. |
1681 | 143 | unsigned CSz = Cond.size(); |
1682 | 143 | if (CSz != 1 && 143 CSz != 2143 ) |
1683 | 0 | return false; |
1684 | 143 | |
1685 | 143 | if (143 !Cond[CSz-1].isReg()143 ) |
1686 | 0 | return false; |
1687 | 143 | |
1688 | 143 | unsigned P = Cond[CSz-1].getReg(); |
1689 | 143 | MachineInstr *PredDef = MRI->getVRegDef(P); |
1690 | 143 | |
1691 | 143 | if (!PredDef->isCompare()) |
1692 | 1 | return false; |
1693 | 142 | |
1694 | 142 | SmallSet<unsigned,2> CmpRegs; |
1695 | 142 | MachineOperand *CmpImmOp = nullptr; |
1696 | 142 | |
1697 | 142 | // Go over all operands to the compare and look for immediate and register |
1698 | 142 | // operands. Assume that if the compare has a single register use and a |
1699 | 142 | // single immediate operand, then the register is being compared with the |
1700 | 142 | // immediate value. |
1701 | 568 | for (unsigned i = 0, n = PredDef->getNumOperands(); i < n568 ; ++i426 ) { |
1702 | 426 | MachineOperand &MO = PredDef->getOperand(i); |
1703 | 426 | if (MO.isReg()426 ) { |
1704 | 340 | // Skip all implicit references. In one case there was: |
1705 | 340 | // %vreg140<def> = FCMPUGT32_rr %vreg138, %vreg139, %USR<imp-use> |
1706 | 340 | if (MO.isImplicit()) |
1707 | 0 | continue; |
1708 | 340 | if (340 MO.isUse()340 ) { |
1709 | 198 | if (!isImmediate(MO)198 ) { |
1710 | 182 | CmpRegs.insert(MO.getReg()); |
1711 | 182 | continue; |
1712 | 182 | } |
1713 | 16 | // Consider the register to be the "immediate" operand. |
1714 | 16 | if (16 CmpImmOp16 ) |
1715 | 0 | return false; |
1716 | 16 | CmpImmOp = &MO; |
1717 | 16 | } |
1718 | 426 | } else if (86 MO.isImm()86 ) { |
1719 | 86 | if (CmpImmOp) // A second immediate argument? Confusing. Bail out. |
1720 | 0 | return false; |
1721 | 86 | CmpImmOp = &MO; |
1722 | 86 | } |
1723 | 426 | } |
1724 | 142 | |
1725 | 142 | if (142 CmpRegs.empty()142 ) |
1726 | 0 | return false; |
1727 | 142 | |
1728 | 142 | // Check if the compared register follows the order we want. Fix if needed. |
1729 | 142 | for (RegisterInductionSet::iterator I = IndRegs.begin(), E = IndRegs.end(); |
1730 | 148 | I != E148 ; ++I6 ) { |
1731 | 146 | // This is a success. If the register used in the comparison is one that |
1732 | 146 | // we have identified as a bumped (updated) induction register, there is |
1733 | 146 | // nothing to do. |
1734 | 146 | if (CmpRegs.count(I->first)) |
1735 | 136 | return true; |
1736 | 10 | |
1737 | 10 | // Otherwise, if the register being compared comes out of a PHI node, |
1738 | 10 | // and has been recognized as following the induction pattern, and is |
1739 | 10 | // compared against an immediate, we can fix it. |
1740 | 10 | const RegisterBump &RB = I->second; |
1741 | 10 | if (CmpRegs.count(RB.first)10 ) { |
1742 | 4 | if (!CmpImmOp4 ) { |
1743 | 1 | // If both operands to the compare instruction are registers, see if |
1744 | 1 | // it can be changed to use induction register as one of the operands. |
1745 | 1 | MachineInstr *IndI = nullptr; |
1746 | 1 | MachineInstr *nonIndI = nullptr; |
1747 | 1 | MachineOperand *IndMO = nullptr; |
1748 | 1 | MachineOperand *nonIndMO = nullptr; |
1749 | 1 | |
1750 | 3 | for (unsigned i = 1, n = PredDef->getNumOperands(); i < n3 ; ++i2 ) { |
1751 | 2 | MachineOperand &MO = PredDef->getOperand(i); |
1752 | 2 | if (MO.isReg() && 2 MO.getReg() == RB.first2 ) { |
1753 | 1 | DEBUG(dbgs() << "\n DefMI(" << i << ") = " |
1754 | 1 | << *(MRI->getVRegDef(I->first))); |
1755 | 1 | if (IndI) |
1756 | 0 | return false; |
1757 | 1 | |
1758 | 1 | IndI = MRI->getVRegDef(I->first); |
1759 | 1 | IndMO = &MO; |
1760 | 2 | } else if (1 MO.isReg()1 ) { |
1761 | 1 | DEBUG(dbgs() << "\n DefMI(" << i << ") = " |
1762 | 1 | << *(MRI->getVRegDef(MO.getReg()))); |
1763 | 1 | if (nonIndI) |
1764 | 0 | return false; |
1765 | 1 | |
1766 | 1 | nonIndI = MRI->getVRegDef(MO.getReg()); |
1767 | 1 | nonIndMO = &MO; |
1768 | 1 | } |
1769 | 2 | } |
1770 | 1 | if (1 IndI && 1 nonIndI1 && |
1771 | 1 | nonIndI->getOpcode() == Hexagon::A2_addi && |
1772 | 1 | nonIndI->getOperand(2).isImm() && |
1773 | 1 | nonIndI->getOperand(2).getImm() == - RB.second1 ) { |
1774 | 1 | bool Order = orderBumpCompare(IndI, PredDef); |
1775 | 1 | if (Order1 ) { |
1776 | 1 | IndMO->setReg(I->first); |
1777 | 1 | nonIndMO->setReg(nonIndI->getOperand(1).getReg()); |
1778 | 1 | return true; |
1779 | 1 | } |
1780 | 0 | } |
1781 | 0 | return false; |
1782 | 0 | } |
1783 | 3 | |
1784 | 3 | // It is not valid to do this transformation on an unsigned comparison |
1785 | 3 | // because it may underflow. |
1786 | 3 | Comparison::Kind Cmp = |
1787 | 3 | getComparisonKind(PredDef->getOpcode(), nullptr, nullptr, 0); |
1788 | 3 | if (!Cmp || 3 Comparison::isUnsigned(Cmp)3 ) |
1789 | 1 | return false; |
1790 | 2 | |
1791 | 2 | // If the register is being compared against an immediate, try changing |
1792 | 2 | // the compare instruction to use induction register and adjust the |
1793 | 2 | // immediate operand. |
1794 | 2 | int64_t CmpImm = getImmediate(*CmpImmOp); |
1795 | 2 | int64_t V = RB.second; |
1796 | 2 | // Handle Overflow (64-bit). |
1797 | 2 | if (((V > 0) && 2 (CmpImm > INT64_MAX - V)1 ) || |
1798 | 2 | ((V < 0) && 2 (CmpImm < INT64_MIN - V)1 )) |
1799 | 0 | return false; |
1800 | 2 | CmpImm += V; |
1801 | 2 | // Most comparisons of register against an immediate value allow |
1802 | 2 | // the immediate to be constant-extended. There are some exceptions |
1803 | 2 | // though. Make sure the new combination will work. |
1804 | 2 | if (CmpImmOp->isImm()) |
1805 | 2 | if (2 !isImmValidForOpcode(PredDef->getOpcode(), CmpImm)2 ) |
1806 | 0 | return false; |
1807 | 2 | |
1808 | 2 | // Make sure that the compare happens after the bump. Otherwise, |
1809 | 2 | // after the fixup, the compare would use a yet-undefined register. |
1810 | 2 | MachineInstr *BumpI = MRI->getVRegDef(I->first); |
1811 | 2 | bool Order = orderBumpCompare(BumpI, PredDef); |
1812 | 2 | if (!Order) |
1813 | 2 | return false; |
1814 | 0 |
|
1815 | 0 | // Finally, fix the compare instruction. |
1816 | 0 | setImmediate(*CmpImmOp, CmpImm); |
1817 | 0 | for (unsigned i = 0, n = PredDef->getNumOperands(); i < n0 ; ++i0 ) { |
1818 | 0 | MachineOperand &MO = PredDef->getOperand(i); |
1819 | 0 | if (MO.isReg() && 0 MO.getReg() == RB.first0 ) { |
1820 | 0 | MO.setReg(I->first); |
1821 | 0 | return true; |
1822 | 0 | } |
1823 | 0 | } |
1824 | 4 | } |
1825 | 146 | } |
1826 | 142 | |
1827 | 2 | return false; |
1828 | 182 | } |
1829 | | |
1830 | | /// createPreheaderForLoop - Create a preheader for a given loop. |
1831 | | MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop( |
1832 | 0 | MachineLoop *L) { |
1833 | 0 | if (MachineBasicBlock *TmpPH = MLI->findLoopPreheader(L, SpecPreheader)) |
1834 | 0 | return TmpPH; |
1835 | 0 | if (0 !HWCreatePreheader0 ) |
1836 | 0 | return nullptr; |
1837 | 0 |
|
1838 | 0 | MachineBasicBlock *Header = L->getHeader(); |
1839 | 0 | MachineBasicBlock *Latch = L->getLoopLatch(); |
1840 | 0 | MachineBasicBlock *ExitingBlock = L->findLoopControlBlock(); |
1841 | 0 | MachineFunction *MF = Header->getParent(); |
1842 | 0 | DebugLoc DL; |
1843 | 0 |
|
1844 | | #ifndef NDEBUG |
1845 | | if ((!PHFn.empty()) && (PHFn != MF->getName())) |
1846 | | return nullptr; |
1847 | | #endif |
1848 | |
|
1849 | 0 | if (!Latch || 0 !ExitingBlock0 || Header->hasAddressTaken()0 ) |
1850 | 0 | return nullptr; |
1851 | 0 |
|
1852 | 0 | using instr_iterator = MachineBasicBlock::instr_iterator; |
1853 | 0 |
|
1854 | 0 | // Verify that all existing predecessors have analyzable branches |
1855 | 0 | // (or no branches at all). |
1856 | 0 | using MBBVector = std::vector<MachineBasicBlock *>; |
1857 | 0 |
|
1858 | 0 | MBBVector Preds(Header->pred_begin(), Header->pred_end()); |
1859 | 0 | SmallVector<MachineOperand,2> Tmp1; |
1860 | 0 | MachineBasicBlock *TB = nullptr, *FB = nullptr; |
1861 | 0 |
|
1862 | 0 | if (TII->analyzeBranch(*ExitingBlock, TB, FB, Tmp1, false)) |
1863 | 0 | return nullptr; |
1864 | 0 |
|
1865 | 0 | for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); 0 I != E0 ; ++I0 ) { |
1866 | 0 | MachineBasicBlock *PB = *I; |
1867 | 0 | bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp1, false); |
1868 | 0 | if (NotAnalyzed) |
1869 | 0 | return nullptr; |
1870 | 0 | } |
1871 | 0 |
|
1872 | 0 | MachineBasicBlock *NewPH = MF->CreateMachineBasicBlock(); |
1873 | 0 | MF->insert(Header->getIterator(), NewPH); |
1874 | 0 |
|
1875 | 0 | if (Header->pred_size() > 20 ) { |
1876 | 0 | // Ensure that the header has only two predecessors: the preheader and |
1877 | 0 | // the loop latch. Any additional predecessors of the header should |
1878 | 0 | // join at the newly created preheader. Inspect all PHI nodes from the |
1879 | 0 | // header and create appropriate corresponding PHI nodes in the preheader. |
1880 | 0 |
|
1881 | 0 | for (instr_iterator I = Header->instr_begin(), E = Header->instr_end(); |
1882 | 0 | I != E && 0 I->isPHI()0 ; ++I0 ) { |
1883 | 0 | MachineInstr *PN = &*I; |
1884 | 0 |
|
1885 | 0 | const MCInstrDesc &PD = TII->get(TargetOpcode::PHI); |
1886 | 0 | MachineInstr *NewPN = MF->CreateMachineInstr(PD, DL); |
1887 | 0 | NewPH->insert(NewPH->end(), NewPN); |
1888 | 0 |
|
1889 | 0 | unsigned PR = PN->getOperand(0).getReg(); |
1890 | 0 | const TargetRegisterClass *RC = MRI->getRegClass(PR); |
1891 | 0 | unsigned NewPR = MRI->createVirtualRegister(RC); |
1892 | 0 | NewPN->addOperand(MachineOperand::CreateReg(NewPR, true)); |
1893 | 0 |
|
1894 | 0 | // Copy all non-latch operands of a header's PHI node to the newly |
1895 | 0 | // created PHI node in the preheader. |
1896 | 0 | for (unsigned i = 1, n = PN->getNumOperands(); i < n0 ; i += 20 ) { |
1897 | 0 | unsigned PredR = PN->getOperand(i).getReg(); |
1898 | 0 | unsigned PredRSub = PN->getOperand(i).getSubReg(); |
1899 | 0 | MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB(); |
1900 | 0 | if (PredB == Latch) |
1901 | 0 | continue; |
1902 | 0 |
|
1903 | 0 | MachineOperand MO = MachineOperand::CreateReg(PredR, false); |
1904 | 0 | MO.setSubReg(PredRSub); |
1905 | 0 | NewPN->addOperand(MO); |
1906 | 0 | NewPN->addOperand(MachineOperand::CreateMBB(PredB)); |
1907 | 0 | } |
1908 | 0 |
|
1909 | 0 | // Remove copied operands from the old PHI node and add the value |
1910 | 0 | // coming from the preheader's PHI. |
1911 | 0 | for (int i = PN->getNumOperands()-2; i > 00 ; i -= 20 ) { |
1912 | 0 | MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB(); |
1913 | 0 | if (PredB != Latch0 ) { |
1914 | 0 | PN->RemoveOperand(i+1); |
1915 | 0 | PN->RemoveOperand(i); |
1916 | 0 | } |
1917 | 0 | } |
1918 | 0 | PN->addOperand(MachineOperand::CreateReg(NewPR, false)); |
1919 | 0 | PN->addOperand(MachineOperand::CreateMBB(NewPH)); |
1920 | 0 | } |
1921 | 0 | } else { |
1922 | 0 | assert(Header->pred_size() == 2); |
1923 | 0 |
|
1924 | 0 | // The header has only two predecessors, but the non-latch predecessor |
1925 | 0 | // is not a preheader (e.g. it has other successors, etc.) |
1926 | 0 | // In such a case we don't need any extra PHI nodes in the new preheader, |
1927 | 0 | // all we need is to adjust existing PHIs in the header to now refer to |
1928 | 0 | // the new preheader. |
1929 | 0 | for (instr_iterator I = Header->instr_begin(), E = Header->instr_end(); |
1930 | 0 | I != E && 0 I->isPHI()0 ; ++I0 ) { |
1931 | 0 | MachineInstr *PN = &*I; |
1932 | 0 | for (unsigned i = 1, n = PN->getNumOperands(); i < n0 ; i += 20 ) { |
1933 | 0 | MachineOperand &MO = PN->getOperand(i+1); |
1934 | 0 | if (MO.getMBB() != Latch) |
1935 | 0 | MO.setMBB(NewPH); |
1936 | 0 | } |
1937 | 0 | } |
1938 | 0 | } |
1939 | 0 |
|
1940 | 0 | // "Reroute" the CFG edges to link in the new preheader. |
1941 | 0 | // If any of the predecessors falls through to the header, insert a branch |
1942 | 0 | // to the new preheader in that place. |
1943 | 0 | SmallVector<MachineOperand,1> Tmp2; |
1944 | 0 | SmallVector<MachineOperand,1> EmptyCond; |
1945 | 0 |
|
1946 | 0 | TB = FB = nullptr; |
1947 | 0 |
|
1948 | 0 | for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E0 ; ++I0 ) { |
1949 | 0 | MachineBasicBlock *PB = *I; |
1950 | 0 | if (PB != Latch0 ) { |
1951 | 0 | Tmp2.clear(); |
1952 | 0 | bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp2, false); |
1953 | 0 | (void)NotAnalyzed; // suppress compiler warning |
1954 | 0 | assert (!NotAnalyzed && "Should be analyzable!"); |
1955 | 0 | if (TB != Header && 0 (Tmp2.empty() || 0 FB != Header0 )) |
1956 | 0 | TII->insertBranch(*PB, NewPH, nullptr, EmptyCond, DL); |
1957 | 0 | PB->ReplaceUsesOfBlockWith(Header, NewPH); |
1958 | 0 | } |
1959 | 0 | } |
1960 | 0 |
|
1961 | 0 | // It can happen that the latch block will fall through into the header. |
1962 | 0 | // Insert an unconditional branch to the header. |
1963 | 0 | TB = FB = nullptr; |
1964 | 0 | bool LatchNotAnalyzed = TII->analyzeBranch(*Latch, TB, FB, Tmp2, false); |
1965 | 0 | (void)LatchNotAnalyzed; // suppress compiler warning |
1966 | 0 | assert (!LatchNotAnalyzed && "Should be analyzable!"); |
1967 | 0 | if (!TB && 0 !FB0 ) |
1968 | 0 | TII->insertBranch(*Latch, Header, nullptr, EmptyCond, DL); |
1969 | 0 |
|
1970 | 0 | // Finally, the branch from the preheader to the header. |
1971 | 0 | TII->insertBranch(*NewPH, Header, nullptr, EmptyCond, DL); |
1972 | 0 | NewPH->addSuccessor(Header); |
1973 | 0 |
|
1974 | 0 | MachineLoop *ParentLoop = L->getParentLoop(); |
1975 | 0 | if (ParentLoop) |
1976 | 0 | ParentLoop->addBasicBlockToLoop(NewPH, MLI->getBase()); |
1977 | 0 |
|
1978 | 0 | // Update the dominator information with the new preheader. |
1979 | 0 | if (MDT0 ) { |
1980 | 0 | if (MachineDomTreeNode *HN0 = MDT->getNode(Header)) { |
1981 | 0 | if (MachineDomTreeNode *DHN0 = HN->getIDom()) { |
1982 | 0 | MDT->addNewBlock(NewPH, DHN->getBlock()); |
1983 | 0 | MDT->changeImmediateDominator(Header, NewPH); |
1984 | 0 | } |
1985 | 0 | } |
1986 | 0 | } |
1987 | 0 |
|
1988 | 0 | return NewPH; |
1989 | 0 | } |