/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/CodeGen/EarlyIfConversion.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===-- EarlyIfConversion.cpp - If-conversion on SSA form machine code ----===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // Early if-conversion is for out-of-order CPUs that don't have a lot of |
10 | | // predicable instructions. The goal is to eliminate conditional branches that |
11 | | // may mispredict. |
12 | | // |
13 | | // Instructions from both sides of the branch are executed specutatively, and a |
14 | | // cmov instruction selects the result. |
15 | | // |
16 | | //===----------------------------------------------------------------------===// |
17 | | |
18 | | #include "llvm/ADT/BitVector.h" |
19 | | #include "llvm/ADT/PostOrderIterator.h" |
20 | | #include "llvm/ADT/SetVector.h" |
21 | | #include "llvm/ADT/SmallPtrSet.h" |
22 | | #include "llvm/ADT/SparseSet.h" |
23 | | #include "llvm/ADT/Statistic.h" |
24 | | #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" |
25 | | #include "llvm/CodeGen/MachineDominators.h" |
26 | | #include "llvm/CodeGen/MachineFunction.h" |
27 | | #include "llvm/CodeGen/MachineFunctionPass.h" |
28 | | #include "llvm/CodeGen/MachineLoopInfo.h" |
29 | | #include "llvm/CodeGen/MachineRegisterInfo.h" |
30 | | #include "llvm/CodeGen/MachineTraceMetrics.h" |
31 | | #include "llvm/CodeGen/Passes.h" |
32 | | #include "llvm/CodeGen/TargetInstrInfo.h" |
33 | | #include "llvm/CodeGen/TargetRegisterInfo.h" |
34 | | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
35 | | #include "llvm/Support/CommandLine.h" |
36 | | #include "llvm/Support/Debug.h" |
37 | | #include "llvm/Support/raw_ostream.h" |
38 | | |
39 | | using namespace llvm; |
40 | | |
41 | | #define DEBUG_TYPE "early-ifcvt" |
42 | | |
43 | | // Absolute maximum number of instructions allowed per speculated block. |
44 | | // This bypasses all other heuristics, so it should be set fairly high. |
45 | | static cl::opt<unsigned> |
46 | | BlockInstrLimit("early-ifcvt-limit", cl::init(30), cl::Hidden, |
47 | | cl::desc("Maximum number of instructions per speculated block.")); |
48 | | |
49 | | // Stress testing mode - disable heuristics. |
50 | | static cl::opt<bool> Stress("stress-early-ifcvt", cl::Hidden, |
51 | | cl::desc("Turn all knobs to 11")); |
52 | | |
53 | | STATISTIC(NumDiamondsSeen, "Number of diamonds"); |
54 | | STATISTIC(NumDiamondsConv, "Number of diamonds converted"); |
55 | | STATISTIC(NumTrianglesSeen, "Number of triangles"); |
56 | | STATISTIC(NumTrianglesConv, "Number of triangles converted"); |
57 | | |
58 | | //===----------------------------------------------------------------------===// |
59 | | // SSAIfConv |
60 | | //===----------------------------------------------------------------------===// |
61 | | // |
62 | | // The SSAIfConv class performs if-conversion on SSA form machine code after |
63 | | // determining if it is possible. The class contains no heuristics; external |
64 | | // code should be used to determine when if-conversion is a good idea. |
65 | | // |
66 | | // SSAIfConv can convert both triangles and diamonds: |
67 | | // |
68 | | // Triangle: Head Diamond: Head |
69 | | // | \ / \_ |
70 | | // | \ / | |
71 | | // | [TF]BB FBB TBB |
72 | | // | / \ / |
73 | | // | / \ / |
74 | | // Tail Tail |
75 | | // |
76 | | // Instructions in the conditional blocks TBB and/or FBB are spliced into the |
77 | | // Head block, and phis in the Tail block are converted to select instructions. |
78 | | // |
79 | | namespace { |
80 | | class SSAIfConv { |
81 | | const TargetInstrInfo *TII; |
82 | | const TargetRegisterInfo *TRI; |
83 | | MachineRegisterInfo *MRI; |
84 | | |
85 | | public: |
86 | | /// The block containing the conditional branch. |
87 | | MachineBasicBlock *Head; |
88 | | |
89 | | /// The block containing phis after the if-then-else. |
90 | | MachineBasicBlock *Tail; |
91 | | |
92 | | /// The 'true' conditional block as determined by AnalyzeBranch. |
93 | | MachineBasicBlock *TBB; |
94 | | |
95 | | /// The 'false' conditional block as determined by AnalyzeBranch. |
96 | | MachineBasicBlock *FBB; |
97 | | |
98 | | /// isTriangle - When there is no 'else' block, either TBB or FBB will be |
99 | | /// equal to Tail. |
100 | 13.7k | bool isTriangle() const { return TBB == Tail || FBB == Tail1.85k ; } |
101 | | |
102 | | /// Returns the Tail predecessor for the True side. |
103 | 126k | MachineBasicBlock *getTPred() const { return TBB == Tail ? Head96.3k : TBB30.3k ; } |
104 | | |
105 | | /// Returns the Tail predecessor for the False side. |
106 | 119k | MachineBasicBlock *getFPred() const { return FBB == Tail ? Head5.21k : FBB114k ; } |
107 | | |
108 | | /// Information about each phi in the Tail block. |
109 | | struct PHIInfo { |
110 | | MachineInstr *PHI; |
111 | | unsigned TReg, FReg; |
112 | | // Latencies from Cond+Branch, TReg, and FReg to DstReg. |
113 | | int CondCycles, TCycles, FCycles; |
114 | | |
115 | | PHIInfo(MachineInstr *phi) |
116 | 115k | : PHI(phi), TReg(0), FReg(0), CondCycles(0), TCycles(0), FCycles(0) {} |
117 | | }; |
118 | | |
119 | | SmallVector<PHIInfo, 8> PHIs; |
120 | | |
121 | | private: |
122 | | /// The branch condition determined by AnalyzeBranch. |
123 | | SmallVector<MachineOperand, 4> Cond; |
124 | | |
125 | | /// Instructions in Head that define values used by the conditional blocks. |
126 | | /// The hoisted instructions must be inserted after these instructions. |
127 | | SmallPtrSet<MachineInstr*, 8> InsertAfter; |
128 | | |
129 | | /// Register units clobbered by the conditional blocks. |
130 | | BitVector ClobberedRegUnits; |
131 | | |
132 | | // Scratch pad for findInsertionPoint. |
133 | | SparseSet<unsigned> LiveRegUnits; |
134 | | |
135 | | /// Insertion point in Head for speculatively executed instructions form TBB |
136 | | /// and FBB. |
137 | | MachineBasicBlock::iterator InsertionPoint; |
138 | | |
139 | | /// Return true if all non-terminator instructions in MBB can be safely |
140 | | /// speculated. |
141 | | bool canSpeculateInstrs(MachineBasicBlock *MBB); |
142 | | |
143 | | /// Find a valid insertion point in Head. |
144 | | bool findInsertionPoint(); |
145 | | |
146 | | /// Replace PHI instructions in Tail with selects. |
147 | | void replacePHIInstrs(); |
148 | | |
149 | | /// Insert selects and rewrite PHI operands to use them. |
150 | | void rewritePHIOperands(); |
151 | | |
152 | | public: |
153 | | /// runOnMachineFunction - Initialize per-function data structures. |
154 | 275k | void runOnMachineFunction(MachineFunction &MF) { |
155 | 275k | TII = MF.getSubtarget().getInstrInfo(); |
156 | 275k | TRI = MF.getSubtarget().getRegisterInfo(); |
157 | 275k | MRI = &MF.getRegInfo(); |
158 | 275k | LiveRegUnits.clear(); |
159 | 275k | LiveRegUnits.setUniverse(TRI->getNumRegUnits()); |
160 | 275k | ClobberedRegUnits.clear(); |
161 | 275k | ClobberedRegUnits.resize(TRI->getNumRegUnits()); |
162 | 275k | } |
163 | | |
164 | | /// canConvertIf - If the sub-CFG headed by MBB can be if-converted, |
165 | | /// initialize the internal state, and return true. |
166 | | bool canConvertIf(MachineBasicBlock *MBB); |
167 | | |
168 | | /// convertIf - If-convert the last block passed to canConvertIf(), assuming |
169 | | /// it is possible. Add any erased blocks to RemovedBlocks. |
170 | | void convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks); |
171 | | }; |
172 | | } // end anonymous namespace |
173 | | |
174 | | |
175 | | /// canSpeculateInstrs - Returns true if all the instructions in MBB can safely |
176 | | /// be speculated. The terminators are not considered. |
177 | | /// |
178 | | /// If instructions use any values that are defined in the head basic block, |
179 | | /// the defining instructions are added to InsertAfter. |
180 | | /// |
181 | | /// Any clobbered regunits are added to ClobberedRegUnits. |
182 | | /// |
183 | 91.5k | bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) { |
184 | 91.5k | // Reject any live-in physregs. It's probably CPSR/EFLAGS, and very hard to |
185 | 91.5k | // get right. |
186 | 91.5k | if (!MBB->livein_empty()) { |
187 | 0 | LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has live-ins.\n"); |
188 | 0 | return false; |
189 | 0 | } |
190 | 91.5k | |
191 | 91.5k | unsigned InstrCount = 0; |
192 | 91.5k | |
193 | 91.5k | // Check all instructions, except the terminators. It is assumed that |
194 | 91.5k | // terminators never have side effects or define any used register values. |
195 | 91.5k | for (MachineBasicBlock::iterator I = MBB->begin(), |
196 | 229k | E = MBB->getFirstTerminator(); I != E; ++I138k ) { |
197 | 220k | if (I->isDebugInstr()) |
198 | 0 | continue; |
199 | 220k | |
200 | 220k | if (++InstrCount > BlockInstrLimit && !Stress1.44k ) { |
201 | 1.44k | LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has more than " |
202 | 1.44k | << BlockInstrLimit << " instructions.\n"); |
203 | 1.44k | return false; |
204 | 1.44k | } |
205 | 219k | |
206 | 219k | // There shouldn't normally be any phis in a single-predecessor block. |
207 | 219k | if (I->isPHI()) { |
208 | 0 | LLVM_DEBUG(dbgs() << "Can't hoist: " << *I); |
209 | 0 | return false; |
210 | 0 | } |
211 | 219k | |
212 | 219k | // Don't speculate loads. Note that it may be possible and desirable to |
213 | 219k | // speculate GOT or constant pool loads that are guaranteed not to trap, |
214 | 219k | // but we don't support that for now. |
215 | 219k | if (I->mayLoad()) { |
216 | 31.8k | LLVM_DEBUG(dbgs() << "Won't speculate load: " << *I); |
217 | 31.8k | return false; |
218 | 31.8k | } |
219 | 187k | |
220 | 187k | // We never speculate stores, so an AA pointer isn't necessary. |
221 | 187k | bool DontMoveAcrossStore = true; |
222 | 187k | if (!I->isSafeToMove(nullptr, DontMoveAcrossStore)) { |
223 | 49.1k | LLVM_DEBUG(dbgs() << "Can't speculate: " << *I); |
224 | 49.1k | return false; |
225 | 49.1k | } |
226 | 138k | |
227 | 138k | // Check for any dependencies on Head instructions. |
228 | 437k | for (const MachineOperand &MO : I->operands())138k { |
229 | 437k | if (MO.isRegMask()) { |
230 | 0 | LLVM_DEBUG(dbgs() << "Won't speculate regmask: " << *I); |
231 | 0 | return false; |
232 | 0 | } |
233 | 437k | if (!MO.isReg()) |
234 | 84.3k | continue; |
235 | 353k | unsigned Reg = MO.getReg(); |
236 | 353k | |
237 | 353k | // Remember clobbered regunits. |
238 | 353k | if (MO.isDef() && TargetRegisterInfo::isPhysicalRegister(Reg)156k ) |
239 | 53.2k | for (MCRegUnitIterator Units(Reg, TRI); 26.6k Units.isValid(); ++Units26.6k ) |
240 | 26.6k | ClobberedRegUnits.set(*Units); |
241 | 353k | |
242 | 353k | if (!MO.readsReg() || !TargetRegisterInfo::isVirtualRegister(Reg)196k ) |
243 | 177k | continue; |
244 | 175k | MachineInstr *DefMI = MRI->getVRegDef(Reg); |
245 | 175k | if (!DefMI || DefMI->getParent() != Head) |
246 | 147k | continue; |
247 | 28.6k | if (InsertAfter.insert(DefMI).second) |
248 | 28.6k | LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " depends on " |
249 | 28.6k | << *DefMI); |
250 | 28.6k | if (DefMI->isTerminator()) { |
251 | 0 | LLVM_DEBUG(dbgs() << "Can't insert instructions below terminator.\n"); |
252 | 0 | return false; |
253 | 0 | } |
254 | 28.6k | } |
255 | 138k | } |
256 | 91.5k | return true9.14k ; |
257 | 91.5k | } |
258 | | |
259 | | |
260 | | /// Find an insertion point in Head for the speculated instructions. The |
261 | | /// insertion point must be: |
262 | | /// |
263 | | /// 1. Before any terminators. |
264 | | /// 2. After any instructions in InsertAfter. |
265 | | /// 3. Not have any clobbered regunits live. |
266 | | /// |
267 | | /// This function sets InsertionPoint and returns true when successful, it |
268 | | /// returns false if no valid insertion point could be found. |
269 | | /// |
270 | 7.73k | bool SSAIfConv::findInsertionPoint() { |
271 | 7.73k | // Keep track of live regunits before the current position. |
272 | 7.73k | // Only track RegUnits that are also in ClobberedRegUnits. |
273 | 7.73k | LiveRegUnits.clear(); |
274 | 7.73k | SmallVector<unsigned, 8> Reads; |
275 | 7.73k | MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator(); |
276 | 7.73k | MachineBasicBlock::iterator I = Head->end(); |
277 | 7.73k | MachineBasicBlock::iterator B = Head->begin(); |
278 | 15.1k | while (I != B) { |
279 | 15.1k | --I; |
280 | 15.1k | // Some of the conditional code depends in I. |
281 | 15.1k | if (InsertAfter.count(&*I)) { |
282 | 22 | LLVM_DEBUG(dbgs() << "Can't insert code after " << *I); |
283 | 22 | return false; |
284 | 22 | } |
285 | 15.0k | |
286 | 15.0k | // Update live regunits. |
287 | 37.2k | for (const MachineOperand &MO : I->operands())15.0k { |
288 | 37.2k | // We're ignoring regmask operands. That is conservatively correct. |
289 | 37.2k | if (!MO.isReg()) |
290 | 19.4k | continue; |
291 | 17.7k | unsigned Reg = MO.getReg(); |
292 | 17.7k | if (!TargetRegisterInfo::isPhysicalRegister(Reg)) |
293 | 7.97k | continue; |
294 | 9.78k | // I clobbers Reg, so it isn't live before I. |
295 | 9.78k | if (MO.isDef()) |
296 | 9.46k | for (MCRegUnitIterator Units(Reg, TRI); 4.72k Units.isValid(); ++Units4.73k ) |
297 | 4.73k | LiveRegUnits.erase(*Units); |
298 | 9.78k | // Unless I reads Reg. |
299 | 9.78k | if (MO.readsReg()) |
300 | 5.05k | Reads.push_back(Reg); |
301 | 9.78k | } |
302 | 15.0k | // Anything read by I is live before I. |
303 | 20.1k | while (!Reads.empty()) |
304 | 10.1k | for (MCRegUnitIterator Units(Reads.pop_back_val(), TRI); 5.05k Units.isValid(); |
305 | 5.07k | ++Units) |
306 | 5.07k | if (ClobberedRegUnits.test(*Units)) |
307 | 2.65k | LiveRegUnits.insert(*Units); |
308 | 15.0k | |
309 | 15.0k | // We can't insert before a terminator. |
310 | 15.0k | if (I != FirstTerm && I->isTerminator()7.35k ) |
311 | 4.64k | continue; |
312 | 10.4k | |
313 | 10.4k | // Some of the clobbered registers are live before I, not a valid insertion |
314 | 10.4k | // point. |
315 | 10.4k | if (!LiveRegUnits.empty()) { |
316 | 2.72k | LLVM_DEBUG({ |
317 | 2.72k | dbgs() << "Would clobber"; |
318 | 2.72k | for (SparseSet<unsigned>::const_iterator |
319 | 2.72k | i = LiveRegUnits.begin(), e = LiveRegUnits.end(); i != e; ++i) |
320 | 2.72k | dbgs() << ' ' << printRegUnit(*i, TRI); |
321 | 2.72k | dbgs() << " live before " << *I; |
322 | 2.72k | }); |
323 | 2.72k | continue; |
324 | 2.72k | } |
325 | 7.71k | |
326 | 7.71k | // This is a valid insertion point. |
327 | 7.71k | InsertionPoint = I; |
328 | 7.71k | LLVM_DEBUG(dbgs() << "Can insert before " << *I); |
329 | 7.71k | return true; |
330 | 7.71k | } |
331 | 7.73k | LLVM_DEBUG0 (dbgs() << "No legal insertion point found.\n"); |
332 | 0 | return false; |
333 | 7.73k | } |
334 | | |
335 | | |
336 | | |
337 | | /// canConvertIf - analyze the sub-cfg rooted in MBB, and return true if it is |
338 | | /// a potential candidate for if-conversion. Fill out the internal state. |
339 | | /// |
340 | 2.03M | bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) { |
341 | 2.03M | Head = MBB; |
342 | 2.03M | TBB = FBB = Tail = nullptr; |
343 | 2.03M | |
344 | 2.03M | if (Head->succ_size() != 2) |
345 | 978k | return false; |
346 | 1.05M | MachineBasicBlock *Succ0 = Head->succ_begin()[0]; |
347 | 1.05M | MachineBasicBlock *Succ1 = Head->succ_begin()[1]; |
348 | 1.05M | |
349 | 1.05M | // Canonicalize so Succ0 has MBB as its single predecessor. |
350 | 1.05M | if (Succ0->pred_size() != 1) |
351 | 501k | std::swap(Succ0, Succ1); |
352 | 1.05M | |
353 | 1.05M | if (Succ0->pred_size() != 1 || Succ0->succ_size() != 1883k ) |
354 | 674k | return false; |
355 | 383k | |
356 | 383k | Tail = Succ0->succ_begin()[0]; |
357 | 383k | |
358 | 383k | // This is not a triangle. |
359 | 383k | if (Tail != Succ1) { |
360 | 217k | // Check for a diamond. We won't deal with any critical edges. |
361 | 217k | if (Succ1->pred_size() != 1 || Succ1->succ_size() != 174.3k || |
362 | 217k | Succ1->succ_begin()[0] != Tail32.4k ) |
363 | 191k | return false; |
364 | 26.2k | LLVM_DEBUG(dbgs() << "\nDiamond: " << printMBBReference(*Head) << " -> " |
365 | 26.2k | << printMBBReference(*Succ0) << "/" |
366 | 26.2k | << printMBBReference(*Succ1) << " -> " |
367 | 26.2k | << printMBBReference(*Tail) << '\n'); |
368 | 26.2k | |
369 | 26.2k | // Live-in physregs are tricky to get right when speculating code. |
370 | 26.2k | if (!Tail->livein_empty()) { |
371 | 0 | LLVM_DEBUG(dbgs() << "Tail has live-ins.\n"); |
372 | 0 | return false; |
373 | 0 | } |
374 | 165k | } else { |
375 | 165k | LLVM_DEBUG(dbgs() << "\nTriangle: " << printMBBReference(*Head) << " -> " |
376 | 165k | << printMBBReference(*Succ0) << " -> " |
377 | 165k | << printMBBReference(*Tail) << '\n'); |
378 | 165k | } |
379 | 383k | |
380 | 383k | // This is a triangle or a diamond. |
381 | 383k | // If Tail doesn't have any phis, there must be side effects. |
382 | 383k | if (192k Tail->empty()192k || !Tail->front().isPHI()191k ) { |
383 | 100k | LLVM_DEBUG(dbgs() << "No phis in tail.\n"); |
384 | 100k | return false; |
385 | 100k | } |
386 | 91.3k | |
387 | 91.3k | // The branch we're looking to eliminate must be analyzable. |
388 | 91.3k | Cond.clear(); |
389 | 91.3k | if (TII->analyzeBranch(*Head, TBB, FBB, Cond)) { |
390 | 28 | LLVM_DEBUG(dbgs() << "Branch not analyzable.\n"); |
391 | 28 | return false; |
392 | 28 | } |
393 | 91.3k | |
394 | 91.3k | // This is weird, probably some sort of degenerate CFG. |
395 | 91.3k | if (!TBB) { |
396 | 0 | LLVM_DEBUG(dbgs() << "AnalyzeBranch didn't find conditional branch.\n"); |
397 | 0 | return false; |
398 | 0 | } |
399 | 91.3k | |
400 | 91.3k | // Make sure the analyzed branch is conditional; one of the successors |
401 | 91.3k | // could be a landing pad. (Empty landing pads can be generated on Windows.) |
402 | 91.3k | if (Cond.empty()) { |
403 | 3 | LLVM_DEBUG(dbgs() << "AnalyzeBranch found an unconditional branch.\n"); |
404 | 3 | return false; |
405 | 3 | } |
406 | 91.3k | |
407 | 91.3k | // AnalyzeBranch doesn't set FBB on a fall-through branch. |
408 | 91.3k | // Make sure it is always set. |
409 | 91.3k | FBB = TBB == Succ0 ? Succ111.0k : Succ080.3k ; |
410 | 91.3k | |
411 | 91.3k | // Any phis in the tail block must be convertible to selects. |
412 | 91.3k | PHIs.clear(); |
413 | 91.3k | MachineBasicBlock *TPred = getTPred(); |
414 | 91.3k | MachineBasicBlock *FPred = getFPred(); |
415 | 91.3k | for (MachineBasicBlock::iterator I = Tail->begin(), E = Tail->end(); |
416 | 205k | I != E && I->isPHI()204k ; ++I113k ) { |
417 | 115k | PHIs.push_back(&*I); |
418 | 115k | PHIInfo &PI = PHIs.back(); |
419 | 115k | // Find PHI operands corresponding to TPred and FPred. |
420 | 648k | for (unsigned i = 1; i != PI.PHI->getNumOperands(); i += 2533k ) { |
421 | 533k | if (PI.PHI->getOperand(i+1).getMBB() == TPred) |
422 | 115k | PI.TReg = PI.PHI->getOperand(i).getReg(); |
423 | 533k | if (PI.PHI->getOperand(i+1).getMBB() == FPred) |
424 | 115k | PI.FReg = PI.PHI->getOperand(i).getReg(); |
425 | 533k | } |
426 | 115k | assert(TargetRegisterInfo::isVirtualRegister(PI.TReg) && "Bad PHI"); |
427 | 115k | assert(TargetRegisterInfo::isVirtualRegister(PI.FReg) && "Bad PHI"); |
428 | 115k | |
429 | 115k | // Get target information. |
430 | 115k | if (!TII->canInsertSelect(*Head, Cond, PI.TReg, PI.FReg, |
431 | 115k | PI.CondCycles, PI.TCycles, PI.FCycles)) { |
432 | 1.18k | LLVM_DEBUG(dbgs() << "Can't convert: " << *PI.PHI); |
433 | 1.18k | return false; |
434 | 1.18k | } |
435 | 115k | } |
436 | 91.3k | |
437 | 91.3k | // Check that the conditional instructions can be speculated. |
438 | 91.3k | InsertAfter.clear(); |
439 | 90.1k | ClobberedRegUnits.reset(); |
440 | 90.1k | if (TBB != Tail && !canSpeculateInstrs(TBB)25.9k ) |
441 | 24.1k | return false; |
442 | 66.0k | if (FBB != Tail && !canSpeculateInstrs(FBB)65.6k ) |
443 | 58.2k | return false; |
444 | 7.73k | |
445 | 7.73k | // Try to find a valid insertion point for the speculated instructions in the |
446 | 7.73k | // head basic block. |
447 | 7.73k | if (!findInsertionPoint()) |
448 | 22 | return false; |
449 | 7.71k | |
450 | 7.71k | if (isTriangle()) |
451 | 6.89k | ++NumTrianglesSeen; |
452 | 815 | else |
453 | 815 | ++NumDiamondsSeen; |
454 | 7.71k | return true; |
455 | 7.71k | } |
456 | | |
457 | | /// replacePHIInstrs - Completely replace PHI instructions with selects. |
458 | | /// This is possible when the only Tail predecessors are the if-converted |
459 | | /// blocks. |
460 | 2.36k | void SSAIfConv::replacePHIInstrs() { |
461 | 2.36k | assert(Tail->pred_size() == 2 && "Cannot replace PHIs"); |
462 | 2.36k | MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator(); |
463 | 2.36k | assert(FirstTerm != Head->end() && "No terminators"); |
464 | 2.36k | DebugLoc HeadDL = FirstTerm->getDebugLoc(); |
465 | 2.36k | |
466 | 2.36k | // Convert all PHIs to select instructions inserted before FirstTerm. |
467 | 6.67k | for (unsigned i = 0, e = PHIs.size(); i != e; ++i4.30k ) { |
468 | 4.30k | PHIInfo &PI = PHIs[i]; |
469 | 4.30k | LLVM_DEBUG(dbgs() << "If-converting " << *PI.PHI); |
470 | 4.30k | unsigned DstReg = PI.PHI->getOperand(0).getReg(); |
471 | 4.30k | TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg); |
472 | 4.30k | LLVM_DEBUG(dbgs() << " --> " << *std::prev(FirstTerm)); |
473 | 4.30k | PI.PHI->eraseFromParent(); |
474 | 4.30k | PI.PHI = nullptr; |
475 | 4.30k | } |
476 | 2.36k | } |
477 | | |
478 | | /// rewritePHIOperands - When there are additional Tail predecessors, insert |
479 | | /// select instructions in Head and rewrite PHI operands to use the selects. |
480 | | /// Keep the PHI instructions in Tail to handle the other predecessors. |
481 | 3.68k | void SSAIfConv::rewritePHIOperands() { |
482 | 3.68k | MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator(); |
483 | 3.68k | assert(FirstTerm != Head->end() && "No terminators"); |
484 | 3.68k | DebugLoc HeadDL = FirstTerm->getDebugLoc(); |
485 | 3.68k | |
486 | 3.68k | // Convert all PHIs to select instructions inserted before FirstTerm. |
487 | 10.7k | for (unsigned i = 0, e = PHIs.size(); i != e; ++i7.11k ) { |
488 | 7.11k | PHIInfo &PI = PHIs[i]; |
489 | 7.11k | unsigned DstReg = 0; |
490 | 7.11k | |
491 | 7.11k | LLVM_DEBUG(dbgs() << "If-converting " << *PI.PHI); |
492 | 7.11k | if (PI.TReg == PI.FReg) { |
493 | 3.07k | // We do not need the select instruction if both incoming values are |
494 | 3.07k | // equal. |
495 | 3.07k | DstReg = PI.TReg; |
496 | 4.03k | } else { |
497 | 4.03k | unsigned PHIDst = PI.PHI->getOperand(0).getReg(); |
498 | 4.03k | DstReg = MRI->createVirtualRegister(MRI->getRegClass(PHIDst)); |
499 | 4.03k | TII->insertSelect(*Head, FirstTerm, HeadDL, |
500 | 4.03k | DstReg, Cond, PI.TReg, PI.FReg); |
501 | 4.03k | LLVM_DEBUG(dbgs() << " --> " << *std::prev(FirstTerm)); |
502 | 4.03k | } |
503 | 7.11k | |
504 | 7.11k | // Rewrite PHI operands TPred -> (DstReg, Head), remove FPred. |
505 | 34.7k | for (unsigned i = PI.PHI->getNumOperands(); i != 1; i -= 227.6k ) { |
506 | 27.6k | MachineBasicBlock *MBB = PI.PHI->getOperand(i-1).getMBB(); |
507 | 27.6k | if (MBB == getTPred()) { |
508 | 7.11k | PI.PHI->getOperand(i-1).setMBB(Head); |
509 | 7.11k | PI.PHI->getOperand(i-2).setReg(DstReg); |
510 | 20.5k | } else if (MBB == getFPred()) { |
511 | 7.11k | PI.PHI->RemoveOperand(i-1); |
512 | 7.11k | PI.PHI->RemoveOperand(i-2); |
513 | 7.11k | } |
514 | 27.6k | } |
515 | 7.11k | LLVM_DEBUG(dbgs() << " --> " << *PI.PHI); |
516 | 7.11k | } |
517 | 3.68k | } |
518 | | |
519 | | /// convertIf - Execute the if conversion after canConvertIf has determined the |
520 | | /// feasibility. |
521 | | /// |
522 | | /// Any basic blocks erased will be added to RemovedBlocks. |
523 | | /// |
524 | 6.05k | void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) { |
525 | 6.05k | assert(Head && Tail && TBB && FBB && "Call canConvertIf first."); |
526 | 6.05k | |
527 | 6.05k | // Update statistics. |
528 | 6.05k | if (isTriangle()) |
529 | 5.48k | ++NumTrianglesConv; |
530 | 563 | else |
531 | 563 | ++NumDiamondsConv; |
532 | 6.05k | |
533 | 6.05k | // Move all instructions into Head, except for the terminators. |
534 | 6.05k | if (TBB != Tail) |
535 | 642 | Head->splice(InsertionPoint, TBB, TBB->begin(), TBB->getFirstTerminator()); |
536 | 6.05k | if (FBB != Tail) |
537 | 5.97k | Head->splice(InsertionPoint, FBB, FBB->begin(), FBB->getFirstTerminator()); |
538 | 6.05k | |
539 | 6.05k | // Are there extra Tail predecessors? |
540 | 6.05k | bool ExtraPreds = Tail->pred_size() != 2; |
541 | 6.05k | if (ExtraPreds) |
542 | 3.68k | rewritePHIOperands(); |
543 | 2.36k | else |
544 | 2.36k | replacePHIInstrs(); |
545 | 6.05k | |
546 | 6.05k | // Fix up the CFG, temporarily leave Head without any successors. |
547 | 6.05k | Head->removeSuccessor(TBB); |
548 | 6.05k | Head->removeSuccessor(FBB, true); |
549 | 6.05k | if (TBB != Tail) |
550 | 642 | TBB->removeSuccessor(Tail, true); |
551 | 6.05k | if (FBB != Tail) |
552 | 5.97k | FBB->removeSuccessor(Tail, true); |
553 | 6.05k | |
554 | 6.05k | // Fix up Head's terminators. |
555 | 6.05k | // It should become a single branch or a fallthrough. |
556 | 6.05k | DebugLoc HeadDL = Head->getFirstTerminator()->getDebugLoc(); |
557 | 6.05k | TII->removeBranch(*Head); |
558 | 6.05k | |
559 | 6.05k | // Erase the now empty conditional blocks. It is likely that Head can fall |
560 | 6.05k | // through to Tail, and we can join the two blocks. |
561 | 6.05k | if (TBB != Tail) { |
562 | 642 | RemovedBlocks.push_back(TBB); |
563 | 642 | TBB->eraseFromParent(); |
564 | 642 | } |
565 | 6.05k | if (FBB != Tail) { |
566 | 5.97k | RemovedBlocks.push_back(FBB); |
567 | 5.97k | FBB->eraseFromParent(); |
568 | 5.97k | } |
569 | 6.05k | |
570 | 6.05k | assert(Head->succ_empty() && "Additional head successors?"); |
571 | 6.05k | if (!ExtraPreds && Head->isLayoutSuccessor(Tail)2.36k ) { |
572 | 2.35k | // Splice Tail onto the end of Head. |
573 | 2.35k | LLVM_DEBUG(dbgs() << "Joining tail " << printMBBReference(*Tail) |
574 | 2.35k | << " into head " << printMBBReference(*Head) << '\n'); |
575 | 2.35k | Head->splice(Head->end(), Tail, |
576 | 2.35k | Tail->begin(), Tail->end()); |
577 | 2.35k | Head->transferSuccessorsAndUpdatePHIs(Tail); |
578 | 2.35k | RemovedBlocks.push_back(Tail); |
579 | 2.35k | Tail->eraseFromParent(); |
580 | 3.69k | } else { |
581 | 3.69k | // We need a branch to Tail, let code placement work it out later. |
582 | 3.69k | LLVM_DEBUG(dbgs() << "Converting to unconditional branch.\n"); |
583 | 3.69k | SmallVector<MachineOperand, 0> EmptyCond; |
584 | 3.69k | TII->insertBranch(*Head, Tail, nullptr, EmptyCond, HeadDL); |
585 | 3.69k | Head->addSuccessor(Tail); |
586 | 3.69k | } |
587 | 6.05k | LLVM_DEBUG(dbgs() << *Head); |
588 | 6.05k | } |
589 | | |
590 | | |
591 | | //===----------------------------------------------------------------------===// |
592 | | // EarlyIfConverter Pass |
593 | | //===----------------------------------------------------------------------===// |
594 | | |
595 | | namespace { |
596 | | class EarlyIfConverter : public MachineFunctionPass { |
597 | | const TargetInstrInfo *TII; |
598 | | const TargetRegisterInfo *TRI; |
599 | | MCSchedModel SchedModel; |
600 | | MachineRegisterInfo *MRI; |
601 | | MachineDominatorTree *DomTree; |
602 | | MachineLoopInfo *Loops; |
603 | | MachineTraceMetrics *Traces; |
604 | | MachineTraceMetrics::Ensemble *MinInstr; |
605 | | SSAIfConv IfConv; |
606 | | |
607 | | public: |
608 | | static char ID; |
609 | 22.6k | EarlyIfConverter() : MachineFunctionPass(ID) {} |
610 | | void getAnalysisUsage(AnalysisUsage &AU) const override; |
611 | | bool runOnMachineFunction(MachineFunction &MF) override; |
612 | 433k | StringRef getPassName() const override { return "Early If-Conversion"; } |
613 | | |
614 | | private: |
615 | | bool tryConvertIf(MachineBasicBlock*); |
616 | | void updateDomTree(ArrayRef<MachineBasicBlock*> Removed); |
617 | | void updateLoops(ArrayRef<MachineBasicBlock*> Removed); |
618 | | void invalidateTraces(); |
619 | | bool shouldConvertIf(); |
620 | | }; |
621 | | } // end anonymous namespace |
622 | | |
623 | | char EarlyIfConverter::ID = 0; |
624 | | char &llvm::EarlyIfConverterID = EarlyIfConverter::ID; |
625 | | |
626 | 42.3k | INITIALIZE_PASS_BEGIN(EarlyIfConverter, DEBUG_TYPE, |
627 | 42.3k | "Early If Converter", false, false) |
628 | 42.3k | INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) |
629 | 42.3k | INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) |
630 | 42.3k | INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics) |
631 | 42.3k | INITIALIZE_PASS_END(EarlyIfConverter, DEBUG_TYPE, |
632 | | "Early If Converter", false, false) |
633 | | |
634 | 22.5k | void EarlyIfConverter::getAnalysisUsage(AnalysisUsage &AU) const { |
635 | 22.5k | AU.addRequired<MachineBranchProbabilityInfo>(); |
636 | 22.5k | AU.addRequired<MachineDominatorTree>(); |
637 | 22.5k | AU.addPreserved<MachineDominatorTree>(); |
638 | 22.5k | AU.addRequired<MachineLoopInfo>(); |
639 | 22.5k | AU.addPreserved<MachineLoopInfo>(); |
640 | 22.5k | AU.addRequired<MachineTraceMetrics>(); |
641 | 22.5k | AU.addPreserved<MachineTraceMetrics>(); |
642 | 22.5k | MachineFunctionPass::getAnalysisUsage(AU); |
643 | 22.5k | } |
644 | | |
645 | | /// Update the dominator tree after if-conversion erased some blocks. |
646 | 6.05k | void EarlyIfConverter::updateDomTree(ArrayRef<MachineBasicBlock*> Removed) { |
647 | 6.05k | // convertIf can remove TBB, FBB, and Tail can be merged into Head. |
648 | 6.05k | // TBB and FBB should not dominate any blocks. |
649 | 6.05k | // Tail children should be transferred to Head. |
650 | 6.05k | MachineDomTreeNode *HeadNode = DomTree->getNode(IfConv.Head); |
651 | 15.0k | for (unsigned i = 0, e = Removed.size(); i != e; ++i8.97k ) { |
652 | 8.97k | MachineDomTreeNode *Node = DomTree->getNode(Removed[i]); |
653 | 8.97k | assert(Node != HeadNode && "Cannot erase the head node"); |
654 | 9.98k | while (Node->getNumChildren()) { |
655 | 1.01k | assert(Node->getBlock() == IfConv.Tail && "Unexpected children"); |
656 | 1.01k | DomTree->changeImmediateDominator(Node->getChildren().back(), HeadNode); |
657 | 1.01k | } |
658 | 8.97k | DomTree->eraseNode(Removed[i]); |
659 | 8.97k | } |
660 | 6.05k | } |
661 | | |
662 | | /// Update LoopInfo after if-conversion. |
663 | 6.05k | void EarlyIfConverter::updateLoops(ArrayRef<MachineBasicBlock*> Removed) { |
664 | 6.05k | if (!Loops) |
665 | 0 | return; |
666 | 6.05k | // If-conversion doesn't change loop structure, and it doesn't mess with back |
667 | 6.05k | // edges, so updating LoopInfo is simply removing the dead blocks. |
668 | 15.0k | for (unsigned i = 0, e = Removed.size(); 6.05k i != e; ++i8.97k ) |
669 | 8.97k | Loops->removeBlock(Removed[i]); |
670 | 6.05k | } |
671 | | |
672 | | /// Invalidate MachineTraceMetrics before if-conversion. |
673 | 6.05k | void EarlyIfConverter::invalidateTraces() { |
674 | 6.05k | Traces->verifyAnalysis(); |
675 | 6.05k | Traces->invalidate(IfConv.Head); |
676 | 6.05k | Traces->invalidate(IfConv.Tail); |
677 | 6.05k | Traces->invalidate(IfConv.TBB); |
678 | 6.05k | Traces->invalidate(IfConv.FBB); |
679 | 6.05k | Traces->verifyAnalysis(); |
680 | 6.05k | } |
681 | | |
682 | | // Adjust cycles with downward saturation. |
683 | 35.7k | static unsigned adjCycles(unsigned Cyc, int Delta) { |
684 | 35.7k | if (Delta < 0 && Cyc + Delta > Cyc0 ) |
685 | 0 | return 0; |
686 | 35.7k | return Cyc + Delta; |
687 | 35.7k | } |
688 | | |
689 | | /// Apply cost model and heuristics to the if-conversion in IfConv. |
690 | | /// Return true if the conversion is a good idea. |
691 | | /// |
692 | 7.71k | bool EarlyIfConverter::shouldConvertIf() { |
693 | 7.71k | // Stress testing mode disables all cost considerations. |
694 | 7.71k | if (Stress) |
695 | 34 | return true; |
696 | 7.67k | |
697 | 7.67k | if (!MinInstr) |
698 | 4.11k | MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount); |
699 | 7.67k | |
700 | 7.67k | MachineTraceMetrics::Trace TBBTrace = MinInstr->getTrace(IfConv.getTPred()); |
701 | 7.67k | MachineTraceMetrics::Trace FBBTrace = MinInstr->getTrace(IfConv.getFPred()); |
702 | 7.67k | LLVM_DEBUG(dbgs() << "TBB: " << TBBTrace << "FBB: " << FBBTrace); |
703 | 7.67k | unsigned MinCrit = std::min(TBBTrace.getCriticalPath(), |
704 | 7.67k | FBBTrace.getCriticalPath()); |
705 | 7.67k | |
706 | 7.67k | // Set a somewhat arbitrary limit on the critical path extension we accept. |
707 | 7.67k | unsigned CritLimit = SchedModel.MispredictPenalty/2; |
708 | 7.67k | |
709 | 7.67k | // If-conversion only makes sense when there is unexploited ILP. Compute the |
710 | 7.67k | // maximum-ILP resource length of the trace after if-conversion. Compare it |
711 | 7.67k | // to the shortest critical path. |
712 | 7.67k | SmallVector<const MachineBasicBlock*, 1> ExtraBlocks; |
713 | 7.67k | if (IfConv.TBB != IfConv.Tail) |
714 | 1.21k | ExtraBlocks.push_back(IfConv.TBB); |
715 | 7.67k | unsigned ResLength = FBBTrace.getResourceLength(ExtraBlocks); |
716 | 7.67k | LLVM_DEBUG(dbgs() << "Resource length " << ResLength |
717 | 7.67k | << ", minimal critical path " << MinCrit << '\n'); |
718 | 7.67k | if (ResLength > MinCrit + CritLimit) { |
719 | 948 | LLVM_DEBUG(dbgs() << "Not enough available ILP.\n"); |
720 | 948 | return false; |
721 | 948 | } |
722 | 6.73k | |
723 | 6.73k | // Assume that the depth of the first head terminator will also be the depth |
724 | 6.73k | // of the select instruction inserted, as determined by the flag dependency. |
725 | 6.73k | // TBB / FBB data dependencies may delay the select even more. |
726 | 6.73k | MachineTraceMetrics::Trace HeadTrace = MinInstr->getTrace(IfConv.Head); |
727 | 6.73k | unsigned BranchDepth = |
728 | 6.73k | HeadTrace.getInstrCycles(*IfConv.Head->getFirstTerminator()).Depth; |
729 | 6.73k | LLVM_DEBUG(dbgs() << "Branch depth: " << BranchDepth << '\n'); |
730 | 6.73k | |
731 | 6.73k | // Look at all the tail phis, and compute the critical path extension caused |
732 | 6.73k | // by inserting select instructions. |
733 | 6.73k | MachineTraceMetrics::Trace TailTrace = MinInstr->getTrace(IfConv.Tail); |
734 | 18.1k | for (unsigned i = 0, e = IfConv.PHIs.size(); i != e; ++i11.4k ) { |
735 | 12.1k | SSAIfConv::PHIInfo &PI = IfConv.PHIs[i]; |
736 | 12.1k | unsigned Slack = TailTrace.getInstrSlack(*PI.PHI); |
737 | 12.1k | unsigned MaxDepth = Slack + TailTrace.getInstrCycles(*PI.PHI).Depth; |
738 | 12.1k | LLVM_DEBUG(dbgs() << "Slack " << Slack << ":\t" << *PI.PHI); |
739 | 12.1k | |
740 | 12.1k | // The condition is pulled into the critical path. |
741 | 12.1k | unsigned CondDepth = adjCycles(BranchDepth, PI.CondCycles); |
742 | 12.1k | if (CondDepth > MaxDepth) { |
743 | 6.21k | unsigned Extra = CondDepth - MaxDepth; |
744 | 6.21k | LLVM_DEBUG(dbgs() << "Condition adds " << Extra << " cycles.\n"); |
745 | 6.21k | if (Extra > CritLimit) { |
746 | 374 | LLVM_DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); |
747 | 374 | return false; |
748 | 374 | } |
749 | 11.8k | } |
750 | 11.8k | |
751 | 11.8k | // The TBB value is pulled into the critical path. |
752 | 11.8k | unsigned TDepth = adjCycles(TBBTrace.getPHIDepth(*PI.PHI), PI.TCycles); |
753 | 11.8k | if (TDepth > MaxDepth) { |
754 | 5.28k | unsigned Extra = TDepth - MaxDepth; |
755 | 5.28k | LLVM_DEBUG(dbgs() << "TBB data adds " << Extra << " cycles.\n"); |
756 | 5.28k | if (Extra > CritLimit) { |
757 | 70 | LLVM_DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); |
758 | 70 | return false; |
759 | 70 | } |
760 | 11.7k | } |
761 | 11.7k | |
762 | 11.7k | // The FBB value is pulled into the critical path. |
763 | 11.7k | unsigned FDepth = adjCycles(FBBTrace.getPHIDepth(*PI.PHI), PI.FCycles); |
764 | 11.7k | if (FDepth > MaxDepth) { |
765 | 8.58k | unsigned Extra = FDepth - MaxDepth; |
766 | 8.58k | LLVM_DEBUG(dbgs() << "FBB data adds " << Extra << " cycles.\n"); |
767 | 8.58k | if (Extra > CritLimit) { |
768 | 270 | LLVM_DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); |
769 | 270 | return false; |
770 | 270 | } |
771 | 8.58k | } |
772 | 11.7k | } |
773 | 6.73k | return true6.01k ; |
774 | 6.73k | } |
775 | | |
776 | | /// Attempt repeated if-conversion on MBB, return true if successful. |
777 | | /// |
778 | 2.03M | bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) { |
779 | 2.03M | bool Changed = false; |
780 | 2.03M | while (IfConv.canConvertIf(MBB) && shouldConvertIf()7.71k ) { |
781 | 6.05k | // If-convert MBB and update analyses. |
782 | 6.05k | invalidateTraces(); |
783 | 6.05k | SmallVector<MachineBasicBlock*, 4> RemovedBlocks; |
784 | 6.05k | IfConv.convertIf(RemovedBlocks); |
785 | 6.05k | Changed = true; |
786 | 6.05k | updateDomTree(RemovedBlocks); |
787 | 6.05k | updateLoops(RemovedBlocks); |
788 | 6.05k | } |
789 | 2.03M | return Changed; |
790 | 2.03M | } |
791 | | |
792 | 411k | bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) { |
793 | 411k | LLVM_DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n" |
794 | 411k | << "********** Function: " << MF.getName() << '\n'); |
795 | 411k | if (skipFunction(MF.getFunction())) |
796 | 215 | return false; |
797 | 410k | |
798 | 410k | // Only run if conversion if the target wants it. |
799 | 410k | const TargetSubtargetInfo &STI = MF.getSubtarget(); |
800 | 410k | if (!STI.enableEarlyIfConversion()) |
801 | 135k | return false; |
802 | 275k | |
803 | 275k | TII = STI.getInstrInfo(); |
804 | 275k | TRI = STI.getRegisterInfo(); |
805 | 275k | SchedModel = STI.getSchedModel(); |
806 | 275k | MRI = &MF.getRegInfo(); |
807 | 275k | DomTree = &getAnalysis<MachineDominatorTree>(); |
808 | 275k | Loops = getAnalysisIfAvailable<MachineLoopInfo>(); |
809 | 275k | Traces = &getAnalysis<MachineTraceMetrics>(); |
810 | 275k | MinInstr = nullptr; |
811 | 275k | |
812 | 275k | bool Changed = false; |
813 | 275k | IfConv.runOnMachineFunction(MF); |
814 | 275k | |
815 | 275k | // Visit blocks in dominator tree post-order. The post-order enables nested |
816 | 275k | // if-conversion in a single pass. The tryConvertIf() function may erase |
817 | 275k | // blocks, but only blocks dominated by the head block. This makes it safe to |
818 | 275k | // update the dominator tree while the post-order iterator is still active. |
819 | 275k | for (auto DomNode : post_order(DomTree)) |
820 | 2.03M | if (tryConvertIf(DomNode->getBlock())) |
821 | 6.04k | Changed = true; |
822 | 275k | |
823 | 275k | return Changed; |
824 | 275k | } |